From d1bbef6d15357bab359177b093c0015dc61aab7c Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Fri, 21 Oct 2016 09:59:10 +0800 Subject: drm/i915/gvt: Fix failure when ACPI is not enabled Directly use memremap() instead of the ACPI function for opregion mapping. Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/opregion.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/gvt/opregion.c b/drivers/gpu/drm/i915/gvt/opregion.c index 973c8a9..9521891 100644 --- a/drivers/gpu/drm/i915/gvt/opregion.c +++ b/drivers/gpu/drm/i915/gvt/opregion.c @@ -163,7 +163,7 @@ int intel_vgpu_init_opregion(struct intel_vgpu *vgpu, u32 gpa) */ void intel_gvt_clean_opregion(struct intel_gvt *gvt) { - iounmap(gvt->opregion.opregion_va); + memunmap(gvt->opregion.opregion_va); gvt->opregion.opregion_va = NULL; } @@ -181,8 +181,8 @@ int intel_gvt_init_opregion(struct intel_gvt *gvt) pci_read_config_dword(gvt->dev_priv->drm.pdev, INTEL_GVT_PCI_OPREGION, &gvt->opregion.opregion_pa); - gvt->opregion.opregion_va = acpi_os_ioremap(gvt->opregion.opregion_pa, - INTEL_GVT_OPREGION_SIZE); + gvt->opregion.opregion_va = memremap(gvt->opregion.opregion_pa, + INTEL_GVT_OPREGION_SIZE, MEMREMAP_WB); if (!gvt->opregion.opregion_va) { gvt_err("fail to map host opregion\n"); return -EFAULT; -- cgit v1.1 From 7093f5ff75ae759ef9bdd514f917a1f57cd10d65 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Fri, 21 Oct 2016 12:33:56 +0800 Subject: drm/i915: GVT-g driver depends on 64BIT kernel We currently don't support the GVT-g driver on i386 kernels. Add an explicit dependency on a 64-bit kernel. Reviewed-by: Joonas Lahtinen Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index 6aedc96..c72b007 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -86,6 +86,7 @@ config DRM_I915_USERPTR config DRM_I915_GVT bool "Enable Intel GVT-g graphics virtualization host support" depends on DRM_I915 + depends on 64BIT default n help Choose this option if you want to enable Intel GVT-g graphics -- cgit v1.1 From 1c860a306e76756f28f3b4d589a5ca1d1829dd90 Mon Sep 17 00:00:00 2001 From: Arkadiusz Hiler Date: Fri, 21 Oct 2016 13:11:50 +0200 Subject: drm/i915/gvt: Implement WaForceWakeRenderDuringMmioTLBInvalidate When invalidating the RCS TLB, the device can enter the RC6 state and interrupt the process, hence the need for render forcewake around the whole procedure. This WA is needed for all production SKL SKUs. 
v2: reworked putting and getting forcewake with the help of Mika Kuoppala v3: use I915_READ_FW and I915_WRITE_FW as we are handling forcewake on our own in the code path References: HSD#2136899, HSD#1404391274 Cc: Mika Kuoppala Cc: Zhenyu Wang Signed-off-by: Arkadiusz Hiler Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/render.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c index feebb65..be1a7df 100644 --- a/drivers/gpu/drm/i915/gvt/render.c +++ b/drivers/gpu/drm/i915/gvt/render.c @@ -118,6 +118,7 @@ static u32 gen9_render_mocs_L3[32]; static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + enum forcewake_domains fw; i915_reg_t reg; u32 regs[] = { [RCS] = 0x4260, @@ -135,11 +136,25 @@ static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id) reg = _MMIO(regs[ring_id]); - I915_WRITE(reg, 0x1); + /* WaForceWakeRenderDuringMmioTLBInvalidate:skl * we need to put a forcewake when invalidating RCS TLB caches, * otherwise device can go to RC6 state and interrupt invalidation * process */ + fw = intel_uncore_forcewake_for_reg(dev_priv, reg, + FW_REG_READ | FW_REG_WRITE); + if (ring_id == RCS && IS_SKYLAKE(dev_priv)) + fw |= FORCEWAKE_RENDER; - if (wait_for_atomic((I915_READ(reg) == 0), 50)) + intel_uncore_forcewake_get(dev_priv, fw); + + I915_WRITE_FW(reg, 0x1); + + if (wait_for_atomic((I915_READ_FW(reg) == 0), 50)) gvt_err("timeout in invalidate ring (%d) tlb\n", ring_id); + intel_uncore_forcewake_put(dev_priv, fw); + gvt_dbg_core("invalidate TLB for ring %d\n", ring_id); } -- cgit v1.1 From 946260e5fb60fcf3a4fbe77840280b5191300564 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Sat, 22 Oct 2016 13:21:45 +0800 Subject: drm/i915/gvt: Fix broken mocs offset Fix the missed mocs offset reg setting in the save/restore functions. Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/render.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c index be1a7df..3af894b 100644 --- a/drivers/gpu/drm/i915/gvt/render.c +++ b/drivers/gpu/drm/i915/gvt/render.c @@ -177,6 +177,7 @@ static void load_mocs(struct intel_vgpu *vgpu, int ring_id) if (!IS_SKYLAKE(dev_priv)) return; + offset.reg = regs[ring_id]; for (i = 0; i < 64; i++) { gen9_render_mocs[ring_id][i] = I915_READ(offset); I915_WRITE(offset, vgpu_vreg(vgpu, offset)); @@ -214,6 +215,7 @@ static void restore_mocs(struct intel_vgpu *vgpu, int ring_id) if (!IS_SKYLAKE(dev_priv)) return; + offset.reg = regs[ring_id]; for (i = 0; i < 64; i++) { vgpu_vreg(vgpu, offset) = I915_READ(offset); I915_WRITE(offset, gen9_render_mocs[ring_id][i]); -- cgit v1.1 From 2d2c5ad83f772d7d7b0bb8348ecea42e88f89ab0 Mon Sep 17 00:00:00 2001 From: Daniel Stone Date: Fri, 21 Oct 2016 15:44:54 +0100 Subject: drm: i915: Wait for fences on new fb, not old The previous code would wait for fences on the framebuffer from the old plane state to complete, rather than the new, so you would see tearing everywhere. Fix this to wait on the new state before we make it active. 
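The root cause is easy to miss because intel_atomic_commit_tail() runs after drm_atomic_helper_swap_state(). A minimal sketch of the distinction (wait_on_request() is a hypothetical stand-in for the actual request wait):

	/* After swap_state(), the states inside 'state' are the OLD ones and
	 * plane->state points at the NEW state about to be made active. */
	for_each_plane_in_state(state, plane, plane_state, i) {
		struct intel_plane_state *new_state =
			to_intel_plane_state(plane->state); /* new, not plane_state */

		if (!new_state->wait_req)
			continue;

		wait_on_request(new_state->wait_req); /* hypothetical stand-in */
	}
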
Signed-off-by: Daniel Stone Fixes: 94f050246b42 ("drm/i915: nonblocking commit") Cc: stable@vger.kernel.org Cc: Daniel Vetter Cc: Maarten Lankhorst Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20161021144454.6288-1-daniels@collabora.com --- drivers/gpu/drm/i915/intel_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 6ccedf2..d5d96cf 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14401,7 +14401,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) for_each_plane_in_state(state, plane, plane_state, i) { struct intel_plane_state *intel_plane_state = - to_intel_plane_state(plane_state); + to_intel_plane_state(plane->state); if (!intel_plane_state->wait_req) continue; -- cgit v1.1 From 352cb4ef2d345fbe1955278890f9ea12cbe7bd21 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 21 Oct 2016 12:14:17 +0300 Subject: drm/i915: remove AGP dependency from DRM_I915 kconfig help text We haven't required AGP since 3e99a6b95614 ("drm/i915: Stop depending upon CONFIG_AGP_INTEL"). Split/rearrange the paragraphs a bit while at it. Reviewed-by: Daniel Vetter Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1477041257-8219-1-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/Kconfig | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index 1c1b19c..45a5eb7 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -24,16 +24,17 @@ config DRM_I915 including 830M, 845G, 852GM, 855GM, 865G, 915G, 945G, 965G, G35, G41, G43, G45 chipsets and Celeron, Pentium, Core i3, Core i5, Core i7 as well as Atom CPUs with integrated graphics. - If M is selected, the module will be called i915. AGP support - is required for this driver to work. This driver is used by - the Intel driver in X.org 6.8 and XFree86 4.4 and above. It - replaces the older i830 module that supported a subset of the - hardware in older X.org releases. + + This driver is used by the Intel driver in X.org 6.8 and + XFree86 4.4 and above. It replaces the older i830 module that + supported a subset of the hardware in older X.org releases. Note that the older i810/i815 chipsets require the use of the i810 driver instead, and the Atom z5xx series has an entirely different implementation. + If "M" is selected, the module will be called i915. + config DRM_I915_PRELIMINARY_HW_SUPPORT bool "Enable preliminary support for prerelease Intel hardware by default" depends on DRM_I915 -- cgit v1.1 From 275f039db56f998aaf6e2fc74acd1f653cdcb480 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 24 Oct 2016 13:42:14 +0100 Subject: drm/i915: Move user fault tracking to a separate list We want to decouple RPM and struct_mutex, but currently RPM has to walk the list of bound objects and remove userspace mmapping before we suspend (otherwise userspace may continue to access the GTT whilst it is powered down). This currently requires the struct_mutex to walk the bound_list, but if we move that to a separate list and lock we can take the first step towards removing the struct_mutex. v2: Split runtime suspend unmapping vs regular unmapping, to make the locking (and barriers) clearer. 
Add the object to the userfault_list prior to inserting the first PTE, the race between add/revoke depends upon struct_mutex for regular unmappings and rpm for runtime-suspend. Signed-off-by: Chris Wilson Reviewed-by: Daniel Vetter #v1 Link: http://patchwork.freedesktop.org/patch/msgid/20161024124218.18252-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 20 +++++++++++------ drivers/gpu/drm/i915/i915_gem.c | 42 +++++++++++++++++++++++++++-------- drivers/gpu/drm/i915/i915_gem_evict.c | 2 +- drivers/gpu/drm/i915/i915_gem_fence.c | 2 +- 5 files changed, 49 insertions(+), 19 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index dc057c7..2d9617a 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -107,7 +107,7 @@ static char get_tiling_flag(struct drm_i915_gem_object *obj) static char get_global_flag(struct drm_i915_gem_object *obj) { - return obj->fault_mappable ? 'g' : ' '; + return !list_empty(&obj->userfault_link) ? 'g' : ' '; } static char get_pin_mapped_flag(struct drm_i915_gem_object *obj) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f022f43..a53172e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1361,6 +1361,14 @@ struct i915_gem_mm { */ struct list_head unbound_list; + /** Protects access to the userfault_list */ + spinlock_t userfault_lock; + + /** List of all objects in gtt_space, currently mmaped by userspace. + * All objects within this list must also be on bound_list. + */ + struct list_head userfault_list; + /** Usable portion of the GTT for GEM */ unsigned long stolen_base; /* limited to low memory (32-bit) */ @@ -2205,6 +2213,11 @@ struct drm_i915_gem_object { struct drm_mm_node *stolen; struct list_head global_list; + /** + * Whether the object is currently in the GGTT mmap. + */ + struct list_head userfault_link; + /** Used in execbuf to temporarily hold a ref */ struct list_head obj_exec_link; @@ -2232,13 +2245,6 @@ struct drm_i915_gem_object { */ unsigned int madv:2; - /** - * Whether the current gtt mapping needs to be mappable (and isn't just - * mappable by accident). Track pin and fault separate for a more - * accurate mappable working set. 
- */ - unsigned int fault_mappable:1; - /* * Is the object to be mapped as read-only to the GPU * Only honoured if hardware has relevant pte bit diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9bb72e5..33ebf6d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1839,16 +1839,19 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf) if (ret) goto err_unpin; + /* Mark as being mmapped into userspace for later revocation */ + spin_lock(&dev_priv->mm.userfault_lock); + if (list_empty(&obj->userfault_link)) + list_add(&obj->userfault_link, &dev_priv->mm.userfault_list); + spin_unlock(&dev_priv->mm.userfault_lock); + /* Finally, remap it using the new GTT offset */ ret = remap_io_mapping(area, area->vm_start + (vma->ggtt_view.params.partial.offset << PAGE_SHIFT), (ggtt->mappable_base + vma->node.start) >> PAGE_SHIFT, min_t(u64, vma->size, area->vm_end - area->vm_start), &ggtt->mappable); - if (ret) - goto err_unpin; - obj->fault_mappable = true; err_unpin: __i915_vma_unpin(vma); err_unlock: @@ -1916,13 +1919,22 @@ err: void i915_gem_release_mmap(struct drm_i915_gem_object *obj) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); + bool zap = false; + /* Serialisation between user GTT access and our code depends upon * revoking the CPU's PTE whilst the mutex is held. The next user * pagefault then has to wait until we release the mutex. */ - lockdep_assert_held(&obj->base.dev->struct_mutex); + lockdep_assert_held(&i915->drm.struct_mutex); - if (!obj->fault_mappable) + spin_lock(&i915->mm.userfault_lock); + if (!list_empty(&obj->userfault_link)) { + list_del_init(&obj->userfault_link); + zap = true; + } + spin_unlock(&i915->mm.userfault_lock); + if (!zap) return; drm_vma_node_unmap(&obj->base.vma_node, @@ -1936,8 +1948,6 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj) * memory writes before touching registers / GSM. 
*/ wmb(); - - obj->fault_mappable = false; } void @@ -1945,8 +1955,19 @@ i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv) { struct drm_i915_gem_object *obj; - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) - i915_gem_release_mmap(obj); + spin_lock(&dev_priv->mm.userfault_lock); + while ((obj = list_first_entry_or_null(&dev_priv->mm.userfault_list, + struct drm_i915_gem_object, + userfault_link))) { + list_del_init(&obj->userfault_link); + spin_unlock(&dev_priv->mm.userfault_lock); + + drm_vma_node_unmap(&obj->base.vma_node, + obj->base.dev->anon_inode->i_mapping); + + spin_lock(&dev_priv->mm.userfault_lock); + } + spin_unlock(&dev_priv->mm.userfault_lock); } /** @@ -4108,6 +4129,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, int i; INIT_LIST_HEAD(&obj->global_list); + INIT_LIST_HEAD(&obj->userfault_link); for (i = 0; i < I915_NUM_ENGINES; i++) init_request_active(&obj->last_read[i], i915_gem_object_retire__read); @@ -4521,6 +4543,7 @@ int i915_gem_init(struct drm_device *dev) int ret; mutex_lock(&dev->struct_mutex); + spin_lock_init(&dev_priv->mm.userfault_lock); if (!i915.enable_execlists) { dev_priv->gt.resume = intel_legacy_submission_resume; @@ -4640,6 +4663,7 @@ i915_gem_load_init(struct drm_device *dev) INIT_LIST_HEAD(&dev_priv->mm.unbound_list); INIT_LIST_HEAD(&dev_priv->mm.bound_list); INIT_LIST_HEAD(&dev_priv->mm.fence_list); + INIT_LIST_HEAD(&dev_priv->mm.userfault_list); INIT_DELAYED_WORK(&dev_priv->gt.retire_work, i915_gem_retire_work_handler); INIT_DELAYED_WORK(&dev_priv->gt.idle_work, diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index b5e9e66..a934f37 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -56,7 +56,7 @@ mark_free(struct i915_vma *vma, unsigned int flags, struct list_head *unwind) if (WARN_ON(!list_empty(&vma->exec_list))) return false; - if (flags & PIN_NONFAULT && vma->obj->fault_mappable) + if (flags & PIN_NONFAULT && !list_empty(&vma->obj->userfault_link)) return false; list_add(&vma->exec_list, unwind); diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c index a6daf2d..6701317 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence.c +++ b/drivers/gpu/drm/i915/i915_gem_fence.c @@ -391,7 +391,7 @@ void i915_gem_restore_fences(struct drm_device *dev) */ if (vma && !i915_gem_object_is_tiled(vma->obj)) { GEM_BUG_ON(!reg->dirty); - GEM_BUG_ON(vma->obj->fault_mappable); + GEM_BUG_ON(!list_empty(&vma->obj->userfault_link)); list_move(&reg->link, &dev_priv->mm.fence_list); vma->fence = NULL; -- cgit v1.1 From 9c870d03674f11b49ef4f48b04d709ffa7cf1390 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 24 Oct 2016 13:42:15 +0100 Subject: drm/i915: Use RPM as the barrier for controlling user mmap access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We can remove the false coupling between RPM and struct mutex by the observation that we can use the RPM wakeref as the barrier around user mmap access. That is, since we tear down the user's PTEs atomically from within rpm suspend, and faulting in new PTEs requires the rpm wakeref, no user access is possible through those PTEs without RPM being awake. Having made that observation, we can then remove the presumption of having to take rpm outside of struct_mutex and so allow fine-grained acquisition of a wakeref around hw access rather than having to remember to acquire the wakeref early on. 
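The invariant can be restated compactly (a sketch of the argument, not driver code):

	/*
	 * runtime suspend (atomic):        user access path:
	 *   revoke all user PTEs             intel_runtime_pm_get(dev_priv);
	 *                                    ... fault in PTE / touch GGTT ...
	 *                                    intel_runtime_pm_put(dev_priv);
	 *
	 * A PTE can only be (re)created while a wakeref is held, so a
	 * suspended device implies no live user mappings: RPM itself is the
	 * barrier, and struct_mutex is no longer needed to order these paths.
	 */
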
v2: Rejig placement of the new intel_runtime_pm_get() to be as tight as possible around the GTT pread/pwrite. Signed-off-by: Chris Wilson Cc: Imre Deak Cc: Daniel Vetter Cc: Ville Syrjälä Reviewed-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20161024124218.18252-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 56 ++++++++++++++++++++-------------- drivers/gpu/drm/i915/i915_drv.c | 19 ------------ drivers/gpu/drm/i915/i915_gem.c | 42 +++++++++++++------------ drivers/gpu/drm/i915/i915_gem_gtt.c | 17 ++++++++--- drivers/gpu/drm/i915/i915_gem_tiling.c | 4 --- 5 files changed, 69 insertions(+), 69 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 2d9617a..f85e6fb 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -743,17 +743,32 @@ static int i915_interrupt_info(struct seq_file *m, void *data) I915_READ(VLV_IIR_RW)); seq_printf(m, "Display IMR:\t%08x\n", I915_READ(VLV_IMR)); - for_each_pipe(dev_priv, pipe) + for_each_pipe(dev_priv, pipe) { + enum intel_display_power_domain power_domain; + + power_domain = POWER_DOMAIN_PIPE(pipe); + if (!intel_display_power_get_if_enabled(dev_priv, + power_domain)) { + seq_printf(m, "Pipe %c power disabled\n", + pipe_name(pipe)); + continue; + } + seq_printf(m, "Pipe %c stat:\t%08x\n", pipe_name(pipe), I915_READ(PIPESTAT(pipe))); + intel_display_power_put(dev_priv, power_domain); + } + + intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); seq_printf(m, "Port hotplug:\t%08x\n", I915_READ(PORT_HOTPLUG_EN)); seq_printf(m, "DPFLIPSTAT:\t%08x\n", I915_READ(VLV_DPFLIPSTAT)); seq_printf(m, "DPINVGTT:\t%08x\n", I915_READ(DPINVGTT)); + intel_display_power_put(dev_priv, POWER_DOMAIN_INIT); for (i = 0; i < 4; i++) { seq_printf(m, "GT Interrupt IMR %d:\t%08x\n", @@ -1396,14 +1411,9 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) static int ironlake_drpc_info(struct seq_file *m) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct drm_device *dev = &dev_priv->drm; u32 rgvmodectl, rstdbyctl; u16 crstandvid; - int ret; - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; intel_runtime_pm_get(dev_priv); rgvmodectl = I915_READ(MEMMODECTL); @@ -1411,7 +1421,6 @@ static int ironlake_drpc_info(struct seq_file *m) crstandvid = I915_READ16(CRSTANDVID); intel_runtime_pm_put(dev_priv); - mutex_unlock(&dev->struct_mutex); seq_printf(m, "HD boost: %s\n", yesno(rgvmodectl & MEMMODE_BOOST_EN)); seq_printf(m, "Boost freq: %d\n", @@ -1757,6 +1766,7 @@ static int i915_sr_status(struct seq_file *m, void *unused) bool sr_enabled = false; intel_runtime_pm_get(dev_priv); + intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); if (HAS_PCH_SPLIT(dev_priv)) sr_enabled = I915_READ(WM1_LP_ILK) & WM1_LP_SR_EN; @@ -1770,6 +1780,7 @@ static int i915_sr_status(struct seq_file *m, void *unused) else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) sr_enabled = I915_READ(FW_BLC_SELF_VLV) & FW_CSPWRDWNEN; + intel_display_power_put(dev_priv, POWER_DOMAIN_INIT); intel_runtime_pm_put(dev_priv); seq_printf(m, "self-refresh: %s\n", @@ -2091,12 +2102,7 @@ static const char *swizzle_string(unsigned swizzle) static int i915_swizzle_info(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct drm_device *dev = &dev_priv->drm; - int ret; - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; 
intel_runtime_pm_get(dev_priv); seq_printf(m, "bit6 swizzle for X-tiling = %s\n", @@ -2136,7 +2142,6 @@ static int i915_swizzle_info(struct seq_file *m, void *data) seq_puts(m, "L-shaped memory detected\n"); intel_runtime_pm_put(dev_priv); - mutex_unlock(&dev->struct_mutex); return 0; } @@ -2542,11 +2547,22 @@ static int i915_edp_psr_status(struct seq_file *m, void *data) enabled = I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE; else { for_each_pipe(dev_priv, pipe) { + enum transcoder cpu_transcoder = + intel_pipe_to_cpu_transcoder(dev_priv, pipe); + enum intel_display_power_domain power_domain; + + power_domain = POWER_DOMAIN_TRANSCODER(cpu_transcoder); + if (!intel_display_power_get_if_enabled(dev_priv, + power_domain)) + continue; + stat[pipe] = I915_READ(VLV_PSRSTAT(pipe)) & VLV_EDP_PSR_CURR_STATE_MASK; if ((stat[pipe] == VLV_EDP_PSR_ACTIVE_NORFB_UP) || (stat[pipe] == VLV_EDP_PSR_ACTIVE_SF_UPDATE)) enabled = true; + + intel_display_power_put(dev_priv, power_domain); } } @@ -3094,6 +3110,8 @@ static int i915_engine_info(struct seq_file *m, void *unused) struct intel_engine_cs *engine; enum intel_engine_id id; + intel_runtime_pm_get(dev_priv); + for_each_engine(engine, dev_priv, id) { struct intel_breadcrumbs *b = &engine->breadcrumbs; struct drm_i915_gem_request *rq; @@ -3213,6 +3231,8 @@ static int i915_engine_info(struct seq_file *m, void *unused) seq_puts(m, "\n"); } + intel_runtime_pm_put(dev_priv); + return 0; } @@ -4799,13 +4819,9 @@ i915_wedged_set(void *data, u64 val) if (i915_reset_in_progress(&dev_priv->gpu_error)) return -EAGAIN; - intel_runtime_pm_get(dev_priv); - i915_handle_error(dev_priv, val, "Manually setting wedged to %llu", val); - intel_runtime_pm_put(dev_priv); - return 0; } @@ -5040,22 +5056,16 @@ static int i915_cache_sharing_get(void *data, u64 *val) { struct drm_i915_private *dev_priv = data; - struct drm_device *dev = &dev_priv->drm; u32 snpcr; - int ret; if (!(IS_GEN6(dev_priv) || IS_GEN7(dev_priv))) return -ENODEV; - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; intel_runtime_pm_get(dev_priv); snpcr = I915_READ(GEN6_MBCUNIT_SNPCR); intel_runtime_pm_put(dev_priv); - mutex_unlock(&dev->struct_mutex); *val = (snpcr & GEN6_MBC_SNPCR_MASK) >> GEN6_MBC_SNPCR_SHIFT; diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 912d534..885d33f 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -2301,24 +2301,6 @@ static int intel_runtime_suspend(struct device *kdev) DRM_DEBUG_KMS("Suspending device\n"); - /* - * We could deadlock here in case another thread holding struct_mutex - * calls RPM suspend concurrently, since the RPM suspend will wait - * first for this RPM suspend to finish. In this case the concurrent - * RPM resume will be followed by its RPM suspend counterpart. Still - * for consistency return -EAGAIN, which will reschedule this suspend. - */ - if (!mutex_trylock(&dev->struct_mutex)) { - DRM_DEBUG_KMS("device lock contention, deffering suspend\n"); - /* - * Bump the expiration timestamp, otherwise the suspend won't - * be rescheduled. - */ - pm_runtime_mark_last_busy(kdev); - - return -EAGAIN; - } - disable_rpm_wakeref_asserts(dev_priv); /* @@ -2326,7 +2308,6 @@ static int intel_runtime_suspend(struct device *kdev) * an RPM reference. 
*/ i915_gem_release_all_mmaps(dev_priv); - mutex_unlock(&dev->struct_mutex); intel_guc_suspend(dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 33ebf6d..63bf51b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -826,6 +826,7 @@ i915_gem_gtt_pread(struct drm_device *dev, uint64_t offset; int ret; + intel_runtime_pm_get(to_i915(dev)); vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); if (!IS_ERR(vma)) { node.start = i915_ggtt_offset(vma); @@ -926,6 +927,7 @@ out_unpin: i915_vma_unpin(vma); } out: + intel_runtime_pm_put(to_i915(dev)); return ret; } @@ -1060,12 +1062,9 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, ret = i915_gem_shmem_pread(dev, obj, args, file); /* pread for non shmem backed objects */ - if (ret == -EFAULT || ret == -ENODEV) { - intel_runtime_pm_get(to_i915(dev)); + if (ret == -EFAULT || ret == -ENODEV) ret = i915_gem_gtt_pread(dev, obj, args->size, args->offset, args->data_ptr); - intel_runtime_pm_put(to_i915(dev)); - } i915_gem_object_put(obj); mutex_unlock(&dev->struct_mutex); @@ -1126,6 +1125,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915, if (i915_gem_object_is_tiled(obj)) return -EFAULT; + intel_runtime_pm_get(i915); vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE | PIN_NONBLOCK); if (!IS_ERR(vma)) { @@ -1234,6 +1234,7 @@ out_unpin: i915_vma_unpin(vma); } out: + intel_runtime_pm_put(i915); return ret; } @@ -1466,12 +1467,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, * perspective, requiring manual detiling by the client. */ if (!i915_gem_object_has_struct_page(obj) || - cpu_write_needs_clflush(obj)) { - ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file); + cpu_write_needs_clflush(obj)) /* Note that the gtt paths might fail with non-page-backed user * pointers (e.g. gtt mappings when moving data between - * textures). Fallback to the shmem path in that case. */ - } + * textures). Fallback to the shmem path in that case. + */ + ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file); if (ret == -EFAULT || ret == -ENOSPC) { if (obj->phys_handle) @@ -1840,6 +1841,7 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf) goto err_unpin; /* Mark as being mmapped into userspace for later revocation */ + assert_rpm_wakelock_held(dev_priv); spin_lock(&dev_priv->mm.userfault_lock); if (list_empty(&obj->userfault_link)) list_add(&obj->userfault_link, &dev_priv->mm.userfault_list); @@ -1925,8 +1927,13 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj) /* Serialisation between user GTT access and our code depends upon * revoking the CPU's PTE whilst the mutex is held. The next user * pagefault then has to wait until we release the mutex. + * + * Note that RPM complicates somewhat by adding an additional + * requirement that operations to the GGTT be made holding the RPM + * wakeref. */ lockdep_assert_held(&i915->drm.struct_mutex); + intel_runtime_pm_get(i915); spin_lock(&i915->mm.userfault_lock); if (!list_empty(&obj->userfault_link)) { @@ -1935,7 +1942,7 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj) } spin_unlock(&i915->mm.userfault_lock); if (!zap) - return; + goto out; drm_vma_node_unmap(&obj->base.vma_node, obj->base.dev->anon_inode->i_mapping); @@ -1948,6 +1955,9 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj) * memory writes before touching registers / GSM. 
*/ wmb(); + +out: + intel_runtime_pm_put(i915); } void @@ -3476,7 +3486,7 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *i915 = to_i915(dev); struct drm_i915_gem_caching *args = data; struct drm_i915_gem_object *obj; enum i915_cache_level level; @@ -3493,23 +3503,21 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, * cacheline, whereas normally such cachelines would get * invalidated. */ - if (!HAS_LLC(dev) && !HAS_SNOOP(dev)) + if (!HAS_LLC(i915) && !HAS_SNOOP(i915)) return -ENODEV; level = I915_CACHE_LLC; break; case I915_CACHING_DISPLAY: - level = HAS_WT(dev_priv) ? I915_CACHE_WT : I915_CACHE_NONE; + level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE; break; default: return -EINVAL; } - intel_runtime_pm_get(dev_priv); - ret = i915_mutex_lock_interruptible(dev); if (ret) - goto rpm_put; + return ret; obj = i915_gem_object_lookup(file, args->handle); if (!obj) { @@ -3518,13 +3526,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, } ret = i915_gem_object_set_cache_level(obj, level); - i915_gem_object_put(obj); unlock: mutex_unlock(&dev->struct_mutex); -rpm_put: - intel_runtime_pm_put(dev_priv); - return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 062fb0a..3303635 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2667,6 +2667,7 @@ static int ggtt_bind_vma(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags) { + struct drm_i915_private *i915 = to_i915(vma->vm->dev); struct drm_i915_gem_object *obj = vma->obj; u32 pte_flags = 0; int ret; @@ -2679,8 +2680,10 @@ static int ggtt_bind_vma(struct i915_vma *vma, if (obj->gt_ro) pte_flags |= PTE_READ_ONLY; + intel_runtime_pm_get(i915); vma->vm->insert_entries(vma->vm, vma->pages, vma->node.start, cache_level, pte_flags); + intel_runtime_pm_put(i915); /* * Without aliasing PPGTT there's no difference between @@ -2696,6 +2699,7 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, enum i915_cache_level cache_level, u32 flags) { + struct drm_i915_private *i915 = to_i915(vma->vm->dev); u32 pte_flags; int ret; @@ -2710,14 +2714,15 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, if (flags & I915_VMA_GLOBAL_BIND) { + intel_runtime_pm_get(i915); vma->vm->insert_entries(vma->vm, vma->pages, vma->node.start, cache_level, pte_flags); + intel_runtime_pm_put(i915); } if (flags & I915_VMA_LOCAL_BIND) { - struct i915_hw_ppgtt *appgtt = - to_i915(vma->vm->dev)->mm.aliasing_ppgtt; + struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt; appgtt->base.insert_entries(&appgtt->base, vma->pages, vma->node.start, cache_level, pte_flags); @@ -2728,12 +2733,16 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, static void ggtt_unbind_vma(struct i915_vma *vma) { - struct i915_hw_ppgtt *appgtt = to_i915(vma->vm->dev)->mm.aliasing_ppgtt; + struct drm_i915_private *i915 = to_i915(vma->vm->dev); + struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt; const u64 size = min(vma->size, vma->node.size); - if (vma->flags & I915_VMA_GLOBAL_BIND) + if (vma->flags & I915_VMA_GLOBAL_BIND) { + intel_runtime_pm_get(i915); vma->vm->clear_range(vma->vm, vma->node.start, size); + intel_runtime_pm_put(i915); + } if (vma->flags & I915_VMA_LOCAL_BIND && appgtt) appgtt->base.clear_range(&appgtt->base, diff --git 
a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index c21bc00..71f80d2 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -205,8 +205,6 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, return -EINVAL; } - intel_runtime_pm_get(dev_priv); - mutex_lock(&dev->struct_mutex); if (obj->pin_display || obj->framebuffer_references) { err = -EBUSY; @@ -302,8 +300,6 @@ err: i915_gem_object_put(obj); mutex_unlock(&dev->struct_mutex); - intel_runtime_pm_put(dev_priv); - return err; } -- cgit v1.1 From 3594a3e21f1f77d46e89195f36292a2fd0f4aa83 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 24 Oct 2016 13:42:16 +0100 Subject: drm/i915: Remove superfluous locking around userfault_list Now that we have reduced the access to the list to either (a) under the struct_mutex whilst holding the RPM wakeref (so that concurrent writers to the list are serialised by struct_mutex) or (b) under the atomic runtime suspend (which cannot run concurrently with any other accessor due to the atomic nature of the runtime suspend) we can remove the extra locking around the list itself. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161024124218.18252-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 3 --- drivers/gpu/drm/i915/i915_gem.c | 33 ++++++++++++--------------------- 2 files changed, 12 insertions(+), 24 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a53172e..ba87a85 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1361,9 +1361,6 @@ struct i915_gem_mm { */ struct list_head unbound_list; - /** Protects access to the userfault_list */ - spinlock_t userfault_lock; - /** List of all objects in gtt_space, currently mmaped by userspace. * All objects within this list must also be on bound_list. */ struct list_head userfault_list; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 63bf51b..3805343 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1842,10 +1842,8 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf) /* Mark as being mmapped into userspace for later revocation */ assert_rpm_wakelock_held(dev_priv); - spin_lock(&dev_priv->mm.userfault_lock); if (list_empty(&obj->userfault_link)) list_add(&obj->userfault_link, &dev_priv->mm.userfault_list); - spin_unlock(&dev_priv->mm.userfault_lock); /* Finally, remap it using the new GTT offset */ ret = remap_io_mapping(area, @@ -1922,7 +1920,6 @@ void i915_gem_release_mmap(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); - bool zap = false; /* Serialisation between user GTT access and our code depends upon * revoking the CPU's PTE whilst the mutex is held. 
The next user @@ -1935,15 +1932,10 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj) lockdep_assert_held(&i915->drm.struct_mutex); intel_runtime_pm_get(i915); - spin_lock(&i915->mm.userfault_lock); - if (!list_empty(&obj->userfault_link)) { - list_del_init(&obj->userfault_link); - zap = true; - } - spin_unlock(&i915->mm.userfault_lock); - if (!zap) + if (list_empty(&obj->userfault_link)) goto out; + list_del_init(&obj->userfault_link); drm_vma_node_unmap(&obj->base.vma_node, obj->base.dev->anon_inode->i_mapping); @@ -1963,21 +1955,21 @@ out: void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv) { - struct drm_i915_gem_object *obj; + struct drm_i915_gem_object *obj, *on; - spin_lock(&dev_priv->mm.userfault_lock); - while ((obj = list_first_entry_or_null(&dev_priv->mm.userfault_list, - struct drm_i915_gem_object, - userfault_link))) { - list_del_init(&obj->userfault_link); - spin_unlock(&dev_priv->mm.userfault_lock); + /* + * Only called during RPM suspend. All users of the userfault_list + * must be holding an RPM wakeref to ensure that this can not + * run concurrently with themselves (and use the struct_mutex for + * protection between themselves). + */ + list_for_each_entry_safe(obj, on, + &dev_priv->mm.userfault_list, userfault_link) { + list_del_init(&obj->userfault_link); drm_vma_node_unmap(&obj->base.vma_node, obj->base.dev->anon_inode->i_mapping); - - spin_lock(&dev_priv->mm.userfault_lock); } - spin_unlock(&dev_priv->mm.userfault_lock); } /** @@ -4547,7 +4539,6 @@ int i915_gem_init(struct drm_device *dev) int ret; mutex_lock(&dev->struct_mutex); - spin_lock_init(&dev_priv->mm.userfault_lock); if (!i915.enable_execlists) { dev_priv->gt.resume = intel_legacy_submission_resume; -- cgit v1.1 From 2eedfc7d58997cbd7fd747aa757b8ceac8a2dc50 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 24 Oct 2016 13:42:17 +0100 Subject: drm/i915: Remove RPM sequence checking We only used the RPM sequence checking inside the lowlevel GTT accessors, when we had to rely on callers taking the wakeref on our behalf. Now that we take the RPM wakeref inside the GTT management routines themselves, we can forgo the sanitycheck of the callers. 
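In spirit, the removed assertions bracketed every low-level GTT update like this (a sketch, not the exact code):

	int seq = assert_rpm_atomic_begin(dev_priv);	/* sample pm.atomic_seq */

	gen8_set_pte(pte, gen8_pte_encode(addr, level));

	assert_rpm_atomic_end(dev_priv, seq);	/* WARN if a suspend interleaved */

With the wakeref now taken inside the binding/clearing routines themselves, the held wakeref already rules out a concurrent runtime suspend, so the sequence counter proves nothing extra.
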
Signed-off-by: Chris Wilson Cc: Imre Deak Reviewed-by: Imre Deak Link: http://patchwork.freedesktop.org/patch/msgid/20161024124218.18252-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 1 - drivers/gpu/drm/i915/i915_gem_gtt.c | 55 +-------------------------------- drivers/gpu/drm/i915/intel_drv.h | 17 ---------- drivers/gpu/drm/i915/intel_pm.c | 1 - drivers/gpu/drm/i915/intel_runtime_pm.c | 3 +- 5 files changed, 2 insertions(+), 75 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ba87a85..ad1a1fb 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1687,7 +1687,6 @@ struct skl_wm_level { */ struct i915_runtime_pm { atomic_t wakeref_count; - atomic_t atomic_seq; bool suspended; bool irqs_enabled; }; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 3303635..947d5ad 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2395,16 +2395,11 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm, gen8_pte_t __iomem *pte = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + (offset >> PAGE_SHIFT); - int rpm_atomic_seq; - - rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); gen8_set_pte(pte, gen8_pte_encode(addr, level)); I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); POSTING_READ(GFX_FLSH_CNTL_GEN6); - - assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); } static void gen8_ggtt_insert_entries(struct i915_address_space *vm, @@ -2418,11 +2413,8 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, gen8_pte_t __iomem *gtt_entries; gen8_pte_t gtt_entry; dma_addr_t addr; - int rpm_atomic_seq; int i = 0; - rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); - gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT); for_each_sgt_dma(addr, sgt_iter, st) { @@ -2446,8 +2438,6 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, */ I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); POSTING_READ(GFX_FLSH_CNTL_GEN6); - - assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); } struct insert_entries { @@ -2486,16 +2476,11 @@ static void gen6_ggtt_insert_page(struct i915_address_space *vm, gen6_pte_t __iomem *pte = (gen6_pte_t __iomem *)dev_priv->ggtt.gsm + (offset >> PAGE_SHIFT); - int rpm_atomic_seq; - - rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); iowrite32(vm->pte_encode(addr, level, flags), pte); I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); POSTING_READ(GFX_FLSH_CNTL_GEN6); - - assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); } /* @@ -2515,11 +2500,8 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, gen6_pte_t __iomem *gtt_entries; gen6_pte_t gtt_entry; dma_addr_t addr; - int rpm_atomic_seq; int i = 0; - rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); - gtt_entries = (gen6_pte_t __iomem *)ggtt->gsm + (start >> PAGE_SHIFT); for_each_sgt_dma(addr, sgt_iter, st) { @@ -2542,8 +2524,6 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, */ I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); POSTING_READ(GFX_FLSH_CNTL_GEN6); - - assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); } static void nop_clear_range(struct i915_address_space *vm, @@ -2554,7 +2534,6 @@ static void nop_clear_range(struct i915_address_space *vm, static void gen8_ggtt_clear_range(struct i915_address_space *vm, uint64_t start, uint64_t length) { - struct drm_i915_private *dev_priv = to_i915(vm->dev); struct i915_ggtt *ggtt = 
i915_vm_to_ggtt(vm); unsigned first_entry = start >> PAGE_SHIFT; unsigned num_entries = length >> PAGE_SHIFT; @@ -2562,9 +2541,6 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm, (gen8_pte_t __iomem *)ggtt->gsm + first_entry; const int max_entries = ggtt_total_entries(ggtt) - first_entry; int i; - int rpm_atomic_seq; - - rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); if (WARN(num_entries > max_entries, "First entry = %d; Num entries = %d (max=%d)\n", @@ -2576,15 +2552,12 @@ for (i = 0; i < num_entries; i++) gen8_set_pte(&gtt_base[i], scratch_pte); readl(gtt_base); - - assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); } static void gen6_ggtt_clear_range(struct i915_address_space *vm, uint64_t start, uint64_t length) { - struct drm_i915_private *dev_priv = to_i915(vm->dev); struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); unsigned first_entry = start >> PAGE_SHIFT; unsigned num_entries = length >> PAGE_SHIFT; @@ -2592,9 +2565,6 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm, (gen6_pte_t __iomem *)ggtt->gsm + first_entry; const int max_entries = ggtt_total_entries(ggtt) - first_entry; int i; - int rpm_atomic_seq; - - rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); if (WARN(num_entries > max_entries, "First entry = %d; Num entries = %d (max=%d)\n", @@ -2607,8 +2577,6 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm, for (i = 0; i < num_entries; i++) iowrite32(scratch_pte, &gtt_base[i]); readl(gtt_base); - - assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); } static void i915_ggtt_insert_page(struct i915_address_space *vm, @@ -2617,16 +2585,10 @@ enum i915_cache_level cache_level, u32 unused) { - struct drm_i915_private *dev_priv = to_i915(vm->dev); unsigned int flags = (cache_level == I915_CACHE_NONE) ? AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; - int rpm_atomic_seq; - - rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags); - - assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); } static void i915_ggtt_insert_entries(struct i915_address_space *vm, @@ -2634,33 +2596,18 @@ uint64_t start, enum i915_cache_level cache_level, u32 unused) { - struct drm_i915_private *dev_priv = to_i915(vm->dev); unsigned int flags = (cache_level == I915_CACHE_NONE) ? 
AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; - int rpm_atomic_seq; - - rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags); - assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); - } static void i915_ggtt_clear_range(struct i915_address_space *vm, uint64_t start, uint64_t length) { - struct drm_i915_private *dev_priv = to_i915(vm->dev); - unsigned first_entry = start >> PAGE_SHIFT; - unsigned num_entries = length >> PAGE_SHIFT; - int rpm_atomic_seq; - - rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); - - intel_gtt_clear_range(first_entry, num_entries); - - assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); + intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT); } static int ggtt_bind_vma(struct i915_vma *vma, diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index c06a33e..95a7d30 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1664,23 +1664,6 @@ assert_rpm_wakelock_held(struct drm_i915_private *dev_priv) DRM_DEBUG_DRIVER("RPM wakelock ref not held during HW access"); } -static inline int -assert_rpm_atomic_begin(struct drm_i915_private *dev_priv) -{ - int seq = atomic_read(&dev_priv->pm.atomic_seq); - - assert_rpm_wakelock_held(dev_priv); - - return seq; -} - -static inline void -assert_rpm_atomic_end(struct drm_i915_private *dev_priv, int begin_seq) -{ - WARN_ONCE(atomic_read(&dev_priv->pm.atomic_seq) != begin_seq, - "HW access outside of RPM atomic section\n"); -} - /** * disable_rpm_wakeref_asserts - disable the RPM assert checks * @dev_priv: i915 device instance diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index ea01b40..8a6dbfd 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -8043,5 +8043,4 @@ void intel_pm_setup(struct drm_device *dev) dev_priv->pm.suspended = false; atomic_set(&dev_priv->pm.wakeref_count, 0); - atomic_set(&dev_priv->pm.atomic_seq, 0); } diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index ee56a87..82edba2 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -2736,8 +2736,7 @@ void intel_runtime_pm_put(struct drm_i915_private *dev_priv) struct device *kdev = &pdev->dev; assert_rpm_wakelock_held(dev_priv); - if (atomic_dec_and_test(&dev_priv->pm.wakeref_count)) - atomic_inc(&dev_priv->pm.atomic_seq); + atomic_dec(&dev_priv->pm.wakeref_count); pm_runtime_mark_last_busy(kdev); pm_runtime_put_autosuspend(kdev); -- cgit v1.1 From 7c108fd8feac9a4f380c22444ff8527e8d25bc75 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 24 Oct 2016 13:42:18 +0100 Subject: drm/i915: Move fence cancellation to runtime suspend At the moment, we have dependency on the RPM as a barrier itself in both i915_gem_release_all_mmaps() and i915_gem_restore_fences(). i915_gem_restore_fences() is also called along !runtime pm paths, but we can move the markup of lost fences alongside releasing the mmaps into a common i915_gem_runtime_suspend(). This has the advantage of locating all the tricky barrier dependencies into one location. v2: Just mark the fence as invalid (fence->dirty) so that upon waking we will be sure to clear the fence after use, or restore it to the correct value before use. This makes sure that if the fence is left intact across the sleep, we do not leave it pointing to a region of GTT for the next unsuspecting user. 
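In other words, on the wake side a dirty fence register is rewritten from the software tracking before it is trusted again. A sketch, assuming the low-level fence_write() helper from i915_gem_fence.c:

	/* e.g. inside i915_gem_restore_fences() after resume or reset */
	if (reg->dirty)
		fence_write(reg, reg->vma);	/* reinstate, or clear if reg->vma is NULL */
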
Suggested-by: Daniel Vetter Signed-off-by: Chris Wilson Cc: Daniel Vetter Cc: Imre Deak Reviewed-by: Imre Deak Link: http://patchwork.freedesktop.org/patch/msgid/20161024124218.18252-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.c | 6 ++---- drivers/gpu/drm/i915/i915_drv.h | 3 ++- drivers/gpu/drm/i915/i915_gem.c | 21 +++++++++++++++++++-- drivers/gpu/drm/i915/i915_gem_fence.c | 12 +++++------- 4 files changed, 28 insertions(+), 14 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 885d33f..99e4e04 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -2278,10 +2278,8 @@ static int vlv_resume_prepare(struct drm_i915_private *dev_priv, vlv_check_no_gt_access(dev_priv); - if (rpm_resume) { + if (rpm_resume) intel_init_clock_gating(dev); - i915_gem_restore_fences(dev); - } return ret; } @@ -2307,7 +2305,7 @@ static int intel_runtime_suspend(struct device *kdev) * We are safe here against re-faults, since the fault handler takes * an RPM reference. */ - i915_gem_release_all_mmaps(dev_priv); + i915_gem_runtime_suspend(dev_priv); intel_guc_suspend(dev); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ad1a1fb..dd3acab 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3130,9 +3130,10 @@ void i915_vma_destroy(struct i915_vma *vma); int i915_gem_object_unbind(struct drm_i915_gem_object *obj); int i915_gem_object_put_pages(struct drm_i915_gem_object *obj); -void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv); void i915_gem_release_mmap(struct drm_i915_gem_object *obj); +void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv); + int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj); static inline int __sg_page_count(struct scatterlist *sg) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 3805343..58d4ad2 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1952,10 +1952,10 @@ out: intel_runtime_pm_put(i915); } -void -i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv) +void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv) { struct drm_i915_gem_object *obj, *on; + int i; /* * Only called during RPM suspend. All users of the userfault_list @@ -1970,6 +1970,23 @@ i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv) drm_vma_node_unmap(&obj->base.vma_node, obj->base.dev->anon_inode->i_mapping); } + + /* The fence will be lost when the device powers down. If any were + * in use by hardware (i.e. they are pinned), we should not be powering + * down! All other fences will be reacquired by the user upon waking. + */ + for (i = 0; i < dev_priv->num_fence_regs; i++) { + struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; + + if (WARN_ON(reg->pin_count)) + continue; + + if (!reg->vma) + continue; + + GEM_BUG_ON(!list_empty(®->vma->obj->userfault_link)); + reg->dirty = true; + } } /** diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c index 6701317..3c5a808 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence.c +++ b/drivers/gpu/drm/i915/i915_gem_fence.c @@ -343,6 +343,9 @@ i915_vma_get_fence(struct i915_vma *vma) struct drm_i915_fence_reg *fence; struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL; + /* Note that we revoke fences on runtime suspend. 
Therefore the user + * must keep the device awake whilst using the fence. + */ assert_rpm_wakelock_held(to_i915(vma->vm->dev)); /* Just update our place in the LRU if our fence is getting reused. */ @@ -368,19 +371,14 @@ i915_vma_get_fence(struct i915_vma *vma) * @dev: DRM device * * Restore the hw fence state to match the software tracking again, to be called - * after a gpu reset and on resume. + * after a gpu reset and on resume. Note that on runtime suspend we only cancel + * the fences, to be reacquired by the user later. */ void i915_gem_restore_fences(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); int i; - /* Note that this may be called outside of struct_mutex, by - * runtime suspend/resume. The barrier we require is enforced by - * rpm itself - all access to fences/GTT are only within an rpm - * wakeref, and to acquire that wakeref you must pass through here. - */ - for (i = 0; i < dev_priv->num_fence_regs; i++) { struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; struct i915_vma *vma = reg->vma; -- cgit v1.1 From 79f2624b1b9f776b173b41d743fb3dd7374b3827 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Fri, 21 Oct 2016 13:55:45 -0200 Subject: drm/i915/fbc: fix CFB size calculation for gen8+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Broadwell and newer actually compress up to 2560 lines instead of 2048 (as documented in the FBC_CTL page). If we don't take this into consideration we end up reserving too little stolen memory for the CFB, so we may allocate something else (such as a ring) right after what we reserved, and the hardware will overwrite it with the contents of the CFB when FBC is active, causing GPU hangs. Another possibility is that the CFB may be allocated at the very end of the available space, so the CFB will overlap the reserved stolen area, leading to FIFO underruns. This bug has always been a problem on BDW (the only affected platform where FBC is enabled by default), but it's much easier to reproduce since the following commit: commit c58b735fc762e891481e92af7124b85cb0a51fce Author: Chris Wilson Date: Thu Aug 18 17:16:57 2016 +0100 drm/i915: Allocate rings from stolen Of course, you can only reproduce the bug if your screen is taller than 2048 lines. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98213 Fixes: a98ee79317b4 ("drm/i915/fbc: enable FBC by default on HSW and BDW") Cc: # v4.6+ Signed-off-by: Paulo Zanoni Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1477065346-13736-1-git-send-email-paulo.r.zanoni@intel.com --- drivers/gpu/drm/i915/intel_fbc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index 617189a..6345cb8 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -104,8 +104,10 @@ static int intel_fbc_calculate_cfb_size(struct drm_i915_private *dev_priv, int lines; intel_fbc_get_plane_source_size(cache, NULL, &lines); - if (INTEL_INFO(dev_priv)->gen >= 7) + if (INTEL_GEN(dev_priv) == 7) lines = min(lines, 2048); + else if (INTEL_GEN(dev_priv) >= 8) + lines = min(lines, 2560); /* Hardware needs the full buffer stride, not just the active area. 
*/ return lines * cache->fb.stride; -- cgit v1.1 From 0fc6a9dc94e44c8eaca364b9da03d26862d6b068 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Fri, 21 Oct 2016 13:55:46 -0200 Subject: drm/i915/fbc: fix FBC_COMPRESSION_MASK on BDW+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Its size is 11:0 instead of 10:0. Found by inspecting the spec. I'm not aware of any real-world IGT failures caused by this. Signed-off-by: Paulo Zanoni Reviewed-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1477065346-13736-2-git-send-email-paulo.r.zanoni@intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 10 ++++++---- drivers/gpu/drm/i915/i915_reg.h | 5 +++-- 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index f85e6fb..a5e575a 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1683,11 +1683,13 @@ static int i915_fbc_status(struct seq_file *m, void *unused) seq_printf(m, "FBC disabled: %s\n", dev_priv->fbc.no_fbc_reason); - if (intel_fbc_is_active(dev_priv) && - INTEL_GEN(dev_priv) >= 7) + if (intel_fbc_is_active(dev_priv) && INTEL_GEN(dev_priv) >= 7) { + uint32_t mask = INTEL_GEN(dev_priv) >= 8 ? + BDW_FBC_COMPRESSION_MASK : + IVB_FBC_COMPRESSION_MASK; seq_printf(m, "Compressing: %s\n", - yesno(I915_READ(FBC_STATUS2) & - FBC_COMPRESSION_MASK)); + yesno(I915_READ(FBC_STATUS2) & mask)); + } mutex_unlock(&dev_priv->fbc.lock); intel_runtime_pm_put(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 00efaa1..a9be3f0 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2188,8 +2188,9 @@ enum skl_disp_power_wells { #define FBC_FENCE_OFF _MMIO(0x3218) /* BSpec typo has 321Bh */ #define FBC_TAG(i) _MMIO(0x3300 + (i) * 4) -#define FBC_STATUS2 _MMIO(0x43214) -#define FBC_COMPRESSION_MASK 0x7ff +#define FBC_STATUS2 _MMIO(0x43214) +#define IVB_FBC_COMPRESSION_MASK 0x7ff +#define BDW_FBC_COMPRESSION_MASK 0xfff #define FBC_LL_SIZE (1536) -- cgit v1.1 From 31f09cb79e797797bc745d98f93ac50b2cbda9e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9my=20Lefaure?= Date: Thu, 20 Oct 2016 18:05:57 -0400 Subject: drm/i915/gvt: fix bad 32 bit shift in gtt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since ioread32 returns a 32-bit value, it is impossible to left-shift this value by 32 bits (it produces a compilation error). Casting the return value of ioread32 fixes this issue. 
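The pitfall in isolation (illustrative snippet, not driver code):

	u32 lo = ioread32(addr);
	u32 hi = ioread32(addr + 4);
	u64 pte;

	pte = lo | (hi << 32);		/* broken: shifting a 32-bit value by 32 */
	pte = lo | ((u64)hi << 32);	/* fixed: widen to 64 bits before the shift */
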
Signed-off-by: Jérémy Lefaure Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 2cc7613..b35bda0 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -276,7 +276,7 @@ static u64 read_pte64(struct drm_i915_private *dev_priv, unsigned long index) pte = readq(addr); #else pte = ioread32(addr); - pte |= ioread32(addr + 4) << 32; + pte |= (u64)ioread32(addr + 4) << 32; #endif return pte; } -- cgit v1.1 From 2cce9b2894661fbc08d8aa67881c616dd8e669b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9my=20Lefaure?= Date: Thu, 20 Oct 2016 18:05:58 -0400 Subject: drm/i915/gvt: fix an error string format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is better to use the %p format for void pointers instead of casting them, because a void * is not necessarily a 64-bit value. Signed-off-by: Jérémy Lefaure Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index b35bda0..6554da9 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -1944,7 +1944,7 @@ static int create_scratch_page(struct intel_vgpu *vgpu) mfn = intel_gvt_hypervisor_virt_to_mfn(vaddr); if (mfn == INTEL_GVT_INVALID_ADDR) { - gvt_err("fail to translate vaddr:0x%llx\n", (u64)vaddr); + gvt_err("fail to translate vaddr: 0x%p\n", vaddr); __free_page(gtt->scratch_page); gtt->scratch_page = NULL; return -ENXIO; -- cgit v1.1 From 99c79fd4dfdaeb8874d5f3f14a9cd6ce7a436e1d Mon Sep 17 00:00:00 2001 From: "Du, Changbin" Date: Mon, 24 Oct 2016 15:57:47 +0800 Subject: drm/i915/gvt: use well wrapped set_mask_bits() instead of defining new one The set_mask_bits() macro is already available; just invoke it and remove our own write_bits(). Signed-off-by: Du, Changbin Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index aafb57e..477b0d4 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -1201,20 +1201,19 @@ static int gen8_update_plane_mmio_from_mi_display_flip( struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv; struct intel_vgpu *vgpu = s->vgpu; -#define write_bits(reg, e, s, v) do { \ - vgpu_vreg(vgpu, reg) &= ~GENMASK(e, s); \ - vgpu_vreg(vgpu, reg) |= (v << s); \ -} while (0) - - write_bits(info->surf_reg, 31, 12, info->surf_val); - if (IS_SKYLAKE(dev_priv)) - write_bits(info->stride_reg, 9, 0, info->stride_val); - else - write_bits(info->stride_reg, 15, 6, info->stride_val); - write_bits(info->ctrl_reg, IS_SKYLAKE(dev_priv) ? 
12 : 10,
-		   10, info->tile_val);
-
-#undef write_bits
+	set_mask_bits(&vgpu_vreg(vgpu, info->surf_reg), GENMASK(31, 12),
+		      info->surf_val << 12);
+	if (IS_SKYLAKE(dev_priv)) {
+		set_mask_bits(&vgpu_vreg(vgpu, info->stride_reg), GENMASK(9, 0),
+			      info->stride_val);
+		set_mask_bits(&vgpu_vreg(vgpu, info->ctrl_reg), GENMASK(12, 10),
+			      info->tile_val << 10);
+	} else {
+		set_mask_bits(&vgpu_vreg(vgpu, info->stride_reg), GENMASK(15, 6),
+			      info->stride_val << 6);
+		set_mask_bits(&vgpu_vreg(vgpu, info->ctrl_reg), GENMASK(10, 10),
+			      info->tile_val << 10);
+	}
 
 	vgpu_vreg(vgpu, PIPE_FRMCOUNT_G4X(info->pipe))++;
 	intel_vgpu_trigger_virtual_event(vgpu, info->event);
--
cgit v1.1

From 49129204c31cb3b6f0fbc23d6afe66c88c1c12ae Mon Sep 17 00:00:00 2001
From: "Du, Changbin"
Date: Mon, 24 Oct 2016 15:58:44 +0800
Subject: drm/i915/gvt: get msi cap offset from pdev directly

The Linux PCI driver saves the msi and msix capability offsets in
pci_dev->msi_cap and pci_dev->msix_cap. We can use msi_cap in pci_dev
directly, so there is no need to hardcode it.

Signed-off-by: Du, Changbin
Signed-off-by: Zhenyu Wang
---
 drivers/gpu/drm/i915/gvt/gvt.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'drivers/gpu/drm')

diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c
index 31b59d4..bf03777 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.c
+++ b/drivers/gpu/drm/i915/gvt/gvt.c
@@ -103,19 +103,20 @@ int intel_gvt_init_host(void)
 static void init_device_info(struct intel_gvt *gvt)
 {
 	struct intel_gvt_device_info *info = &gvt->device_info;
+	struct pci_dev *pdev = gvt->dev_priv->drm.pdev;
 
 	if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)) {
 		info->max_support_vgpus = 8;
 		info->cfg_space_size = 256;
 		info->mmio_size = 2 * 1024 * 1024;
 		info->mmio_bar = 0;
-		info->msi_cap_offset = IS_SKYLAKE(gvt->dev_priv) ? 0xac : 0x90;
 		info->gtt_start_offset = 8 * 1024 * 1024;
 		info->gtt_entry_size = 8;
 		info->gtt_entry_size_shift = 3;
 		info->gmadr_bytes_in_cmd = 8;
 		info->max_surface_size = 36 * 1024 * 1024;
 	}
+	info->msi_cap_offset = pdev->msi_cap;
 }
 
 static int gvt_service_thread(void *data)
--
cgit v1.1

From b1e37103f251f31344c7a5cc086968d982de1b1b Mon Sep 17 00:00:00 2001
From: Sagar Arun Kamble
Date: Wed, 12 Oct 2016 21:54:27 +0530
Subject: drm/i915: Decouple GuC log setup from verbosity parameter

GuC log buffer allocation was tied to the verbosity level module
parameter i915.guc_log_level. The user will be given a provision to
enable firmware logging at runtime, through a host2guc action, and not
necessarily at Driver load time. But the address of the log buffer can
be passed only in the init params, at firmware load time, so GuC would
have to be reset and the firmware reloaded to pass the log buffer
address at runtime. To avoid a reset of GuC and a reload of the
firmware, the log buffer is now always allocated, while logging is
enabled initially on the GuC side based on the value of the module
parameter guc_log_level.

v2: Update commit message to describe the constraint with allocation
of log buffer at runtime. (Tvrtko)

v3: Rebase.
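
In condensed form, the parameter programming that guc_params_init()
ends up with (see the intel_guc_loader.c hunk below) is sketched here.
The constant values are placeholders standing in for the real
definitions in intel_guc_fwif.h; only the shape of the logic is
meaningful:

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins for the firmware interface constants. */
enum { GUC_CTL_LOG_PARAMS = 3, GUC_CTL_DEBUG = 4, GUC_CTL_MAX = 8 };
#define GUC_LOG_VERBOSITY_SHIFT 4
#define GUC_LOG_DISABLED        (1u << 6)

/* The log buffer address (log_flags) is now always handed to the
 * firmware; the verbosity module parameter only decides whether
 * logging starts out enabled, so it can be toggled later without a
 * firmware reload.
 */
static void set_log_params(uint32_t *params, uint32_t log_flags,
			   int guc_log_level)
{
	params[GUC_CTL_LOG_PARAMS] = log_flags;

	if (guc_log_level >= 0)
		params[GUC_CTL_DEBUG] =
			(uint32_t)guc_log_level << GUC_LOG_VERBOSITY_SHIFT;
	else
		params[GUC_CTL_DEBUG] = GUC_LOG_DISABLED;
}

int main(void)
{
	uint32_t params[GUC_CTL_MAX] = { 0 };

	set_log_params(params, 0x1234u, -1);
	printf("debug param with logging off: 0x%x\n",
	       params[GUC_CTL_DEBUG]);
	return 0;
}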
Signed-off-by: Sagar Arun Kamble
Signed-off-by: Akash Goel
Reviewed-by: Tvrtko Ursulin
Signed-off-by: Tvrtko Ursulin
---
 drivers/gpu/drm/i915/i915_guc_submission.c | 3 ---
 drivers/gpu/drm/i915/intel_guc_loader.c    | 6 ++++--
 2 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'drivers/gpu/drm')

diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index a1f76c8..a1bea7e 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -853,9 +853,6 @@ static void guc_log_create(struct intel_guc *guc)
 	unsigned long offset;
 	uint32_t size, flags;
 
-	if (i915.guc_log_level < GUC_LOG_VERBOSITY_MIN)
-		return;
-
 	if (i915.guc_log_level > GUC_LOG_VERBOSITY_MAX)
 		i915.guc_log_level = GUC_LOG_VERBOSITY_MAX;
 
diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
index 3c8eaae..740be0b 100644
--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@@ -211,11 +211,13 @@ static void guc_params_init(struct drm_i915_private *dev_priv)
 		params[GUC_CTL_FEATURE] |= GUC_CTL_DISABLE_SCHEDULER |
 			GUC_CTL_VCS2_ENABLED;
 
+	params[GUC_CTL_LOG_PARAMS] = guc->log_flags;
+
 	if (i915.guc_log_level >= 0) {
-		params[GUC_CTL_LOG_PARAMS] = guc->log_flags;
 		params[GUC_CTL_DEBUG] =
 			i915.guc_log_level << GUC_LOG_VERBOSITY_SHIFT;
-	}
+	} else
+		params[GUC_CTL_DEBUG] = GUC_LOG_DISABLED;
 
 	if (guc->ads_vma) {
 		u32 ads = i915_ggtt_offset(guc->ads_vma) >> PAGE_SHIFT;
--
cgit v1.1

From 5d34e85a9ea526611445ffedbf883c94b55ee780 Mon Sep 17 00:00:00 2001
From: Sagar Arun Kamble
Date: Wed, 12 Oct 2016 21:54:28 +0530
Subject: drm/i915: Add GuC ukernel logging related fields to fw interface file

The first page of the GuC log buffer contains state info or metadata
which is required to parse the logs contained in the subsequent pages.
The structure representing the state info is added to the interface
file, as the Driver will need to handle log buffer flush interrupts
from GuC. Added an enum for the different message/event types that can
be sent by the GuC ukernel to the Host. Also added 2 new Host to GuC
action types to inform GuC when the Host has flushed the log buffer
and to forcefully cause GuC to send a new log buffer flush interrupt.

v2:
- Make documentation of log buffer state structure more elaborate &
  rename LOGBUFFERFLUSH action to LOG_BUFFER_FLUSH for consistency. (Tvrtko)

v3: Add GuC log buffer layout diagram for more clarity.
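
The layout introduced by the kernel-doc diagram in this patch (see the
diff below) boils down to simple page arithmetic. A stand-alone sketch,
with the page counts hard-coded to match the diagram (one shared header
page followed by three 4-page log regions):

#include <stdio.h>

#define PAGE_SIZE 4096u

enum buf_type { ISR, DPC, CRASH, MAX_TYPE };

/* Pages per log region, per the diagram; the driver derives these
 * from the GUC_LOG_*_PAGES definitions instead of hard-coding them.
 */
static const unsigned int pages[MAX_TYPE] = { 4, 4, 4 };

/* State headers share page 0; payloads start at page 1 and are laid
 * out back to back, giving pages 1, 5 and 9 with the counts above.
 */
static unsigned int payload_offset(enum buf_type t)
{
	unsigned int off = PAGE_SIZE, i;

	for (i = 0; i < (unsigned int)t; i++)
		off += pages[i] * PAGE_SIZE;
	return off;
}

int main(void)
{
	printf("ISR @ page %u, DPC @ page %u, crash @ page %u\n",
	       payload_offset(ISR) / PAGE_SIZE,
	       payload_offset(DPC) / PAGE_SIZE,
	       payload_offset(CRASH) / PAGE_SIZE);
	return 0;
}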
Signed-off-by: Sagar Arun Kamble
Signed-off-by: Akash Goel
Reviewed-by: Tvrtko Ursulin
Signed-off-by: Tvrtko Ursulin
---
 drivers/gpu/drm/i915/intel_guc_fwif.h | 78 +++++++++++++++++++++++++++++++++++
 1 file changed, 78 insertions(+)

(limited to 'drivers/gpu/drm')

diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h
index e40db2d..adb1ffd 100644
--- a/drivers/gpu/drm/i915/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/intel_guc_fwif.h
@@ -419,15 +419,87 @@ struct guc_ads {
 	u32 reserved2[4];
 } __packed;
 
+/* GuC logging structures */
+
+enum guc_log_buffer_type {
+	GUC_ISR_LOG_BUFFER,
+	GUC_DPC_LOG_BUFFER,
+	GUC_CRASH_DUMP_LOG_BUFFER,
+	GUC_MAX_LOG_BUFFER
+};
+
+/**
+ * DOC: GuC Log buffer Layout
+ *
+ * Page0  +-------------------------------+
+ *        |   ISR state header (32 bytes) |
+ *        |       DPC state header        |
+ *        |   Crash dump state header     |
+ * Page1  +-------------------------------+
+ *        |           ISR logs            |
+ * Page5  +-------------------------------+
+ *        |           DPC logs            |
+ * Page9  +-------------------------------+
+ *        |         Crash Dump logs       |
+ *        +-------------------------------+
+ *
+ * The state structure below is used for coordinating retrieval of GuC
+ * firmware logs. Separate state is maintained for each log buffer type.
+ * read_ptr points to the location where i915 last read in the log buffer
+ * and is read-only for the GuC firmware. write_ptr is incremented by GuC
+ * with the number of bytes written for each log entry and is read-only
+ * for i915.
+ * When any type of log buffer becomes half full, GuC sends a flush
+ * interrupt. GuC firmware expects that while it is writing to the 2nd
+ * half of the buffer, the first half will be consumed by the Host, which
+ * then sends a flush-completed acknowledgment, so that GuC does not end
+ * up overwriting and losing logs. So when the buffer gets half filled
+ * and i915 has requested the interrupt, GuC will set the flush_to_file
+ * field, set sampled_write_ptr to the value of write_ptr and raise the
+ * interrupt.
+ * On receiving the interrupt, i915 should read the buffer, clear the
+ * flush_to_file field and also update read_ptr with the value of
+ * sampled_write_ptr, before sending an acknowledgment to GuC. The marker
+ * and version fields are for internal usage of GuC and opaque to i915.
+ * The buffer_full_cnt field is incremented every time GuC detects the
+ * log buffer overflow.
+ */ +struct guc_log_buffer_state { + u32 marker[2]; + u32 read_ptr; + u32 write_ptr; + u32 size; + u32 sampled_write_ptr; + union { + struct { + u32 flush_to_file:1; + u32 buffer_full_cnt:4; + u32 reserved:27; + }; + u32 flags; + }; + u32 version; +} __packed; + +union guc_log_control { + struct { + u32 logging_enabled:1; + u32 reserved1:3; + u32 verbosity:4; + u32 reserved2:24; + }; + u32 value; +} __packed; + /* This Action will be programmed in C180 - SOFT_SCRATCH_O_REG */ enum host2guc_action { HOST2GUC_ACTION_DEFAULT = 0x0, HOST2GUC_ACTION_SAMPLE_FORCEWAKE = 0x6, HOST2GUC_ACTION_ALLOCATE_DOORBELL = 0x10, HOST2GUC_ACTION_DEALLOCATE_DOORBELL = 0x20, + HOST2GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE = 0x30, + HOST2GUC_ACTION_FORCE_LOG_BUFFER_FLUSH = 0x302, HOST2GUC_ACTION_ENTER_S_STATE = 0x501, HOST2GUC_ACTION_EXIT_S_STATE = 0x502, HOST2GUC_ACTION_SLPC_REQUEST = 0x3003, + HOST2GUC_ACTION_UK_LOG_ENABLE_LOGGING = 0x0E000, HOST2GUC_ACTION_LIMIT }; @@ -449,4 +521,10 @@ enum guc2host_status { GUC2HOST_STATUS_GENERIC_FAIL = GUC2HOST_STATUS(0x0000F000) }; +/* This action will be programmed in C1BC - SOFT_SCRATCH_15_REG */ +enum guc2host_message { + GUC2HOST_MSG_CRASH_DUMP_POSTED = (1 << 1), + GUC2HOST_MSG_FLUSH_LOG_BUFFER = (1 << 3) +}; + #endif -- cgit v1.1 From d6b40b4b179d5c3f57d1191052d4527af7321ad6 Mon Sep 17 00:00:00 2001 From: Akash Goel Date: Wed, 12 Oct 2016 21:54:29 +0530 Subject: drm/i915: New structure to contain GuC logging related fields So far there were 2 fields related to GuC logs in 'intel_guc' structure. For the support of capturing GuC logs & storing them in a local buffer, multiple new fields would have to be added. This warrants a separate structure to contain the fields related to GuC logging state. Added a new structure 'intel_guc_log' and instance of it inside 'intel_guc' structure. v2: Rebase. 
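
Tying the above together: the flush handshake that the
guc_log_buffer_state kernel-doc earlier in the series spells out
reduces to a few steps. A compact host-side model with simplified
types (the bitfield union is collapsed into a plain flags word; the
driver's real version is guc_read_update_log_buffer(), added later in
the series):

#include <stdint.h>
#include <string.h>

struct log_state {		/* field order as in guc_log_buffer_state */
	uint32_t marker[2];
	uint32_t read_ptr;
	uint32_t write_ptr;
	uint32_t size;
	uint32_t sampled_write_ptr;
	uint32_t flags;		/* bit 0: flush_to_file */
	uint32_t version;
};

/* On a flush interrupt: snapshot the (uncached) state page once,
 * consume up to sampled_write_ptr, then advance read_ptr and clear
 * flush_to_file before acking, so the firmware knows the drained half
 * is free again and never overwrites unread logs.
 */
void handle_flush(struct log_state *shared,
		  void (*consume)(uint32_t from, uint32_t to))
{
	struct log_state local;

	memcpy(&local, shared, sizeof(local));

	consume(local.read_ptr, local.sampled_write_ptr);

	shared->read_ptr = local.sampled_write_ptr;
	shared->flags &= ~1u;	/* clear flush_to_file */
	/* ...then send the flush-complete host2guc action. */
}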
Signed-off-by: Akash Goel Reviewed-by: Tvrtko Ursulin Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_debugfs.c | 4 ++-- drivers/gpu/drm/i915/i915_guc_submission.c | 8 ++++---- drivers/gpu/drm/i915/intel_guc.h | 8 ++++++-- drivers/gpu/drm/i915/intel_guc_loader.c | 2 +- 4 files changed, 13 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 4751b61..c571639 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2500,10 +2500,10 @@ static int i915_guc_log_dump(struct seq_file *m, void *data) struct drm_i915_gem_object *obj; int i = 0, pg; - if (!dev_priv->guc.log_vma) + if (!dev_priv->guc.log.vma) return 0; - obj = dev_priv->guc.log_vma->obj; + obj = dev_priv->guc.log.vma->obj; for (pg = 0; pg < obj->base.size / PAGE_SIZE; pg++) { u32 *log = kmap_atomic(i915_gem_object_get_page(obj, pg)); diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index a1bea7e..5e76b92 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -862,7 +862,7 @@ static void guc_log_create(struct intel_guc *guc) GUC_LOG_ISR_PAGES + 1 + GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT; - vma = guc->log_vma; + vma = guc->log.vma; if (!vma) { vma = guc_allocate_vma(guc, size); if (IS_ERR(vma)) { @@ -871,7 +871,7 @@ static void guc_log_create(struct intel_guc *guc) return; } - guc->log_vma = vma; + guc->log.vma = vma; } /* each allocated unit is a page */ @@ -881,7 +881,7 @@ static void guc_log_create(struct intel_guc *guc) (GUC_LOG_CRASH_PAGES << GUC_LOG_CRASH_SHIFT); offset = i915_ggtt_offset(vma) >> PAGE_SHIFT; /* in pages */ - guc->log_flags = (offset << GUC_LOG_BUF_ADDR_SHIFT) | flags; + guc->log.flags = (offset << GUC_LOG_BUF_ADDR_SHIFT) | flags; } static void guc_policies_init(struct guc_policies *policies) @@ -1065,7 +1065,7 @@ void i915_guc_submission_fini(struct drm_i915_private *dev_priv) struct intel_guc *guc = &dev_priv->guc; i915_vma_unpin_and_release(&guc->ads_vma); - i915_vma_unpin_and_release(&guc->log_vma); + i915_vma_unpin_and_release(&guc->log.vma); if (guc->ctx_pool_vma) ida_destroy(&guc->ctx_ids); diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 5cdf7aa..c732941 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -123,10 +123,14 @@ struct intel_guc_fw { uint32_t ucode_offset; }; +struct intel_guc_log { + uint32_t flags; + struct i915_vma *vma; +}; + struct intel_guc { struct intel_guc_fw guc_fw; - uint32_t log_flags; - struct i915_vma *log_vma; + struct intel_guc_log log; struct i915_vma *ads_vma; struct i915_vma *ctx_pool_vma; diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 740be0b..a75abb8 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -211,7 +211,7 @@ static void guc_params_init(struct drm_i915_private *dev_priv) params[GUC_CTL_FEATURE] |= GUC_CTL_DISABLE_SCHEDULER | GUC_CTL_VCS2_ENABLED; - params[GUC_CTL_LOG_PARAMS] = guc->log_flags; + params[GUC_CTL_LOG_PARAMS] = guc->log.flags; if (i915.guc_log_level >= 0) { params[GUC_CTL_DEBUG] = -- cgit v1.1 From f4e9af4f5af5dab9aee632f3aff0bd8040f1b2c5 Mon Sep 17 00:00:00 2001 From: Akash Goel Date: Wed, 12 Oct 2016 21:54:30 +0530 Subject: drm/i915: Add low level set of routines for programming PM IER/IIR/IMR register set So far PM 
IER/IIR/IMR registers were being used only for Turbo related interrupts. But interrupts coming from GuC also use the same set. As a precursor to supporting GuC interrupts, added new low level routines so as to allow sharing the programming of PM IER/IIR/IMR registers between Turbo & GuC. Also similar to PM IMR, maintaining a bitmask for PM IER register, to allow easy sharing of it between Turbo & GuC without involving a rmw operation. v2: - For appropriateness & avoid any ambiguity, rename old functions enable/disable pm_irq to mask/unmask pm_irq and rename new functions enable/disable pm_interrupts to enable/disable pm_irq. (Tvrtko) - Use u32 in place of uint32_t. (Tvrtko) v3: - Rename the fields pm_irq_mask & pm_ier_mask and do some cleanup. (Chris) - Rebase. v4: Fix the inadvertent disabling of User interrupt for VECS ring causing failure for certain IGTs. v5: Use dev_priv with HAS_VEBOX macro. (Tvrtko) Suggested-by: Chris Wilson Signed-off-by: Akash Goel Reviewed-by: Tvrtko Ursulin Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_drv.h | 3 +- drivers/gpu/drm/i915/i915_irq.c | 75 ++++++++++++++++++++++----------- drivers/gpu/drm/i915/intel_drv.h | 3 ++ drivers/gpu/drm/i915/intel_ringbuffer.c | 4 +- 4 files changed, 57 insertions(+), 28 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index dd3acab..58d332b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1836,7 +1836,8 @@ struct drm_i915_private { u32 de_irq_mask[I915_MAX_PIPES]; }; u32 gt_irq_mask; - u32 pm_irq_mask; + u32 pm_imr; + u32 pm_ier; u32 pm_rps_events; u32 pipestat_irq_mask[I915_MAX_PIPES]; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 23315e5..6412aac 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -303,18 +303,18 @@ static void snb_update_pm_irq(struct drm_i915_private *dev_priv, assert_spin_locked(&dev_priv->irq_lock); - new_val = dev_priv->pm_irq_mask; + new_val = dev_priv->pm_imr; new_val &= ~interrupt_mask; new_val |= (~enabled_irq_mask & interrupt_mask); - if (new_val != dev_priv->pm_irq_mask) { - dev_priv->pm_irq_mask = new_val; - I915_WRITE(gen6_pm_imr(dev_priv), dev_priv->pm_irq_mask); + if (new_val != dev_priv->pm_imr) { + dev_priv->pm_imr = new_val; + I915_WRITE(gen6_pm_imr(dev_priv), dev_priv->pm_imr); POSTING_READ(gen6_pm_imr(dev_priv)); } } -void gen6_enable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask) +void gen6_unmask_pm_irq(struct drm_i915_private *dev_priv, u32 mask) { if (WARN_ON(!intel_irqs_enabled(dev_priv))) return; @@ -322,28 +322,54 @@ void gen6_enable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask) snb_update_pm_irq(dev_priv, mask, mask); } -static void __gen6_disable_pm_irq(struct drm_i915_private *dev_priv, - uint32_t mask) +static void __gen6_mask_pm_irq(struct drm_i915_private *dev_priv, u32 mask) { snb_update_pm_irq(dev_priv, mask, 0); } -void gen6_disable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask) +void gen6_mask_pm_irq(struct drm_i915_private *dev_priv, u32 mask) { if (WARN_ON(!intel_irqs_enabled(dev_priv))) return; - __gen6_disable_pm_irq(dev_priv, mask); + __gen6_mask_pm_irq(dev_priv, mask); } -void gen6_reset_rps_interrupts(struct drm_i915_private *dev_priv) +void gen6_reset_pm_iir(struct drm_i915_private *dev_priv, u32 reset_mask) { i915_reg_t reg = gen6_pm_iir(dev_priv); - spin_lock_irq(&dev_priv->irq_lock); - I915_WRITE(reg, dev_priv->pm_rps_events); - 
I915_WRITE(reg, dev_priv->pm_rps_events); + assert_spin_locked(&dev_priv->irq_lock); + + I915_WRITE(reg, reset_mask); + I915_WRITE(reg, reset_mask); POSTING_READ(reg); +} + +void gen6_enable_pm_irq(struct drm_i915_private *dev_priv, u32 enable_mask) +{ + assert_spin_locked(&dev_priv->irq_lock); + + dev_priv->pm_ier |= enable_mask; + I915_WRITE(gen6_pm_ier(dev_priv), dev_priv->pm_ier); + gen6_unmask_pm_irq(dev_priv, enable_mask); + /* unmask_pm_irq provides an implicit barrier (POSTING_READ) */ +} + +void gen6_disable_pm_irq(struct drm_i915_private *dev_priv, u32 disable_mask) +{ + assert_spin_locked(&dev_priv->irq_lock); + + dev_priv->pm_ier &= ~disable_mask; + __gen6_mask_pm_irq(dev_priv, disable_mask); + I915_WRITE(gen6_pm_ier(dev_priv), dev_priv->pm_ier); + /* though a barrier is missing here, but don't really need a one */ +} + +void gen6_reset_rps_interrupts(struct drm_i915_private *dev_priv) +{ + spin_lock_irq(&dev_priv->irq_lock); + gen6_reset_pm_iir(dev_priv, dev_priv->pm_rps_events); dev_priv->rps.pm_iir = 0; spin_unlock_irq(&dev_priv->irq_lock); } @@ -357,8 +383,6 @@ void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv) WARN_ON_ONCE(dev_priv->rps.pm_iir); WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) & dev_priv->pm_rps_events); dev_priv->rps.interrupts_enabled = true; - I915_WRITE(gen6_pm_ier(dev_priv), I915_READ(gen6_pm_ier(dev_priv)) | - dev_priv->pm_rps_events); gen6_enable_pm_irq(dev_priv, dev_priv->pm_rps_events); spin_unlock_irq(&dev_priv->irq_lock); @@ -379,9 +403,7 @@ void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv) I915_WRITE(GEN6_PMINTRMSK, gen6_sanitize_rps_pm_mask(dev_priv, ~0u)); - __gen6_disable_pm_irq(dev_priv, dev_priv->pm_rps_events); - I915_WRITE(gen6_pm_ier(dev_priv), I915_READ(gen6_pm_ier(dev_priv)) & - ~dev_priv->pm_rps_events); + gen6_disable_pm_irq(dev_priv, dev_priv->pm_rps_events); spin_unlock_irq(&dev_priv->irq_lock); synchronize_irq(dev_priv->drm.irq); @@ -1085,7 +1107,7 @@ static void gen6_pm_rps_work(struct work_struct *work) pm_iir = dev_priv->rps.pm_iir; dev_priv->rps.pm_iir = 0; /* Make sure not to corrupt PMIMR state used by ringbuffer on GEN6 */ - gen6_enable_pm_irq(dev_priv, dev_priv->pm_rps_events); + gen6_unmask_pm_irq(dev_priv, dev_priv->pm_rps_events); client_boost = dev_priv->rps.client_boost; dev_priv->rps.client_boost = false; spin_unlock_irq(&dev_priv->irq_lock); @@ -1586,7 +1608,7 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir) { if (pm_iir & dev_priv->pm_rps_events) { spin_lock(&dev_priv->irq_lock); - gen6_disable_pm_irq(dev_priv, pm_iir & dev_priv->pm_rps_events); + gen6_mask_pm_irq(dev_priv, pm_iir & dev_priv->pm_rps_events); if (dev_priv->rps.interrupts_enabled) { dev_priv->rps.pm_iir |= pm_iir & dev_priv->pm_rps_events; schedule_work(&dev_priv->rps.work); @@ -3545,11 +3567,13 @@ static void gen5_gt_irq_postinstall(struct drm_device *dev) * RPS interrupts will get enabled/disabled on demand when RPS * itself is enabled/disabled. 
*/ - if (HAS_VEBOX(dev)) + if (HAS_VEBOX(dev_priv)) { pm_irqs |= PM_VEBOX_USER_INTERRUPT; + dev_priv->pm_ier |= PM_VEBOX_USER_INTERRUPT; + } - dev_priv->pm_irq_mask = 0xffffffff; - GEN5_IRQ_INIT(GEN6_PM, dev_priv->pm_irq_mask, pm_irqs); + dev_priv->pm_imr = 0xffffffff; + GEN5_IRQ_INIT(GEN6_PM, dev_priv->pm_imr, pm_irqs); } } @@ -3669,14 +3693,15 @@ static void gen8_gt_irq_postinstall(struct drm_i915_private *dev_priv) if (HAS_L3_DPF(dev_priv)) gt_interrupts[0] |= GT_RENDER_L3_PARITY_ERROR_INTERRUPT; - dev_priv->pm_irq_mask = 0xffffffff; + dev_priv->pm_ier = 0x0; + dev_priv->pm_imr = ~dev_priv->pm_ier; GEN8_IRQ_INIT_NDX(GT, 0, ~gt_interrupts[0], gt_interrupts[0]); GEN8_IRQ_INIT_NDX(GT, 1, ~gt_interrupts[1], gt_interrupts[1]); /* * RPS interrupts will get enabled/disabled on demand when RPS itself * is enabled/disabled. */ - GEN8_IRQ_INIT_NDX(GT, 2, dev_priv->pm_irq_mask, 0); + GEN8_IRQ_INIT_NDX(GT, 2, dev_priv->pm_imr, dev_priv->pm_ier); GEN8_IRQ_INIT_NDX(GT, 3, ~gt_interrupts[3], gt_interrupts[3]); } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 16b33f5..07308eb 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1123,6 +1123,9 @@ void intel_check_pch_fifo_underruns(struct drm_i915_private *dev_priv); /* i915_irq.c */ void gen5_enable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask); void gen5_disable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask); +void gen6_reset_pm_iir(struct drm_i915_private *dev_priv, u32 mask); +void gen6_mask_pm_irq(struct drm_i915_private *dev_priv, u32 mask); +void gen6_unmask_pm_irq(struct drm_i915_private *dev_priv, u32 mask); void gen6_enable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask); void gen6_disable_pm_irq(struct drm_i915_private *dev_priv, uint32_t mask); void gen6_reset_rps_interrupts(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 32786ba..67a70c5 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1608,7 +1608,7 @@ hsw_vebox_irq_enable(struct intel_engine_cs *engine) struct drm_i915_private *dev_priv = engine->i915; I915_WRITE_IMR(engine, ~engine->irq_enable_mask); - gen6_enable_pm_irq(dev_priv, engine->irq_enable_mask); + gen6_unmask_pm_irq(dev_priv, engine->irq_enable_mask); } static void @@ -1617,7 +1617,7 @@ hsw_vebox_irq_disable(struct intel_engine_cs *engine) struct drm_i915_private *dev_priv = engine->i915; I915_WRITE_IMR(engine, ~0); - gen6_disable_pm_irq(dev_priv, engine->irq_enable_mask); + gen6_mask_pm_irq(dev_priv, engine->irq_enable_mask); } static void -- cgit v1.1 From 26705e20752a4ad12dbbe334ed6402c7f0317326 Mon Sep 17 00:00:00 2001 From: Sagar Arun Kamble Date: Wed, 12 Oct 2016 21:54:31 +0530 Subject: drm/i915: Support for GuC interrupts There are certain types of interrupts which Host can receive from GuC. GuC ukernel sends an interrupt to Host for certain events, like for example retrieve/consume the logs generated by ukernel. This patch adds support to receive interrupts from GuC but currently enables & partially handles only the interrupt sent by GuC ukernel. Future patches will add support for handling other interrupt types. v2: - Use common low level routines for PM IER/IIR programming (Chris) - Rename interrupt functions to gen9_xxx from gen8_xxx (Chris) - Replace disabling of wake ref asserts with rpm get/put (Chris) v3: - Update comments for more clarity. 
(Tvrtko) - Remove the masking of GuC interrupt, which was kept masked till the start of bottom half, its not really needed as there is only a single instance of work item & wq is ordered. (Tvrtko) v4: - Rebase. - Rename guc_events to pm_guc_events so as to be indicative of the register/control block it is associated with. (Chris) - Add handling for back to back log buffer flush interrupts. v5: - Move the read & clearing of register, containing Guc2Host message bits, outside the irq spinlock. (Tvrtko) v6: - Move the log buffer flush interrupt related stuff to the following patch so as to do only generic bits in this patch. (Tvrtko) - Rebase. v7: - Remove the interrupts_enabled check from gen9_guc_irq_handler, want to process that last interrupt also before disabling the interrupt, sync against the work queued by irq handler will be done by caller disabling the interrupt. Signed-off-by: Sagar Arun Kamble Signed-off-by: Akash Goel Reviewed-by: Tvrtko Ursulin Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_guc_submission.c | 5 +++ drivers/gpu/drm/i915/i915_irq.c | 56 +++++++++++++++++++++++++++--- drivers/gpu/drm/i915/i915_reg.h | 11 ++++++ drivers/gpu/drm/i915/intel_drv.h | 3 ++ drivers/gpu/drm/i915/intel_guc.h | 3 ++ drivers/gpu/drm/i915/intel_guc_loader.c | 4 +++ 7 files changed, 79 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 58d332b..72215b7 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1839,6 +1839,7 @@ struct drm_i915_private { u32 pm_imr; u32 pm_ier; u32 pm_rps_events; + u32 pm_guc_events; u32 pipestat_irq_mask[I915_MAX_PIPES]; struct i915_hotplug hotplug; diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 5e76b92..6b77af2 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -1086,6 +1086,8 @@ int intel_guc_suspend(struct drm_device *dev) if (guc->guc_fw.guc_fw_load_status != GUC_FIRMWARE_SUCCESS) return 0; + gen9_disable_guc_interrupts(dev_priv); + ctx = dev_priv->kernel_context; data[0] = HOST2GUC_ACTION_ENTER_S_STATE; @@ -1112,6 +1114,9 @@ int intel_guc_resume(struct drm_device *dev) if (guc->guc_fw.guc_fw_load_status != GUC_FIRMWARE_SUCCESS) return 0; + if (i915.guc_log_level >= 0) + gen9_enable_guc_interrupts(dev_priv); + ctx = dev_priv->kernel_context; data[0] = HOST2GUC_ACTION_EXIT_S_STATE; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 6412aac..b37959f 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -170,6 +170,7 @@ static void gen5_assert_iir_is_zero(struct drm_i915_private *dev_priv, } while (0) static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir); +static void gen9_guc_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir); /* For display hotplug interrupt */ static inline void @@ -417,6 +418,38 @@ void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv) gen6_reset_rps_interrupts(dev_priv); } +void gen9_reset_guc_interrupts(struct drm_i915_private *dev_priv) +{ + spin_lock_irq(&dev_priv->irq_lock); + gen6_reset_pm_iir(dev_priv, dev_priv->pm_guc_events); + spin_unlock_irq(&dev_priv->irq_lock); +} + +void gen9_enable_guc_interrupts(struct drm_i915_private *dev_priv) +{ + spin_lock_irq(&dev_priv->irq_lock); + if (!dev_priv->guc.interrupts_enabled) { + 
WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) &
+			     dev_priv->pm_guc_events);
+		dev_priv->guc.interrupts_enabled = true;
+		gen6_enable_pm_irq(dev_priv, dev_priv->pm_guc_events);
+	}
+	spin_unlock_irq(&dev_priv->irq_lock);
+}
+
+void gen9_disable_guc_interrupts(struct drm_i915_private *dev_priv)
+{
+	spin_lock_irq(&dev_priv->irq_lock);
+	dev_priv->guc.interrupts_enabled = false;
+
+	gen6_disable_pm_irq(dev_priv, dev_priv->pm_guc_events);
+
+	spin_unlock_irq(&dev_priv->irq_lock);
+	synchronize_irq(dev_priv->drm.irq);
+
+	gen9_reset_guc_interrupts(dev_priv);
+}
+
 /**
  * bdw_update_port_irq - update DE port interrupt
  * @dev_priv: driver private
@@ -1346,11 +1379,13 @@ static irqreturn_t gen8_gt_irq_ack(struct drm_i915_private *dev_priv,
 			DRM_ERROR("The master control interrupt lied (GT3)!\n");
 	}
 
-	if (master_ctl & GEN8_GT_PM_IRQ) {
+	if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) {
 		gt_iir[2] = I915_READ_FW(GEN8_GT_IIR(2));
-		if (gt_iir[2] & dev_priv->pm_rps_events) {
+		if (gt_iir[2] & (dev_priv->pm_rps_events |
+				 dev_priv->pm_guc_events)) {
 			I915_WRITE_FW(GEN8_GT_IIR(2),
-				      gt_iir[2] & dev_priv->pm_rps_events);
+				      gt_iir[2] & (dev_priv->pm_rps_events |
+						   dev_priv->pm_guc_events));
 			ret = IRQ_HANDLED;
 		} else
 			DRM_ERROR("The master control interrupt lied (PM)!\n");
@@ -1382,6 +1417,9 @@ static void gen8_gt_irq_handler(struct drm_i915_private *dev_priv,
 
 	if (gt_iir[2] & dev_priv->pm_rps_events)
 		gen6_rps_irq_handler(dev_priv, gt_iir[2]);
+
+	if (gt_iir[2] & dev_priv->pm_guc_events)
+		gen9_guc_irq_handler(dev_priv, gt_iir[2]);
 }
 
 static bool bxt_port_hotplug_long_detect(enum port port, u32 val)
@@ -1628,6 +1666,13 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir)
 	}
 }
 
+static void gen9_guc_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir)
+{
+	if (gt_iir & GEN9_GUC_TO_HOST_INT_EVENT) {
+		/* TODO: Handle events for which GuC interrupted host */
+	}
+}
+
 static bool intel_pipe_handle_vblank(struct drm_i915_private *dev_priv,
 				     enum pipe pipe)
 {
@@ -3699,7 +3744,7 @@ static void gen8_gt_irq_postinstall(struct drm_i915_private *dev_priv)
 	GEN8_IRQ_INIT_NDX(GT, 1, ~gt_interrupts[1], gt_interrupts[1]);
 	/*
 	 * RPS interrupts will get enabled/disabled on demand when RPS itself
-	 * is enabled/disabled.
+	 * is enabled/disabled. Same will be the case for GuC interrupts.
*/ GEN8_IRQ_INIT_NDX(GT, 2, dev_priv->pm_imr, dev_priv->pm_ier); GEN8_IRQ_INIT_NDX(GT, 3, ~gt_interrupts[3], gt_interrupts[3]); @@ -4485,6 +4530,9 @@ void intel_irq_init(struct drm_i915_private *dev_priv) INIT_WORK(&dev_priv->rps.work, gen6_pm_rps_work); INIT_WORK(&dev_priv->l3_parity.error_work, ivybridge_parity_work); + if (HAS_GUC_SCHED(dev)) + dev_priv->pm_guc_events = GEN9_GUC_TO_HOST_INT_EVENT; + /* Let's track the enabled rps events */ if (IS_VALLEYVIEW(dev_priv)) /* WaGsvRC0ResidencyMethod:vlv */ diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index a9be3f0..bdc7b35 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -6016,6 +6016,7 @@ enum { #define GEN8_DE_PIPE_A_IRQ (1<<16) #define GEN8_DE_PIPE_IRQ(pipe) (1<<(16+(pipe))) #define GEN8_GT_VECS_IRQ (1<<6) +#define GEN8_GT_GUC_IRQ (1<<5) #define GEN8_GT_PM_IRQ (1<<4) #define GEN8_GT_VCS2_IRQ (1<<3) #define GEN8_GT_VCS1_IRQ (1<<2) @@ -6027,6 +6028,16 @@ enum { #define GEN8_GT_IIR(which) _MMIO(0x44308 + (0x10 * (which))) #define GEN8_GT_IER(which) _MMIO(0x4430c + (0x10 * (which))) +#define GEN9_GUC_TO_HOST_INT_EVENT (1<<31) +#define GEN9_GUC_EXEC_ERROR_EVENT (1<<30) +#define GEN9_GUC_DISPLAY_EVENT (1<<29) +#define GEN9_GUC_SEMA_SIGNAL_EVENT (1<<28) +#define GEN9_GUC_IOMMU_MSG_EVENT (1<<27) +#define GEN9_GUC_DB_RING_EVENT (1<<26) +#define GEN9_GUC_DMA_DONE_EVENT (1<<25) +#define GEN9_GUC_FATAL_ERROR_EVENT (1<<24) +#define GEN9_GUC_NOTIFICATION_EVENT (1<<23) + #define GEN8_RCS_IRQ_SHIFT 0 #define GEN8_BCS_IRQ_SHIFT 16 #define GEN8_VCS1_IRQ_SHIFT 0 diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 07308eb..7dda79d 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1148,6 +1148,9 @@ void gen8_irq_power_well_post_enable(struct drm_i915_private *dev_priv, unsigned int pipe_mask); void gen8_irq_power_well_pre_disable(struct drm_i915_private *dev_priv, unsigned int pipe_mask); +void gen9_reset_guc_interrupts(struct drm_i915_private *dev_priv); +void gen9_enable_guc_interrupts(struct drm_i915_private *dev_priv); +void gen9_disable_guc_interrupts(struct drm_i915_private *dev_priv); /* intel_crt.c */ void intel_crt_init(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index c732941..81bd345 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -132,6 +132,9 @@ struct intel_guc { struct intel_guc_fw guc_fw; struct intel_guc_log log; + /* GuC2Host interrupt related state */ + bool interrupts_enabled; + struct i915_vma *ads_vma; struct i915_vma *ctx_pool_vma; struct ida ctx_ids; diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index a75abb8..1aa8523 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -485,6 +485,7 @@ int intel_guc_setup(struct drm_device *dev) } guc_interrupts_release(dev_priv); + gen9_reset_guc_interrupts(dev_priv); guc_fw->guc_fw_load_status = GUC_FIRMWARE_PENDING; @@ -529,6 +530,9 @@ int intel_guc_setup(struct drm_device *dev) intel_guc_fw_status_repr(guc_fw->guc_fw_load_status)); if (i915.enable_guc_submission) { + if (i915.guc_log_level >= 0) + gen9_enable_guc_interrupts(dev_priv); + err = i915_guc_submission_enable(dev_priv); if (err) goto fail; -- cgit v1.1 From 4100b2ab3e8b2932554295ba79371d10687f1bb4 Mon Sep 17 00:00:00 2001 From: Sagar Arun Kamble Date: Wed, 12 Oct 2016 21:54:32 +0530 Subject: 
drm/i915: Handle log buffer flush interrupt event from GuC

The GuC ukernel sends an interrupt to the Host to flush the log buffer
and expects the Host to correspondingly update the read pointer
information in the state structure, once it has consumed the log buffer
contents by copying them to a file or buffer. Even if the Host couldn't
copy the contents, it can still update the read pointer so that the
logging state is not disturbed on the GuC side.

v2:
- Use a dedicated workqueue for handling flush interrupt. (Tvrtko)
- Reduce the overall log buffer copying time by skipping the copy of
  crash buffer area for regular cases and copying only the state
  structure data in first page.

v3:
- Create a vmalloc mapping of log buffer. (Chris)
- Cover the flush acknowledgment under rpm get & put. (Chris)
- Revert the change of skipping the copy of crash dump area, as not
  really needed, will be covered by subsequent patch.

v4:
- Destroy the wq under the same condition in which it was created,
  pass dev_priv pointer instead of dev to newly added GuC function,
  add more comments & rename variable for clarity. (Tvrtko)

v5:
- Allocate & destroy the dedicated wq, for handling flush interrupt,
  from the setup/teardown routines of GuC logging. (Chris)
- Validate the log buffer size value retrieved from state structure
  and do some minor cleanup. (Tvrtko)
- Fix error/warnings reported by checkpatch. (Tvrtko)
- Rebase.

v6:
- Remove the interrupts_enabled check from guc_capture_logs_work; the
  last work item, queued just before disabling the interrupt, also
  needs to be processed, as log buffer flush interrupt handling is a
  bit different case where GuC is actually expecting an ACK from host,
  which should be provided to keep the logging going. Sync against the
  work will be done by the caller disabling the interrupt.
- Don't sample the log buffer size value from the state structure;
  directly use the expected value to move the pointer & do the copy,
  and that cannot go wrong (out of bounds) as the Driver only allocated
  the log buffer and the relay buffers. The Driver should refrain from
  interpreting the log packet as much as possible and let the Userspace
  parser detect any anomaly. (Chris)

v7:
- Use switch statement instead of 'if else' for retrieving the GuC log
  buffer size. (Tvrtko)
- Refactored the log buffer copying function and shortened the names of
  a couple of variables for better readability. (Tvrtko)

v8:
- Make the dedicated wq a high-priority one, to further reduce the
  turnaround time of handling a log buffer flush event from GuC.
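
The deferral pattern itself is ordinary kernel plumbing. A toy module
of the same shape (all names invented for the demo; the driver's queue
is the ordered, high-priority "i915-guc_log" workqueue created in the
diff below):

#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *flush_wq;
static struct work_struct flush_work;

/* Stands in for i915_guc_capture_logs(): copy the log buffer, update
 * the read pointers, then ack the firmware.
 */
static void flush_work_fn(struct work_struct *work)
{
	pr_info("captured one log-buffer snapshot\n");
}

static int __init demo_init(void)
{
	/* Ordered: one item at a time, in queueing order, so snapshots
	 * cannot race; WQ_HIGHPRI keeps the turnaround short.
	 */
	flush_wq = alloc_ordered_workqueue("demo-guc_log", WQ_HIGHPRI);
	if (!flush_wq)
		return -ENOMEM;

	INIT_WORK(&flush_work, flush_work_fn);
	queue_work(flush_wq, &flush_work);	/* what the ISR would do */
	return 0;
}

static void __exit demo_exit(void)
{
	destroy_workqueue(flush_wq);	/* drains pending work first */
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");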
Signed-off-by: Sagar Arun Kamble Signed-off-by: Akash Goel Reviewed-by: Tvrtko Ursulin Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_guc_submission.c | 186 +++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_irq.c | 28 ++++- drivers/gpu/drm/i915/intel_guc.h | 4 + 3 files changed, 217 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 6b77af2..1f27de2 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -170,6 +170,15 @@ static int host2guc_sample_forcewake(struct intel_guc *guc, return host2guc_action(guc, data, ARRAY_SIZE(data)); } +static int host2guc_logbuffer_flush_complete(struct intel_guc *guc) +{ + u32 data[1]; + + data[0] = HOST2GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE; + + return host2guc_action(guc, data, 1); +} + /* * Initialise, update, or clear doorbell data shared with the GuC * @@ -847,6 +856,163 @@ err: return NULL; } +static void guc_move_to_next_buf(struct intel_guc *guc) +{ +} + +static void *guc_get_write_buffer(struct intel_guc *guc) +{ + return NULL; +} + +static unsigned int guc_get_log_buffer_size(enum guc_log_buffer_type type) +{ + switch (type) { + case GUC_ISR_LOG_BUFFER: + return (GUC_LOG_ISR_PAGES + 1) * PAGE_SIZE; + case GUC_DPC_LOG_BUFFER: + return (GUC_LOG_DPC_PAGES + 1) * PAGE_SIZE; + case GUC_CRASH_DUMP_LOG_BUFFER: + return (GUC_LOG_CRASH_PAGES + 1) * PAGE_SIZE; + default: + MISSING_CASE(type); + } + + return 0; +} + +static void guc_read_update_log_buffer(struct intel_guc *guc) +{ + struct guc_log_buffer_state *log_buf_state, *log_buf_snapshot_state; + struct guc_log_buffer_state log_buf_state_local; + unsigned int buffer_size, write_offset; + enum guc_log_buffer_type type; + void *src_data, *dst_data; + + if (WARN_ON(!guc->log.buf_addr)) + return; + + /* Get the pointer to shared GuC log buffer */ + log_buf_state = src_data = guc->log.buf_addr; + + /* Get the pointer to local buffer to store the logs */ + log_buf_snapshot_state = dst_data = guc_get_write_buffer(guc); + + /* Actual logs are present from the 2nd page */ + src_data += PAGE_SIZE; + dst_data += PAGE_SIZE; + + for (type = GUC_ISR_LOG_BUFFER; type < GUC_MAX_LOG_BUFFER; type++) { + /* Make a copy of the state structure, inside GuC log buffer + * (which is uncached mapped), on the stack to avoid reading + * from it multiple times. + */ + memcpy(&log_buf_state_local, log_buf_state, + sizeof(struct guc_log_buffer_state)); + buffer_size = guc_get_log_buffer_size(type); + write_offset = log_buf_state_local.sampled_write_ptr; + + /* Update the state of shared log buffer */ + log_buf_state->read_ptr = write_offset; + log_buf_state->flush_to_file = 0; + log_buf_state++; + + if (unlikely(!log_buf_snapshot_state)) + continue; + + /* First copy the state structure in snapshot buffer */ + memcpy(log_buf_snapshot_state, &log_buf_state_local, + sizeof(struct guc_log_buffer_state)); + + /* The write pointer could have been updated by GuC firmware, + * after sending the flush interrupt to Host, for consistency + * set write pointer value to same value of sampled_write_ptr + * in the snapshot buffer. + */ + log_buf_snapshot_state->write_ptr = write_offset; + log_buf_snapshot_state++; + + /* Now copy the actual logs. 
*/ + memcpy(dst_data, src_data, buffer_size); + + src_data += buffer_size; + dst_data += buffer_size; + + /* FIXME: invalidate/flush for log buffer needed */ + } + + if (log_buf_snapshot_state) + guc_move_to_next_buf(guc); +} + +static void guc_capture_logs_work(struct work_struct *work) +{ + struct drm_i915_private *dev_priv = + container_of(work, struct drm_i915_private, guc.log.flush_work); + + i915_guc_capture_logs(dev_priv); +} + +static void guc_log_cleanup(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + + lockdep_assert_held(&dev_priv->drm.struct_mutex); + + /* First disable the flush interrupt */ + gen9_disable_guc_interrupts(dev_priv); + + if (guc->log.flush_wq) + destroy_workqueue(guc->log.flush_wq); + + guc->log.flush_wq = NULL; + + if (guc->log.buf_addr) + i915_gem_object_unpin_map(guc->log.vma->obj); + + guc->log.buf_addr = NULL; +} + +static int guc_log_create_extras(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + void *vaddr; + int ret; + + lockdep_assert_held(&dev_priv->drm.struct_mutex); + + /* Nothing to do */ + if (i915.guc_log_level < 0) + return 0; + + if (!guc->log.buf_addr) { + /* Create a vmalloc mapping of log buffer pages */ + vaddr = i915_gem_object_pin_map(guc->log.vma->obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + ret = PTR_ERR(vaddr); + DRM_ERROR("Couldn't map log buffer pages %d\n", ret); + return ret; + } + + guc->log.buf_addr = vaddr; + } + + if (!guc->log.flush_wq) { + INIT_WORK(&guc->log.flush_work, guc_capture_logs_work); + + /* Need a dedicated wq to process log buffer flush interrupts + * from GuC without much delay so as to avoid any loss of logs. + */ + guc->log.flush_wq = alloc_ordered_workqueue("i915-guc_log", WQ_HIGHPRI); + if (guc->log.flush_wq == NULL) { + DRM_ERROR("Couldn't allocate the wq for GuC logging\n"); + return -ENOMEM; + } + } + + return 0; +} + static void guc_log_create(struct intel_guc *guc) { struct i915_vma *vma; @@ -872,6 +1038,13 @@ static void guc_log_create(struct intel_guc *guc) } guc->log.vma = vma; + + if (guc_log_create_extras(guc)) { + guc_log_cleanup(guc); + i915_vma_unpin_and_release(&guc->log.vma); + i915.guc_log_level = -1; + return; + } } /* each allocated unit is a page */ @@ -1065,6 +1238,7 @@ void i915_guc_submission_fini(struct drm_i915_private *dev_priv) struct intel_guc *guc = &dev_priv->guc; i915_vma_unpin_and_release(&guc->ads_vma); + guc_log_cleanup(guc); i915_vma_unpin_and_release(&guc->log.vma); if (guc->ctx_pool_vma) @@ -1126,3 +1300,15 @@ int intel_guc_resume(struct drm_device *dev) return host2guc_action(guc, data, ARRAY_SIZE(data)); } + +void i915_guc_capture_logs(struct drm_i915_private *dev_priv) +{ + guc_read_update_log_buffer(&dev_priv->guc); + + /* Generally device is expected to be active only at this + * time, so get/put should be really quick. 
+ */ + intel_runtime_pm_get(dev_priv); + host2guc_logbuffer_flush_complete(&dev_priv->guc); + intel_runtime_pm_put(dev_priv); +} diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index b37959f..253c221 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1669,7 +1669,33 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir) static void gen9_guc_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir) { if (gt_iir & GEN9_GUC_TO_HOST_INT_EVENT) { - /* TODO: Handle events for which GuC interrupted host */ + /* Sample the log buffer flush related bits & clear them out now + * itself from the message identity register to minimize the + * probability of losing a flush interrupt, when there are back + * to back flush interrupts. + * There can be a new flush interrupt, for different log buffer + * type (like for ISR), whilst Host is handling one (for DPC). + * Since same bit is used in message register for ISR & DPC, it + * could happen that GuC sets the bit for 2nd interrupt but Host + * clears out the bit on handling the 1st interrupt. + */ + u32 msg, flush; + + msg = I915_READ(SOFT_SCRATCH(15)); + flush = msg & (GUC2HOST_MSG_CRASH_DUMP_POSTED | + GUC2HOST_MSG_FLUSH_LOG_BUFFER); + if (flush) { + /* Clear the message bits that are handled */ + I915_WRITE(SOFT_SCRATCH(15), msg & ~flush); + + /* Handle flush interrupt in bottom half */ + queue_work(dev_priv->guc.log.flush_wq, + &dev_priv->guc.log.flush_work); + } else { + /* Not clearing of unhandled event bits won't result in + * re-triggering of the interrupt. + */ + } } } diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 81bd345..fb72313 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -126,6 +126,9 @@ struct intel_guc_fw { struct intel_guc_log { uint32_t flags; struct i915_vma *vma; + void *buf_addr; + struct workqueue_struct *flush_wq; + struct work_struct flush_work; }; struct intel_guc { @@ -170,5 +173,6 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *rq); void i915_guc_wq_unreserve(struct drm_i915_gem_request *request); void i915_guc_submission_disable(struct drm_i915_private *dev_priv); void i915_guc_submission_fini(struct drm_i915_private *dev_priv); +void i915_guc_capture_logs(struct drm_i915_private *dev_priv); #endif -- cgit v1.1 From f824083559af27d92db938733dba54617824d702 Mon Sep 17 00:00:00 2001 From: Akash Goel Date: Wed, 12 Oct 2016 21:54:34 +0530 Subject: drm/i915: Add a relay backed debugfs interface for capturing GuC logs Added a new debugfs interface '/sys/kernel/debug/dri/guc_log' for the User to capture GuC firmware logs. Availed relay framework to implement the interface, where Driver will have to just use a relay API to store snapshots of the GuC log buffer in the buffer managed by relay. The snapshot will be taken when GuC firmware sends a log buffer flush interrupt and up to four snapshots could be stored in the relay buffer. The relay buffer will be operated in a mode where it will overwrite the data not yet collected by User. Besides mmap method, through which User can directly access the relay buffer contents, relay also supports the 'poll' method. Through the 'poll' call on log file, User can come to know whenever a new snapshot of the log buffer is taken by Driver, so can run in tandem with the Driver and capture the logs in a sustained/streaming manner, without any loss of data. 
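
On the consumer side no special tooling is strictly required. A minimal
user-space reader in the spirit of the intel_guc_logger tool referenced
below (the path assumes debugfs is mounted and DRM card 0; poll()
blocks until relay signals a filled sub-buffer, and read() on a relay
file returns 0 once the available data has been drained):

#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	static char buf[32768];
	struct pollfd pfd;
	ssize_t n;

	pfd.fd = open("/sys/kernel/debug/dri/0/guc_log", O_RDONLY);
	if (pfd.fd < 0) {
		perror("open");
		return 1;
	}
	pfd.events = POLLIN;

	/* Wait for a snapshot, drain it, repeat. */
	while (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN)) {
		while ((n = read(pfd.fd, buf, sizeof(buf))) > 0)
			fwrite(buf, 1, (size_t)n, stdout);
	}

	close(pfd.fd);
	return 0;
}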
v2: Defer the creation of relay channel & associated debugfs file, as debugfs setup is now done at the end of i915 Driver load. (Chris) v3: - Switch to no-overwrite mode for relay. - Fix the relay sub buffer switching sequence. v4: - Update i915 Kconfig to select RELAY config. (TvrtKo) - Log a message when there is no sub buffer available to capture the GuC log buffer. (Tvrtko) - Increase the number of relay sub buffers to 8 from 4, to have sufficient buffering for boot time logs v5: - Fix the alignment, indentation issues and some minor cleanup. (Tvrtko) - Update the comment to elaborate on why a relay channel has to be associated with the debugfs file. (Tvrtko) v6: - Move the write to 'is_global' after the NULL check on parent directory dentry pointer. (Tvrtko) v7: Add a BUG_ON to validate relay buffer allocation size. (Chris) Testcase: igt/tools/intel_guc_logger Suggested-by: Chris Wilson Signed-off-by: Sourab Gupta Signed-off-by: Akash Goel Reviewed-by: Tvrtko Ursulin Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/Kconfig | 1 + drivers/gpu/drm/i915/i915_drv.c | 2 + drivers/gpu/drm/i915/i915_guc_submission.c | 213 ++++++++++++++++++++++++++++- drivers/gpu/drm/i915/intel_guc.h | 3 + 4 files changed, 217 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig index 45a5eb7..af13c624 100644 --- a/drivers/gpu/drm/i915/Kconfig +++ b/drivers/gpu/drm/i915/Kconfig @@ -11,6 +11,7 @@ config DRM_I915 select DRM_KMS_HELPER select DRM_PANEL select DRM_MIPI_DSI + select RELAY # i915 depends on ACPI_VIDEO when ACPI is enabled # but for select to work, need to select ACPI_VIDEO's dependencies, ick select BACKLIGHT_LCD_SUPPORT if ACPI diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 99e4e04..af3559d 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1111,6 +1111,7 @@ static void i915_driver_register(struct drm_i915_private *dev_priv) /* Reveal our presence to userspace */ if (drm_dev_register(dev, 0) == 0) { i915_debugfs_register(dev_priv); + i915_guc_register(dev_priv); i915_setup_sysfs(dev_priv); } else DRM_ERROR("Failed to register driver for userspace access!\n"); @@ -1149,6 +1150,7 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv) intel_opregion_unregister(dev_priv); i915_teardown_sysfs(dev_priv); + i915_guc_unregister(dev_priv); i915_debugfs_unregister(dev_priv); drm_dev_unregister(&dev_priv->drm); diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 1f27de2..c1d2a90 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -23,6 +23,8 @@ */ #include #include +#include +#include #include "i915_drv.h" #include "intel_guc.h" @@ -856,13 +858,160 @@ err: return NULL; } +/* + * Sub buffer switch callback. Called whenever relay has to switch to a new + * sub buffer, relay stays on the same sub buffer if 0 is returned. + */ +static int subbuf_start_callback(struct rchan_buf *buf, + void *subbuf, + void *prev_subbuf, + size_t prev_padding) +{ + /* Use no-overwrite mode by default, where relay will stop accepting + * new data if there are no empty sub buffers left. + * There is no strict synchronization enforced by relay between Consumer + * and Producer. 
In overwrite mode, there is a possibility of getting + * inconsistent/garbled data, the producer could be writing on to the + * same sub buffer from which Consumer is reading. This can't be avoided + * unless Consumer is fast enough and can always run in tandem with + * Producer. + */ + if (relay_buf_full(buf)) + return 0; + + return 1; +} + +/* + * file_create() callback. Creates relay file in debugfs. + */ +static struct dentry *create_buf_file_callback(const char *filename, + struct dentry *parent, + umode_t mode, + struct rchan_buf *buf, + int *is_global) +{ + struct dentry *buf_file; + + if (!parent) + return NULL; + + /* This to enable the use of a single buffer for the relay channel and + * correspondingly have a single file exposed to User, through which + * it can collect the logs in order without any post-processing. + */ + *is_global = 1; + + /* Not using the channel filename passed as an argument, since for each + * channel relay appends the corresponding CPU number to the filename + * passed in relay_open(). This should be fine as relay just needs a + * dentry of the file associated with the channel buffer and that file's + * name need not be same as the filename passed as an argument. + */ + buf_file = debugfs_create_file("guc_log", mode, + parent, buf, &relay_file_operations); + return buf_file; +} + +/* + * file_remove() default callback. Removes relay file in debugfs. + */ +static int remove_buf_file_callback(struct dentry *dentry) +{ + debugfs_remove(dentry); + return 0; +} + +/* relay channel callbacks */ +static struct rchan_callbacks relay_callbacks = { + .subbuf_start = subbuf_start_callback, + .create_buf_file = create_buf_file_callback, + .remove_buf_file = remove_buf_file_callback, +}; + +static void guc_log_remove_relay_file(struct intel_guc *guc) +{ + relay_close(guc->log.relay_chan); +} + +static int guc_log_create_relay_file(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct rchan *guc_log_relay_chan; + struct dentry *log_dir; + size_t n_subbufs, subbuf_size; + + /* For now create the log file in /sys/kernel/debug/dri/0 dir */ + log_dir = dev_priv->drm.primary->debugfs_root; + + /* If /sys/kernel/debug/dri/0 location do not exist, then debugfs is + * not mounted and so can't create the relay file. + * The relay API seems to fit well with debugfs only, for availing relay + * there are 3 requirements which can be met for debugfs file only in a + * straightforward/clean manner :- + * i) Need the associated dentry pointer of the file, while opening the + * relay channel. + * ii) Should be able to use 'relay_file_operations' fops for the file. + * iii) Set the 'i_private' field of file's inode to the pointer of + * relay channel buffer. + */ + if (!log_dir) { + DRM_ERROR("Debugfs dir not available yet for GuC log file\n"); + return -ENODEV; + } + + /* Keep the size of sub buffers same as shared log buffer */ + subbuf_size = guc->log.vma->obj->base.size; + + /* Store up to 8 snapshots, which is large enough to buffer sufficient + * boot time logs and provides enough leeway to User, in terms of + * latency, for consuming the logs from relay. Also doesn't take + * up too much memory. + */ + n_subbufs = 8; + + guc_log_relay_chan = relay_open("guc_log", log_dir, subbuf_size, + n_subbufs, &relay_callbacks, dev_priv); + if (!guc_log_relay_chan) { + DRM_ERROR("Couldn't create relay chan for GuC logging\n"); + return -ENOMEM; + } + + GEM_BUG_ON(guc_log_relay_chan->subbuf_size < subbuf_size); + /* FIXME: Cover the update under a lock ? 
*/ + guc->log.relay_chan = guc_log_relay_chan; + return 0; +} + static void guc_move_to_next_buf(struct intel_guc *guc) { + /* Make sure the updates made in the sub buffer are visible when + * Consumer sees the following update to offset inside the sub buffer. + */ + smp_wmb(); + + /* All data has been written, so now move the offset of sub buffer. */ + relay_reserve(guc->log.relay_chan, guc->log.vma->obj->base.size); + + /* Switch to the next sub buffer */ + relay_flush(guc->log.relay_chan); } static void *guc_get_write_buffer(struct intel_guc *guc) { - return NULL; + /* FIXME: Cover the check under a lock ? */ + if (!guc->log.relay_chan) + return NULL; + + /* Just get the base address of a new sub buffer and copy data into it + * ourselves. NULL will be returned in no-overwrite mode, if all sub + * buffers are full. Could have used the relay_write() to indirectly + * copy the data, but that would have been bit convoluted, as we need to + * write to only certain locations inside a sub buffer which cannot be + * done without using relay_reserve() along with relay_write(). So its + * better to use relay_reserve() alone. + */ + return relay_reserve(guc->log.relay_chan, 0); } static unsigned int guc_get_log_buffer_size(enum guc_log_buffer_type type) @@ -943,6 +1092,12 @@ static void guc_read_update_log_buffer(struct intel_guc *guc) if (log_buf_snapshot_state) guc_move_to_next_buf(guc); + else { + /* Used rate limited to avoid deluge of messages, logs might be + * getting consumed by User at a slow rate. + */ + DRM_ERROR_RATELIMITED("no sub-buffer to capture logs\n"); + } } static void guc_capture_logs_work(struct work_struct *work) @@ -967,6 +1122,11 @@ static void guc_log_cleanup(struct intel_guc *guc) guc->log.flush_wq = NULL; + if (guc->log.relay_chan) + guc_log_remove_relay_file(guc); + + guc->log.relay_chan = NULL; + if (guc->log.buf_addr) i915_gem_object_unpin_map(guc->log.vma->obj); @@ -1057,6 +1217,36 @@ static void guc_log_create(struct intel_guc *guc) guc->log.flags = (offset << GUC_LOG_BUF_ADDR_SHIFT) | flags; } +static int guc_log_late_setup(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + int ret; + + lockdep_assert_held(&dev_priv->drm.struct_mutex); + + if (i915.guc_log_level < 0) + return -EINVAL; + + /* If log_level was set as -1 at boot time, then setup needed to + * handle log buffer flush interrupts would not have been done yet, + * so do that now. 
+ */ + ret = guc_log_create_extras(guc); + if (ret) + goto err; + + ret = guc_log_create_relay_file(guc); + if (ret) + goto err; + + return 0; +err: + guc_log_cleanup(guc); + /* logging will remain off */ + i915.guc_log_level = -1; + return ret; +} + static void guc_policies_init(struct guc_policies *policies) { struct guc_policy *policy; @@ -1238,7 +1428,6 @@ void i915_guc_submission_fini(struct drm_i915_private *dev_priv) struct intel_guc *guc = &dev_priv->guc; i915_vma_unpin_and_release(&guc->ads_vma); - guc_log_cleanup(guc); i915_vma_unpin_and_release(&guc->log.vma); if (guc->ctx_pool_vma) @@ -1312,3 +1501,23 @@ void i915_guc_capture_logs(struct drm_i915_private *dev_priv) host2guc_logbuffer_flush_complete(&dev_priv->guc); intel_runtime_pm_put(dev_priv); } + +void i915_guc_unregister(struct drm_i915_private *dev_priv) +{ + if (!i915.enable_guc_submission) + return; + + mutex_lock(&dev_priv->drm.struct_mutex); + guc_log_cleanup(&dev_priv->guc); + mutex_unlock(&dev_priv->drm.struct_mutex); +} + +void i915_guc_register(struct drm_i915_private *dev_priv) +{ + if (!i915.enable_guc_submission) + return; + + mutex_lock(&dev_priv->drm.struct_mutex); + guc_log_late_setup(&dev_priv->guc); + mutex_unlock(&dev_priv->drm.struct_mutex); +} diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index fb72313..e7999dd 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -129,6 +129,7 @@ struct intel_guc_log { void *buf_addr; struct workqueue_struct *flush_wq; struct work_struct flush_work; + struct rchan *relay_chan; }; struct intel_guc { @@ -174,5 +175,7 @@ void i915_guc_wq_unreserve(struct drm_i915_gem_request *request); void i915_guc_submission_disable(struct drm_i915_private *dev_priv); void i915_guc_submission_fini(struct drm_i915_private *dev_priv); void i915_guc_capture_logs(struct drm_i915_private *dev_priv); +void i915_guc_register(struct drm_i915_private *dev_priv); +void i915_guc_unregister(struct drm_i915_private *dev_priv); #endif -- cgit v1.1 From 5dd7989bbd28a76f1740344e4e2d01dc848158a9 Mon Sep 17 00:00:00 2001 From: Akash Goel Date: Wed, 12 Oct 2016 21:54:35 +0530 Subject: drm/i915: New lock to serialize the Host2GuC actions With the addition of new Host2GuC actions related to GuC logging, there is a need of a lock to serialize them, as they can execute concurrently with each other and also with other existing actions. v2: Use mutex in place of spinlock to serialize, as sleep can happen while waiting for the action's response from GuC. (Tvrtko) v3: To conform to the general rules, acquire mutex before taking the forcewake. 
(Tvrtko) Signed-off-by: Akash Goel Reviewed-by: Tvrtko Ursulin Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_guc_submission.c | 3 +++ drivers/gpu/drm/i915/intel_guc.h | 3 +++ 2 files changed, 6 insertions(+) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index c1d2a90..cafff7c 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -87,6 +87,7 @@ static int host2guc_action(struct intel_guc *guc, u32 *data, u32 len) if (WARN_ON(len < 1 || len > 15)) return -EINVAL; + mutex_lock(&guc->action_lock); intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); dev_priv->guc.action_count += 1; @@ -125,6 +126,7 @@ static int host2guc_action(struct intel_guc *guc, u32 *data, u32 len) dev_priv->guc.action_status = status; intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + mutex_unlock(&guc->action_lock); return ret; } @@ -1366,6 +1368,7 @@ int i915_guc_submission_init(struct drm_i915_private *dev_priv) guc->ctx_pool_vma = vma; ida_init(&guc->ctx_ids); + mutex_init(&guc->action_lock); guc_log_create(guc); guc_addon_create(guc); diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index e7999dd..907d13a 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -157,6 +157,9 @@ struct intel_guc { uint64_t submissions[I915_NUM_ENGINES]; uint32_t last_seqno[I915_NUM_ENGINES]; + + /* To serialize the Host2GuC actions */ + struct mutex action_lock; }; /* intel_guc_loader.c */ -- cgit v1.1 From 5aa1ee4b12bee127576b3ea41eeafda2c45bc118 Mon Sep 17 00:00:00 2001 From: Akash Goel Date: Wed, 12 Oct 2016 21:54:36 +0530 Subject: drm/i915: Add stats for GuC log buffer flush interrupts GuC firmware sends an interrupt to flush the log buffer when it becomes half full. GuC firmware also tracks how many times the buffer overflowed. It would be useful to maintain statistics of how many flush interrupts were received and for which type of log buffer, along with the overflow count of each buffer type. Augmented i915_log_info debugfs to report back these statistics. v2: - Update the logic to detect multiple overflows between the 2 flush interrupts and also log a message for overflow (Tvrtko) - Track the number of times there was no free sub buffer to capture the GuC log buffer. (Tvrtko) v3: - Fix the printf field width for overflow counter, set it to 10 as per the max value of u32, which takes 10 digits in decimal form. (Tvrtko) v4: - Move the log buffer overflow handling to a new function for better readability. 
(Tvrtko) Signed-off-by: Akash Goel Reviewed-by: Tvrtko Ursulin Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_debugfs.c | 28 +++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_guc_submission.c | 31 +++++++++++++++++++++++++++++- drivers/gpu/drm/i915/i915_irq.c | 2 ++ drivers/gpu/drm/i915/intel_guc.h | 7 +++++++ 4 files changed, 67 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index c571639..e24f69c 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2416,6 +2416,32 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data) return 0; } +static void i915_guc_log_info(struct seq_file *m, + struct drm_i915_private *dev_priv) +{ + struct intel_guc *guc = &dev_priv->guc; + + seq_puts(m, "\nGuC logging stats:\n"); + + seq_printf(m, "\tISR: flush count %10u, overflow count %10u\n", + guc->log.flush_count[GUC_ISR_LOG_BUFFER], + guc->log.total_overflow_count[GUC_ISR_LOG_BUFFER]); + + seq_printf(m, "\tDPC: flush count %10u, overflow count %10u\n", + guc->log.flush_count[GUC_DPC_LOG_BUFFER], + guc->log.total_overflow_count[GUC_DPC_LOG_BUFFER]); + + seq_printf(m, "\tCRASH: flush count %10u, overflow count %10u\n", + guc->log.flush_count[GUC_CRASH_DUMP_LOG_BUFFER], + guc->log.total_overflow_count[GUC_CRASH_DUMP_LOG_BUFFER]); + + seq_printf(m, "\tTotal flush interrupt count: %u\n", + guc->log.flush_interrupt_count); + + seq_printf(m, "\tCapture miss count: %u\n", + guc->log.capture_miss_count); +} + static void i915_guc_client_info(struct seq_file *m, struct drm_i915_private *dev_priv, struct i915_guc_client *client) @@ -2489,6 +2515,8 @@ static int i915_guc_info(struct seq_file *m, void *data) seq_printf(m, "\nGuC execbuf client @ %p:\n", guc.execbuf_client); i915_guc_client_info(m, dev_priv, &client); + i915_guc_log_info(m, dev_priv); + /* Add more as required ... 
*/ return 0; diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index cafff7c..f2d71ca 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -1016,6 +1016,29 @@ static void *guc_get_write_buffer(struct intel_guc *guc) return relay_reserve(guc->log.relay_chan, 0); } +static bool +guc_check_log_buf_overflow(struct intel_guc *guc, + enum guc_log_buffer_type type, unsigned int full_cnt) +{ + unsigned int prev_full_cnt = guc->log.prev_overflow_count[type]; + bool overflow = false; + + if (full_cnt != prev_full_cnt) { + overflow = true; + + guc->log.prev_overflow_count[type] = full_cnt; + guc->log.total_overflow_count[type] += full_cnt - prev_full_cnt; + + if (full_cnt < prev_full_cnt) { + /* buffer_full_cnt is a 4 bit counter */ + guc->log.total_overflow_count[type] += 16; + } + DRM_ERROR_RATELIMITED("GuC log buffer overflow\n"); + } + + return overflow; +} + static unsigned int guc_get_log_buffer_size(enum guc_log_buffer_type type) { switch (type) { @@ -1036,7 +1059,7 @@ static void guc_read_update_log_buffer(struct intel_guc *guc) { struct guc_log_buffer_state *log_buf_state, *log_buf_snapshot_state; struct guc_log_buffer_state log_buf_state_local; - unsigned int buffer_size, write_offset; + unsigned int buffer_size, write_offset, full_cnt; enum guc_log_buffer_type type; void *src_data, *dst_data; @@ -1062,6 +1085,11 @@ static void guc_read_update_log_buffer(struct intel_guc *guc) sizeof(struct guc_log_buffer_state)); buffer_size = guc_get_log_buffer_size(type); write_offset = log_buf_state_local.sampled_write_ptr; + full_cnt = log_buf_state_local.buffer_full_cnt; + + /* Bookkeeping stuff */ + guc->log.flush_count[type] += log_buf_state_local.flush_to_file; + guc_check_log_buf_overflow(guc, type, full_cnt); /* Update the state of shared log buffer */ log_buf_state->read_ptr = write_offset; @@ -1099,6 +1127,7 @@ static void guc_read_update_log_buffer(struct intel_guc *guc) * getting consumed by User at a slow rate. */ DRM_ERROR_RATELIMITED("no sub-buffer to capture logs\n"); + guc->log.capture_miss_count++; } } diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 253c221..9111cfd 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1691,6 +1691,8 @@ static void gen9_guc_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir) /* Handle flush interrupt in bottom half */ queue_work(dev_priv->guc.log.flush_wq, &dev_priv->guc.log.flush_work); + + dev_priv->guc.log.flush_interrupt_count++; } else { /* Not clearing of unhandled event bits won't result in * re-triggering of the interrupt. 
diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 907d13a..d034c98 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -130,6 +130,13 @@ struct intel_guc_log { struct workqueue_struct *flush_wq; struct work_struct flush_work; struct rchan *relay_chan; + + /* logging related stats */ + u32 capture_miss_count; + u32 flush_interrupt_count; + u32 prev_overflow_count[GUC_MAX_LOG_BUFFER]; + u32 total_overflow_count[GUC_MAX_LOG_BUFFER]; + u32 flush_count[GUC_MAX_LOG_BUFFER]; }; struct intel_guc { -- cgit v1.1 From 6941f3c9ce4a86fdcc25a9af924e32c4b77cae96 Mon Sep 17 00:00:00 2001 From: Akash Goel Date: Wed, 12 Oct 2016 21:54:37 +0530 Subject: drm/i915: Optimization to reduce the sampling time of GuC log buffer GuC firmware sends an interrupt to flush the log buffer when it becomes half full, so the Driver doesn't really need to sample the complete buffer and can copy only the data newly written by GuC into the local buffer, i.e. as per the read & write pointer values. Moreover the flush interrupt would generally come for one type of log buffer, when it becomes half full, so at that time the other 2 types of log buffer would comparatively have much less unread data in them. In case of an overflow reported by GuC, the Driver does need to copy the entire buffer as the whole buffer would contain the unread data. v2: Rebase. v3: Fix the blooper of doing the copy twice. (Tvrtko) v4: Add curlies for 'else' case also, matching the 'if'. (Tvrtko) Signed-off-by: Akash Goel Reviewed-by: Tvrtko Ursulin Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_guc_submission.c | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index f2d71ca..7fb0077 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -1057,11 +1057,12 @@ static unsigned int guc_get_log_buffer_size(enum guc_log_buffer_type type) static void guc_read_update_log_buffer(struct intel_guc *guc) { + unsigned int buffer_size, read_offset, write_offset, bytes_to_copy, full_cnt; struct guc_log_buffer_state *log_buf_state, *log_buf_snapshot_state; struct guc_log_buffer_state log_buf_state_local; - unsigned int buffer_size, write_offset, full_cnt; enum guc_log_buffer_type type; void *src_data, *dst_data; + bool new_overflow; if (WARN_ON(!guc->log.buf_addr)) return; @@ -1084,12 +1085,13 @@ static void guc_read_update_log_buffer(struct intel_guc *guc) memcpy(&log_buf_state_local, log_buf_state, sizeof(struct guc_log_buffer_state)); buffer_size = guc_get_log_buffer_size(type); + read_offset = log_buf_state_local.read_ptr; write_offset = log_buf_state_local.sampled_write_ptr; full_cnt = log_buf_state_local.buffer_full_cnt; /* Bookkeeping stuff */ guc->log.flush_count[type] += log_buf_state_local.flush_to_file; - guc_check_log_buf_overflow(guc, type, full_cnt); + new_overflow = guc_check_log_buf_overflow(guc, type, full_cnt); /* Update the state of shared log buffer */ log_buf_state->read_ptr = write_offset; @@ -1112,7 +1114,27 @@ static void guc_read_update_log_buffer(struct intel_guc *guc) log_buf_snapshot_state++; /* Now copy the actual logs. 
*/ - memcpy(dst_data, src_data, buffer_size); + if (unlikely(new_overflow)) { + /* copy the whole buffer in case of overflow */ + read_offset = 0; + write_offset = buffer_size; + } else if (unlikely((read_offset > buffer_size) || + (write_offset > buffer_size))) { + DRM_ERROR("invalid log buffer state\n"); + /* copy whole buffer as offsets are unreliable */ + read_offset = 0; + write_offset = buffer_size; + } + + /* Just copy the newly written data */ + if (read_offset > write_offset) { + memcpy(dst_data, src_data, write_offset); + bytes_to_copy = buffer_size - read_offset; + } else { + bytes_to_copy = write_offset - read_offset; + } + memcpy(dst_data + read_offset, + src_data + read_offset, bytes_to_copy); src_data += buffer_size; dst_data += buffer_size; -- cgit v1.1 From 72c0bc66ca57d1f88df1d330a84c3e362be53af3 Mon Sep 17 00:00:00 2001 From: Akash Goel Date: Wed, 12 Oct 2016 21:54:38 +0530 Subject: drm/i915: Increase GuC log buffer size to reduce flush interrupts In cases where GuC generates logs at a very high rate, the rate of flush interrupts is correspondingly very high. So far a total of 8 pages were allocated for storing both ISR & DPC logs. As per the half-full draining protocol followed by GuC, by doubling the number of pages, the frequency of flush interrupts can be cut down to almost half, which then helps in reducing the logging overhead. So now allocating 8 pages apiece for ISR & DPC logs. This also helps in reducing the output log file size, apart from reducing the flush interrupt count. With the original settings, 44 KB was needed for one snapshot. With the modified settings, 76 KB is needed for a snapshot which will be equivalent to 2 snapshots of the original setting. So a 12 KB saving every 88 KB over the original setting. Suggested-by: Tvrtko Ursulin Signed-off-by: Akash Goel Reviewed-by: Tvrtko Ursulin Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/intel_guc_fwif.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h index adb1ffd..324ea90 100644 --- a/drivers/gpu/drm/i915/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/intel_guc_fwif.h @@ -104,9 +104,9 @@ #define GUC_LOG_ALLOC_IN_MEGABYTE (1 << 3) #define GUC_LOG_CRASH_PAGES 1 #define GUC_LOG_CRASH_SHIFT 4 -#define GUC_LOG_DPC_PAGES 3 +#define GUC_LOG_DPC_PAGES 7 #define GUC_LOG_DPC_SHIFT 6 -#define GUC_LOG_ISR_PAGES 3 +#define GUC_LOG_ISR_PAGES 7 #define GUC_LOG_ISR_SHIFT 9 #define GUC_LOG_BUF_ADDR_SHIFT 12 @@ -437,9 +437,9 @@ enum guc_log_buffer_type { * | Crash dump state header | * Page1 +-------------------------------+ * | ISR logs | - * Page5 +-------------------------------+ - * | DPC logs | * Page9 +-------------------------------+ + * | DPC logs | + * Page17 +-------------------------------+ * | Crash Dump logs | * +-------------------------------+ * -- cgit v1.1 From 27b85beae0e75232dedf5a141d3eb50f36567dd2 Mon Sep 17 00:00:00 2001 From: Akash Goel Date: Wed, 12 Oct 2016 21:54:39 +0530 Subject: drm/i915: Augment i915 error state to include the dump of GuC log buffer Added the dump of the GuC log buffer to the i915 error state, as the contents of the GuC log buffer would also be useful to determine why the GPU reset was triggered. v2: - For uniformity use existing helper function print_error_obj() to dump out contents of GuC log buffer, pretty printing is better left to userspace. (Chris) - Skip the dumping of GuC log buffer when logging is disabled as it won't be of any use. - Rebase. 
v3: Rebase. Suggested-by: Chris Wilson Signed-off-by: Akash Goel Reviewed-by: Tvrtko Ursulin Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gpu_error.c | 15 +++++++++++++++ 2 files changed, 16 insertions(+) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 72215b7..b4cb1f0 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -778,6 +778,7 @@ struct drm_i915_error_state { struct intel_overlay_error_state *overlay; struct intel_display_error_state *display; struct drm_i915_error_object *semaphore; + struct drm_i915_error_object *guc_log; struct drm_i915_error_engine { int engine_id; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 242b9a9..48fece3 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -702,6 +702,8 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, print_error_obj(m, NULL, "Semaphores", error->semaphore); + print_error_obj(m, NULL, "GuC log buffer", error->guc_log); + if (error->overlay) intel_overlay_print_error_state(m, error->overlay); @@ -782,6 +784,7 @@ static void i915_error_state_free(struct kref *error_ref) } i915_error_object_free(error->semaphore); + i915_error_object_free(error->guc_log); for (i = 0; i < ARRAY_SIZE(error->active_bo); i++) kfree(error->active_bo[i]); @@ -1408,6 +1411,17 @@ static void i915_capture_pinned_buffers(struct drm_i915_private *dev_priv, error->pinned_bo = bo; } +static void i915_gem_capture_guc_log_buffer(struct drm_i915_private *dev_priv, + struct drm_i915_error_state *error) +{ + /* Capturing log buf contents won't be useful if logging was disabled */ + if (!dev_priv->guc.log.vma || (i915.guc_log_level < 0)) + return; + + error->guc_log = i915_error_object_create(dev_priv, + dev_priv->guc.log.vma); +} + /* Capture all registers which don't fit into another category. */ static void i915_capture_reg_state(struct drm_i915_private *dev_priv, struct drm_i915_error_state *error) @@ -1532,6 +1546,7 @@ static int capture(void *data) i915_gem_record_rings(error->i915, error); i915_capture_active_buffers(error->i915, error); i915_capture_pinned_buffers(error->i915, error); + i915_gem_capture_guc_log_buffer(error->i915, error); do_gettimeofday(&error->time); -- cgit v1.1 From 896a0cb0fe728b24597439db684b18420ed97fde Mon Sep 17 00:00:00 2001 From: Sagar Arun Kamble Date: Wed, 12 Oct 2016 21:54:40 +0530 Subject: drm/i915: Support for forceful flush of GuC log buffer GuC firmware sends a flush interrupt to the Host when the log buffer is half full, and only at that time does it update the log buffer state. But in certain cases, as described below, it could be useful to have all that even when the log buffer is only partially full. For that there is a force log buffer flush Host2GuC action supported by GuC firmware. For Validation requirements, a forceful flush is needed to collect the leftover logs on disabling logging. The same can be done before proceeding with a GPU/GuC reset, as there could be some data in the log buffer which is yet to be captured, and those logs would be particularly useful to understand why the reset was initiated. 
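As an illustration only (this call site is not part of the patch; the helper itself is added in the diff below and is a no-op when GuC submission or logging is disabled):

        /* Collect whatever GuC has logged so far, e.g. just before a reset. */
        i915_guc_flush_logs(dev_priv);
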
Signed-off-by: Sagar Arun Kamble Signed-off-by: Akash Goel Reviewed-by: Tvrtko Ursulin Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_guc_submission.c | 30 ++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_guc.h | 1 + 2 files changed, 31 insertions(+) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 7fb0077..9fd9a3a 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -183,6 +183,16 @@ static int host2guc_logbuffer_flush_complete(struct intel_guc *guc) return host2guc_action(guc, data, 1); } +static int host2guc_force_logbuffer_flush(struct intel_guc *guc) +{ + u32 data[2]; + + data[0] = HOST2GUC_ACTION_FORCE_LOG_BUFFER_FLUSH; + data[1] = 0; + + return host2guc_action(guc, data, 2); +} + /* * Initialise, update, or clear doorbell data shared with the GuC * @@ -1556,6 +1566,26 @@ void i915_guc_capture_logs(struct drm_i915_private *dev_priv) intel_runtime_pm_put(dev_priv); } +void i915_guc_flush_logs(struct drm_i915_private *dev_priv) +{ + if (!i915.enable_guc_submission || (i915.guc_log_level < 0)) + return; + + /* First disable the interrupts, will be re-enabled afterwards */ + gen9_disable_guc_interrupts(dev_priv); + + /* Before initiating the forceful flush, wait for any pending/ongoing + * flush to complete otherwise forceful flush may not actually happen. + */ + flush_work(&dev_priv->guc.log.flush_work); + + /* Ask GuC to update the log buffer state */ + host2guc_force_logbuffer_flush(&dev_priv->guc); + + /* GuC would have updated log buffer by now, so capture it */ + i915_guc_capture_logs(dev_priv); +} + void i915_guc_unregister(struct drm_i915_private *dev_priv) { if (!i915.enable_guc_submission) diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index d034c98..71da75e 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -185,6 +185,7 @@ void i915_guc_wq_unreserve(struct drm_i915_gem_request *request); void i915_guc_submission_disable(struct drm_i915_private *dev_priv); void i915_guc_submission_fini(struct drm_i915_private *dev_priv); void i915_guc_capture_logs(struct drm_i915_private *dev_priv); +void i915_guc_flush_logs(struct drm_i915_private *dev_priv); void i915_guc_register(struct drm_i915_private *dev_priv); void i915_guc_unregister(struct drm_i915_private *dev_priv); -- cgit v1.1 From 685534ef4c8faff1f69c9962ad3013e17354ff8b Mon Sep 17 00:00:00 2001 From: Sagar Arun Kamble Date: Wed, 12 Oct 2016 21:54:41 +0530 Subject: drm/i915: Debugfs support for GuC logging control This patch provides a debugfs interface, i915_guc_log_control, for on-the-fly enabling/disabling of logging in GuC firmware and controlling the verbosity level of logs. The value written to the file should have bit 0 set to enable logging and bits 4-7 should contain the verbosity info. v2: Add a forceful flush, to collect leftover logs, on disabling logging. Useful for Validation. v3: Besides minor cleanup, implement read method for the debugfs file and set the guc_log_level to -1 when logging is disabled. (Tvrtko) v4: Minor cleanup & rebase. (Tvrtko) v5: - Lock struct_mutex after the NULL check for guc log buffer vma. (Chris) - Rebase. 
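To illustrate the control value layout described above (bit 0 = enable, bits 4-7 = verbosity; this encoding is a sketch based on that description, not code from the patch):

        /* Compose a value to write to the i915_guc_log_control debugfs file. */
        u64 val = (2ULL << 4) | 1ULL;   /* verbosity 2, logging enabled */

Writing a value with bit 0 cleared disables logging and triggers the final forceful flush handled by i915_guc_log_control() in the diff below.
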
Signed-off-by: Sagar Arun Kamble Signed-off-by: Akash Goel Reviewed-by: Tvrtko Ursulin Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_debugfs.c | 41 ++++++++++++++++++++- drivers/gpu/drm/i915/i915_guc_submission.c | 59 ++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_guc.h | 1 + 3 files changed, 100 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index e24f69c..be92efe 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -2548,6 +2548,44 @@ static int i915_guc_log_dump(struct seq_file *m, void *data) return 0; } +static int i915_guc_log_control_get(void *data, u64 *val) +{ + struct drm_device *dev = data; + struct drm_i915_private *dev_priv = to_i915(dev); + + if (!dev_priv->guc.log.vma) + return -EINVAL; + + *val = i915.guc_log_level; + + return 0; +} + +static int i915_guc_log_control_set(void *data, u64 val) +{ + struct drm_device *dev = data; + struct drm_i915_private *dev_priv = to_i915(dev); + int ret; + + if (!dev_priv->guc.log.vma) + return -EINVAL; + + ret = mutex_lock_interruptible(&dev->struct_mutex); + if (ret) + return ret; + + intel_runtime_pm_get(dev_priv); + ret = i915_guc_log_control(dev_priv, val); + intel_runtime_pm_put(dev_priv); + + mutex_unlock(&dev->struct_mutex); + return ret; +} + +DEFINE_SIMPLE_ATTRIBUTE(i915_guc_log_control_fops, + i915_guc_log_control_get, i915_guc_log_control_set, + "%lld\n"); + static int i915_edp_psr_status(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); @@ -5454,7 +5492,8 @@ static const struct i915_debugfs_files { {"i915_fbc_false_color", &i915_fbc_fc_fops}, {"i915_dp_test_data", &i915_displayport_test_data_fops}, {"i915_dp_test_type", &i915_displayport_test_type_fops}, - {"i915_dp_test_active", &i915_displayport_test_active_fops} + {"i915_dp_test_active", &i915_displayport_test_active_fops}, + {"i915_guc_log_control", &i915_guc_log_control_fops} }; void intel_display_crc_init(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 9fd9a3a..6fac8e9 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -193,6 +193,16 @@ static int host2guc_force_logbuffer_flush(struct intel_guc *guc) return host2guc_action(guc, data, 2); } +static int host2guc_logging_control(struct intel_guc *guc, u32 control_val) +{ + u32 data[2]; + + data[0] = HOST2GUC_ACTION_UK_LOG_ENABLE_LOGGING; + data[1] = control_val; + + return host2guc_action(guc, data, 2); +} + /* * Initialise, update, or clear doorbell data shared with the GuC * @@ -1605,3 +1615,52 @@ void i915_guc_register(struct drm_i915_private *dev_priv) guc_log_late_setup(&dev_priv->guc); mutex_unlock(&dev_priv->drm.struct_mutex); } + +int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val) +{ + union guc_log_control log_param; + int ret; + + log_param.value = control_val; + + if (log_param.verbosity < GUC_LOG_VERBOSITY_MIN || + log_param.verbosity > GUC_LOG_VERBOSITY_MAX) + return -EINVAL; + + /* This combination doesn't make sense & won't have any effect */ + if (!log_param.logging_enabled && (i915.guc_log_level < 0)) + return 0; + + ret = host2guc_logging_control(&dev_priv->guc, log_param.value); + if (ret < 0) { + DRM_DEBUG_DRIVER("host2guc action failed %d\n", ret); + return ret; + } + + i915.guc_log_level = log_param.verbosity; + + /* If 
log_level was set to -1 at boot time, then the relay channel file + * wouldn't have been created by now and interrupts also would not have + * been enabled. + */ + if (!dev_priv->guc.log.relay_chan) { + ret = guc_log_late_setup(&dev_priv->guc); + if (!ret) + gen9_enable_guc_interrupts(dev_priv); + } else if (!log_param.logging_enabled) { + /* Once logging is disabled, GuC won't generate logs & send an + * interrupt. But there could be some data in the log buffer + * which is yet to be captured. So request GuC to update the log + * buffer state and then collect the left over logs. + */ + i915_guc_flush_logs(dev_priv); + + /* As logging is disabled, update log level to reflect that */ + i915.guc_log_level = -1; + } else { + /* In case interrupts were disabled, enable them now */ + gen9_enable_guc_interrupts(dev_priv); + } + + return ret; +} diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 71da75e..49ced0b 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -188,5 +188,6 @@ void i915_guc_capture_logs(struct drm_i915_private *dev_priv); void i915_guc_flush_logs(struct drm_i915_private *dev_priv); void i915_guc_register(struct drm_i915_private *dev_priv); void i915_guc_unregister(struct drm_i915_private *dev_priv); +int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val); #endif -- cgit v1.1 From 717065907ff7df57c268eac7bee44fbf3ef6faab Mon Sep 17 00:00:00 2001 From: Akash Goel Date: Wed, 12 Oct 2016 21:54:42 +0530 Subject: drm/i915: Use SSE4.1 movntdqa based memcpy for sampling GuC log buffer To ensure that we always get the up-to-date data from the log buffer, it's better to access the buffer through an uncached CPU mapping. Also, with the way the buffer is accessed from the GuC & Host side, manually doing a cache flush may not always be effective if a cached CPU mapping is used. In order to avoid any performance drop & have fast reads from the GuC log buffer, use the SSE4.1 movntdqa based memcpy function i915_memcpy_from_wc, as copying using movntdqa from WC type memory is almost as fast as reading from WB memory. This way the log buffer sampling time will not increase, and so we would be able to deal with the flush interrupt storm when GuC is generating logs at a very high rate. Ideally SSE 4.1 should be present on all chipsets supporting GuC based submissions, but if not then logging will not be enabled. v2: Rebase. v3: Squash the WC type vmalloc mapping patch with this patch. 
(Chris) Suggested-by: Chris Wilson Signed-off-by: Akash Goel Reviewed-by: Tvrtko Ursulin Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_guc_submission.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 6fac8e9..8cdcc45 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -1148,18 +1148,16 @@ static void guc_read_update_log_buffer(struct intel_guc *guc) /* Just copy the newly written data */ if (read_offset > write_offset) { - memcpy(dst_data, src_data, write_offset); + i915_memcpy_from_wc(dst_data, src_data, write_offset); bytes_to_copy = buffer_size - read_offset; } else { bytes_to_copy = write_offset - read_offset; } - memcpy(dst_data + read_offset, - src_data + read_offset, bytes_to_copy); + i915_memcpy_from_wc(dst_data + read_offset, + src_data + read_offset, bytes_to_copy); src_data += buffer_size; dst_data += buffer_size; - - /* FIXME: invalidate/flush for log buffer needed */ } if (log_buf_snapshot_state) @@ -1219,8 +1217,11 @@ static int guc_log_create_extras(struct intel_guc *guc) return 0; if (!guc->log.buf_addr) { - /* Create a vmalloc mapping of log buffer pages */ - vaddr = i915_gem_object_pin_map(guc->log.vma->obj, I915_MAP_WB); + /* Create a WC (Uncached for read) vmalloc mapping of log + * buffer pages, so that we can directly get the data + * (up-to-date) from memory. + */ + vaddr = i915_gem_object_pin_map(guc->log.vma->obj, I915_MAP_WC); if (IS_ERR(vaddr)) { ret = PTR_ERR(vaddr); DRM_ERROR("Couldn't map log buffer pages %d\n", ret); @@ -1263,6 +1264,16 @@ static void guc_log_create(struct intel_guc *guc) vma = guc->log.vma; if (!vma) { + /* We require SSE 4.1 for fast reads from the GuC log buffer and + * it should be present on the chipsets supporting GuC based + * submissions. + */ + if (WARN_ON(!i915_memcpy_from_wc(NULL, NULL, 0))) { + /* logging will not be enabled */ + i915.guc_log_level = -1; + return; + } + vma = guc_allocate_vma(guc, size); if (IS_ERR(vma)) { /* logging will be off */ -- cgit v1.1 From 1e6b8b0dc8d48f413237c215ceaea031f3469b2d Mon Sep 17 00:00:00 2001 From: Akash Goel Date: Wed, 12 Oct 2016 21:54:43 +0530 Subject: drm/i915: Early creation of relay channel for capturing boot time logs As per the current i915 Driver load sequence, debugfs registration is done at the end, and so the relay channel debugfs file is also created after that, but the GuC firmware is loaded much earlier in the sequence. As a result the Driver could miss capturing the boot-time logs of GuC firmware if there are flush interrupts from the GuC side. Relay has a provision to support early logging: initially only the relay channel can be created, to have buffers for storing logs, and later on the channel can be associated with a debugfs file at the appropriate time. Avail of that here, which allows the Driver to capture boot-time logs also; they can be collected once Userspace comes up. v2: - Remove the couple of FIXMEs, as now the relay channel will be created early before enabling the flush interrupts, so no possibility of relay channel pointer being modified & read at the same time from 2 different execution contexts. - Rebase. v3: - Add a comment to justify setting 'is_global' before the NULL check on the parent directory dentry pointer. 
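The two-phase relay usage described above can be condensed into a short sketch (simplified from the diff below; error handling omitted):

        /* Phase 1, at GuC load time: create the channel without a file,
         * so buffers exist to store any early flush data.
         */
        chan = relay_open(NULL, NULL, subbuf_size, n_subbufs,
                          &relay_callbacks, dev_priv);

        /* Phase 2, once debugfs is registered: attach the file. */
        ret = relay_late_setup_files(chan, "guc_log", debugfs_root);

Logs flushed between the two phases are retained in the channel's sub buffers and become readable once the file exists.
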
Suggested-by: Chris Wilson Signed-off-by: Akash Goel Reviewed-by: Tvrtko Ursulin Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_guc_submission.c | 71 ++++++++++++++++++++---------- 1 file changed, 47 insertions(+), 24 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 8cdcc45..ac8fe8b 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -915,15 +915,16 @@ static struct dentry *create_buf_file_callback(const char *filename, { struct dentry *buf_file; - if (!parent) - return NULL; - /* This to enable the use of a single buffer for the relay channel and * correspondingly have a single file exposed to User, through which * it can collect the logs in order without any post-processing. + * Need to set 'is_global' even if parent is NULL for early logging. */ *is_global = 1; + if (!parent) + return NULL; + /* Not using the channel filename passed as an argument, since for each * channel relay appends the corresponding CPU number to the filename * passed in relay_open(). This should be fine as relay just needs a @@ -956,13 +957,40 @@ static void guc_log_remove_relay_file(struct intel_guc *guc) relay_close(guc->log.relay_chan); } -static int guc_log_create_relay_file(struct intel_guc *guc) +static int guc_log_create_relay_channel(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); struct rchan *guc_log_relay_chan; - struct dentry *log_dir; size_t n_subbufs, subbuf_size; + /* Keep the size of sub buffers same as shared log buffer */ + subbuf_size = guc->log.vma->obj->base.size; + + /* Store up to 8 snapshots, which is large enough to buffer sufficient + * boot time logs and provides enough leeway to User, in terms of + * latency, for consuming the logs from relay. Also doesn't take + * up too much memory. + */ + n_subbufs = 8; + + guc_log_relay_chan = relay_open(NULL, NULL, subbuf_size, + n_subbufs, &relay_callbacks, dev_priv); + if (!guc_log_relay_chan) { + DRM_ERROR("Couldn't create relay chan for GuC logging\n"); + return -ENOMEM; + } + + GEM_BUG_ON(guc_log_relay_chan->subbuf_size < subbuf_size); + guc->log.relay_chan = guc_log_relay_chan; + return 0; +} + +static int guc_log_create_relay_file(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct dentry *log_dir; + int ret; + /* For now create the log file in /sys/kernel/debug/dri/0 dir */ log_dir = dev_priv->drm.primary->debugfs_root; @@ -982,26 +1010,12 @@ static int guc_log_create_relay_file(struct intel_guc *guc) return -ENODEV; } - /* Keep the size of sub buffers same as shared log buffer */ - subbuf_size = guc->log.vma->obj->base.size; - - /* Store up to 8 snapshots, which is large enough to buffer sufficient - * boot time logs and provides enough leeway to User, in terms of - * latency, for consuming the logs from relay. Also doesn't take - * up too much memory. - */ - n_subbufs = 8; - - guc_log_relay_chan = relay_open("guc_log", log_dir, subbuf_size, - n_subbufs, &relay_callbacks, dev_priv); - if (!guc_log_relay_chan) { - DRM_ERROR("Couldn't create relay chan for GuC logging\n"); - return -ENOMEM; + ret = relay_late_setup_files(guc->log.relay_chan, "guc_log", log_dir); + if (ret) { + DRM_ERROR("Couldn't associate relay chan with file %d\n", ret); + return ret; } - GEM_BUG_ON(guc_log_relay_chan->subbuf_size < subbuf_size); - /* FIXME: Cover the update under a lock ? 
*/ - guc->log.relay_chan = guc_log_relay_chan; return 0; } @@ -1021,7 +1035,6 @@ static void guc_move_to_next_buf(struct intel_guc *guc) static void *guc_get_write_buffer(struct intel_guc *guc) { - /* FIXME: Cover the check under a lock ? */ if (!guc->log.relay_chan) return NULL; @@ -1231,6 +1244,16 @@ static int guc_log_create_extras(struct intel_guc *guc) guc->log.buf_addr = vaddr; } + if (!guc->log.relay_chan) { + /* Create a relay channel, so that we have buffers for storing + * the GuC firmware logs, the channel will be linked with a file + * later on when debugfs is registered. + */ + ret = guc_log_create_relay_channel(guc); + if (ret) + return ret; + } + if (!guc->log.flush_wq) { INIT_WORK(&guc->log.flush_work, guc_capture_logs_work); -- cgit v1.1 From 7ef54de7fdf13953c1721e4b6b43e7b8d76d5521 Mon Sep 17 00:00:00 2001 From: Akash Goel Date: Wed, 12 Oct 2016 21:54:44 +0530 Subject: drm/i915: Mark the GuC log buffer flush interrupts handling WQ as freezable The GuC log buffer flush work item has to do a register access to send the ack to GuC, and if this work item is not synced before suspend it can potentially get executed after the GFX device is suspended. The work item function uses rpm get/put calls around the Hw access, which covers the rpm suspend case, but for system suspend a sync would be required as the kernel can potentially schedule work items even after some devices, including GFX, have been put to suspend. The sync has to be done only for the system suspend case, as a sync combined with rpm get/put can cause a deadlock in the rpm suspend path. To make the sync a NOOP for the rpm suspend path as well, this work item could have been queued from the irq handler only when the device is runtime active & kept active while that work item is pending or getting executed; but an interrupt can come even after the device is out of use and so could lead to this work item being missed. By marking the workqueue dedicated to handling GuC log buffer flush interrupts as freezable, we don't have to bother about flushing this work item from the suspend hooks: the pending work item, if any, will either be executed before the suspend or scheduled later on resume. This way the handling of the log buffer flush work item can be kept the same between system suspend & rpm suspend. Suggested-by: Imre Deak Cc: Imre Deak Signed-off-by: Akash Goel Reviewed-by: Imre Deak Signed-off-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_guc_submission.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index ac8fe8b..bf65ffa 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -1257,10 +1257,19 @@ static int guc_log_create_extras(struct intel_guc *guc) if (!guc->log.flush_wq) { INIT_WORK(&guc->log.flush_work, guc_capture_logs_work); - /* Need a dedicated wq to process log buffer flush interrupts - * from GuC without much delay so as to avoid any loss of logs. + /* + * GuC log buffer flush work item has to do register access to + * send the ack to GuC and this work item, if not synced before + * suspend, can potentially get executed after the GFX device is + * suspended. + * By marking the WQ as freezable, we don't have to bother about + * flushing of this work item from the suspend hooks, the pending + * work item if any will be either executed before the suspend + * or scheduled later on resume. 
This way the handling of work + * item can be kept same between system suspend & rpm suspend. */ - guc->log.flush_wq = alloc_ordered_workqueue("i915-guc_log", WQ_HIGHPRI); + guc->log.flush_wq = alloc_ordered_workqueue("i915-guc_log", + WQ_HIGHPRI | WQ_FREEZABLE); if (guc->log.flush_wq == NULL) { DRM_ERROR("Couldn't allocate the wq for GuC logging\n"); return -ENOMEM; -- cgit v1.1 From de867c20b94b6d52a0b10cb02f10a1f475abf7f5 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 25 Oct 2016 13:16:02 +0100 Subject: drm/i915: Include the kernel uptime in the error state As well as knowing when the error occurred, it is more interesting to me to know how long after booting the error occurred, and for good measure record the time since last hw initialisation. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20161025121602.1457-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 4 ++++ drivers/gpu/drm/i915/i915_gem.c | 2 ++ drivers/gpu/drm/i915/i915_gpu_error.c | 12 ++++++++++-- 3 files changed, 16 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b4cb1f0..7a621c7 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -746,6 +746,8 @@ struct intel_display_error_state; struct drm_i915_error_state { struct kref ref; struct timeval time; + struct timeval boottime; + struct timeval uptime; struct drm_i915_private *i915; @@ -2099,6 +2101,8 @@ struct drm_i915_private { * off the idle_work. */ struct delayed_work idle_work; + + ktime_t last_init_time; } gt; /* perform PHY state sanity checks? */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 0e26ee9..9361c7b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4470,6 +4470,8 @@ i915_gem_init_hw(struct drm_device *dev) enum intel_engine_id id; int ret; + dev_priv->gt.last_init_time = ktime_get(); + /* Double layer security blanket, see i915_gem_init() */ intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 48fece3..d5feace 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -546,9 +546,13 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, } err_printf(m, "%s\n", error->error_msg); - err_printf(m, "Time: %ld s %ld us\n", error->time.tv_sec, - error->time.tv_usec); err_printf(m, "Kernel: " UTS_RELEASE "\n"); + err_printf(m, "Time: %ld s %ld us\n", + error->time.tv_sec, error->time.tv_usec); + err_printf(m, "Boottime: %ld s %ld us\n", + error->boottime.tv_sec, error->boottime.tv_usec); + err_printf(m, "Uptime: %ld s %ld us\n", + error->uptime.tv_sec, error->uptime.tv_usec); err_print_capabilities(m, &error->device_info); max_hangcheck_score = 0; for (i = 0; i < ARRAY_SIZE(error->engine); i++) { @@ -1549,6 +1553,10 @@ static int capture(void *data) i915_gem_capture_guc_log_buffer(error->i915, error); do_gettimeofday(&error->time); + error->boottime = ktime_to_timeval(ktime_get_boottime()); + error->uptime = + ktime_to_timeval(ktime_sub(ktime_get(), + error->i915->gt.last_init_time)); error->overlay = intel_overlay_capture_error_state(error->i915); error->display = intel_display_capture_error_state(error->i915); -- cgit v1.1 From ed4596ea992db45a27bfc3a0e401fb95ef2d5f23 Mon Sep 17 00:00:00 2001 From: Akash Goel Date: Tue, 25 Oct 2016 
22:05:23 +0530 Subject: drm/i915/guc: WA to address the Ringbuffer coherency issue The Driver accesses the ringbuffer pages via the GMADR BAR if the pages are pinned in the mappable aperture portion of GGTT; for ringbuffer pages allocated from Stolen memory, access can only be done through the GMADR BAR. In case of GuC based submission, updates done in the ringbuffer via GMADR may not get committed to memory by the time the Command streamer starts reading them, resulting in fetching of stale data. For Host based submission no such problem exists, as the write to the Ring Tail or ELSP register happens from the Host side prior to submission. Access to any GFX register from the CPU side goes to the GTTMMADR BAR, and Hw already enforces the ordering between outstanding GMADR writes & new GTTMMADR access. MMIO writes from the GuC side do not go to the GTTMMADR BAR, as GuC communication to registers within GT is contained within GT, so the ordering is not enforced, resulting in a race which can manifest in the form of a hang. To ensure the flush of in-flight GMADR writes, a POSTING READ is done to a GuC register prior to ringing the doorbell. There is already a similar WA in i915_gem_object_flush_gtt_write_domain(), which takes care of GMADR writes from User space to GEM buffers, but not the ringbuffer writes from KMD. This WA is needed on all recent HW. v2: - Use POSTING_READ_FW instead of POSTING_READ as the GuC register does not lie in any forcewake domain range and so the overhead of spinlock & search in the forcewake table is avoidable. (Chris) Cc: Chris Wilson Signed-off-by: Sagar Arun Kamble Signed-off-by: Akash Goel Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1477413323-1880-1-git-send-email-akash.goel@intel.com --- drivers/gpu/drm/i915/i915_guc_submission.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index bf65ffa..74235ea 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -634,6 +634,7 @@ static int guc_ring_doorbell(struct i915_guc_client *gc) */ static void i915_guc_submit(struct drm_i915_gem_request *rq) { + struct drm_i915_private *dev_priv = rq->i915; unsigned int engine_id = rq->engine->id; struct intel_guc *guc = &rq->i915->guc; struct i915_guc_client *client = guc->execbuf_client; @@ -641,6 +642,11 @@ static void i915_guc_submit(struct drm_i915_gem_request *rq) spin_lock(&client->wq_lock); guc_wq_item_append(client, rq); + + /* WA to flush out the pending GMADR writes to ring buffer. */ + if (i915_vma_is_map_and_fenceable(rq->ring->vma)) + POSTING_READ_FW(GUC_STATUS); + b_ret = guc_ring_doorbell(client); client->submissions[engine_id] += 1; -- cgit v1.1 From 64fafcf5a2fa2f3c33fa7f5957923839b36f1e8c Mon Sep 17 00:00:00 2001 From: Min He Date: Tue, 25 Oct 2016 16:26:04 +0800 Subject: drm/i915/gvt: fix a typo in skl_decode_mi_display_flip Fix typo to set the correct pipe number. 
Signed-off-by: Min He Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 477b0d4..0084ece 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -1145,7 +1145,7 @@ static int skl_decode_mi_display_flip(struct parser_exec_state *s, info->event = PRIMARY_B_FLIP_DONE; break; case MI_DISPLAY_FLIP_SKL_PLANE_1_C: - info->pipe = PIPE_B; + info->pipe = PIPE_C; info->event = PRIMARY_C_FLIP_DONE; break; default: -- cgit v1.1 From b0122f3114e20f7d73520761648f8f0c141f3a30 Mon Sep 17 00:00:00 2001 From: Xiaoguang Chen Date: Tue, 25 Oct 2016 16:56:45 +0800 Subject: drm/i915/gvt: fix detect_host calling logic Like other routines, intel_gvt_hypervisor_detect_host returns 0 for success. Signed-off-by: Xiaoguang Chen Signed-off-by: Jike Song Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gvt.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index bf03777..385969a 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -65,6 +65,8 @@ struct intel_gvt_io_emulation_ops intel_gvt_io_emulation_ops = { */ int intel_gvt_init_host(void) { + int ret; + if (intel_gvt_host.initialized) return 0; @@ -90,7 +92,8 @@ int intel_gvt_init_host(void) return -EINVAL; /* Try to detect if we're running in host instead of VM. */ - if (!intel_gvt_hypervisor_detect_host()) + ret = intel_gvt_hypervisor_detect_host(); + if (ret) return -ENODEV; gvt_dbg_core("Running with hypervisor %s in host mode\n", -- cgit v1.1 From 40d2428b3a6aaa3d6ba0de0e8780c4b4eb53426c Mon Sep 17 00:00:00 2001 From: Ping Gao Date: Wed, 26 Oct 2016 09:38:50 +0800 Subject: drm/i915/gvt: add vreg write for GDRST handler The emulation handler for MMIO GDRST missed the vreg write, and as a result the vreg could not be updated correspondingly. Signed-off-by: Ping Gao Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 3e74fb3..52af517 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -258,6 +258,7 @@ static int gdrst_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, u32 data; u64 bitmap = 0; + write_vreg(vgpu, offset, p_data, bytes); data = vgpu_vreg(vgpu, offset); if (data & GEN6_GRDOM_FULL) { -- cgit v1.1 From 0a8b66e3adbe302a02bb33e1a662416281ade616 Mon Sep 17 00:00:00 2001 From: Ping Gao Date: Wed, 26 Oct 2016 13:36:41 +0800 Subject: drm/i915/gvt: correct the reset logic The current_vgpu will be set to NULL after stopping the scheduler when the reset is triggered by the current vgpu, so the judgement condition for current vgpu detection needs to be changed. 
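A condensed sketch of the corrected flow (illustration only; the real code in the diff below also drops and re-takes gvt->lock around the wait):

        intel_vgpu_stop_schedule(vgpu);
        /* Stopping the scheduler clears current_vgpu when the reset was
         * triggered by the current vgpu, so NULL is the value to test.
         */
        if (scheduler->current_vgpu == NULL)
                intel_gvt_wait_vgpu_idle(vgpu);
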
Signed-off-by: Ping Gao Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/handlers.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 52af517..7832e49 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -239,7 +239,11 @@ static int handle_device_reset(struct intel_vgpu *vgpu, unsigned int offset, vgpu->resetting = true; intel_vgpu_stop_schedule(vgpu); - if (scheduler->current_vgpu == vgpu) { + /* + * The current_vgpu will set to NULL after stopping the + * scheduler when the reset is triggered by current vgpu. + */ + if (scheduler->current_vgpu == NULL) { mutex_unlock(&vgpu->gvt->lock); intel_gvt_wait_vgpu_idle(vgpu); mutex_lock(&vgpu->gvt->lock); -- cgit v1.1 From 07ee2bce6a516e0218dba22581803cb8f11bcf82 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 25 Oct 2016 17:40:35 +0100 Subject: drm/i915: Rotated view does not need a fence We do not need to set up a fence for the rotated view. Display does not need it and no one can access it. v2: Move code to __i915_vma_set_map_and_fenceable. (Chris Wilson) Signed-off-by: Tvrtko Ursulin Fixes: 05a20d098db1 ("drm/i915: Move map-and-fenceable tracking to the VMA") Cc: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9361c7b..eb524bd 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3790,7 +3790,12 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) mappable = (vma->node.start + fence_size <= dev_priv->ggtt.mappable_end); - if (mappable && fenceable) + /* + * Explicitly disable for rotated VMA since the display does not + * need the fence and the VMA is not accessible to other users. + */ + if (mappable && fenceable && + vma->ggtt_view.type != I915_GGTT_VIEW_ROTATED) vma->flags |= I915_VMA_CAN_FENCE; else vma->flags &= ~I915_VMA_CAN_FENCE; -- cgit v1.1 From 3299e7e43484a85eeb5c7ec09958bff05c9d0543 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Tue, 25 Oct 2016 17:41:12 +0100 Subject: drm/i915: Remove two invalid warns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Objects can have multiple VMAs used for display in which case assertion that objects must not be pinned for display more times than the current VMA is incorrect. v2: Commit message update. 
(Chris Wilson) Signed-off-by: Tvrtko Ursulin Fixes: 058d88c4330f ("drm/i915: Track pinned VMA") Cc: Chris Wilson Cc: Joonas Lahtinen Cc: Ville Syrjälä Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1477413635-3876-1-git-send-email-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_gem.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index eb524bd..87018df9 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3595,8 +3595,6 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, vma->display_alignment = max_t(u64, vma->display_alignment, alignment); - WARN_ON(obj->pin_display > i915_vma_pin_count(vma)); - i915_gem_object_flush_cpu_write_domain(obj); old_write_domain = obj->base.write_domain; @@ -3633,7 +3631,6 @@ i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) list_move_tail(&vma->vm_link, &vma->vm->inactive_list); i915_vma_unpin(vma); - WARN_ON(vma->obj->pin_display > i915_vma_pin_count(vma)); } /** -- cgit v1.1 From 1aab956c7b8872fb6976328316bfad62c6e67cf8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Fri, 21 Oct 2016 16:44:38 +0300 Subject: drm/i915: Refresh the status of MST capable connectors in ->detect() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Once we've determined that the sink is MST capable we never end up running through the full detect cycle again, despite getting HPDs. Fix that by ripping out the incorrect piece of code responsible. This got broken when I moved the long HPD handling to the ->detect() hook, but failed to remove the leftover code. Cc: Ander Conselvan de Oliveira Cc: drm-intel-fixes@lists.freedesktop.org Cc: Rui Tiago Matos Tested-by: Rui Tiago Matos Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98323 Cc: Kirill A. Shutemov Tested-by: Kirill A. 
Shutemov References: https://bugs.freedesktop.org/show_bug.cgi?id=98306 Fixes: 27d4efc5591a ("drm/i915: Move long hpd handling into the hotplug work") Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1477057478-29328-1-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_dp.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index f30db8f..80db8a3 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -4492,21 +4492,11 @@ static enum drm_connector_status intel_dp_detect(struct drm_connector *connector, bool force) { struct intel_dp *intel_dp = intel_attached_dp(connector); - struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct intel_encoder *intel_encoder = &intel_dig_port->base; enum drm_connector_status status = connector->status; DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id, connector->name); - if (intel_dp->is_mst) { - /* MST devices are disconnected from a monitor POV */ - intel_dp_unset_edid(intel_dp); - if (intel_encoder->type != INTEL_OUTPUT_EDP) - intel_encoder->type = INTEL_OUTPUT_DP; - return connector_status_disconnected; - } - /* If full detect is not performed yet, do a full detect */ if (!intel_dp->detect_done) status = intel_dp_long_pulse(intel_dp->attached_connector); -- cgit v1.1 From 9ca89c443de94751cd6fb5d84b215ec48279e8c2 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Tue, 25 Oct 2016 17:54:17 +0300 Subject: drm/i915/audio: drop extra crtc clock check from HDMI audio N lookup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The array contains the crtc clock, rely on that. While at it, debug log the HDMI N value or automatic mode. 
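Condensed, the resulting code path has the following shape (it mirrors the diff below, shown here for readability); the table behind audio_config_hdmi_get_n() already keys on the crtc clock, so the TMDS_296M/TMDS_297M pre-check was redundant:

        n = audio_config_hdmi_get_n(adjusted_mode, rate);
        if (n != 0) {
                tmp &= ~AUD_CONFIG_N_MASK;
                tmp |= AUD_CONFIG_N(n) | AUD_CONFIG_N_PROG_ENABLE;
        } else {
                DRM_DEBUG_KMS("using automatic N\n");
        }
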
Cc: Ville Syrjälä Cc: "Lin, Mengdong" Cc: Libin Yang Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1477407258-30599-2-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/intel_audio.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index 7093cfb..1a5c212 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -254,16 +254,15 @@ hsw_hdmi_audio_config_update(struct intel_crtc *intel_crtc, enum port port, tmp &= ~AUD_CONFIG_N_PROG_ENABLE; tmp |= audio_config_hdmi_pixel_clock(adjusted_mode); - if (adjusted_mode->crtc_clock == TMDS_296M || - adjusted_mode->crtc_clock == TMDS_297M) { - n = audio_config_hdmi_get_n(adjusted_mode, rate); - if (n != 0) { - tmp &= ~AUD_CONFIG_N_MASK; - tmp |= AUD_CONFIG_N(n); - tmp |= AUD_CONFIG_N_PROG_ENABLE; - } else { - DRM_DEBUG_KMS("no suitable N value is found\n"); - } + n = audio_config_hdmi_get_n(adjusted_mode, rate); + if (n != 0) { + DRM_DEBUG_KMS("using N %d\n", n); + + tmp &= ~AUD_CONFIG_N_MASK; + tmp |= AUD_CONFIG_N(n); + tmp |= AUD_CONFIG_N_PROG_ENABLE; + } else { + DRM_DEBUG_KMS("using automatic N\n"); } I915_WRITE(HSW_AUD_CFG(pipe), tmp); -- cgit v1.1 From 6014ac122ed081feca99217bc57b2e15c7fc1a51 Mon Sep 17 00:00:00 2001 From: Libin Yang Date: Tue, 25 Oct 2016 17:54:18 +0300 Subject: drm/i915/audio: set proper N/M in modeset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a modeset occurs and the LS_CLK is set to some special values in DP mode, the N/M values need to be set manually if audio is playing. Otherwise the first several seconds of audio playback may be silent. The relationship of Maud and Naud is expressed in the following equation: Maud/Naud = 512 * fs / f_LS_Clk Please refer to the VESA DisplayPort Standard for details. 
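As a quick sanity check of the equation against one entry of the table added below (illustration only): for fs = 48 kHz on a 162 MHz link, Maud/Naud = 512 * 48000 / 162000000 = 512 / 3375, which matches the { 48000, LC_162M, 512, 3375 } entry.
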
v2 by Jani: - organize Maud/Naud table according to DP 1.4 spec - add 64k and 128k audio rates - update HSW_AUD_M_CTS_ENABLE register when Maud not found - remove extra checks for port clock - simplify Maud/Naud lookup - reset patch author back to Libin Cc: "Zhang, Keqiao" Cc: Ville Syrjälä Cc: "Lin, Mengdong" Reviewed-by: Ville Syrjälä Signed-off-by: Libin Yang Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1477407258-30599-3-git-send-email-jani.nikula@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 7 +++ drivers/gpu/drm/i915/intel_audio.c | 93 +++++++++++++++++++++++++++++++++++++- 2 files changed, 99 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index bdc7b35..542e570 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7370,6 +7370,13 @@ enum { #define _HSW_AUD_MISC_CTRL_B 0x65110 #define HSW_AUD_MISC_CTRL(pipe) _MMIO_PIPE(pipe, _HSW_AUD_MISC_CTRL_A, _HSW_AUD_MISC_CTRL_B) +#define _HSW_AUD_M_CTS_ENABLE_A 0x65028 +#define _HSW_AUD_M_CTS_ENABLE_B 0x65128 +#define HSW_AUD_M_CTS_ENABLE(pipe) _MMIO_PIPE(pipe, _HSW_AUD_M_CTS_ENABLE_A, _HSW_AUD_M_CTS_ENABLE_B) +#define AUD_M_CTS_M_VALUE_INDEX (1 << 21) +#define AUD_M_CTS_M_PROG_ENABLE (1 << 20) +#define AUD_CONFIG_M_MASK 0xfffff + #define _HSW_AUD_DIP_ELD_CTRL_ST_A 0x650b4 #define _HSW_AUD_DIP_ELD_CTRL_ST_B 0x651b4 #define HSW_AUD_DIP_ELD_CTRL(pipe) _MMIO_PIPE(pipe, _HSW_AUD_DIP_ELD_CTRL_ST_A, _HSW_AUD_DIP_ELD_CTRL_ST_B) diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index 1a5c212..813fd74 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -57,6 +57,63 @@ * struct &i915_audio_component_audio_ops @audio_ops is called from i915 driver. 
*/ +/* DP N/M table */ +#define LC_540M 540000 +#define LC_270M 270000 +#define LC_162M 162000 + +struct dp_aud_n_m { + int sample_rate; + int clock; + u16 m; + u16 n; +}; + +/* Values according to DP 1.4 Table 2-104 */ +static const struct dp_aud_n_m dp_aud_n_m[] = { + { 32000, LC_162M, 1024, 10125 }, + { 44100, LC_162M, 784, 5625 }, + { 48000, LC_162M, 512, 3375 }, + { 64000, LC_162M, 2048, 10125 }, + { 88200, LC_162M, 1568, 5625 }, + { 96000, LC_162M, 1024, 3375 }, + { 128000, LC_162M, 4096, 10125 }, + { 176400, LC_162M, 3136, 5625 }, + { 192000, LC_162M, 2048, 3375 }, + { 32000, LC_270M, 1024, 16875 }, + { 44100, LC_270M, 784, 9375 }, + { 48000, LC_270M, 512, 5625 }, + { 64000, LC_270M, 2048, 16875 }, + { 88200, LC_270M, 1568, 9375 }, + { 96000, LC_270M, 1024, 5625 }, + { 128000, LC_270M, 4096, 16875 }, + { 176400, LC_270M, 3136, 9375 }, + { 192000, LC_270M, 2048, 5625 }, + { 32000, LC_540M, 1024, 33750 }, + { 44100, LC_540M, 784, 18750 }, + { 48000, LC_540M, 512, 11250 }, + { 64000, LC_540M, 2048, 33750 }, + { 88200, LC_540M, 1568, 18750 }, + { 96000, LC_540M, 1024, 11250 }, + { 128000, LC_540M, 4096, 33750 }, + { 176400, LC_540M, 3136, 18750 }, + { 192000, LC_540M, 2048, 11250 }, +}; + +static const struct dp_aud_n_m * +audio_config_dp_get_n_m(struct intel_crtc *intel_crtc, int rate) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(dp_aud_n_m); i++) { + if (rate == dp_aud_n_m[i].sample_rate && + intel_crtc->config->port_clock == dp_aud_n_m[i].clock) + return &dp_aud_n_m[i]; + } + + return NULL; +} + static const struct { int clock; u32 config; @@ -225,16 +282,43 @@ hsw_dp_audio_config_update(struct intel_crtc *intel_crtc, enum port port, const struct drm_display_mode *adjusted_mode) { struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev); + struct i915_audio_component *acomp = dev_priv->audio_component; + int rate = acomp ? acomp->aud_sample_rate[port] : 0; + const struct dp_aud_n_m *nm = audio_config_dp_get_n_m(intel_crtc, rate); enum pipe pipe = intel_crtc->pipe; u32 tmp; + if (nm) + DRM_DEBUG_KMS("using Maud %u, Naud %u\n", nm->m, nm->n); + else + DRM_DEBUG_KMS("using automatic Maud, Naud\n"); + tmp = I915_READ(HSW_AUD_CFG(pipe)); tmp &= ~AUD_CONFIG_N_VALUE_INDEX; tmp &= ~AUD_CONFIG_PIXEL_CLOCK_HDMI_MASK; tmp &= ~AUD_CONFIG_N_PROG_ENABLE; tmp |= AUD_CONFIG_N_VALUE_INDEX; + if (nm) { + tmp &= ~AUD_CONFIG_N_MASK; + tmp |= AUD_CONFIG_N(nm->n); + tmp |= AUD_CONFIG_N_PROG_ENABLE; + } + I915_WRITE(HSW_AUD_CFG(pipe), tmp); + + tmp = I915_READ(HSW_AUD_M_CTS_ENABLE(pipe)); + tmp &= ~AUD_CONFIG_M_MASK; + tmp &= ~AUD_M_CTS_M_VALUE_INDEX; + tmp &= ~AUD_M_CTS_M_PROG_ENABLE; + + if (nm) { + tmp |= nm->m; + tmp |= AUD_M_CTS_M_VALUE_INDEX; + tmp |= AUD_M_CTS_M_PROG_ENABLE; + } + + I915_WRITE(HSW_AUD_M_CTS_ENABLE(pipe), tmp); } static void @@ -266,6 +350,12 @@ hsw_hdmi_audio_config_update(struct intel_crtc *intel_crtc, enum port port, } I915_WRITE(HSW_AUD_CFG(pipe), tmp); + + tmp = I915_READ(HSW_AUD_M_CTS_ENABLE(pipe)); + tmp &= ~AUD_CONFIG_M_MASK; + tmp &= ~AUD_M_CTS_M_VALUE_INDEX; + tmp |= AUD_M_CTS_M_PROG_ENABLE; + I915_WRITE(HSW_AUD_M_CTS_ENABLE(pipe), tmp); } static void @@ -686,7 +776,8 @@ static int i915_audio_component_sync_audio_rate(struct device *kdev, int port, /* 1. 
get the pipe */
 	intel_encoder = get_saved_enc(dev_priv, port, pipe);
 	if (!intel_encoder || !intel_encoder->base.crtc ||
-	    intel_encoder->type != INTEL_OUTPUT_HDMI) {
+	    (intel_encoder->type != INTEL_OUTPUT_HDMI &&
+	     intel_encoder->type != INTEL_OUTPUT_DP)) {
 		DRM_DEBUG_KMS("Not valid for port %c\n", port_name(port));
 		err = -ENODEV;
 		goto unlock;
--
cgit v1.1


From c726ad01d2e3e27400421de85800d954df55aeea Mon Sep 17 00:00:00 2001
From: Imre Deak
Date: Mon, 24 Oct 2016 19:33:24 +0300
Subject: drm/dp: Factor out helper to distinguish between branch and sink
 devices

This check is open-coded in a few places, so it makes sense to simplify
things by having a helper for it similar to the rest of DPCD feature
helpers.

v2: (Jani)
- Move the helper to drm_dp_helper.h.
- Split out this change to a separate patch.

Cc: Jani Nikula
Cc: dri-devel@lists.freedesktop.org
Signed-off-by: Imre Deak
Reviewed-by: Jani Nikula
Link: http://patchwork.freedesktop.org/patch/msgid/1477326811-30431-2-git-send-email-imre.deak@intel.com
---
 drivers/gpu/drm/i915/intel_dp.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'drivers/gpu/drm')

diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 80db8a3..951a0bb 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -1459,8 +1459,7 @@ static void intel_dp_print_hw_revision(struct intel_dp *intel_dp)
 	if ((drm_debug & DRM_UT_KMS) == 0)
 		return;
 
-	if (!(intel_dp->dpcd[DP_DOWNSTREAMPORT_PRESENT] &
-	      DP_DWN_STRM_PORT_PRESENT))
+	if (!drm_dp_is_branch(intel_dp->dpcd))
 		return;
 
 	len = drm_dp_dpcd_read(&intel_dp->aux, DP_BRANCH_HW_REV, &rev, 1);
@@ -1478,8 +1477,7 @@ static void intel_dp_print_sw_revision(struct intel_dp *intel_dp)
 	if ((drm_debug & DRM_UT_KMS) == 0)
 		return;
 
-	if (!(intel_dp->dpcd[DP_DOWNSTREAMPORT_PRESENT] &
-	      DP_DWN_STRM_PORT_PRESENT))
+	if (!drm_dp_is_branch(intel_dp->dpcd))
 		return;
 
 	len = drm_dp_dpcd_read(&intel_dp->aux, DP_BRANCH_SW_REV, &rev, 2);
@@ -3615,8 +3613,7 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp)
 	if (!is_edp(intel_dp) && !intel_dp->sink_count)
 		return false;
 
-	if (!(intel_dp->dpcd[DP_DOWNSTREAMPORT_PRESENT] &
-	      DP_DWN_STRM_PORT_PRESENT))
+	if (!drm_dp_is_branch(intel_dp->dpcd))
 		return true; /* native DP sink */
 
 	if (intel_dp->dpcd[DP_DPCD_REV] == 0x10)
@@ -4134,7 +4131,7 @@ intel_dp_detect_dpcd(struct intel_dp *intel_dp)
 		return connector_status_connected;
 
 	/* if there's no downstream port, we're done */
-	if (!(dpcd[DP_DOWNSTREAMPORT_PRESENT] & DP_DWN_STRM_PORT_PRESENT))
+	if (!drm_dp_is_branch(dpcd))
 		return connector_status_connected;
 
 	/* If we're HPD-aware, SINK_COUNT changes dynamically */
--
cgit v1.1


From 6f172a43a6d679e4a6563376f749868a9bf29752 Mon Sep 17 00:00:00 2001
From: Imre Deak
Date: Mon, 24 Oct 2016 19:33:25 +0300
Subject: drm/i915/dp: Remove debug dependency of DPCD SW/HW revision read

Performing DPCD AUX reads based on debug settings may introduce obscure
bugs in other places that depend on the read being done (or not being
done). To reduce the uncertainty, perform the reads unconditionally.
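For reference, the drm_dp_is_branch() helper factored out by the patch
above lives in drm_dp_helper.h and is therefore not visible in this log.
Judging from the open-coded test it replaces, it presumably reduces to
something like:

    /* Sketch, inferred from the replaced expression: a device is a
     * branch (e.g. a DP-to-HDMI converter) if it reports a downstream
     * port in its DPCD. */
    static inline bool drm_dp_is_branch(const u8 dpcd[DP_RECEIVER_CAP_SIZE])
    {
            return dpcd[DP_DOWNSTREAMPORT_PRESENT] & DP_DWN_STRM_PORT_PRESENT;
    }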
Cc: Mika Kahola
Suggested-by: Jani Nikula
Signed-off-by: Imre Deak
Reviewed-by: Jani Nikula
Link: http://patchwork.freedesktop.org/patch/msgid/1477326811-30431-3-git-send-email-imre.deak@intel.com
---
 drivers/gpu/drm/i915/intel_dp.c | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'drivers/gpu/drm')

diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 951a0bb..5af3907 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -1456,9 +1456,6 @@ static void intel_dp_print_hw_revision(struct intel_dp *intel_dp)
 	uint8_t rev;
 	int len;
 
-	if ((drm_debug & DRM_UT_KMS) == 0)
-		return;
-
 	if (!drm_dp_is_branch(intel_dp->dpcd))
 		return;
 
@@ -1474,9 +1471,6 @@ static void intel_dp_print_sw_revision(struct intel_dp *intel_dp)
 	uint8_t rev[2];
 	int len;
 
-	if ((drm_debug & DRM_UT_KMS) == 0)
-		return;
-
 	if (!drm_dp_is_branch(intel_dp->dpcd))
 		return;
--
cgit v1.1


From 5e8966774253778b861d5c0cbdaf66535233c873 Mon Sep 17 00:00:00 2001
From: Imre Deak
Date: Mon, 24 Oct 2016 19:33:26 +0300
Subject: drm/i915/dp: Print only sink or branch specific OUI based on dev type

There are two separate sets of DPCD registers for the DP OUI - as well
as for the device ID and HW/SW revision - based on whether the given DP
device is a branch or a sink. Currently we print both branch and sink
OUIs; for consistency, print only the one that corresponds to the probed
device.

v2:
- Split out this change into a separate patch. (Jani)

Cc: Jani Nikula
Signed-off-by: Imre Deak
Reviewed-by: Jani Nikula
Link: http://patchwork.freedesktop.org/patch/msgid/1477326811-30431-4-git-send-email-imre.deak@intel.com
---
 drivers/gpu/drm/i915/intel_dp.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'drivers/gpu/drm')

diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 5af3907..332dc86 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -3624,17 +3624,17 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp)
 static void
 intel_dp_probe_oui(struct intel_dp *intel_dp)
 {
+	bool is_branch = drm_dp_is_branch(intel_dp->dpcd);
 	u8 buf[3];
 
 	if (!(intel_dp->dpcd[DP_DOWN_STREAM_PORT_COUNT] & DP_OUI_SUPPORT))
 		return;
 
-	if (drm_dp_dpcd_read(&intel_dp->aux, DP_SINK_OUI, buf, 3) == 3)
-		DRM_DEBUG_KMS("Sink OUI: %02hx%02hx%02hx\n",
-			      buf[0], buf[1], buf[2]);
-
-	if (drm_dp_dpcd_read(&intel_dp->aux, DP_BRANCH_OUI, buf, 3) == 3)
-		DRM_DEBUG_KMS("Branch OUI: %02hx%02hx%02hx\n",
+	if (drm_dp_dpcd_read(&intel_dp->aux,
+			     is_branch ? DP_BRANCH_OUI : DP_SINK_OUI,
+			     buf, 3) == 3)
+		DRM_DEBUG_KMS("%s OUI: %02hx%02hx%02hx\n",
+			      is_branch ? "Branch" : "Sink",
 			      buf[0], buf[1], buf[2]);
 }
 
--
cgit v1.1


From 7b3fc170d6a6ac1d1622178a2db317671cb89b9c Mon Sep 17 00:00:00 2001
From: Imre Deak
Date: Tue, 25 Oct 2016 16:12:39 +0300
Subject: drm/i915/dp: Print full branch/sink descriptor

Extend the branch/sink descriptor info with the missing device ID field.
While at it, also read out all the descriptor registers in one transfer
and make the debug print more compact.

v2: (Jani)
- Cache the descriptor in intel_dp.
- Split out this change into a separate patch.
v3: (Jani)
- Fix return value check of __intel_dp_read_desc().
- Use %pE instead of %s to print the device ID.
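The single-transfer read relies on the new struct intel_dp_desc (added
to intel_drv.h below) mirroring the DPCD descriptor block byte for byte.
A standalone sketch of that layout assumption, with userspace stand-ins
for the kernel types:

    #include <assert.h>
    #include <stdint.h>

    /* Mirrors the layout added below: 3-byte OUI, 6-byte device ID,
     * one byte of HW revision, two bytes of SW revision. */
    struct intel_dp_desc {
            uint8_t oui[3];
            uint8_t device_id[6];
            uint8_t hw_rev;
            uint8_t sw_major_rev;
            uint8_t sw_minor_rev;
    } __attribute__((packed));

    int main(void)
    {
            /* One 12-byte AUX read starting at DP_BRANCH_OUI or
             * DP_SINK_OUI fills the whole descriptor, replacing the
             * separate OUI and HW/SW revision reads removed here. */
            static_assert(sizeof(struct intel_dp_desc) == 12, "layout");
            return 0;
    }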
Cc: Jani Nikula Signed-off-by: Imre Deak Reviewed-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1477401159-15098-1-git-send-email-imre.deak@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 63 ++++++++++++++-------------------------- drivers/gpu/drm/i915/intel_drv.h | 10 +++++++ 2 files changed, 32 insertions(+), 41 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 332dc86..bff27b7 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1451,34 +1451,35 @@ static void intel_dp_print_rates(struct intel_dp *intel_dp) DRM_DEBUG_KMS("common rates: %s\n", str); } -static void intel_dp_print_hw_revision(struct intel_dp *intel_dp) +static bool +__intel_dp_read_desc(struct intel_dp *intel_dp, struct intel_dp_desc *desc) { - uint8_t rev; - int len; - - if (!drm_dp_is_branch(intel_dp->dpcd)) - return; + u32 base = drm_dp_is_branch(intel_dp->dpcd) ? DP_BRANCH_OUI : + DP_SINK_OUI; - len = drm_dp_dpcd_read(&intel_dp->aux, DP_BRANCH_HW_REV, &rev, 1); - if (len < 0) - return; - - DRM_DEBUG_KMS("sink hw revision: %d.%d\n", (rev & 0xf0) >> 4, rev & 0xf); + return drm_dp_dpcd_read(&intel_dp->aux, base, desc, sizeof(*desc)) == + sizeof(*desc); } -static void intel_dp_print_sw_revision(struct intel_dp *intel_dp) +static bool intel_dp_read_desc(struct intel_dp *intel_dp) { - uint8_t rev[2]; - int len; + struct intel_dp_desc *desc = &intel_dp->desc; + bool oui_sup = intel_dp->dpcd[DP_DOWN_STREAM_PORT_COUNT] & + DP_OUI_SUPPORT; + int dev_id_len; - if (!drm_dp_is_branch(intel_dp->dpcd)) - return; + if (!__intel_dp_read_desc(intel_dp, desc)) + return false; - len = drm_dp_dpcd_read(&intel_dp->aux, DP_BRANCH_SW_REV, &rev, 2); - if (len < 0) - return; + dev_id_len = strnlen(desc->device_id, sizeof(desc->device_id)); + DRM_DEBUG_KMS("DP %s: OUI %*phD%s dev-ID %*pE HW-rev %d.%d SW-rev %d.%d\n", + drm_dp_is_branch(intel_dp->dpcd) ? "branch" : "sink", + (int)sizeof(desc->oui), desc->oui, oui_sup ? "" : "(NS)", + dev_id_len, desc->device_id, + desc->hw_rev >> 4, desc->hw_rev & 0xf, + desc->sw_major_rev, desc->sw_minor_rev); - DRM_DEBUG_KMS("sink sw revision: %d.%d\n", rev[0], rev[1]); + return true; } static int rate_to_index(int find, const int *rates) @@ -3621,23 +3622,6 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp) return true; } -static void -intel_dp_probe_oui(struct intel_dp *intel_dp) -{ - bool is_branch = drm_dp_is_branch(intel_dp->dpcd); - u8 buf[3]; - - if (!(intel_dp->dpcd[DP_DOWN_STREAM_PORT_COUNT] & DP_OUI_SUPPORT)) - return; - - if (drm_dp_dpcd_read(&intel_dp->aux, - is_branch ? DP_BRANCH_OUI : DP_SINK_OUI, - buf, 3) == 3) - DRM_DEBUG_KMS("%s OUI: %02hx%02hx%02hx\n", - is_branch ? 
"Branch" : "Sink", - buf[0], buf[1], buf[2]); -} - static bool intel_dp_can_mst(struct intel_dp *intel_dp) { @@ -4416,10 +4400,7 @@ intel_dp_long_pulse(struct intel_connector *intel_connector) intel_dp_print_rates(intel_dp); - intel_dp_probe_oui(intel_dp); - - intel_dp_print_hw_revision(intel_dp); - intel_dp_print_sw_revision(intel_dp); + intel_dp_read_desc(intel_dp); intel_dp_configure_mst(intel_dp); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 7dda79d..65ebe92 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -883,6 +883,14 @@ enum link_m_n_set { M2_N2 }; +struct intel_dp_desc { + u8 oui[3]; + u8 device_id[6]; + u8 hw_rev; + u8 sw_major_rev; + u8 sw_minor_rev; +} __packed; + struct intel_dp { i915_reg_t output_reg; i915_reg_t aux_ch_ctl_reg; @@ -905,6 +913,8 @@ struct intel_dp { /* sink rates as reported by DP_SUPPORTED_LINK_RATES */ uint8_t num_sink_rates; int sink_rates[DP_MAX_SUPPORTED_RATES]; + /* sink or branch descriptor */ + struct intel_dp_desc desc; struct drm_dp_aux aux; uint8_t train_set[4]; int panel_power_up_delay; -- cgit v1.1 From 24e807e79f103cd9e7799eed704ed6916915d65a Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 24 Oct 2016 19:33:28 +0300 Subject: drm/i915/lspcon: Fail LSPCON probe if the start of DPCD can't be read All types of DP devices (eDP, DP sink, DP branch) will fail their probe if the start of DPCD can't be read. The LSPCON PCON functionality also depends on accessing this area, so fail the probe if the read fails. Cc: Shashank Sharma Signed-off-by: Imre Deak Reviewed-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1477326811-30431-6-git-send-email-imre.deak@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 2 +- drivers/gpu/drm/i915/intel_drv.h | 2 ++ drivers/gpu/drm/i915/intel_lspcon.c | 5 +++++ 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index bff27b7..07a502f 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -3497,7 +3497,7 @@ intel_dp_link_down(struct intel_dp *intel_dp) intel_dp->DP = DP; } -static bool +bool intel_dp_read_dpcd(struct intel_dp *intel_dp) { if (drm_dp_dpcd_read(&intel_dp->aux, 0x000, intel_dp->dpcd, diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 65ebe92..b958326 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1467,6 +1467,8 @@ static inline unsigned int intel_dp_unused_lane_mask(int lane_count) return ~((1 << lane_count) - 1) & 0xf; } +bool intel_dp_read_dpcd(struct intel_dp *intel_dp); + /* intel_dp_aux_backlight.c */ int intel_dp_aux_init_backlight_funcs(struct intel_connector *intel_connector); diff --git a/drivers/gpu/drm/i915/intel_lspcon.c b/drivers/gpu/drm/i915/intel_lspcon.c index 632149c..23b817a 100644 --- a/drivers/gpu/drm/i915/intel_lspcon.c +++ b/drivers/gpu/drm/i915/intel_lspcon.c @@ -131,6 +131,11 @@ bool lspcon_init(struct intel_digital_port *intel_dig_port) } } + if (!intel_dp_read_dpcd(dp)) { + DRM_ERROR("LSPCON DPCD read failed\n"); + return false; + } + DRM_DEBUG_KMS("Success: LSPCON init\n"); return true; } -- cgit v1.1 From 12a47a422862214d3a715a0bd93140b8176dcdc9 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 24 Oct 2016 19:33:29 +0300 Subject: drm/i915/dp: Read DP descriptor for eDP and LSPCON too As for external DP sink and branch devices read and print the DP descriptor for eDP and 
LSPCON devices as well to aid debugging.

v2:
- Split out this change to a separate patch. (Jani)

Cc: Jani Nikula
Signed-off-by: Imre Deak
Reviewed-by: Jani Nikula
Link: http://patchwork.freedesktop.org/patch/msgid/1477326811-30431-7-git-send-email-imre.deak@intel.com
---
 drivers/gpu/drm/i915/intel_dp.c     | 4 +++-
 drivers/gpu/drm/i915/intel_drv.h    | 1 +
 drivers/gpu/drm/i915/intel_lspcon.c | 2 ++
 3 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'drivers/gpu/drm')

diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 07a502f..6eb4364 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -1461,7 +1461,7 @@ __intel_dp_read_desc(struct intel_dp *intel_dp, struct intel_dp_desc *desc)
 		sizeof(*desc);
 }
 
-static bool intel_dp_read_desc(struct intel_dp *intel_dp)
+bool intel_dp_read_desc(struct intel_dp *intel_dp)
 {
 	struct intel_dp_desc *desc = &intel_dp->desc;
 	bool oui_sup = intel_dp->dpcd[DP_DOWN_STREAM_PORT_COUNT] &
@@ -3521,6 +3521,8 @@ intel_edp_init_dpcd(struct intel_dp *intel_dp)
 	if (!intel_dp_read_dpcd(intel_dp))
 		return false;
 
+	intel_dp_read_desc(intel_dp);
+
 	if (intel_dp->dpcd[DP_DPCD_REV] >= 0x11)
 		dev_priv->no_aux_handshake = intel_dp->dpcd[DP_MAX_DOWNSPREAD] &
 			DP_NO_AUX_HANDSHAKE_LINK_TRAINING;
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index b958326..20fb6e7 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1468,6 +1468,7 @@ static inline unsigned int intel_dp_unused_lane_mask(int lane_count)
 }
 
 bool intel_dp_read_dpcd(struct intel_dp *intel_dp);
+bool intel_dp_read_desc(struct intel_dp *intel_dp);
 
 /* intel_dp_aux_backlight.c */
 int intel_dp_aux_init_backlight_funcs(struct intel_connector *intel_connector);
diff --git a/drivers/gpu/drm/i915/intel_lspcon.c b/drivers/gpu/drm/i915/intel_lspcon.c
index 23b817a..c5f278b 100644
--- a/drivers/gpu/drm/i915/intel_lspcon.c
+++ b/drivers/gpu/drm/i915/intel_lspcon.c
@@ -136,6 +136,8 @@ bool lspcon_init(struct intel_digital_port *intel_dig_port)
 		return false;
 	}
 
+	intel_dp_read_desc(dp);
+
 	DRM_DEBUG_KMS("Success: LSPCON init\n");
 	return true;
 }
--
cgit v1.1


From a5d94b83ec709dbd90f17cba79142fde123d3869 Mon Sep 17 00:00:00 2001
From: Imre Deak
Date: Mon, 24 Oct 2016 19:33:30 +0300
Subject: drm/i915/lspcon: Get DDC adapter via container_of() instead of
 cached ptr

We can use the container_of() magic to get to the DDC adapter, so there
is no need to cache a pointer to it. We'll also need to get at the
intel_dp pointer in the following patch, so add a helper that can be
used for both purposes.
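A minimal standalone illustration of the container_of() pattern used by
the new helper (toy members stand in for the real embedded structs, and
the kernel macro additionally has type checking that is omitted here):

    #include <stddef.h>
    #include <stdio.h>

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct digital_port {
            int dp;     /* stand-in for the embedded intel_dp */
            int lspcon; /* stand-in for the embedded intel_lspcon */
    };

    int main(void)
    {
            struct digital_port port;
            int *lspcon = &port.lspcon;

            /* Recover the enclosing struct from a member pointer, as
             * lspcon_to_intel_dp() does in the diff below. */
            struct digital_port *dig_port =
                    container_of(lspcon, struct digital_port, lspcon);

            printf("recovered enclosing struct: %s\n",
                   dig_port == &port ? "yes" : "no");
            return 0;
    }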
Cc: Shashank Sharma
Signed-off-by: Imre Deak
Reviewed-by: Jani Nikula
Link: http://patchwork.freedesktop.org/patch/msgid/1477326811-30431-8-git-send-email-imre.deak@intel.com
---
 drivers/gpu/drm/i915/intel_drv.h    |  1 -
 drivers/gpu/drm/i915/intel_lspcon.c | 15 +++++++++++----
 2 files changed, 11 insertions(+), 5 deletions(-)

(limited to 'drivers/gpu/drm')

diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 20fb6e7..5989c48 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -974,7 +974,6 @@ struct intel_dp {
 struct intel_lspcon {
 	bool active;
 	enum drm_lspcon_mode mode;
-	struct drm_dp_aux *aux;
 };
 
 struct intel_digital_port {
diff --git a/drivers/gpu/drm/i915/intel_lspcon.c b/drivers/gpu/drm/i915/intel_lspcon.c
index c5f278b..3dc5a0b 100644
--- a/drivers/gpu/drm/i915/intel_lspcon.c
+++ b/drivers/gpu/drm/i915/intel_lspcon.c
@@ -27,10 +27,18 @@
 #include
 #include "intel_drv.h"
 
+static struct intel_dp *lspcon_to_intel_dp(struct intel_lspcon *lspcon)
+{
+	struct intel_digital_port *dig_port =
+		container_of(lspcon, struct intel_digital_port, lspcon);
+
+	return &dig_port->dp;
+}
+
 static enum drm_lspcon_mode lspcon_get_current_mode(struct intel_lspcon *lspcon)
 {
 	enum drm_lspcon_mode current_mode = DRM_LSPCON_MODE_INVALID;
-	struct i2c_adapter *adapter = &lspcon->aux->ddc;
+	struct i2c_adapter *adapter = &lspcon_to_intel_dp(lspcon)->aux.ddc;
 
 	if (drm_lspcon_get_mode(adapter, &current_mode))
 		DRM_ERROR("Error reading LSPCON mode\n");
@@ -45,7 +53,7 @@ static int lspcon_change_mode(struct intel_lspcon *lspcon,
 {
 	int err;
 	enum drm_lspcon_mode current_mode;
-	struct i2c_adapter *adapter = &lspcon->aux->ddc;
+	struct i2c_adapter *adapter = &lspcon_to_intel_dp(lspcon)->aux.ddc;
 
 	err = drm_lspcon_get_mode(adapter, &current_mode);
 	if (err) {
@@ -72,7 +80,7 @@ static int lspcon_change_mode(struct intel_lspcon *lspcon,
 static bool lspcon_probe(struct intel_lspcon *lspcon)
 {
 	enum drm_dp_dual_mode_type adaptor_type;
-	struct i2c_adapter *adapter = &lspcon->aux->ddc;
+	struct i2c_adapter *adapter = &lspcon_to_intel_dp(lspcon)->aux.ddc;
 
 	/* Lets probe the adaptor and check its type */
 	adaptor_type = drm_dp_dual_mode_detect(adapter);
@@ -111,7 +119,6 @@ bool lspcon_init(struct intel_digital_port *intel_dig_port)
 
 	lspcon->active = false;
 	lspcon->mode = DRM_LSPCON_MODE_INVALID;
-	lspcon->aux = &dp->aux;
 
 	if (!lspcon_probe(lspcon)) {
 		DRM_ERROR("Failed to probe lspcon\n");
--
cgit v1.1


From 489375c866c111f16cea93b2467ebe59c9022cc7 Mon Sep 17 00:00:00 2001
From: Imre Deak
Date: Mon, 24 Oct 2016 19:33:31 +0300
Subject: drm/i915/lspcon: Add workaround for resuming in PCON mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On my APL the LSPCON firmware resumes in PCON mode, as opposed to the
expected LS mode. It also appears to be in a state where AUX DPCD reads
will succeed but return garbage, recovering only after a few hundred
milliseconds. After the recovery time, DPCD reads will return the
correct values and things will continue to work. If I2C over AUX is
attempted during this recovery time (implying an AUX write transaction),
the firmware won't recover and will stay in this broken state.

As a workaround, check if the firmware is in PCON state after resume
and, if so, wait until the correct DPCD values are returned. For this we
compare the branch descriptor with the one we cached at init time. If
the firmware was in the LS state, we skip the w/a and continue as
before.
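The retry loop described here follows a common poll-until-stable shape;
below is a standalone sketch under the timings given above (roughly a
1 s budget with a 10-15 ms back-off), where read_desc() is a
hypothetical stand-in for the AUX descriptor read:

    #include <stdbool.h>
    #include <string.h>
    #include <time.h>

    #define DESC_SIZE 12

    /* Hypothetical stand-in for __intel_dp_read_desc(). */
    bool read_desc(unsigned char buf[DESC_SIZE]);

    /* Poll until a fresh read matches the descriptor cached at init
     * time, giving up after roughly one second. */
    bool wait_for_cached_desc(const unsigned char cached[DESC_SIZE])
    {
            const struct timespec backoff = { .tv_nsec = 10 * 1000 * 1000 };
            struct timespec start, now;

            clock_gettime(CLOCK_MONOTONIC, &start);
            for (;;) {
                    unsigned char cur[DESC_SIZE];

                    if (!read_desc(cur))
                            return false;   /* unexpected AUX error */
                    if (memcmp(cached, cur, DESC_SIZE) == 0)
                            return true;    /* firmware recovered */

                    clock_gettime(CLOCK_MONOTONIC, &now);
                    if (now.tv_sec - start.tv_sec >= 1)
                            return false;   /* deadline passed */
                    nanosleep(&backoff, NULL); /* ~10 ms back-off */
            }
    }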
v2: - Use the DP descriptor value cached in intel_dp. (Jani) - Get to intel_dp using container_of(), instead of a cached ptr. (Shashank) - Use usleep_range() instead of msleep(). Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98353 Cc: Shashank Sharma Cc: Ville Syrjälä Cc: Jani Nikula Signed-off-by: Imre Deak Reviewed-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1477326811-30431-9-git-send-email-imre.deak@intel.com --- drivers/gpu/drm/i915/intel_dp.c | 2 +- drivers/gpu/drm/i915/intel_drv.h | 3 +++ drivers/gpu/drm/i915/intel_lspcon.c | 37 ++++++++++++++++++++++++++++++++++++- 3 files changed, 40 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 6eb4364..8f313c1 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1451,7 +1451,7 @@ static void intel_dp_print_rates(struct intel_dp *intel_dp) DRM_DEBUG_KMS("common rates: %s\n", str); } -static bool +bool __intel_dp_read_desc(struct intel_dp *intel_dp, struct intel_dp_desc *desc) { u32 base = drm_dp_is_branch(intel_dp->dpcd) ? DP_BRANCH_OUI : diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 5989c48..c2f3863 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -974,6 +974,7 @@ struct intel_dp { struct intel_lspcon { bool active; enum drm_lspcon_mode mode; + bool desc_valid; }; struct intel_digital_port { @@ -1467,6 +1468,8 @@ static inline unsigned int intel_dp_unused_lane_mask(int lane_count) } bool intel_dp_read_dpcd(struct intel_dp *intel_dp); +bool __intel_dp_read_desc(struct intel_dp *intel_dp, + struct intel_dp_desc *desc); bool intel_dp_read_desc(struct intel_dp *intel_dp); /* intel_dp_aux_backlight.c */ diff --git a/drivers/gpu/drm/i915/intel_lspcon.c b/drivers/gpu/drm/i915/intel_lspcon.c index 3dc5a0b..daa5234 100644 --- a/drivers/gpu/drm/i915/intel_lspcon.c +++ b/drivers/gpu/drm/i915/intel_lspcon.c @@ -97,8 +97,43 @@ static bool lspcon_probe(struct intel_lspcon *lspcon) return true; } +static void lspcon_resume_in_pcon_wa(struct intel_lspcon *lspcon) +{ + struct intel_dp *intel_dp = lspcon_to_intel_dp(lspcon); + unsigned long start = jiffies; + + if (!lspcon->desc_valid) + return; + + while (1) { + struct intel_dp_desc desc; + + /* + * The w/a only applies in PCON mode and we don't expect any + * AUX errors. 
+ */ + if (!__intel_dp_read_desc(intel_dp, &desc)) + return; + + if (!memcmp(&intel_dp->desc, &desc, sizeof(desc))) { + DRM_DEBUG_KMS("LSPCON recovering in PCON mode after %u ms\n", + jiffies_to_msecs(jiffies - start)); + return; + } + + if (time_after(jiffies, start + msecs_to_jiffies(1000))) + break; + + usleep_range(10000, 15000); + } + + DRM_DEBUG_KMS("LSPCON DP descriptor mismatch after resume\n"); +} + void lspcon_resume(struct intel_lspcon *lspcon) { + lspcon_resume_in_pcon_wa(lspcon); + if (lspcon_change_mode(lspcon, DRM_LSPCON_MODE_PCON, true)) DRM_ERROR("LSPCON resume failed\n"); else @@ -143,7 +178,7 @@ bool lspcon_init(struct intel_digital_port *intel_dig_port) return false; } - intel_dp_read_desc(dp); + lspcon->desc_valid = intel_dp_read_desc(dp); DRM_DEBUG_KMS("Success: LSPCON init\n"); return true; -- cgit v1.1 From aafee2eb8ce86c13b3dde819de2f93d0ba61a040 Mon Sep 17 00:00:00 2001 From: Arkadiusz Hiler Date: Tue, 25 Oct 2016 14:48:02 +0200 Subject: drm/i915: fix comment on I915_{READ, WRITE}_FW Comment mentioned use of intel_uncore_forcewake_irq{unlock, lock} functions which are nonexistent (and never were). The description was also incomplete and could cause confusion. Updated comment is more elaborate on usage and caveats. v2: mention __locked variant of intel_uncore_forcewake_{get,put} instead of plain ones Cc: Chris Wilson Cc: Matthew Auld Cc: Mika Kuoppala Signed-off-by: Arkadiusz Hiler Reviewed-by: Matthew Auld Reviewed-by: Chris Wilson [Mika: removed two superfluous lines on comment noted by Chris] Signed-off-by: Mika Kuoppala Link: http://patchwork.freedesktop.org/patch/msgid/1477399682-3133-1-git-send-email-arkadiusz.hiler@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7a621c7..55afb66 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3844,11 +3844,30 @@ __raw_write(64, q) #undef __raw_write /* These are untraced mmio-accessors that are only valid to be used inside - * critical sections inside IRQ handlers where forcewake is explicitly + * critical sections, such as inside IRQ handlers, where forcewake is explicitly * controlled. + * * Think twice, and think again, before using these. - * Note: Should only be used between intel_uncore_forcewake_irqlock() and - * intel_uncore_forcewake_irqunlock(). + * + * As an example, these accessors can possibly be used between: + * + * spin_lock_irq(&dev_priv->uncore.lock); + * intel_uncore_forcewake_get__locked(); + * + * and + * + * intel_uncore_forcewake_put__locked(); + * spin_unlock_irq(&dev_priv->uncore.lock); + * + * + * Note: some registers may not need forcewake held, so + * intel_uncore_forcewake_{get,put} can be omitted, see + * intel_uncore_forcewake_for_reg(). + * + * Certain architectures will die if the same cacheline is concurrently accessed + * by different clients (e.g. on Ivybridge). Access to registers should + * therefore generally be serialised, by either the dev_priv->uncore.lock or + * a more localised lock guarding all access to that bank of registers. 
*/
#define I915_READ_FW(reg__) __raw_i915_read32(dev_priv, (reg__))
#define I915_WRITE_FW(reg__, val__) __raw_i915_write32(dev_priv, (reg__), (val__))
--
cgit v1.1


From da064b47c0b8d0dff1905b38c76e7e51fb5a9547 Mon Sep 17 00:00:00 2001
From: Ville Syrjälä
Date: Mon, 24 Oct 2016 19:13:04 +0300
Subject: drm/i915: Fix SKL+ 90/270 degree rotated plane coordinate computation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pass the framebuffer size in .16 fixed point coordinates to
drm_rect_rotate() since that's what the source coordinates are as well
at this stage. We used to do this part of the computation in integer
coordinates, but that got changed when moving the computation to happen
in the check phase of the operation. Unfortunately I forgot to shift up
the fb width and height appropriately.

With the bogus size we ended up with some negative fb offset, which when
added to the vma offset caused our scanout to start at an offset earlier
than we intended. E.g. when testing on my SKL I saw a row of incorrect
tiles at the top of my screen.

Cc: Tvrtko Ursulin
Cc: Sivakumar Thulasimani
Cc: drm-intel-fixes@lists.freedesktop.org
Fixes: b63a16f6cd89 ("drm/i915: Compute display surface offset in the plane check hook for SKL+")
Signed-off-by: Ville Syrjälä
Link: http://patchwork.freedesktop.org/patch/msgid/1477325584-23679-1-git-send-email-ville.syrjala@linux.intel.com
Tested-by: Tvrtko Ursulin
Reviewed-by: Chris Wilson
---
 drivers/gpu/drm/i915/intel_display.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'drivers/gpu/drm')

diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 2a5a7c2..895b3dc 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2985,7 +2985,8 @@ int skl_check_plane_surface(struct intel_plane_state *plane_state)
 	/* Rotate src coordinates to match rotated GTT view */
 	if (drm_rotation_90_or_270(rotation))
 		drm_rect_rotate(&plane_state->base.src,
-				fb->width, fb->height, DRM_ROTATE_270);
+				fb->width << 16, fb->height << 16,
+				DRM_ROTATE_270);
 
 	/*
 	 * Handle the AUX surface first since
--
cgit v1.1


From 1c00164d4cd1f513ea44a1b068490e52e7cbf04f Mon Sep 17 00:00:00 2001
From: Anusha Srivatsa
Date: Mon, 24 Oct 2016 17:28:21 -0700
Subject: drm/i915/DMC/KBL: Load DMC on KBL using the no_stepping_info array

Currently, for display there is only one DMC image for KBL. Remove the
stepping_info table for KBL and use the no_stepping_info array for
loading the firmware.

v2: Removed the block of code as pointed out by Rodrigo to make the
loads as generic as possible.
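Returning to the rotation fix above, a standalone sketch of why mixing
an integer size into .16 fixed-point coordinate math yields the negative
offsets that commit message describes (assuming the rotation performs
the usual size-minus-coordinate subtraction):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            int32_t fb_width = 1920;        /* pixels */
            int32_t x2 = 1920 << 16;        /* src right edge, .16 format */

            /* Integer width mixed with a .16 coordinate: hugely negative. */
            int32_t wrong = fb_width - x2;
            /* Width shifted up to .16 first, as the fix does: zero, the
             * expected result for a full-width source rectangle. */
            int32_t right = (fb_width << 16) - x2;

            printf("wrong=%d right=%d\n", wrong, right);
            return 0;
    }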
Cc: Rodrigo Vivi
Signed-off-by: Anusha Srivatsa
Reviewed-by: Rodrigo Vivi
Acked-by: Imre Deak
Signed-off-by: Rodrigo Vivi
Link: http://patchwork.freedesktop.org/patch/msgid/1477355301-7035-1-git-send-email-anusha.srivatsa@intel.com
---
 drivers/gpu/drm/i915/intel_csr.c | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

(limited to 'drivers/gpu/drm')

diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
index 1ea0e1f..d7a04bc 100644
--- a/drivers/gpu/drm/i915/intel_csr.c
+++ b/drivers/gpu/drm/i915/intel_csr.c
@@ -168,12 +168,6 @@ struct stepping_info {
 	char substepping;
 };
 
-static const struct stepping_info kbl_stepping_info[] = {
-	{'A', '0'}, {'B', '0'}, {'C', '0'},
-	{'D', '0'}, {'E', '0'}, {'F', '0'},
-	{'G', '0'}, {'H', '0'}, {'I', '0'},
-};
-
 static const struct stepping_info skl_stepping_info[] = {
 	{'A', '0'}, {'B', '0'}, {'C', '0'},
 	{'D', '0'}, {'E', '0'}, {'F', '0'},
@@ -194,10 +188,7 @@ intel_get_stepping_info(struct drm_i915_private *dev_priv)
 	const struct stepping_info *si;
 	unsigned int size;
 
-	if (IS_KABYLAKE(dev_priv)) {
-		size = ARRAY_SIZE(kbl_stepping_info);
-		si = kbl_stepping_info;
-	} else if (IS_SKYLAKE(dev_priv)) {
+	if (IS_SKYLAKE(dev_priv)) {
 		size = ARRAY_SIZE(skl_stepping_info);
 		si = skl_stepping_info;
 	} else if (IS_BROXTON(dev_priv)) {
--
cgit v1.1


From 23736d1b1b2321f7e4647d8d5f8ff16fab11d24f Mon Sep 17 00:00:00 2001
From: Ping Gao
Date: Wed, 26 Oct 2016 09:38:52 +0800
Subject: drm/i915/gvt: add full vGPU reset support

A full vGPU reset needs to release all the shadow PPGTT pages to avoid
unnecessary write-protection, and should also re-initialize pvinfo after
resetting vregs to keep pvinfo correct.

Signed-off-by: Ping Gao
Signed-off-by: Zhenyu Wang
---
 drivers/gpu/drm/i915/gvt/gvt.h      |  2 ++
 drivers/gpu/drm/i915/gvt/handlers.c | 10 ++++++++++
 drivers/gpu/drm/i915/gvt/vgpu.c     |  4 ++--
 3 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu/drm')

diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index 11df62b..62fc9e3 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -382,6 +382,8 @@ void intel_vgpu_clean_opregion(struct intel_vgpu *vgpu);
 int intel_vgpu_init_opregion(struct intel_vgpu *vgpu, u32 gpa);
 
 int intel_vgpu_emulate_opregion_request(struct intel_vgpu *vgpu, u32 swsci);
+int setup_vgpu_mmio(struct intel_vgpu *vgpu);
+void populate_pvinfo_page(struct intel_vgpu *vgpu);
 
 #include "mpt.h"
 
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 7832e49..4850cf3 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -251,6 +251,16 @@ static int handle_device_reset(struct intel_vgpu *vgpu, unsigned int offset,
 
 	intel_vgpu_reset_execlist(vgpu, bitmap);
 
+	/* full GPU reset */
+	if (bitmap == 0xff) {
+		mutex_unlock(&vgpu->gvt->lock);
+		intel_vgpu_clean_gtt(vgpu);
+		mutex_lock(&vgpu->gvt->lock);
+		setup_vgpu_mmio(vgpu);
+		populate_pvinfo_page(vgpu);
+		intel_vgpu_init_gtt(vgpu);
+	}
+
 	vgpu->resetting = false;
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c
index 9401436..4f54005 100644
--- a/drivers/gpu/drm/i915/gvt/vgpu.c
+++ b/drivers/gpu/drm/i915/gvt/vgpu.c
@@ -41,7 +41,7 @@ static void clean_vgpu_mmio(struct intel_vgpu *vgpu)
 	vgpu->mmio.vreg = vgpu->mmio.sreg = NULL;
 }
 
-static int setup_vgpu_mmio(struct intel_vgpu *vgpu)
+int setup_vgpu_mmio(struct intel_vgpu *vgpu)
 {
 	struct intel_gvt *gvt = vgpu->gvt;
 	const struct intel_gvt_device_info *info =
		&gvt->device_info;
@@ -103,7 +103,7 @@ static void setup_vgpu_cfg_space(struct intel_vgpu *vgpu,
 	}
 }
 
-static void populate_pvinfo_page(struct intel_vgpu *vgpu)
+void populate_pvinfo_page(struct intel_vgpu *vgpu)
 {
 	/* setup the ballooning information */
 	vgpu_vreg64(vgpu, vgtif_reg(magic)) = VGT_MAGIC;
--
cgit v1.1


From 6fb5082a8c4243c22ecf310b9f3add8371dfa26e Mon Sep 17 00:00:00 2001
From: Bing Niu
Date: Mon, 31 Oct 2016 17:35:12 +0800
Subject: drm/i915/gvt: throw error based on execlist submit result

Throw an error message in the ELSP emulation handler based on the
execlist submit result. The guest will trigger the TDR process for
recovery; GVT just follows the guest's desire.

v2: propagate the error to the top of the mmio emulation logic, per
comments from Zhenyu

Signed-off-by: Bing Niu
Signed-off-by: Zhenyu Wang
---
 drivers/gpu/drm/i915/gvt/handlers.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'drivers/gpu/drm')

diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 4850cf3..9ab1f95 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -1320,7 +1320,7 @@ static int elsp_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
 	int ring_id = render_mmio_to_ring_id(vgpu->gvt, offset);
 	struct intel_vgpu_execlist *execlist;
 	u32 data = *(u32 *)p_data;
-	int ret;
+	int ret = 0;
 
 	if (WARN_ON(ring_id < 0 || ring_id > I915_NUM_ENGINES - 1))
 		return -EINVAL;
@@ -1328,12 +1328,15 @@ static int elsp_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
 	execlist = &vgpu->execlist[ring_id];
 
 	execlist->elsp_dwords.data[execlist->elsp_dwords.index] = data;
-	if (execlist->elsp_dwords.index == 3)
+	if (execlist->elsp_dwords.index == 3) {
 		ret = intel_vgpu_submit_execlist(vgpu, ring_id);
+		if(ret)
+			gvt_err("fail submit workload on ring %d\n", ring_id);
+	}
 
 	++execlist->elsp_dwords.index;
 	execlist->elsp_dwords.index &= 0x3;
-	return 0;
+	return ret;
 }
 
 static int ring_mode_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
--
cgit v1.1


From e45d7b7f47a4849a5d3d55a2cf5802a72924d37b Mon Sep 17 00:00:00 2001
From: "Du, Changbin"
Date: Thu, 27 Oct 2016 11:10:31 +0800
Subject: drm/i915/gvt: fix nested sleeping issue

We cannot use a blocking method such as mutex_lock() inside a wait
loop. Here we invoke pick_next_workload(), which needs to acquire a
mutex, in our "condition" expression. We then enter another
going-to-sleep sequence and change the task state, which is dangerous.
Let's rewrite the wait sequence to avoid nested sleeping.
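The hazard in miniature, quoting the pattern the diff below removes:
wait_event_interruptible() puts the task in TASK_INTERRUPTIBLE before
evaluating its condition, so a condition that can itself sleep clobbers
that state:

    /* Removed pattern: the condition expression calls
     * pick_next_workload(), which takes a mutex and may sleep while the
     * task is already in TASK_INTERRUPTIBLE -- the nested-sleeping bug. */
    ret = wait_event_interruptible(scheduler->waitq[ring_id],
                    kthread_should_stop() ||
                    (workload = pick_next_workload(gvt, ring_id)));

The replacement registers on the wait queue first, calls the blocking
helper while still in TASK_RUNNING, and only then sleeps via
wait_woken().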
v2: fix do...while loop exit condition (zhenyu)
v3: rebase to gvt-staging branch

Signed-off-by: Du, Changbin
Signed-off-by: Zhenyu Wang
---
 drivers/gpu/drm/i915/gvt/scheduler.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

(limited to 'drivers/gpu/drm')

diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index e96eaee..f7e320b 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -402,19 +402,24 @@ static int workload_thread(void *priv)
 	struct intel_vgpu_workload *workload = NULL;
 	int ret;
 	bool need_force_wake = IS_SKYLAKE(gvt->dev_priv);
+	DEFINE_WAIT_FUNC(wait, woken_wake_function);
 
 	kfree(p);
 
 	gvt_dbg_core("workload thread for ring %d started\n", ring_id);
 
 	while (!kthread_should_stop()) {
-		ret = wait_event_interruptible(scheduler->waitq[ring_id],
-				kthread_should_stop() ||
-				(workload = pick_next_workload(gvt, ring_id)));
-
-		WARN_ON_ONCE(ret);
-
-		if (kthread_should_stop())
+		add_wait_queue(&scheduler->waitq[ring_id], &wait);
+		do {
+			workload = pick_next_workload(gvt, ring_id);
+			if (workload)
+				break;
+			wait_woken(&wait, TASK_INTERRUPTIBLE,
+				   MAX_SCHEDULE_TIMEOUT);
+		} while (!kthread_should_stop());
+		remove_wait_queue(&scheduler->waitq[ring_id], &wait);
+
+		if (!workload)
 			break;
 
 		mutex_lock(&scheduler_mutex);
--
cgit v1.1


From 1353ec3833360ffab479d17781493ead1d38a006 Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin
Date: Thu, 27 Oct 2016 13:48:32 +0100
Subject: drm/i915: Correct pipe fault reporting string
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A newline somehow ended up in the middle of the line.

Signed-off-by: Tvrtko Ursulin
Reviewed-by: Ville Syrjälä
Reviewed-by: Chris Wilson
Link: http://patchwork.freedesktop.org/patch/msgid/1477572512-4030-1-git-send-email-tvrtko.ursulin@linux.intel.com
---
 drivers/gpu/drm/i915/i915_irq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/gpu/drm')

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 9111cfd..88239e1 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -2503,7 +2503,7 @@ gen8_de_irq_handler(struct drm_i915_private *dev_priv, u32 master_ctl)
 
 		fault_errors &= GEN8_DE_PIPE_IRQ_FAULT_ERRORS;
 		if (fault_errors)
-			DRM_ERROR("Fault errors on pipe %c\n: 0x%08x",
+			DRM_ERROR("Fault errors on pipe %c: 0x%08x\n",
 				  pipe_name(pipe),
 				  fault_errors);
 	}
--
cgit v1.1


From 01c3faa70bcde3519f0dba08e6218806bca03435 Mon Sep 17 00:00:00 2001
From: Ander Conselvan de Oliveira
Date: Thu, 6 Oct 2016 19:22:14 +0300
Subject: drm/i915: Rename struct i915_power_well field data to id

Calling it data seems to imply arbitrary data can be associated with the
power well. However, that field is used for lookups and expected to be
unique, so rename it.
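The lookup the message refers to has the usual keyed-search shape; a
standalone sketch with simplified types (not the real i915 structs):

    #include <stddef.h>

    struct power_well {
            const char *name;
            unsigned long id;   /* unique key; formerly named "data" */
    };

    /* Find a well by its unique identifier, mirroring what
     * lookup_power_well() does with power_well->id in the diff below. */
    struct power_well *lookup_power_well(struct power_well *wells,
                                         size_t n, unsigned long id)
    {
            size_t i;

            for (i = 0; i < n; i++)
                    if (wells[i].id == id)
                            return &wells[i];
            return NULL;
    }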
Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Imre Deak Link: http://patchwork.freedesktop.org/patch/msgid/f3916c3c5bfa793b0fc870fd44007a3ff425194d.1475770848.git-series.ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 3 +- drivers/gpu/drm/i915/intel_runtime_pm.c | 112 ++++++++++++++++---------------- 2 files changed, 58 insertions(+), 57 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 55afb66..61449fa 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1322,7 +1322,8 @@ struct i915_power_well { /* cached hw enabled state */ bool hw_enabled; unsigned long domains; - unsigned long data; + /* unique identifier for this power well */ + unsigned long id; const struct i915_power_well_ops *ops; }; diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 82edba2..53ba45a 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -330,7 +330,7 @@ static void skl_power_well_post_enable(struct drm_i915_private *dev_priv, * sure vgacon can keep working normally without triggering interrupts * and error messages. */ - if (power_well->data == SKL_DISP_PW_2) { + if (power_well->id == SKL_DISP_PW_2) { vga_get_uninterruptible(pdev, VGA_RSRC_LEGACY_IO); outb(inb(VGA_MSR_READ), VGA_MSR_WRITE); vga_put(pdev, VGA_RSRC_LEGACY_IO); @@ -343,7 +343,7 @@ static void skl_power_well_post_enable(struct drm_i915_private *dev_priv, static void skl_power_well_pre_disable(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { - if (power_well->data == SKL_DISP_PW_2) + if (power_well->id == SKL_DISP_PW_2) gen8_irq_power_well_pre_disable(dev_priv, 1 << PIPE_C | 1 << PIPE_B); } @@ -658,7 +658,7 @@ static void gen9_sanitize_power_well_requests(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { - enum skl_disp_power_wells power_well_id = power_well->data; + enum skl_disp_power_wells power_well_id = power_well->id; u32 val; u32 mask; @@ -703,7 +703,7 @@ static void skl_set_power_well(struct drm_i915_private *dev_priv, tmp = I915_READ(HSW_PWR_WELL_DRIVER); fuse_status = I915_READ(SKL_FUSE_STATUS); - switch (power_well->data) { + switch (power_well->id) { case SKL_DISP_PW_1: if (intel_wait_for_register(dev_priv, SKL_FUSE_STATUS, @@ -727,13 +727,13 @@ static void skl_set_power_well(struct drm_i915_private *dev_priv, case SKL_DISP_PW_MISC_IO: break; default: - WARN(1, "Unknown power well %lu\n", power_well->data); + WARN(1, "Unknown power well %lu\n", power_well->id); return; } - req_mask = SKL_POWER_WELL_REQ(power_well->data); + req_mask = SKL_POWER_WELL_REQ(power_well->id); enable_requested = tmp & req_mask; - state_mask = SKL_POWER_WELL_STATE(power_well->data); + state_mask = SKL_POWER_WELL_STATE(power_well->id); is_enabled = tmp & state_mask; if (!enable && enable_requested) @@ -769,14 +769,14 @@ static void skl_set_power_well(struct drm_i915_private *dev_priv, power_well->name, enable ? 
"enable" : "disable"); if (check_fuse_status) { - if (power_well->data == SKL_DISP_PW_1) { + if (power_well->id == SKL_DISP_PW_1) { if (intel_wait_for_register(dev_priv, SKL_FUSE_STATUS, SKL_FUSE_PG1_DIST_STATUS, SKL_FUSE_PG1_DIST_STATUS, 1)) DRM_ERROR("PG1 distributing status timeout\n"); - } else if (power_well->data == SKL_DISP_PW_2) { + } else if (power_well->id == SKL_DISP_PW_2) { if (intel_wait_for_register(dev_priv, SKL_FUSE_STATUS, SKL_FUSE_PG2_DIST_STATUS, @@ -818,8 +818,8 @@ static void hsw_power_well_disable(struct drm_i915_private *dev_priv, static bool skl_power_well_enabled(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { - uint32_t mask = SKL_POWER_WELL_REQ(power_well->data) | - SKL_POWER_WELL_STATE(power_well->data); + uint32_t mask = SKL_POWER_WELL_REQ(power_well->id) | + SKL_POWER_WELL_STATE(power_well->id); return (I915_READ(HSW_PWR_WELL_DRIVER) & mask) == mask; } @@ -847,7 +847,7 @@ static void skl_power_well_disable(struct drm_i915_private *dev_priv, static enum dpio_phy bxt_power_well_to_phy(struct i915_power_well *power_well) { - enum skl_disp_power_wells power_well_id = power_well->data; + enum skl_disp_power_wells power_well_id = power_well->id; return power_well_id == BXT_DPIO_CMN_A ? DPIO_PHY1 : DPIO_PHY0; } @@ -855,7 +855,7 @@ static enum dpio_phy bxt_power_well_to_phy(struct i915_power_well *power_well) static void bxt_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { - enum skl_disp_power_wells power_well_id = power_well->data; + enum skl_disp_power_wells power_well_id = power_well->id; struct i915_power_well *cmn_a_well = NULL; if (power_well_id == BXT_DPIO_CMN_BC) { @@ -975,7 +975,7 @@ static bool i9xx_always_on_power_well_enabled(struct drm_i915_private *dev_priv, static void vlv_set_power_well(struct drm_i915_private *dev_priv, struct i915_power_well *power_well, bool enable) { - enum punit_power_well power_well_id = power_well->data; + enum punit_power_well power_well_id = power_well->id; u32 mask; u32 state; u32 ctrl; @@ -1029,7 +1029,7 @@ static void vlv_power_well_disable(struct drm_i915_private *dev_priv, static bool vlv_power_well_enabled(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { - int power_well_id = power_well->data; + int power_well_id = power_well->id; bool enabled = false; u32 mask; u32 state; @@ -1144,7 +1144,7 @@ static void vlv_display_power_well_deinit(struct drm_i915_private *dev_priv) static void vlv_display_power_well_enable(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { - WARN_ON_ONCE(power_well->data != PUNIT_POWER_WELL_DISP2D); + WARN_ON_ONCE(power_well->id != PUNIT_POWER_WELL_DISP2D); vlv_set_power_well(dev_priv, power_well, true); @@ -1154,7 +1154,7 @@ static void vlv_display_power_well_enable(struct drm_i915_private *dev_priv, static void vlv_display_power_well_disable(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { - WARN_ON_ONCE(power_well->data != PUNIT_POWER_WELL_DISP2D); + WARN_ON_ONCE(power_well->id != PUNIT_POWER_WELL_DISP2D); vlv_display_power_well_deinit(dev_priv); @@ -1164,7 +1164,7 @@ static void vlv_display_power_well_disable(struct drm_i915_private *dev_priv, static void vlv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { - WARN_ON_ONCE(power_well->data != PUNIT_POWER_WELL_DPIO_CMN_BC); + WARN_ON_ONCE(power_well->id != PUNIT_POWER_WELL_DPIO_CMN_BC); /* since ref/cri clock was enabled */ udelay(1); /* >10ns for 
cmnreset, >0ns for sidereset */ @@ -1190,7 +1190,7 @@ static void vlv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv, { enum pipe pipe; - WARN_ON_ONCE(power_well->data != PUNIT_POWER_WELL_DPIO_CMN_BC); + WARN_ON_ONCE(power_well->id != PUNIT_POWER_WELL_DPIO_CMN_BC); for_each_pipe(dev_priv, pipe) assert_pll_disabled(dev_priv, pipe); @@ -1213,7 +1213,7 @@ static struct i915_power_well *lookup_power_well(struct drm_i915_private *dev_pr struct i915_power_well *power_well; power_well = &power_domains->power_wells[i]; - if (power_well->data == power_well_id) + if (power_well->id == power_well_id) return power_well; } @@ -1337,10 +1337,10 @@ static void chv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv, enum pipe pipe; uint32_t tmp; - WARN_ON_ONCE(power_well->data != PUNIT_POWER_WELL_DPIO_CMN_BC && - power_well->data != PUNIT_POWER_WELL_DPIO_CMN_D); + WARN_ON_ONCE(power_well->id != PUNIT_POWER_WELL_DPIO_CMN_BC && + power_well->id != PUNIT_POWER_WELL_DPIO_CMN_D); - if (power_well->data == PUNIT_POWER_WELL_DPIO_CMN_BC) { + if (power_well->id == PUNIT_POWER_WELL_DPIO_CMN_BC) { pipe = PIPE_A; phy = DPIO_PHY0; } else { @@ -1368,7 +1368,7 @@ static void chv_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv, DPIO_SUS_CLK_CONFIG_GATE_CLKREQ; vlv_dpio_write(dev_priv, pipe, CHV_CMN_DW28, tmp); - if (power_well->data == PUNIT_POWER_WELL_DPIO_CMN_BC) { + if (power_well->id == PUNIT_POWER_WELL_DPIO_CMN_BC) { tmp = vlv_dpio_read(dev_priv, pipe, _CHV_CMN_DW6_CH1); tmp |= DPIO_DYNPWRDOWNEN_CH1; vlv_dpio_write(dev_priv, pipe, _CHV_CMN_DW6_CH1, tmp); @@ -1399,10 +1399,10 @@ static void chv_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv, { enum dpio_phy phy; - WARN_ON_ONCE(power_well->data != PUNIT_POWER_WELL_DPIO_CMN_BC && - power_well->data != PUNIT_POWER_WELL_DPIO_CMN_D); + WARN_ON_ONCE(power_well->id != PUNIT_POWER_WELL_DPIO_CMN_BC && + power_well->id != PUNIT_POWER_WELL_DPIO_CMN_D); - if (power_well->data == PUNIT_POWER_WELL_DPIO_CMN_BC) { + if (power_well->id == PUNIT_POWER_WELL_DPIO_CMN_BC) { phy = DPIO_PHY0; assert_pll_disabled(dev_priv, PIPE_A); assert_pll_disabled(dev_priv, PIPE_B); @@ -1551,7 +1551,7 @@ void chv_phy_powergate_lanes(struct intel_encoder *encoder, static bool chv_pipe_power_well_enabled(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { - enum pipe pipe = power_well->data; + enum pipe pipe = power_well->id; bool enabled; u32 state, ctrl; @@ -1581,7 +1581,7 @@ static void chv_set_pipe_power_well(struct drm_i915_private *dev_priv, struct i915_power_well *power_well, bool enable) { - enum pipe pipe = power_well->data; + enum pipe pipe = power_well->id; u32 state; u32 ctrl; @@ -1614,7 +1614,7 @@ out: static void chv_pipe_power_well_sync_hw(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { - WARN_ON_ONCE(power_well->data != PIPE_A); + WARN_ON_ONCE(power_well->id != PIPE_A); chv_set_pipe_power_well(dev_priv, power_well, power_well->count > 0); } @@ -1622,7 +1622,7 @@ static void chv_pipe_power_well_sync_hw(struct drm_i915_private *dev_priv, static void chv_pipe_power_well_enable(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { - WARN_ON_ONCE(power_well->data != PIPE_A); + WARN_ON_ONCE(power_well->id != PIPE_A); chv_set_pipe_power_well(dev_priv, power_well, true); @@ -1632,7 +1632,7 @@ static void chv_pipe_power_well_enable(struct drm_i915_private *dev_priv, static void chv_pipe_power_well_disable(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { - 
WARN_ON_ONCE(power_well->data != PIPE_A); + WARN_ON_ONCE(power_well->id != PIPE_A); vlv_display_power_well_deinit(dev_priv); @@ -1976,12 +1976,12 @@ static struct i915_power_well vlv_power_wells[] = { .always_on = 1, .domains = POWER_DOMAIN_MASK, .ops = &i9xx_always_on_power_well_ops, - .data = PUNIT_POWER_WELL_ALWAYS_ON, + .id = PUNIT_POWER_WELL_ALWAYS_ON, }, { .name = "display", .domains = VLV_DISPLAY_POWER_DOMAINS, - .data = PUNIT_POWER_WELL_DISP2D, + .id = PUNIT_POWER_WELL_DISP2D, .ops = &vlv_display_power_well_ops, }, { @@ -1991,7 +1991,7 @@ static struct i915_power_well vlv_power_wells[] = { VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS | VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS, .ops = &vlv_dpio_power_well_ops, - .data = PUNIT_POWER_WELL_DPIO_TX_B_LANES_01, + .id = PUNIT_POWER_WELL_DPIO_TX_B_LANES_01, }, { .name = "dpio-tx-b-23", @@ -2000,7 +2000,7 @@ static struct i915_power_well vlv_power_wells[] = { VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS | VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS, .ops = &vlv_dpio_power_well_ops, - .data = PUNIT_POWER_WELL_DPIO_TX_B_LANES_23, + .id = PUNIT_POWER_WELL_DPIO_TX_B_LANES_23, }, { .name = "dpio-tx-c-01", @@ -2009,7 +2009,7 @@ static struct i915_power_well vlv_power_wells[] = { VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS | VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS, .ops = &vlv_dpio_power_well_ops, - .data = PUNIT_POWER_WELL_DPIO_TX_C_LANES_01, + .id = PUNIT_POWER_WELL_DPIO_TX_C_LANES_01, }, { .name = "dpio-tx-c-23", @@ -2018,12 +2018,12 @@ static struct i915_power_well vlv_power_wells[] = { VLV_DPIO_TX_C_LANES_01_POWER_DOMAINS | VLV_DPIO_TX_C_LANES_23_POWER_DOMAINS, .ops = &vlv_dpio_power_well_ops, - .data = PUNIT_POWER_WELL_DPIO_TX_C_LANES_23, + .id = PUNIT_POWER_WELL_DPIO_TX_C_LANES_23, }, { .name = "dpio-common", .domains = VLV_DPIO_CMN_BC_POWER_DOMAINS, - .data = PUNIT_POWER_WELL_DPIO_CMN_BC, + .id = PUNIT_POWER_WELL_DPIO_CMN_BC, .ops = &vlv_dpio_cmn_power_well_ops, }, }; @@ -2043,19 +2043,19 @@ static struct i915_power_well chv_power_wells[] = { * required for any pipe to work. 
*/ .domains = CHV_DISPLAY_POWER_DOMAINS, - .data = PIPE_A, + .id = PIPE_A, .ops = &chv_pipe_power_well_ops, }, { .name = "dpio-common-bc", .domains = CHV_DPIO_CMN_BC_POWER_DOMAINS, - .data = PUNIT_POWER_WELL_DPIO_CMN_BC, + .id = PUNIT_POWER_WELL_DPIO_CMN_BC, .ops = &chv_dpio_cmn_power_well_ops, }, { .name = "dpio-common-d", .domains = CHV_DPIO_CMN_D_POWER_DOMAINS, - .data = PUNIT_POWER_WELL_DPIO_CMN_D, + .id = PUNIT_POWER_WELL_DPIO_CMN_D, .ops = &chv_dpio_cmn_power_well_ops, }, }; @@ -2078,57 +2078,57 @@ static struct i915_power_well skl_power_wells[] = { .always_on = 1, .domains = POWER_DOMAIN_MASK, .ops = &i9xx_always_on_power_well_ops, - .data = SKL_DISP_PW_ALWAYS_ON, + .id = SKL_DISP_PW_ALWAYS_ON, }, { .name = "power well 1", /* Handled by the DMC firmware */ .domains = 0, .ops = &skl_power_well_ops, - .data = SKL_DISP_PW_1, + .id = SKL_DISP_PW_1, }, { .name = "MISC IO power well", /* Handled by the DMC firmware */ .domains = 0, .ops = &skl_power_well_ops, - .data = SKL_DISP_PW_MISC_IO, + .id = SKL_DISP_PW_MISC_IO, }, { .name = "DC off", .domains = SKL_DISPLAY_DC_OFF_POWER_DOMAINS, .ops = &gen9_dc_off_power_well_ops, - .data = SKL_DISP_PW_DC_OFF, + .id = SKL_DISP_PW_DC_OFF, }, { .name = "power well 2", .domains = SKL_DISPLAY_POWERWELL_2_POWER_DOMAINS, .ops = &skl_power_well_ops, - .data = SKL_DISP_PW_2, + .id = SKL_DISP_PW_2, }, { .name = "DDI A/E power well", .domains = SKL_DISPLAY_DDI_A_E_POWER_DOMAINS, .ops = &skl_power_well_ops, - .data = SKL_DISP_PW_DDI_A_E, + .id = SKL_DISP_PW_DDI_A_E, }, { .name = "DDI B power well", .domains = SKL_DISPLAY_DDI_B_POWER_DOMAINS, .ops = &skl_power_well_ops, - .data = SKL_DISP_PW_DDI_B, + .id = SKL_DISP_PW_DDI_B, }, { .name = "DDI C power well", .domains = SKL_DISPLAY_DDI_C_POWER_DOMAINS, .ops = &skl_power_well_ops, - .data = SKL_DISP_PW_DDI_C, + .id = SKL_DISP_PW_DDI_C, }, { .name = "DDI D power well", .domains = SKL_DISPLAY_DDI_D_POWER_DOMAINS, .ops = &skl_power_well_ops, - .data = SKL_DISP_PW_DDI_D, + .id = SKL_DISP_PW_DDI_D, }, }; @@ -2143,31 +2143,31 @@ static struct i915_power_well bxt_power_wells[] = { .name = "power well 1", .domains = 0, .ops = &skl_power_well_ops, - .data = SKL_DISP_PW_1, + .id = SKL_DISP_PW_1, }, { .name = "DC off", .domains = BXT_DISPLAY_DC_OFF_POWER_DOMAINS, .ops = &gen9_dc_off_power_well_ops, - .data = SKL_DISP_PW_DC_OFF, + .id = SKL_DISP_PW_DC_OFF, }, { .name = "power well 2", .domains = BXT_DISPLAY_POWERWELL_2_POWER_DOMAINS, .ops = &skl_power_well_ops, - .data = SKL_DISP_PW_2, + .id = SKL_DISP_PW_2, }, { .name = "dpio-common-a", .domains = BXT_DPIO_CMN_A_POWER_DOMAINS, .ops = &bxt_dpio_cmn_power_well_ops, - .data = BXT_DPIO_CMN_A, + .id = BXT_DPIO_CMN_A, }, { .name = "dpio-common-bc", .domains = BXT_DPIO_CMN_BC_POWER_DOMAINS, .ops = &bxt_dpio_cmn_power_well_ops, - .data = BXT_DPIO_CMN_BC, + .id = BXT_DPIO_CMN_BC, }, }; -- cgit v1.1 From 362624c9ba3f6bff2df6304068a45b355d4ab13b Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 6 Oct 2016 19:22:15 +0300 Subject: drm/i915: Explicitly map broxton DPIO power wells to phys The mapping from the BXT_DPIO_CMN_* power wells to their respective phys required a detour implemented in the bxt_power_well_to_phy() function. Instead, embed that information directly into the power_well struct, by resurrecting the data field. 
Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Imre Deak Link: http://patchwork.freedesktop.org/patch/msgid/7fe97582fa08c7340ce6a3b6b0ea3e72a73182d7.1475770848.git-series.ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 5 +++++ drivers/gpu/drm/i915/intel_runtime_pm.c | 22 +++++++--------------- 2 files changed, 12 insertions(+), 15 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 61449fa..d3249e9 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1324,6 +1324,11 @@ struct i915_power_well { unsigned long domains; /* unique identifier for this power well */ unsigned long id; + /* + * Arbitraty data associated with this power well. Platform and power + * well specific. + */ + unsigned long data; const struct i915_power_well_ops *ops; }; diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 53ba45a..5fd76ee 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -845,13 +845,6 @@ static void skl_power_well_disable(struct drm_i915_private *dev_priv, skl_set_power_well(dev_priv, power_well, false); } -static enum dpio_phy bxt_power_well_to_phy(struct i915_power_well *power_well) -{ - enum skl_disp_power_wells power_well_id = power_well->id; - - return power_well_id == BXT_DPIO_CMN_A ? DPIO_PHY1 : DPIO_PHY0; -} - static void bxt_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { @@ -867,7 +860,7 @@ static void bxt_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv, intel_power_well_get(dev_priv, cmn_a_well); } - bxt_ddi_phy_init(dev_priv, bxt_power_well_to_phy(power_well)); + bxt_ddi_phy_init(dev_priv, power_well->data); if (cmn_a_well) intel_power_well_put(dev_priv, cmn_a_well); @@ -876,14 +869,13 @@ static void bxt_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv, static void bxt_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { - bxt_ddi_phy_uninit(dev_priv, bxt_power_well_to_phy(power_well)); + bxt_ddi_phy_uninit(dev_priv, power_well->data); } static bool bxt_dpio_cmn_power_well_enabled(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { - return bxt_ddi_phy_is_enabled(dev_priv, - bxt_power_well_to_phy(power_well)); + return bxt_ddi_phy_is_enabled(dev_priv, power_well->data); } static void bxt_dpio_cmn_power_well_sync_hw(struct drm_i915_private *dev_priv, @@ -902,13 +894,11 @@ static void bxt_verify_ddi_phy_power_wells(struct drm_i915_private *dev_priv) power_well = lookup_power_well(dev_priv, BXT_DPIO_CMN_A); if (power_well->count > 0) - bxt_ddi_phy_verify_state(dev_priv, - bxt_power_well_to_phy(power_well)); + bxt_ddi_phy_verify_state(dev_priv, power_well->data); power_well = lookup_power_well(dev_priv, BXT_DPIO_CMN_BC); if (power_well->count > 0) - bxt_ddi_phy_verify_state(dev_priv, - bxt_power_well_to_phy(power_well)); + bxt_ddi_phy_verify_state(dev_priv, power_well->data); } static bool gen9_dc_off_power_well_enabled(struct drm_i915_private *dev_priv, @@ -2162,12 +2152,14 @@ static struct i915_power_well bxt_power_wells[] = { .domains = BXT_DPIO_CMN_A_POWER_DOMAINS, .ops = &bxt_dpio_cmn_power_well_ops, .id = BXT_DPIO_CMN_A, + .data = DPIO_PHY1, }, { .name = "dpio-common-bc", .domains = BXT_DPIO_CMN_BC_POWER_DOMAINS, .ops = &bxt_dpio_cmn_power_well_ops, .id = BXT_DPIO_CMN_BC, + .data = DPIO_PHY0, }, }; -- 
cgit v1.1 From b284eedaf74bdbd262f71a7937ca78f45354173f Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 6 Oct 2016 19:22:16 +0300 Subject: drm/i915: Pass lane count to bxt_ddi_phy_calc_lane_optmin_mask() Pass lane count to bxt_ddi_phy_calc_lane_optmin_mask() instead of having it extract that number from a pipe_config to decouple the phy code from intel_crtc_state. Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Imre Deak Link: http://patchwork.freedesktop.org/patch/msgid/a4977e0207e594953c4f9d1b5f2ef972a8679e74.1475770848.git-series.ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_ddi.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index fb18d69..1de0276 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -2190,9 +2190,9 @@ bool bxt_ddi_phy_verify_state(struct drm_i915_private *dev_priv, static uint8_t bxt_ddi_phy_calc_lane_lat_optim_mask(struct intel_encoder *encoder, - struct intel_crtc_state *pipe_config) + uint8_t lane_count) { - switch (pipe_config->lane_count) { + switch (lane_count) { case 1: return 0; case 2: @@ -2200,7 +2200,7 @@ bxt_ddi_phy_calc_lane_lat_optim_mask(struct intel_encoder *encoder, case 4: return BIT(3) | BIT(2) | BIT(0); default: - MISSING_CASE(pipe_config->lane_count); + MISSING_CASE(lane_count); return 0; } @@ -2417,7 +2417,7 @@ static bool intel_ddi_compute_config(struct intel_encoder *encoder, if (IS_BROXTON(dev_priv) && ret) pipe_config->lane_lat_optim_mask = bxt_ddi_phy_calc_lane_lat_optim_mask(encoder, - pipe_config); + pipe_config->lane_count); return ret; -- cgit v1.1 From 47a6bc61b86657646aea38e837a7f25c68cec7f8 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 6 Oct 2016 19:22:17 +0300 Subject: drm/i915: Move broxton phy code to intel_dpio_phy.c The phy in broxton is also a dpio phy, similar to cherryview but with programming through MMIO. So move the code together with the other similar phys. 
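The decoupling applied in the preceding patch, where bxt_ddi_phy_calc_lane_lat_optim_mask() now takes a bare lane count instead of a whole intel_crtc_state, is easy to see in isolation and is what makes this code movement clean. A standalone C sketch, with the state struct reduced to the one field that matters and the mask values taken from the diff; everything else is illustrative scaffolding:

#include <stdint.h>
#include <stdio.h>

/* Stand-in for intel_crtc_state: the helper used to take the whole
 * struct just to read one field. */
struct crtc_state {
	int lane_count;
	/* ...many unrelated fields... */
};

/* After the change: the helper depends only on the scalar it needs,
 * so it can live in phy code without knowing about crtc state. */
static uint8_t calc_lane_lat_optim_mask(uint8_t lane_count)
{
	switch (lane_count) {
	case 1:
		return 0;
	case 2:
		return (1 << 2) | (1 << 0);
	case 4:
		return (1 << 3) | (1 << 2) | (1 << 0);
	default:
		return 0;	/* the kernel version also warns via MISSING_CASE */
	}
}

int main(void)
{
	struct crtc_state state = { .lane_count = 4 };

	/* The caller, not the helper, reaches into the state struct. */
	printf("mask=0x%x\n", calc_lane_lat_optim_mask(state.lane_count));
	return 0;
}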
Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Imre Deak Link: http://patchwork.freedesktop.org/patch/msgid/d611de6d256593cf904172db7ff27f164480c228.1475770848.git-series.ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 12 ++ drivers/gpu/drm/i915/intel_ddi.c | 322 +-------------------------------- drivers/gpu/drm/i915/intel_dpio_phy.c | 327 ++++++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/intel_drv.h | 6 - 4 files changed, 341 insertions(+), 326 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d3249e9..c642f10 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3761,6 +3761,18 @@ u32 vlv_flisdsi_read(struct drm_i915_private *dev_priv, u32 reg); void vlv_flisdsi_write(struct drm_i915_private *dev_priv, u32 reg, u32 val); /* intel_dpio_phy.c */ +void bxt_ddi_phy_init(struct drm_i915_private *dev_priv, enum dpio_phy phy); +void bxt_ddi_phy_uninit(struct drm_i915_private *dev_priv, enum dpio_phy phy); +bool bxt_ddi_phy_is_enabled(struct drm_i915_private *dev_priv, + enum dpio_phy phy); +bool bxt_ddi_phy_verify_state(struct drm_i915_private *dev_priv, + enum dpio_phy phy); +uint8_t bxt_ddi_phy_calc_lane_lat_optim_mask(struct intel_encoder *encoder, + uint8_t lane_count); +void bxt_ddi_phy_set_lane_optim_mask(struct intel_encoder *encoder, + uint8_t lane_lat_optim_mask); +uint8_t bxt_ddi_phy_get_lane_lat_optim_mask(struct intel_encoder *encoder); + void chv_set_phy_signal_level(struct intel_encoder *encoder, u32 deemph_reg_value, u32 margin_reg_value, bool uniq_trans_scale); diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 1de0276..51ca8ea 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1923,332 +1923,14 @@ static void intel_disable_ddi(struct intel_encoder *intel_encoder, } } -bool bxt_ddi_phy_is_enabled(struct drm_i915_private *dev_priv, - enum dpio_phy phy) -{ - enum port port; - - if (!(I915_READ(BXT_P_CR_GT_DISP_PWRON) & GT_DISPLAY_POWER_ON(phy))) - return false; - - if ((I915_READ(BXT_PORT_CL1CM_DW0(phy)) & - (PHY_POWER_GOOD | PHY_RESERVED)) != PHY_POWER_GOOD) { - DRM_DEBUG_DRIVER("DDI PHY %d powered, but power hasn't settled\n", - phy); - - return false; - } - - if (phy == DPIO_PHY1 && - !(I915_READ(BXT_PORT_REF_DW3(DPIO_PHY1)) & GRC_DONE)) { - DRM_DEBUG_DRIVER("DDI PHY 1 powered, but GRC isn't done\n"); - - return false; - } - - if (!(I915_READ(BXT_PHY_CTL_FAMILY(phy)) & COMMON_RESET_DIS)) { - DRM_DEBUG_DRIVER("DDI PHY %d powered, but still in reset\n", - phy); - - return false; - } - - for_each_port_masked(port, - phy == DPIO_PHY0 ? 
BIT(PORT_B) | BIT(PORT_C) : - BIT(PORT_A)) { - u32 tmp = I915_READ(BXT_PHY_CTL(port)); - - if (tmp & BXT_PHY_CMNLANE_POWERDOWN_ACK) { - DRM_DEBUG_DRIVER("DDI PHY %d powered, but common lane " - "for port %c powered down " - "(PHY_CTL %08x)\n", - phy, port_name(port), tmp); - - return false; - } - } - - return true; -} - -static u32 bxt_get_grc(struct drm_i915_private *dev_priv, enum dpio_phy phy) -{ - u32 val = I915_READ(BXT_PORT_REF_DW6(phy)); - - return (val & GRC_CODE_MASK) >> GRC_CODE_SHIFT; -} - -static void bxt_phy_wait_grc_done(struct drm_i915_private *dev_priv, - enum dpio_phy phy) -{ - if (intel_wait_for_register(dev_priv, - BXT_PORT_REF_DW3(phy), - GRC_DONE, GRC_DONE, - 10)) - DRM_ERROR("timeout waiting for PHY%d GRC\n", phy); -} - -void bxt_ddi_phy_init(struct drm_i915_private *dev_priv, enum dpio_phy phy) -{ - u32 val; - - if (bxt_ddi_phy_is_enabled(dev_priv, phy)) { - /* Still read out the GRC value for state verification */ - if (phy == DPIO_PHY0) - dev_priv->bxt_phy_grc = bxt_get_grc(dev_priv, phy); - - if (bxt_ddi_phy_verify_state(dev_priv, phy)) { - DRM_DEBUG_DRIVER("DDI PHY %d already enabled, " - "won't reprogram it\n", phy); - - return; - } - - DRM_DEBUG_DRIVER("DDI PHY %d enabled with invalid state, " - "force reprogramming it\n", phy); - } - - val = I915_READ(BXT_P_CR_GT_DISP_PWRON); - val |= GT_DISPLAY_POWER_ON(phy); - I915_WRITE(BXT_P_CR_GT_DISP_PWRON, val); - - /* - * The PHY registers start out inaccessible and respond to reads with - * all 1s. Eventually they become accessible as they power up, then - * the reserved bit will give the default 0. Poll on the reserved bit - * becoming 0 to find when the PHY is accessible. - * HW team confirmed that the time to reach phypowergood status is - * anywhere between 50 us and 100us. - */ - if (wait_for_us(((I915_READ(BXT_PORT_CL1CM_DW0(phy)) & - (PHY_RESERVED | PHY_POWER_GOOD)) == PHY_POWER_GOOD), 100)) { - DRM_ERROR("timeout during PHY%d power on\n", phy); - } - - /* Program PLL Rcomp code offset */ - val = I915_READ(BXT_PORT_CL1CM_DW9(phy)); - val &= ~IREF0RC_OFFSET_MASK; - val |= 0xE4 << IREF0RC_OFFSET_SHIFT; - I915_WRITE(BXT_PORT_CL1CM_DW9(phy), val); - - val = I915_READ(BXT_PORT_CL1CM_DW10(phy)); - val &= ~IREF1RC_OFFSET_MASK; - val |= 0xE4 << IREF1RC_OFFSET_SHIFT; - I915_WRITE(BXT_PORT_CL1CM_DW10(phy), val); - - /* Program power gating */ - val = I915_READ(BXT_PORT_CL1CM_DW28(phy)); - val |= OCL1_POWER_DOWN_EN | DW28_OLDO_DYN_PWR_DOWN_EN | - SUS_CLK_CONFIG; - I915_WRITE(BXT_PORT_CL1CM_DW28(phy), val); - - if (phy == DPIO_PHY0) { - val = I915_READ(BXT_PORT_CL2CM_DW6_BC); - val |= DW6_OLDO_DYN_PWR_DOWN_EN; - I915_WRITE(BXT_PORT_CL2CM_DW6_BC, val); - } - - val = I915_READ(BXT_PORT_CL1CM_DW30(phy)); - val &= ~OCL2_LDOFUSE_PWR_DIS; - /* - * On PHY1 disable power on the second channel, since no port is - * connected there. On PHY0 both channels have a port, so leave it - * enabled. - * TODO: port C is only connected on BXT-P, so on BXT0/1 we should - * power down the second channel on PHY0 as well. - * - * FIXME: Clarify programming of the following, the register is - * read-only with bit 6 fixed at 0 at least in stepping A. - */ - if (phy == DPIO_PHY1) - val |= OCL2_LDOFUSE_PWR_DIS; - I915_WRITE(BXT_PORT_CL1CM_DW30(phy), val); - - if (phy == DPIO_PHY0) { - uint32_t grc_code; - /* - * PHY0 isn't connected to an RCOMP resistor so copy over - * the corresponding calibrated value from PHY1, and disable - * the automatic calibration on PHY0. 
- */ - val = dev_priv->bxt_phy_grc = bxt_get_grc(dev_priv, DPIO_PHY1); - grc_code = val << GRC_CODE_FAST_SHIFT | - val << GRC_CODE_SLOW_SHIFT | - val; - I915_WRITE(BXT_PORT_REF_DW6(DPIO_PHY0), grc_code); - - val = I915_READ(BXT_PORT_REF_DW8(DPIO_PHY0)); - val |= GRC_DIS | GRC_RDY_OVRD; - I915_WRITE(BXT_PORT_REF_DW8(DPIO_PHY0), val); - } - - val = I915_READ(BXT_PHY_CTL_FAMILY(phy)); - val |= COMMON_RESET_DIS; - I915_WRITE(BXT_PHY_CTL_FAMILY(phy), val); - - if (phy == DPIO_PHY1) - bxt_phy_wait_grc_done(dev_priv, DPIO_PHY1); -} - -void bxt_ddi_phy_uninit(struct drm_i915_private *dev_priv, enum dpio_phy phy) -{ - uint32_t val; - - val = I915_READ(BXT_PHY_CTL_FAMILY(phy)); - val &= ~COMMON_RESET_DIS; - I915_WRITE(BXT_PHY_CTL_FAMILY(phy), val); - - val = I915_READ(BXT_P_CR_GT_DISP_PWRON); - val &= ~GT_DISPLAY_POWER_ON(phy); - I915_WRITE(BXT_P_CR_GT_DISP_PWRON, val); -} - -static bool __printf(6, 7) -__phy_reg_verify_state(struct drm_i915_private *dev_priv, enum dpio_phy phy, - i915_reg_t reg, u32 mask, u32 expected, - const char *reg_fmt, ...) -{ - struct va_format vaf; - va_list args; - u32 val; - - val = I915_READ(reg); - if ((val & mask) == expected) - return true; - - va_start(args, reg_fmt); - vaf.fmt = reg_fmt; - vaf.va = &args; - - DRM_DEBUG_DRIVER("DDI PHY %d reg %pV [%08x] state mismatch: " - "current %08x, expected %08x (mask %08x)\n", - phy, &vaf, reg.reg, val, (val & ~mask) | expected, - mask); - - va_end(args); - - return false; -} - -bool bxt_ddi_phy_verify_state(struct drm_i915_private *dev_priv, - enum dpio_phy phy) -{ - uint32_t mask; - bool ok; - -#define _CHK(reg, mask, exp, fmt, ...) \ - __phy_reg_verify_state(dev_priv, phy, reg, mask, exp, fmt, \ - ## __VA_ARGS__) - - if (!bxt_ddi_phy_is_enabled(dev_priv, phy)) - return false; - - ok = true; - - /* PLL Rcomp code offset */ - ok &= _CHK(BXT_PORT_CL1CM_DW9(phy), - IREF0RC_OFFSET_MASK, 0xe4 << IREF0RC_OFFSET_SHIFT, - "BXT_PORT_CL1CM_DW9(%d)", phy); - ok &= _CHK(BXT_PORT_CL1CM_DW10(phy), - IREF1RC_OFFSET_MASK, 0xe4 << IREF1RC_OFFSET_SHIFT, - "BXT_PORT_CL1CM_DW10(%d)", phy); - - /* Power gating */ - mask = OCL1_POWER_DOWN_EN | DW28_OLDO_DYN_PWR_DOWN_EN | SUS_CLK_CONFIG; - ok &= _CHK(BXT_PORT_CL1CM_DW28(phy), mask, mask, - "BXT_PORT_CL1CM_DW28(%d)", phy); - - if (phy == DPIO_PHY0) - ok &= _CHK(BXT_PORT_CL2CM_DW6_BC, - DW6_OLDO_DYN_PWR_DOWN_EN, DW6_OLDO_DYN_PWR_DOWN_EN, - "BXT_PORT_CL2CM_DW6_BC"); - - /* - * TODO: Verify BXT_PORT_CL1CM_DW30 bit OCL2_LDOFUSE_PWR_DIS, - * at least on stepping A this bit is read-only and fixed at 0. 
- */ - - if (phy == DPIO_PHY0) { - u32 grc_code = dev_priv->bxt_phy_grc; - - grc_code = grc_code << GRC_CODE_FAST_SHIFT | - grc_code << GRC_CODE_SLOW_SHIFT | - grc_code; - mask = GRC_CODE_FAST_MASK | GRC_CODE_SLOW_MASK | - GRC_CODE_NOM_MASK; - ok &= _CHK(BXT_PORT_REF_DW6(DPIO_PHY0), mask, grc_code, - "BXT_PORT_REF_DW6(%d)", DPIO_PHY0); - - mask = GRC_DIS | GRC_RDY_OVRD; - ok &= _CHK(BXT_PORT_REF_DW8(DPIO_PHY0), mask, mask, - "BXT_PORT_REF_DW8(%d)", DPIO_PHY0); - } - - return ok; -#undef _CHK -} - -static uint8_t -bxt_ddi_phy_calc_lane_lat_optim_mask(struct intel_encoder *encoder, - uint8_t lane_count) -{ - switch (lane_count) { - case 1: - return 0; - case 2: - return BIT(2) | BIT(0); - case 4: - return BIT(3) | BIT(2) | BIT(0); - default: - MISSING_CASE(lane_count); - - return 0; - } -} - static void bxt_ddi_pre_pll_enable(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state) { - struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); - struct drm_i915_private *dev_priv = to_i915(dport->base.base.dev); - enum port port = dport->port; struct intel_crtc *intel_crtc = to_intel_crtc(encoder->base.crtc); - int lane; - - for (lane = 0; lane < 4; lane++) { - u32 val = I915_READ(BXT_PORT_TX_DW14_LN(port, lane)); - - /* - * Note that on CHV this flag is called UPAR, but has - * the same function. - */ - val &= ~LATENCY_OPTIM; - if (intel_crtc->config->lane_lat_optim_mask & BIT(lane)) - val |= LATENCY_OPTIM; - - I915_WRITE(BXT_PORT_TX_DW14_LN(port, lane), val); - } -} - -static uint8_t -bxt_ddi_phy_get_lane_lat_optim_mask(struct intel_encoder *encoder) -{ - struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); - struct drm_i915_private *dev_priv = to_i915(dport->base.base.dev); - enum port port = dport->port; - int lane; - uint8_t mask; - - mask = 0; - for (lane = 0; lane < 4; lane++) { - u32 val = I915_READ(BXT_PORT_TX_DW14_LN(port, lane)); - - if (val & LATENCY_OPTIM) - mask |= BIT(lane); - } + uint8_t mask = intel_crtc->config->lane_lat_optim_mask; - return mask; + bxt_ddi_phy_set_lane_optim_mask(encoder, mask); } void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp) diff --git a/drivers/gpu/drm/i915/intel_dpio_phy.c b/drivers/gpu/drm/i915/intel_dpio_phy.c index 047f487..edf0cfd 100644 --- a/drivers/gpu/drm/i915/intel_dpio_phy.c +++ b/drivers/gpu/drm/i915/intel_dpio_phy.c @@ -23,6 +23,333 @@ #include "intel_drv.h" +bool bxt_ddi_phy_is_enabled(struct drm_i915_private *dev_priv, + enum dpio_phy phy) +{ + enum port port; + + if (!(I915_READ(BXT_P_CR_GT_DISP_PWRON) & GT_DISPLAY_POWER_ON(phy))) + return false; + + if ((I915_READ(BXT_PORT_CL1CM_DW0(phy)) & + (PHY_POWER_GOOD | PHY_RESERVED)) != PHY_POWER_GOOD) { + DRM_DEBUG_DRIVER("DDI PHY %d powered, but power hasn't settled\n", + phy); + + return false; + } + + if (phy == DPIO_PHY1 && + !(I915_READ(BXT_PORT_REF_DW3(DPIO_PHY1)) & GRC_DONE)) { + DRM_DEBUG_DRIVER("DDI PHY 1 powered, but GRC isn't done\n"); + + return false; + } + + if (!(I915_READ(BXT_PHY_CTL_FAMILY(phy)) & COMMON_RESET_DIS)) { + DRM_DEBUG_DRIVER("DDI PHY %d powered, but still in reset\n", + phy); + + return false; + } + + for_each_port_masked(port, + phy == DPIO_PHY0 ? 
BIT(PORT_B) | BIT(PORT_C) : + BIT(PORT_A)) { + u32 tmp = I915_READ(BXT_PHY_CTL(port)); + + if (tmp & BXT_PHY_CMNLANE_POWERDOWN_ACK) { + DRM_DEBUG_DRIVER("DDI PHY %d powered, but common lane " + "for port %c powered down " + "(PHY_CTL %08x)\n", + phy, port_name(port), tmp); + + return false; + } + } + + return true; +} + +static u32 bxt_get_grc(struct drm_i915_private *dev_priv, enum dpio_phy phy) +{ + u32 val = I915_READ(BXT_PORT_REF_DW6(phy)); + + return (val & GRC_CODE_MASK) >> GRC_CODE_SHIFT; +} + +static void bxt_phy_wait_grc_done(struct drm_i915_private *dev_priv, + enum dpio_phy phy) +{ + if (intel_wait_for_register(dev_priv, + BXT_PORT_REF_DW3(phy), + GRC_DONE, GRC_DONE, + 10)) + DRM_ERROR("timeout waiting for PHY%d GRC\n", phy); +} + +void bxt_ddi_phy_init(struct drm_i915_private *dev_priv, enum dpio_phy phy) +{ + u32 val; + + if (bxt_ddi_phy_is_enabled(dev_priv, phy)) { + /* Still read out the GRC value for state verification */ + if (phy == DPIO_PHY0) + dev_priv->bxt_phy_grc = bxt_get_grc(dev_priv, phy); + + if (bxt_ddi_phy_verify_state(dev_priv, phy)) { + DRM_DEBUG_DRIVER("DDI PHY %d already enabled, " + "won't reprogram it\n", phy); + + return; + } + + DRM_DEBUG_DRIVER("DDI PHY %d enabled with invalid state, " + "force reprogramming it\n", phy); + } + + val = I915_READ(BXT_P_CR_GT_DISP_PWRON); + val |= GT_DISPLAY_POWER_ON(phy); + I915_WRITE(BXT_P_CR_GT_DISP_PWRON, val); + + /* + * The PHY registers start out inaccessible and respond to reads with + * all 1s. Eventually they become accessible as they power up, then + * the reserved bit will give the default 0. Poll on the reserved bit + * becoming 0 to find when the PHY is accessible. + * HW team confirmed that the time to reach phypowergood status is + * anywhere between 50 us and 100us. + */ + if (wait_for_us(((I915_READ(BXT_PORT_CL1CM_DW0(phy)) & + (PHY_RESERVED | PHY_POWER_GOOD)) == PHY_POWER_GOOD), 100)) { + DRM_ERROR("timeout during PHY%d power on\n", phy); + } + + /* Program PLL Rcomp code offset */ + val = I915_READ(BXT_PORT_CL1CM_DW9(phy)); + val &= ~IREF0RC_OFFSET_MASK; + val |= 0xE4 << IREF0RC_OFFSET_SHIFT; + I915_WRITE(BXT_PORT_CL1CM_DW9(phy), val); + + val = I915_READ(BXT_PORT_CL1CM_DW10(phy)); + val &= ~IREF1RC_OFFSET_MASK; + val |= 0xE4 << IREF1RC_OFFSET_SHIFT; + I915_WRITE(BXT_PORT_CL1CM_DW10(phy), val); + + /* Program power gating */ + val = I915_READ(BXT_PORT_CL1CM_DW28(phy)); + val |= OCL1_POWER_DOWN_EN | DW28_OLDO_DYN_PWR_DOWN_EN | + SUS_CLK_CONFIG; + I915_WRITE(BXT_PORT_CL1CM_DW28(phy), val); + + if (phy == DPIO_PHY0) { + val = I915_READ(BXT_PORT_CL2CM_DW6_BC); + val |= DW6_OLDO_DYN_PWR_DOWN_EN; + I915_WRITE(BXT_PORT_CL2CM_DW6_BC, val); + } + + val = I915_READ(BXT_PORT_CL1CM_DW30(phy)); + val &= ~OCL2_LDOFUSE_PWR_DIS; + /* + * On PHY1 disable power on the second channel, since no port is + * connected there. On PHY0 both channels have a port, so leave it + * enabled. + * TODO: port C is only connected on BXT-P, so on BXT0/1 we should + * power down the second channel on PHY0 as well. + * + * FIXME: Clarify programming of the following, the register is + * read-only with bit 6 fixed at 0 at least in stepping A. + */ + if (phy == DPIO_PHY1) + val |= OCL2_LDOFUSE_PWR_DIS; + I915_WRITE(BXT_PORT_CL1CM_DW30(phy), val); + + if (phy == DPIO_PHY0) { + uint32_t grc_code; + /* + * PHY0 isn't connected to an RCOMP resistor so copy over + * the corresponding calibrated value from PHY1, and disable + * the automatic calibration on PHY0. 
+ */ + val = dev_priv->bxt_phy_grc = bxt_get_grc(dev_priv, DPIO_PHY1); + grc_code = val << GRC_CODE_FAST_SHIFT | + val << GRC_CODE_SLOW_SHIFT | + val; + I915_WRITE(BXT_PORT_REF_DW6(DPIO_PHY0), grc_code); + + val = I915_READ(BXT_PORT_REF_DW8(DPIO_PHY0)); + val |= GRC_DIS | GRC_RDY_OVRD; + I915_WRITE(BXT_PORT_REF_DW8(DPIO_PHY0), val); + } + + val = I915_READ(BXT_PHY_CTL_FAMILY(phy)); + val |= COMMON_RESET_DIS; + I915_WRITE(BXT_PHY_CTL_FAMILY(phy), val); + + if (phy == DPIO_PHY1) + bxt_phy_wait_grc_done(dev_priv, DPIO_PHY1); +} + +void bxt_ddi_phy_uninit(struct drm_i915_private *dev_priv, enum dpio_phy phy) +{ + uint32_t val; + + val = I915_READ(BXT_PHY_CTL_FAMILY(phy)); + val &= ~COMMON_RESET_DIS; + I915_WRITE(BXT_PHY_CTL_FAMILY(phy), val); + + val = I915_READ(BXT_P_CR_GT_DISP_PWRON); + val &= ~GT_DISPLAY_POWER_ON(phy); + I915_WRITE(BXT_P_CR_GT_DISP_PWRON, val); +} + +static bool __printf(6, 7) +__phy_reg_verify_state(struct drm_i915_private *dev_priv, enum dpio_phy phy, + i915_reg_t reg, u32 mask, u32 expected, + const char *reg_fmt, ...) +{ + struct va_format vaf; + va_list args; + u32 val; + + val = I915_READ(reg); + if ((val & mask) == expected) + return true; + + va_start(args, reg_fmt); + vaf.fmt = reg_fmt; + vaf.va = &args; + + DRM_DEBUG_DRIVER("DDI PHY %d reg %pV [%08x] state mismatch: " + "current %08x, expected %08x (mask %08x)\n", + phy, &vaf, reg.reg, val, (val & ~mask) | expected, + mask); + + va_end(args); + + return false; +} + +bool bxt_ddi_phy_verify_state(struct drm_i915_private *dev_priv, + enum dpio_phy phy) +{ + uint32_t mask; + bool ok; + +#define _CHK(reg, mask, exp, fmt, ...) \ + __phy_reg_verify_state(dev_priv, phy, reg, mask, exp, fmt, \ + ## __VA_ARGS__) + + if (!bxt_ddi_phy_is_enabled(dev_priv, phy)) + return false; + + ok = true; + + /* PLL Rcomp code offset */ + ok &= _CHK(BXT_PORT_CL1CM_DW9(phy), + IREF0RC_OFFSET_MASK, 0xe4 << IREF0RC_OFFSET_SHIFT, + "BXT_PORT_CL1CM_DW9(%d)", phy); + ok &= _CHK(BXT_PORT_CL1CM_DW10(phy), + IREF1RC_OFFSET_MASK, 0xe4 << IREF1RC_OFFSET_SHIFT, + "BXT_PORT_CL1CM_DW10(%d)", phy); + + /* Power gating */ + mask = OCL1_POWER_DOWN_EN | DW28_OLDO_DYN_PWR_DOWN_EN | SUS_CLK_CONFIG; + ok &= _CHK(BXT_PORT_CL1CM_DW28(phy), mask, mask, + "BXT_PORT_CL1CM_DW28(%d)", phy); + + if (phy == DPIO_PHY0) + ok &= _CHK(BXT_PORT_CL2CM_DW6_BC, + DW6_OLDO_DYN_PWR_DOWN_EN, DW6_OLDO_DYN_PWR_DOWN_EN, + "BXT_PORT_CL2CM_DW6_BC"); + + /* + * TODO: Verify BXT_PORT_CL1CM_DW30 bit OCL2_LDOFUSE_PWR_DIS, + * at least on stepping A this bit is read-only and fixed at 0. 
+ */ + + if (phy == DPIO_PHY0) { + u32 grc_code = dev_priv->bxt_phy_grc; + + grc_code = grc_code << GRC_CODE_FAST_SHIFT | + grc_code << GRC_CODE_SLOW_SHIFT | + grc_code; + mask = GRC_CODE_FAST_MASK | GRC_CODE_SLOW_MASK | + GRC_CODE_NOM_MASK; + ok &= _CHK(BXT_PORT_REF_DW6(DPIO_PHY0), mask, grc_code, + "BXT_PORT_REF_DW6(%d)", DPIO_PHY0); + + mask = GRC_DIS | GRC_RDY_OVRD; + ok &= _CHK(BXT_PORT_REF_DW8(DPIO_PHY0), mask, mask, + "BXT_PORT_REF_DW8(%d)", DPIO_PHY0); + } + + return ok; +#undef _CHK +} + +uint8_t +bxt_ddi_phy_calc_lane_lat_optim_mask(struct intel_encoder *encoder, + uint8_t lane_count) +{ + switch (lane_count) { + case 1: + return 0; + case 2: + return BIT(2) | BIT(0); + case 4: + return BIT(3) | BIT(2) | BIT(0); + default: + MISSING_CASE(lane_count); + + return 0; + } +} + +void bxt_ddi_phy_set_lane_optim_mask(struct intel_encoder *encoder, + uint8_t lane_lat_optim_mask) +{ + struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); + struct drm_i915_private *dev_priv = to_i915(dport->base.base.dev); + enum port port = dport->port; + int lane; + + for (lane = 0; lane < 4; lane++) { + u32 val = I915_READ(BXT_PORT_TX_DW14_LN(port, lane)); + + /* + * Note that on CHV this flag is called UPAR, but has + * the same function. + */ + val &= ~LATENCY_OPTIM; + if (lane_lat_optim_mask & BIT(lane)) + val |= LATENCY_OPTIM; + + I915_WRITE(BXT_PORT_TX_DW14_LN(port, lane), val); + } +} + +uint8_t +bxt_ddi_phy_get_lane_lat_optim_mask(struct intel_encoder *encoder) +{ + struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); + struct drm_i915_private *dev_priv = to_i915(dport->base.base.dev); + enum port port = dport->port; + int lane; + uint8_t mask; + + mask = 0; + for (lane = 0; lane < 4; lane++) { + u32 val = I915_READ(BXT_PORT_TX_DW14_LN(port, lane)); + + if (val & LATENCY_OPTIM) + mask |= BIT(lane); + } + + return mask; +} + + void chv_set_phy_signal_level(struct intel_encoder *encoder, u32 deemph_reg_value, u32 margin_reg_value, bool uniq_trans_scale) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index c2f3863..29ab526 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1351,12 +1351,6 @@ void hsw_enable_pc8(struct drm_i915_private *dev_priv); void hsw_disable_pc8(struct drm_i915_private *dev_priv); void bxt_init_cdclk(struct drm_i915_private *dev_priv); void bxt_uninit_cdclk(struct drm_i915_private *dev_priv); -void bxt_ddi_phy_init(struct drm_i915_private *dev_priv, enum dpio_phy phy); -void bxt_ddi_phy_uninit(struct drm_i915_private *dev_priv, enum dpio_phy phy); -bool bxt_ddi_phy_is_enabled(struct drm_i915_private *dev_priv, - enum dpio_phy phy); -bool bxt_ddi_phy_verify_state(struct drm_i915_private *dev_priv, - enum dpio_phy phy); void gen9_sanitize_dc_state(struct drm_i915_private *dev_priv); void bxt_enable_dc9(struct drm_i915_private *dev_priv); void bxt_disable_dc9(struct drm_i915_private *dev_priv); -- cgit v1.1 From f38861b814b530fbf5add9fa845da99444ebbde0 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 6 Oct 2016 19:22:18 +0300 Subject: drm/i915: Move DPIO phy documentation section to intel_dpio_phy.c Move the DPIO phy documentation section to intel_dpio_phy.c, since that is a more suitable place now that there is a source file dedicated for those phys. 
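For readers unfamiliar with the mechanism being relocated here: a DOC: section is free-form kernel-doc kept in an ordinary C comment, which the documentation build can pull in by section name, so moving it next to the implementation keeps prose and code in one place. A compilable sketch of the convention follows; the section title and function names are hypothetical, not taken from the driver:

/**
 * DOC: Example PHY
 *
 * Free-form overview text lives in a DOC: block next to the code it
 * describes, rather than among register #defines in a header.
 */

/**
 * example_phy_is_enabled - report whether a phy is powered
 * @phy: phy index
 *
 * Per-function kernel-doc sits directly above the function it
 * documents.
 */
static int example_phy_is_enabled(int phy)
{
	return phy == 0;
}

int main(void)
{
	return example_phy_is_enabled(0) ? 0 : 1;
}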
Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Imre Deak Link: http://patchwork.freedesktop.org/patch/msgid/55a2d38c15c06a8c5bce498b28decc03948f0224.1475770848.git-series.ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 91 +---------------------------------- drivers/gpu/drm/i915/intel_dpio_phy.c | 91 +++++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+), 90 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 542e570..2f504f6 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -830,96 +830,7 @@ enum skl_disp_power_wells { #define CCK_FREQUENCY_STATUS_SHIFT 8 #define CCK_FREQUENCY_VALUES (0x1f << 0) -/** - * DOC: DPIO - * - * VLV, CHV and BXT have slightly peculiar display PHYs for driving DP/HDMI - * ports. DPIO is the name given to such a display PHY. These PHYs - * don't follow the standard programming model using direct MMIO - * registers, and instead their registers must be accessed trough IOSF - * sideband. VLV has one such PHY for driving ports B and C, and CHV - * adds another PHY for driving port D. Each PHY responds to specific - * IOSF-SB port. - * - * Each display PHY is made up of one or two channels. Each channel - * houses a common lane part which contains the PLL and other common - * logic. CH0 common lane also contains the IOSF-SB logic for the - * Common Register Interface (CRI) ie. the DPIO registers. CRI clock - * must be running when any DPIO registers are accessed. - * - * In addition to having their own registers, the PHYs are also - * controlled through some dedicated signals from the display - * controller. These include PLL reference clock enable, PLL enable, - * and CRI clock selection, for example. - * - * Eeach channel also has two splines (also called data lanes), and - * each spline is made up of one Physical Access Coding Sub-Layer - * (PCS) block and two TX lanes. So each channel has two PCS blocks - * and four TX lanes. The TX lanes are used as DP lanes or TMDS - * data/clock pairs depending on the output type. - * - * Additionally the PHY also contains an AUX lane with AUX blocks - * for each channel. This is used for DP AUX communication, but - * this fact isn't really relevant for the driver since AUX is - * controlled from the display controller side. No DPIO registers - * need to be accessed during AUX communication, - * - * Generally on VLV/CHV the common lane corresponds to the pipe and - * the spline (PCS/TX) corresponds to the port. - * - * For dual channel PHY (VLV/CHV): - * - * pipe A == CMN/PLL/REF CH0 - * - * pipe B == CMN/PLL/REF CH1 - * - * port B == PCS/TX CH0 - * - * port C == PCS/TX CH1 - * - * This is especially important when we cross the streams - * ie. drive port B with pipe B, or port C with pipe A. - * - * For single channel PHY (CHV): - * - * pipe C == CMN/PLL/REF CH0 - * - * port D == PCS/TX CH0 - * - * On BXT the entire PHY channel corresponds to the port. That means - * the PLL is also now associated with the port rather than the pipe, - * and so the clock needs to be routed to the appropriate transcoder. - * Port A PLL is directly connected to transcoder EDP and port B/C - * PLLs can be routed to any transcoder A/B/C. - * - * Note: DDI0 is digital port B, DD1 is digital port C, and DDI2 is - * digital port D (CHV) or port A (BXT). 
:: - * - * - * Dual channel PHY (VLV/CHV/BXT) - * --------------------------------- - * | CH0 | CH1 | - * | CMN/PLL/REF | CMN/PLL/REF | - * |---------------|---------------| Display PHY - * | PCS01 | PCS23 | PCS01 | PCS23 | - * |-------|-------|-------|-------| - * |TX0|TX1|TX2|TX3|TX0|TX1|TX2|TX3| - * --------------------------------- - * | DDI0 | DDI1 | DP/HDMI ports - * --------------------------------- - * - * Single channel PHY (CHV/BXT) - * ----------------- - * | CH0 | - * | CMN/PLL/REF | - * |---------------| Display PHY - * | PCS01 | PCS23 | - * |-------|-------| - * |TX0|TX1|TX2|TX3| - * ----------------- - * | DDI2 | DP/HDMI port - * ----------------- - */ +/* DPIO registers */ #define DPIO_DEVFN 0 #define DPIO_CTL _MMIO(VLV_DISPLAY_BASE + 0x2110) diff --git a/drivers/gpu/drm/i915/intel_dpio_phy.c b/drivers/gpu/drm/i915/intel_dpio_phy.c index edf0cfd..6806296 100644 --- a/drivers/gpu/drm/i915/intel_dpio_phy.c +++ b/drivers/gpu/drm/i915/intel_dpio_phy.c @@ -23,6 +23,97 @@ #include "intel_drv.h" +/** + * DOC: DPIO + * + * VLV, CHV and BXT have slightly peculiar display PHYs for driving DP/HDMI + * ports. DPIO is the name given to such a display PHY. These PHYs + * don't follow the standard programming model using direct MMIO + * registers, and instead their registers must be accessed trough IOSF + * sideband. VLV has one such PHY for driving ports B and C, and CHV + * adds another PHY for driving port D. Each PHY responds to specific + * IOSF-SB port. + * + * Each display PHY is made up of one or two channels. Each channel + * houses a common lane part which contains the PLL and other common + * logic. CH0 common lane also contains the IOSF-SB logic for the + * Common Register Interface (CRI) ie. the DPIO registers. CRI clock + * must be running when any DPIO registers are accessed. + * + * In addition to having their own registers, the PHYs are also + * controlled through some dedicated signals from the display + * controller. These include PLL reference clock enable, PLL enable, + * and CRI clock selection, for example. + * + * Eeach channel also has two splines (also called data lanes), and + * each spline is made up of one Physical Access Coding Sub-Layer + * (PCS) block and two TX lanes. So each channel has two PCS blocks + * and four TX lanes. The TX lanes are used as DP lanes or TMDS + * data/clock pairs depending on the output type. + * + * Additionally the PHY also contains an AUX lane with AUX blocks + * for each channel. This is used for DP AUX communication, but + * this fact isn't really relevant for the driver since AUX is + * controlled from the display controller side. No DPIO registers + * need to be accessed during AUX communication, + * + * Generally on VLV/CHV the common lane corresponds to the pipe and + * the spline (PCS/TX) corresponds to the port. + * + * For dual channel PHY (VLV/CHV): + * + * pipe A == CMN/PLL/REF CH0 + * + * pipe B == CMN/PLL/REF CH1 + * + * port B == PCS/TX CH0 + * + * port C == PCS/TX CH1 + * + * This is especially important when we cross the streams + * ie. drive port B with pipe B, or port C with pipe A. + * + * For single channel PHY (CHV): + * + * pipe C == CMN/PLL/REF CH0 + * + * port D == PCS/TX CH0 + * + * On BXT the entire PHY channel corresponds to the port. That means + * the PLL is also now associated with the port rather than the pipe, + * and so the clock needs to be routed to the appropriate transcoder. + * Port A PLL is directly connected to transcoder EDP and port B/C + * PLLs can be routed to any transcoder A/B/C. 
+ * + * Note: DDI0 is digital port B, DD1 is digital port C, and DDI2 is + * digital port D (CHV) or port A (BXT). :: + * + * + * Dual channel PHY (VLV/CHV/BXT) + * --------------------------------- + * | CH0 | CH1 | + * | CMN/PLL/REF | CMN/PLL/REF | + * |---------------|---------------| Display PHY + * | PCS01 | PCS23 | PCS01 | PCS23 | + * |-------|-------|-------|-------| + * |TX0|TX1|TX2|TX3|TX0|TX1|TX2|TX3| + * --------------------------------- + * | DDI0 | DDI1 | DP/HDMI ports + * --------------------------------- + * + * Single channel PHY (CHV/BXT) + * ----------------- + * | CH0 | + * | CMN/PLL/REF | + * |---------------| Display PHY + * | PCS01 | PCS23 | + * |-------|-------| + * |TX0|TX1|TX2|TX3| + * ----------------- + * | DDI2 | DP/HDMI port + * ----------------- + */ + bool bxt_ddi_phy_is_enabled(struct drm_i915_private *dev_priv, enum dpio_phy phy) { -- cgit v1.1 From b6e08203cc1f453d1807df38b5b95b93853cebd9 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 6 Oct 2016 19:22:19 +0300 Subject: drm/i915: Move broxton vswing sequence to intel_dpio_phy.c The vswing sequence is related to the DPIO phy, so move it closer to the rest of DPIO phy related code. Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Imre Deak Link: http://patchwork.freedesktop.org/patch/msgid/59aa5c85a115c5cbed81e793f20cd7b9f8de694b.1475770848.git-series.ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 3 +++ drivers/gpu/drm/i915/intel_ddi.c | 38 +++++----------------------------- drivers/gpu/drm/i915/intel_dpio_phy.c | 39 +++++++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 33 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c642f10..7e79298 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3761,6 +3761,9 @@ u32 vlv_flisdsi_read(struct drm_i915_private *dev_priv, u32 reg); void vlv_flisdsi_write(struct drm_i915_private *dev_priv, u32 reg, u32 val); /* intel_dpio_phy.c */ +void bxt_ddi_phy_set_signal_level(struct drm_i915_private *dev_priv, + enum port port, u32 margin, u32 scale, + u32 enable, u32 deemphasis); void bxt_ddi_phy_init(struct drm_i915_private *dev_priv, enum dpio_phy phy); void bxt_ddi_phy_uninit(struct drm_i915_private *dev_priv, enum dpio_phy phy); bool bxt_ddi_phy_is_enabled(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 51ca8ea..938ac4d 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1547,7 +1547,6 @@ static void bxt_ddi_vswing_sequence(struct drm_i915_private *dev_priv, { const struct bxt_ddi_buf_trans *ddi_translations; u32 n_entries, i; - uint32_t val; if (type == INTEL_OUTPUT_EDP && dev_priv->vbt.edp.low_vswing) { n_entries = ARRAY_SIZE(bxt_ddi_translations_edp); @@ -1576,38 +1575,11 @@ static void bxt_ddi_vswing_sequence(struct drm_i915_private *dev_priv, } } - /* - * While we write to the group register to program all lanes at once we - * can read only lane registers and we pick lanes 0/1 for that. 
- */ - val = I915_READ(BXT_PORT_PCS_DW10_LN01(port)); - val &= ~(TX2_SWING_CALC_INIT | TX1_SWING_CALC_INIT); - I915_WRITE(BXT_PORT_PCS_DW10_GRP(port), val); - - val = I915_READ(BXT_PORT_TX_DW2_LN0(port)); - val &= ~(MARGIN_000 | UNIQ_TRANS_SCALE); - val |= ddi_translations[level].margin << MARGIN_000_SHIFT | - ddi_translations[level].scale << UNIQ_TRANS_SCALE_SHIFT; - I915_WRITE(BXT_PORT_TX_DW2_GRP(port), val); - - val = I915_READ(BXT_PORT_TX_DW3_LN0(port)); - val &= ~SCALE_DCOMP_METHOD; - if (ddi_translations[level].enable) - val |= SCALE_DCOMP_METHOD; - - if ((val & UNIQUE_TRANGE_EN_METHOD) && !(val & SCALE_DCOMP_METHOD)) - DRM_ERROR("Disabled scaling while ouniqetrangenmethod was set"); - - I915_WRITE(BXT_PORT_TX_DW3_GRP(port), val); - - val = I915_READ(BXT_PORT_TX_DW4_LN0(port)); - val &= ~DE_EMPHASIS; - val |= ddi_translations[level].deemphasis << DEEMPH_SHIFT; - I915_WRITE(BXT_PORT_TX_DW4_GRP(port), val); - - val = I915_READ(BXT_PORT_PCS_DW10_LN01(port)); - val |= TX2_SWING_CALC_INIT | TX1_SWING_CALC_INIT; - I915_WRITE(BXT_PORT_PCS_DW10_GRP(port), val); + bxt_ddi_phy_set_signal_level(dev_priv, port, + ddi_translations[level].margin, + ddi_translations[level].scale, + ddi_translations[level].enable, + ddi_translations[level].deemphasis); } static uint32_t translate_signal_level(int signal_levels) diff --git a/drivers/gpu/drm/i915/intel_dpio_phy.c b/drivers/gpu/drm/i915/intel_dpio_phy.c index 6806296..2a18724 100644 --- a/drivers/gpu/drm/i915/intel_dpio_phy.c +++ b/drivers/gpu/drm/i915/intel_dpio_phy.c @@ -114,6 +114,45 @@ * ----------------- */ +void bxt_ddi_phy_set_signal_level(struct drm_i915_private *dev_priv, + enum port port, u32 margin, u32 scale, + u32 enable, u32 deemphasis) +{ + u32 val; + + /* + * While we write to the group register to program all lanes at once we + * can read only lane registers and we pick lanes 0/1 for that. + */ + val = I915_READ(BXT_PORT_PCS_DW10_LN01(port)); + val &= ~(TX2_SWING_CALC_INIT | TX1_SWING_CALC_INIT); + I915_WRITE(BXT_PORT_PCS_DW10_GRP(port), val); + + val = I915_READ(BXT_PORT_TX_DW2_LN0(port)); + val &= ~(MARGIN_000 | UNIQ_TRANS_SCALE); + val |= margin << MARGIN_000_SHIFT | scale << UNIQ_TRANS_SCALE_SHIFT; + I915_WRITE(BXT_PORT_TX_DW2_GRP(port), val); + + val = I915_READ(BXT_PORT_TX_DW3_LN0(port)); + val &= ~SCALE_DCOMP_METHOD; + if (enable) + val |= SCALE_DCOMP_METHOD; + + if ((val & UNIQUE_TRANGE_EN_METHOD) && !(val & SCALE_DCOMP_METHOD)) + DRM_ERROR("Disabled scaling while ouniqetrangenmethod was set"); + + I915_WRITE(BXT_PORT_TX_DW3_GRP(port), val); + + val = I915_READ(BXT_PORT_TX_DW4_LN0(port)); + val &= ~DE_EMPHASIS; + val |= deemphasis << DEEMPH_SHIFT; + I915_WRITE(BXT_PORT_TX_DW4_GRP(port), val); + + val = I915_READ(BXT_PORT_PCS_DW10_LN01(port)); + val |= TX2_SWING_CALC_INIT | TX1_SWING_CALC_INIT; + I915_WRITE(BXT_PORT_PCS_DW10_GRP(port), val); +} + bool bxt_ddi_phy_is_enabled(struct drm_i915_private *dev_priv, enum dpio_phy phy) { -- cgit v1.1 From 842d416654ebbcae86c32c0a354da9f649335410 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 6 Oct 2016 19:22:20 +0300 Subject: drm/i915: Create a struct to hold information about the broxton phys Information about which phy is dual channel is hardcoded in the phy init sequence. Split that to a separate struct so the init sequence is more generic. v2: Restore mangled part that ended up in following patch. 
(Imre) Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Imre Deak Link: http://patchwork.freedesktop.org/patch/msgid/9102f4c984044126057e4fdd1b91a615ff25fae6.1475770848.git-series.ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/i915_reg.h | 9 +++-- drivers/gpu/drm/i915/intel_dpio_phy.c | 65 +++++++++++++++++++++++++++++------ 2 files changed, 62 insertions(+), 12 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 2f504f6..44683f9 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1307,8 +1307,13 @@ enum skl_disp_power_wells { #define BXT_PORT_CL1CM_DW30(phy) _BXT_PHY((phy), _PORT_CL1CM_DW30_BC, \ _PORT_CL1CM_DW30_A) -/* Defined for PHY0 only */ -#define BXT_PORT_CL2CM_DW6_BC _MMIO(0x6C358) +/* The spec defines this only for BXT PHY0, but lets assume that this + * would exist for PHY1 too if it had a second channel. + */ +#define _PORT_CL2CM_DW6_A 0x162358 +#define _PORT_CL2CM_DW6_BC 0x6C358 +#define BXT_PORT_CL2CM_DW6(phy) _BXT_PHY((phy), _PORT_CL2CM_DW6_BC, \ + _PORT_CL2CM_DW6_A) #define DW6_OLDO_DYN_PWR_DOWN_EN (1 << 28) /* BXT PHY Ref registers */ diff --git a/drivers/gpu/drm/i915/intel_dpio_phy.c b/drivers/gpu/drm/i915/intel_dpio_phy.c index 2a18724..1b1fba1 100644 --- a/drivers/gpu/drm/i915/intel_dpio_phy.c +++ b/drivers/gpu/drm/i915/intel_dpio_phy.c @@ -114,6 +114,50 @@ * ----------------- */ +/** + * struct bxt_ddi_phy_info - Hold info for a broxton DDI phy + */ +struct bxt_ddi_phy_info { + /** + * @dual_channel: true if this phy has a second channel. + */ + bool dual_channel; + + /** + * @channel: struct containing per channel information. + */ + struct { + /** + * @port: which port maps to this channel. + */ + enum port port; + } channel[2]; +}; + +static const struct bxt_ddi_phy_info bxt_ddi_phy_info[] = { + [DPIO_PHY0] = { + .dual_channel = true, + + .channel = { + [DPIO_CH0] = { .port = PORT_B }, + [DPIO_CH1] = { .port = PORT_C }, + } + }, + [DPIO_PHY1] = { + .dual_channel = false, + + .channel = { + [DPIO_CH0] = { .port = PORT_A }, + } + }, +}; + +static u32 bxt_phy_port_mask(const struct bxt_ddi_phy_info *phy_info) +{ + return (phy_info->dual_channel * BIT(phy_info->channel[DPIO_CH1].port)) | + BIT(phy_info->channel[DPIO_CH0].port); +} + void bxt_ddi_phy_set_signal_level(struct drm_i915_private *dev_priv, enum port port, u32 margin, u32 scale, u32 enable, u32 deemphasis) @@ -156,6 +200,7 @@ void bxt_ddi_phy_set_signal_level(struct drm_i915_private *dev_priv, bool bxt_ddi_phy_is_enabled(struct drm_i915_private *dev_priv, enum dpio_phy phy) { + const struct bxt_ddi_phy_info *phy_info = &bxt_ddi_phy_info[phy]; enum port port; if (!(I915_READ(BXT_P_CR_GT_DISP_PWRON) & GT_DISPLAY_POWER_ON(phy))) @@ -183,9 +228,7 @@ bool bxt_ddi_phy_is_enabled(struct drm_i915_private *dev_priv, return false; } - for_each_port_masked(port, - phy == DPIO_PHY0 ? 
BIT(PORT_B) | BIT(PORT_C) : - BIT(PORT_A)) { + for_each_port_masked(port, bxt_phy_port_mask(phy_info)) { u32 tmp = I915_READ(BXT_PHY_CTL(port)); if (tmp & BXT_PHY_CMNLANE_POWERDOWN_ACK) { @@ -220,6 +263,7 @@ static void bxt_phy_wait_grc_done(struct drm_i915_private *dev_priv, void bxt_ddi_phy_init(struct drm_i915_private *dev_priv, enum dpio_phy phy) { + const struct bxt_ddi_phy_info *phy_info = &bxt_ddi_phy_info[phy]; u32 val; if (bxt_ddi_phy_is_enabled(dev_priv, phy)) { @@ -272,10 +316,10 @@ void bxt_ddi_phy_init(struct drm_i915_private *dev_priv, enum dpio_phy phy) SUS_CLK_CONFIG; I915_WRITE(BXT_PORT_CL1CM_DW28(phy), val); - if (phy == DPIO_PHY0) { - val = I915_READ(BXT_PORT_CL2CM_DW6_BC); + if (phy_info->dual_channel) { + val = I915_READ(BXT_PORT_CL2CM_DW6(phy)); val |= DW6_OLDO_DYN_PWR_DOWN_EN; - I915_WRITE(BXT_PORT_CL2CM_DW6_BC, val); + I915_WRITE(BXT_PORT_CL2CM_DW6(phy), val); } val = I915_READ(BXT_PORT_CL1CM_DW30(phy)); @@ -290,7 +334,7 @@ void bxt_ddi_phy_init(struct drm_i915_private *dev_priv, enum dpio_phy phy) * FIXME: Clarify programming of the following, the register is * read-only with bit 6 fixed at 0 at least in stepping A. */ - if (phy == DPIO_PHY1) + if (!phy_info->dual_channel) val |= OCL2_LDOFUSE_PWR_DIS; I915_WRITE(BXT_PORT_CL1CM_DW30(phy), val); @@ -363,6 +407,7 @@ __phy_reg_verify_state(struct drm_i915_private *dev_priv, enum dpio_phy phy, bool bxt_ddi_phy_verify_state(struct drm_i915_private *dev_priv, enum dpio_phy phy) { + const struct bxt_ddi_phy_info *phy_info = &bxt_ddi_phy_info[phy]; uint32_t mask; bool ok; @@ -388,10 +433,10 @@ bool bxt_ddi_phy_verify_state(struct drm_i915_private *dev_priv, ok &= _CHK(BXT_PORT_CL1CM_DW28(phy), mask, mask, "BXT_PORT_CL1CM_DW28(%d)", phy); - if (phy == DPIO_PHY0) - ok &= _CHK(BXT_PORT_CL2CM_DW6_BC, + if (phy_info->dual_channel) + ok &= _CHK(BXT_PORT_CL2CM_DW6(phy), DW6_OLDO_DYN_PWR_DOWN_EN, DW6_OLDO_DYN_PWR_DOWN_EN, - "BXT_PORT_CL2CM_DW6_BC"); + "BXT_PORT_CL2CM_DW6(%d)", phy); /* * TODO: Verify BXT_PORT_CL1CM_DW30 bit OCL2_LDOFUSE_PWR_DIS, -- cgit v1.1 From e7583f7b1018a862b2c93fd50650181881b2a0e1 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Thu, 6 Oct 2016 19:22:21 +0300 Subject: drm/i915: Add location of the Rcomp resistor to bxt_ddi_phy_info Use struct bxt_ddi_phy_info to hold information of where the Rcomp resistor is located, instead of hard coding it in the init sequence. Note that this moves the enabling of the phy with the Rcomp resistor out of the power well enable code. That should be safe since bxt_ddi_phy_init() is called while the power domains lock is held, and that is the only way that function gets called, so there is no possibility of a concurrent phy enable caused by a power domain get call. 
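The ordering constraint described above is the interesting part: before initialising a phy whose descriptor names a donor (rcomp_phy), the donor must be powered so its GRC calibration code can be copied, and it is powered back down afterwards if it was only brought up for the copy. Below is a self-contained C model of that flow under stated assumptions: the dual_channel/rcomp_phy descriptor values come from the patch, while the boolean power state and the pretend 0xe4 calibration code stand in for real hardware access.

#include <stdbool.h>
#include <stdio.h>

enum dpio_phy { DPIO_PHY0, DPIO_PHY1, N_PHYS };

struct phy_info {
	bool dual_channel;
	int rcomp_phy;		/* -1: this phy owns its own Rcomp resistor */
};

static const struct phy_info infos[N_PHYS] = {
	[DPIO_PHY0] = { .dual_channel = true,  .rcomp_phy = DPIO_PHY1 },
	[DPIO_PHY1] = { .dual_channel = false, .rcomp_phy = -1 },
};

static bool enabled[N_PHYS];
static unsigned int grc[N_PHYS] = { 0, 0xe4 };	/* pretend calibration */

static void phy_power_on(int phy)
{
	const struct phy_info *info = &infos[phy];

	if (info->rcomp_phy != -1) {
		bool was_enabled = enabled[info->rcomp_phy];

		if (!was_enabled)
			phy_power_on(info->rcomp_phy);	/* donor first */
		grc[phy] = grc[info->rcomp_phy];	/* copy GRC code */
		if (!was_enabled)
			enabled[info->rcomp_phy] = false; /* restore donor */
	}
	enabled[phy] = true;
}

int main(void)
{
	phy_power_on(DPIO_PHY0);
	printf("PHY0 grc=0x%x, PHY1 enabled=%d\n",
	       grc[DPIO_PHY0], enabled[DPIO_PHY1]);
	return 0;
}

This mirrors the sequencing of _bxt_ddi_phy_init() followed by bxt_ddi_phy_uninit() on the donor, minus the register writes; in the driver the whole dance happens under the power-domains lock, which is what the lockdep_assert_held() added in v2 checks.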
v2: Replace comment about lock with lockdep_assert_held() (Imre) Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Imre Deak Link: http://patchwork.freedesktop.org/patch/msgid/62d209950ad48484564f3e793cf247cf62572a39.1475770848.git-series.ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/intel_dpio_phy.c | 72 +++++++++++++++++++++++++-------- drivers/gpu/drm/i915/intel_runtime_pm.c | 15 ------- 2 files changed, 55 insertions(+), 32 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/intel_dpio_phy.c b/drivers/gpu/drm/i915/intel_dpio_phy.c index 1b1fba1..6711e3a 100644 --- a/drivers/gpu/drm/i915/intel_dpio_phy.c +++ b/drivers/gpu/drm/i915/intel_dpio_phy.c @@ -124,6 +124,13 @@ struct bxt_ddi_phy_info { bool dual_channel; /** + * @rcomp_phy: If -1, indicates this phy has its own rcomp resistor. + * Otherwise the GRC value will be copied from the phy indicated by + * this field. + */ + enum dpio_phy rcomp_phy; + + /** * @channel: struct containing per channel information. */ struct { @@ -137,6 +144,7 @@ struct bxt_ddi_phy_info { static const struct bxt_ddi_phy_info bxt_ddi_phy_info[] = { [DPIO_PHY0] = { .dual_channel = true, + .rcomp_phy = DPIO_PHY1, .channel = { [DPIO_CH0] = { .port = PORT_B }, @@ -145,6 +153,7 @@ static const struct bxt_ddi_phy_info bxt_ddi_phy_info[] = { }, [DPIO_PHY1] = { .dual_channel = false, + .rcomp_phy = -1, .channel = { [DPIO_CH0] = { .port = PORT_A }, @@ -214,9 +223,10 @@ bool bxt_ddi_phy_is_enabled(struct drm_i915_private *dev_priv, return false; } - if (phy == DPIO_PHY1 && - !(I915_READ(BXT_PORT_REF_DW3(DPIO_PHY1)) & GRC_DONE)) { - DRM_DEBUG_DRIVER("DDI PHY 1 powered, but GRC isn't done\n"); + if (phy_info->rcomp_phy == -1 && + !(I915_READ(BXT_PORT_REF_DW3(phy)) & GRC_DONE)) { + DRM_DEBUG_DRIVER("DDI PHY %d powered, but GRC isn't done\n", + phy); return false; } @@ -261,14 +271,15 @@ static void bxt_phy_wait_grc_done(struct drm_i915_private *dev_priv, DRM_ERROR("timeout waiting for PHY%d GRC\n", phy); } -void bxt_ddi_phy_init(struct drm_i915_private *dev_priv, enum dpio_phy phy) +static void _bxt_ddi_phy_init(struct drm_i915_private *dev_priv, + enum dpio_phy phy) { const struct bxt_ddi_phy_info *phy_info = &bxt_ddi_phy_info[phy]; u32 val; if (bxt_ddi_phy_is_enabled(dev_priv, phy)) { /* Still read out the GRC value for state verification */ - if (phy == DPIO_PHY0) + if (phy_info->rcomp_phy != -1) dev_priv->bxt_phy_grc = bxt_get_grc(dev_priv, phy); if (bxt_ddi_phy_verify_state(dev_priv, phy)) { @@ -338,30 +349,32 @@ void bxt_ddi_phy_init(struct drm_i915_private *dev_priv, enum dpio_phy phy) val |= OCL2_LDOFUSE_PWR_DIS; I915_WRITE(BXT_PORT_CL1CM_DW30(phy), val); - if (phy == DPIO_PHY0) { + if (phy_info->rcomp_phy != -1) { uint32_t grc_code; /* * PHY0 isn't connected to an RCOMP resistor so copy over * the corresponding calibrated value from PHY1, and disable * the automatic calibration on PHY0. 
*/ - val = dev_priv->bxt_phy_grc = bxt_get_grc(dev_priv, DPIO_PHY1); + val = dev_priv->bxt_phy_grc = bxt_get_grc(dev_priv, + phy_info->rcomp_phy); grc_code = val << GRC_CODE_FAST_SHIFT | val << GRC_CODE_SLOW_SHIFT | val; - I915_WRITE(BXT_PORT_REF_DW6(DPIO_PHY0), grc_code); + I915_WRITE(BXT_PORT_REF_DW6(phy), grc_code); - val = I915_READ(BXT_PORT_REF_DW8(DPIO_PHY0)); + val = I915_READ(BXT_PORT_REF_DW8(phy)); val |= GRC_DIS | GRC_RDY_OVRD; - I915_WRITE(BXT_PORT_REF_DW8(DPIO_PHY0), val); + I915_WRITE(BXT_PORT_REF_DW8(phy), val); } val = I915_READ(BXT_PHY_CTL_FAMILY(phy)); val |= COMMON_RESET_DIS; I915_WRITE(BXT_PHY_CTL_FAMILY(phy), val); - if (phy == DPIO_PHY1) - bxt_phy_wait_grc_done(dev_priv, DPIO_PHY1); + if (phy_info->rcomp_phy == -1) + bxt_phy_wait_grc_done(dev_priv, phy); + } void bxt_ddi_phy_uninit(struct drm_i915_private *dev_priv, enum dpio_phy phy) @@ -377,6 +390,31 @@ void bxt_ddi_phy_uninit(struct drm_i915_private *dev_priv, enum dpio_phy phy) I915_WRITE(BXT_P_CR_GT_DISP_PWRON, val); } +void bxt_ddi_phy_init(struct drm_i915_private *dev_priv, enum dpio_phy phy) +{ + const struct bxt_ddi_phy_info *phy_info = &bxt_ddi_phy_info[phy]; + enum dpio_phy rcomp_phy = phy_info->rcomp_phy; + bool was_enabled; + + lockdep_assert_held(&dev_priv->power_domains.lock); + + if (rcomp_phy != -1) { + was_enabled = bxt_ddi_phy_is_enabled(dev_priv, rcomp_phy); + + /* + * We need to copy the GRC calibration value from rcomp_phy, + * so make sure it's powered up. + */ + if (!was_enabled) + _bxt_ddi_phy_init(dev_priv, rcomp_phy); + } + + _bxt_ddi_phy_init(dev_priv, phy); + + if (rcomp_phy != -1 && !was_enabled) + bxt_ddi_phy_uninit(dev_priv, phy_info->rcomp_phy); +} + static bool __printf(6, 7) __phy_reg_verify_state(struct drm_i915_private *dev_priv, enum dpio_phy phy, i915_reg_t reg, u32 mask, u32 expected, @@ -443,7 +481,7 @@ bool bxt_ddi_phy_verify_state(struct drm_i915_private *dev_priv, * at least on stepping A this bit is read-only and fixed at 0. */ - if (phy == DPIO_PHY0) { + if (phy_info->rcomp_phy != -1) { u32 grc_code = dev_priv->bxt_phy_grc; grc_code = grc_code << GRC_CODE_FAST_SHIFT | @@ -451,12 +489,12 @@ bool bxt_ddi_phy_verify_state(struct drm_i915_private *dev_priv, grc_code; mask = GRC_CODE_FAST_MASK | GRC_CODE_SLOW_MASK | GRC_CODE_NOM_MASK; - ok &= _CHK(BXT_PORT_REF_DW6(DPIO_PHY0), mask, grc_code, - "BXT_PORT_REF_DW6(%d)", DPIO_PHY0); + ok &= _CHK(BXT_PORT_REF_DW6(phy), mask, grc_code, + "BXT_PORT_REF_DW6(%d)", phy); mask = GRC_DIS | GRC_RDY_OVRD; - ok &= _CHK(BXT_PORT_REF_DW8(DPIO_PHY0), mask, mask, - "BXT_PORT_REF_DW8(%d)", DPIO_PHY0); + ok &= _CHK(BXT_PORT_REF_DW8(phy), mask, mask, + "BXT_PORT_REF_DW8(%d)", phy); } return ok; diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 5fd76ee..95034a0 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -848,22 +848,7 @@ static void skl_power_well_disable(struct drm_i915_private *dev_priv, static void bxt_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { - enum skl_disp_power_wells power_well_id = power_well->id; - struct i915_power_well *cmn_a_well = NULL; - - if (power_well_id == BXT_DPIO_CMN_BC) { - /* - * We need to copy the GRC calibration value from the eDP PHY, - * so make sure it's powered up. 
- */ - cmn_a_well = lookup_power_well(dev_priv, BXT_DPIO_CMN_A); - intel_power_well_get(dev_priv, cmn_a_well); - } - bxt_ddi_phy_init(dev_priv, power_well->data); - - if (cmn_a_well) - intel_power_well_put(dev_priv, cmn_a_well); } static void bxt_dpio_cmn_power_well_disable(struct drm_i915_private *dev_priv, -- cgit v1.1 From ed37892e6df2a3caae4928583c211970a46375a6 Mon Sep 17 00:00:00 2001 From: Ander Conselvan de Oliveira Date: Wed, 19 Oct 2016 10:59:00 +0300 Subject: drm/i915: Address broxton phy registers based on phy and channel number The port registers related to the phys in broxton map to different channels and specific phys. Make that mapping explicit. v2: Pass enum dpio_phy to macros instead of mmio base. (Imre) v3: Fix typo in macros. (Imre) v4: Also change variables from u32 to enum dpio_phy. (Imre) Remove leftovers from previous version. (Imre) v5: Actually git add the changes. Signed-off-by: Ander Conselvan de Oliveira Reviewed-by: Imre Deak Link: http://patchwork.freedesktop.org/patch/msgid/1476863940-6019-1-git-send-email-ander.conselvan.de.oliveira@intel.com --- drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/i915_reg.h | 141 ++++++++++++++++++---------------- drivers/gpu/drm/i915/intel_dpio_phy.c | 68 ++++++++++++---- drivers/gpu/drm/i915/intel_dpll_mgr.c | 84 +++++++++++--------- 4 files changed, 175 insertions(+), 120 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7e79298..a8f006e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3761,6 +3761,8 @@ u32 vlv_flisdsi_read(struct drm_i915_private *dev_priv, u32 reg); void vlv_flisdsi_write(struct drm_i915_private *dev_priv, u32 reg, u32 val); /* intel_dpio_phy.c */ +void bxt_port_to_phy_channel(enum port port, + enum dpio_phy *phy, enum dpio_channel *ch); void bxt_ddi_phy_set_signal_level(struct drm_i915_private *dev_priv, enum port port, u32 margin, u32 scale, u32 enable, u32 deemphasis); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 44683f9..3361d7f 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1186,7 +1186,19 @@ enum skl_disp_power_wells { #define DPIO_UPAR_SHIFT 30 /* BXT PHY registers */ -#define _BXT_PHY(phy, a, b) _MMIO_PIPE((phy), (a), (b)) +#define _BXT_PHY0_BASE 0x6C000 +#define _BXT_PHY1_BASE 0x162000 +#define BXT_PHY_BASE(phy) _PIPE((phy), _BXT_PHY0_BASE, \ + _BXT_PHY1_BASE) + +#define _BXT_PHY(phy, reg) \ + _MMIO(BXT_PHY_BASE(phy) - _BXT_PHY0_BASE + (reg)) + +#define _BXT_PHY_CH(phy, ch, reg_ch0, reg_ch1) \ + (BXT_PHY_BASE(phy) + _PIPE((ch), (reg_ch0) - _BXT_PHY0_BASE, \ + (reg_ch1) - _BXT_PHY0_BASE)) +#define _MMIO_BXT_PHY_CH(phy, ch, reg_ch0, reg_ch1) \ + _MMIO(_BXT_PHY_CH(phy, ch, reg_ch0, reg_ch1)) #define BXT_P_CR_GT_DISP_PWRON _MMIO(0x138090) #define GT_DISPLAY_POWER_ON(phy) (1 << (phy)) @@ -1203,8 +1215,8 @@ enum skl_disp_power_wells { #define _PHY_CTL_FAMILY_EDP 0x64C80 #define _PHY_CTL_FAMILY_DDI 0x64C90 #define COMMON_RESET_DIS (1 << 31) -#define BXT_PHY_CTL_FAMILY(phy) _BXT_PHY((phy), _PHY_CTL_FAMILY_DDI, \ - _PHY_CTL_FAMILY_EDP) +#define BXT_PHY_CTL_FAMILY(phy) _MMIO_PIPE((phy), _PHY_CTL_FAMILY_DDI, \ + _PHY_CTL_FAMILY_EDP) /* BXT PHY PLL registers */ #define _PORT_PLL_A 0x46074 @@ -1224,18 +1236,18 @@ enum skl_disp_power_wells { #define PORT_PLL_P2_SHIFT 8 #define PORT_PLL_P2_MASK (0x1f << PORT_PLL_P2_SHIFT) #define PORT_PLL_P2(x) ((x) << PORT_PLL_P2_SHIFT) -#define BXT_PORT_PLL_EBB_0(port) 
_MMIO_PORT3(port, _PORT_PLL_EBB_0_A, \ - _PORT_PLL_EBB_0_B, \ - _PORT_PLL_EBB_0_C) +#define BXT_PORT_PLL_EBB_0(phy, ch) _MMIO_BXT_PHY_CH(phy, ch, \ + _PORT_PLL_EBB_0_B, \ + _PORT_PLL_EBB_0_C) #define _PORT_PLL_EBB_4_A 0x162038 #define _PORT_PLL_EBB_4_B 0x6C038 #define _PORT_PLL_EBB_4_C 0x6C344 #define PORT_PLL_10BIT_CLK_ENABLE (1 << 13) #define PORT_PLL_RECALIBRATE (1 << 14) -#define BXT_PORT_PLL_EBB_4(port) _MMIO_PORT3(port, _PORT_PLL_EBB_4_A, \ - _PORT_PLL_EBB_4_B, \ - _PORT_PLL_EBB_4_C) +#define BXT_PORT_PLL_EBB_4(phy, ch) _MMIO_BXT_PHY_CH(phy, ch, \ + _PORT_PLL_EBB_4_B, \ + _PORT_PLL_EBB_4_C) #define _PORT_PLL_0_A 0x162100 #define _PORT_PLL_0_B 0x6C100 @@ -1266,62 +1278,56 @@ enum skl_disp_power_wells { #define PORT_PLL_DCO_AMP_DEFAULT 15 #define PORT_PLL_DCO_AMP_MASK 0x3c00 #define PORT_PLL_DCO_AMP(x) ((x)<<10) -#define _PORT_PLL_BASE(port) _PORT3(port, _PORT_PLL_0_A, \ - _PORT_PLL_0_B, \ - _PORT_PLL_0_C) -#define BXT_PORT_PLL(port, idx) _MMIO(_PORT_PLL_BASE(port) + (idx) * 4) +#define _PORT_PLL_BASE(phy, ch) _BXT_PHY_CH(phy, ch, \ + _PORT_PLL_0_B, \ + _PORT_PLL_0_C) +#define BXT_PORT_PLL(phy, ch, idx) _MMIO(_PORT_PLL_BASE(phy, ch) + \ + (idx) * 4) /* BXT PHY common lane registers */ #define _PORT_CL1CM_DW0_A 0x162000 #define _PORT_CL1CM_DW0_BC 0x6C000 #define PHY_POWER_GOOD (1 << 16) #define PHY_RESERVED (1 << 7) -#define BXT_PORT_CL1CM_DW0(phy) _BXT_PHY((phy), _PORT_CL1CM_DW0_BC, \ - _PORT_CL1CM_DW0_A) +#define BXT_PORT_CL1CM_DW0(phy) _BXT_PHY((phy), _PORT_CL1CM_DW0_BC) #define _PORT_CL1CM_DW9_A 0x162024 #define _PORT_CL1CM_DW9_BC 0x6C024 #define IREF0RC_OFFSET_SHIFT 8 #define IREF0RC_OFFSET_MASK (0xFF << IREF0RC_OFFSET_SHIFT) -#define BXT_PORT_CL1CM_DW9(phy) _BXT_PHY((phy), _PORT_CL1CM_DW9_BC, \ - _PORT_CL1CM_DW9_A) +#define BXT_PORT_CL1CM_DW9(phy) _BXT_PHY((phy), _PORT_CL1CM_DW9_BC) #define _PORT_CL1CM_DW10_A 0x162028 #define _PORT_CL1CM_DW10_BC 0x6C028 #define IREF1RC_OFFSET_SHIFT 8 #define IREF1RC_OFFSET_MASK (0xFF << IREF1RC_OFFSET_SHIFT) -#define BXT_PORT_CL1CM_DW10(phy) _BXT_PHY((phy), _PORT_CL1CM_DW10_BC, \ - _PORT_CL1CM_DW10_A) +#define BXT_PORT_CL1CM_DW10(phy) _BXT_PHY((phy), _PORT_CL1CM_DW10_BC) #define _PORT_CL1CM_DW28_A 0x162070 #define _PORT_CL1CM_DW28_BC 0x6C070 #define OCL1_POWER_DOWN_EN (1 << 23) #define DW28_OLDO_DYN_PWR_DOWN_EN (1 << 22) #define SUS_CLK_CONFIG 0x3 -#define BXT_PORT_CL1CM_DW28(phy) _BXT_PHY((phy), _PORT_CL1CM_DW28_BC, \ - _PORT_CL1CM_DW28_A) +#define BXT_PORT_CL1CM_DW28(phy) _BXT_PHY((phy), _PORT_CL1CM_DW28_BC) #define _PORT_CL1CM_DW30_A 0x162078 #define _PORT_CL1CM_DW30_BC 0x6C078 #define OCL2_LDOFUSE_PWR_DIS (1 << 6) -#define BXT_PORT_CL1CM_DW30(phy) _BXT_PHY((phy), _PORT_CL1CM_DW30_BC, \ - _PORT_CL1CM_DW30_A) +#define BXT_PORT_CL1CM_DW30(phy) _BXT_PHY((phy), _PORT_CL1CM_DW30_BC) /* The spec defines this only for BXT PHY0, but lets assume that this * would exist for PHY1 too if it had a second channel. 
*/ #define _PORT_CL2CM_DW6_A 0x162358 #define _PORT_CL2CM_DW6_BC 0x6C358 -#define BXT_PORT_CL2CM_DW6(phy) _BXT_PHY((phy), _PORT_CL2CM_DW6_BC, \ - _PORT_CL2CM_DW6_A) +#define BXT_PORT_CL2CM_DW6(phy) _BXT_PHY((phy), _PORT_CL2CM_DW6_BC) #define DW6_OLDO_DYN_PWR_DOWN_EN (1 << 28) /* BXT PHY Ref registers */ #define _PORT_REF_DW3_A 0x16218C #define _PORT_REF_DW3_BC 0x6C18C #define GRC_DONE (1 << 22) -#define BXT_PORT_REF_DW3(phy) _BXT_PHY((phy), _PORT_REF_DW3_BC, \ - _PORT_REF_DW3_A) +#define BXT_PORT_REF_DW3(phy) _BXT_PHY((phy), _PORT_REF_DW3_BC) #define _PORT_REF_DW6_A 0x162198 #define _PORT_REF_DW6_BC 0x6C198 @@ -1332,15 +1338,13 @@ enum skl_disp_power_wells { #define GRC_CODE_SLOW_SHIFT 8 #define GRC_CODE_SLOW_MASK (0xFF << GRC_CODE_SLOW_SHIFT) #define GRC_CODE_NOM_MASK 0xFF -#define BXT_PORT_REF_DW6(phy) _BXT_PHY((phy), _PORT_REF_DW6_BC, \ - _PORT_REF_DW6_A) +#define BXT_PORT_REF_DW6(phy) _BXT_PHY((phy), _PORT_REF_DW6_BC) #define _PORT_REF_DW8_A 0x1621A0 #define _PORT_REF_DW8_BC 0x6C1A0 #define GRC_DIS (1 << 15) #define GRC_RDY_OVRD (1 << 1) -#define BXT_PORT_REF_DW8(phy) _BXT_PHY((phy), _PORT_REF_DW8_BC, \ - _PORT_REF_DW8_A) +#define BXT_PORT_REF_DW8(phy) _BXT_PHY((phy), _PORT_REF_DW8_BC) /* BXT PHY PCS registers */ #define _PORT_PCS_DW10_LN01_A 0x162428 @@ -1349,12 +1353,13 @@ enum skl_disp_power_wells { #define _PORT_PCS_DW10_GRP_A 0x162C28 #define _PORT_PCS_DW10_GRP_B 0x6CC28 #define _PORT_PCS_DW10_GRP_C 0x6CE28 -#define BXT_PORT_PCS_DW10_LN01(port) _MMIO_PORT3(port, _PORT_PCS_DW10_LN01_A, \ - _PORT_PCS_DW10_LN01_B, \ - _PORT_PCS_DW10_LN01_C) -#define BXT_PORT_PCS_DW10_GRP(port) _MMIO_PORT3(port, _PORT_PCS_DW10_GRP_A, \ - _PORT_PCS_DW10_GRP_B, \ - _PORT_PCS_DW10_GRP_C) +#define BXT_PORT_PCS_DW10_LN01(phy, ch) _MMIO_BXT_PHY_CH(phy, ch, \ + _PORT_PCS_DW10_LN01_B, \ + _PORT_PCS_DW10_LN01_C) +#define BXT_PORT_PCS_DW10_GRP(phy, ch) _MMIO_BXT_PHY_CH(phy, ch, \ + _PORT_PCS_DW10_GRP_B, \ + _PORT_PCS_DW10_GRP_C) + #define TX2_SWING_CALC_INIT (1 << 31) #define TX1_SWING_CALC_INIT (1 << 30) @@ -1369,15 +1374,15 @@ enum skl_disp_power_wells { #define _PORT_PCS_DW12_GRP_C 0x6CE30 #define LANESTAGGER_STRAP_OVRD (1 << 6) #define LANE_STAGGER_MASK 0x1F -#define BXT_PORT_PCS_DW12_LN01(port) _MMIO_PORT3(port, _PORT_PCS_DW12_LN01_A, \ - _PORT_PCS_DW12_LN01_B, \ - _PORT_PCS_DW12_LN01_C) -#define BXT_PORT_PCS_DW12_LN23(port) _MMIO_PORT3(port, _PORT_PCS_DW12_LN23_A, \ - _PORT_PCS_DW12_LN23_B, \ - _PORT_PCS_DW12_LN23_C) -#define BXT_PORT_PCS_DW12_GRP(port) _MMIO_PORT3(port, _PORT_PCS_DW12_GRP_A, \ - _PORT_PCS_DW12_GRP_B, \ - _PORT_PCS_DW12_GRP_C) +#define BXT_PORT_PCS_DW12_LN01(phy, ch) _MMIO_BXT_PHY_CH(phy, ch, \ + _PORT_PCS_DW12_LN01_B, \ + _PORT_PCS_DW12_LN01_C) +#define BXT_PORT_PCS_DW12_LN23(phy, ch) _MMIO_BXT_PHY_CH(phy, ch, \ + _PORT_PCS_DW12_LN23_B, \ + _PORT_PCS_DW12_LN23_C) +#define BXT_PORT_PCS_DW12_GRP(phy, ch) _MMIO_BXT_PHY_CH(phy, ch, \ + _PORT_PCS_DW12_GRP_B, \ + _PORT_PCS_DW12_GRP_C) /* BXT PHY TX registers */ #define _BXT_LANE_OFFSET(lane) (((lane) >> 1) * 0x200 + \ @@ -1389,12 +1394,12 @@ enum skl_disp_power_wells { #define _PORT_TX_DW2_GRP_A 0x162D08 #define _PORT_TX_DW2_GRP_B 0x6CD08 #define _PORT_TX_DW2_GRP_C 0x6CF08 -#define BXT_PORT_TX_DW2_GRP(port) _MMIO_PORT3(port, _PORT_TX_DW2_GRP_A, \ - _PORT_TX_DW2_GRP_B, \ - _PORT_TX_DW2_GRP_C) -#define BXT_PORT_TX_DW2_LN0(port) _MMIO_PORT3(port, _PORT_TX_DW2_LN0_A, \ - _PORT_TX_DW2_LN0_B, \ - _PORT_TX_DW2_LN0_C) +#define BXT_PORT_TX_DW2_LN0(phy, ch) _MMIO_BXT_PHY_CH(phy, ch, \ + _PORT_TX_DW2_LN0_B, \ + _PORT_TX_DW2_LN0_C) +#define 
BXT_PORT_TX_DW2_GRP(phy, ch) _MMIO_BXT_PHY_CH(phy, ch, \ + _PORT_TX_DW2_GRP_B, \ + _PORT_TX_DW2_GRP_C) #define MARGIN_000_SHIFT 16 #define MARGIN_000 (0xFF << MARGIN_000_SHIFT) #define UNIQ_TRANS_SCALE_SHIFT 8 @@ -1406,12 +1411,12 @@ enum skl_disp_power_wells { #define _PORT_TX_DW3_GRP_A 0x162D0C #define _PORT_TX_DW3_GRP_B 0x6CD0C #define _PORT_TX_DW3_GRP_C 0x6CF0C -#define BXT_PORT_TX_DW3_GRP(port) _MMIO_PORT3(port, _PORT_TX_DW3_GRP_A, \ - _PORT_TX_DW3_GRP_B, \ - _PORT_TX_DW3_GRP_C) -#define BXT_PORT_TX_DW3_LN0(port) _MMIO_PORT3(port, _PORT_TX_DW3_LN0_A, \ - _PORT_TX_DW3_LN0_B, \ - _PORT_TX_DW3_LN0_C) +#define BXT_PORT_TX_DW3_LN0(phy, ch) _MMIO_BXT_PHY_CH(phy, ch, \ + _PORT_TX_DW3_LN0_B, \ + _PORT_TX_DW3_LN0_C) +#define BXT_PORT_TX_DW3_GRP(phy, ch) _MMIO_BXT_PHY_CH(phy, ch, \ + _PORT_TX_DW3_GRP_B, \ + _PORT_TX_DW3_GRP_C) #define SCALE_DCOMP_METHOD (1 << 26) #define UNIQUE_TRANGE_EN_METHOD (1 << 27) @@ -1421,12 +1426,12 @@ enum skl_disp_power_wells { #define _PORT_TX_DW4_GRP_A 0x162D10 #define _PORT_TX_DW4_GRP_B 0x6CD10 #define _PORT_TX_DW4_GRP_C 0x6CF10 -#define BXT_PORT_TX_DW4_LN0(port) _MMIO_PORT3(port, _PORT_TX_DW4_LN0_A, \ - _PORT_TX_DW4_LN0_B, \ - _PORT_TX_DW4_LN0_C) -#define BXT_PORT_TX_DW4_GRP(port) _MMIO_PORT3(port, _PORT_TX_DW4_GRP_A, \ - _PORT_TX_DW4_GRP_B, \ - _PORT_TX_DW4_GRP_C) +#define BXT_PORT_TX_DW4_LN0(phy, ch) _MMIO_BXT_PHY_CH(phy, ch, \ + _PORT_TX_DW4_LN0_B, \ + _PORT_TX_DW4_LN0_C) +#define BXT_PORT_TX_DW4_GRP(phy, ch) _MMIO_BXT_PHY_CH(phy, ch, \ + _PORT_TX_DW4_GRP_B, \ + _PORT_TX_DW4_GRP_C) #define DEEMPH_SHIFT 24 #define DE_EMPHASIS (0xFF << DEEMPH_SHIFT) @@ -1435,10 +1440,10 @@ enum skl_disp_power_wells { #define _PORT_TX_DW14_LN0_C 0x6C938 #define LATENCY_OPTIM_SHIFT 30 #define LATENCY_OPTIM (1 << LATENCY_OPTIM_SHIFT) -#define BXT_PORT_TX_DW14_LN(port, lane) _MMIO(_PORT3((port), _PORT_TX_DW14_LN0_A, \ - _PORT_TX_DW14_LN0_B, \ - _PORT_TX_DW14_LN0_C) + \ - _BXT_LANE_OFFSET(lane)) +#define BXT_PORT_TX_DW14_LN(phy, ch, lane) \ + _MMIO(_BXT_PHY_CH(phy, ch, _PORT_TX_DW14_LN0_B, \ + _PORT_TX_DW14_LN0_C) + \ + _BXT_LANE_OFFSET(lane)) /* UAIMI scratch pad register 1 */ #define UAIMI_SPR1 _MMIO(0x4F074) diff --git a/drivers/gpu/drm/i915/intel_dpio_phy.c b/drivers/gpu/drm/i915/intel_dpio_phy.c index 6711e3a..4a6164a 100644 --- a/drivers/gpu/drm/i915/intel_dpio_phy.c +++ b/drivers/gpu/drm/i915/intel_dpio_phy.c @@ -167,26 +167,58 @@ static u32 bxt_phy_port_mask(const struct bxt_ddi_phy_info *phy_info) BIT(phy_info->channel[DPIO_CH0].port); } +void bxt_port_to_phy_channel(enum port port, + enum dpio_phy *phy, enum dpio_channel *ch) +{ + const struct bxt_ddi_phy_info *phy_info; + int i; + + for (i = 0; i < ARRAY_SIZE(bxt_ddi_phy_info); i++) { + phy_info = &bxt_ddi_phy_info[i]; + + if (port == phy_info->channel[DPIO_CH0].port) { + *phy = i; + *ch = DPIO_CH0; + return; + } + + if (phy_info->dual_channel && + port == phy_info->channel[DPIO_CH1].port) { + *phy = i; + *ch = DPIO_CH1; + return; + } + } + + WARN(1, "PHY not found for PORT %c", port_name(port)); + *phy = DPIO_PHY0; + *ch = DPIO_CH0; +} + void bxt_ddi_phy_set_signal_level(struct drm_i915_private *dev_priv, enum port port, u32 margin, u32 scale, u32 enable, u32 deemphasis) { u32 val; + enum dpio_phy phy; + enum dpio_channel ch; + + bxt_port_to_phy_channel(port, &phy, &ch); /* * While we write to the group register to program all lanes at once we * can read only lane registers and we pick lanes 0/1 for that. 
*/ - val = I915_READ(BXT_PORT_PCS_DW10_LN01(port)); + val = I915_READ(BXT_PORT_PCS_DW10_LN01(phy, ch)); val &= ~(TX2_SWING_CALC_INIT | TX1_SWING_CALC_INIT); - I915_WRITE(BXT_PORT_PCS_DW10_GRP(port), val); + I915_WRITE(BXT_PORT_PCS_DW10_GRP(phy, ch), val); - val = I915_READ(BXT_PORT_TX_DW2_LN0(port)); + val = I915_READ(BXT_PORT_TX_DW2_LN0(phy, ch)); val &= ~(MARGIN_000 | UNIQ_TRANS_SCALE); val |= margin << MARGIN_000_SHIFT | scale << UNIQ_TRANS_SCALE_SHIFT; - I915_WRITE(BXT_PORT_TX_DW2_GRP(port), val); + I915_WRITE(BXT_PORT_TX_DW2_GRP(phy, ch), val); - val = I915_READ(BXT_PORT_TX_DW3_LN0(port)); + val = I915_READ(BXT_PORT_TX_DW3_LN0(phy, ch)); val &= ~SCALE_DCOMP_METHOD; if (enable) val |= SCALE_DCOMP_METHOD; @@ -194,16 +226,16 @@ void bxt_ddi_phy_set_signal_level(struct drm_i915_private *dev_priv, if ((val & UNIQUE_TRANGE_EN_METHOD) && !(val & SCALE_DCOMP_METHOD)) DRM_ERROR("Disabled scaling while ouniqetrangenmethod was set"); - I915_WRITE(BXT_PORT_TX_DW3_GRP(port), val); + I915_WRITE(BXT_PORT_TX_DW3_GRP(phy, ch), val); - val = I915_READ(BXT_PORT_TX_DW4_LN0(port)); + val = I915_READ(BXT_PORT_TX_DW4_LN0(phy, ch)); val &= ~DE_EMPHASIS; val |= deemphasis << DEEMPH_SHIFT; - I915_WRITE(BXT_PORT_TX_DW4_GRP(port), val); + I915_WRITE(BXT_PORT_TX_DW4_GRP(phy, ch), val); - val = I915_READ(BXT_PORT_PCS_DW10_LN01(port)); + val = I915_READ(BXT_PORT_PCS_DW10_LN01(phy, ch)); val |= TX2_SWING_CALC_INIT | TX1_SWING_CALC_INIT; - I915_WRITE(BXT_PORT_PCS_DW10_GRP(port), val); + I915_WRITE(BXT_PORT_PCS_DW10_GRP(phy, ch), val); } bool bxt_ddi_phy_is_enabled(struct drm_i915_private *dev_priv, @@ -490,7 +522,7 @@ bool bxt_ddi_phy_verify_state(struct drm_i915_private *dev_priv, mask = GRC_CODE_FAST_MASK | GRC_CODE_SLOW_MASK | GRC_CODE_NOM_MASK; ok &= _CHK(BXT_PORT_REF_DW6(phy), mask, grc_code, - "BXT_PORT_REF_DW6(%d)", phy); + "BXT_PORT_REF_DW6(%d)", phy); mask = GRC_DIS | GRC_RDY_OVRD; ok &= _CHK(BXT_PORT_REF_DW8(phy), mask, mask, @@ -525,10 +557,14 @@ void bxt_ddi_phy_set_lane_optim_mask(struct intel_encoder *encoder, struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); struct drm_i915_private *dev_priv = to_i915(dport->base.base.dev); enum port port = dport->port; + enum dpio_phy phy; + enum dpio_channel ch; int lane; + bxt_port_to_phy_channel(port, &phy, &ch); + for (lane = 0; lane < 4; lane++) { - u32 val = I915_READ(BXT_PORT_TX_DW14_LN(port, lane)); + u32 val = I915_READ(BXT_PORT_TX_DW14_LN(phy, ch, lane)); /* * Note that on CHV this flag is called UPAR, but has @@ -538,7 +574,7 @@ void bxt_ddi_phy_set_lane_optim_mask(struct intel_encoder *encoder, if (lane_lat_optim_mask & BIT(lane)) val |= LATENCY_OPTIM; - I915_WRITE(BXT_PORT_TX_DW14_LN(port, lane), val); + I915_WRITE(BXT_PORT_TX_DW14_LN(phy, ch, lane), val); } } @@ -548,12 +584,16 @@ bxt_ddi_phy_get_lane_lat_optim_mask(struct intel_encoder *encoder) struct intel_digital_port *dport = enc_to_dig_port(&encoder->base); struct drm_i915_private *dev_priv = to_i915(dport->base.base.dev); enum port port = dport->port; + enum dpio_phy phy; + enum dpio_channel ch; int lane; uint8_t mask; + bxt_port_to_phy_channel(port, &phy, &ch); + mask = 0; for (lane = 0; lane < 4; lane++) { - u32 val = I915_READ(BXT_PORT_TX_DW14_LN(port, lane)); + u32 val = I915_READ(BXT_PORT_TX_DW14_LN(phy, ch, lane)); if (val & LATENCY_OPTIM) mask |= BIT(lane); diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index 605d0b5..21853a1 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -1371,6 
+1371,10 @@ static void bxt_ddi_pll_enable(struct drm_i915_private *dev_priv, { uint32_t temp; enum port port = (enum port)pll->id; /* 1:1 port->PLL mapping */ + enum dpio_phy phy; + enum dpio_channel ch; + + bxt_port_to_phy_channel(port, &phy, &ch); /* Non-SSC reference */ temp = I915_READ(BXT_PORT_PLL_ENABLE(port)); @@ -1378,72 +1382,72 @@ static void bxt_ddi_pll_enable(struct drm_i915_private *dev_priv, I915_WRITE(BXT_PORT_PLL_ENABLE(port), temp); /* Disable 10 bit clock */ - temp = I915_READ(BXT_PORT_PLL_EBB_4(port)); + temp = I915_READ(BXT_PORT_PLL_EBB_4(phy, ch)); temp &= ~PORT_PLL_10BIT_CLK_ENABLE; - I915_WRITE(BXT_PORT_PLL_EBB_4(port), temp); + I915_WRITE(BXT_PORT_PLL_EBB_4(phy, ch), temp); /* Write P1 & P2 */ - temp = I915_READ(BXT_PORT_PLL_EBB_0(port)); + temp = I915_READ(BXT_PORT_PLL_EBB_0(phy, ch)); temp &= ~(PORT_PLL_P1_MASK | PORT_PLL_P2_MASK); temp |= pll->config.hw_state.ebb0; - I915_WRITE(BXT_PORT_PLL_EBB_0(port), temp); + I915_WRITE(BXT_PORT_PLL_EBB_0(phy, ch), temp); /* Write M2 integer */ - temp = I915_READ(BXT_PORT_PLL(port, 0)); + temp = I915_READ(BXT_PORT_PLL(phy, ch, 0)); temp &= ~PORT_PLL_M2_MASK; temp |= pll->config.hw_state.pll0; - I915_WRITE(BXT_PORT_PLL(port, 0), temp); + I915_WRITE(BXT_PORT_PLL(phy, ch, 0), temp); /* Write N */ - temp = I915_READ(BXT_PORT_PLL(port, 1)); + temp = I915_READ(BXT_PORT_PLL(phy, ch, 1)); temp &= ~PORT_PLL_N_MASK; temp |= pll->config.hw_state.pll1; - I915_WRITE(BXT_PORT_PLL(port, 1), temp); + I915_WRITE(BXT_PORT_PLL(phy, ch, 1), temp); /* Write M2 fraction */ - temp = I915_READ(BXT_PORT_PLL(port, 2)); + temp = I915_READ(BXT_PORT_PLL(phy, ch, 2)); temp &= ~PORT_PLL_M2_FRAC_MASK; temp |= pll->config.hw_state.pll2; - I915_WRITE(BXT_PORT_PLL(port, 2), temp); + I915_WRITE(BXT_PORT_PLL(phy, ch, 2), temp); /* Write M2 fraction enable */ - temp = I915_READ(BXT_PORT_PLL(port, 3)); + temp = I915_READ(BXT_PORT_PLL(phy, ch, 3)); temp &= ~PORT_PLL_M2_FRAC_ENABLE; temp |= pll->config.hw_state.pll3; - I915_WRITE(BXT_PORT_PLL(port, 3), temp); + I915_WRITE(BXT_PORT_PLL(phy, ch, 3), temp); /* Write coeff */ - temp = I915_READ(BXT_PORT_PLL(port, 6)); + temp = I915_READ(BXT_PORT_PLL(phy, ch, 6)); temp &= ~PORT_PLL_PROP_COEFF_MASK; temp &= ~PORT_PLL_INT_COEFF_MASK; temp &= ~PORT_PLL_GAIN_CTL_MASK; temp |= pll->config.hw_state.pll6; - I915_WRITE(BXT_PORT_PLL(port, 6), temp); + I915_WRITE(BXT_PORT_PLL(phy, ch, 6), temp); /* Write calibration val */ - temp = I915_READ(BXT_PORT_PLL(port, 8)); + temp = I915_READ(BXT_PORT_PLL(phy, ch, 8)); temp &= ~PORT_PLL_TARGET_CNT_MASK; temp |= pll->config.hw_state.pll8; - I915_WRITE(BXT_PORT_PLL(port, 8), temp); + I915_WRITE(BXT_PORT_PLL(phy, ch, 8), temp); - temp = I915_READ(BXT_PORT_PLL(port, 9)); + temp = I915_READ(BXT_PORT_PLL(phy, ch, 9)); temp &= ~PORT_PLL_LOCK_THRESHOLD_MASK; temp |= pll->config.hw_state.pll9; - I915_WRITE(BXT_PORT_PLL(port, 9), temp); + I915_WRITE(BXT_PORT_PLL(phy, ch, 9), temp); - temp = I915_READ(BXT_PORT_PLL(port, 10)); + temp = I915_READ(BXT_PORT_PLL(phy, ch, 10)); temp &= ~PORT_PLL_DCO_AMP_OVR_EN_H; temp &= ~PORT_PLL_DCO_AMP_MASK; temp |= pll->config.hw_state.pll10; - I915_WRITE(BXT_PORT_PLL(port, 10), temp); + I915_WRITE(BXT_PORT_PLL(phy, ch, 10), temp); /* Recalibrate with new settings */ - temp = I915_READ(BXT_PORT_PLL_EBB_4(port)); + temp = I915_READ(BXT_PORT_PLL_EBB_4(phy, ch)); temp |= PORT_PLL_RECALIBRATE; - I915_WRITE(BXT_PORT_PLL_EBB_4(port), temp); + I915_WRITE(BXT_PORT_PLL_EBB_4(phy, ch), temp); temp &= ~PORT_PLL_10BIT_CLK_ENABLE; temp |= pll->config.hw_state.ebb4; - 
I915_WRITE(BXT_PORT_PLL_EBB_4(port), temp); + I915_WRITE(BXT_PORT_PLL_EBB_4(phy, ch), temp); /* Enable PLL */ temp = I915_READ(BXT_PORT_PLL_ENABLE(port)); @@ -1459,11 +1463,11 @@ static void bxt_ddi_pll_enable(struct drm_i915_private *dev_priv, * While we write to the group register to program all lanes at once we * can read only lane registers and we pick lanes 0/1 for that. */ - temp = I915_READ(BXT_PORT_PCS_DW12_LN01(port)); + temp = I915_READ(BXT_PORT_PCS_DW12_LN01(phy, ch)); temp &= ~LANE_STAGGER_MASK; temp &= ~LANESTAGGER_STRAP_OVRD; temp |= pll->config.hw_state.pcsdw12; - I915_WRITE(BXT_PORT_PCS_DW12_GRP(port), temp); + I915_WRITE(BXT_PORT_PCS_DW12_GRP(phy, ch), temp); } static void bxt_ddi_pll_disable(struct drm_i915_private *dev_priv, @@ -1485,6 +1489,10 @@ static bool bxt_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv, enum port port = (enum port)pll->id; /* 1:1 port->PLL mapping */ uint32_t val; bool ret; + enum dpio_phy phy; + enum dpio_channel ch; + + bxt_port_to_phy_channel(port, &phy, &ch); if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS)) return false; @@ -1495,36 +1503,36 @@ static bool bxt_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv, if (!(val & PORT_PLL_ENABLE)) goto out; - hw_state->ebb0 = I915_READ(BXT_PORT_PLL_EBB_0(port)); + hw_state->ebb0 = I915_READ(BXT_PORT_PLL_EBB_0(phy, ch)); hw_state->ebb0 &= PORT_PLL_P1_MASK | PORT_PLL_P2_MASK; - hw_state->ebb4 = I915_READ(BXT_PORT_PLL_EBB_4(port)); + hw_state->ebb4 = I915_READ(BXT_PORT_PLL_EBB_4(phy, ch)); hw_state->ebb4 &= PORT_PLL_10BIT_CLK_ENABLE; - hw_state->pll0 = I915_READ(BXT_PORT_PLL(port, 0)); + hw_state->pll0 = I915_READ(BXT_PORT_PLL(phy, ch, 0)); hw_state->pll0 &= PORT_PLL_M2_MASK; - hw_state->pll1 = I915_READ(BXT_PORT_PLL(port, 1)); + hw_state->pll1 = I915_READ(BXT_PORT_PLL(phy, ch, 1)); hw_state->pll1 &= PORT_PLL_N_MASK; - hw_state->pll2 = I915_READ(BXT_PORT_PLL(port, 2)); + hw_state->pll2 = I915_READ(BXT_PORT_PLL(phy, ch, 2)); hw_state->pll2 &= PORT_PLL_M2_FRAC_MASK; - hw_state->pll3 = I915_READ(BXT_PORT_PLL(port, 3)); + hw_state->pll3 = I915_READ(BXT_PORT_PLL(phy, ch, 3)); hw_state->pll3 &= PORT_PLL_M2_FRAC_ENABLE; - hw_state->pll6 = I915_READ(BXT_PORT_PLL(port, 6)); + hw_state->pll6 = I915_READ(BXT_PORT_PLL(phy, ch, 6)); hw_state->pll6 &= PORT_PLL_PROP_COEFF_MASK | PORT_PLL_INT_COEFF_MASK | PORT_PLL_GAIN_CTL_MASK; - hw_state->pll8 = I915_READ(BXT_PORT_PLL(port, 8)); + hw_state->pll8 = I915_READ(BXT_PORT_PLL(phy, ch, 8)); hw_state->pll8 &= PORT_PLL_TARGET_CNT_MASK; - hw_state->pll9 = I915_READ(BXT_PORT_PLL(port, 9)); + hw_state->pll9 = I915_READ(BXT_PORT_PLL(phy, ch, 9)); hw_state->pll9 &= PORT_PLL_LOCK_THRESHOLD_MASK; - hw_state->pll10 = I915_READ(BXT_PORT_PLL(port, 10)); + hw_state->pll10 = I915_READ(BXT_PORT_PLL(phy, ch, 10)); hw_state->pll10 &= PORT_PLL_DCO_AMP_OVR_EN_H | PORT_PLL_DCO_AMP_MASK; @@ -1533,11 +1541,11 @@ static bool bxt_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv, * can read only lane registers. We configure all lanes the same way, so * here just read out lanes 0/1 and output a note if lanes 2/3 differ. 
 */
-	hw_state->pcsdw12 = I915_READ(BXT_PORT_PCS_DW12_LN01(port));
-	if (I915_READ(BXT_PORT_PCS_DW12_LN23(port)) != hw_state->pcsdw12)
+	hw_state->pcsdw12 = I915_READ(BXT_PORT_PCS_DW12_LN01(phy, ch));
+	if (I915_READ(BXT_PORT_PCS_DW12_LN23(phy, ch)) != hw_state->pcsdw12)
 		DRM_DEBUG_DRIVER("lane stagger config different for lane 01 (%08x) and 23 (%08x)\n",
 				 hw_state->pcsdw12,
-				 I915_READ(BXT_PORT_PCS_DW12_LN23(port)));
+				 I915_READ(BXT_PORT_PCS_DW12_LN23(phy, ch)));
 	hw_state->pcsdw12 &= LANE_STAGGER_MASK | LANESTAGGER_STRAP_OVRD;

 	ret = true;
--
cgit v1.1

From 40dba341124bea94be8adbd60821eb2284aa24fe Mon Sep 17 00:00:00 2001
From: "Navare, Manasi D"
Date: Wed, 26 Oct 2016 16:25:55 -0700
Subject: drm/i915: Change the placement of some static functions in intel_dp.c

These static helper functions are required to be used during the
fallback link rate implementation, so they need to be placed at the top
of the file.

v3:
* Add cleanup to other patch (Mika Kahola)
v2:
* Don't move around functions declared in intel_drv.h (Rodrigo Vivi)

Cc: Jani Nikula
Cc: Daniel Vetter
Cc: Ville Syrjala
Signed-off-by: Manasi Navare
Reviewed-by: Mika Kahola
Signed-off-by: Jani Nikula
Link: http://patchwork.freedesktop.org/patch/msgid/1477524358-16563-4-git-send-email-manasi.d.navare@intel.com
---
 drivers/gpu/drm/i915/intel_dp.c | 150 ++++++++++++++++++++--------------------
 1 file changed, 75 insertions(+), 75 deletions(-)

(limited to 'drivers/gpu/drm')

diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 8f313c1..1063afe 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -213,6 +213,81 @@ intel_dp_downstream_max_dotclock(struct intel_dp *intel_dp)
 	return max_dotclk;
 }

+static int
+intel_dp_sink_rates(struct intel_dp *intel_dp, const int **sink_rates)
+{
+	if (intel_dp->num_sink_rates) {
+		*sink_rates = intel_dp->sink_rates;
+		return intel_dp->num_sink_rates;
+	}
+
+	*sink_rates = default_rates;
+
+	return (intel_dp_max_link_bw(intel_dp) >> 3) + 1;
+}
+
+static int
+intel_dp_source_rates(struct intel_dp *intel_dp, const int **source_rates)
+{
+	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
+	struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev);
+	int size;
+
+	if (IS_BROXTON(dev_priv)) {
+		*source_rates = bxt_rates;
+		size = ARRAY_SIZE(bxt_rates);
+	} else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
+		*source_rates = skl_rates;
+		size = ARRAY_SIZE(skl_rates);
+	} else {
+		*source_rates = default_rates;
+		size = ARRAY_SIZE(default_rates);
+	}
+
+	/* This depends on the fact that 5.4 is last value in the array */
+	if (!intel_dp_source_supports_hbr2(intel_dp))
+		size--;
+
+	return size;
+}
+
+static int intersect_rates(const int *source_rates, int source_len,
+			   const int *sink_rates, int sink_len,
+			   int *common_rates)
+{
+	int i = 0, j = 0, k = 0;
+
+	while (i < source_len && j < sink_len) {
+		if (source_rates[i] == sink_rates[j]) {
+			if (WARN_ON(k >= DP_MAX_SUPPORTED_RATES))
+				return k;
+			common_rates[k] = source_rates[i];
+			++k;
+			++i;
+			++j;
+		} else if (source_rates[i] < sink_rates[j]) {
+			++i;
+		} else {
+			++j;
+		}
+	}
+	return k;
+}
+
+static int intel_dp_common_rates(struct intel_dp *intel_dp,
+				 int *common_rates)
+{
+	const int *source_rates, *sink_rates;
+	int source_len, sink_len;
+
+	sink_len = intel_dp_sink_rates(intel_dp, &sink_rates);
+	source_len = intel_dp_source_rates(intel_dp, &source_rates);
+
+	return intersect_rates(source_rates, source_len,
+			       sink_rates, sink_len,
+			       common_rates);
+}
+
 static enum
drm_mode_status intel_dp_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) @@ -1291,19 +1366,6 @@ intel_dp_aux_init(struct intel_dp *intel_dp) intel_dp->aux.transfer = intel_dp_aux_transfer; } -static int -intel_dp_sink_rates(struct intel_dp *intel_dp, const int **sink_rates) -{ - if (intel_dp->num_sink_rates) { - *sink_rates = intel_dp->sink_rates; - return intel_dp->num_sink_rates; - } - - *sink_rates = default_rates; - - return (intel_dp_max_link_bw(intel_dp) >> 3) + 1; -} - bool intel_dp_source_supports_hbr2(struct intel_dp *intel_dp) { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); @@ -1316,31 +1378,6 @@ bool intel_dp_source_supports_hbr2(struct intel_dp *intel_dp) return false; } -static int -intel_dp_source_rates(struct intel_dp *intel_dp, const int **source_rates) -{ - struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); - int size; - - if (IS_BROXTON(dev_priv)) { - *source_rates = bxt_rates; - size = ARRAY_SIZE(bxt_rates); - } else if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { - *source_rates = skl_rates; - size = ARRAY_SIZE(skl_rates); - } else { - *source_rates = default_rates; - size = ARRAY_SIZE(default_rates); - } - - /* This depends on the fact that 5.4 is last value in the array */ - if (!intel_dp_source_supports_hbr2(intel_dp)) - size--; - - return size; -} - static void intel_dp_set_clock(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config) @@ -1375,43 +1412,6 @@ intel_dp_set_clock(struct intel_encoder *encoder, } } -static int intersect_rates(const int *source_rates, int source_len, - const int *sink_rates, int sink_len, - int *common_rates) -{ - int i = 0, j = 0, k = 0; - - while (i < source_len && j < sink_len) { - if (source_rates[i] == sink_rates[j]) { - if (WARN_ON(k >= DP_MAX_SUPPORTED_RATES)) - return k; - common_rates[k] = source_rates[i]; - ++k; - ++i; - ++j; - } else if (source_rates[i] < sink_rates[j]) { - ++i; - } else { - ++j; - } - } - return k; -} - -static int intel_dp_common_rates(struct intel_dp *intel_dp, - int *common_rates) -{ - const int *source_rates, *sink_rates; - int source_len, sink_len; - - sink_len = intel_dp_sink_rates(intel_dp, &sink_rates); - source_len = intel_dp_source_rates(intel_dp, &source_rates); - - return intersect_rates(source_rates, source_len, - sink_rates, sink_len, - common_rates); -} - static void snprintf_int_array(char *str, size_t len, const int *array, int nelem) { -- cgit v1.1 From 8b364b41ced2ac8eea9958ea084f10289a85419b Mon Sep 17 00:00:00 2001 From: Matt Roper Date: Wed, 26 Oct 2016 15:51:28 -0700 Subject: drm/i915: Rename for_each_plane -> for_each_universal_plane This macro's name is a bit misleading; it doesn't actually iterate over all planes since it omits the cursor plane. Its only uses are in gen9 code which is using it to iterate over the universal planes (which we treat as primary+sprites); in these cases the legacy cursor registers are programmed independently if necessary. The macro's iterator value (0 for primary plane, spritenum+1 for each secondary plane) also isn't meaningful outside the gen9 context where the hardware considers them to all be "universal" planes that follow this numbering. This is just a renaming/clarification patch with no functional change. However it will make the subsequent patches more clear. 
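
For illustration, the numbering the renamed macro exposes can be sketched
stand-alone; the num_sprites table below is a mocked stand-in, not the
driver's data:

#include <stdio.h>

/* Mocked stand-in for INTEL_INFO(dev_priv)->num_sprites[pipe]. */
static const int num_sprites[] = { 2, 2, 1 };

#define for_each_universal_plane(pipe, p) \
	for ((p) = 0; (p) < num_sprites[(pipe)] + 1; (p)++)

int main(void)
{
	int pipe = 0, plane;

	/* Plane 0 is the primary, planes 1..num_sprites are the sprites;
	 * the legacy cursor plane is deliberately never visited. */
	for_each_universal_plane(pipe, plane)
		printf("pipe %d: universal plane %d\n", pipe, plane);

	return 0;
}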
Signed-off-by: Matt Roper
Link: http://patchwork.freedesktop.org/patch/msgid/1477522291-10874-2-git-send-email-matthew.d.roper@intel.com
Reviewed-by: Paulo Zanoni
---
 drivers/gpu/drm/i915/i915_debugfs.c  | 2 +-
 drivers/gpu/drm/i915/i915_drv.h      | 2 +-
 drivers/gpu/drm/i915/intel_display.c | 2 +-
 drivers/gpu/drm/i915/intel_pm.c      | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'drivers/gpu/drm')

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index be92efe..9f5a392 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3463,7 +3463,7 @@ static int i915_ddb_info(struct seq_file *m, void *unused)
 	for_each_pipe(dev_priv, pipe) {
 		seq_printf(m, "Pipe %c\n", pipe_name(pipe));

-		for_each_plane(dev_priv, pipe, plane) {
+		for_each_universal_plane(dev_priv, pipe, plane) {
 			entry = &ddb->plane[pipe][plane];
 			seq_printf(m, " Plane%-8d%8u%8u%8u\n", plane + 1,
 				   entry->start, entry->end,
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a8f006e..930503a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -312,7 +312,7 @@ struct i915_hotplug {
 #define for_each_pipe_masked(__dev_priv, __p, __mask) \
 	for ((__p) = 0; (__p) < INTEL_INFO(__dev_priv)->num_pipes; (__p)++) \
 		for_each_if ((__mask) & (1 << (__p)))
-#define for_each_plane(__dev_priv, __pipe, __p) \
+#define for_each_universal_plane(__dev_priv, __pipe, __p) \
 	for ((__p) = 0; \
 	     (__p) < INTEL_INFO(__dev_priv)->num_sprites[(__pipe)] + 1; \
 	     (__p)++)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 895b3dc..cb7dd11 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -13485,7 +13485,7 @@ static void verify_wm_state(struct drm_crtc *crtc,
 	sw_ddb = &dev_priv->wm.skl_hw.ddb;

 	/* planes */
-	for_each_plane(dev_priv, pipe, plane) {
+	for_each_universal_plane(dev_priv, pipe, plane) {
 		hw_plane_wm = &hw_wm.planes[plane];
 		sw_plane_wm = &sw_wm->planes[plane];

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 9e0e874..58d3ba0 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -3160,7 +3160,7 @@ void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
 		if (!intel_display_power_get_if_enabled(dev_priv, power_domain))
 			continue;

-		for_each_plane(dev_priv, pipe, plane) {
+		for_each_universal_plane(dev_priv, pipe, plane) {
 			val = I915_READ(PLANE_BUF_CFG(pipe, plane));
 			skl_ddb_entry_init_from_hw(&ddb->plane[pipe][plane],
 						   val);
--
cgit v1.1

From 2c4b49a0f73f142cdc81002eb306fd9b8b6308d9 Mon Sep 17 00:00:00 2001
From: Matt Roper
Date: Wed, 26 Oct 2016 15:51:29 -0700
Subject: drm/i915: Use macro in place of open-coded for_each_universal_plane loop

This was the only use of the (misleadingly-named) intel_num_planes()
function, so we can remove it as well.
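
As a sanity check of the equivalence this relies on, a compilable sketch
with mocked data (hypothetical, not driver code):

#include <assert.h>

static const int num_sprites[] = { 2, 2, 1 };

/* What the removed helper computed: sprites + primary, no cursor. */
static int intel_num_planes(int pipe)
{
	return num_sprites[pipe] + 1;
}

#define for_each_universal_plane(pipe, p) \
	for ((p) = 0; (p) < num_sprites[(pipe)] + 1; (p)++)

int main(void)
{
	int pipe, plane;

	for (pipe = 0; pipe < 3; pipe++) {
		int visited = 0;

		for_each_universal_plane(pipe, plane)
			visited++;

		/* The macro visits exactly the planes the helper counted. */
		assert(visited == intel_num_planes(pipe));
	}
	return 0;
}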
Signed-off-by: Matt Roper
Link: http://patchwork.freedesktop.org/patch/msgid/1477522291-10874-3-git-send-email-matthew.d.roper@intel.com
Reviewed-by: Paulo Zanoni
---
 drivers/gpu/drm/i915/intel_drv.h | 9 ---------
 drivers/gpu/drm/i915/intel_pm.c  | 2 +-
 2 files changed, 1 insertion(+), 10 deletions(-)

(limited to 'drivers/gpu/drm')

diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 29ab526..ace222c 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -1108,15 +1108,6 @@ hdmi_to_dig_port(struct intel_hdmi *intel_hdmi)
 	return container_of(intel_hdmi, struct intel_digital_port, hdmi);
 }

-/*
- * Returns the number of planes for this pipe, ie the number of sprites + 1
- * (primary plane). This doesn't count the cursor plane then.
- */
-static inline unsigned int intel_num_planes(struct intel_crtc *crtc)
-{
-	return INTEL_INFO(crtc->base.dev)->num_sprites[crtc->pipe] + 1;
-}
-
 /* intel_fifo_underrun.c */
 bool intel_set_cpu_fifo_underrun_reporting(struct drm_i915_private *dev_priv,
 					   enum pipe pipe, bool enable);
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 58d3ba0..6f19e60 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -4232,7 +4232,7 @@ static void skl_update_wm(struct drm_crtc *crtc)
 	if (crtc->state->active_changed) {
 		int plane;

-		for (plane = 0; plane < intel_num_planes(intel_crtc); plane++)
+		for_each_universal_plane(dev_priv, pipe, plane)
 			skl_write_plane_wm(intel_crtc, &pipe_wm->planes[plane],
 					   &results->ddb, plane);
--
cgit v1.1

From fc0990903c479ecafc67f7bf030c68b19215f2d5 Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Fri, 28 Oct 2016 15:27:56 +0100
Subject: drm/i915: Remove insert-page shortcut from execbuf relocate_iomap()

We are not allowed to touch the GTT entries underneath an atomic
section, as they take an rpm wakelock (which is illegal from atomic
context) and, in the near future, acquiring the DMA address for a page
within an object may sleep for an allocation. This makes the current
short-circuit in reloc_iomap() for performing a second relocation on an
adjacent page illegal: we need to release the atomic iomapping, look up
the DMA address, and insert it into the GTT before re-entering the
atomic iomap section. As it happens, this is precisely what we do if we
are using an iomapping over the full object and not just a single page,
so by removing the shortcut we do the right thing.
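
The corrected ordering can be sketched as follows; the helpers are
hypothetical stand-ins for the io-mapping and GGTT primitives rather
than the driver's actual API:

#include <stdio.h>

/* Hypothetical stand-ins for the kernel primitives involved. */
static void io_unmap_atomic(void)    { printf("leave atomic iomap\n"); }
static void ggtt_insert_page(int pg) { printf("insert page %d (may sleep)\n", pg); }
static void *io_map_atomic(int pg)   { printf("map page %d atomically\n", pg); return (void *)1; }

/* Every page, including an adjacent one, takes the full path: drop the
 * atomic mapping, do the (potentially sleeping) DMA lookup and GTT
 * insertion, then re-enter the atomic section.
 */
static void *reloc_map_page(void **vaddr, int page)
{
	if (*vaddr)
		io_unmap_atomic();

	ggtt_insert_page(page);

	*vaddr = io_map_atomic(page);
	return *vaddr;
}

int main(void)
{
	void *vaddr = NULL;

	reloc_map_page(&vaddr, 0);
	reloc_map_page(&vaddr, 1);	/* no atomic shortcut for page 1 */
	return 0;
}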
Fixes: 9c870d03674f ("drm/i915: Use RPM as the barrier for controlling...") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20161028142756.3850-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index e52affd..4b1019e 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -418,15 +418,6 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, unsigned long offset; void *vaddr; - if (cache->node.allocated) { - wmb(); - ggtt->base.insert_page(&ggtt->base, - i915_gem_object_get_dma_address(obj, page), - cache->node.start, I915_CACHE_NONE, 0); - cache->page = page; - return unmask_page(cache->vaddr); - } - if (cache->vaddr) { io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr)); } else { @@ -466,6 +457,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, offset = cache->node.start; if (cache->node.allocated) { + wmb(); ggtt->base.insert_page(&ggtt->base, i915_gem_object_get_dma_address(obj, page), offset, I915_CACHE_NONE, 0); -- cgit v1.1 From b52992c06c9020cecb1b9807855301e5f62ec968 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:24 +0100 Subject: drm/i915: Support asynchronous waits on struct fence from i915_gem_request We will need to wait on DMA completion (as signaled via struct fence) before executing our i915_gem_request. Therefore we want to expose a method for adding the await on the fence itself to the request. v2: Add a comment detailing a failure to handle a signal-on-any fence-array. v3: Pretend that magic numbers don't exist. 
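
A usage sketch for the new entry point; the caller below is
hypothetical, while the function names are the ones this patch adds:

/* Hypothetical caller: make a request execute only after an external
 * DMA fence has signaled; the wait is queued asynchronously rather
 * than blocking this thread.
 */
static int schedule_after_fence(struct drm_i915_gem_request *req,
				struct dma_fence *fence)
{
	int err;

	err = i915_gem_request_await_dma_fence(req, fence);
	if (err < 0)
		return err;

	i915_add_request(req);
	return 0;
}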
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 3 ++ drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 +- drivers/gpu/drm/i915/i915_gem_request.c | 48 ++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_gem_request.h | 6 ++-- 5 files changed, 58 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 930503a..e95352c 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1423,6 +1423,9 @@ struct i915_error_state_file_priv { struct drm_i915_error_state *error; }; +#define I915_RESET_TIMEOUT (10 * HZ) /* 10s */ +#define I915_FENCE_TIMEOUT (10 * HZ) /* 10s */ + struct i915_gpu_error { /* For hangcheck timer */ #define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 87018df9..0c186b7 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -114,7 +114,7 @@ i915_gem_wait_for_error(struct i915_gpu_error *error) */ ret = wait_event_interruptible_timeout(error->reset_queue, !i915_reset_in_progress(error), - 10*HZ); + I915_RESET_TIMEOUT); if (ret == 0) { DRM_ERROR("Timed out waiting for the gpu reset to complete\n"); return -EIO; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 4b1019e..61365ae 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1134,7 +1134,8 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, if (resv) { ret = i915_sw_fence_await_reservation (&req->submit, resv, &i915_fence_ops, - obj->base.pending_write_domain, 10*HZ, + obj->base.pending_write_domain, + I915_FENCE_TIMEOUT, GFP_KERNEL | __GFP_NOWARN); if (ret < 0) return ret; diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index f9af2a0..5e38bc0 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -23,6 +23,7 @@ */ #include +#include #include "i915_drv.h" @@ -496,6 +497,53 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to, return 0; } +int +i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req, + struct dma_fence *fence) +{ + struct dma_fence_array *array; + int ret; + int i; + + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + return 0; + + if (dma_fence_is_i915(fence)) + return i915_gem_request_await_request(req, to_request(fence)); + + if (!dma_fence_is_array(fence)) { + ret = i915_sw_fence_await_dma_fence(&req->submit, + fence, I915_FENCE_TIMEOUT, + GFP_KERNEL); + return ret < 0 ? ret : 0; + } + + /* Note that if the fence-array was created in signal-on-any mode, + * we should *not* decompose it into its individual fences. However, + * we don't currently store which mode the fence-array is operating + * in. Fortunately, the only user of signal-on-any is private to + * amdgpu and we should not see any incoming fence-array from + * sync-file being in signal-on-any mode. 
+	 */
+	array = to_dma_fence_array(fence);
+	for (i = 0; i < array->num_fences; i++) {
+		struct dma_fence *child = array->fences[i];
+
+		if (dma_fence_is_i915(child))
+			ret = i915_gem_request_await_request(req,
+							     to_request(child));
+		else
+			ret = i915_sw_fence_await_dma_fence(&req->submit,
+							    child, I915_FENCE_TIMEOUT,
+							    GFP_KERNEL);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
 /**
  * i915_gem_request_await_object - set this request to (async) wait upon a bo
  *
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index bceeaa3..4e6d038 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -147,7 +147,7 @@ struct drm_i915_gem_request {

 extern const struct dma_fence_ops i915_fence_ops;

-static inline bool fence_is_i915(struct dma_fence *fence)
+static inline bool dma_fence_is_i915(const struct dma_fence *fence)
 {
 	return fence->ops == &i915_fence_ops;
 }
@@ -176,7 +176,7 @@ to_request(struct dma_fence *fence)
 {
 	/* We assume that NULL fence/request are interoperable */
 	BUILD_BUG_ON(offsetof(struct drm_i915_gem_request, fence) != 0);
-	GEM_BUG_ON(fence && !fence_is_i915(fence));
+	GEM_BUG_ON(fence && !dma_fence_is_i915(fence));
 	return container_of(fence, struct drm_i915_gem_request, fence);
 }

@@ -214,6 +214,8 @@ int i915_gem_request_await_object(struct drm_i915_gem_request *to,
 				  struct drm_i915_gem_object *obj,
 				  bool write);
+int i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req,
+				     struct dma_fence *fence);

 void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches);
 #define i915_add_request(req) \
--
cgit v1.1

From 7e941861c9c2aa69c0ef6665e2c0c1174c4a4b0e Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Fri, 28 Oct 2016 13:58:25 +0100
Subject: drm/i915: Allow i915_sw_fence_await_sw_fence() to allocate

In forthcoming patches, we want to be able to dynamically allocate the
wait_queue_t used whilst awaiting. This is more convenient if we extend
i915_sw_fence_await_sw_fence() to perform the allocation for us if we
pass in a gfp mask as an alternative to a preallocated struct.
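
A usage sketch (hypothetical caller): with the gfp variant the caller no
longer embeds a wait_queue_t, and on allocation failure with a blocking
mask the implementation falls back to a synchronous wait:

static int chain_fences(struct i915_sw_fence *fence,
			struct i915_sw_fence *signaler)
{
	int pending;

	/* Allocates the wait_queue_t on our behalf. */
	pending = i915_sw_fence_await_sw_fence_gfp(fence, signaler,
						   GFP_KERNEL | __GFP_NOWARN);
	return pending < 0 ? pending : 0;
}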
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_sw_fence.c | 40 ++++++++++++++++++++++++++++++++---- drivers/gpu/drm/i915/i915_sw_fence.h | 8 ++++++++ 2 files changed, 44 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index 8185002..95f2f12 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -13,6 +13,8 @@ #include "i915_sw_fence.h" +#define I915_SW_FENCE_FLAG_ALLOC BIT(3) /* after WQ_FLAG_* for safety */ + static DEFINE_SPINLOCK(i915_sw_fence_lock); static int __i915_sw_fence_notify(struct i915_sw_fence *fence, @@ -135,6 +137,8 @@ static int i915_sw_fence_wake(wait_queue_t *wq, unsigned mode, int flags, void * list_del(&wq->task_list); __i915_sw_fence_complete(wq->private, key); i915_sw_fence_put(wq->private); + if (wq->flags & I915_SW_FENCE_FLAG_ALLOC) + kfree(wq); return 0; } @@ -192,9 +196,9 @@ static bool i915_sw_fence_check_if_after(struct i915_sw_fence *fence, return err; } -int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence, - struct i915_sw_fence *signaler, - wait_queue_t *wq) +static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence, + struct i915_sw_fence *signaler, + wait_queue_t *wq, gfp_t gfp) { unsigned long flags; int pending; @@ -206,8 +210,22 @@ int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence, if (unlikely(i915_sw_fence_check_if_after(fence, signaler))) return -EINVAL; + pending = 0; + if (!wq) { + wq = kmalloc(sizeof(*wq), gfp); + if (!wq) { + if (!gfpflags_allow_blocking(gfp)) + return -ENOMEM; + + i915_sw_fence_wait(signaler); + return 0; + } + + pending |= I915_SW_FENCE_FLAG_ALLOC; + } + INIT_LIST_HEAD(&wq->task_list); - wq->flags = 0; + wq->flags = pending; wq->func = i915_sw_fence_wake; wq->private = i915_sw_fence_get(fence); @@ -226,6 +244,20 @@ int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence, return pending; } +int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence, + struct i915_sw_fence *signaler, + wait_queue_t *wq) +{ + return __i915_sw_fence_await_sw_fence(fence, signaler, wq, 0); +} + +int i915_sw_fence_await_sw_fence_gfp(struct i915_sw_fence *fence, + struct i915_sw_fence *signaler, + gfp_t gfp) +{ + return __i915_sw_fence_await_sw_fence(fence, signaler, NULL, gfp); +} + struct i915_sw_dma_fence_cb { struct dma_fence_cb base; struct i915_sw_fence *fence; diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h index cd239e9..707dfc4 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.h +++ b/drivers/gpu/drm/i915/i915_sw_fence.h @@ -46,6 +46,9 @@ void i915_sw_fence_commit(struct i915_sw_fence *fence); int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence, struct i915_sw_fence *after, wait_queue_t *wq); +int i915_sw_fence_await_sw_fence_gfp(struct i915_sw_fence *fence, + struct i915_sw_fence *after, + gfp_t gfp); int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence, struct dma_fence *dma, unsigned long timeout, @@ -62,4 +65,9 @@ static inline bool i915_sw_fence_done(const struct i915_sw_fence *fence) return atomic_read(&fence->pending) < 0; } +static inline void i915_sw_fence_wait(struct i915_sw_fence *fence) +{ + wait_event(fence->wait, i915_sw_fence_done(fence)); +} + #endif /* _I915_SW_FENCE_H_ */ -- cgit v1.1 From c92ac094a9c647ebcff56c0535f865bc7f2aa052 Mon Sep 17 00:00:00 2001 
From: Chris Wilson
Date: Fri, 28 Oct 2016 13:58:26 +0100
Subject: drm/i915: Remove superfluous wait_for_error() from throttle-ioctl

The throttle-ioctl never touches the struct_mutex. It does, however, as
part of its ABI, report whether the hardware is terminally wedged. For
that purpose, it only has to report the current state and not incur the
cost of checking/waiting on every invocation, as we do not have to wait
for a reset before waiting on a request to ensure completion (that is
baked into the wait-request implementation).

Signed-off-by: Chris Wilson
Reviewed-by: Joonas Lahtinen
Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-3-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_gem.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'drivers/gpu/drm')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0c186b7..1254143 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3705,10 +3705,6 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
 	struct drm_i915_gem_request *request, *target = NULL;
 	int ret;

-	ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
-	if (ret)
-		return ret;
-
 	/* ABI: return -EIO if already wedged */
 	if (i915_terminally_wedged(&dev_priv->gpu_error))
 		return -EIO;
--
cgit v1.1

From e95433c73a11759203af1cae5958f998c9673370 Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Fri, 28 Oct 2016 13:58:27 +0100
Subject: drm/i915: Rearrange i915_wait_request() accounting with callers

Our low-level wait routine has evolved from our generic wait interface,
which handled unlocked waits, RPS boosting and waits with time tracking.
If we push our GEM fence tracking to use reservation_objects (required
for handling multiple timelines), we lose the ability to pass the
required information down to i915_wait_request(). However, if we push
the extra functionality from i915_wait_request() to the individual
callsites (i915_gem_object_wait_rendering and i915_gem_wait_ioctl) that
make use of those extras, we can both simplify our low-level wait and
prepare for extending the GEM interface for use of reservation_objects.
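
A sketch of the resulting call pattern (hypothetical caller, using the
signature this patch introduces), with the timeout now travelling in
and out as jiffies:

/* Hypothetical caller; anything beyond the return code is elided. */
static int wait_for_request(struct drm_i915_gem_request *req)
{
	long timeout;

	/* MAX_SCHEDULE_TIMEOUT means wait indefinitely; a positive
	 * return is the time remaining in jiffies, a negative one an
	 * error such as -ETIME or -ERESTARTSYS.
	 */
	timeout = i915_wait_request(req,
				    I915_WAIT_INTERRUPTIBLE,
				    MAX_SCHEDULE_TIMEOUT);
	return timeout < 0 ? timeout : 0;
}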
v2: Rewrite i915_wait_request() kerneldocs Signed-off-by: Chris Wilson Cc: Matthew Auld Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-4-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/gvt/scheduler.c | 9 +- drivers/gpu/drm/i915/i915_drv.h | 7 +- drivers/gpu/drm/i915/i915_gem.c | 309 ++++++++++++++++++++++++-------- drivers/gpu/drm/i915/i915_gem_request.c | 146 ++++----------- drivers/gpu/drm/i915/i915_gem_request.h | 32 ++-- drivers/gpu/drm/i915/i915_gem_userptr.c | 12 +- drivers/gpu/drm/i915/intel_display.c | 27 +-- drivers/gpu/drm/i915/intel_ringbuffer.c | 14 +- drivers/gpu/drm/i915/intel_ringbuffer.h | 3 +- 9 files changed, 316 insertions(+), 243 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index f7e320b..18acb45 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -400,6 +400,7 @@ static int workload_thread(void *priv) int ring_id = p->ring_id; struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler; struct intel_vgpu_workload *workload = NULL; + long lret; int ret; bool need_force_wake = IS_SKYLAKE(gvt->dev_priv); DEFINE_WAIT_FUNC(wait, woken_wake_function); @@ -449,10 +450,12 @@ static int workload_thread(void *priv) gvt_dbg_sched("ring id %d wait workload %p\n", workload->ring_id, workload); - workload->status = i915_wait_request(workload->req, - 0, NULL, NULL); - if (workload->status != 0) + lret = i915_wait_request(workload->req, + 0, MAX_SCHEDULE_TIMEOUT); + if (lret < 0) { + workload->status = lret; gvt_err("fail to wait workload, skip\n"); + } complete: gvt_dbg_sched("will complete workload %p\n, status: %d\n", diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e95352c..cf4b242 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3319,9 +3319,10 @@ int __must_check i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, int __must_check i915_gem_suspend(struct drm_device *dev); void i915_gem_resume(struct drm_device *dev); int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf); -int __must_check -i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, - bool readonly); +int i915_gem_object_wait(struct drm_i915_gem_object *obj, + unsigned int flags, + long timeout, + struct intel_rps_client *rps); int __must_check i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1254143..537f502 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -292,7 +292,12 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj) * must wait for all rendering to complete to the object (as unbinding * must anyway), and retire the requests. */ - ret = i915_gem_object_wait_rendering(obj, false); + ret = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_LOCKED | + I915_WAIT_ALL, + MAX_SCHEDULE_TIMEOUT, + NULL); if (ret) return ret; @@ -311,88 +316,172 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj) return ret; } -/** - * Ensures that all rendering to the object has completed and the object is - * safe to unbind from the GTT or access from the CPU. 
- * @obj: i915 gem object - * @readonly: waiting for just read access or read-write access - */ -int -i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, - bool readonly) +static long +i915_gem_object_wait_fence(struct dma_fence *fence, + unsigned int flags, + long timeout, + struct intel_rps_client *rps) { - struct reservation_object *resv; - struct i915_gem_active *active; - unsigned long active_mask; - int idx; + struct drm_i915_gem_request *rq; - lockdep_assert_held(&obj->base.dev->struct_mutex); + BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1); - if (!readonly) { - active = obj->last_read; - active_mask = i915_gem_object_get_active(obj); - } else { - active_mask = 1; - active = &obj->last_write; + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + return timeout; + + if (!dma_fence_is_i915(fence)) + return dma_fence_wait_timeout(fence, + flags & I915_WAIT_INTERRUPTIBLE, + timeout); + + rq = to_request(fence); + if (i915_gem_request_completed(rq)) + goto out; + + /* This client is about to stall waiting for the GPU. In many cases + * this is undesirable and limits the throughput of the system, as + * many clients cannot continue processing user input/output whilst + * blocked. RPS autotuning may take tens of milliseconds to respond + * to the GPU load and thus incurs additional latency for the client. + * We can circumvent that by promoting the GPU frequency to maximum + * before we wait. This makes the GPU throttle up much more quickly + * (good for benchmarks and user experience, e.g. window animations), + * but at a cost of spending more power processing the workload + * (bad for battery). Not all clients even want their results + * immediately and for them we should just let the GPU select its own + * frequency to maximise efficiency. To prevent a single client from + * forcing the clocks too high for the whole system, we only allow + * each client to waitboost once in a busy period. + */ + if (rps) { + if (INTEL_GEN(rq->i915) >= 6) + gen6_rps_boost(rq->i915, rps, rq->emitted_jiffies); + else + rps = NULL; } - for_each_active(active_mask, idx) { + timeout = i915_wait_request(rq, flags, timeout); + +out: + if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq)) + i915_gem_request_retire_upto(rq); + + if (rps && rq->fence.seqno == rq->engine->last_submitted_seqno) { + /* The GPU is now idle and this client has stalled. + * Since no other client has submitted a request in the + * meantime, assume that this client is the only one + * supplying work to the GPU but is unable to keep that + * work supplied because it is waiting. Since the GPU is + * then never kept fully busy, RPS autoclocking will + * keep the clocks relatively low, causing further delays. + * Compensate by giving the synchronous client credit for + * a waitboost next time. 
+ */ + spin_lock(&rq->i915->rps.client_lock); + list_del_init(&rps->link); + spin_unlock(&rq->i915->rps.client_lock); + } + + return timeout; +} + +static long +i915_gem_object_wait_reservation(struct reservation_object *resv, + unsigned int flags, + long timeout, + struct intel_rps_client *rps) +{ + struct dma_fence *excl; + + if (flags & I915_WAIT_ALL) { + struct dma_fence **shared; + unsigned int count, i; int ret; - ret = i915_gem_active_wait(&active[idx], - &obj->base.dev->struct_mutex); + ret = reservation_object_get_fences_rcu(resv, + &excl, &count, &shared); if (ret) return ret; - } - resv = i915_gem_object_get_dmabuf_resv(obj); - if (resv) { - long err; + for (i = 0; i < count; i++) { + timeout = i915_gem_object_wait_fence(shared[i], + flags, timeout, + rps); + if (timeout <= 0) + break; + + dma_fence_put(shared[i]); + } - err = reservation_object_wait_timeout_rcu(resv, !readonly, true, - MAX_SCHEDULE_TIMEOUT); - if (err < 0) - return err; + for (; i < count; i++) + dma_fence_put(shared[i]); + kfree(shared); + } else { + excl = reservation_object_get_excl_rcu(resv); } - return 0; + if (excl && timeout > 0) + timeout = i915_gem_object_wait_fence(excl, flags, timeout, rps); + + dma_fence_put(excl); + + return timeout; } -/* A nonblocking variant of the above wait. Must be called prior to - * acquiring the mutex for the object, as the object state may change - * during this call. A reference must be held by the caller for the object. +/** + * Waits for rendering to the object to be completed + * @obj: i915 gem object + * @flags: how to wait (under a lock, for all rendering or just for writes etc) + * @timeout: how long to wait + * @rps: client (user process) to charge for any waitboosting */ -static __must_check int -__unsafe_wait_rendering(struct drm_i915_gem_object *obj, - struct intel_rps_client *rps, - bool readonly) +int +i915_gem_object_wait(struct drm_i915_gem_object *obj, + unsigned int flags, + long timeout, + struct intel_rps_client *rps) { + struct reservation_object *resv; struct i915_gem_active *active; unsigned long active_mask; int idx; - active_mask = __I915_BO_ACTIVE(obj); - if (!active_mask) - return 0; + might_sleep(); +#if IS_ENABLED(CONFIG_LOCKDEP) + GEM_BUG_ON(debug_locks && + !!lockdep_is_held(&obj->base.dev->struct_mutex) != + !!(flags & I915_WAIT_LOCKED)); +#endif + GEM_BUG_ON(timeout < 0); - if (!readonly) { + if (flags & I915_WAIT_ALL) { active = obj->last_read; + active_mask = i915_gem_object_get_active(obj); } else { active_mask = 1; active = &obj->last_write; } for_each_active(active_mask, idx) { - int ret; - - ret = i915_gem_active_wait_unlocked(&active[idx], - I915_WAIT_INTERRUPTIBLE, - NULL, rps); - if (ret) - return ret; + struct drm_i915_gem_request *request; + + request = i915_gem_active_get_unlocked(&active[idx]); + if (request) { + timeout = i915_gem_object_wait_fence(&request->fence, + flags, timeout, + rps); + i915_gem_request_put(request); + } + if (timeout < 0) + return timeout; } - return 0; + resv = i915_gem_object_get_dmabuf_resv(obj); + if (resv) + timeout = i915_gem_object_wait_reservation(resv, + flags, timeout, + rps); + return timeout < 0 ? 
timeout : 0; } static struct intel_rps_client *to_rps_client(struct drm_file *file) @@ -449,12 +538,18 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, struct drm_device *dev = obj->base.dev; void *vaddr = obj->phys_handle->vaddr + args->offset; char __user *user_data = u64_to_user_ptr(args->data_ptr); - int ret = 0; + int ret; /* We manually control the domain here and pretend that it * remains coherent i.e. in the GTT domain, like shmem_pwrite. */ - ret = i915_gem_object_wait_rendering(obj, false); + lockdep_assert_held(&obj->base.dev->struct_mutex); + ret = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_LOCKED | + I915_WAIT_ALL, + MAX_SCHEDULE_TIMEOUT, + to_rps_client(file_priv)); if (ret) return ret; @@ -614,12 +709,17 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, { int ret; - *needs_clflush = 0; + lockdep_assert_held(&obj->base.dev->struct_mutex); + *needs_clflush = 0; if (!i915_gem_object_has_struct_page(obj)) return -ENODEV; - ret = i915_gem_object_wait_rendering(obj, true); + ret = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT, + NULL); if (ret) return ret; @@ -661,11 +761,18 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, { int ret; + lockdep_assert_held(&obj->base.dev->struct_mutex); + *needs_clflush = 0; if (!i915_gem_object_has_struct_page(obj)) return -ENODEV; - ret = i915_gem_object_wait_rendering(obj, false); + ret = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_LOCKED | + I915_WAIT_ALL, + MAX_SCHEDULE_TIMEOUT, + NULL); if (ret) return ret; @@ -1051,7 +1158,10 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, trace_i915_gem_object_pread(obj, args->offset, args->size); - ret = __unsafe_wait_rendering(obj, to_rps_client(file), true); + ret = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT, + to_rps_client(file)); if (ret) goto err; @@ -1449,7 +1559,11 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, trace_i915_gem_object_pwrite(obj, args->offset, args->size); - ret = __unsafe_wait_rendering(obj, to_rps_client(file), false); + ret = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_ALL, + MAX_SCHEDULE_TIMEOUT, + to_rps_client(file)); if (ret) goto err; @@ -1536,7 +1650,11 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, * We will repeat the flush holding the lock in the normal manner * to catch cases where we are gazumped. */ - ret = __unsafe_wait_rendering(obj, to_rps_client(file), !write_domain); + ret = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | + (write_domain ? I915_WAIT_ALL : 0), + MAX_SCHEDULE_TIMEOUT, + to_rps_client(file)); if (ret) goto err; @@ -1772,7 +1890,10 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf) * repeat the flush holding the lock in the normal manner to catch cases * where we are gazumped. 
*/ - ret = __unsafe_wait_rendering(obj, NULL, !write); + ret = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT, + NULL); if (ret) goto err; @@ -2817,6 +2938,17 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) mutex_unlock(&obj->base.dev->struct_mutex); } +static unsigned long to_wait_timeout(s64 timeout_ns) +{ + if (timeout_ns < 0) + return MAX_SCHEDULE_TIMEOUT; + + if (timeout_ns == 0) + return 0; + + return nsecs_to_jiffies_timeout(timeout_ns); +} + /** * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT * @dev: drm device pointer @@ -2845,10 +2977,9 @@ int i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { struct drm_i915_gem_wait *args = data; - struct intel_rps_client *rps = to_rps_client(file); struct drm_i915_gem_object *obj; - unsigned long active; - int idx, ret = 0; + ktime_t start; + long ret; if (args->flags != 0) return -EINVAL; @@ -2857,14 +2988,17 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) if (!obj) return -ENOENT; - active = __I915_BO_ACTIVE(obj); - for_each_active(active, idx) { - s64 *timeout = args->timeout_ns >= 0 ? &args->timeout_ns : NULL; - ret = i915_gem_active_wait_unlocked(&obj->last_read[idx], - I915_WAIT_INTERRUPTIBLE, - timeout, rps); - if (ret) - break; + start = ktime_get(); + + ret = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL, + to_wait_timeout(args->timeout_ns), + to_rps_client(file)); + + if (args->timeout_ns > 0) { + args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start)); + if (args->timeout_ns < 0) + args->timeout_ns = 0; } i915_gem_object_put_unlocked(obj); @@ -3283,7 +3417,13 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) uint32_t old_write_domain, old_read_domains; int ret; - ret = i915_gem_object_wait_rendering(obj, !write); + lockdep_assert_held(&obj->base.dev->struct_mutex); + ret = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_LOCKED | + (write ? I915_WAIT_ALL : 0), + MAX_SCHEDULE_TIMEOUT, + NULL); if (ret) return ret; @@ -3400,7 +3540,12 @@ restart: * If we wait upon the object, we know that all the bound * VMA are no longer active. */ - ret = i915_gem_object_wait_rendering(obj, false); + ret = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_LOCKED | + I915_WAIT_ALL, + MAX_SCHEDULE_TIMEOUT, + NULL); if (ret) return ret; @@ -3647,7 +3792,13 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) uint32_t old_write_domain, old_read_domains; int ret; - ret = i915_gem_object_wait_rendering(obj, !write); + lockdep_assert_held(&obj->base.dev->struct_mutex); + ret = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_LOCKED | + (write ? 
I915_WAIT_ALL : 0), + MAX_SCHEDULE_TIMEOUT, + NULL); if (ret) return ret; @@ -3703,7 +3854,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) struct drm_i915_file_private *file_priv = file->driver_priv; unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES; struct drm_i915_gem_request *request, *target = NULL; - int ret; + long ret; /* ABI: return -EIO if already wedged */ if (i915_terminally_wedged(&dev_priv->gpu_error)) @@ -3730,10 +3881,12 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) if (target == NULL) return 0; - ret = i915_wait_request(target, I915_WAIT_INTERRUPTIBLE, NULL, NULL); + ret = i915_wait_request(target, + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT); i915_gem_request_put(target); - return ret; + return ret < 0 ? ret : 0; } static bool diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 5e38bc0..fbe0923 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -59,31 +59,9 @@ static bool i915_fence_enable_signaling(struct dma_fence *fence) static signed long i915_fence_wait(struct dma_fence *fence, bool interruptible, - signed long timeout_jiffies) + signed long timeout) { - s64 timeout_ns, *timeout; - int ret; - - if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT) { - timeout_ns = jiffies_to_nsecs(timeout_jiffies); - timeout = &timeout_ns; - } else { - timeout = NULL; - } - - ret = i915_wait_request(to_request(fence), - interruptible, timeout, - NO_WAITBOOST); - if (ret == -ETIME) - return 0; - - if (ret < 0) - return ret; - - if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT) - timeout_jiffies = nsecs_to_jiffies(timeout_ns); - - return timeout_jiffies; + return i915_wait_request(to_request(fence), interruptible, timeout); } static void i915_fence_value_str(struct dma_fence *fence, char *str, int size) @@ -166,7 +144,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) struct i915_gem_active *active, *next; trace_i915_gem_request_retire(request); - list_del(&request->link); + list_del_init(&request->link); /* We know the GPU must have read the request to have * sent us the seqno + interrupt, so use the position @@ -224,7 +202,8 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req) struct drm_i915_gem_request *tmp; lockdep_assert_held(&req->i915->drm.struct_mutex); - GEM_BUG_ON(list_empty(&req->link)); + if (list_empty(&req->link)) + return; do { tmp = list_first_entry(&engine->request_list, @@ -780,75 +759,48 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req, /** * i915_wait_request - wait until execution of request has finished - * @req: duh! + * @req: the request to wait upon * @flags: how to wait - * @timeout: in - how long to wait (NULL forever); out - how much time remaining - * @rps: client to charge for RPS boosting + * @timeout: how long to wait in jiffies + * + * i915_wait_request() waits for the request to be completed, for a + * maximum of @timeout jiffies (with MAX_SCHEDULE_TIMEOUT implying an + * unbounded wait). * - * Note: It is of utmost importance that the passed in seqno and reset_counter - * values have been read by the caller in an smp safe manner. Where read-side - * locks are involved, it is sufficient to read the reset_counter before - * unlocking the lock that protects the seqno. For lockless tricks, the - * reset_counter _must_ be read before, and an appropriate smp_rmb must be - * inserted. 
+ * If the caller holds the struct_mutex, the caller must pass I915_WAIT_LOCKED + * in via the flags, and conversely, if the struct_mutex is not held, the caller + * must not specify that the wait is locked. * - * Returns 0 if the request was found within the alloted time. Else returns the - * errno with remaining time filled in timeout argument. + * Returns the remaining time (in jiffies) if the request completed, which may + * be zero or -ETIME if the request is unfinished after the timeout expires. + * May return -EINTR if called with I915_WAIT_INTERRUPTIBLE and a signal is + * pending before the request completes. */ -int i915_wait_request(struct drm_i915_gem_request *req, - unsigned int flags, - s64 *timeout, - struct intel_rps_client *rps) +long i915_wait_request(struct drm_i915_gem_request *req, + unsigned int flags, + long timeout) { const int state = flags & I915_WAIT_INTERRUPTIBLE ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; DEFINE_WAIT(reset); struct intel_wait wait; - unsigned long timeout_remain; - int ret = 0; might_sleep(); #if IS_ENABLED(CONFIG_LOCKDEP) - GEM_BUG_ON(!!lockdep_is_held(&req->i915->drm.struct_mutex) != + GEM_BUG_ON(debug_locks && + !!lockdep_is_held(&req->i915->drm.struct_mutex) != !!(flags & I915_WAIT_LOCKED)); #endif + GEM_BUG_ON(timeout < 0); if (i915_gem_request_completed(req)) - return 0; + return timeout; - timeout_remain = MAX_SCHEDULE_TIMEOUT; - if (timeout) { - if (WARN_ON(*timeout < 0)) - return -EINVAL; - - if (*timeout == 0) - return -ETIME; - - /* Record current time in case interrupted, or wedged */ - timeout_remain = nsecs_to_jiffies_timeout(*timeout); - *timeout += ktime_get_raw_ns(); - } + if (!timeout) + return -ETIME; trace_i915_gem_request_wait_begin(req); - /* This client is about to stall waiting for the GPU. In many cases - * this is undesirable and limits the throughput of the system, as - * many clients cannot continue processing user input/output whilst - * blocked. RPS autotuning may take tens of milliseconds to respond - * to the GPU load and thus incurs additional latency for the client. - * We can circumvent that by promoting the GPU frequency to maximum - * before we wait. This makes the GPU throttle up much more quickly - * (good for benchmarks and user experience, e.g. window animations), - * but at a cost of spending more power processing the workload - * (bad for battery). Not all clients even want their results - * immediately and for them we should just let the GPU select its own - * frequency to maximise efficiency. To prevent a single client from - * forcing the clocks too high for the whole system, we only allow - * each client to waitboost once in a busy period. 
- */ - if (IS_RPS_CLIENT(rps) && INTEL_GEN(req->i915) >= 6) - gen6_rps_boost(req->i915, rps, req->emitted_jiffies); - /* Optimistic short spin before touching IRQs */ if (i915_spin_request(req, state, 5)) goto complete; @@ -867,16 +819,17 @@ int i915_wait_request(struct drm_i915_gem_request *req, for (;;) { if (signal_pending_state(state, current)) { - ret = -ERESTARTSYS; + timeout = -ERESTARTSYS; break; } - timeout_remain = io_schedule_timeout(timeout_remain); - if (timeout_remain == 0) { - ret = -ETIME; + if (!timeout) { + timeout = -ETIME; break; } + timeout = io_schedule_timeout(timeout); + if (intel_wait_complete(&wait)) break; @@ -923,40 +876,7 @@ wakeup: complete: trace_i915_gem_request_wait_end(req); - if (timeout) { - *timeout -= ktime_get_raw_ns(); - if (*timeout < 0) - *timeout = 0; - - /* - * Apparently ktime isn't accurate enough and occasionally has a - * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch - * things up to make the test happy. We allow up to 1 jiffy. - * - * This is a regrssion from the timespec->ktime conversion. - */ - if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000) - *timeout = 0; - } - - if (IS_RPS_USER(rps) && - req->fence.seqno == req->engine->last_submitted_seqno) { - /* The GPU is now idle and this client has stalled. - * Since no other client has submitted a request in the - * meantime, assume that this client is the only one - * supplying work to the GPU but is unable to keep that - * work supplied because it is waiting. Since the GPU is - * then never kept fully busy, RPS autoclocking will - * keep the clocks relatively low, causing further delays. - * Compensate by giving the synchronous client credit for - * a waitboost next time. - */ - spin_lock(&req->i915->rps.client_lock); - list_del_init(&rps->link); - spin_unlock(&req->i915->rps.client_lock); - } - - return ret; + return timeout; } static bool engine_retire_requests(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 4e6d038..ae0913a 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -228,13 +228,13 @@ struct intel_rps_client; #define IS_RPS_CLIENT(p) (!IS_ERR(p)) #define IS_RPS_USER(p) (!IS_ERR_OR_NULL(p)) -int i915_wait_request(struct drm_i915_gem_request *req, - unsigned int flags, - s64 *timeout, - struct intel_rps_client *rps) +long i915_wait_request(struct drm_i915_gem_request *req, + unsigned int flags, + long timeout) __attribute__((nonnull(1))); #define I915_WAIT_INTERRUPTIBLE BIT(0) #define I915_WAIT_LOCKED BIT(1) /* struct_mutex held, handle GPU reset */ +#define I915_WAIT_ALL BIT(2) /* used by i915_gem_object_wait() */ static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine); @@ -583,14 +583,16 @@ static inline int __must_check i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex) { struct drm_i915_gem_request *request; + long ret; request = i915_gem_active_peek(active, mutex); if (!request) return 0; - return i915_wait_request(request, - I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED, - NULL, NULL); + ret = i915_wait_request(request, + I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + return ret < 0 ? 
ret : 0; } /** @@ -617,20 +619,18 @@ i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex) */ static inline int i915_gem_active_wait_unlocked(const struct i915_gem_active *active, - unsigned int flags, - s64 *timeout, - struct intel_rps_client *rps) + unsigned int flags) { struct drm_i915_gem_request *request; - int ret = 0; + long ret = 0; request = i915_gem_active_get_unlocked(active); if (request) { - ret = i915_wait_request(request, flags, timeout, rps); + ret = i915_wait_request(request, flags, MAX_SCHEDULE_TIMEOUT); i915_gem_request_put(request); } - return ret; + return ret < 0 ? ret : 0; } /** @@ -647,7 +647,7 @@ i915_gem_active_retire(struct i915_gem_active *active, struct mutex *mutex) { struct drm_i915_gem_request *request; - int ret; + long ret; request = i915_gem_active_raw(active, mutex); if (!request) @@ -655,8 +655,8 @@ i915_gem_active_retire(struct i915_gem_active *active, ret = i915_wait_request(request, I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED, - NULL, NULL); - if (ret) + MAX_SCHEDULE_TIMEOUT); + if (ret < 0) return ret; list_del_init(&active->link); diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index c6f780f..c49dd95 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -61,23 +61,13 @@ struct i915_mmu_object { bool attached; }; -static void wait_rendering(struct drm_i915_gem_object *obj) -{ - unsigned long active = __I915_BO_ACTIVE(obj); - int idx; - - for_each_active(active, idx) - i915_gem_active_wait_unlocked(&obj->last_read[idx], - 0, NULL, NULL); -} - static void cancel_userptr(struct work_struct *work) { struct i915_mmu_object *mo = container_of(work, typeof(*mo), work); struct drm_i915_gem_object *obj = mo->obj; struct drm_device *dev = obj->base.dev; - wait_rendering(obj); + i915_gem_object_wait(obj, I915_WAIT_ALL, MAX_SCHEDULE_TIMEOUT, NULL); mutex_lock(&dev->struct_mutex); /* Cancel any active worker and force us to re-evaluate gup */ diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index cb7dd11..e4800b8 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -12072,7 +12072,7 @@ static void intel_mmio_flip_work_func(struct work_struct *w) if (work->flip_queued_req) WARN_ON(i915_wait_request(work->flip_queued_req, - 0, NULL, NO_WAITBOOST)); + 0, MAX_SCHEDULE_TIMEOUT) < 0); /* For framebuffer backed by dmabuf, wait for fence */ resv = i915_gem_object_get_dmabuf_resv(obj); @@ -14187,19 +14187,21 @@ static int intel_atomic_prepare_commit(struct drm_device *dev, for_each_plane_in_state(state, plane, plane_state, i) { struct intel_plane_state *intel_plane_state = to_intel_plane_state(plane_state); + long timeout; if (!intel_plane_state->wait_req) continue; - ret = i915_wait_request(intel_plane_state->wait_req, - I915_WAIT_INTERRUPTIBLE, - NULL, NULL); - if (ret) { + timeout = i915_wait_request(intel_plane_state->wait_req, + I915_WAIT_INTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT); + if (timeout < 0) { /* Any hang should be swallowed by the wait */ - WARN_ON(ret == -EIO); + WARN_ON(timeout == -EIO); mutex_lock(&dev->struct_mutex); drm_atomic_helper_cleanup_planes(dev, state); mutex_unlock(&dev->struct_mutex); + ret = timeout; break; } } @@ -14403,7 +14405,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) bool hw_check = intel_state->modeset; unsigned long put_domains[I915_MAX_PIPES] = {}; unsigned crtc_vblank_mask = 0; - int i, ret; + int i; 
for_each_plane_in_state(state, plane, plane_state, i) { struct intel_plane_state *intel_plane_state = @@ -14412,11 +14414,10 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) if (!intel_plane_state->wait_req) continue; - ret = i915_wait_request(intel_plane_state->wait_req, - 0, NULL, NULL); /* EIO should be eaten, and we can't get interrupted in the * worker, and blocking commits have waited already. */ - WARN_ON(ret); + WARN_ON(i915_wait_request(intel_plane_state->wait_req, + 0, MAX_SCHEDULE_TIMEOUT) < 0); } drm_atomic_helper_wait_for_dependencies(state); @@ -14780,7 +14781,11 @@ intel_prepare_plane_fb(struct drm_plane *plane, * can safely continue. */ if (needs_modeset(crtc_state)) - ret = i915_gem_object_wait_rendering(old_obj, true); + ret = i915_gem_object_wait(old_obj, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT, + NULL); if (ret) { /* GPU hangs should have been swallowed by the wait */ WARN_ON(ret == -EIO); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 67a70c5..8ef735f 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2155,7 +2155,9 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes) { struct intel_ring *ring = req->ring; struct drm_i915_gem_request *target; - int ret; + long timeout; + + lockdep_assert_held(&req->i915->drm.struct_mutex); intel_ring_update_space(ring); if (ring->space >= bytes) @@ -2185,11 +2187,11 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes) if (WARN_ON(&target->ring_link == &ring->request_list)) return -ENOSPC; - ret = i915_wait_request(target, - I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED, - NULL, NO_WAITBOOST); - if (ret) - return ret; + timeout = i915_wait_request(target, + I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT); + if (timeout < 0) + return timeout; i915_gem_request_retire_upto(target); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 32b2e63..884a5ae 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -524,8 +524,7 @@ static inline int intel_engine_idle(struct intel_engine_cs *engine, unsigned int flags) { /* Wait upon the last request to be completed */ - return i915_gem_active_wait_unlocked(&engine->last_request, - flags, NULL, NULL); + return i915_gem_active_wait_unlocked(&engine->last_request, flags); } int intel_init_render_ring_buffer(struct intel_engine_cs *engine); -- cgit v1.1 From 2e36991a8aa2fb5b06eee7958cced0665ea6f35d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:28 +0100 Subject: drm/i915: Remove unused i915_gem_active_wait() in favour of _unlocked() Since we only use the more generic unlocked variant, just rename it as the normal i915_gem_active_wait(). The temporary cost is that we need to always acquire the reference in an RCU-safe manner, but the benefit is that we will combine the common paths. 
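As a rough illustration, the unlocked path amounts to taking the RCU read lock, dereferencing the tracked request and trying to bump its reference before it can be freed. The sketch below is an approximation written for this document, not the upstream helper body: it assumes the request's lifetime is governed by the kref embedded in its dma_fence, and it omits the recheck loop the real i915_gem_active_get_unlocked() needs because the request slab can be recycled under RCU.

static struct drm_i915_gem_request *
active_get_rcu_sketch(const struct i915_gem_active *active)
{
	struct drm_i915_gem_request *request;

	/* RCU keeps the request memory valid while we race with retire */
	rcu_read_lock();
	request = rcu_dereference(active->request);
	if (request && !kref_get_unless_zero(&request->fence.refcount))
		request = NULL; /* lost the race: request already freed */
	rcu_read_unlock();

	return request; /* caller drops it via i915_gem_request_put() */
}
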
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-5-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_request.h | 34 +++------------------------------ drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +- 2 files changed, 4 insertions(+), 32 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index ae0913a..602234f 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -569,40 +569,13 @@ i915_gem_active_is_idle(const struct i915_gem_active *active, } /** - * i915_gem_active_wait - waits until the request is completed - * @active - the active request on which to wait - * - * i915_gem_active_wait() waits until the request is completed before - * returning. Note that it does not guarantee that the request is - * retired first, see i915_gem_active_retire(). - * - * i915_gem_active_wait() returns immediately if the active - * request is already complete. - */ -static inline int __must_check -i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex) -{ - struct drm_i915_gem_request *request; - long ret; - - request = i915_gem_active_peek(active, mutex); - if (!request) - return 0; - - ret = i915_wait_request(request, - I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT); - return ret < 0 ? ret : 0; -} - -/** - * i915_gem_active_wait_unlocked - waits until the request is completed + * i915_gem_active_wait - waits until the request is completed * @active - the active request on which to wait * @flags - how to wait * @timeout - how long to wait at most * @rps - userspace client to charge for a waitboost * - * i915_gem_active_wait_unlocked() waits until the request is completed before + * i915_gem_active_wait() waits until the request is completed before * returning, without requiring any locks to be held. Note that it does not * retire any requests before returning. * @@ -618,8 +591,7 @@ i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex) * Returns 0 if successful, or a negative error code. */ static inline int -i915_gem_active_wait_unlocked(const struct i915_gem_active *active, - unsigned int flags) +i915_gem_active_wait(const struct i915_gem_active *active, unsigned int flags) { struct drm_i915_gem_request *request; long ret = 0; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 884a5ae..09bb89c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -524,7 +524,7 @@ static inline int intel_engine_idle(struct intel_engine_cs *engine, unsigned int flags) { /* Wait upon the last request to be completed */ - return i915_gem_active_wait_unlocked(&engine->last_request, flags); + return i915_gem_active_wait(&engine->last_request, flags); } int intel_init_render_ring_buffer(struct intel_engine_cs *engine); -- cgit v1.1 From f8a7fde4561067a8ebc956b27afeb530ac97cb9d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:29 +0100 Subject: drm/i915: Defer active reference until required We only need the active reference to keep the object alive after the handle has been deleted (so as to prevent a synchronous gem_close). Why then pay the price of a kref on every execbuf when we can insert that final active ref just in time for the handle deletion? 
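In outline, the scheme pairs a deferred get at handle close with a put at retire, using the flag helpers this patch adds; a condensed sketch of the two halves (drawn from the i915_gem.c hunks below, not a drop-in implementation):

/* On gem_close: the object is still busy on the GPU, so take the
 * reference that keeps it alive until its last request retires.
 */
static void close_busy_object_sketch(struct drm_i915_gem_object *obj)
{
	if (i915_gem_object_is_active(obj) &&
	    !i915_gem_object_has_active_reference(obj)) {
		i915_gem_object_set_active_reference(obj);
		i915_gem_object_get(obj);
	}
}

/* On retire: drop that deferred reference, if one was ever taken. */
static void retire_object_sketch(struct drm_i915_gem_object *obj)
{
	if (i915_gem_object_has_active_reference(obj)) {
		i915_gem_object_clear_active_reference(obj);
		i915_gem_object_put(obj);
	}
}
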
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-6-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 28 ++++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_gem.c | 22 +++++++++++++++++++++- drivers/gpu/drm/i915/i915_gem_batch_pool.c | 2 +- drivers/gpu/drm/i915/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 -- drivers/gpu/drm/i915/i915_gem_gtt.c | 7 ++++++- drivers/gpu/drm/i915/i915_gem_render_state.c | 3 ++- drivers/gpu/drm/i915/intel_ringbuffer.c | 15 ++++++++++++--- 8 files changed, 71 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index cf4b242..edc59d0 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2247,6 +2247,12 @@ struct drm_i915_gem_object { ((READ_ONCE((bo)->flags) >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK) /** + * Have we taken a reference for the object for incomplete GPU + * activity? + */ +#define I915_BO_ACTIVE_REF (I915_BO_ACTIVE_SHIFT + I915_NUM_ENGINES) + + /** * This is set if the object has been written to since last bound * to the GTT */ @@ -2407,6 +2413,28 @@ i915_gem_object_has_active_engine(const struct drm_i915_gem_object *obj, return obj->flags & BIT(engine + I915_BO_ACTIVE_SHIFT); } +static inline bool +i915_gem_object_has_active_reference(const struct drm_i915_gem_object *obj) +{ + return test_bit(I915_BO_ACTIVE_REF, &obj->flags); +} + +static inline void +i915_gem_object_set_active_reference(struct drm_i915_gem_object *obj) +{ + lockdep_assert_held(&obj->base.dev->struct_mutex); + __set_bit(I915_BO_ACTIVE_REF, &obj->flags); +} + +static inline void +i915_gem_object_clear_active_reference(struct drm_i915_gem_object *obj) +{ + lockdep_assert_held(&obj->base.dev->struct_mutex); + __clear_bit(I915_BO_ACTIVE_REF, &obj->flags); +} + +void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj); + static inline unsigned int i915_gem_object_get_tiling(struct drm_i915_gem_object *obj) { diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 537f502..c010304 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2661,7 +2661,10 @@ i915_gem_object_retire__read(struct i915_gem_active *active, list_move_tail(&obj->global_list, &request->i915->mm.bound_list); - i915_gem_object_put(obj); + if (i915_gem_object_has_active_reference(obj)) { + i915_gem_object_clear_active_reference(obj); + i915_gem_object_put(obj); + } } static bool i915_context_is_banned(const struct i915_gem_context *ctx) @@ -2935,6 +2938,12 @@ void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file) list_for_each_entry_safe(vma, vn, &obj->vma_list, obj_link) if (vma->vm->file == fpriv) i915_vma_close(vma); + + if (i915_gem_object_is_active(obj) && + !i915_gem_object_has_active_reference(obj)) { + i915_gem_object_set_active_reference(obj); + i915_gem_object_get(obj); + } mutex_unlock(&obj->base.dev->struct_mutex); } @@ -4475,6 +4484,17 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj) intel_runtime_pm_put(dev_priv); } +void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj) +{ + lockdep_assert_held(&obj->base.dev->struct_mutex); + + GEM_BUG_ON(i915_gem_object_has_active_reference(obj)); + if (i915_gem_object_is_active(obj)) + i915_gem_object_set_active_reference(obj); + else + i915_gem_object_put(obj); +} + int 
i915_gem_suspend(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c index ed98959..cb25cad 100644 --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c @@ -73,7 +73,7 @@ void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool) list_for_each_entry_safe(obj, next, &pool->cache_list[n], batch_pool_link) - i915_gem_object_put(obj); + __i915_gem_object_release_unless_active(obj); INIT_LIST_HEAD(&pool->cache_list[n]); } diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 5dca32a..47e888c 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -155,7 +155,7 @@ void i915_gem_context_free(struct kref *ctx_ref) if (ce->ring) intel_ring_free(ce->ring); - i915_vma_put(ce->state); + __i915_gem_object_release_unless_active(ce->state->obj); } put_pid(ctx->pid); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 61365ae..4cafce9 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1290,8 +1290,6 @@ void i915_vma_move_to_active(struct i915_vma *vma, * add the active reference first and queue for it to be dropped * *last*. */ - if (!i915_gem_object_is_active(obj)) - i915_gem_object_get(obj); i915_gem_object_set_active(obj, idx); i915_gem_active_set(&obj->last_read[idx], req); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 947d5ad..a3a3644 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3734,11 +3734,16 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) void i915_vma_unpin_and_release(struct i915_vma **p_vma) { struct i915_vma *vma; + struct drm_i915_gem_object *obj; vma = fetch_and_zero(p_vma); if (!vma) return; + obj = vma->obj; + i915_vma_unpin(vma); - i915_vma_put(vma); + i915_vma_close(vma); + + __i915_gem_object_release_unless_active(obj); } diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index a98c0f4..e7c3dbc 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -224,7 +224,8 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req) i915_vma_move_to_active(so.vma, req, 0); err_unpin: i915_vma_unpin(so.vma); + i915_vma_close(so.vma); err_obj: - i915_gem_object_put(obj); + __i915_gem_object_release_unless_active(obj); return ret; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 8ef735f..8eee967 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1762,14 +1762,19 @@ static void cleanup_phys_status_page(struct intel_engine_cs *engine) static void cleanup_status_page(struct intel_engine_cs *engine) { struct i915_vma *vma; + struct drm_i915_gem_object *obj; vma = fetch_and_zero(&engine->status_page.vma); if (!vma) return; + obj = vma->obj; + i915_vma_unpin(vma); - i915_gem_object_unpin_map(vma->obj); - i915_vma_put(vma); + i915_vma_close(vma); + + i915_gem_object_unpin_map(obj); + __i915_gem_object_release_unless_active(obj); } static int init_status_page(struct intel_engine_cs *engine) @@ -1967,7 +1972,11 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size) void intel_ring_free(struct 
intel_ring *ring) { - i915_vma_put(ring->vma); + struct drm_i915_gem_object *obj = ring->vma->obj; + + i915_vma_close(ring->vma); + __i915_gem_object_release_unless_active(obj); + kfree(ring); } -- cgit v1.1 From 920cf4194954ec6f971506013c7fe3b7def178b6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:30 +0100 Subject: drm/i915: Introduce an internal allocator for disposable private objects Quite a few of our objects used for internal hardware programming do not benefit from being swappable or from being zero initialised. As such they do not benefit from using a shmemfs backing storage and since they are internal and never directly exposed to the user, we do not need to worry about providing a filp. For these we can use a drm_i915_gem_object wrapper around an sg_table of plain struct page. They are not swap backed and not automatically pinned. If they are reaped by the shrinker, the pages are released and the contents discarded. For the internal use case, this is fine as, for example, ringbuffers are pinned from being written by a request to be read by the hardware. Once they are idle, they can be discarded entirely. As such they are a good match for execlist ringbuffers and a small variety of other internal objects. In the first iteration, this is limited to the scratch batch buffers we use (for command parsing and state initialisation). v2: Allocate physically contiguous pages, where possible. v3: Reduce maximum order on subsequent requests following an allocation failure. v4: Fix up mismatch between swiotlb segment size and page count (it counts in 2k units, not 4k pages) Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-7-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_drv.h | 5 + drivers/gpu/drm/i915/i915_gem_batch_pool.c | 27 ++--- drivers/gpu/drm/i915/i915_gem_internal.c | 170 +++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_gem_render_state.c | 2 +- drivers/gpu/drm/i915/intel_engine_cs.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 14 ++- 7 files changed, 197 insertions(+), 24 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_gem_internal.c (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 6123400..7faa04c 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -35,6 +35,7 @@ i915-y += i915_cmd_parser.o \ i915_gem_execbuffer.o \ i915_gem_fence.o \ i915_gem_gtt.o \ + i915_gem_internal.o \ i915_gem.o \ i915_gem_render_state.o \ i915_gem_request.o \ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index edc59d0..96995d6 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3556,6 +3556,11 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev, u32 gtt_offset, u32 size); +/* i915_gem_internal.c */ +struct drm_i915_gem_object * +i915_gem_object_create_internal(struct drm_i915_private *dev_priv, + unsigned int size); + /* i915_gem_shrinker.c */ unsigned long i915_gem_shrink(struct drm_i915_private *dev_priv, unsigned long target, diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c index cb25cad..aa4e1e0 100644 --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c @@ -97,9 +97,9 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, size_t size) { 
struct drm_i915_gem_object *obj = NULL; - struct drm_i915_gem_object *tmp, *next; + struct drm_i915_gem_object *tmp; struct list_head *list; - int n; + int n, ret; lockdep_assert_held(&pool->engine->i915->drm.struct_mutex); @@ -112,19 +112,12 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, n = ARRAY_SIZE(pool->cache_list) - 1; list = &pool->cache_list[n]; - list_for_each_entry_safe(tmp, next, list, batch_pool_link) { + list_for_each_entry(tmp, list, batch_pool_link) { /* The batches are strictly LRU ordered */ if (!i915_gem_active_is_idle(&tmp->last_read[pool->engine->id], &tmp->base.dev->struct_mutex)) break; - /* While we're looping, do some clean up */ - if (tmp->madv == __I915_MADV_PURGED) { - list_del(&tmp->batch_pool_link); - i915_gem_object_put(tmp); - continue; - } - if (tmp->base.size >= size) { obj = tmp; break; @@ -132,19 +125,15 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, } if (obj == NULL) { - int ret; - - obj = i915_gem_object_create(&pool->engine->i915->drm, size); + obj = i915_gem_object_create_internal(pool->engine->i915, size); if (IS_ERR(obj)) return obj; - - ret = i915_gem_object_get_pages(obj); - if (ret) - return ERR_PTR(ret); - - obj->madv = I915_MADV_DONTNEED; } + ret = i915_gem_object_get_pages(obj); + if (ret) + return ERR_PTR(ret); + list_move_tail(&obj->batch_pool_link, list); i915_gem_object_pin_pages(obj); return obj; } diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c new file mode 100644 index 0000000..02e66fa --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_internal.c @@ -0,0 +1,170 @@ +/* + * Copyright © 2014-2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include <drm/drmP.h> +#include <drm/i915_drm.h> +#include "i915_drv.h" + +#define QUIET (__GFP_NORETRY | __GFP_NOWARN) + +/* convert swiotlb segment size into sensible units (pages)! 
*/ +#define IO_TLB_SEGPAGES (IO_TLB_SEGSIZE << IO_TLB_SHIFT >> PAGE_SHIFT) + +static void internal_free_pages(struct sg_table *st) +{ + struct scatterlist *sg; + + for (sg = st->sgl; sg; sg = __sg_next(sg)) + __free_pages(sg_page(sg), get_order(sg->length)); + + sg_free_table(st); + kfree(st); +} + +static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + unsigned int npages = obj->base.size / PAGE_SIZE; + struct sg_table *st; + struct scatterlist *sg; + int max_order; + gfp_t gfp; + + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (!st) + return -ENOMEM; + + if (sg_alloc_table(st, npages, GFP_KERNEL)) { + kfree(st); + return -ENOMEM; + } + + sg = st->sgl; + st->nents = 0; + + max_order = MAX_ORDER; +#ifdef CONFIG_SWIOTLB + if (swiotlb_nr_tbl()) /* minimum max swiotlb size is IO_TLB_SEGSIZE */ + max_order = min(max_order, ilog2(IO_TLB_SEGPAGES)); +#endif + + gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE; + if (IS_CRESTLINE(i915) || IS_BROADWATER(i915)) { + /* 965gm cannot relocate objects above 4GiB. */ + gfp &= ~__GFP_HIGHMEM; + gfp |= __GFP_DMA32; + } + + do { + int order = min(fls(npages) - 1, max_order); + struct page *page; + + do { + page = alloc_pages(gfp | (order ? QUIET : 0), order); + if (page) + break; + if (!order--) + goto err; + + /* Limit subsequent allocations as well */ + max_order = order; + } while (1); + + sg_set_page(sg, page, PAGE_SIZE << order, 0); + st->nents++; + + npages -= 1 << order; + if (!npages) { + sg_mark_end(sg); + break; + } + + sg = __sg_next(sg); + } while (1); + obj->pages = st; + + if (i915_gem_gtt_prepare_object(obj)) { + obj->pages = NULL; + goto err; + } + + /* Mark the pages as dontneed whilst they are still pinned. As soon + * as they are unpinned they are allowed to be reaped by the shrinker, + * and the caller is expected to repopulate - the contents of this + * object are only valid whilst active and pinned. + */ + obj->madv = I915_MADV_DONTNEED; + return 0; + +err: + sg_mark_end(sg); + internal_free_pages(st); + return -ENOMEM; +} + +static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj) +{ + i915_gem_gtt_finish_object(obj); + internal_free_pages(obj->pages); + + obj->dirty = 0; + obj->madv = I915_MADV_WILLNEED; +} + +static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = { + .flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE, + .get_pages = i915_gem_object_get_pages_internal, + .put_pages = i915_gem_object_put_pages_internal, +}; + +/** + * Creates a new object that wraps some internal memory for private use. + * This object is not backed by swappable storage, and as such its contents + * are volatile and only valid whilst pinned. If the object is reaped by the + * shrinker, its pages and data will be discarded. Equally, it is not a full + * GEM object and so not valid for access from userspace. This makes it useful + * for hardware interfaces like ringbuffers (which are pinned from the time + * the request is written to the time the hardware stops accessing it), but + * not for contexts (which need to be preserved when not active for later + * reuse). Note that it is not cleared upon allocation. 
+ */ +struct drm_i915_gem_object * +i915_gem_object_create_internal(struct drm_i915_private *i915, + unsigned int size) +{ + struct drm_i915_gem_object *obj; + + obj = i915_gem_object_alloc(&i915->drm); + if (!obj) + return ERR_PTR(-ENOMEM); + + drm_gem_private_object_init(&i915->drm, &obj->base, size); + i915_gem_object_init(obj, &i915_gem_object_internal_ops); + + obj->base.write_domain = I915_GEM_DOMAIN_CPU; + obj->base.read_domains = I915_GEM_DOMAIN_CPU; + obj->cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE; + + return obj; +} diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index e7c3dbc..217e0b5 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -187,7 +187,7 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req) if (so.rodata->batch_items * 4 > 4096) return -EINVAL; - obj = i915_gem_object_create(&req->i915->drm, 4096); + obj = i915_gem_object_create_internal(req->i915, 4096); if (IS_ERR(obj)) return PTR_ERR(obj); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 8cceb34..b2de371 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -264,7 +264,7 @@ int intel_engine_create_scratch(struct intel_engine_cs *engine, int size) obj = i915_gem_object_create_stolen(&engine->i915->drm, size); if (!obj) - obj = i915_gem_object_create(&engine->i915->drm, size); + obj = i915_gem_object_create_internal(engine->i915, size); if (IS_ERR(obj)) { DRM_ERROR("Failed to allocate scratch page\n"); return PTR_ERR(obj); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 8eee967..a15b9b5 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1782,9 +1782,10 @@ static int init_status_page(struct intel_engine_cs *engine) struct drm_i915_gem_object *obj; struct i915_vma *vma; unsigned int flags; + void *vaddr; int ret; - obj = i915_gem_object_create(&engine->i915->drm, 4096); + obj = i915_gem_object_create_internal(engine->i915, 4096); if (IS_ERR(obj)) { DRM_ERROR("Failed to allocate status page\n"); return PTR_ERR(obj); @@ -1817,15 +1818,22 @@ static int init_status_page(struct intel_engine_cs *engine) if (ret) goto err; + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + ret = PTR_ERR(vaddr); + goto err_unpin; + } + engine->status_page.vma = vma; engine->status_page.ggtt_offset = i915_ggtt_offset(vma); - engine->status_page.page_addr = - i915_gem_object_pin_map(obj, I915_MAP_WB); + engine->status_page.page_addr = memset(vaddr, 0, 4096); DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n", engine->name, i915_ggtt_offset(vma)); return 0; +err_unpin: + i915_vma_unpin(vma); err: i915_gem_object_put(obj); return ret; -- cgit v1.1 From 4e50f082ac51c95046a8315612ce1d9acb2b3d63 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:31 +0100 Subject: drm/i915: Reuse the active golden render state batch The golden render state is constant, but we recreate the batch that sets it up for every new context. If we keep that batch in a volatile cache we can safely reuse it whenever we need to initialise a new context. We mark the pages as purgeable and use the shrinker to recover pages from the batch whenever we face memory pressures, recreating that batch afresh on the next new context. 
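The rebuild-on-demand test reduces to roughly the following, condensed from the i915_gem_render_state_emit() hunk below with the error paths trimmed; treat it as a sketch rather than the literal function:

/* The cached batch is only valid while its purgeable pages survive
 * and while the vma still sits at the address the batch was written
 * for; otherwise regenerate it before dispatch.
 */
static int emit_render_state_sketch(struct drm_i915_gem_request *req,
				    struct intel_render_state *so)
{
	int ret;

	if (!so->vma->obj->pages) /* reaped by the shrinker */
		so->batch_offset = -1; /* force a rewrite below */

	ret = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
	if (ret)
		return ret;

	if (so->vma->node.start != so->batch_offset)
		ret = render_state_setup(so, req->i915); /* regenerate */
	if (ret == 0)
		ret = req->engine->emit_bb_start(req, so->batch_offset,
						 so->batch_size,
						 I915_DISPATCH_SECURE);

	i915_vma_unpin(so->vma);
	return ret;
}
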
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-8-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_render_state.c | 184 +++++++++++++++++---------- drivers/gpu/drm/i915/i915_gem_render_state.h | 4 +- drivers/gpu/drm/i915/intel_engine_cs.c | 5 + drivers/gpu/drm/i915/intel_lrc.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.h | 3 + 6 files changed, 129 insertions(+), 71 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index 217e0b5..9625e1a 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -28,17 +28,19 @@ #include "i915_drv.h" #include "intel_renderstate.h" -struct render_state { +struct intel_render_state { const struct intel_renderstate_rodata *rodata; struct i915_vma *vma; - u32 aux_batch_size; - u32 aux_batch_offset; + u32 batch_offset; + u32 batch_size; + u32 aux_offset; + u32 aux_size; }; static const struct intel_renderstate_rodata * -render_state_get_rodata(const struct drm_i915_gem_request *req) +render_state_get_rodata(const struct intel_engine_cs *engine) { - switch (INTEL_GEN(req->i915)) { + switch (INTEL_GEN(engine->i915)) { case 6: return &gen6_null_state; case 7: @@ -63,29 +65,27 @@ render_state_get_rodata(const struct drm_i915_gem_request *req) */ #define OUT_BATCH(batch, i, val) \ do { \ - if (WARN_ON((i) >= PAGE_SIZE / sizeof(u32))) { \ - ret = -ENOSPC; \ - goto err_out; \ - } \ + if ((i) >= PAGE_SIZE / sizeof(u32)) \ + goto err; \ (batch)[(i)++] = (val); \ } while(0) -static int render_state_setup(struct render_state *so) +static int render_state_setup(struct intel_render_state *so, + struct drm_i915_private *i915) { - struct drm_i915_private *dev_priv = to_i915(so->vma->vm->dev); const struct intel_renderstate_rodata *rodata = so->rodata; - const bool has_64bit_reloc = INTEL_GEN(dev_priv) >= 8; + const bool has_64bit_reloc = INTEL_GEN(i915) >= 8; + struct drm_i915_gem_object *obj = so->vma->obj; unsigned int i = 0, reloc_index = 0; - struct page *page; + unsigned int needs_clflush; u32 *d; int ret; - ret = i915_gem_object_set_to_cpu_domain(so->vma->obj, true); + ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush); if (ret) return ret; - page = i915_gem_object_get_dirty_page(so->vma->obj, 0); - d = kmap(page); + d = kmap_atomic(i915_gem_object_get_dirty_page(obj, 0)); while (i < rodata->batch_items) { u32 s = rodata->batch[i]; @@ -95,10 +95,8 @@ static int render_state_setup(struct render_state *so) s = lower_32_bits(r); if (has_64bit_reloc) { if (i + 1 >= rodata->batch_items || - rodata->batch[i + 1] != 0) { - ret = -EINVAL; - goto err_out; - } + rodata->batch[i + 1] != 0) + goto err; d[i++] = s; s = upper_32_bits(r); @@ -110,12 +108,20 @@ static int render_state_setup(struct render_state *so) d[i++] = s; } + if (rodata->reloc[reloc_index] != -1) { + DRM_ERROR("only %d relocs resolved\n", reloc_index); + goto err; + } + + so->batch_offset = so->vma->node.start; + so->batch_size = rodata->batch_items * sizeof(u32); + while (i % CACHELINE_DWORDS) OUT_BATCH(d, i, MI_NOOP); - so->aux_batch_offset = i * sizeof(u32); + so->aux_offset = i * sizeof(u32); - if (HAS_POOLED_EU(dev_priv)) { + if (HAS_POOLED_EU(i915)) { /* * We always program 3x6 pool config but depending upon which * subslice is disabled HW drops down to appropriate config @@ -143,89 +149,131 @@ static int 
render_state_setup(struct render_state *so) } OUT_BATCH(d, i, MI_BATCH_BUFFER_END); - so->aux_batch_size = (i * sizeof(u32)) - so->aux_batch_offset; - + so->aux_size = i * sizeof(u32) - so->aux_offset; + so->aux_offset += so->batch_offset; /* * Since we are sending length, we need to strictly conform to * all requirements. For Gen2 this must be a multiple of 8. */ - so->aux_batch_size = ALIGN(so->aux_batch_size, 8); - - kunmap(page); - - ret = i915_gem_object_set_to_gtt_domain(so->vma->obj, false); - if (ret) - return ret; - - if (rodata->reloc[reloc_index] != -1) { - DRM_ERROR("only %d relocs resolved\n", reloc_index); - return -EINVAL; - } + so->aux_size = ALIGN(so->aux_size, 8); - return 0; + if (needs_clflush) + drm_clflush_virt_range(d, i * sizeof(u32)); + kunmap_atomic(d); -err_out: - kunmap(page); + ret = i915_gem_object_set_to_gtt_domain(obj, false); +out: + i915_gem_obj_finish_shmem_access(obj); return ret; + +err: + kunmap_atomic(d); + ret = -EINVAL; + goto out; } #undef OUT_BATCH -int i915_gem_render_state_init(struct drm_i915_gem_request *req) +int i915_gem_render_state_init(struct intel_engine_cs *engine) { - struct render_state so; + struct intel_render_state *so; + const struct intel_renderstate_rodata *rodata; struct drm_i915_gem_object *obj; int ret; - if (WARN_ON(req->engine->id != RCS)) - return -ENOENT; + if (engine->id != RCS) + return 0; - so.rodata = render_state_get_rodata(req); - if (!so.rodata) + rodata = render_state_get_rodata(engine); + if (!rodata) return 0; - if (so.rodata->batch_items * 4 > 4096) + if (rodata->batch_items * 4 > 4096) return -EINVAL; - obj = i915_gem_object_create_internal(req->i915, 4096); - if (IS_ERR(obj)) - return PTR_ERR(obj); + so = kmalloc(sizeof(*so), GFP_KERNEL); + if (!so) + return -ENOMEM; - so.vma = i915_vma_create(obj, &req->i915->ggtt.base, NULL); - if (IS_ERR(so.vma)) { - ret = PTR_ERR(so.vma); - goto err_obj; + obj = i915_gem_object_create_internal(engine->i915, 4096); + if (IS_ERR(obj)) { + ret = PTR_ERR(obj); + goto err_free; } - ret = i915_vma_pin(so.vma, 0, 0, PIN_GLOBAL); - if (ret) + so->vma = i915_vma_create(obj, &engine->i915->ggtt.base, NULL); + if (IS_ERR(so->vma)) { + ret = PTR_ERR(so->vma); goto err_obj; + } + + so->rodata = rodata; + engine->render_state = so; + return 0; - ret = render_state_setup(&so); +err_obj: + i915_gem_object_put(obj); +err_free: + kfree(so); + return ret; +} + +int i915_gem_render_state_emit(struct drm_i915_gem_request *req) +{ + struct intel_render_state *so; + int ret; + + so = req->engine->render_state; + if (!so) + return 0; + + /* Recreate the page after shrinking */ + if (!so->vma->obj->pages) + so->batch_offset = -1; + + ret = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH); if (ret) - goto err_unpin; + return ret; - ret = req->engine->emit_bb_start(req, so.vma->node.start, - so.rodata->batch_items * 4, + if (so->vma->node.start != so->batch_offset) { + ret = render_state_setup(so, req->i915); + if (ret) + goto err_unpin; + } + + ret = req->engine->emit_bb_start(req, + so->batch_offset, so->batch_size, I915_DISPATCH_SECURE); if (ret) goto err_unpin; - if (so.aux_batch_size > 8) { + if (so->aux_size > 8) { ret = req->engine->emit_bb_start(req, - (so.vma->node.start + - so.aux_batch_offset), - so.aux_batch_size, + so->aux_offset, so->aux_size, I915_DISPATCH_SECURE); if (ret) goto err_unpin; } - i915_vma_move_to_active(so.vma, req, 0); + i915_vma_move_to_active(so->vma, req, 0); err_unpin: - i915_vma_unpin(so.vma); - i915_vma_close(so.vma); -err_obj: - 
__i915_gem_object_release_unless_active(obj); + i915_vma_unpin(so->vma); return ret; } + +void i915_gem_render_state_fini(struct intel_engine_cs *engine) +{ + struct intel_render_state *so; + struct drm_i915_gem_object *obj; + + so = fetch_and_zero(&engine->render_state); + if (!so) + return; + + obj = so->vma->obj; + + i915_vma_close(so->vma); + __i915_gem_object_release_unless_active(obj); + + kfree(so); +} diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.h b/drivers/gpu/drm/i915/i915_gem_render_state.h index 18cce3f..8748184 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.h +++ b/drivers/gpu/drm/i915/i915_gem_render_state.h @@ -26,6 +26,8 @@ struct drm_i915_gem_request; -int i915_gem_render_state_init(struct drm_i915_gem_request *req); +int i915_gem_render_state_init(struct intel_engine_cs *engine); +int i915_gem_render_state_emit(struct drm_i915_gem_request *req); +void i915_gem_render_state_fini(struct intel_engine_cs *engine); #endif /* _I915_GEM_RENDER_STATE_H_ */ diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index b2de371..fd55182 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -314,6 +314,10 @@ int intel_engine_init_common(struct intel_engine_cs *engine) if (ret) return ret; + ret = i915_gem_render_state_init(engine); + if (ret) + return ret; + return 0; } @@ -328,6 +332,7 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) { intel_engine_cleanup_scratch(engine); + i915_gem_render_state_fini(engine); intel_engine_fini_breadcrumbs(engine); intel_engine_cleanup_cmd_parser(engine); i915_gem_batch_pool_fini(&engine->batch_pool); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index bc86585..1c1bd30 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1637,7 +1637,7 @@ static int gen8_init_rcs_context(struct drm_i915_gem_request *req) if (ret) DRM_ERROR("MOCS failed to program: expect performance issues.\n"); - return i915_gem_render_state_init(req); + return i915_gem_render_state_emit(req); } /** diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index a15b9b5..aaa46d9 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -648,7 +648,7 @@ static int intel_rcs_ctx_init(struct drm_i915_gem_request *req) if (ret != 0) return ret; - ret = i915_gem_render_state_init(req); + ret = i915_gem_render_state_emit(req); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 09bb89c..cb6e96c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -157,6 +157,7 @@ struct i915_ctx_workarounds { }; struct drm_i915_gem_request; +struct intel_render_state; struct intel_engine_cs { struct drm_i915_private *i915; @@ -184,6 +185,8 @@ struct intel_engine_cs { unsigned int irq_shift; struct intel_ring *buffer; + struct intel_render_state *render_state; + /* Rather than have every client wait upon all user interrupts, * with the herd waking after every interrupt and each doing the * heavyweight seqno dance, we delegate the task (of being the -- cgit v1.1 From 4c7d62c6b8a2b4e2300d977644e78b25a2d5f4d0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:32 +0100 Subject: drm/i915: Markup GEM API with lockdep asserts Add lockdep_assert_held(struct_mutex) to the API preamble of the internal GEM 
interfaces. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-9-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_gem.c | 21 +++++++++++++++++++++ drivers/gpu/drm/i915/i915_gem_evict.c | 5 ++++- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 ++ drivers/gpu/drm/i915/i915_gem_render_state.c | 2 ++ drivers/gpu/drm/i915/i915_gem_request.c | 6 ++++++ 6 files changed, 37 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 96995d6..a1b9801 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3432,6 +3432,7 @@ int __must_check i915_vma_put_fence(struct i915_vma *vma); static inline bool i915_vma_pin_fence(struct i915_vma *vma) { + lockdep_assert_held(&vma->vm->dev->struct_mutex); if (vma->fence) { vma->fence->pin_count++; return true; @@ -3450,6 +3451,7 @@ i915_vma_pin_fence(struct i915_vma *vma) static inline void i915_vma_unpin_fence(struct i915_vma *vma) { + lockdep_assert_held(&vma->vm->dev->struct_mutex); if (vma->fence) { GEM_BUG_ON(vma->fence->pin_count <= 0); vma->fence->pin_count--; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c010304..528958d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -104,6 +104,8 @@ i915_gem_wait_for_error(struct i915_gpu_error *error) { int ret; + might_sleep(); + if (!i915_reset_in_progress(error)) return 0; @@ -2333,6 +2335,8 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj) { const struct drm_i915_gem_object_ops *ops = obj->ops; + lockdep_assert_held(&obj->base.dev->struct_mutex); + if (obj->pages == NULL) return 0; @@ -2509,6 +2513,8 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj) const struct drm_i915_gem_object_ops *ops = obj->ops; int ret; + lockdep_assert_held(&obj->base.dev->struct_mutex); + if (obj->pages) return 0; @@ -2790,6 +2796,8 @@ void i915_gem_reset(struct drm_i915_private *dev_priv) struct intel_engine_cs *engine; enum intel_engine_id id; + lockdep_assert_held(&dev_priv->drm.struct_mutex); + i915_gem_retire_requests(dev_priv); for_each_engine(engine, dev_priv, id) @@ -3031,6 +3039,8 @@ int i915_vma_unbind(struct i915_vma *vma) unsigned long active; int ret; + lockdep_assert_held(&obj->base.dev->struct_mutex); + /* First wait upon any activity as retiring the request may * have side-effects such as unpinning or even unbinding this vma. */ @@ -3427,6 +3437,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) int ret; lockdep_assert_held(&obj->base.dev->struct_mutex); + ret = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED | @@ -3505,6 +3516,8 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, struct i915_vma *vma; int ret = 0; + lockdep_assert_held(&obj->base.dev->struct_mutex); + if (obj->cache_level == cache_level) goto out; @@ -3709,6 +3722,8 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, u32 old_read_domains, old_write_domain; int ret; + lockdep_assert_held(&obj->base.dev->struct_mutex); + /* Mark the pin_display early so that we account for the * display coherency whilst setting up the cache domains. 
*/ @@ -3774,6 +3789,8 @@ err_unpin_display: void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) { + lockdep_assert_held(&vma->vm->dev->struct_mutex); + if (WARN_ON(vma->obj->pin_display == 0)) return; @@ -3802,6 +3819,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) int ret; lockdep_assert_held(&obj->base.dev->struct_mutex); + ret = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED | @@ -3962,6 +3980,7 @@ int __i915_vma_do_pin(struct i915_vma *vma, unsigned int bound = vma->flags; int ret; + lockdep_assert_held(&vma->vm->dev->struct_mutex); GEM_BUG_ON((flags & (PIN_GLOBAL | PIN_USER)) == 0); GEM_BUG_ON((flags & PIN_GLOBAL) && !i915_vma_is_ggtt(vma)); @@ -4003,6 +4022,8 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, struct i915_vma *vma; int ret; + lockdep_assert_held(&obj->base.dev->struct_mutex); + vma = i915_gem_obj_lookup_or_create_vma(obj, vm, view); if (IS_ERR(vma)) return vma; diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index a934f37..79b9641 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -103,6 +103,7 @@ i915_gem_evict_something(struct i915_address_space *vm, struct i915_vma *vma, *next; int ret; + lockdep_assert_held(&vm->dev->struct_mutex); trace_i915_gem_evict(vm, min_size, alignment, flags); /* @@ -213,6 +214,8 @@ i915_gem_evict_for_vma(struct i915_vma *target) { struct drm_mm_node *node, *next; + lockdep_assert_held(&target->vm->dev->struct_mutex); + list_for_each_entry_safe(node, next, &target->vm->mm.head_node.node_list, node_list) { @@ -266,7 +269,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle) struct i915_vma *vma, *next; int ret; - WARN_ON(!mutex_is_locked(&vm->dev->struct_mutex)); + lockdep_assert_held(&vm->dev->struct_mutex); trace_i915_gem_evict_vm(vm); if (do_idle) { diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index a3a3644..2bbbda1 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3449,6 +3449,7 @@ i915_vma_create(struct drm_i915_gem_object *obj, struct i915_address_space *vm, const struct i915_ggtt_view *view) { + lockdep_assert_held(&obj->base.dev->struct_mutex); GEM_BUG_ON(view && !i915_is_ggtt(vm)); GEM_BUG_ON(i915_gem_obj_to_vma(obj, vm, view)); @@ -3476,6 +3477,7 @@ i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, { struct i915_vma *vma; + lockdep_assert_held(&obj->base.dev->struct_mutex); GEM_BUG_ON(view && !i915_is_ggtt(vm)); vma = i915_gem_obj_to_vma(obj, vm, view); diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index 9625e1a..0529324 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -223,6 +223,8 @@ int i915_gem_render_state_emit(struct drm_i915_gem_request *req) struct intel_render_state *so; int ret; + lockdep_assert_held(&req->i915->drm.struct_mutex); + so = req->engine->render_state; if (!so) return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index fbe0923..d234c28 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -143,6 +143,9 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) { struct i915_gem_active *active, *next; + lockdep_assert_held(&request->i915->drm.struct_mutex); + 
GEM_BUG_ON(!i915_gem_request_completed(request)); + trace_i915_gem_request_retire(request); list_del_init(&request->link); @@ -268,6 +271,8 @@ int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) struct drm_i915_private *dev_priv = to_i915(dev); int ret; + lockdep_assert_held(&dev_priv->drm.struct_mutex); + if (seqno == 0) return -EINVAL; @@ -612,6 +617,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) u32 reserved_tail; int ret; + lockdep_assert_held(&request->i915->drm.struct_mutex); trace_i915_gem_request_add(request); /* -- cgit v1.1 From 96d776345277d81dc96e984f13d8f2b84ab0dee4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:33 +0100 Subject: drm/i915: Use a radixtree for random access to the object's backing storage A while ago we switched from a contiguous array of pages into an sglist, for that was both more convenient for mapping to hardware and avoided the requirement for a vmalloc array of pages on every object. However, certain GEM API calls (like pwrite, pread as well as performing relocations) do desire access to individual struct pages. A quick hack was to introduce a cache of the last access such that finding the following page was quick - this works so long as the caller desired sequential access. Walking backwards, or multiple callers, still hits a slow linear search for each page. One solution is to store each successful lookup in a radix tree. v2: Rewrite building the radixtree for clarity, hopefully. v3: Rearrange execbuf to avoid calling i915_gem_object_get_sg() from within an atomic section and so relax the allocation context to a simple GFP_KERNEL and mutex. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-10-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 69 +++++------- drivers/gpu/drm/i915/i915_gem.c | 185 +++++++++++++++++++++++++++++--- drivers/gpu/drm/i915/i915_gem_stolen.c | 4 +- drivers/gpu/drm/i915/i915_gem_userptr.c | 4 +- 4 files changed, 199 insertions(+), 63 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a1b9801..85ae83a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2286,9 +2286,12 @@ struct drm_i915_gem_object { struct sg_table *pages; int pages_pin_count; - struct get_page { - struct scatterlist *sg; - int last; + struct i915_gem_object_page_iter { + struct scatterlist *sg_pos; + unsigned int sg_idx; /* in pages, but 32bit eek! */ + + struct radix_tree_root radix; + struct mutex lock; /* protects this cache */ } get_page; void *mapping; @@ -2491,6 +2494,14 @@ static __always_inline struct sgt_iter { return s; } +static inline struct scatterlist *____sg_next(struct scatterlist *sg) +{ + ++sg; + if (unlikely(sg_is_chain(sg))) + sg = sg_chain_ptr(sg); + return sg; +} + /** * __sg_next - return the next scatterlist entry in a list * @sg: The current sg entry @@ -2505,9 +2516,7 @@ static inline struct scatterlist *__sg_next(struct scatterlist *sg) #ifdef CONFIG_DEBUG_SG BUG_ON(sg->sg_magic != SG_MAGIC); #endif - return sg_is_last(sg) ? NULL : - likely(!sg_is_chain(++sg)) ? sg : - sg_chain_ptr(sg); + return sg_is_last(sg) ? 
NULL : ____sg_next(sg); } /** @@ -3185,45 +3194,21 @@ static inline int __sg_page_count(struct scatterlist *sg) return sg->length >> PAGE_SHIFT; } -struct page * -i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n); - -static inline dma_addr_t -i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, int n) -{ - if (n < obj->get_page.last) { - obj->get_page.sg = obj->pages->sgl; - obj->get_page.last = 0; - } - - while (obj->get_page.last + __sg_page_count(obj->get_page.sg) <= n) { - obj->get_page.last += __sg_page_count(obj->get_page.sg++); - if (unlikely(sg_is_chain(obj->get_page.sg))) - obj->get_page.sg = sg_chain_ptr(obj->get_page.sg); - } +struct scatterlist * +i915_gem_object_get_sg(struct drm_i915_gem_object *obj, + unsigned int n, unsigned int *offset); - return sg_dma_address(obj->get_page.sg) + ((n - obj->get_page.last) << PAGE_SHIFT); -} - -static inline struct page * -i915_gem_object_get_page(struct drm_i915_gem_object *obj, int n) -{ - if (WARN_ON(n >= obj->base.size >> PAGE_SHIFT)) - return NULL; - - if (n < obj->get_page.last) { - obj->get_page.sg = obj->pages->sgl; - obj->get_page.last = 0; - } +struct page * +i915_gem_object_get_page(struct drm_i915_gem_object *obj, + unsigned int n); - while (obj->get_page.last + __sg_page_count(obj->get_page.sg) <= n) { - obj->get_page.last += __sg_page_count(obj->get_page.sg++); - if (unlikely(sg_is_chain(obj->get_page.sg))) - obj->get_page.sg = sg_chain_ptr(obj->get_page.sg); - } +struct page * +i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, + unsigned int n); - return nth_page(sg_page(obj->get_page.sg), n - obj->get_page.last); -} +dma_addr_t +i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, + unsigned long n); static inline void i915_gem_object_pin_pages(struct drm_i915_gem_object *obj) { diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 528958d..aa0de3a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2330,6 +2330,15 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) kfree(obj->pages); } +static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) +{ + struct radix_tree_iter iter; + void **slot; + + radix_tree_for_each_slot(slot, &obj->get_page.radix, &iter, 0) + radix_tree_delete(&obj->get_page.radix, iter.index); +} + int i915_gem_object_put_pages(struct drm_i915_gem_object *obj) { @@ -2362,6 +2371,8 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj) obj->mapping = NULL; } + __i915_gem_object_reset_page_iter(obj); + ops->put_pages(obj); obj->pages = NULL; @@ -2531,8 +2542,8 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj) list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list); - obj->get_page.sg = obj->pages->sgl; - obj->get_page.last = 0; + obj->get_page.sg_pos = obj->pages->sgl; + obj->get_page.sg_idx = 0; return 0; } @@ -4337,6 +4348,8 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, obj->frontbuffer_ggtt_origin = ORIGIN_GTT; obj->madv = I915_MADV_WILLNEED; + INIT_RADIX_TREE(&obj->get_page.radix, GFP_KERNEL | __GFP_NOWARN); + mutex_init(&obj->get_page.lock); i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size); } @@ -5032,21 +5045,6 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old, } } -/* Like i915_gem_object_get_page(), but mark the returned page dirty */ -struct page * -i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n) -{ - struct page *page; - - /* Only default objects have 
per-page dirty tracking */ - if (WARN_ON(!i915_gem_object_has_struct_page(obj))) - return NULL; - - page = i915_gem_object_get_page(obj, n); - set_page_dirty(page); - return page; -} - /* Allocate a new GEM object and fill it with the supplied data */ struct drm_i915_gem_object * i915_gem_object_create_from_data(struct drm_device *dev, @@ -5087,3 +5085,156 @@ fail: i915_gem_object_put(obj); return ERR_PTR(ret); } + +struct scatterlist * +i915_gem_object_get_sg(struct drm_i915_gem_object *obj, + unsigned int n, + unsigned int *offset) +{ + struct i915_gem_object_page_iter *iter = &obj->get_page; + struct scatterlist *sg; + unsigned int idx, count; + + might_sleep(); + GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT); + GEM_BUG_ON(obj->pages_pin_count == 0); + + /* As we iterate forward through the sg, we record each entry in a + * radixtree for quick repeated (backwards) lookups. If we have seen + * this index previously, we will have an entry for it. + * + * Initial lookup is O(N), but this is amortized to O(1) for + * sequential page access (where each new request is consecutive + * to the previous one). Repeated lookups are O(lg(obj->base.size)), + * i.e. O(1) with a large constant! + */ + if (n < READ_ONCE(iter->sg_idx)) + goto lookup; + + mutex_lock(&iter->lock); + + /* We prefer to reuse the last sg so that repeated lookup of this + * (or the subsequent) sg are fast - comparing against the last + * sg is faster than going through the radixtree. + */ + + sg = iter->sg_pos; + idx = iter->sg_idx; + count = __sg_page_count(sg); + + while (idx + count <= n) { + unsigned long exception, i; + int ret; + + /* If we cannot allocate and insert this entry, or the + * individual pages from this range, cancel updating the + * sg_idx so that on this lookup we are forced to linearly + * scan onwards, but on future lookups we will try the + * insertion again (in which case we need to be careful of + * the error return reporting that we have already inserted + * this index). + */ + ret = radix_tree_insert(&iter->radix, idx, sg); + if (ret && ret != -EEXIST) + goto scan; + + exception = + RADIX_TREE_EXCEPTIONAL_ENTRY | + idx << RADIX_TREE_EXCEPTIONAL_SHIFT; + for (i = 1; i < count; i++) { + ret = radix_tree_insert(&iter->radix, idx + i, + (void *)exception); + if (ret && ret != -EEXIST) + goto scan; + } + + idx += count; + sg = ____sg_next(sg); + count = __sg_page_count(sg); + } + +scan: + iter->sg_pos = sg; + iter->sg_idx = idx; + + mutex_unlock(&iter->lock); + + if (unlikely(n < idx)) /* insertion completed by another thread */ + goto lookup; + + /* In case we failed to insert the entry into the radixtree, we need + * to look beyond the current sg. + */ + while (idx + count <= n) { + idx += count; + sg = ____sg_next(sg); + count = __sg_page_count(sg); + } + + *offset = n - idx; + return sg; + +lookup: + rcu_read_lock(); + + sg = radix_tree_lookup(&iter->radix, n); + GEM_BUG_ON(!sg); + + /* If this index is in the middle of multi-page sg entry, + * the radixtree will contain an exceptional entry that points + * to the start of that range. We will return the pointer to + * the base page and the offset of this page within the + * sg entry's range. 
+ */ + *offset = 0; + if (unlikely(radix_tree_exception(sg))) { + unsigned long base = + (unsigned long)sg >> RADIX_TREE_EXCEPTIONAL_SHIFT; + + sg = radix_tree_lookup(&iter->radix, base); + GEM_BUG_ON(!sg); + + *offset = n - base; + } + + rcu_read_unlock(); + + return sg; +} + +struct page * +i915_gem_object_get_page(struct drm_i915_gem_object *obj, unsigned int n) +{ + struct scatterlist *sg; + unsigned int offset; + + GEM_BUG_ON(!i915_gem_object_has_struct_page(obj)); + + sg = i915_gem_object_get_sg(obj, n, &offset); + return nth_page(sg_page(sg), offset); +} + +/* Like i915_gem_object_get_page(), but mark the returned page dirty */ +struct page * +i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, + unsigned int n) +{ + struct page *page; + + page = i915_gem_object_get_page(obj, n); + if (!obj->dirty) + set_page_dirty(page); + + return page; +} + +dma_addr_t +i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, + unsigned long n) +{ + struct scatterlist *sg; + unsigned int offset; + + sg = i915_gem_object_get_sg(obj, n, &offset); + return sg_dma_address(sg) + (offset << PAGE_SHIFT); +} diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index f4f6d3a..70e61bc 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -595,8 +595,8 @@ _i915_gem_object_create_stolen(struct drm_device *dev, if (obj->pages == NULL) goto cleanup; - obj->get_page.sg = obj->pages->sgl; - obj->get_page.last = 0; + obj->get_page.sg_pos = obj->pages->sgl; + obj->get_page.sg_idx = 0; i915_gem_object_pin_pages(obj); obj->stolen = stolen; diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index c49dd95..e2fa970 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -530,8 +530,8 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) if (ret == 0) { list_add_tail(&obj->global_list, &to_i915(dev)->mm.unbound_list); - obj->get_page.sg = obj->pages->sgl; - obj->get_page.last = 0; + obj->get_page.sg_pos = obj->pages->sgl; + obj->get_page.sg_idx = 0; pinned = 0; } } -- cgit v1.1 From d2a84a76a3b970fa32e6eda3d85e7782f831379e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:34 +0100 Subject: drm/i915: Use radixtree to jump start intel_partial_pages() We can use the radixtree index of the obj->pages to find the start position of the desired partial range. 
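As an illustration (editor's sketch, not part of this patch: walk_partial()
and its trimming logic are made up, while i915_gem_object_get_sg(),
__sg_page_count() and __sg_next() are the helpers introduced earlier in
this series), a caller that wants pages [first_page, first_page + npages)
no longer has to iterate from the head of the sg list; it can jump straight
to the entry covering first_page and only needs to trim the first chunk:

	static void walk_partial(struct drm_i915_gem_object *obj,
				 unsigned int first_page,
				 unsigned int npages)
	{
		struct scatterlist *sg;
		unsigned int offset;

		/* the caller must already hold a pin on the object's pages */
		sg = i915_gem_object_get_sg(obj, first_page, &offset);
		while (npages) {
			/* pages left in this entry past the start offset */
			unsigned int count = min_t(unsigned int,
						   __sg_page_count(sg) - offset,
						   npages);

			/* consume 'count' pages starting at
			 * sg_dma_address(sg) + (offset << PAGE_SHIFT)
			 */

			npages -= count;
			if (!npages)
				break;

			sg = __sg_next(sg);
			offset = 0;
		}
	}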
Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-11-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_gtt.c | 40 ++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 14 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 2bbbda1..b3f341f 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3584,35 +3584,47 @@ intel_partial_pages(const struct i915_ggtt_view *view, struct drm_i915_gem_object *obj) { struct sg_table *st; - struct scatterlist *sg; - struct sg_page_iter obj_sg_iter; + struct scatterlist *sg, *iter; + unsigned int count = view->params.partial.size; + unsigned int offset; int ret = -ENOMEM; st = kmalloc(sizeof(*st), GFP_KERNEL); if (!st) goto err_st_alloc; - ret = sg_alloc_table(st, view->params.partial.size, GFP_KERNEL); + ret = sg_alloc_table(st, count, GFP_KERNEL); if (ret) goto err_sg_alloc; + iter = i915_gem_object_get_sg(obj, + view->params.partial.offset, + &offset); + GEM_BUG_ON(!iter); + sg = st->sgl; st->nents = 0; - for_each_sg_page(obj->pages->sgl, &obj_sg_iter, obj->pages->nents, - view->params.partial.offset) - { - if (st->nents >= view->params.partial.size) - break; + do { + unsigned int len; - sg_set_page(sg, NULL, PAGE_SIZE, 0); - sg_dma_address(sg) = sg_page_iter_dma_address(&obj_sg_iter); - sg_dma_len(sg) = PAGE_SIZE; + len = min(iter->length - (offset << PAGE_SHIFT), + count << PAGE_SHIFT); + sg_set_page(sg, NULL, len, 0); + sg_dma_address(sg) = + sg_dma_address(iter) + (offset << PAGE_SHIFT); + sg_dma_len(sg) = len; - sg = sg_next(sg); st->nents++; - } + count -= len >> PAGE_SHIFT; + if (count == 0) { + sg_mark_end(sg); + return st; + } - return st; + sg = __sg_next(sg); + iter = __sg_next(iter); + offset = 0; + } while (1); err_sg_alloc: kfree(st); -- cgit v1.1 From a4f5ea64f0a818586b9de71803824b43dd01e517 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:35 +0100 Subject: drm/i915: Refactor object page API The plan is to make obtaining the backing storage for the object avoid struct_mutex (i.e. use its own locking). The first step is to update the API so that normal users only call pin/unpin whilst working on the backing storage. 
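For illustration (editor's sketch, not code from this patch;
use_object_pages() is a hypothetical caller), the old two-step
get-then-pin sequence collapses into a single pin call that acquires the
backing storage on first use, with a matching unpin when the caller is
done:

	static int use_object_pages(struct drm_i915_gem_object *obj)
	{
		int ret;

		/* acquires obj->mm.pages if not already resident and
		 * takes a pin reference; at this point in the series it
		 * must still be called under struct_mutex
		 */
		ret = i915_gem_object_pin_pages(obj);
		if (ret)
			return ret;

		/* obj->mm.pages stays resident while the pin is held */

		i915_gem_object_unpin_pages(obj);
		return 0;
	}
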
Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-12-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_cmd_parser.c | 2 +- drivers/gpu/drm/i915/i915_debugfs.c | 17 +-- drivers/gpu/drm/i915/i915_drv.h | 94 ++++++++---- drivers/gpu/drm/i915/i915_gem.c | 205 +++++++++++++-------------- drivers/gpu/drm/i915/i915_gem_batch_pool.c | 3 +- drivers/gpu/drm/i915/i915_gem_dmabuf.c | 14 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 +- drivers/gpu/drm/i915/i915_gem_fence.c | 4 +- drivers/gpu/drm/i915/i915_gem_gtt.c | 10 +- drivers/gpu/drm/i915/i915_gem_internal.c | 12 +- drivers/gpu/drm/i915/i915_gem_render_state.c | 2 +- drivers/gpu/drm/i915/i915_gem_shrinker.c | 10 +- drivers/gpu/drm/i915/i915_gem_stolen.c | 24 ++-- drivers/gpu/drm/i915/i915_gem_tiling.c | 8 +- drivers/gpu/drm/i915/i915_gem_userptr.c | 30 ++-- drivers/gpu/drm/i915/i915_gpu_error.c | 4 +- drivers/gpu/drm/i915/intel_lrc.c | 6 +- 17 files changed, 233 insertions(+), 214 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index f191d7b..f5039f4 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -1290,7 +1290,7 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, } if (ret == 0 && needs_clflush_after) - drm_clflush_virt_range(shadow_batch_obj->mapping, batch_len); + drm_clflush_virt_range(shadow_batch_obj->mm.mapping, batch_len); i915_gem_object_unpin_map(shadow_batch_obj); return ret; diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 9f5a392..e97a16c 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -112,7 +112,7 @@ static char get_global_flag(struct drm_i915_gem_object *obj) static char get_pin_mapped_flag(struct drm_i915_gem_object *obj) { - return obj->mapping ? 'M' : ' '; + return obj->mm.mapping ? 'M' : ' '; } static u64 i915_gem_obj_total_ggtt_size(struct drm_i915_gem_object *obj) @@ -158,8 +158,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) i915_gem_active_get_seqno(&obj->last_write, &obj->base.dev->struct_mutex), i915_cache_level_str(dev_priv, obj->cache_level), - obj->dirty ? " dirty" : "", - obj->madv == I915_MADV_DONTNEED ? " purgeable" : ""); + obj->mm.dirty ? " dirty" : "", + obj->mm.madv == I915_MADV_DONTNEED ? 
" purgeable" : ""); if (obj->base.name) seq_printf(m, " (name: %d)", obj->base.name); list_for_each_entry(vma, &obj->vma_list, obj_link) { @@ -403,12 +403,12 @@ static int i915_gem_object_info(struct seq_file *m, void *data) size += obj->base.size; ++count; - if (obj->madv == I915_MADV_DONTNEED) { + if (obj->mm.madv == I915_MADV_DONTNEED) { purgeable_size += obj->base.size; ++purgeable_count; } - if (obj->mapping) { + if (obj->mm.mapping) { mapped_count++; mapped_size += obj->base.size; } @@ -425,12 +425,12 @@ static int i915_gem_object_info(struct seq_file *m, void *data) ++dpy_count; } - if (obj->madv == I915_MADV_DONTNEED) { + if (obj->mm.madv == I915_MADV_DONTNEED) { purgeable_size += obj->base.size; ++purgeable_count; } - if (obj->mapping) { + if (obj->mm.mapping) { mapped_count++; mapped_size += obj->base.size; } @@ -2028,7 +2028,7 @@ static void i915_dump_lrc_obj(struct seq_file *m, seq_printf(m, "\tBound in GGTT at 0x%08x\n", i915_ggtt_offset(vma)); - if (i915_gem_object_get_pages(vma->obj)) { + if (i915_gem_object_pin_pages(vma->obj)) { seq_puts(m, "\tFailed to get pages for context object\n\n"); return; } @@ -2047,6 +2047,7 @@ static void i915_dump_lrc_obj(struct seq_file *m, kunmap_atomic(reg_state); } + i915_gem_object_unpin_pages(vma->obj); seq_putc(m, '\n'); } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 85ae83a..50781cb 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2252,17 +2252,6 @@ struct drm_i915_gem_object { */ #define I915_BO_ACTIVE_REF (I915_BO_ACTIVE_SHIFT + I915_NUM_ENGINES) - /** - * This is set if the object has been written to since last bound - * to the GTT - */ - unsigned int dirty:1; - - /** - * Advice: are the backing pages purgeable? - */ - unsigned int madv:2; - /* * Is the object to be mapped as read-only to the GPU * Only honoured if hardware has relevant pte bit @@ -2284,16 +2273,31 @@ struct drm_i915_gem_object { unsigned int bind_count; unsigned int pin_display; - struct sg_table *pages; - int pages_pin_count; - struct i915_gem_object_page_iter { - struct scatterlist *sg_pos; - unsigned int sg_idx; /* in pages, but 32bit eek! */ + struct { + unsigned int pages_pin_count; + + struct sg_table *pages; + void *mapping; - struct radix_tree_root radix; - struct mutex lock; /* protects this cache */ - } get_page; - void *mapping; + struct i915_gem_object_page_iter { + struct scatterlist *sg_pos; + unsigned int sg_idx; /* in pages, but 32bit eek! */ + + struct radix_tree_root radix; + struct mutex lock; /* protects this cache */ + } get_page; + + /** + * Advice: are the backing pages purgeable? + */ + unsigned int madv:2; + + /** + * This is set if the object has been written to since the + * pages were last acquired. + */ + bool dirty:1; + } mm; /** Breadcrumb of last rendering to the buffer. * There can only be one writer, but we allow for multiple readers. 
@@ -3182,14 +3186,11 @@ void i915_vma_close(struct i915_vma *vma); void i915_vma_destroy(struct i915_vma *vma); int i915_gem_object_unbind(struct drm_i915_gem_object *obj); -int i915_gem_object_put_pages(struct drm_i915_gem_object *obj); void i915_gem_release_mmap(struct drm_i915_gem_object *obj); void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv); -int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj); - -static inline int __sg_page_count(struct scatterlist *sg) +static inline int __sg_page_count(const struct scatterlist *sg) { return sg->length >> PAGE_SHIFT; } @@ -3210,19 +3211,52 @@ dma_addr_t i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, unsigned long n); -static inline void i915_gem_object_pin_pages(struct drm_i915_gem_object *obj) +int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj); + +static inline int __must_check +i915_gem_object_pin_pages(struct drm_i915_gem_object *obj) +{ + lockdep_assert_held(&obj->base.dev->struct_mutex); + + if (obj->mm.pages_pin_count++) + return 0; + + return __i915_gem_object_get_pages(obj); +} + +static inline void +__i915_gem_object_pin_pages(struct drm_i915_gem_object *obj) { - GEM_BUG_ON(obj->pages == NULL); - obj->pages_pin_count++; + lockdep_assert_held(&obj->base.dev->struct_mutex); + GEM_BUG_ON(!obj->mm.pages); + + obj->mm.pages_pin_count++; +} + +static inline bool +i915_gem_object_has_pinned_pages(struct drm_i915_gem_object *obj) +{ + return obj->mm.pages_pin_count; +} + +static inline void +__i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj) +{ + lockdep_assert_held(&obj->base.dev->struct_mutex); + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); + GEM_BUG_ON(!obj->mm.pages); + + obj->mm.pages_pin_count--; + GEM_BUG_ON(obj->mm.pages_pin_count < obj->bind_count); } static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj) { - GEM_BUG_ON(obj->pages_pin_count == 0); - obj->pages_pin_count--; - GEM_BUG_ON(obj->pages_pin_count < obj->bind_count); + __i915_gem_object_unpin_pages(obj); } +int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj); + enum i915_map_type { I915_MAP_WB = 0, I915_MAP_WC, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index aa0de3a..0d702c8 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -216,7 +216,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) sg_dma_address(sg) = obj->phys_handle->busaddr; sg_dma_len(sg) = obj->base.size; - obj->pages = st; + obj->mm.pages = st; return 0; } @@ -225,7 +225,7 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj) { int ret; - BUG_ON(obj->madv == __I915_MADV_PURGED); + GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED); ret = i915_gem_object_set_to_cpu_domain(obj, true); if (WARN_ON(ret)) { @@ -235,10 +235,10 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj) obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; } - if (obj->madv == I915_MADV_DONTNEED) - obj->dirty = 0; + if (obj->mm.madv == I915_MADV_DONTNEED) + obj->mm.dirty = false; - if (obj->dirty) { + if (obj->mm.dirty) { struct address_space *mapping = obj->base.filp->f_mapping; char *vaddr = obj->phys_handle->vaddr; int i; @@ -257,22 +257,23 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj) kunmap_atomic(dst); set_page_dirty(page); - if (obj->madv == I915_MADV_WILLNEED) + if (obj->mm.madv == I915_MADV_WILLNEED) mark_page_accessed(page); put_page(page); vaddr += PAGE_SIZE; } - 
obj->dirty = 0; + obj->mm.dirty = false; } - sg_free_table(obj->pages); - kfree(obj->pages); + sg_free_table(obj->mm.pages); + kfree(obj->mm.pages); } static void i915_gem_object_release_phys(struct drm_i915_gem_object *obj) { drm_pci_free(obj->base.dev, obj->phys_handle); + i915_gem_object_unpin_pages(obj); } static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { @@ -507,7 +508,7 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, return 0; } - if (obj->madv != I915_MADV_WILLNEED) + if (obj->mm.madv != I915_MADV_WILLNEED) return -EFAULT; if (obj->base.filp == NULL) @@ -517,7 +518,7 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, if (ret) return ret; - ret = i915_gem_object_put_pages(obj); + ret = __i915_gem_object_put_pages(obj); if (ret) return ret; @@ -529,7 +530,7 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, obj->phys_handle = phys; obj->ops = &i915_gem_phys_ops; - return i915_gem_object_get_pages(obj); + return i915_gem_object_pin_pages(obj); } static int @@ -725,12 +726,10 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, if (ret) return ret; - ret = i915_gem_object_get_pages(obj); + ret = i915_gem_object_pin_pages(obj); if (ret) return ret; - i915_gem_object_pin_pages(obj); - i915_gem_object_flush_gtt_write_domain(obj); /* If we're not in the cpu read domain, set ourself into the gtt @@ -778,12 +777,10 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, if (ret) return ret; - ret = i915_gem_object_get_pages(obj); + ret = i915_gem_object_pin_pages(obj); if (ret) return ret; - i915_gem_object_pin_pages(obj); - i915_gem_object_flush_gtt_write_domain(obj); /* If we're not in the cpu write domain, set ourself into the @@ -813,7 +810,7 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, obj->cache_dirty = true; intel_fb_obj_invalidate(obj, ORIGIN_CPU); - obj->dirty = 1; + obj->mm.dirty = true; /* return with the pages pinned */ return 0; @@ -951,13 +948,11 @@ i915_gem_gtt_pread(struct drm_device *dev, if (ret) goto out; - ret = i915_gem_object_get_pages(obj); + ret = i915_gem_object_pin_pages(obj); if (ret) { remove_mappable_node(&node); goto out; } - - i915_gem_object_pin_pages(obj); } ret = i915_gem_object_set_to_gtt_domain(obj, false); @@ -1064,7 +1059,7 @@ i915_gem_shmem_pread(struct drm_device *dev, offset = args->offset; remain = args->size; - for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, + for_each_sg_page(obj->mm.pages->sgl, &sg_iter, obj->mm.pages->nents, offset >> PAGE_SHIFT) { struct page *page = sg_page_iter_page(&sg_iter); @@ -1254,13 +1249,11 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915, if (ret) goto out; - ret = i915_gem_object_get_pages(obj); + ret = i915_gem_object_pin_pages(obj); if (ret) { remove_mappable_node(&node); goto out; } - - i915_gem_object_pin_pages(obj); } ret = i915_gem_object_set_to_gtt_domain(obj, true); @@ -1268,7 +1261,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915, goto out_unpin; intel_fb_obj_invalidate(obj, ORIGIN_CPU); - obj->dirty = true; + obj->mm.dirty = true; user_data = u64_to_user_ptr(args->data_ptr); offset = args->offset; @@ -1439,7 +1432,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev, offset = args->offset; remain = args->size; - for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, + for_each_sg_page(obj->mm.pages->sgl, &sg_iter, obj->mm.pages->nents, offset >> PAGE_SHIFT) { struct page *page = sg_page_iter_page(&sg_iter); int partial_cacheline_write; @@ -2266,7 
+2259,7 @@ i915_gem_object_truncate(struct drm_i915_gem_object *obj) * backing pages, *now*. */ shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1); - obj->madv = __I915_MADV_PURGED; + obj->mm.madv = __I915_MADV_PURGED; } /* Try to discard unwanted pages */ @@ -2275,7 +2268,7 @@ i915_gem_object_invalidate(struct drm_i915_gem_object *obj) { struct address_space *mapping; - switch (obj->madv) { + switch (obj->mm.madv) { case I915_MADV_DONTNEED: i915_gem_object_truncate(obj); case __I915_MADV_PURGED: @@ -2296,7 +2289,7 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) struct page *page; int ret; - BUG_ON(obj->madv == __I915_MADV_PURGED); + GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED); ret = i915_gem_object_set_to_cpu_domain(obj, true); if (WARN_ON(ret)) { @@ -2312,22 +2305,22 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) if (i915_gem_object_needs_bit17_swizzle(obj)) i915_gem_object_save_bit_17_swizzle(obj); - if (obj->madv == I915_MADV_DONTNEED) - obj->dirty = 0; + if (obj->mm.madv == I915_MADV_DONTNEED) + obj->mm.dirty = false; - for_each_sgt_page(page, sgt_iter, obj->pages) { - if (obj->dirty) + for_each_sgt_page(page, sgt_iter, obj->mm.pages) { + if (obj->mm.dirty) set_page_dirty(page); - if (obj->madv == I915_MADV_WILLNEED) + if (obj->mm.madv == I915_MADV_WILLNEED) mark_page_accessed(page); put_page(page); } - obj->dirty = 0; + obj->mm.dirty = false; - sg_free_table(obj->pages); - kfree(obj->pages); + sg_free_table(obj->mm.pages); + kfree(obj->mm.pages); } static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) @@ -2335,21 +2328,20 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) struct radix_tree_iter iter; void **slot; - radix_tree_for_each_slot(slot, &obj->get_page.radix, &iter, 0) - radix_tree_delete(&obj->get_page.radix, iter.index); + radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0) + radix_tree_delete(&obj->mm.get_page.radix, iter.index); } -int -i915_gem_object_put_pages(struct drm_i915_gem_object *obj) +int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj) { const struct drm_i915_gem_object_ops *ops = obj->ops; lockdep_assert_held(&obj->base.dev->struct_mutex); - if (obj->pages == NULL) + if (!obj->mm.pages) return 0; - if (obj->pages_pin_count) + if (i915_gem_object_has_pinned_pages(obj)) return -EBUSY; GEM_BUG_ON(obj->bind_count); @@ -2359,22 +2351,22 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj) * lists early. 
*/ list_del(&obj->global_list); - if (obj->mapping) { + if (obj->mm.mapping) { void *ptr; - ptr = ptr_mask_bits(obj->mapping); + ptr = ptr_mask_bits(obj->mm.mapping); if (is_vmalloc_addr(ptr)) vunmap(ptr); else kunmap(kmap_to_page(ptr)); - obj->mapping = NULL; + obj->mm.mapping = NULL; } __i915_gem_object_reset_page_iter(obj); ops->put_pages(obj); - obj->pages = NULL; + obj->mm.pages = NULL; i915_gem_object_invalidate(obj); @@ -2474,7 +2466,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) } if (sg) /* loop terminated early; short sg table */ sg_mark_end(sg); - obj->pages = st; + obj->mm.pages = st; ret = i915_gem_gtt_prepare_object(obj); if (ret) @@ -2485,7 +2477,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) if (i915_gem_object_is_tiled(obj) && dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) - i915_gem_object_pin_pages(obj); + __i915_gem_object_pin_pages(obj); return 0; @@ -2517,8 +2509,7 @@ err_pages: * either as a result of memory pressure (reaping pages under the shrinker) * or as the object is itself released. */ -int -i915_gem_object_get_pages(struct drm_i915_gem_object *obj) +int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj) { struct drm_i915_private *dev_priv = to_i915(obj->base.dev); const struct drm_i915_gem_object_ops *ops = obj->ops; @@ -2526,24 +2517,25 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj) lockdep_assert_held(&obj->base.dev->struct_mutex); - if (obj->pages) + if (obj->mm.pages) return 0; - if (obj->madv != I915_MADV_WILLNEED) { + if (obj->mm.madv != I915_MADV_WILLNEED) { DRM_DEBUG("Attempting to obtain a purgeable object\n"); + __i915_gem_object_unpin_pages(obj); return -EFAULT; } - BUG_ON(obj->pages_pin_count); - ret = ops->get_pages(obj); - if (ret) + if (ret) { + __i915_gem_object_unpin_pages(obj); return ret; + } list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list); - obj->get_page.sg_pos = obj->pages->sgl; - obj->get_page.sg_idx = 0; + obj->mm.get_page.sg_pos = obj->mm.pages->sgl; + obj->mm.get_page.sg_idx = 0; return 0; } @@ -2553,7 +2545,7 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj, enum i915_map_type type) { unsigned long n_pages = obj->base.size >> PAGE_SHIFT; - struct sg_table *sgt = obj->pages; + struct sg_table *sgt = obj->mm.pages; struct sgt_iter sgt_iter; struct page *page; struct page *stack_pages[32]; @@ -2607,14 +2599,13 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, lockdep_assert_held(&obj->base.dev->struct_mutex); GEM_BUG_ON(!i915_gem_object_has_struct_page(obj)); - ret = i915_gem_object_get_pages(obj); + ret = i915_gem_object_pin_pages(obj); if (ret) return ERR_PTR(ret); - i915_gem_object_pin_pages(obj); - pinned = obj->pages_pin_count > 1; + pinned = obj->mm.pages_pin_count > 1; - ptr = ptr_unpack_bits(obj->mapping, has_type); + ptr = ptr_unpack_bits(obj->mm.mapping, has_type); if (ptr && has_type != type) { if (pinned) { ret = -EBUSY; @@ -2626,7 +2617,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, else kunmap(kmap_to_page(ptr)); - ptr = obj->mapping = NULL; + ptr = obj->mm.mapping = NULL; } if (!ptr) { @@ -2636,7 +2627,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, goto err; } - obj->mapping = ptr_pack_bits(ptr, type); + obj->mm.mapping = ptr_pack_bits(ptr, type); } return ptr; @@ -3087,7 +3078,7 @@ int i915_vma_unbind(struct i915_vma *vma) goto destroy; GEM_BUG_ON(obj->bind_count == 0); - GEM_BUG_ON(!obj->pages); + GEM_BUG_ON(!obj->mm.pages); if 
(i915_vma_is_map_and_fenceable(vma)) { /* release the fence reg _after_ flushing */ @@ -3111,7 +3102,7 @@ int i915_vma_unbind(struct i915_vma *vma) drm_mm_remove_node(&vma->node); list_move_tail(&vma->vm_link, &vma->vm->unbound_list); - if (vma->pages != obj->pages) { + if (vma->pages != obj->mm.pages) { GEM_BUG_ON(!vma->pages); sg_free_table(vma->pages); kfree(vma->pages); @@ -3244,12 +3235,10 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) return -E2BIG; } - ret = i915_gem_object_get_pages(obj); + ret = i915_gem_object_pin_pages(obj); if (ret) return ret; - i915_gem_object_pin_pages(obj); - if (flags & PIN_OFFSET_FIXED) { u64 offset = flags & PIN_OFFSET_MASK; if (offset & (alignment - 1) || offset > end - size) { @@ -3331,7 +3320,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj, * to GPU, and we can ignore the cache flush because it'll happen * again at bind time. */ - if (obj->pages == NULL) + if (!obj->mm.pages) return false; /* @@ -3355,7 +3344,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj, } trace_i915_gem_object_clflush(obj); - drm_clflush_sg(obj->pages); + drm_clflush_sg(obj->mm.pages); obj->cache_dirty = false; return true; @@ -3469,7 +3458,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) * continue to assume that the obj remained out of the CPU cached * domain. */ - ret = i915_gem_object_get_pages(obj); + ret = i915_gem_object_pin_pages(obj); if (ret) return ret; @@ -3493,7 +3482,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) if (write) { obj->base.read_domains = I915_GEM_DOMAIN_GTT; obj->base.write_domain = I915_GEM_DOMAIN_GTT; - obj->dirty = 1; + obj->mm.dirty = true; } trace_i915_gem_object_change_domain(obj, @@ -3502,6 +3491,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) /* And bump the LRU for this access */ i915_gem_object_bump_inactive_ggtt(obj); + i915_gem_object_unpin_pages(obj); return 0; } @@ -4304,23 +4294,23 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, goto unlock; } - if (obj->pages && + if (obj->mm.pages && i915_gem_object_is_tiled(obj) && dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { - if (obj->madv == I915_MADV_WILLNEED) - i915_gem_object_unpin_pages(obj); + if (obj->mm.madv == I915_MADV_WILLNEED) + __i915_gem_object_unpin_pages(obj); if (args->madv == I915_MADV_WILLNEED) - i915_gem_object_pin_pages(obj); + __i915_gem_object_pin_pages(obj); } - if (obj->madv != __I915_MADV_PURGED) - obj->madv = args->madv; + if (obj->mm.madv != __I915_MADV_PURGED) + obj->mm.madv = args->madv; /* if the object is no longer attached, discard its backing storage */ - if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL) + if (obj->mm.madv == I915_MADV_DONTNEED && !obj->mm.pages) i915_gem_object_truncate(obj); - args->retained = obj->madv != __I915_MADV_PURGED; + args->retained = obj->mm.madv != __I915_MADV_PURGED; i915_gem_object_put(obj); unlock: @@ -4347,9 +4337,10 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, obj->ops = ops; obj->frontbuffer_ggtt_origin = ORIGIN_GTT; - obj->madv = I915_MADV_WILLNEED; - INIT_RADIX_TREE(&obj->get_page.radix, GFP_KERNEL | __GFP_NOWARN); - mutex_init(&obj->get_page.lock); + + obj->mm.madv = I915_MADV_WILLNEED; + INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN); + mutex_init(&obj->mm.get_page.lock); i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size); } @@ -4441,7 +4432,7 @@ static bool discard_backing_storage(struct 
drm_i915_gem_object *obj) * back the contents from the GPU. */ - if (obj->madv != I915_MADV_WILLNEED) + if (obj->mm.madv != I915_MADV_WILLNEED) return false; if (obj->base.filp == NULL) @@ -4483,32 +4474,27 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj) } GEM_BUG_ON(obj->bind_count); - /* Stolen objects don't hold a ref, but do hold pin count. Fix that up - * before progressing. */ - if (obj->stolen) - i915_gem_object_unpin_pages(obj); - WARN_ON(atomic_read(&obj->frontbuffer_bits)); - if (obj->pages && obj->madv == I915_MADV_WILLNEED && + if (obj->mm.pages && obj->mm.madv == I915_MADV_WILLNEED && dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES && i915_gem_object_is_tiled(obj)) - i915_gem_object_unpin_pages(obj); + __i915_gem_object_unpin_pages(obj); - if (WARN_ON(obj->pages_pin_count)) - obj->pages_pin_count = 0; + if (obj->ops->release) + obj->ops->release(obj); + + if (WARN_ON(i915_gem_object_has_pinned_pages(obj))) + obj->mm.pages_pin_count = 0; if (discard_backing_storage(obj)) - obj->madv = I915_MADV_DONTNEED; - i915_gem_object_put_pages(obj); + obj->mm.madv = I915_MADV_DONTNEED; + __i915_gem_object_put_pages(obj); - BUG_ON(obj->pages); + GEM_BUG_ON(obj->mm.pages); if (obj->base.import_attach) drm_prime_gem_destroy(&obj->base, NULL); - if (obj->ops->release) - obj->ops->release(obj); - drm_gem_object_release(&obj->base); i915_gem_info_remove_obj(dev_priv, obj->base.size); @@ -5063,14 +5049,13 @@ i915_gem_object_create_from_data(struct drm_device *dev, if (ret) goto fail; - ret = i915_gem_object_get_pages(obj); + ret = i915_gem_object_pin_pages(obj); if (ret) goto fail; - i915_gem_object_pin_pages(obj); - sg = obj->pages; + sg = obj->mm.pages; bytes = sg_copy_from_buffer(sg->sgl, sg->nents, (void *)data, size); - obj->dirty = 1; /* Backing store is now out of date */ + obj->mm.dirty = true; /* Backing store is now out of date */ i915_gem_object_unpin_pages(obj); if (WARN_ON(bytes != size)) { @@ -5091,13 +5076,13 @@ i915_gem_object_get_sg(struct drm_i915_gem_object *obj, unsigned int n, unsigned int *offset) { - struct i915_gem_object_page_iter *iter = &obj->get_page; + struct i915_gem_object_page_iter *iter = &obj->mm.get_page; struct scatterlist *sg; unsigned int idx, count; might_sleep(); GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT); - GEM_BUG_ON(obj->pages_pin_count == 0); + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); /* As we iterate forward through the sg, we record each entry in a * radixtree for quick repeated (backwards) lookups. 
If we have seen @@ -5222,7 +5207,7 @@ i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, struct page *page; page = i915_gem_object_get_page(obj, n); - if (!obj->dirty) + if (!obj->mm.dirty) set_page_dirty(page); return page; diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c index aa4e1e0..e0f38e5 100644 --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c @@ -130,11 +130,10 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, return obj; } - ret = i915_gem_object_get_pages(obj); + ret = i915_gem_object_pin_pages(obj); if (ret) return ERR_PTR(ret); list_move_tail(&obj->batch_pool_link, list); - i915_gem_object_pin_pages(obj); return obj; } diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index 97c9d68..10441dc 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -48,12 +48,10 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme if (ret) goto err; - ret = i915_gem_object_get_pages(obj); + ret = i915_gem_object_pin_pages(obj); if (ret) goto err_unlock; - i915_gem_object_pin_pages(obj); - /* Copy sg so that we make an independent mapping */ st = kmalloc(sizeof(struct sg_table), GFP_KERNEL); if (st == NULL) { @@ -61,13 +59,13 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme goto err_unpin; } - ret = sg_alloc_table(st, obj->pages->nents, GFP_KERNEL); + ret = sg_alloc_table(st, obj->mm.pages->nents, GFP_KERNEL); if (ret) goto err_free; - src = obj->pages->sgl; + src = obj->mm.pages->sgl; dst = st->sgl; - for (i = 0; i < obj->pages->nents; i++) { + for (i = 0; i < obj->mm.pages->nents; i++) { sg_set_page(dst, sg_page(src), src->length, 0); dst = sg_next(dst); src = sg_next(src); @@ -299,14 +297,14 @@ static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) if (IS_ERR(sg)) return PTR_ERR(sg); - obj->pages = sg; + obj->mm.pages = sg; return 0; } static void i915_gem_object_put_pages_dmabuf(struct drm_i915_gem_object *obj) { dma_buf_unmap_attachment(obj->base.import_attach, - obj->pages, DMA_BIDIRECTIONAL); + obj->mm.pages, DMA_BIDIRECTIONAL); } static const struct drm_i915_gem_object_ops i915_gem_object_dmabuf_ops = { diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 4cafce9..d95c4e0 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1281,7 +1281,7 @@ void i915_vma_move_to_active(struct i915_vma *vma, GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); - obj->dirty = 1; /* be paranoid */ + obj->mm.dirty = true; /* be paranoid */ /* Add a reference if we're newly entering the active list. 
* The order in which we add operations to the retirement queue is diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c index 3c5a808..5aadab5 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence.c +++ b/drivers/gpu/drm/i915/i915_gem_fence.c @@ -664,7 +664,7 @@ i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj) return; i = 0; - for_each_sgt_page(page, sgt_iter, obj->pages) { + for_each_sgt_page(page, sgt_iter, obj->mm.pages) { char new_bit_17 = page_to_phys(page) >> 17; if ((new_bit_17 & 0x1) != (test_bit(i, obj->bit_17) != 0)) { @@ -703,7 +703,7 @@ i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj) i = 0; - for_each_sgt_page(page, sgt_iter, obj->pages) { + for_each_sgt_page(page, sgt_iter, obj->mm.pages) { if (page_to_phys(page) & (1 << 17)) __set_bit(i, obj->bit_17); else diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index b3f341f..794ccc4 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -175,7 +175,7 @@ static int ppgtt_bind_vma(struct i915_vma *vma, { u32 pte_flags = 0; - vma->pages = vma->obj->pages; + vma->pages = vma->obj->mm.pages; /* Currently applicable only to VLV */ if (vma->obj->gt_ro) @@ -2373,7 +2373,7 @@ void i915_gem_suspend_gtt_mappings(struct drm_device *dev) int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj) { if (!dma_map_sg(&obj->base.dev->pdev->dev, - obj->pages->sgl, obj->pages->nents, + obj->mm.pages->sgl, obj->mm.pages->nents, PCI_DMA_BIDIRECTIONAL)) return -ENOSPC; @@ -2710,7 +2710,7 @@ void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj) } } - dma_unmap_sg(kdev, obj->pages->sgl, obj->pages->nents, + dma_unmap_sg(kdev, obj->mm.pages->sgl, obj->mm.pages->nents, PCI_DMA_BIDIRECTIONAL); } @@ -3548,7 +3548,7 @@ intel_rotate_fb_obj_pages(const struct intel_rotation_info *rot_info, /* Populate source page list from the object. */ i = 0; - for_each_sgt_dma(dma_addr, sgt_iter, obj->pages) + for_each_sgt_dma(dma_addr, sgt_iter, obj->mm.pages) page_addr_list[i++] = dma_addr; GEM_BUG_ON(i != n_pages); @@ -3641,7 +3641,7 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma) return 0; if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) - vma->pages = vma->obj->pages; + vma->pages = vma->obj->mm.pages; else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED) vma->pages = intel_rotate_fb_obj_pages(&vma->ggtt_view.params.rotated, vma->obj); diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c index 02e66fa..08a2576 100644 --- a/drivers/gpu/drm/i915/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/i915_gem_internal.c @@ -102,10 +102,10 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) sg = __sg_next(sg); } while (1); - obj->pages = st; + obj->mm.pages = st; if (i915_gem_gtt_prepare_object(obj)) { - obj->pages = NULL; + obj->mm.pages = NULL; goto err; } @@ -114,7 +114,7 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) * and the caller is expected to repopulate - the contents of this * object are only valid whilst active and pinned. 
*/ - obj->madv = I915_MADV_DONTNEED; + obj->mm.madv = I915_MADV_DONTNEED; return 0; err: @@ -126,10 +126,10 @@ err: static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj) { i915_gem_gtt_finish_object(obj); - internal_free_pages(obj->pages); + internal_free_pages(obj->mm.pages); - obj->dirty = 0; - obj->madv = I915_MADV_WILLNEED; + obj->mm.dirty = false; + obj->mm.madv = I915_MADV_WILLNEED; } static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = { diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index 0529324..57918f2 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -230,7 +230,7 @@ int i915_gem_render_state_emit(struct drm_i915_gem_request *req) return 0; /* Recreate the page after shrinking */ - if (!so->vma->obj->pages) + if (!so->vma->obj->mm.pages) so->batch_offset = -1; ret = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH); diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index de25b6e..124f69a 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -78,7 +78,7 @@ static bool can_release_pages(struct drm_i915_gem_object *obj) * to the GPU, simply unbinding from the GPU is not going to succeed * in releasing our pin count on the pages themselves. */ - if (obj->pages_pin_count > obj->bind_count) + if (obj->mm.pages_pin_count > obj->bind_count) return false; if (any_vma_pinned(obj)) @@ -88,7 +88,7 @@ static bool can_release_pages(struct drm_i915_gem_object *obj) * discard the contents (because the user has marked them as being * purgeable) or if we can move their contents out to swap. */ - return swap_available() || obj->madv == I915_MADV_DONTNEED; + return swap_available() || obj->mm.madv == I915_MADV_DONTNEED; } /** @@ -175,11 +175,11 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, list_move_tail(&obj->global_list, &still_in_list); if (flags & I915_SHRINK_PURGEABLE && - obj->madv != I915_MADV_DONTNEED) + obj->mm.madv != I915_MADV_DONTNEED) continue; if (flags & I915_SHRINK_VMAPS && - !is_vmalloc_addr(obj->mapping)) + !is_vmalloc_addr(obj->mm.mapping)) continue; if (!(flags & I915_SHRINK_ACTIVE) && @@ -194,7 +194,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, /* For the unbound phase, this should be a no-op! 
*/ i915_gem_object_unbind(obj); - if (i915_gem_object_put_pages(obj) == 0) + if (__i915_gem_object_put_pages(obj) == 0) count += obj->base.size >> PAGE_SHIFT; i915_gem_object_put(obj); diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 70e61bc..0acbdcb 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -555,16 +555,17 @@ static int i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj) static void i915_gem_object_put_pages_stolen(struct drm_i915_gem_object *obj) { /* Should only be called during free */ - sg_free_table(obj->pages); - kfree(obj->pages); + sg_free_table(obj->mm.pages); + kfree(obj->mm.pages); } - static void i915_gem_object_release_stolen(struct drm_i915_gem_object *obj) { struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + __i915_gem_object_unpin_pages(obj); + if (obj->stolen) { i915_gem_stolen_remove_node(dev_priv, obj->stolen); kfree(obj->stolen); @@ -590,15 +591,16 @@ _i915_gem_object_create_stolen(struct drm_device *dev, drm_gem_private_object_init(dev, &obj->base, stolen->size); i915_gem_object_init(obj, &i915_gem_object_stolen_ops); - obj->pages = i915_pages_create_for_stolen(dev, - stolen->start, stolen->size); - if (obj->pages == NULL) + obj->mm.pages = i915_pages_create_for_stolen(dev, + stolen->start, + stolen->size); + if (!obj->mm.pages) goto cleanup; - obj->get_page.sg_pos = obj->pages->sgl; - obj->get_page.sg_idx = 0; + obj->mm.get_page.sg_pos = obj->mm.pages->sgl; + obj->mm.get_page.sg_idx = 0; - i915_gem_object_pin_pages(obj); + __i915_gem_object_pin_pages(obj); obj->stolen = stolen; obj->base.read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT; @@ -718,14 +720,14 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev, goto err; } - vma->pages = obj->pages; + vma->pages = obj->mm.pages; vma->flags |= I915_VMA_GLOBAL_BIND; __i915_vma_set_map_and_fenceable(vma); list_move_tail(&vma->vm_link, &ggtt->base.inactive_list); obj->bind_count++; list_add_tail(&obj->global_list, &dev_priv->mm.bound_list); - i915_gem_object_pin_pages(obj); + __i915_gem_object_pin_pages(obj); return obj; diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 71f80d2..34d5ada 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -259,13 +259,13 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, if (!err) { struct i915_vma *vma; - if (obj->pages && - obj->madv == I915_MADV_WILLNEED && + if (obj->mm.pages && + obj->mm.madv == I915_MADV_WILLNEED && dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { if (args->tiling_mode == I915_TILING_NONE) - i915_gem_object_unpin_pages(obj); + __i915_gem_object_unpin_pages(obj); if (!i915_gem_object_is_tiled(obj)) - i915_gem_object_pin_pages(obj); + __i915_gem_object_pin_pages(obj); } list_for_each_entry(vma, &obj->vma_list, obj_link) { diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index e2fa970..0cbc8f7 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -73,10 +73,10 @@ static void cancel_userptr(struct work_struct *work) /* Cancel any active worker and force us to re-evaluate gup */ obj->userptr.work = NULL; - if (obj->pages != NULL) { + if (obj->mm.pages) { /* We are inside a kthread context and can't be interrupted */ WARN_ON(i915_gem_object_unbind(obj)); - WARN_ON(i915_gem_object_put_pages(obj)); + 
WARN_ON(__i915_gem_object_put_pages(obj)); } i915_gem_object_put(obj); @@ -432,15 +432,15 @@ __i915_gem_userptr_set_pages(struct drm_i915_gem_object *obj, { int ret; - ret = st_set_pages(&obj->pages, pvec, num_pages); + ret = st_set_pages(&obj->mm.pages, pvec, num_pages); if (ret) return ret; ret = i915_gem_gtt_prepare_object(obj); if (ret) { - sg_free_table(obj->pages); - kfree(obj->pages); - obj->pages = NULL; + sg_free_table(obj->mm.pages); + kfree(obj->mm.pages); + obj->mm.pages = NULL; } return ret; @@ -530,8 +530,8 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) if (ret == 0) { list_add_tail(&obj->global_list, &to_i915(dev)->mm.unbound_list); - obj->get_page.sg_pos = obj->pages->sgl; - obj->get_page.sg_idx = 0; + obj->mm.get_page.sg_pos = obj->mm.pages->sgl; + obj->mm.get_page.sg_idx = 0; pinned = 0; } } @@ -672,22 +672,22 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj) BUG_ON(obj->userptr.work != NULL); __i915_gem_userptr_set_active(obj, false); - if (obj->madv != I915_MADV_WILLNEED) - obj->dirty = 0; + if (obj->mm.madv != I915_MADV_WILLNEED) + obj->mm.dirty = false; i915_gem_gtt_finish_object(obj); - for_each_sgt_page(page, sgt_iter, obj->pages) { - if (obj->dirty) + for_each_sgt_page(page, sgt_iter, obj->mm.pages) { + if (obj->mm.dirty) set_page_dirty(page); mark_page_accessed(page); put_page(page); } - obj->dirty = 0; + obj->mm.dirty = false; - sg_free_table(obj->pages); - kfree(obj->pages); + sg_free_table(obj->mm.pages); + kfree(obj->mm.pages); } static void diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index d5feace..5bbb372 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -896,8 +896,8 @@ static void capture_bo(struct drm_i915_error_buffer *err, err->write_domain = obj->base.write_domain; err->fence_reg = vma->fence ? vma->fence->id : -1; err->tiling = i915_gem_object_get_tiling(obj); - err->dirty = obj->dirty; - err->purgeable = obj->madv != I915_MADV_WILLNEED; + err->dirty = obj->mm.dirty; + err->purgeable = obj->mm.madv != I915_MADV_WILLNEED; err->userptr = obj->userptr.mm != NULL; err->cache_level = obj->cache_level; } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 1c1bd30..cb30549 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -744,7 +744,7 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx, ce->lrc_reg_state[CTX_RING_BUFFER_START+1] = i915_ggtt_offset(ce->ring->vma); - ce->state->obj->dirty = true; + ce->state->obj->mm.dirty = true; /* Invalidate GuC TLB. */ if (i915.enable_guc_submission) { @@ -2042,7 +2042,7 @@ populate_lr_context(struct i915_gem_context *ctx, DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret); return ret; } - ctx_obj->dirty = true; + ctx_obj->mm.dirty = true; /* The second page of the context object contains some fields which must * be set up prior to the first execution. 
*/ @@ -2180,7 +2180,7 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv) reg[CTX_RING_HEAD+1] = 0; reg[CTX_RING_TAIL+1] = 0; - ce->state->obj->dirty = true; + ce->state->obj->mm.dirty = true; i915_gem_object_unpin_map(ce->state->obj); ce->ring->head = ce->ring->tail = 0; -- cgit v1.1 From 03ac84f1830ec0b90f622500591eb3cc554ee479 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:36 +0100 Subject: drm/i915: Pass around sg_table to get_pages/put_pages backend The plan is to move obj->pages out from under the struct_mutex into its own per-object lock. We need to prune any assumption of the struct_mutex from the get_pages/put_pages backends, and to make it easier we pass around the sg_table to operate on rather than indirectly via the obj. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-13-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 36 +++++-- drivers/gpu/drm/i915/i915_gem.c | 172 +++++++++++++++---------------- drivers/gpu/drm/i915/i915_gem_dmabuf.c | 20 ++-- drivers/gpu/drm/i915/i915_gem_fence.c | 17 +-- drivers/gpu/drm/i915/i915_gem_gtt.c | 19 ++-- drivers/gpu/drm/i915/i915_gem_gtt.h | 6 +- drivers/gpu/drm/i915/i915_gem_internal.c | 23 ++--- drivers/gpu/drm/i915/i915_gem_shrinker.c | 11 +- drivers/gpu/drm/i915/i915_gem_stolen.c | 43 ++++---- drivers/gpu/drm/i915/i915_gem_userptr.c | 88 ++++++++-------- 10 files changed, 227 insertions(+), 208 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 50781cb..db67bec 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2185,8 +2185,8 @@ struct drm_i915_gem_object_ops { * being released or under memory pressure (where we attempt to * reap pages for the shrinker). 
*/ - int (*get_pages)(struct drm_i915_gem_object *); - void (*put_pages)(struct drm_i915_gem_object *); + struct sg_table *(*get_pages)(struct drm_i915_gem_object *); + void (*put_pages)(struct drm_i915_gem_object *, struct sg_table *); int (*dmabuf_export)(struct drm_i915_gem_object *); void (*release)(struct drm_i915_gem_object *); @@ -2321,8 +2321,6 @@ struct drm_i915_gem_object { struct i915_gem_userptr { uintptr_t ptr; unsigned read_only :1; - unsigned workers :4; -#define I915_GEM_USERPTR_MAX_WORKERS 15 struct i915_mm_struct *mm; struct i915_mmu_object *mmu_object; @@ -2384,6 +2382,19 @@ __deprecated extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *); static inline bool +i915_gem_object_is_dead(const struct drm_i915_gem_object *obj) +{ + return atomic_read(&obj->base.refcount.refcount) == 0; +} + +#if IS_ENABLED(CONFIG_LOCKDEP) +#define lockdep_assert_held_unless(lock, cond) \ + GEM_BUG_ON(debug_locks && !lockdep_is_held(lock) && !(cond)) +#else +#define lockdep_assert_held_unless(lock, cond) +#endif + +static inline bool i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) { return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE; @@ -3211,6 +3222,8 @@ dma_addr_t i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, unsigned long n); +void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages); int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj); static inline int __must_check @@ -3227,7 +3240,8 @@ i915_gem_object_pin_pages(struct drm_i915_gem_object *obj) static inline void __i915_gem_object_pin_pages(struct drm_i915_gem_object *obj) { - lockdep_assert_held(&obj->base.dev->struct_mutex); + lockdep_assert_held_unless(&obj->base.dev->struct_mutex, + i915_gem_object_is_dead(obj)); GEM_BUG_ON(!obj->mm.pages); obj->mm.pages_pin_count++; @@ -3242,7 +3256,8 @@ i915_gem_object_has_pinned_pages(struct drm_i915_gem_object *obj) static inline void __i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj) { - lockdep_assert_held(&obj->base.dev->struct_mutex); + lockdep_assert_held_unless(&obj->base.dev->struct_mutex, + i915_gem_object_is_dead(obj)); GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); GEM_BUG_ON(!obj->mm.pages); @@ -3255,7 +3270,8 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj) __i915_gem_object_unpin_pages(obj); } -int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj); +void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj); +void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj); enum i915_map_type { I915_MAP_WB = 0, @@ -3480,8 +3496,10 @@ i915_vma_unpin_fence(struct i915_vma *vma) void i915_gem_restore_fences(struct drm_device *dev); void i915_gem_detect_bit_6_swizzle(struct drm_device *dev); -void i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj); -void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj); +void i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj, + struct sg_table *pages); +void i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj, + struct sg_table *pages); /* i915_gem_context.c */ int __must_check i915_gem_context_init(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 0d702c8..56060ae 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -169,7 +169,7 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, return 0; } 
-static int +static struct sg_table * i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) { struct address_space *mapping = obj->base.filp->f_mapping; @@ -179,7 +179,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) int i; if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj))) - return -EINVAL; + return ERR_PTR(-EINVAL); for (i = 0; i < obj->base.size / PAGE_SIZE; i++) { struct page *page; @@ -187,7 +187,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) page = shmem_read_mapping_page(mapping, i); if (IS_ERR(page)) - return PTR_ERR(page); + return ERR_CAST(page); src = kmap_atomic(page); memcpy(vaddr, src, PAGE_SIZE); @@ -202,11 +202,11 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) st = kmalloc(sizeof(*st), GFP_KERNEL); if (st == NULL) - return -ENOMEM; + return ERR_PTR(-ENOMEM); if (sg_alloc_table(st, 1, GFP_KERNEL)) { kfree(st); - return -ENOMEM; + return ERR_PTR(-ENOMEM); } sg = st->sgl; @@ -216,28 +216,30 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) sg_dma_address(sg) = obj->phys_handle->busaddr; sg_dma_len(sg) = obj->base.size; - obj->mm.pages = st; - return 0; + return st; } static void -i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj) +__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj) { - int ret; - GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED); - ret = i915_gem_object_set_to_cpu_domain(obj, true); - if (WARN_ON(ret)) { - /* In the event of a disaster, abandon all caches and - * hope for the best. - */ - obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; - } - if (obj->mm.madv == I915_MADV_DONTNEED) obj->mm.dirty = false; + if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) + i915_gem_clflush_object(obj, false); + + obj->base.read_domains = I915_GEM_DOMAIN_CPU; + obj->base.write_domain = I915_GEM_DOMAIN_CPU; +} + +static void +i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + __i915_gem_object_release_shmem(obj); + if (obj->mm.dirty) { struct address_space *mapping = obj->base.filp->f_mapping; char *vaddr = obj->phys_handle->vaddr; @@ -265,8 +267,8 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj) obj->mm.dirty = false; } - sg_free_table(obj->mm.pages); - kfree(obj->mm.pages); + sg_free_table(pages); + kfree(pages); } static void @@ -518,9 +520,9 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, if (ret) return ret; - ret = __i915_gem_object_put_pages(obj); - if (ret) - return ret; + __i915_gem_object_put_pages(obj); + if (obj->mm.pages) + return -EBUSY; /* create a new object */ phys = drm_pci_alloc(obj->base.dev, obj->base.size, align); @@ -536,7 +538,7 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, static int i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, struct drm_i915_gem_pwrite *args, - struct drm_file *file_priv) + struct drm_file *file) { struct drm_device *dev = obj->base.dev; void *vaddr = obj->phys_handle->vaddr + args->offset; @@ -552,7 +554,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, I915_WAIT_LOCKED | I915_WAIT_ALL, MAX_SCHEDULE_TIMEOUT, - to_rps_client(file_priv)); + to_rps_client(file)); if (ret) return ret; @@ -2263,8 +2265,7 @@ i915_gem_object_truncate(struct drm_i915_gem_object *obj) } /* Try to discard unwanted pages */ -static void -i915_gem_object_invalidate(struct drm_i915_gem_object *obj) +void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj) { struct address_space *mapping; @@ -2283,32 
+2284,20 @@ i915_gem_object_invalidate(struct drm_i915_gem_object *obj) } static void -i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) +i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj, + struct sg_table *pages) { struct sgt_iter sgt_iter; struct page *page; - int ret; - GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED); - - ret = i915_gem_object_set_to_cpu_domain(obj, true); - if (WARN_ON(ret)) { - /* In the event of a disaster, abandon all caches and - * hope for the best. - */ - i915_gem_clflush_object(obj, true); - obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU; - } + __i915_gem_object_release_shmem(obj); - i915_gem_gtt_finish_object(obj); + i915_gem_gtt_finish_pages(obj, pages); if (i915_gem_object_needs_bit17_swizzle(obj)) - i915_gem_object_save_bit_17_swizzle(obj); - - if (obj->mm.madv == I915_MADV_DONTNEED) - obj->mm.dirty = false; + i915_gem_object_save_bit_17_swizzle(obj, pages); - for_each_sgt_page(page, sgt_iter, obj->mm.pages) { + for_each_sgt_page(page, sgt_iter, pages) { if (obj->mm.dirty) set_page_dirty(page); @@ -2319,8 +2308,8 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj) } obj->mm.dirty = false; - sg_free_table(obj->mm.pages); - kfree(obj->mm.pages); + sg_free_table(pages); + kfree(pages); } static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) @@ -2332,24 +2321,22 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) radix_tree_delete(&obj->mm.get_page.radix, iter.index); } -int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj) +void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj) { - const struct drm_i915_gem_object_ops *ops = obj->ops; + struct sg_table *pages; lockdep_assert_held(&obj->base.dev->struct_mutex); - if (!obj->mm.pages) - return 0; - if (i915_gem_object_has_pinned_pages(obj)) - return -EBUSY; + return; GEM_BUG_ON(obj->bind_count); /* ->put_pages might need to allocate memory for the bit17 swizzle * array, hence protect them from being reaped by removing them from gtt * lists early. 
*/ - list_del(&obj->global_list); + pages = fetch_and_zero(&obj->mm.pages); + GEM_BUG_ON(!pages); if (obj->mm.mapping) { void *ptr; @@ -2365,12 +2352,7 @@ int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj) __i915_gem_object_reset_page_iter(obj); - ops->put_pages(obj); - obj->mm.pages = NULL; - - i915_gem_object_invalidate(obj); - - return 0; + obj->ops->put_pages(obj, pages); } static unsigned int swiotlb_max_size(void) @@ -2382,7 +2364,7 @@ static unsigned int swiotlb_max_size(void) #endif } -static int +static struct sg_table * i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) { struct drm_i915_private *dev_priv = to_i915(obj->base.dev); @@ -2401,8 +2383,8 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) * wasn't in the GTT, there shouldn't be any way it could have been in * a GPU cache */ - BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); - BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); + GEM_BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS); + GEM_BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS); max_segment = swiotlb_max_size(); if (!max_segment) @@ -2410,12 +2392,12 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) st = kmalloc(sizeof(*st), GFP_KERNEL); if (st == NULL) - return -ENOMEM; + return ERR_PTR(-ENOMEM); page_count = obj->base.size / PAGE_SIZE; if (sg_alloc_table(st, page_count, GFP_KERNEL)) { kfree(st); - return -ENOMEM; + return ERR_PTR(-ENOMEM); } /* Get the list of pages out of our struct file. They'll be pinned @@ -2466,20 +2448,19 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) } if (sg) /* loop terminated early; short sg table */ sg_mark_end(sg); - obj->mm.pages = st; - ret = i915_gem_gtt_prepare_object(obj); + ret = i915_gem_gtt_prepare_pages(obj, st); if (ret) goto err_pages; if (i915_gem_object_needs_bit17_swizzle(obj)) - i915_gem_object_do_bit_17_swizzle(obj); + i915_gem_object_do_bit_17_swizzle(obj, st); if (i915_gem_object_is_tiled(obj) && dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) __i915_gem_object_pin_pages(obj); - return 0; + return st; err_pages: sg_mark_end(sg); @@ -2499,7 +2480,35 @@ err_pages: if (ret == -ENOSPC) ret = -ENOMEM; - return ret; + return ERR_PTR(ret); +} + +void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + lockdep_assert_held(&obj->base.dev->struct_mutex); + + obj->mm.get_page.sg_pos = pages->sgl; + obj->mm.get_page.sg_idx = 0; + + obj->mm.pages = pages; +} + +static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj) +{ + struct sg_table *pages; + + if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) { + DRM_DEBUG("Attempting to obtain a purgeable object\n"); + return -EFAULT; + } + + pages = obj->ops->get_pages(obj); + if (unlikely(IS_ERR(pages))) + return PTR_ERR(pages); + + __i915_gem_object_set_pages(obj, pages); + return 0; } /* Ensure that the associated pages are gathered from the backing storage @@ -2511,33 +2520,18 @@ err_pages: */ int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj) { - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - const struct drm_i915_gem_object_ops *ops = obj->ops; - int ret; + int err; lockdep_assert_held(&obj->base.dev->struct_mutex); if (obj->mm.pages) return 0; - if (obj->mm.madv != I915_MADV_WILLNEED) { - DRM_DEBUG("Attempting to obtain a purgeable object\n"); - __i915_gem_object_unpin_pages(obj); - return -EFAULT; - } - - ret = ops->get_pages(obj); - if (ret) { + err = ____i915_gem_object_get_pages(obj); + if (err) 
__i915_gem_object_unpin_pages(obj); - return ret; - } - list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list); - - obj->mm.get_page.sg_pos = obj->mm.pages->sgl; - obj->mm.get_page.sg_idx = 0; - - return 0; + return err; } /* The 'mapping' part of i915_gem_object_pin_map() below */ diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index 10441dc..2abd524 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -289,22 +289,18 @@ struct dma_buf *i915_gem_prime_export(struct drm_device *dev, return dma_buf; } -static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) +static struct sg_table * +i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) { - struct sg_table *sg; - - sg = dma_buf_map_attachment(obj->base.import_attach, DMA_BIDIRECTIONAL); - if (IS_ERR(sg)) - return PTR_ERR(sg); - - obj->mm.pages = sg; - return 0; + return dma_buf_map_attachment(obj->base.import_attach, + DMA_BIDIRECTIONAL); } -static void i915_gem_object_put_pages_dmabuf(struct drm_i915_gem_object *obj) +static void i915_gem_object_put_pages_dmabuf(struct drm_i915_gem_object *obj, + struct sg_table *pages) { - dma_buf_unmap_attachment(obj->base.import_attach, - obj->mm.pages, DMA_BIDIRECTIONAL); + dma_buf_unmap_attachment(obj->base.import_attach, pages, + DMA_BIDIRECTIONAL); } static const struct drm_i915_gem_object_ops i915_gem_object_dmabuf_ops = { diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c index 5aadab5..cd59dbc 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence.c +++ b/drivers/gpu/drm/i915/i915_gem_fence.c @@ -644,6 +644,7 @@ i915_gem_swizzle_page(struct page *page) /** * i915_gem_object_do_bit_17_swizzle - fixup bit 17 swizzling * @obj: i915 GEM buffer object + * @pages: the scattergather list of physical pages * * This function fixes up the swizzling in case any page frame number for this * object has changed in bit 17 since that state has been saved with @@ -654,7 +655,8 @@ i915_gem_swizzle_page(struct page *page) * by swapping them out and back in again). */ void -i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj) +i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj, + struct sg_table *pages) { struct sgt_iter sgt_iter; struct page *page; @@ -664,10 +666,9 @@ i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj) return; i = 0; - for_each_sgt_page(page, sgt_iter, obj->mm.pages) { + for_each_sgt_page(page, sgt_iter, pages) { char new_bit_17 = page_to_phys(page) >> 17; - if ((new_bit_17 & 0x1) != - (test_bit(i, obj->bit_17) != 0)) { + if ((new_bit_17 & 0x1) != (test_bit(i, obj->bit_17) != 0)) { i915_gem_swizzle_page(page); set_page_dirty(page); } @@ -678,17 +679,19 @@ i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj) /** * i915_gem_object_save_bit_17_swizzle - save bit 17 swizzling * @obj: i915 GEM buffer object + * @pages: the scattergather list of physical pages * * This function saves the bit 17 of each page frame number so that swizzling * can be fixed up later on with i915_gem_object_do_bit_17_swizzle(). This must * be called before the backing storage can be unpinned. 
*/ void -i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj) +i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj, + struct sg_table *pages) { + const unsigned int page_count = obj->base.size >> PAGE_SHIFT; struct sgt_iter sgt_iter; struct page *page; - int page_count = obj->base.size >> PAGE_SHIFT; int i; if (obj->bit_17 == NULL) { @@ -703,7 +706,7 @@ i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj) i = 0; - for_each_sgt_page(page, sgt_iter, obj->mm.pages) { + for_each_sgt_page(page, sgt_iter, pages) { if (page_to_phys(page) & (1 << 17)) __set_bit(i, obj->bit_17); else diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 794ccc4..1008209 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2370,14 +2370,15 @@ void i915_gem_suspend_gtt_mappings(struct drm_device *dev) i915_ggtt_flush(dev_priv); } -int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj) +int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) { - if (!dma_map_sg(&obj->base.dev->pdev->dev, - obj->mm.pages->sgl, obj->mm.pages->nents, - PCI_DMA_BIDIRECTIONAL)) - return -ENOSPC; + if (dma_map_sg(&obj->base.dev->pdev->dev, + pages->sgl, pages->nents, + PCI_DMA_BIDIRECTIONAL)) + return 0; - return 0; + return -ENOSPC; } static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) @@ -2696,7 +2697,8 @@ static void ggtt_unbind_vma(struct i915_vma *vma) vma->node.start, size); } -void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj) +void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) { struct drm_i915_private *dev_priv = to_i915(obj->base.dev); struct device *kdev = &dev_priv->drm.pdev->dev; @@ -2710,8 +2712,7 @@ void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj) } } - dma_unmap_sg(kdev, obj->mm.pages->sgl, obj->mm.pages->nents, - PCI_DMA_BIDIRECTIONAL); + dma_unmap_sg(kdev, pages->sgl, pages->nents, PCI_DMA_BIDIRECTIONAL); } static void i915_gtt_color_adjust(struct drm_mm_node *node, diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index c241d81..dbe6a6c 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -628,8 +628,10 @@ void i915_check_and_clear_faults(struct drm_i915_private *dev_priv); void i915_gem_suspend_gtt_mappings(struct drm_device *dev); void i915_gem_restore_gtt_mappings(struct drm_device *dev); -int __must_check i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj); -void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj); +int __must_check i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages); +void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages); /* Flags used by pin/bind&friends. 
*/ #define PIN_NONBLOCK BIT(0) diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c index 08a2576..1b0607a 100644 --- a/drivers/gpu/drm/i915/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/i915_gem_internal.c @@ -42,7 +42,8 @@ static void internal_free_pages(struct sg_table *st) kfree(st); } -static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) +static struct sg_table * +i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) { struct drm_i915_private *i915 = to_i915(obj->base.dev); unsigned int npages = obj->base.size / PAGE_SIZE; @@ -53,11 +54,11 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) st = kmalloc(sizeof(*st), GFP_KERNEL); if (!st) - return -ENOMEM; + return ERR_PTR(-ENOMEM); if (sg_alloc_table(st, npages, GFP_KERNEL)) { kfree(st); - return -ENOMEM; + return ERR_PTR(-ENOMEM); } sg = st->sgl; @@ -102,12 +103,9 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) sg = __sg_next(sg); } while (1); - obj->mm.pages = st; - if (i915_gem_gtt_prepare_object(obj)) { - obj->mm.pages = NULL; + if (i915_gem_gtt_prepare_pages(obj, st)) goto err; - } /* Mark the pages as dontneed whilst they are still pinned. As soon * as they are unpinned they are allowed to be reaped by the shrinker, @@ -115,18 +113,19 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj) * object are only valid whilst active and pinned. */ obj->mm.madv = I915_MADV_DONTNEED; - return 0; + return st; err: sg_mark_end(sg); internal_free_pages(st); - return -ENOMEM; + return ERR_PTR(-ENOMEM); } -static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj) +static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj, + struct sg_table *pages) { - i915_gem_gtt_finish_object(obj); - internal_free_pages(obj->mm.pages); + i915_gem_gtt_finish_pages(obj, pages); + internal_free_pages(pages); obj->mm.dirty = false; obj->mm.madv = I915_MADV_WILLNEED; diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 124f69a..f95061f 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -91,6 +91,13 @@ static bool can_release_pages(struct drm_i915_gem_object *obj) return swap_available() || obj->mm.madv == I915_MADV_DONTNEED; } +static bool unsafe_drop_pages(struct drm_i915_gem_object *obj) +{ + if (i915_gem_object_unbind(obj) == 0) + __i915_gem_object_put_pages(obj); + return !obj->mm.pages; +} + /** * i915_gem_shrink - Shrink buffer object caches * @dev_priv: i915 device @@ -192,9 +199,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, i915_gem_object_get(obj); - /* For the unbound phase, this should be a no-op! 
*/ - i915_gem_object_unbind(obj); - if (__i915_gem_object_put_pages(obj) == 0) + if (unsafe_drop_pages(obj)) count += obj->base.size >> PAGE_SHIFT; i915_gem_object_put(obj); diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 0acbdcb..1a63ffa 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -546,17 +546,20 @@ i915_pages_create_for_stolen(struct drm_device *dev, return st; } -static int i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj) +static struct sg_table * +i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj) { - BUG(); - return -EINVAL; + return i915_pages_create_for_stolen(obj->base.dev, + obj->stolen->start, + obj->stolen->size); } -static void i915_gem_object_put_pages_stolen(struct drm_i915_gem_object *obj) +static void i915_gem_object_put_pages_stolen(struct drm_i915_gem_object *obj, + struct sg_table *pages) { /* Should only be called during free */ - sg_free_table(obj->mm.pages); - kfree(obj->mm.pages); + sg_free_table(pages); + kfree(pages); } static void @@ -591,21 +594,13 @@ _i915_gem_object_create_stolen(struct drm_device *dev, drm_gem_private_object_init(dev, &obj->base, stolen->size); i915_gem_object_init(obj, &i915_gem_object_stolen_ops); - obj->mm.pages = i915_pages_create_for_stolen(dev, - stolen->start, - stolen->size); - if (!obj->mm.pages) - goto cleanup; - - obj->mm.get_page.sg_pos = obj->mm.pages->sgl; - obj->mm.get_page.sg_idx = 0; - - __i915_gem_object_pin_pages(obj); obj->stolen = stolen; - obj->base.read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT; obj->cache_level = HAS_LLC(dev) ? I915_CACHE_LLC : I915_CACHE_NONE; + if (i915_gem_object_pin_pages(obj)) + goto cleanup; + return obj; cleanup: @@ -700,10 +695,14 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev, if (gtt_offset == I915_GTT_OFFSET_NONE) return obj; + ret = i915_gem_object_pin_pages(obj); + if (ret) + goto err; + vma = i915_gem_obj_lookup_or_create_vma(obj, &ggtt->base, NULL); if (IS_ERR(vma)) { ret = PTR_ERR(vma); - goto err; + goto err_pages; } /* To simplify the initialisation sequence between KMS and GTT, @@ -717,20 +716,20 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev, ret = drm_mm_reserve_node(&ggtt->base.mm, &vma->node); if (ret) { DRM_DEBUG_KMS("failed to allocate stolen GTT space\n"); - goto err; + goto err_pages; } vma->pages = obj->mm.pages; vma->flags |= I915_VMA_GLOBAL_BIND; __i915_vma_set_map_and_fenceable(vma); list_move_tail(&vma->vm_link, &ggtt->base.inactive_list); + list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); obj->bind_count++; - list_add_tail(&obj->global_list, &dev_priv->mm.bound_list); - __i915_gem_object_pin_pages(obj); - return obj; +err_pages: + i915_gem_object_unpin_pages(obj); err: i915_gem_object_put(obj); return NULL; diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 0cbc8f7..a421447 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -73,11 +73,14 @@ static void cancel_userptr(struct work_struct *work) /* Cancel any active worker and force us to re-evaluate gup */ obj->userptr.work = NULL; - if (obj->mm.pages) { - /* We are inside a kthread context and can't be interrupted */ - WARN_ON(i915_gem_object_unbind(obj)); - WARN_ON(__i915_gem_object_put_pages(obj)); - } + /* We are inside a kthread context and can't be interrupted */ + if (i915_gem_object_unbind(obj) == 0) + 
__i915_gem_object_put_pages(obj); + WARN_ONCE(obj->mm.pages, + "Failed to release pages: bind_count=%d, pages_pin_count=%d, pin_display=%d\n", + obj->bind_count, + obj->mm.pages_pin_count, + obj->pin_display); i915_gem_object_put(obj); mutex_unlock(&dev->struct_mutex); @@ -426,24 +429,25 @@ err: return ret; } -static int +static struct sg_table * __i915_gem_userptr_set_pages(struct drm_i915_gem_object *obj, struct page **pvec, int num_pages) { + struct sg_table *pages; int ret; - ret = st_set_pages(&obj->mm.pages, pvec, num_pages); + ret = st_set_pages(&pages, pvec, num_pages); if (ret) - return ret; + return ERR_PTR(ret); - ret = i915_gem_gtt_prepare_object(obj); + ret = i915_gem_gtt_prepare_pages(obj, pages); if (ret) { - sg_free_table(obj->mm.pages); - kfree(obj->mm.pages); - obj->mm.pages = NULL; + sg_free_table(pages); + kfree(pages); + return ERR_PTR(ret); } - return ret; + return pages; } static int @@ -525,20 +529,20 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) mutex_lock(&dev->struct_mutex); if (obj->userptr.work == &work->work) { + struct sg_table *pages = ERR_PTR(ret); + if (pinned == npages) { - ret = __i915_gem_userptr_set_pages(obj, pvec, npages); - if (ret == 0) { - list_add_tail(&obj->global_list, - &to_i915(dev)->mm.unbound_list); - obj->mm.get_page.sg_pos = obj->mm.pages->sgl; - obj->mm.get_page.sg_idx = 0; + pages = __i915_gem_userptr_set_pages(obj, pvec, npages); + if (!IS_ERR(pages)) { + __i915_gem_object_set_pages(obj, pages); pinned = 0; + pages = NULL; } } - obj->userptr.work = ERR_PTR(ret); + + obj->userptr.work = ERR_CAST(pages); } - obj->userptr.workers--; i915_gem_object_put(obj); mutex_unlock(&dev->struct_mutex); @@ -549,7 +553,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) kfree(work); } -static int +static struct sg_table * __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj, bool *active) { @@ -574,15 +578,11 @@ __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj, * that error back to this function through * obj->userptr.work = ERR_PTR. 
*/ - if (obj->userptr.workers >= I915_GEM_USERPTR_MAX_WORKERS) - return -EAGAIN; - work = kmalloc(sizeof(*work), GFP_KERNEL); if (work == NULL) - return -ENOMEM; + return ERR_PTR(-ENOMEM); obj->userptr.work = &work->work; - obj->userptr.workers++; work->obj = i915_gem_object_get(obj); @@ -593,14 +593,15 @@ __i915_gem_userptr_get_pages_schedule(struct drm_i915_gem_object *obj, schedule_work(&work->work); *active = true; - return -EAGAIN; + return ERR_PTR(-EAGAIN); } -static int +static struct sg_table * i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) { const int num_pages = obj->base.size >> PAGE_SHIFT; struct page **pvec; + struct sg_table *pages; int pinned, ret; bool active; @@ -624,15 +625,15 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) if (obj->userptr.work) { /* active flag should still be held for the pending work */ if (IS_ERR(obj->userptr.work)) - return PTR_ERR(obj->userptr.work); + return ERR_CAST(obj->userptr.work); else - return -EAGAIN; + return ERR_PTR(-EAGAIN); } /* Let the mmu-notifier know that we have begun and need cancellation */ ret = __i915_gem_userptr_set_active(obj, true); if (ret) - return ret; + return ERR_PTR(ret); pvec = NULL; pinned = 0; @@ -641,7 +642,7 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) GFP_TEMPORARY); if (pvec == NULL) { __i915_gem_userptr_set_active(obj, false); - return -ENOMEM; + return ERR_PTR(-ENOMEM); } pinned = __get_user_pages_fast(obj->userptr.ptr, num_pages, @@ -650,21 +651,22 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) active = false; if (pinned < 0) - ret = pinned, pinned = 0; + pages = ERR_PTR(pinned), pinned = 0; else if (pinned < num_pages) - ret = __i915_gem_userptr_get_pages_schedule(obj, &active); + pages = __i915_gem_userptr_get_pages_schedule(obj, &active); else - ret = __i915_gem_userptr_set_pages(obj, pvec, num_pages); - if (ret) { + pages = __i915_gem_userptr_set_pages(obj, pvec, num_pages); + if (IS_ERR(pages)) { __i915_gem_userptr_set_active(obj, active); release_pages(pvec, pinned, 0); } drm_free_large(pvec); - return ret; + return pages; } static void -i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj) +i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) { struct sgt_iter sgt_iter; struct page *page; @@ -675,9 +677,9 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj) if (obj->mm.madv != I915_MADV_WILLNEED) obj->mm.dirty = false; - i915_gem_gtt_finish_object(obj); + i915_gem_gtt_finish_pages(obj, pages); - for_each_sgt_page(page, sgt_iter, obj->mm.pages) { + for_each_sgt_page(page, sgt_iter, pages) { if (obj->mm.dirty) set_page_dirty(page); @@ -686,8 +688,8 @@ i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj) } obj->mm.dirty = false; - sg_free_table(obj->mm.pages); - kfree(obj->mm.pages); + sg_free_table(pages); + kfree(pages); } static void -- cgit v1.1 From 1233e2db199dea015391db03d3478b3392201c41 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:37 +0100 Subject: drm/i915: Move object backing storage manipulation to its own locking Break the allocation of the backing storage away from struct_mutex into a per-object lock. This allows parallel page allocation, provided we can do so outside of struct_mutex (i.e. set-domain-ioctl, pwrite, GTT fault), i.e. before execbuf! The increased cost of the atomic counters are hidden behind i915_vma_pin() for the typical case of execbuf, i.e. as the object is typically bound between execbufs, the page_pin_count is static. 
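In userspace terms the pin fast path looks roughly like the sketch below: repeat pins are a lock-free increment, and only the first pin, which has to populate the pages, takes the new per-object mutex. The struct layout and helpers here are invented stand-ins rather than the kernel API; atomic_inc_not_zero() plays the role of the compare-exchange loop.

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdlib.h>

    struct object {
            pthread_mutex_t lock;           /* models obj->mm.lock */
            atomic_int pages_pin_count;     /* models obj->mm.pages_pin_count */
            void *pages;                    /* models obj->mm.pages */
    };

    static int pin_pages(struct object *obj)
    {
            int err = -1;
            int v = atomic_load(&obj->pages_pin_count);

            /* Fast path, the equivalent of atomic_inc_not_zero(): take
             * another pin without any lock if a pin already exists. */
            while (v > 0)
                    if (atomic_compare_exchange_weak(&obj->pages_pin_count,
                                                     &v, v + 1))
                            return 0;

            /* Slow path: only the first pin allocates, serialised on the
             * per-object lock instead of a device-wide mutex. */
            pthread_mutex_lock(&obj->lock);
            if (!obj->pages)
                    obj->pages = calloc(1, 4096); /* stand-in for ops->get_pages() */
            if (obj->pages) {
                    atomic_fetch_add(&obj->pages_pin_count, 1);
                    err = 0;
            }
            pthread_mutex_unlock(&obj->lock);
            return err;
    }

Unpinning mirrors this: an atomic decrement on the counter, with the final release of the pages again serialised under obj->mm.lock.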
The cost will be felt around set-domain and pwrite, but offset by the improvement from reduced struct_mutex contention. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-14-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 36 +++++-------- drivers/gpu/drm/i915/i915_gem.c | 93 +++++++++++++++++++++----------- drivers/gpu/drm/i915/i915_gem_shrinker.c | 51 ++++++++++++------ drivers/gpu/drm/i915/i915_gem_tiling.c | 2 + drivers/gpu/drm/i915/i915_gem_userptr.c | 10 ++-- 5 files changed, 114 insertions(+), 78 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index db67bec..f69e0e0 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2274,7 +2274,8 @@ struct drm_i915_gem_object { unsigned int pin_display; struct { - unsigned int pages_pin_count; + struct mutex lock; /* protects the pages and their use */ + atomic_t pages_pin_count; struct sg_table *pages; void *mapping; @@ -2387,13 +2388,6 @@ i915_gem_object_is_dead(const struct drm_i915_gem_object *obj) return atomic_read(&obj->base.refcount.refcount) == 0; } -#if IS_ENABLED(CONFIG_LOCKDEP) -#define lockdep_assert_held_unless(lock, cond) \ - GEM_BUG_ON(debug_locks && !lockdep_is_held(lock) && !(cond)) -#else -#define lockdep_assert_held_unless(lock, cond) -#endif - static inline bool i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) { @@ -3229,9 +3223,9 @@ int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj); static inline int __must_check i915_gem_object_pin_pages(struct drm_i915_gem_object *obj) { - lockdep_assert_held(&obj->base.dev->struct_mutex); + might_lock(&obj->mm.lock); - if (obj->mm.pages_pin_count++) + if (atomic_inc_not_zero(&obj->mm.pages_pin_count)) return 0; return __i915_gem_object_get_pages(obj); @@ -3240,32 +3234,29 @@ i915_gem_object_pin_pages(struct drm_i915_gem_object *obj) static inline void __i915_gem_object_pin_pages(struct drm_i915_gem_object *obj) { - lockdep_assert_held_unless(&obj->base.dev->struct_mutex, - i915_gem_object_is_dead(obj)); GEM_BUG_ON(!obj->mm.pages); - obj->mm.pages_pin_count++; + atomic_inc(&obj->mm.pages_pin_count); } static inline bool i915_gem_object_has_pinned_pages(struct drm_i915_gem_object *obj) { - return obj->mm.pages_pin_count; + return atomic_read(&obj->mm.pages_pin_count); } static inline void __i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj) { - lockdep_assert_held_unless(&obj->base.dev->struct_mutex, - i915_gem_object_is_dead(obj)); GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); GEM_BUG_ON(!obj->mm.pages); - obj->mm.pages_pin_count--; - GEM_BUG_ON(obj->mm.pages_pin_count < obj->bind_count); + atomic_dec(&obj->mm.pages_pin_count); + GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count); } -static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj) +static inline void +i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj) { __i915_gem_object_unpin_pages(obj); } @@ -3288,8 +3279,8 @@ enum i915_map_type { * the kernel address space. Based on the @type of mapping, the PTE will be * set to either WriteBack or WriteCombine (via pgprot_t). * - * The caller must hold the struct_mutex, and is responsible for calling - * i915_gem_object_unpin_map() when the mapping is no longer required. + * The caller is responsible for calling i915_gem_object_unpin_map() when the + * mapping is no longer required. 
* * Returns the pointer through which to access the mapped object, or an * ERR_PTR() on error. @@ -3305,12 +3296,9 @@ void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj, * with your access, call i915_gem_object_unpin_map() to release the pin * upon the mapping. Once the pin count reaches zero, that mapping may be * removed. - * - * The caller must hold the struct_mutex. */ static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj) { - lockdep_assert_held(&obj->base.dev->struct_mutex); i915_gem_object_unpin_pages(obj); } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 56060ae..c8e9548 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2269,6 +2269,9 @@ void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj) { struct address_space *mapping; + lockdep_assert_held(&obj->mm.lock); + GEM_BUG_ON(obj->mm.pages); + switch (obj->mm.madv) { case I915_MADV_DONTNEED: i915_gem_object_truncate(obj); @@ -2325,12 +2328,17 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj) { struct sg_table *pages; - lockdep_assert_held(&obj->base.dev->struct_mutex); - if (i915_gem_object_has_pinned_pages(obj)) return; GEM_BUG_ON(obj->bind_count); + if (!READ_ONCE(obj->mm.pages)) + return; + + /* May be called by shrinker from within get_pages() (on another bo) */ + mutex_lock_nested(&obj->mm.lock, SINGLE_DEPTH_NESTING); + if (unlikely(atomic_read(&obj->mm.pages_pin_count))) + goto unlock; /* ->put_pages might need to allocate memory for the bit17 swizzle * array, hence protect them from being reaped by removing them from gtt @@ -2353,6 +2361,8 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj) __i915_gem_object_reset_page_iter(obj); obj->ops->put_pages(obj, pages); +unlock: + mutex_unlock(&obj->mm.lock); } static unsigned int swiotlb_max_size(void) @@ -2486,7 +2496,7 @@ err_pages: void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, struct sg_table *pages) { - lockdep_assert_held(&obj->base.dev->struct_mutex); + lockdep_assert_held(&obj->mm.lock); obj->mm.get_page.sg_pos = pages->sgl; obj->mm.get_page.sg_idx = 0; @@ -2512,9 +2522,9 @@ static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj) } /* Ensure that the associated pages are gathered from the backing storage - * and pinned into our object. i915_gem_object_get_pages() may be called + * and pinned into our object. i915_gem_object_pin_pages() may be called * multiple times before they are released by a single call to - * i915_gem_object_put_pages() - once the pages are no longer referenced + * i915_gem_object_unpin_pages() - once the pages are no longer referenced * either as a result of memory pressure (reaping pages under the shrinker) * or as the object is itself released. 
*/ @@ -2522,15 +2532,23 @@ int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj) { int err; - lockdep_assert_held(&obj->base.dev->struct_mutex); + err = mutex_lock_interruptible(&obj->mm.lock); + if (err) + return err; - if (obj->mm.pages) - return 0; + if (likely(obj->mm.pages)) { + __i915_gem_object_pin_pages(obj); + goto unlock; + } + + GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); err = ____i915_gem_object_get_pages(obj); - if (err) - __i915_gem_object_unpin_pages(obj); + if (!err) + atomic_set_release(&obj->mm.pages_pin_count, 1); +unlock: + mutex_unlock(&obj->mm.lock); return err; } @@ -2590,20 +2608,29 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, void *ptr; int ret; - lockdep_assert_held(&obj->base.dev->struct_mutex); GEM_BUG_ON(!i915_gem_object_has_struct_page(obj)); - ret = i915_gem_object_pin_pages(obj); + ret = mutex_lock_interruptible(&obj->mm.lock); if (ret) return ERR_PTR(ret); - pinned = obj->mm.pages_pin_count > 1; + pinned = true; + if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) { + ret = ____i915_gem_object_get_pages(obj); + if (ret) + goto err_unlock; + + GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count)); + atomic_set_release(&obj->mm.pages_pin_count, 1); + pinned = false; + } + GEM_BUG_ON(!obj->mm.pages); ptr = ptr_unpack_bits(obj->mm.mapping, has_type); if (ptr && has_type != type) { if (pinned) { ret = -EBUSY; - goto err; + goto err_unpin; } if (is_vmalloc_addr(ptr)) @@ -2618,17 +2645,21 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, ptr = i915_gem_object_map(obj, type); if (!ptr) { ret = -ENOMEM; - goto err; + goto err_unpin; } obj->mm.mapping = ptr_pack_bits(ptr, type); } +out_unlock: + mutex_unlock(&obj->mm.lock); return ptr; -err: - i915_gem_object_unpin_pages(obj); - return ERR_PTR(ret); +err_unpin: + atomic_dec(&obj->mm.pages_pin_count); +err_unlock: + ptr = ERR_PTR(ret); + goto out_unlock; } static void @@ -4268,7 +4299,7 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_madvise *args = data; struct drm_i915_gem_object *obj; - int ret; + int err; switch (args->madv) { case I915_MADV_DONTNEED: @@ -4278,15 +4309,13 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, return -EINVAL; } - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; - obj = i915_gem_object_lookup(file_priv, args->handle); - if (!obj) { - ret = -ENOENT; - goto unlock; - } + if (!obj) + return -ENOENT; + + err = mutex_lock_interruptible(&obj->mm.lock); + if (err) + goto out; if (obj->mm.pages && i915_gem_object_is_tiled(obj) && @@ -4305,11 +4334,11 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, i915_gem_object_truncate(obj); args->retained = obj->mm.madv != __I915_MADV_PURGED; + mutex_unlock(&obj->mm.lock); +out: i915_gem_object_put(obj); -unlock: - mutex_unlock(&dev->struct_mutex); - return ret; + return err; } void i915_gem_object_init(struct drm_i915_gem_object *obj, @@ -4317,6 +4346,8 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, { int i; + mutex_init(&obj->mm.lock); + INIT_LIST_HEAD(&obj->global_list); INIT_LIST_HEAD(&obj->userfault_link); for (i = 0; i < I915_NUM_ENGINES; i++) @@ -4479,7 +4510,7 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj) obj->ops->release(obj); if (WARN_ON(i915_gem_object_has_pinned_pages(obj))) - obj->mm.pages_pin_count = 0; + atomic_set(&obj->mm.pages_pin_count, 0); if (discard_backing_storage(obj)) obj->mm.madv = I915_MADV_DONTNEED; 
__i915_gem_object_put_pages(obj); diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index f95061f..c8a4c40 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -48,6 +48,20 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task) #endif } +static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock) +{ + if (!mutex_trylock(&dev->struct_mutex)) { + if (!mutex_is_locked_by(&dev->struct_mutex, current)) + return false; + + *unlock = false; + } else { + *unlock = true; + } + + return true; +} + static bool any_vma_pinned(struct drm_i915_gem_object *obj) { struct i915_vma *vma; @@ -66,6 +80,9 @@ static bool swap_available(void) static bool can_release_pages(struct drm_i915_gem_object *obj) { + if (!obj->mm.pages) + return false; + /* Only shmemfs objects are backed by swap */ if (!obj->base.filp) return false; @@ -78,7 +95,7 @@ static bool can_release_pages(struct drm_i915_gem_object *obj) * to the GPU, simply unbinding from the GPU is not going to succeed * in releasing our pin count on the pages themselves. */ - if (obj->mm.pages_pin_count > obj->bind_count) + if (atomic_read(&obj->mm.pages_pin_count) > obj->bind_count) return false; if (any_vma_pinned(obj)) @@ -95,7 +112,7 @@ static bool unsafe_drop_pages(struct drm_i915_gem_object *obj) { if (i915_gem_object_unbind(obj) == 0) __i915_gem_object_put_pages(obj); - return !obj->mm.pages; + return !READ_ONCE(obj->mm.pages); } /** @@ -135,6 +152,10 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, { NULL, 0 }, }, *phase; unsigned long count = 0; + bool unlock; + + if (!i915_gem_shrinker_lock(&dev_priv->drm, &unlock)) + return 0; trace_i915_gem_shrink(dev_priv, target, flags); i915_gem_retire_requests(dev_priv); @@ -199,8 +220,14 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, i915_gem_object_get(obj); - if (unsafe_drop_pages(obj)) - count += obj->base.size >> PAGE_SHIFT; + if (unsafe_drop_pages(obj)) { + mutex_lock(&obj->mm.lock); + if (!obj->mm.pages) { + __i915_gem_object_invalidate(obj); + count += obj->base.size >> PAGE_SHIFT; + } + mutex_unlock(&obj->mm.lock); + } i915_gem_object_put(obj); } @@ -211,6 +238,9 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, intel_runtime_pm_put(dev_priv); i915_gem_retire_requests(dev_priv); + if (unlock) + mutex_unlock(&dev_priv->drm.struct_mutex); + /* expedite the RCU grace period to free some request slabs */ synchronize_rcu_expedited(); @@ -244,19 +274,6 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv) return freed; } -static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock) -{ - if (!mutex_trylock(&dev->struct_mutex)) { - if (!mutex_is_locked_by(&dev->struct_mutex, current)) - return false; - - *unlock = false; - } else - *unlock = true; - - return true; -} - static unsigned long i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) { diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 34d5ada..6608799 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -259,6 +259,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, if (!err) { struct i915_vma *vma; + mutex_lock(&obj->mm.lock); if (obj->mm.pages && obj->mm.madv == I915_MADV_WILLNEED && dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { @@ -267,6 +268,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, if (!i915_gem_object_is_tiled(obj)) 
__i915_gem_object_pin_pages(obj); } + mutex_unlock(&obj->mm.lock); list_for_each_entry(vma, &obj->vma_list, obj_link) { if (!vma->fence) diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index a421447..6c8c7b3 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -79,7 +79,7 @@ static void cancel_userptr(struct work_struct *work) WARN_ONCE(obj->mm.pages, "Failed to release pages: bind_count=%d, pages_pin_count=%d, pin_display=%d\n", obj->bind_count, - obj->mm.pages_pin_count, + atomic_read(&obj->mm.pages_pin_count), obj->pin_display); i915_gem_object_put(obj); @@ -491,7 +491,6 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) { struct get_pages_work *work = container_of(_work, typeof(*work), work); struct drm_i915_gem_object *obj = work->obj; - struct drm_device *dev = obj->base.dev; const int npages = obj->base.size >> PAGE_SHIFT; struct page **pvec; int pinned, ret; @@ -527,7 +526,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) } } - mutex_lock(&dev->struct_mutex); + mutex_lock(&obj->mm.lock); if (obj->userptr.work == &work->work) { struct sg_table *pages = ERR_PTR(ret); @@ -542,13 +541,12 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) obj->userptr.work = ERR_CAST(pages); } - - i915_gem_object_put(obj); - mutex_unlock(&dev->struct_mutex); + mutex_unlock(&obj->mm.lock); release_pages(pvec, pinned, 0); drm_free_large(pvec); + i915_gem_object_put_unlocked(obj); put_task_struct(work->task); kfree(work); } -- cgit v1.1 From 7dd737f377ffb59010b6811f934998d42f24a8bd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:38 +0100 Subject: drm/i915/dmabuf: Acquire the backing storage outside of struct_mutex Use the per-object mm.lock to allocate the backing storage (and hold a reference to it across the dmabuf access) without resorting to struct_mutex. 
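The resulting begin_cpu_access ordering, modelled in userspace (device_lock, pin_pages() and the other helpers are invented stand-ins, not the i915 API): take the page pin first so the shrinker cannot reap the backing storage, hold the device-wide lock only for the brief domain flush, then drop the pin.

    #include <pthread.h>
    #include <stdbool.h>

    struct object { void *pages; };

    static pthread_mutex_t device_lock = PTHREAD_MUTEX_INITIALIZER; /* struct_mutex */

    static int pin_pages(struct object *obj)
    {
            return obj->pages ? 0 : -1;     /* toy: pages must already exist */
    }

    static void unpin_pages(struct object *obj)
    {
            (void)obj;
    }

    static int set_to_cpu_domain(struct object *obj, bool write)
    {
            (void)obj; (void)write;
            return 0;                       /* toy: nothing to flush */
    }

    static int begin_cpu_access(struct object *obj, bool write)
    {
            int err = pin_pages(obj);       /* per-object mm.lock only */
            if (err)
                    return err;

            pthread_mutex_lock(&device_lock);       /* brief window */
            err = set_to_cpu_domain(obj, write);
            pthread_mutex_unlock(&device_lock);

            unpin_pages(obj);               /* pages stayed resident above */
            return err;
    }

The map_dma_buf path keeps its pin until unmap, so the exported scatterlist stays valid for the whole attachment without struct_mutex involvement.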
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-15-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_dmabuf.c | 69 +++++++++++++++------------------- 1 file changed, 30 insertions(+), 39 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index 2abd524..4d45f20 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -44,19 +44,15 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme struct scatterlist *src, *dst; int ret, i; - ret = i915_mutex_lock_interruptible(obj->base.dev); - if (ret) - goto err; - ret = i915_gem_object_pin_pages(obj); if (ret) - goto err_unlock; + goto err; /* Copy sg so that we make an independent mapping */ st = kmalloc(sizeof(struct sg_table), GFP_KERNEL); if (st == NULL) { ret = -ENOMEM; - goto err_unpin; + goto err_unpin_pages; } ret = sg_alloc_table(st, obj->mm.pages->nents, GFP_KERNEL); @@ -72,21 +68,18 @@ static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme } if (!dma_map_sg(attachment->dev, st->sgl, st->nents, dir)) { - ret =-ENOMEM; + ret = -ENOMEM; goto err_free_sg; } - mutex_unlock(&obj->base.dev->struct_mutex); return st; err_free_sg: sg_free_table(st); err_free: kfree(st); -err_unpin: +err_unpin_pages: i915_gem_object_unpin_pages(obj); -err_unlock: - mutex_unlock(&obj->base.dev->struct_mutex); err: return ERR_PTR(ret); } @@ -101,36 +94,21 @@ static void i915_gem_unmap_dma_buf(struct dma_buf_attachment *attachment, sg_free_table(sg); kfree(sg); - mutex_lock(&obj->base.dev->struct_mutex); i915_gem_object_unpin_pages(obj); - mutex_unlock(&obj->base.dev->struct_mutex); } static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf) { struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); - struct drm_device *dev = obj->base.dev; - void *addr; - int ret; - - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ERR_PTR(ret); - - addr = i915_gem_object_pin_map(obj, I915_MAP_WB); - mutex_unlock(&dev->struct_mutex); - return addr; + return i915_gem_object_pin_map(obj, I915_MAP_WB); } static void i915_gem_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr) { struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); - struct drm_device *dev = obj->base.dev; - mutex_lock(&dev->struct_mutex); i915_gem_object_unpin_map(obj); - mutex_unlock(&dev->struct_mutex); } static void *i915_gem_dmabuf_kmap_atomic(struct dma_buf *dma_buf, unsigned long page_num) @@ -177,32 +155,45 @@ static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_dire { struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); struct drm_device *dev = obj->base.dev; - int ret; bool write = (direction == DMA_BIDIRECTIONAL || direction == DMA_TO_DEVICE); + int err; - ret = i915_mutex_lock_interruptible(dev); - if (ret) - return ret; + err = i915_gem_object_pin_pages(obj); + if (err) + return err; + + err = i915_mutex_lock_interruptible(dev); + if (err) + goto out; - ret = i915_gem_object_set_to_cpu_domain(obj, write); + err = i915_gem_object_set_to_cpu_domain(obj, write); mutex_unlock(&dev->struct_mutex); - return ret; + +out: + i915_gem_object_unpin_pages(obj); + return err; } static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction) { struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf); struct drm_device *dev = obj->base.dev; - int ret; + int err; - ret = 
i915_mutex_lock_interruptible(dev); - if (ret) - return ret; + err = i915_gem_object_pin_pages(obj); + if (err) + return err; + + err = i915_mutex_lock_interruptible(dev); + if (err) + goto out; - ret = i915_gem_object_set_to_gtt_domain(obj, false); + err = i915_gem_object_set_to_gtt_domain(obj, false); mutex_unlock(&dev->struct_mutex); - return ret; +out: + i915_gem_object_unpin_pages(obj); + return err; } static const struct dma_buf_ops i915_dmabuf_ops = { -- cgit v1.1 From bb6dc8d96b683a6052162c36062cd8e9329e4f21 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:39 +0100 Subject: drm/i915: Implement pread without struct-mutex We only need struct_mutex within pread for a brief window where we need to serialise with rendering and control our cache domains. Elsewhere we can rely on the backing storage being pinned, and forgive userspace any races against us. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-16-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 365 +++++++++++++++++----------------------- 1 file changed, 157 insertions(+), 208 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c8e9548..d122658 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -63,13 +63,13 @@ static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) } static int -insert_mappable_node(struct drm_i915_private *i915, +insert_mappable_node(struct i915_ggtt *ggtt, struct drm_mm_node *node, u32 size) { memset(node, 0, sizeof(*node)); - return drm_mm_insert_node_in_range_generic(&i915->ggtt.base.mm, node, - size, 0, 0, 0, - i915->ggtt.mappable_end, + return drm_mm_insert_node_in_range_generic(&ggtt->base.mm, node, + size, 0, -1, + 0, ggtt->mappable_end, DRM_MM_SEARCH_DEFAULT, DRM_MM_CREATE_DEFAULT); } @@ -821,32 +821,6 @@ err_unpin: return ret; } -/* Per-page copy function for the shmem pread fastpath. - * Flushes invalid cachelines before reading the target if - * needs_clflush is set. */ -static int -shmem_pread_fast(struct page *page, int shmem_page_offset, int page_length, - char __user *user_data, - bool page_do_bit17_swizzling, bool needs_clflush) -{ - char *vaddr; - int ret; - - if (unlikely(page_do_bit17_swizzling)) - return -EINVAL; - - vaddr = kmap_atomic(page); - if (needs_clflush) - drm_clflush_virt_range(vaddr + shmem_page_offset, - page_length); - ret = __copy_to_user_inatomic(user_data, - vaddr + shmem_page_offset, - page_length); - kunmap_atomic(vaddr); - - return ret ? -EFAULT : 0; -} - static void shmem_clflush_swizzled_range(char *addr, unsigned long length, bool swizzled) @@ -872,7 +846,7 @@ shmem_clflush_swizzled_range(char *addr, unsigned long length, /* Only difference to the fast-path function is that this can handle bit17 * and uses non-atomic copy and kmap functions. 
*/ static int -shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length, +shmem_pread_slow(struct page *page, int offset, int length, char __user *user_data, bool page_do_bit17_swizzling, bool needs_clflush) { @@ -881,61 +855,130 @@ shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length, vaddr = kmap(page); if (needs_clflush) - shmem_clflush_swizzled_range(vaddr + shmem_page_offset, - page_length, + shmem_clflush_swizzled_range(vaddr + offset, length, page_do_bit17_swizzling); if (page_do_bit17_swizzling) - ret = __copy_to_user_swizzled(user_data, - vaddr, shmem_page_offset, - page_length); + ret = __copy_to_user_swizzled(user_data, vaddr, offset, length); else - ret = __copy_to_user(user_data, - vaddr + shmem_page_offset, - page_length); + ret = __copy_to_user(user_data, vaddr + offset, length); kunmap(page); return ret ? - EFAULT : 0; } -static inline unsigned long -slow_user_access(struct io_mapping *mapping, - uint64_t page_base, int page_offset, - char __user *user_data, - unsigned long length, bool pwrite) +static int +shmem_pread(struct page *page, int offset, int length, char __user *user_data, + bool page_do_bit17_swizzling, bool needs_clflush) +{ + int ret; + + ret = -ENODEV; + if (!page_do_bit17_swizzling) { + char *vaddr = kmap_atomic(page); + + if (needs_clflush) + drm_clflush_virt_range(vaddr + offset, length); + ret = __copy_to_user_inatomic(user_data, vaddr + offset, length); + kunmap_atomic(vaddr); + } + if (ret == 0) + return 0; + + return shmem_pread_slow(page, offset, length, user_data, + page_do_bit17_swizzling, needs_clflush); +} + +static int +i915_gem_shmem_pread(struct drm_i915_gem_object *obj, + struct drm_i915_gem_pread *args) +{ + char __user *user_data; + u64 remain; + unsigned int obj_do_bit17_swizzling; + unsigned int needs_clflush; + unsigned int idx, offset; + int ret; + + obj_do_bit17_swizzling = 0; + if (i915_gem_object_needs_bit17_swizzle(obj)) + obj_do_bit17_swizzling = BIT(17); + + ret = mutex_lock_interruptible(&obj->base.dev->struct_mutex); + if (ret) + return ret; + + ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); + mutex_unlock(&obj->base.dev->struct_mutex); + if (ret) + return ret; + + remain = args->size; + user_data = u64_to_user_ptr(args->data_ptr); + offset = offset_in_page(args->offset); + for (idx = args->offset >> PAGE_SHIFT; remain; idx++) { + struct page *page = i915_gem_object_get_page(obj, idx); + int length; + + length = remain; + if (offset + length > PAGE_SIZE) + length = PAGE_SIZE - offset; + + ret = shmem_pread(page, offset, length, user_data, + page_to_phys(page) & obj_do_bit17_swizzling, + needs_clflush); + if (ret) + break; + + remain -= length; + user_data += length; + offset = 0; + } + + i915_gem_obj_finish_shmem_access(obj); + return ret; +} + +static inline bool +gtt_user_read(struct io_mapping *mapping, + loff_t base, int offset, + char __user *user_data, int length) { - void __iomem *ioaddr; void *vaddr; - uint64_t unwritten; + unsigned long unwritten; - ioaddr = io_mapping_map_wc(mapping, page_base, PAGE_SIZE); /* We can use the cpu mem copy function because this is X86. 
*/ - vaddr = (void __force *)ioaddr + page_offset; - if (pwrite) - unwritten = __copy_from_user(vaddr, user_data, length); - else - unwritten = __copy_to_user(user_data, vaddr, length); - - io_mapping_unmap(ioaddr); + vaddr = (void __force *)io_mapping_map_atomic_wc(mapping, base); + unwritten = __copy_to_user_inatomic(user_data, vaddr + offset, length); + io_mapping_unmap_atomic(vaddr); + if (unwritten) { + vaddr = (void __force *) + io_mapping_map_wc(mapping, base, PAGE_SIZE); + unwritten = copy_to_user(user_data, vaddr + offset, length); + io_mapping_unmap(vaddr); + } return unwritten; } static int -i915_gem_gtt_pread(struct drm_device *dev, - struct drm_i915_gem_object *obj, uint64_t size, - uint64_t data_offset, uint64_t data_ptr) +i915_gem_gtt_pread(struct drm_i915_gem_object *obj, + const struct drm_i915_gem_pread *args) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; - struct i915_vma *vma; + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_ggtt *ggtt = &i915->ggtt; struct drm_mm_node node; - char __user *user_data; - uint64_t remain; - uint64_t offset; + struct i915_vma *vma; + void __user *user_data; + u64 remain, offset; int ret; - intel_runtime_pm_get(to_i915(dev)); - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); + ret = mutex_lock_interruptible(&i915->drm.struct_mutex); + if (ret) + return ret; + + intel_runtime_pm_get(i915); + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, + PIN_MAPPABLE | PIN_NONBLOCK); if (!IS_ERR(vma)) { node.start = i915_ggtt_offset(vma); node.allocated = false; @@ -946,33 +989,21 @@ i915_gem_gtt_pread(struct drm_device *dev, } } if (IS_ERR(vma)) { - ret = insert_mappable_node(dev_priv, &node, PAGE_SIZE); + ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); if (ret) - goto out; - - ret = i915_gem_object_pin_pages(obj); - if (ret) { - remove_mappable_node(&node); - goto out; - } + goto out_unlock; + GEM_BUG_ON(!node.allocated); } ret = i915_gem_object_set_to_gtt_domain(obj, false); if (ret) goto out_unpin; - user_data = u64_to_user_ptr(data_ptr); - remain = size; - offset = data_offset; + mutex_unlock(&i915->drm.struct_mutex); - mutex_unlock(&dev->struct_mutex); - if (likely(!i915.prefault_disable)) { - ret = fault_in_pages_writeable(user_data, remain); - if (ret) { - mutex_lock(&dev->struct_mutex); - goto out_unpin; - } - } + user_data = u64_to_user_ptr(args->data_ptr); + remain = args->size; + offset = args->offset; while (remain > 0) { /* Operation in this page @@ -989,19 +1020,14 @@ i915_gem_gtt_pread(struct drm_device *dev, wmb(); ggtt->base.insert_page(&ggtt->base, i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), - node.start, - I915_CACHE_NONE, 0); + node.start, I915_CACHE_NONE, 0); wmb(); } else { page_base += offset & PAGE_MASK; } - /* This is a slow read/write as it tries to read from - * and write to user memory which may result into page - * faults, and so we cannot perform this under struct_mutex. 
- */ - if (slow_user_access(&ggtt->mappable, page_base, - page_offset, user_data, - page_length, false)) { + + if (gtt_user_read(&ggtt->mappable, page_base, page_offset, + user_data, page_length)) { ret = -EFAULT; break; } @@ -1011,111 +1037,19 @@ i915_gem_gtt_pread(struct drm_device *dev, offset += page_length; } - mutex_lock(&dev->struct_mutex); - if (ret == 0 && (obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) { - /* The user has modified the object whilst we tried - * reading from it, and we now have no idea what domain - * the pages should be in. As we have just been touching - * them directly, flush everything back to the GTT - * domain. - */ - ret = i915_gem_object_set_to_gtt_domain(obj, false); - } - + mutex_lock(&i915->drm.struct_mutex); out_unpin: if (node.allocated) { wmb(); ggtt->base.clear_range(&ggtt->base, node.start, node.size); - i915_gem_object_unpin_pages(obj); remove_mappable_node(&node); } else { i915_vma_unpin(vma); } -out: - intel_runtime_pm_put(to_i915(dev)); - return ret; -} - -static int -i915_gem_shmem_pread(struct drm_device *dev, - struct drm_i915_gem_object *obj, - struct drm_i915_gem_pread *args, - struct drm_file *file) -{ - char __user *user_data; - ssize_t remain; - loff_t offset; - int shmem_page_offset, page_length, ret = 0; - int obj_do_bit17_swizzling, page_do_bit17_swizzling; - int prefaulted = 0; - int needs_clflush = 0; - struct sg_page_iter sg_iter; - - ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); - if (ret) - return ret; - - obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); - user_data = u64_to_user_ptr(args->data_ptr); - offset = args->offset; - remain = args->size; - - for_each_sg_page(obj->mm.pages->sgl, &sg_iter, obj->mm.pages->nents, - offset >> PAGE_SHIFT) { - struct page *page = sg_page_iter_page(&sg_iter); - - if (remain <= 0) - break; - - /* Operation in this page - * - * shmem_page_offset = offset within page in shmem file - * page_length = bytes to copy for this page - */ - shmem_page_offset = offset_in_page(offset); - page_length = remain; - if ((shmem_page_offset + page_length) > PAGE_SIZE) - page_length = PAGE_SIZE - shmem_page_offset; - - page_do_bit17_swizzling = obj_do_bit17_swizzling && - (page_to_phys(page) & (1 << 17)) != 0; - - ret = shmem_pread_fast(page, shmem_page_offset, page_length, - user_data, page_do_bit17_swizzling, - needs_clflush); - if (ret == 0) - goto next_page; - - mutex_unlock(&dev->struct_mutex); - - if (likely(!i915.prefault_disable) && !prefaulted) { - ret = fault_in_pages_writeable(user_data, remain); - /* Userspace is tricking us, but we've already clobbered - * its pages with the prefault and promised to write the - * data up to the first fault. Hence ignore any errors - * and just continue. 
*/ - (void)ret; - prefaulted = 1; - } - - ret = shmem_pread_slow(page, shmem_page_offset, page_length, - user_data, page_do_bit17_swizzling, - needs_clflush); - - mutex_lock(&dev->struct_mutex); - - if (ret) - goto out; - -next_page: - remain -= page_length; - user_data += page_length; - offset += page_length; - } - -out: - i915_gem_obj_finish_shmem_access(obj); +out_unlock: + intel_runtime_pm_put(i915); + mutex_unlock(&i915->drm.struct_mutex); return ret; } @@ -1134,7 +1068,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_pread *args = data; struct drm_i915_gem_object *obj; - int ret = 0; + int ret; if (args->size == 0) return 0; @@ -1152,7 +1086,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, if (args->offset > obj->base.size || args->size > obj->base.size - args->offset) { ret = -EINVAL; - goto err; + goto out; } trace_i915_gem_object_pread(obj, args->offset, args->size); @@ -1162,25 +1096,18 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, MAX_SCHEDULE_TIMEOUT, to_rps_client(file)); if (ret) - goto err; + goto out; - ret = i915_mutex_lock_interruptible(dev); + ret = i915_gem_object_pin_pages(obj); if (ret) - goto err; - - ret = i915_gem_shmem_pread(dev, obj, args, file); + goto out; - /* pread for non shmem backed objects */ + ret = i915_gem_shmem_pread(obj, args); if (ret == -EFAULT || ret == -ENODEV) - ret = i915_gem_gtt_pread(dev, obj, args->size, - args->offset, args->data_ptr); - - i915_gem_object_put(obj); - mutex_unlock(&dev->struct_mutex); - - return ret; + ret = i915_gem_gtt_pread(obj, args); -err: + i915_gem_object_unpin_pages(obj); +out: i915_gem_object_put_unlocked(obj); return ret; } @@ -1208,6 +1135,28 @@ fast_user_write(struct io_mapping *mapping, return unwritten; } +static inline unsigned long +slow_user_access(struct io_mapping *mapping, + unsigned long page_base, int page_offset, + char __user *user_data, + unsigned long length, bool pwrite) +{ + void __iomem *ioaddr; + void *vaddr; + unsigned long unwritten; + + ioaddr = io_mapping_map_wc(mapping, page_base, PAGE_SIZE); + /* We can use the cpu mem copy function because this is X86. */ + vaddr = (void __force *)ioaddr + page_offset; + if (pwrite) + unwritten = __copy_from_user(vaddr, user_data, length); + else + unwritten = __copy_to_user(user_data, vaddr, length); + + io_mapping_unmap(ioaddr); + return unwritten; +} + /** * This is the fast pwrite path, where we copy the data directly from the * user into the GTT, uncached. @@ -1247,7 +1196,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915, } } if (IS_ERR(vma)) { - ret = insert_mappable_node(i915, &node, PAGE_SIZE); + ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); if (ret) goto out; @@ -1276,8 +1225,8 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915, * page_length = bytes to copy for this page */ u32 page_base = node.start; - unsigned page_offset = offset_in_page(offset); - unsigned page_length = PAGE_SIZE - page_offset; + unsigned int page_offset = offset_in_page(offset); + unsigned int page_length = PAGE_SIZE - page_offset; page_length = remain < page_length ? remain : page_length; if (node.allocated) { wmb(); /* flush the write before we modify the GGTT */ -- cgit v1.1 From fe115628d567ba83a1498c585604d9035842b57c Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:40 +0100 Subject: drm/i915: Implement pwrite without struct-mutex We only need struct_mutex within pwrite for a brief window where we need to serialise with rendering and control our cache domains. 
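The shape is a lock/copy/lock bracket; as a userspace sketch (every helper and name below is an invented stand-in, not the driver API):

    #include <pthread.h>
    #include <stdint.h>

    struct object { int bound; };

    static pthread_mutex_t device_lock = PTHREAD_MUTEX_INITIALIZER; /* struct_mutex */

    static int pin_into_ggtt(struct object *obj)
    {
            obj->bound = 1;                 /* toy: bind + flush GTT domain */
            return 0;
    }

    static void unpin_from_ggtt(struct object *obj)
    {
            obj->bound = 0;
    }

    static int copy_user_to_aperture(struct object *obj, const char *user,
                                     uint64_t offset, uint64_t size)
    {
            (void)obj; (void)user; (void)offset; (void)size;
            return 0;                       /* toy: the faultable user copy */
    }

    static int gtt_pwrite(struct object *obj, const char *user,
                          uint64_t offset, uint64_t size)
    {
            int err;

            pthread_mutex_lock(&device_lock);       /* setup: bind + domains */
            err = pin_into_ggtt(obj);
            pthread_mutex_unlock(&device_lock);
            if (err)
                    return err;

            /* The copy may fault and sleep; no device lock is held here. */
            err = copy_user_to_aperture(obj, user, offset, size);

            pthread_mutex_lock(&device_lock);       /* teardown only */
            unpin_from_ggtt(obj);
            pthread_mutex_unlock(&device_lock);
            return err;
    }

The same bracket appears in the pread patch above; only the copy direction differs.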
Elsewhere we can rely on the backing storage being pinned, and forgive userspace any races against us. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-17-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 353 ++++++++++++++-------------------------- 1 file changed, 122 insertions(+), 231 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d122658..fad1487 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1116,72 +1116,50 @@ out: * page faults in the source data */ -static inline int -fast_user_write(struct io_mapping *mapping, - loff_t page_base, int page_offset, - char __user *user_data, - int length) +static inline bool +ggtt_write(struct io_mapping *mapping, + loff_t base, int offset, + char __user *user_data, int length) { - void __iomem *vaddr_atomic; void *vaddr; unsigned long unwritten; - vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base); /* We can use the cpu mem copy function because this is X86. */ - vaddr = (void __force*)vaddr_atomic + page_offset; - unwritten = __copy_from_user_inatomic_nocache(vaddr, + vaddr = (void __force *)io_mapping_map_atomic_wc(mapping, base); + unwritten = __copy_from_user_inatomic_nocache(vaddr + offset, user_data, length); - io_mapping_unmap_atomic(vaddr_atomic); - return unwritten; -} - -static inline unsigned long -slow_user_access(struct io_mapping *mapping, - unsigned long page_base, int page_offset, - char __user *user_data, - unsigned long length, bool pwrite) -{ - void __iomem *ioaddr; - void *vaddr; - unsigned long unwritten; - - ioaddr = io_mapping_map_wc(mapping, page_base, PAGE_SIZE); - /* We can use the cpu mem copy function because this is X86. */ - vaddr = (void __force *)ioaddr + page_offset; - if (pwrite) - unwritten = __copy_from_user(vaddr, user_data, length); - else - unwritten = __copy_to_user(user_data, vaddr, length); + io_mapping_unmap_atomic(vaddr); + if (unwritten) { + vaddr = (void __force *) + io_mapping_map_wc(mapping, base, PAGE_SIZE); + unwritten = copy_from_user(vaddr + offset, user_data, length); + io_mapping_unmap(vaddr); + } - io_mapping_unmap(ioaddr); return unwritten; } /** * This is the fast pwrite path, where we copy the data directly from the * user into the GTT, uncached. 
- * @i915: i915 device private data - * @obj: i915 gem object + * @obj: i915 GEM object * @args: pwrite arguments structure - * @file: drm file pointer */ static int -i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915, - struct drm_i915_gem_object *obj, - struct drm_i915_gem_pwrite *args, - struct drm_file *file) +i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, + const struct drm_i915_gem_pwrite *args) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); struct i915_ggtt *ggtt = &i915->ggtt; - struct drm_device *dev = obj->base.dev; - struct i915_vma *vma; struct drm_mm_node node; - uint64_t remain, offset; - char __user *user_data; + struct i915_vma *vma; + u64 remain, offset; + void __user *user_data; int ret; - bool hit_slow_path = false; - if (i915_gem_object_is_tiled(obj)) - return -EFAULT; + ret = mutex_lock_interruptible(&i915->drm.struct_mutex); + if (ret) + return ret; intel_runtime_pm_get(i915); vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, @@ -1198,21 +1176,17 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915, if (IS_ERR(vma)) { ret = insert_mappable_node(ggtt, &node, PAGE_SIZE); if (ret) - goto out; - - ret = i915_gem_object_pin_pages(obj); - if (ret) { - remove_mappable_node(&node); - goto out; - } + goto out_unlock; + GEM_BUG_ON(!node.allocated); } ret = i915_gem_object_set_to_gtt_domain(obj, true); if (ret) goto out_unpin; + mutex_unlock(&i915->drm.struct_mutex); + intel_fb_obj_invalidate(obj, ORIGIN_CPU); - obj->mm.dirty = true; user_data = u64_to_user_ptr(args->data_ptr); offset = args->offset; @@ -1243,92 +1217,36 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915, * If the object is non-shmem backed, we retry again with the * path that handles page fault. */ - if (fast_user_write(&ggtt->mappable, page_base, - page_offset, user_data, page_length)) { - hit_slow_path = true; - mutex_unlock(&dev->struct_mutex); - if (slow_user_access(&ggtt->mappable, - page_base, - page_offset, user_data, - page_length, true)) { - ret = -EFAULT; - mutex_lock(&dev->struct_mutex); - goto out_flush; - } - - mutex_lock(&dev->struct_mutex); + if (ggtt_write(&ggtt->mappable, page_base, page_offset, + user_data, page_length)) { + ret = -EFAULT; + break; } remain -= page_length; user_data += page_length; offset += page_length; } - -out_flush: - if (hit_slow_path) { - if (ret == 0 && - (obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) { - /* The user has modified the object whilst we tried - * reading from it, and we now have no idea what domain - * the pages should be in. As we have just been touching - * them directly, flush everything back to the GTT - * domain. - */ - ret = i915_gem_object_set_to_gtt_domain(obj, false); - } - } - intel_fb_obj_flush(obj, false, ORIGIN_CPU); + + mutex_lock(&i915->drm.struct_mutex); out_unpin: if (node.allocated) { wmb(); ggtt->base.clear_range(&ggtt->base, node.start, node.size); - i915_gem_object_unpin_pages(obj); remove_mappable_node(&node); } else { i915_vma_unpin(vma); } -out: +out_unlock: intel_runtime_pm_put(i915); + mutex_unlock(&i915->drm.struct_mutex); return ret; } -/* Per-page copy function for the shmem pwrite fastpath. - * Flushes invalid cachelines before writing to the target if - * needs_clflush_before is set and flushes out any written cachelines after - * writing if needs_clflush is set. 
*/ static int -shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length, - char __user *user_data, - bool page_do_bit17_swizzling, - bool needs_clflush_before, - bool needs_clflush_after) -{ - char *vaddr; - int ret; - - if (unlikely(page_do_bit17_swizzling)) - return -EINVAL; - - vaddr = kmap_atomic(page); - if (needs_clflush_before) - drm_clflush_virt_range(vaddr + shmem_page_offset, - page_length); - ret = __copy_from_user_inatomic(vaddr + shmem_page_offset, - user_data, page_length); - if (needs_clflush_after) - drm_clflush_virt_range(vaddr + shmem_page_offset, - page_length); - kunmap_atomic(vaddr); - - return ret ? -EFAULT : 0; -} - -/* Only difference to the fast-path function is that this can handle bit17 - * and uses non-atomic copy and kmap functions. */ -static int -shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length, +shmem_pwrite_slow(struct page *page, int offset, int length, char __user *user_data, bool page_do_bit17_swizzling, bool needs_clflush_before, @@ -1339,124 +1257,114 @@ shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length, vaddr = kmap(page); if (unlikely(needs_clflush_before || page_do_bit17_swizzling)) - shmem_clflush_swizzled_range(vaddr + shmem_page_offset, - page_length, + shmem_clflush_swizzled_range(vaddr + offset, length, page_do_bit17_swizzling); if (page_do_bit17_swizzling) - ret = __copy_from_user_swizzled(vaddr, shmem_page_offset, - user_data, - page_length); + ret = __copy_from_user_swizzled(vaddr, offset, user_data, + length); else - ret = __copy_from_user(vaddr + shmem_page_offset, - user_data, - page_length); + ret = __copy_from_user(vaddr + offset, user_data, length); if (needs_clflush_after) - shmem_clflush_swizzled_range(vaddr + shmem_page_offset, - page_length, + shmem_clflush_swizzled_range(vaddr + offset, length, page_do_bit17_swizzling); kunmap(page); return ret ? -EFAULT : 0; } +/* Per-page copy function for the shmem pwrite fastpath. + * Flushes invalid cachelines before writing to the target if + * needs_clflush_before is set and flushes out any written cachelines after + * writing if needs_clflush is set. 
+ */ static int -i915_gem_shmem_pwrite(struct drm_device *dev, - struct drm_i915_gem_object *obj, - struct drm_i915_gem_pwrite *args, - struct drm_file *file) +shmem_pwrite(struct page *page, int offset, int len, char __user *user_data, + bool page_do_bit17_swizzling, + bool needs_clflush_before, + bool needs_clflush_after) { - ssize_t remain; - loff_t offset; - char __user *user_data; - int shmem_page_offset, page_length, ret = 0; - int obj_do_bit17_swizzling, page_do_bit17_swizzling; - int hit_slowpath = 0; + int ret; + + ret = -ENODEV; + if (!page_do_bit17_swizzling) { + char *vaddr = kmap_atomic(page); + + if (needs_clflush_before) + drm_clflush_virt_range(vaddr + offset, len); + ret = __copy_from_user_inatomic(vaddr + offset, user_data, len); + if (needs_clflush_after) + drm_clflush_virt_range(vaddr + offset, len); + + kunmap_atomic(vaddr); + } + if (ret == 0) + return ret; + + return shmem_pwrite_slow(page, offset, len, user_data, + page_do_bit17_swizzling, + needs_clflush_before, + needs_clflush_after); +} + +static int +i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj, + const struct drm_i915_gem_pwrite *args) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + void __user *user_data; + u64 remain; + unsigned int obj_do_bit17_swizzling; + unsigned int partial_cacheline_write; unsigned int needs_clflush; - struct sg_page_iter sg_iter; + unsigned int offset, idx; + int ret; - ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush); + ret = mutex_lock_interruptible(&i915->drm.struct_mutex); if (ret) return ret; - obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); - user_data = u64_to_user_ptr(args->data_ptr); - offset = args->offset; - remain = args->size; + ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush); + mutex_unlock(&i915->drm.struct_mutex); + if (ret) + return ret; - for_each_sg_page(obj->mm.pages->sgl, &sg_iter, obj->mm.pages->nents, - offset >> PAGE_SHIFT) { - struct page *page = sg_page_iter_page(&sg_iter); - int partial_cacheline_write; + obj_do_bit17_swizzling = 0; + if (i915_gem_object_needs_bit17_swizzle(obj)) + obj_do_bit17_swizzling = BIT(17); - if (remain <= 0) - break; + /* If we don't overwrite a cacheline completely we need to be + * careful to have up-to-date data by first clflushing. Don't + * overcomplicate things and flush the entire patch. + */ + partial_cacheline_write = 0; + if (needs_clflush & CLFLUSH_BEFORE) + partial_cacheline_write = boot_cpu_data.x86_clflush_size - 1; - /* Operation in this page - * - * shmem_page_offset = offset within page in shmem file - * page_length = bytes to copy for this page - */ - shmem_page_offset = offset_in_page(offset); - - page_length = remain; - if ((shmem_page_offset + page_length) > PAGE_SIZE) - page_length = PAGE_SIZE - shmem_page_offset; - - /* If we don't overwrite a cacheline completely we need to be - * careful to have up-to-date data by first clflushing. Don't - * overcomplicate things and flush the entire patch. 
*/ - partial_cacheline_write = needs_clflush & CLFLUSH_BEFORE && - ((shmem_page_offset | page_length) - & (boot_cpu_data.x86_clflush_size - 1)); - - page_do_bit17_swizzling = obj_do_bit17_swizzling && - (page_to_phys(page) & (1 << 17)) != 0; - - ret = shmem_pwrite_fast(page, shmem_page_offset, page_length, - user_data, page_do_bit17_swizzling, - partial_cacheline_write, - needs_clflush & CLFLUSH_AFTER); - if (ret == 0) - goto next_page; - - hit_slowpath = 1; - mutex_unlock(&dev->struct_mutex); - ret = shmem_pwrite_slow(page, shmem_page_offset, page_length, - user_data, page_do_bit17_swizzling, - partial_cacheline_write, - needs_clflush & CLFLUSH_AFTER); + user_data = u64_to_user_ptr(args->data_ptr); + remain = args->size; + offset = offset_in_page(args->offset); + for (idx = args->offset >> PAGE_SHIFT; remain; idx++) { + struct page *page = i915_gem_object_get_page(obj, idx); + int length; - mutex_lock(&dev->struct_mutex); + length = remain; + if (offset + length > PAGE_SIZE) + length = PAGE_SIZE - offset; + ret = shmem_pwrite(page, offset, length, user_data, + page_to_phys(page) & obj_do_bit17_swizzling, + (offset | length) & partial_cacheline_write, + needs_clflush & CLFLUSH_AFTER); if (ret) - goto out; - -next_page: - remain -= page_length; - user_data += page_length; - offset += page_length; - } - -out: - i915_gem_obj_finish_shmem_access(obj); + break; - if (hit_slowpath) { - /* - * Fixup: Flush cpu caches in case we didn't flush the dirty - * cachelines in-line while writing and the object moved - * out of the cpu write domain while we've dropped the lock. - */ - if (!(needs_clflush & CLFLUSH_AFTER) && - obj->base.write_domain != I915_GEM_DOMAIN_CPU) { - if (i915_gem_clflush_object(obj, obj->pin_display)) - needs_clflush |= CLFLUSH_AFTER; - } + remain -= length; + user_data += length; + offset = 0; } - if (needs_clflush & CLFLUSH_AFTER) - i915_gem_chipset_flush(to_i915(dev)); - intel_fb_obj_flush(obj, false, ORIGIN_CPU); + i915_gem_obj_finish_shmem_access(obj); return ret; } @@ -1472,7 +1380,6 @@ int i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, struct drm_file *file) { - struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_pwrite *args = data; struct drm_i915_gem_object *obj; int ret; @@ -1485,13 +1392,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, args->size)) return -EFAULT; - if (likely(!i915.prefault_disable)) { - ret = fault_in_pages_readable(u64_to_user_ptr(args->data_ptr), - args->size); - if (ret) - return -EFAULT; - } - obj = i915_gem_object_lookup(file, args->handle); if (!obj) return -ENOENT; @@ -1513,11 +1413,9 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, if (ret) goto err; - intel_runtime_pm_get(dev_priv); - - ret = i915_mutex_lock_interruptible(dev); + ret = i915_gem_object_pin_pages(obj); if (ret) - goto err_rpm; + goto err; ret = -EFAULT; /* We can only do the GTT pwrite on untiled buffers, as otherwise @@ -1532,23 +1430,16 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, * pointers (e.g. gtt mappings when moving data between * textures). Fallback to the shmem path in that case. 
*/ - ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file); + ret = i915_gem_gtt_pwrite_fast(obj, args); if (ret == -EFAULT || ret == -ENOSPC) { if (obj->phys_handle) ret = i915_gem_phys_pwrite(obj, args, file); else - ret = i915_gem_shmem_pwrite(dev, obj, args, file); + ret = i915_gem_shmem_pwrite(obj, args); } - i915_gem_object_put(obj); - mutex_unlock(&dev->struct_mutex); - intel_runtime_pm_put(dev_priv); - - return ret; - -err_rpm: - intel_runtime_pm_put(dev_priv); + i915_gem_object_unpin_pages(obj); err: i915_gem_object_put_unlocked(obj); return ret; -- cgit v1.1 From 40e62d5d6be8b4999068da31ee6aca7ca76669ee Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:41 +0100 Subject: drm/i915: Acquire the backing storage outside of struct_mutex in set-domain As we can locklessly (well struct_mutex-lessly) acquire the backing storage, do so in set-domain-ioctl to reduce the contention on the struct_mutex. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-18-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 99 +++++++++++++++++++++++++---------------- 1 file changed, 61 insertions(+), 38 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index fad1487..9f1bb1f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1452,6 +1452,30 @@ write_origin(struct drm_i915_gem_object *obj, unsigned domain) obj->frontbuffer_ggtt_origin : ORIGIN_CPU); } +static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915; + struct list_head *list; + struct i915_vma *vma; + + list_for_each_entry(vma, &obj->vma_list, obj_link) { + if (!i915_vma_is_ggtt(vma)) + continue; + + if (i915_vma_is_active(vma)) + continue; + + if (!drm_mm_node_allocated(&vma->node)) + continue; + + list_move_tail(&vma->vm_link, &vma->vm->inactive_list); + } + + i915 = to_i915(obj->base.dev); + list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list; + list_move_tail(&obj->global_list, list); +} + /** * Called when user space prepares to use an object with the CPU, either * through the mmap ioctl's mapping or a GTT mapping. @@ -1467,7 +1491,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, struct drm_i915_gem_object *obj; uint32_t read_domains = args->read_domains; uint32_t write_domain = args->write_domain; - int ret; + int err; /* Only handle setting domains to types used by the CPU. */ if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS) @@ -1487,33 +1511,48 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, * We will repeat the flush holding the lock in the normal manner * to catch cases where we are gazumped. */ - ret = i915_gem_object_wait(obj, + err = i915_gem_object_wait(obj, I915_WAIT_INTERRUPTIBLE | (write_domain ? I915_WAIT_ALL : 0), MAX_SCHEDULE_TIMEOUT, to_rps_client(file)); - if (ret) - goto err; + if (err) + goto out_unlocked; - ret = i915_mutex_lock_interruptible(dev); - if (ret) - goto err; + /* Flush and acquire obj->pages so that we are coherent through + * direct access in memory with previous cached writes through + * shmemfs and that our cache domain tracking remains valid. + * For example, if the obj->filp was moved to swap without us + * being notified and releasing the pages, we would mistakenly + * continue to assume that the obj remained out of the CPU cached + * domain. 
+ */ + err = i915_gem_object_pin_pages(obj); + if (err) + goto out_unlocked; + + err = i915_mutex_lock_interruptible(dev); + if (err) + goto out_pages; if (read_domains & I915_GEM_DOMAIN_GTT) - ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); + err = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); else - ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0); + err = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0); - if (write_domain != 0) - intel_fb_obj_invalidate(obj, write_origin(obj, write_domain)); + /* And bump the LRU for this access */ + i915_gem_object_bump_inactive_ggtt(obj); - i915_gem_object_put(obj); mutex_unlock(&dev->struct_mutex); - return ret; -err: + if (write_domain != 0) + intel_fb_obj_invalidate(obj, write_origin(obj, write_domain)); + +out_pages: + i915_gem_object_unpin_pages(obj); +out_unlocked: i915_gem_object_put_unlocked(obj); - return ret; + return err; } /** @@ -1734,6 +1773,10 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf) if (ret) goto err; + ret = i915_gem_object_pin_pages(obj); + if (ret) + goto err; + intel_runtime_pm_get(dev_priv); ret = i915_mutex_lock_interruptible(dev); @@ -1816,6 +1859,7 @@ err_unlock: mutex_unlock(&dev->struct_mutex); err_rpm: intel_runtime_pm_put(dev_priv); + i915_gem_object_unpin_pages(obj); err: switch (ret) { case -EIO: @@ -3269,24 +3313,6 @@ i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) I915_GEM_DOMAIN_CPU); } -static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) -{ - struct i915_vma *vma; - - list_for_each_entry(vma, &obj->vma_list, obj_link) { - if (!i915_vma_is_ggtt(vma)) - continue; - - if (i915_vma_is_active(vma)) - continue; - - if (!drm_mm_node_allocated(&vma->node)) - continue; - - list_move_tail(&vma->vm_link, &vma->vm->inactive_list); - } -} - /** * Moves a single object to the GTT read, and possibly write domain. * @obj: object to act on @@ -3342,7 +3368,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) /* It should now be out of any other write domains, and we can update * the domain values for our changes. */ - BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); + GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_GTT) != 0); obj->base.read_domains |= I915_GEM_DOMAIN_GTT; if (write) { obj->base.read_domains = I915_GEM_DOMAIN_GTT; @@ -3354,10 +3380,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) old_read_domains, old_write_domain); - /* And bump the LRU for this access */ - i915_gem_object_bump_inactive_ggtt(obj); i915_gem_object_unpin_pages(obj); - return 0; } @@ -3713,7 +3736,7 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write) /* It should now be out of any other write domains, and we can update * the domain values for our changes. */ - BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); + GEM_BUG_ON((obj->base.write_domain & ~I915_GEM_DOMAIN_CPU) != 0); /* If we're writing through the CPU, then the GPU read domains will * need to be invalidated at next use. -- cgit v1.1 From fbbd37b36fa5e16a03aca07a922192d3db28588d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:42 +0100 Subject: drm/i915: Move object release to a freelist + worker We want to hide the latency of releasing objects and their backing storage from the submission, so we move the actual free to a worker. This allows us to switch to struct_mutex freeing of the object in the next patch. 
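The core of the mechanism is a lock-free producer feeding a locked consumer; roughly, as a sketch with hypothetical example_* names (the driver's real version is in the diff below): llist_add() returns true only when the list was previously empty, so just the first producer kicks the worker, and the free path itself never touches struct_mutex.

#include <linux/llist.h>
#include <linux/mutex.h>
#include <linux/workqueue.h>

struct example_dev {
        struct llist_head free_list;    /* lock-free producer side */
        struct work_struct free_work;
        struct mutex lock;              /* stands in for struct_mutex */
};

struct example_obj {
        struct llist_node freed_link;
};

void example_release(struct example_dev *dev, struct example_obj *obj);

static void example_free_work(struct work_struct *work)
{
        struct example_dev *dev =
                container_of(work, struct example_dev, free_work);
        struct llist_node *entries;
        struct example_obj *obj, *next;

        /* Drain everything queued so far; later frees re-kick us. */
        while ((entries = llist_del_all(&dev->free_list))) {
                mutex_lock(&dev->lock);
                llist_for_each_entry_safe(obj, next, entries, freed_link)
                        example_release(dev, obj); /* unbind + free */
                mutex_unlock(&dev->lock);
        }
}

static void example_defer_free(struct example_dev *dev,
                               struct example_obj *obj)
{
        /* True only when the list was empty: first caller kicks the
         * worker; everyone else just enqueues and returns immediately.
         */
        if (llist_add(&obj->freed_link, &dev->free_list))
                schedule_work(&dev->free_work);
}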
Furthermore, if we know that the object we are dereferencing remains valid for the duration of our access, we can forgo the usual synchronisation barriers and atomic reference counting. To ensure this we defer freeing an object til after an RCU grace period, such that any lookup of the object within an RCU read critical section will remain valid until after we exit that critical section. We also employ this delay for rate-limiting the serialisation on reallocation - we have to slow down object creation in order to prevent resource starvation (in particular, files). v2: Return early in i915_gem_tiling() ioctl to skip over superfluous work on error. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-19-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 15 ++- drivers/gpu/drm/i915/i915_drv.c | 19 ++-- drivers/gpu/drm/i915/i915_drv.h | 44 +++++++- drivers/gpu/drm/i915/i915_gem.c | 166 +++++++++++++++++++++---------- drivers/gpu/drm/i915/i915_gem_shrinker.c | 14 ++- drivers/gpu/drm/i915/i915_gem_tiling.c | 21 ++-- 6 files changed, 202 insertions(+), 77 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index e97a16c..b0b0100 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4957,10 +4957,12 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_ring_test_irq_fops, #define DROP_BOUND 0x2 #define DROP_RETIRE 0x4 #define DROP_ACTIVE 0x8 -#define DROP_ALL (DROP_UNBOUND | \ - DROP_BOUND | \ - DROP_RETIRE | \ - DROP_ACTIVE) +#define DROP_FREED 0x10 +#define DROP_ALL (DROP_UNBOUND | \ + DROP_BOUND | \ + DROP_RETIRE | \ + DROP_ACTIVE | \ + DROP_FREED) static int i915_drop_caches_get(void *data, u64 *val) { @@ -5004,6 +5006,11 @@ i915_drop_caches_set(void *data, u64 val) unlock: mutex_unlock(&dev->struct_mutex); + if (val & DROP_FREED) { + synchronize_rcu(); + flush_work(&dev_priv->mm.free_work); + } + return ret; } diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index af3559d..b308f71 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -537,14 +537,17 @@ static const struct vga_switcheroo_client_ops i915_switcheroo_ops = { .can_switch = i915_switcheroo_can_switch, }; -static void i915_gem_fini(struct drm_device *dev) +static void i915_gem_fini(struct drm_i915_private *dev_priv) { - mutex_lock(&dev->struct_mutex); - i915_gem_cleanup_engines(dev); - i915_gem_context_fini(dev); - mutex_unlock(&dev->struct_mutex); + mutex_lock(&dev_priv->drm.struct_mutex); + i915_gem_cleanup_engines(&dev_priv->drm); + i915_gem_context_fini(&dev_priv->drm); + mutex_unlock(&dev_priv->drm.struct_mutex); + + synchronize_rcu(); + flush_work(&dev_priv->mm.free_work); - WARN_ON(!list_empty(&to_i915(dev)->context_list)); + WARN_ON(!list_empty(&dev_priv->context_list)); } static int i915_load_modeset_init(struct drm_device *dev) @@ -619,7 +622,7 @@ static int i915_load_modeset_init(struct drm_device *dev) cleanup_gem: if (i915_gem_suspend(dev)) DRM_ERROR("failed to idle hardware; continuing to unload!\n"); - i915_gem_fini(dev); + i915_gem_fini(dev_priv); cleanup_irq: intel_guc_fini(dev); drm_irq_uninstall(dev); @@ -1305,7 +1308,7 @@ void i915_driver_unload(struct drm_device *dev) drain_workqueue(dev_priv->wq); intel_guc_fini(dev); - i915_gem_fini(dev); + i915_gem_fini(dev_priv); intel_fbc_cleanup_cfb(dev_priv); intel_power_domains_fini(dev_priv); diff --git 
a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f69e0e0..6f648a3 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1365,8 +1365,8 @@ struct i915_gem_mm { struct list_head bound_list; /** * List of objects which are not bound to the GTT (thus - * are idle and not used by the GPU) but still have - * (presumably uncached) pages still attached. + * are idle and not used by the GPU). These objects may or may + * not actually have any pages attached. */ struct list_head unbound_list; @@ -1375,6 +1375,12 @@ struct i915_gem_mm { */ struct list_head userfault_list; + /** + * List of objects which are pending destruction. + */ + struct llist_head free_list; + struct work_struct free_work; + /** Usable portion of the GTT for GEM */ unsigned long stolen_base; /* limited to low memory (32-bit) */ @@ -2224,6 +2230,10 @@ struct drm_i915_gem_object { /** Stolen memory for this object, instead of being backed by shmem. */ struct drm_mm_node *stolen; struct list_head global_list; + union { + struct rcu_head rcu; + struct llist_node freed; + }; /** * Whether the object is currently in the GGTT mmap. @@ -2341,10 +2351,38 @@ to_intel_bo(struct drm_gem_object *gem) return container_of(gem, struct drm_i915_gem_object, base); } +/** + * i915_gem_object_lookup_rcu - look up a temporary GEM object from its handle + * @filp: DRM file private date + * @handle: userspace handle + * + * Returns: + * + * A pointer to the object named by the handle if such exists on @filp, NULL + * otherwise. This object is only valid whilst under the RCU read lock, and + * note carefully the object may be in the process of being destroyed. + */ +static inline struct drm_i915_gem_object * +i915_gem_object_lookup_rcu(struct drm_file *file, u32 handle) +{ +#ifdef CONFIG_LOCKDEP + WARN_ON(debug_locks && !lock_is_held(&rcu_lock_map)); +#endif + return idr_find(&file->object_idr, handle); +} + static inline struct drm_i915_gem_object * i915_gem_object_lookup(struct drm_file *file, u32 handle) { - return to_intel_bo(drm_gem_object_lookup(file, handle)); + struct drm_i915_gem_object *obj; + + rcu_read_lock(); + obj = i915_gem_object_lookup_rcu(file, handle); + if (obj && !kref_get_unless_zero(&obj->base.refcount)) + obj = NULL; + rcu_read_unlock(); + + return obj; } __deprecated diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9f1bb1f..07b2eb6 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -42,6 +42,7 @@ #include #include +static void i915_gem_flush_free_objects(struct drm_i915_private *i915); static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj); static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj); @@ -648,6 +649,8 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_create *args = data; + i915_gem_flush_free_objects(to_i915(dev)); + return i915_gem_create(file, dev, args->size, &args->handle); } @@ -3524,10 +3527,14 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_caching *args = data; struct drm_i915_gem_object *obj; + int err = 0; - obj = i915_gem_object_lookup(file, args->handle); - if (!obj) - return -ENOENT; + rcu_read_lock(); + obj = i915_gem_object_lookup_rcu(file, args->handle); + if (!obj) { + err = -ENOENT; + goto out; + } switch (obj->cache_level) { case I915_CACHE_LLC: @@ -3543,9 +3550,9 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data, 
args->caching = I915_CACHING_NONE; break; } - - i915_gem_object_put_unlocked(obj); - return 0; +out: + rcu_read_unlock(); + return err; } int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data, @@ -4089,10 +4096,14 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, struct drm_i915_gem_busy *args = data; struct drm_i915_gem_object *obj; unsigned long active; + int err; - obj = i915_gem_object_lookup(file, args->handle); - if (!obj) - return -ENOENT; + rcu_read_lock(); + obj = i915_gem_object_lookup_rcu(file, args->handle); + if (!obj) { + err = -ENOENT; + goto out; + } args->busy = 0; active = __I915_BO_ACTIVE(obj); @@ -4122,7 +4133,6 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, * are busy is not completely reliable - we only guarantee * that the object was busy. */ - rcu_read_lock(); for_each_active(active, idx) args->busy |= busy_check_reader(&obj->last_read[idx]); @@ -4140,12 +4150,11 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, * the result. */ args->busy |= busy_check_writer(&obj->last_write); - - rcu_read_unlock(); } - i915_gem_object_put_unlocked(obj); - return 0; +out: + rcu_read_unlock(); + return err; } int @@ -4308,7 +4317,6 @@ i915_gem_object_create(struct drm_device *dev, u64 size) fail: i915_gem_object_free(obj); - return ERR_PTR(ret); } @@ -4336,16 +4344,69 @@ static bool discard_backing_storage(struct drm_i915_gem_object *obj) return atomic_long_read(&obj->base.filp->f_count) == 1; } -void i915_gem_free_object(struct drm_gem_object *gem_obj) +static void __i915_gem_free_objects(struct drm_i915_private *i915, + struct llist_node *freed) { - struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_vma *vma, *next; + struct drm_i915_gem_object *obj, *on; - intel_runtime_pm_get(dev_priv); + mutex_lock(&i915->drm.struct_mutex); + intel_runtime_pm_get(i915); + llist_for_each_entry(obj, freed, freed) { + struct i915_vma *vma, *vn; + + trace_i915_gem_object_destroy(obj); + + GEM_BUG_ON(i915_gem_object_is_active(obj)); + list_for_each_entry_safe(vma, vn, + &obj->vma_list, obj_link) { + GEM_BUG_ON(!i915_vma_is_ggtt(vma)); + GEM_BUG_ON(i915_vma_is_active(vma)); + vma->flags &= ~I915_VMA_PIN_MASK; + i915_vma_close(vma); + } + + list_del(&obj->global_list); + } + intel_runtime_pm_put(i915); + mutex_unlock(&i915->drm.struct_mutex); + + llist_for_each_entry_safe(obj, on, freed, freed) { + GEM_BUG_ON(obj->bind_count); + GEM_BUG_ON(atomic_read(&obj->frontbuffer_bits)); + + if (obj->ops->release) + obj->ops->release(obj); + + if (WARN_ON(i915_gem_object_has_pinned_pages(obj))) + atomic_set(&obj->mm.pages_pin_count, 0); + __i915_gem_object_put_pages(obj); + GEM_BUG_ON(obj->mm.pages); + + if (obj->base.import_attach) + drm_prime_gem_destroy(&obj->base, NULL); + + drm_gem_object_release(&obj->base); + i915_gem_info_remove_obj(i915, obj->base.size); + + kfree(obj->bit_17); + i915_gem_object_free(obj); + } +} - trace_i915_gem_object_destroy(obj); +static void i915_gem_flush_free_objects(struct drm_i915_private *i915) +{ + struct llist_node *freed; + + freed = llist_del_all(&i915->mm.free_list); + if (unlikely(freed)) + __i915_gem_free_objects(i915, freed); +} + +static void __i915_gem_free_work(struct work_struct *work) +{ + struct drm_i915_private *i915 = + container_of(work, struct drm_i915_private, mm.free_work); + struct llist_node *freed; /* All file-owned VMA should have been released by this point through * i915_gem_close_object(), or 
earlier by i915_gem_context_close(). @@ -4354,42 +4415,44 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj) * the GTT either for the user or for scanout). Those VMA still need to * unbound now. */ - list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) { - GEM_BUG_ON(!i915_vma_is_ggtt(vma)); - GEM_BUG_ON(i915_vma_is_active(vma)); - vma->flags &= ~I915_VMA_PIN_MASK; - i915_vma_close(vma); - } - GEM_BUG_ON(obj->bind_count); - WARN_ON(atomic_read(&obj->frontbuffer_bits)); + while ((freed = llist_del_all(&i915->mm.free_list))) + __i915_gem_free_objects(i915, freed); +} - if (obj->mm.pages && obj->mm.madv == I915_MADV_WILLNEED && - dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES && - i915_gem_object_is_tiled(obj)) - __i915_gem_object_unpin_pages(obj); +static void __i915_gem_free_object_rcu(struct rcu_head *head) +{ + struct drm_i915_gem_object *obj = + container_of(head, typeof(*obj), rcu); + struct drm_i915_private *i915 = to_i915(obj->base.dev); - if (obj->ops->release) - obj->ops->release(obj); + /* We can't simply use call_rcu() from i915_gem_free_object() + * as we need to block whilst unbinding, and the call_rcu + * task may be called from softirq context. So we take a + * detour through a worker. + */ + if (llist_add(&obj->freed, &i915->mm.free_list)) + schedule_work(&i915->mm.free_work); +} + +void i915_gem_free_object(struct drm_gem_object *gem_obj) +{ + struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); - if (WARN_ON(i915_gem_object_has_pinned_pages(obj))) - atomic_set(&obj->mm.pages_pin_count, 0); if (discard_backing_storage(obj)) obj->mm.madv = I915_MADV_DONTNEED; - __i915_gem_object_put_pages(obj); - GEM_BUG_ON(obj->mm.pages); - - if (obj->base.import_attach) - drm_prime_gem_destroy(&obj->base, NULL); - - drm_gem_object_release(&obj->base); - i915_gem_info_remove_obj(dev_priv, obj->base.size); - - kfree(obj->bit_17); - i915_gem_object_free(obj); + if (obj->mm.pages && obj->mm.madv == I915_MADV_WILLNEED && + to_i915(obj->base.dev)->quirks & QUIRK_PIN_SWIZZLED_PAGES && + i915_gem_object_is_tiled(obj)) + __i915_gem_object_unpin_pages(obj); - intel_runtime_pm_put(dev_priv); + /* Before we free the object, make sure any pure RCU-only + * read-side critical sections are complete, e.g. + * i915_gem_busy_ioctl(). For the corresponding synchronized + * lookup see i915_gem_object_lookup_rcu(). + */ + call_rcu(&obj->rcu, __i915_gem_free_object_rcu); } void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj) @@ -4438,6 +4501,7 @@ int i915_gem_suspend(struct drm_device *dev) cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); cancel_delayed_work_sync(&dev_priv->gt.retire_work); flush_delayed_work(&dev_priv->gt.idle_work); + flush_work(&dev_priv->mm.free_work); /* Assert that we sucessfully flushed all the work and * reset the GPU back to its idle, low power state. 
@@ -4753,6 +4817,8 @@ i915_gem_load_init(struct drm_device *dev) NULL); INIT_LIST_HEAD(&dev_priv->context_list); + INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work); + init_llist_head(&dev_priv->mm.free_list); INIT_LIST_HEAD(&dev_priv->mm.unbound_list); INIT_LIST_HEAD(&dev_priv->mm.bound_list); INIT_LIST_HEAD(&dev_priv->mm.fence_list); diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index c8a4c40..0241658 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -201,6 +201,10 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, typeof(*obj), global_list))) { list_move_tail(&obj->global_list, &still_in_list); + if (!obj->mm.pages) { + list_del_init(&obj->global_list); + continue; + } if (flags & I915_SHRINK_PURGEABLE && obj->mm.madv != I915_MADV_DONTNEED) @@ -218,8 +222,6 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, if (!can_release_pages(obj)) continue; - i915_gem_object_get(obj); - if (unsafe_drop_pages(obj)) { mutex_lock(&obj->mm.lock); if (!obj->mm.pages) { @@ -228,8 +230,6 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, } mutex_unlock(&obj->mm.lock); } - - i915_gem_object_put(obj); } list_splice(&still_in_list, phase->list); } @@ -396,12 +396,18 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) */ unbound = bound = unevictable = 0; list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) { + if (!obj->mm.pages) + continue; + if (!can_release_pages(obj)) unevictable += obj->base.size >> PAGE_SHIFT; else unbound += obj->base.size >> PAGE_SHIFT; } list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { + if (!obj->mm.pages) + continue; + if (!can_release_pages(obj)) unevictable += obj->base.size >> PAGE_SHIFT; else diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 6608799..c63a9cf 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -325,12 +325,19 @@ i915_gem_get_tiling(struct drm_device *dev, void *data, struct drm_i915_gem_get_tiling *args = data; struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_object *obj; + int err = -ENOENT; + + rcu_read_lock(); + obj = i915_gem_object_lookup_rcu(file, args->handle); + if (obj) { + args->tiling_mode = + READ_ONCE(obj->tiling_and_stride) & TILING_MASK; + err = 0; + } + rcu_read_unlock(); + if (unlikely(err)) + return err; - obj = i915_gem_object_lookup(file, args->handle); - if (!obj) - return -ENOENT; - - args->tiling_mode = READ_ONCE(obj->tiling_and_stride) & TILING_MASK; switch (args->tiling_mode) { case I915_TILING_X: args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x; @@ -338,11 +345,10 @@ i915_gem_get_tiling(struct drm_device *dev, void *data, case I915_TILING_Y: args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y; break; + default: case I915_TILING_NONE: args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; break; - default: - DRM_ERROR("unknown tiling mode\n"); } /* Hide bit 17 from the user -- see comment in i915_gem_set_tiling */ @@ -355,6 +361,5 @@ i915_gem_get_tiling(struct drm_device *dev, void *data, if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17) args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10; - i915_gem_object_put_unlocked(obj); return 0; } -- cgit v1.1 From f0cd518206e1a47e57bc251e1faba9d38eadcc59 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:43 +0100 Subject: drm/i915: Use lockless object free Having moved the locked phase of 
freeing an object to a separate worker, we can now declare to the core that we only need the unlocked variant of driver->gem_free_object, and can use the simple unreference internally. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-20-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 10 +--------- drivers/gpu/drm/i915/i915_gem.c | 30 +++++++++++++++--------------- drivers/gpu/drm/i915/i915_gem_tiling.c | 2 +- drivers/gpu/drm/i915/i915_gem_userptr.c | 4 ++-- drivers/gpu/drm/i915/intel_display.c | 6 +++--- drivers/gpu/drm/i915/intel_overlay.c | 4 ++-- drivers/gpu/drm/i915/intel_pm.c | 2 +- 8 files changed, 26 insertions(+), 34 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index b308f71..91cd7b29 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -2575,7 +2575,7 @@ static struct drm_driver driver = { .set_busid = drm_pci_set_busid, .gem_close_object = i915_gem_close_object, - .gem_free_object = i915_gem_free_object, + .gem_free_object_unlocked = i915_gem_free_object, .gem_vm_ops = &i915_gem_vm_ops, .prime_handle_to_fd = drm_gem_prime_handle_to_fd, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6f648a3..c3c49bc 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2404,19 +2404,12 @@ __attribute__((nonnull)) static inline void i915_gem_object_put(struct drm_i915_gem_object *obj) { - drm_gem_object_unreference(&obj->base); + __drm_gem_object_unreference(&obj->base); } __deprecated extern void drm_gem_object_unreference(struct drm_gem_object *); -__attribute__((nonnull)) -static inline void -i915_gem_object_put_unlocked(struct drm_i915_gem_object *obj) -{ - drm_gem_object_unreference_unlocked(&obj->base); -} - __deprecated extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *); @@ -2511,7 +2504,6 @@ static inline struct i915_vma *i915_vma_get(struct i915_vma *vma) static inline void i915_vma_put(struct i915_vma *vma) { - lockdep_assert_held(&vma->vm->dev->struct_mutex); i915_gem_object_put(vma->obj); } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 07b2eb6..fdcbaab 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -617,7 +617,7 @@ i915_gem_create(struct drm_file *file, ret = drm_gem_handle_create(file, &obj->base, &handle); /* drop reference from allocate - handle holds it now */ - i915_gem_object_put_unlocked(obj); + i915_gem_object_put(obj); if (ret) return ret; @@ -1111,7 +1111,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, i915_gem_object_unpin_pages(obj); out: - i915_gem_object_put_unlocked(obj); + i915_gem_object_put(obj); return ret; } @@ -1444,7 +1444,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, i915_gem_object_unpin_pages(obj); err: - i915_gem_object_put_unlocked(obj); + i915_gem_object_put(obj); return ret; } @@ -1520,7 +1520,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, MAX_SCHEDULE_TIMEOUT, to_rps_client(file)); if (err) - goto out_unlocked; + goto out; /* Flush and acquire obj->pages so that we are coherent through * direct access in memory with previous cached writes through @@ -1532,11 +1532,11 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, */ err = i915_gem_object_pin_pages(obj); if (err) - goto 
out_unlocked; + goto out; err = i915_mutex_lock_interruptible(dev); if (err) - goto out_pages; + goto out_unpin; if (read_domains & I915_GEM_DOMAIN_GTT) err = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0); @@ -1551,10 +1551,10 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data, if (write_domain != 0) intel_fb_obj_invalidate(obj, write_origin(obj, write_domain)); -out_pages: +out_unpin: i915_gem_object_unpin_pages(obj); -out_unlocked: - i915_gem_object_put_unlocked(obj); +out: + i915_gem_object_put(obj); return err; } @@ -1585,7 +1585,7 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, } } - i915_gem_object_put_unlocked(obj); + i915_gem_object_put(obj); return err; } @@ -1631,7 +1631,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, * pages from. */ if (!obj->base.filp) { - i915_gem_object_put_unlocked(obj); + i915_gem_object_put(obj); return -EINVAL; } @@ -1643,7 +1643,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, struct vm_area_struct *vma; if (down_write_killable(&mm->mmap_sem)) { - i915_gem_object_put_unlocked(obj); + i915_gem_object_put(obj); return -EINTR; } vma = find_vma(mm, addr); @@ -1657,7 +1657,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, /* This may race, but that's ok, it only gets set */ WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU); } - i915_gem_object_put_unlocked(obj); + i915_gem_object_put(obj); if (IS_ERR((void *)addr)) return addr; @@ -2105,7 +2105,7 @@ i915_gem_mmap_gtt(struct drm_file *file, if (ret == 0) *offset = drm_vma_node_offset_addr(&obj->base.vma_node); - i915_gem_object_put_unlocked(obj); + i915_gem_object_put(obj); return ret; } @@ -2932,7 +2932,7 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) args->timeout_ns = 0; } - i915_gem_object_put_unlocked(obj); + i915_gem_object_put(obj); return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index c63a9cf..6395e62 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -201,7 +201,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, if (!i915_tiling_ok(dev, args->stride, obj->base.size, args->tiling_mode)) { - i915_gem_object_put_unlocked(obj); + i915_gem_object_put(obj); return -EINVAL; } diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 6c8c7b3..c30d04f 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -546,7 +546,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) release_pages(pvec, pinned, 0); drm_free_large(pvec); - i915_gem_object_put_unlocked(obj); + i915_gem_object_put(obj); put_task_struct(work->task); kfree(work); } @@ -806,7 +806,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev, void *data, struct drm_file *file ret = drm_gem_handle_create(file, &obj->base, &handle); /* drop reference from allocate - handle holds it now */ - i915_gem_object_put_unlocked(obj); + i915_gem_object_put(obj); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e4800b8..87135d93 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -11051,7 +11051,7 @@ intel_framebuffer_create_for_mode(struct drm_device *dev, fb = intel_framebuffer_create(dev, &mode_cmd, obj); if (IS_ERR(fb)) - i915_gem_object_put_unlocked(obj); + i915_gem_object_put(obj); return fb; } @@ -12360,7 +12360,7 @@ 
cleanup: crtc->primary->fb = old_fb; update_state_fb(crtc->primary); - i915_gem_object_put_unlocked(obj); + i915_gem_object_put(obj); drm_framebuffer_unreference(work->old_fb); spin_lock_irq(&dev->event_lock); @@ -15934,7 +15934,7 @@ intel_user_framebuffer_create(struct drm_device *dev, fb = intel_framebuffer_create(dev, &mode_cmd, obj); if (IS_ERR(fb)) - i915_gem_object_put_unlocked(obj); + i915_gem_object_put(obj); return fb; } diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 25bcd4a1..fd0e4da 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -1222,7 +1222,7 @@ int intel_overlay_put_image_ioctl(struct drm_device *dev, void *data, out_unlock: mutex_unlock(&dev->struct_mutex); drm_modeset_unlock_all(dev); - i915_gem_object_put_unlocked(new_bo); + i915_gem_object_put(new_bo); out_free: kfree(params); @@ -1466,7 +1466,7 @@ void intel_cleanup_overlay(struct drm_i915_private *dev_priv) * hardware should be off already */ WARN_ON(dev_priv->overlay->active); - i915_gem_object_put_unlocked(dev_priv->overlay->reg_bo); + i915_gem_object_put(dev_priv->overlay->reg_bo); kfree(dev_priv->overlay); } diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 6f19e60..b544248 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5878,7 +5878,7 @@ static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv) if (WARN_ON(!dev_priv->vlv_pctx)) return; - i915_gem_object_put_unlocked(dev_priv->vlv_pctx); + i915_gem_object_put(dev_priv->vlv_pctx); dev_priv->vlv_pctx = NULL; } -- cgit v1.1 From d07f0e59b2c762584478920cd2d11fba2980a94a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:44 +0100 Subject: drm/i915: Move GEM activity tracking into a common struct reservation_object In preparation to support many distinct timelines, we need to expand the activity tracking on the GEM object to handle more than just a request per engine. We already use the struct reservation_object on the dma-buf to handle many fence contexts, so integrating that into the GEM object itself is the preferred solution. (For example, we can now share the same reservation_object between every consumer/producer using this buffer and skip the manual import/export via dma-buf.) v2: Reimplement busy-ioctl (by walking the reservation object), postpone the ABI change for another day. Similarly use the reservation object to find the last_write request (if active and from i915) for choosing display CS flips. Caveats: * busy-ioctl: busy-ioctl only reports on the native fences, it will not warn of stalls (in set-domain-ioctl, pread/pwrite etc) if the object is being rendered to by external fences. It also will not report the same busy state as wait-ioctl (or polling on the dma-buf) in the same circumstances. On the plus side, it does retain reporting of which *i915* engines are engaged with this object. * non-blocking atomic modesets take a step backwards as the wait for render completion blocks the ioctl. This is fixed in a subsequent patch to use a fence instead for awaiting on the rendering, see "drm/i915: Restore nonblocking awaits for modesetting" * dynamic array manipulation for shared-fences in reservation is slower than the previous lockless static assignment (e.g. gem_exec_lut_handle runtime on ivb goes from 42s to 66s), mainly due to atomic operations (maintaining the fence refcounts). 
* loss of object-level retirement callbacks, emulated by VMA retirement tracking. * minor loss of object-level last activity information from debugfs, could be replaced with per-vma information if desired Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-21-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 15 +- drivers/gpu/drm/i915/i915_drv.h | 62 +++---- drivers/gpu/drm/i915/i915_gem.c | 263 ++++++++--------------------- drivers/gpu/drm/i915/i915_gem_batch_pool.c | 11 +- drivers/gpu/drm/i915/i915_gem_dmabuf.c | 53 +----- drivers/gpu/drm/i915/i915_gem_dmabuf.h | 45 ----- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 55 ++---- drivers/gpu/drm/i915/i915_gem_gtt.c | 32 ++++ drivers/gpu/drm/i915/i915_gem_gtt.h | 1 + drivers/gpu/drm/i915/i915_gem_request.c | 48 +++--- drivers/gpu/drm/i915/i915_gem_request.h | 35 +--- drivers/gpu/drm/i915/i915_gpu_error.c | 6 +- drivers/gpu/drm/i915/intel_atomic_plane.c | 2 - drivers/gpu/drm/i915/intel_display.c | 114 +++---------- drivers/gpu/drm/i915/intel_drv.h | 3 - 15 files changed, 212 insertions(+), 533 deletions(-) delete mode 100644 drivers/gpu/drm/i915/i915_gem_dmabuf.h (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index b0b0100..b632506 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -136,11 +136,10 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) struct i915_vma *vma; unsigned int frontbuffer_bits; int pin_count = 0; - enum intel_engine_id id; lockdep_assert_held(&obj->base.dev->struct_mutex); - seq_printf(m, "%pK: %c%c%c%c%c %8zdKiB %02x %02x [ ", + seq_printf(m, "%pK: %c%c%c%c%c %8zdKiB %02x %02x %s%s%s", &obj->base, get_active_flag(obj), get_pin_flag(obj), @@ -149,14 +148,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) get_pin_mapped_flag(obj), obj->base.size / 1024, obj->base.read_domains, - obj->base.write_domain); - for_each_engine(engine, dev_priv, id) - seq_printf(m, "%x ", - i915_gem_active_get_seqno(&obj->last_read[id], - &obj->base.dev->struct_mutex)); - seq_printf(m, "] %x %s%s%s", - i915_gem_active_get_seqno(&obj->last_write, - &obj->base.dev->struct_mutex), + obj->base.write_domain, i915_cache_level_str(dev_priv, obj->cache_level), obj->mm.dirty ? " dirty" : "", obj->mm.madv == I915_MADV_DONTNEED ? " purgeable" : ""); @@ -187,8 +179,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) if (obj->stolen) seq_printf(m, " (stolen: %08llx)", obj->stolen->start); - engine = i915_gem_active_get_engine(&obj->last_write, - &dev_priv->drm.struct_mutex); + engine = i915_gem_object_last_write_engine(obj); if (engine) seq_printf(m, " (%s)", engine->name); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c3c49bc..693ee69 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2246,21 +2247,12 @@ struct drm_i915_gem_object { struct list_head batch_pool_link; unsigned long flags; - /** - * This is set if the object is on the active lists (has pending - * rendering and so a non-zero seqno), and is not set if it is on - * inactive (ready to be unbound) list. 
- */ -#define I915_BO_ACTIVE_SHIFT 0 -#define I915_BO_ACTIVE_MASK ((1 << I915_NUM_ENGINES) - 1) -#define __I915_BO_ACTIVE(bo) \ - ((READ_ONCE((bo)->flags) >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK) /** * Have we taken a reference for the object for incomplete GPU * activity? */ -#define I915_BO_ACTIVE_REF (I915_BO_ACTIVE_SHIFT + I915_NUM_ENGINES) +#define I915_BO_ACTIVE_REF 0 /* * Is the object to be mapped as read-only to the GPU @@ -2281,6 +2273,7 @@ struct drm_i915_gem_object { /** Count of VMA actually bound by this object */ unsigned int bind_count; + unsigned int active_count; unsigned int pin_display; struct { @@ -2320,8 +2313,7 @@ struct drm_i915_gem_object { * read request. This allows for the CPU to read from an active * buffer by only waiting for the write to complete. */ - struct i915_gem_active last_read[I915_NUM_ENGINES]; - struct i915_gem_active last_write; + struct reservation_object *resv; /** References from framebuffers, locks out tiling changes. */ unsigned long framebuffer_references; @@ -2340,6 +2332,8 @@ struct drm_i915_gem_object { /** for phys allocated objects */ struct drm_dma_handle *phys_handle; + + struct reservation_object __builtin_resv; }; static inline struct drm_i915_gem_object * @@ -2425,35 +2419,10 @@ i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj) return obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE; } -static inline unsigned long -i915_gem_object_get_active(const struct drm_i915_gem_object *obj) -{ - return (obj->flags >> I915_BO_ACTIVE_SHIFT) & I915_BO_ACTIVE_MASK; -} - static inline bool i915_gem_object_is_active(const struct drm_i915_gem_object *obj) { - return i915_gem_object_get_active(obj); -} - -static inline void -i915_gem_object_set_active(struct drm_i915_gem_object *obj, int engine) -{ - obj->flags |= BIT(engine + I915_BO_ACTIVE_SHIFT); -} - -static inline void -i915_gem_object_clear_active(struct drm_i915_gem_object *obj, int engine) -{ - obj->flags &= ~BIT(engine + I915_BO_ACTIVE_SHIFT); -} - -static inline bool -i915_gem_object_has_active_engine(const struct drm_i915_gem_object *obj, - int engine) -{ - return obj->flags & BIT(engine + I915_BO_ACTIVE_SHIFT); + return obj->active_count; } static inline bool @@ -2496,6 +2465,23 @@ i915_gem_object_get_stride(struct drm_i915_gem_object *obj) return obj->tiling_and_stride & STRIDE_MASK; } +static inline struct intel_engine_cs * +i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj) +{ + struct intel_engine_cs *engine = NULL; + struct dma_fence *fence; + + rcu_read_lock(); + fence = reservation_object_get_excl_rcu(obj->resv); + rcu_read_unlock(); + + if (fence && dma_fence_is_i915(fence) && !dma_fence_is_signaled(fence)) + engine = to_request(fence)->engine; + dma_fence_put(fence); + + return engine; +} + static inline struct i915_vma *i915_vma_get(struct i915_vma *vma) { i915_gem_object_get(vma->obj); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index fdcbaab..1161a21 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -29,7 +29,6 @@ #include #include #include "i915_drv.h" -#include "i915_gem_dmabuf.h" #include "i915_vgpu.h" #include "i915_trace.h" #include "intel_drv.h" @@ -447,11 +446,6 @@ i915_gem_object_wait(struct drm_i915_gem_object *obj, long timeout, struct intel_rps_client *rps) { - struct reservation_object *resv; - struct i915_gem_active *active; - unsigned long active_mask; - int idx; - might_sleep(); #if IS_ENABLED(CONFIG_LOCKDEP) GEM_BUG_ON(debug_locks && @@ -460,33 
+454,9 @@ i915_gem_object_wait(struct drm_i915_gem_object *obj, #endif GEM_BUG_ON(timeout < 0); - if (flags & I915_WAIT_ALL) { - active = obj->last_read; - active_mask = i915_gem_object_get_active(obj); - } else { - active_mask = 1; - active = &obj->last_write; - } - - for_each_active(active_mask, idx) { - struct drm_i915_gem_request *request; - - request = i915_gem_active_get_unlocked(&active[idx]); - if (request) { - timeout = i915_gem_object_wait_fence(&request->fence, - flags, timeout, - rps); - i915_gem_request_put(request); - } - if (timeout < 0) - return timeout; - } - - resv = i915_gem_object_get_dmabuf_resv(obj); - if (resv) - timeout = i915_gem_object_wait_reservation(resv, - flags, timeout, - rps); + timeout = i915_gem_object_wait_reservation(obj->resv, + flags, timeout, + rps); return timeout < 0 ? timeout : 0; } @@ -2549,44 +2519,6 @@ err_unlock: goto out_unlock; } -static void -i915_gem_object_retire__write(struct i915_gem_active *active, - struct drm_i915_gem_request *request) -{ - struct drm_i915_gem_object *obj = - container_of(active, struct drm_i915_gem_object, last_write); - - intel_fb_obj_flush(obj, true, ORIGIN_CS); -} - -static void -i915_gem_object_retire__read(struct i915_gem_active *active, - struct drm_i915_gem_request *request) -{ - int idx = request->engine->id; - struct drm_i915_gem_object *obj = - container_of(active, struct drm_i915_gem_object, last_read[idx]); - - GEM_BUG_ON(!i915_gem_object_has_active_engine(obj, idx)); - - i915_gem_object_clear_active(obj, idx); - if (i915_gem_object_is_active(obj)) - return; - - /* Bump our place on the bound list to keep it roughly in LRU order - * so that we don't steal from recently used but inactive objects - * (unless we are forced to ofc!) - */ - if (obj->bind_count) - list_move_tail(&obj->global_list, - &request->i915->mm.bound_list); - - if (i915_gem_object_has_active_reference(obj)) { - i915_gem_object_clear_active_reference(obj); - i915_gem_object_put(obj); - } -} - static bool i915_context_is_banned(const struct i915_gem_context *ctx) { unsigned long elapsed; @@ -2966,6 +2898,13 @@ int i915_vma_unbind(struct i915_vma *vma) * In order to prevent it from being recursively closed, * take a pin on the vma so that the second unbind is * aborted. + * + * Even more scary is that the retire callback may free + * the object (last active vma). To prevent the explosion + * we defer the actual object free to a worker that can + * only proceed once it acquires the struct_mutex (which + * we currently hold, therefore it cannot free this object + * before we are finished). */ __i915_vma_pin(vma); @@ -4010,83 +3949,42 @@ static __always_inline unsigned int __busy_write_id(unsigned int id) } static __always_inline unsigned int -__busy_set_if_active(const struct i915_gem_active *active, +__busy_set_if_active(const struct dma_fence *fence, unsigned int (*flag)(unsigned int id)) { - struct drm_i915_gem_request *request; - - request = rcu_dereference(active->request); - if (!request || i915_gem_request_completed(request)) - return 0; + struct drm_i915_gem_request *rq; - /* This is racy. See __i915_gem_active_get_rcu() for an in detail - * discussion of how to handle the race correctly, but for reporting - * the busy state we err on the side of potentially reporting the - * wrong engine as being busy (but we guarantee that the result - * is at least self-consistent). - * - * As we use SLAB_DESTROY_BY_RCU, the request may be reallocated - * whilst we are inspecting it, even under the RCU read lock as we are. 
- * This means that there is a small window for the engine and/or the - * seqno to have been overwritten. The seqno will always be in the - * future compared to the intended, and so we know that if that - * seqno is idle (on whatever engine) our request is idle and the - * return 0 above is correct. + /* We have to check the current hw status of the fence as the uABI + * guarantees forward progress. We could rely on the idle worker + * to eventually flush us, but to minimise latency just ask the + * hardware. * - * The issue is that if the engine is switched, it is just as likely - * to report that it is busy (but since the switch happened, we know - * the request should be idle). So there is a small chance that a busy - * result is actually the wrong engine. - * - * So why don't we care? - * - * For starters, the busy ioctl is a heuristic that is by definition - * racy. Even with perfect serialisation in the driver, the hardware - * state is constantly advancing - the state we report to the user - * is stale. - * - * The critical information for the busy-ioctl is whether the object - * is idle as userspace relies on that to detect whether its next - * access will stall, or if it has missed submitting commands to - * the hardware allowing the GPU to stall. We never generate a - * false-positive for idleness, thus busy-ioctl is reliable at the - * most fundamental level, and we maintain the guarantee that a - * busy object left to itself will eventually become idle (and stay - * idle!). - * - * We allow ourselves the leeway of potentially misreporting the busy - * state because that is an optimisation heuristic that is constantly - * in flux. Being quickly able to detect the busy/idle state is much - * more important than accurate logging of exactly which engines were - * busy. - * - * For accuracy in reporting the engine, we could use - * - * result = 0; - * request = __i915_gem_active_get_rcu(active); - * if (request) { - * if (!i915_gem_request_completed(request)) - * result = flag(request->engine->exec_id); - * i915_gem_request_put(request); - * } - * - * but that still remains susceptible to both hardware and userspace - * races. So we accept making the result of that race slightly worse, - * given the rarity of the race and its low impact on the result. + * Note we only report on the status of native fences. 
*/ - return flag(READ_ONCE(request->engine->exec_id)); + if (!dma_fence_is_i915(fence)) + return 0; + + /* opencode to_request() in order to avoid const warnings */ + rq = container_of(fence, struct drm_i915_gem_request, fence); + if (i915_gem_request_completed(rq)) + return 0; + + return flag(rq->engine->exec_id); } static __always_inline unsigned int -busy_check_reader(const struct i915_gem_active *active) +busy_check_reader(const struct dma_fence *fence) { - return __busy_set_if_active(active, __busy_read_flag); + return __busy_set_if_active(fence, __busy_read_flag); } static __always_inline unsigned int -busy_check_writer(const struct i915_gem_active *active) +busy_check_writer(const struct dma_fence *fence) { - return __busy_set_if_active(active, __busy_write_id); + if (!fence) + return 0; + + return __busy_set_if_active(fence, __busy_write_id); } int @@ -4095,63 +3993,55 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_busy *args = data; struct drm_i915_gem_object *obj; - unsigned long active; + struct reservation_object_list *list; + unsigned int seq; int err; + err = -ENOENT; rcu_read_lock(); obj = i915_gem_object_lookup_rcu(file, args->handle); - if (!obj) { - err = -ENOENT; + if (!obj) goto out; - } - args->busy = 0; - active = __I915_BO_ACTIVE(obj); - if (active) { - int idx; + /* A discrepancy here is that we do not report the status of + * non-i915 fences, i.e. even though we may report the object as idle, + * a call to set-domain may still stall waiting for foreign rendering. + * This also means that wait-ioctl may report an object as busy, + * where busy-ioctl considers it idle. + * + * We trade the ability to warn of foreign fences to report on which + * i915 engines are active for the object. + * + * Alternatively, we can trade that extra information on read/write + * activity with + * args->busy = + * !reservation_object_test_signaled_rcu(obj->resv, true); + * to report the overall busyness. This is what the wait-ioctl does. + * + */ +retry: + seq = raw_read_seqcount(&obj->resv->seq); - /* Yes, the lookups are intentionally racy. - * - * First, we cannot simply rely on __I915_BO_ACTIVE. We have - * to regard the value as stale and as our ABI guarantees - * forward progress, we confirm the status of each active - * request with the hardware. - * - * Even though we guard the pointer lookup by RCU, that only - * guarantees that the pointer and its contents remain - * dereferencable and does *not* mean that the request we - * have is the same as the one being tracked by the object. - * - * Consider that we lookup the request just as it is being - * retired and freed. We take a local copy of the pointer, - * but before we add its engine into the busy set, the other - * thread reallocates it and assigns it to a task on another - * engine with a fresh and incomplete seqno. Guarding against - * that requires careful serialisation and reference counting, - * i.e. using __i915_gem_active_get_request_rcu(). We don't, - * instead we expect that if the result is busy, which engines - * are busy is not completely reliable - we only guarantee - * that the object was busy. 
- */ + /* Translate the exclusive fence to the READ *and* WRITE engine */ + args->busy = busy_check_writer(rcu_dereference(obj->resv->fence_excl)); - for_each_active(active, idx) - args->busy |= busy_check_reader(&obj->last_read[idx]); + /* Translate shared fences to READ set of engines */ + list = rcu_dereference(obj->resv->fence); + if (list) { + unsigned int shared_count = list->shared_count, i; - /* For ABI sanity, we only care that the write engine is in - * the set of read engines. This should be ensured by the - * ordering of setting last_read/last_write in - * i915_vma_move_to_active(), and then in reverse in retire. - * However, for good measure, we always report the last_write - * request as a busy read as well as being a busy write. - * - * We don't care that the set of active read/write engines - * may change during construction of the result, as it is - * equally liable to change before userspace can inspect - * the result. - */ - args->busy |= busy_check_writer(&obj->last_write); + for (i = 0; i < shared_count; ++i) { + struct dma_fence *fence = + rcu_dereference(list->shared[i]); + + args->busy |= busy_check_reader(fence); + } } + if (args->busy && read_seqcount_retry(&obj->resv->seq, seq)) + goto retry; + + err = 0; out: rcu_read_unlock(); return err; @@ -4216,23 +4106,19 @@ out: void i915_gem_object_init(struct drm_i915_gem_object *obj, const struct drm_i915_gem_object_ops *ops) { - int i; - mutex_init(&obj->mm.lock); INIT_LIST_HEAD(&obj->global_list); INIT_LIST_HEAD(&obj->userfault_link); - for (i = 0; i < I915_NUM_ENGINES; i++) - init_request_active(&obj->last_read[i], - i915_gem_object_retire__read); - init_request_active(&obj->last_write, - i915_gem_object_retire__write); INIT_LIST_HEAD(&obj->obj_exec_link); INIT_LIST_HEAD(&obj->vma_list); INIT_LIST_HEAD(&obj->batch_pool_link); obj->ops = ops; + reservation_object_init(&obj->__builtin_resv); + obj->resv = &obj->__builtin_resv; + obj->frontbuffer_ggtt_origin = ORIGIN_GTT; obj->mm.madv = I915_MADV_WILLNEED; @@ -4385,6 +4271,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, if (obj->base.import_attach) drm_prime_gem_destroy(&obj->base, NULL); + reservation_object_fini(&obj->__builtin_resv); drm_gem_object_release(&obj->base); i915_gem_info_remove_obj(i915, obj->base.size); diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c index e0f38e5..b3bc119e 100644 --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c @@ -114,11 +114,18 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, list_for_each_entry(tmp, list, batch_pool_link) { /* The batches are strictly LRU ordered */ - if (!i915_gem_active_is_idle(&tmp->last_read[pool->engine->id], - &tmp->base.dev->struct_mutex)) + if (i915_gem_object_is_active(tmp)) break; + GEM_BUG_ON(!reservation_object_test_signaled_rcu(tmp->resv, + true)); + if (tmp->base.size >= size) { + /* Clear the set of shared fences early */ + ww_mutex_lock(&tmp->resv->lock, NULL); + reservation_object_add_excl_fence(tmp->resv, NULL); + ww_mutex_unlock(&tmp->resv->lock); + obj = tmp; break; } diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index 4d45f20..5e38299 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -211,60 +211,17 @@ static const struct dma_buf_ops i915_dmabuf_ops = { .end_cpu_access = i915_gem_end_cpu_access, }; -static void export_fences(struct drm_i915_gem_object *obj, - struct 
dma_buf *dma_buf) -{ - struct reservation_object *resv = dma_buf->resv; - struct drm_i915_gem_request *req; - unsigned long active; - int idx; - - active = __I915_BO_ACTIVE(obj); - if (!active) - return; - - /* Serialise with execbuf to prevent concurrent fence-loops */ - mutex_lock(&obj->base.dev->struct_mutex); - - /* Mark the object for future fences before racily adding old fences */ - obj->base.dma_buf = dma_buf; - - ww_mutex_lock(&resv->lock, NULL); - - for_each_active(active, idx) { - req = i915_gem_active_get(&obj->last_read[idx], - &obj->base.dev->struct_mutex); - if (!req) - continue; - - if (reservation_object_reserve_shared(resv) == 0) - reservation_object_add_shared_fence(resv, &req->fence); - - i915_gem_request_put(req); - } - - req = i915_gem_active_get(&obj->last_write, - &obj->base.dev->struct_mutex); - if (req) { - reservation_object_add_excl_fence(resv, &req->fence); - i915_gem_request_put(req); - } - - ww_mutex_unlock(&resv->lock); - mutex_unlock(&obj->base.dev->struct_mutex); -} - struct dma_buf *i915_gem_prime_export(struct drm_device *dev, struct drm_gem_object *gem_obj, int flags) { struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); DEFINE_DMA_BUF_EXPORT_INFO(exp_info); - struct dma_buf *dma_buf; exp_info.ops = &i915_dmabuf_ops; exp_info.size = gem_obj->size; exp_info.flags = flags; exp_info.priv = gem_obj; + exp_info.resv = obj->resv; if (obj->ops->dmabuf_export) { int ret = obj->ops->dmabuf_export(obj); @@ -272,12 +229,7 @@ struct dma_buf *i915_gem_prime_export(struct drm_device *dev, return ERR_PTR(ret); } - dma_buf = drm_gem_dmabuf_export(dev, &exp_info); - if (IS_ERR(dma_buf)) - return dma_buf; - - export_fences(obj, dma_buf); - return dma_buf; + return drm_gem_dmabuf_export(dev, &exp_info); } static struct sg_table * @@ -335,6 +287,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, drm_gem_private_object_init(dev, &obj->base, dma_buf->size); i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops); obj->base.import_attach = attach; + obj->resv = dma_buf->resv; /* We use GTT as shorthand for a coherent domain, one that is * neither in the GPU cache nor in the CPU cache, where all diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.h b/drivers/gpu/drm/i915/i915_gem_dmabuf.h deleted file mode 100644 index 9131555..0000000 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - * - */ - -#ifndef _I915_GEM_DMABUF_H_ -#define _I915_GEM_DMABUF_H_ - -#include - -static inline struct reservation_object * -i915_gem_object_get_dmabuf_resv(struct drm_i915_gem_object *obj) -{ - struct dma_buf *dma_buf; - - if (obj->base.dma_buf) - dma_buf = obj->base.dma_buf; - else if (obj->base.import_attach) - dma_buf = obj->base.import_attach->dmabuf; - else - return NULL; - - return dma_buf->resv; -} - -#endif diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index d95c4e0..c35e847 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -34,7 +34,6 @@ #include #include "i915_drv.h" -#include "i915_gem_dmabuf.h" #include "i915_trace.h" #include "intel_drv.h" #include "intel_frontbuffer.h" @@ -1101,45 +1100,20 @@ err: return ret; } -static unsigned int eb_other_engines(struct drm_i915_gem_request *req) -{ - unsigned int mask; - - mask = ~intel_engine_flag(req->engine) & I915_BO_ACTIVE_MASK; - mask <<= I915_BO_ACTIVE_SHIFT; - - return mask; -} - static int i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, struct list_head *vmas) { - const unsigned int other_rings = eb_other_engines(req); struct i915_vma *vma; int ret; list_for_each_entry(vma, vmas, exec_list) { struct drm_i915_gem_object *obj = vma->obj; - struct reservation_object *resv; - if (obj->flags & other_rings) { - ret = i915_gem_request_await_object - (req, obj, obj->base.pending_write_domain); - if (ret) - return ret; - } - - resv = i915_gem_object_get_dmabuf_resv(obj); - if (resv) { - ret = i915_sw_fence_await_reservation - (&req->submit, resv, &i915_fence_ops, - obj->base.pending_write_domain, - I915_FENCE_TIMEOUT, - GFP_KERNEL | __GFP_NOWARN); - if (ret < 0) - return ret; - } + ret = i915_gem_request_await_object + (req, obj, obj->base.pending_write_domain); + if (ret) + return ret; if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) i915_gem_clflush_object(obj, false); @@ -1281,8 +1255,6 @@ void i915_vma_move_to_active(struct i915_vma *vma, GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); - obj->mm.dirty = true; /* be paranoid */ - /* Add a reference if we're newly entering the active list. * The order in which we add operations to the retirement queue is * vital here: mark_active adds to the start of the callback list, @@ -1290,11 +1262,14 @@ void i915_vma_move_to_active(struct i915_vma *vma, * add the active reference first and queue for it to be dropped * *last*. 
*/ - i915_gem_object_set_active(obj, idx); - i915_gem_active_set(&obj->last_read[idx], req); + if (!i915_vma_is_active(vma)) + obj->active_count++; + i915_vma_set_active(vma, idx); + i915_gem_active_set(&vma->last_read[idx], req); + list_move_tail(&vma->vm_link, &vma->vm->active_list); if (flags & EXEC_OBJECT_WRITE) { - i915_gem_active_set(&obj->last_write, req); + i915_gem_active_set(&vma->last_write, req); intel_fb_obj_invalidate(obj, ORIGIN_CS); @@ -1304,21 +1279,13 @@ void i915_vma_move_to_active(struct i915_vma *vma, if (flags & EXEC_OBJECT_NEEDS_FENCE) i915_gem_active_set(&vma->last_fence, req); - - i915_vma_set_active(vma, idx); - i915_gem_active_set(&vma->last_read[idx], req); - list_move_tail(&vma->vm_link, &vma->vm->active_list); } static void eb_export_fence(struct drm_i915_gem_object *obj, struct drm_i915_gem_request *req, unsigned int flags) { - struct reservation_object *resv; - - resv = i915_gem_object_get_dmabuf_resv(obj); - if (!resv) - return; + struct reservation_object *resv = obj->resv; /* Ignore errors from failing to allocate the new fence, we can't * handle an error right now. Worst case should be missed diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 1008209..1b1a459 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -31,6 +31,7 @@ #include "i915_vgpu.h" #include "i915_trace.h" #include "intel_drv.h" +#include "intel_frontbuffer.h" #define I915_GFP_DMA (GFP_KERNEL | __GFP_HIGHMEM) @@ -3343,6 +3344,7 @@ i915_vma_retire(struct i915_gem_active *active, const unsigned int idx = rq->engine->id; struct i915_vma *vma = container_of(active, struct i915_vma, last_read[idx]); + struct drm_i915_gem_object *obj = vma->obj; GEM_BUG_ON(!i915_vma_has_active_engine(vma, idx)); @@ -3353,6 +3355,34 @@ i915_vma_retire(struct i915_gem_active *active, list_move_tail(&vma->vm_link, &vma->vm->inactive_list); if (unlikely(i915_vma_is_closed(vma) && !i915_vma_is_pinned(vma))) WARN_ON(i915_vma_unbind(vma)); + + GEM_BUG_ON(!i915_gem_object_is_active(obj)); + if (--obj->active_count) + return; + + /* Bump our place on the bound list to keep it roughly in LRU order + * so that we don't steal from recently used but inactive objects + * (unless we are forced to ofc!) + */ + if (obj->bind_count) + list_move_tail(&obj->global_list, &rq->i915->mm.bound_list); + + obj->mm.dirty = true; /* be paranoid */ + + if (i915_gem_object_has_active_reference(obj)) { + i915_gem_object_clear_active_reference(obj); + i915_gem_object_put(obj); + } +} + +static void +i915_ggtt_retire__write(struct i915_gem_active *active, + struct drm_i915_gem_request *request) +{ + struct i915_vma *vma = + container_of(active, struct i915_vma, last_write); + + intel_fb_obj_flush(vma->obj, true, ORIGIN_CS); } void i915_vma_destroy(struct i915_vma *vma) @@ -3396,6 +3426,8 @@ __i915_vma_create(struct drm_i915_gem_object *obj, INIT_LIST_HEAD(&vma->exec_list); for (i = 0; i < ARRAY_SIZE(vma->last_read); i++) init_request_active(&vma->last_read[i], i915_vma_retire); + init_request_active(&vma->last_write, + i915_is_ggtt(vm) ? 
i915_ggtt_retire__write : NULL); init_request_active(&vma->last_fence, NULL); list_add(&vma->vm_link, &vm->unbound_list); vma->vm = vm; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index dbe6a6c..9f0327e 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -211,6 +211,7 @@ struct i915_vma { unsigned int active; struct i915_gem_active last_read[I915_NUM_ENGINES]; + struct i915_gem_active last_write; struct i915_gem_active last_fence; /** diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index d234c28..01a7fa5 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -196,6 +196,8 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) } i915_gem_context_put(request->ctx); + + dma_fence_signal(&request->fence); i915_gem_request_put(request); } @@ -553,33 +555,41 @@ i915_gem_request_await_object(struct drm_i915_gem_request *to, struct drm_i915_gem_object *obj, bool write) { - struct i915_gem_active *active; - unsigned long active_mask; - int idx; + struct dma_fence *excl; + int ret = 0; if (write) { - active_mask = i915_gem_object_get_active(obj); - active = obj->last_read; + struct dma_fence **shared; + unsigned int count, i; + + ret = reservation_object_get_fences_rcu(obj->resv, + &excl, &count, &shared); + if (ret) + return ret; + + for (i = 0; i < count; i++) { + ret = i915_gem_request_await_dma_fence(to, shared[i]); + if (ret) + break; + + dma_fence_put(shared[i]); + } + + for (; i < count; i++) + dma_fence_put(shared[i]); + kfree(shared); } else { - active_mask = 1; - active = &obj->last_write; + excl = reservation_object_get_excl_rcu(obj->resv); } - for_each_active(active_mask, idx) { - struct drm_i915_gem_request *request; - int ret; - - request = i915_gem_active_peek(&active[idx], - &obj->base.dev->struct_mutex); - if (!request) - continue; + if (excl) { + if (ret == 0) + ret = i915_gem_request_await_dma_fence(to, excl); - ret = i915_gem_request_await_request(to, request); - if (ret) - return ret; + dma_fence_put(excl); } - return 0; + return ret; } static void i915_gem_mark_busy(const struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 602234f..a51d596 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -554,22 +554,7 @@ i915_gem_active_isset(const struct i915_gem_active *active) } /** - * i915_gem_active_is_idle - report whether the active tracker is idle - * @active - the active tracker - * - * i915_gem_active_is_idle() returns true if the active tracker is currently - * unassigned or if the request is complete (but not yet retired). Requires - * the caller to hold struct_mutex (but that can be relaxed if desired). - */ -static inline bool -i915_gem_active_is_idle(const struct i915_gem_active *active, - struct mutex *mutex) -{ - return !i915_gem_active_peek(active, mutex); -} - -/** - * i915_gem_active_wait- waits until the request is completed + * i915_gem_active_wait - waits until the request is completed * @active - the active request on which to wait * @flags - how to wait * @timeout - how long to wait at most @@ -639,24 +624,6 @@ i915_gem_active_retire(struct i915_gem_active *active, return 0; } -/* Convenience functions for peeking at state inside active's request whilst - * guarded by the struct_mutex. 
- */ - -static inline uint32_t -i915_gem_active_get_seqno(const struct i915_gem_active *active, - struct mutex *mutex) -{ - return i915_gem_request_get_seqno(i915_gem_active_peek(active, mutex)); -} - -static inline struct intel_engine_cs * -i915_gem_active_get_engine(const struct i915_gem_active *active, - struct mutex *mutex) -{ - return i915_gem_request_get_engine(i915_gem_active_peek(active, mutex)); -} - #define for_each_active(mask, idx) \ for (; mask ? idx = ffs(mask) - 1, 1 : 0; mask &= ~BIT(idx)) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 5bbb372..aa8dadc 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -887,9 +887,9 @@ static void capture_bo(struct drm_i915_error_buffer *err, err->name = obj->base.name; for (i = 0; i < I915_NUM_ENGINES; i++) - err->rseqno[i] = __active_get_seqno(&obj->last_read[i]); - err->wseqno = __active_get_seqno(&obj->last_write); - err->engine = __active_get_engine_id(&obj->last_write); + err->rseqno[i] = __active_get_seqno(&vma->last_read[i]); + err->wseqno = __active_get_seqno(&vma->last_write); + err->engine = __active_get_engine_id(&vma->last_write); err->gtt_offset = vma->node.start; err->read_domains = obj->base.read_domains; diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c index c762ae5..cb55944 100644 --- a/drivers/gpu/drm/i915/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c @@ -84,7 +84,6 @@ intel_plane_duplicate_state(struct drm_plane *plane) state = &intel_state->base; __drm_atomic_helper_plane_duplicate_state(plane, state); - intel_state->wait_req = NULL; return state; } @@ -101,7 +100,6 @@ void intel_plane_destroy_state(struct drm_plane *plane, struct drm_plane_state *state) { - WARN_ON(state && to_intel_plane_state(state)->wait_req); drm_atomic_helper_plane_destroy_state(plane, state); } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 87135d93..622f733 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -37,7 +37,6 @@ #include "intel_frontbuffer.h" #include #include "i915_drv.h" -#include "i915_gem_dmabuf.h" #include "intel_dsi.h" #include "i915_trace.h" #include @@ -11967,8 +11966,6 @@ static int intel_gen7_queue_flip(struct drm_device *dev, static bool use_mmio_flip(struct intel_engine_cs *engine, struct drm_i915_gem_object *obj) { - struct reservation_object *resv; - /* * This is not being used for older platforms, because * non-availability of flip done interrupt forces us to use @@ -11990,12 +11987,7 @@ static bool use_mmio_flip(struct intel_engine_cs *engine, else if (i915.enable_execlists) return true; - resv = i915_gem_object_get_dmabuf_resv(obj); - if (resv && !reservation_object_test_signaled_rcu(resv, false)) - return true; - - return engine != i915_gem_active_get_engine(&obj->last_write, - &obj->base.dev->struct_mutex); + return engine != i915_gem_object_last_write_engine(obj); } static void skl_do_mmio_flip(struct intel_crtc *intel_crtc, @@ -12068,17 +12060,8 @@ static void intel_mmio_flip_work_func(struct work_struct *w) struct intel_framebuffer *intel_fb = to_intel_framebuffer(crtc->base.primary->fb); struct drm_i915_gem_object *obj = intel_fb->obj; - struct reservation_object *resv; - if (work->flip_queued_req) - WARN_ON(i915_wait_request(work->flip_queued_req, - 0, MAX_SCHEDULE_TIMEOUT) < 0); - - /* For framebuffer backed by dmabuf, wait for fence */ - resv = 
i915_gem_object_get_dmabuf_resv(obj); - if (resv) - WARN_ON(reservation_object_wait_timeout_rcu(resv, false, false, - MAX_SCHEDULE_TIMEOUT) < 0); + WARN_ON(i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT, NULL) < 0); intel_pipe_update_start(crtc); @@ -12279,8 +12262,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, } else if (IS_IVYBRIDGE(dev_priv) || IS_HASWELL(dev_priv)) { engine = dev_priv->engine[BCS]; } else if (INTEL_INFO(dev)->gen >= 7) { - engine = i915_gem_active_get_engine(&obj->last_write, - &obj->base.dev->struct_mutex); + engine = i915_gem_object_last_write_engine(obj); if (engine == NULL || engine->id != RCS) engine = dev_priv->engine[BCS]; } else { @@ -12312,9 +12294,6 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, if (mmio_flip) { INIT_WORK(&work->mmio_work, intel_mmio_flip_work_func); - - work->flip_queued_req = i915_gem_active_get(&obj->last_write, - &obj->base.dev->struct_mutex); queue_work(system_unbound_wq, &work->mmio_work); } else { request = i915_gem_request_alloc(engine, engine->last_context); @@ -14154,13 +14133,10 @@ static int intel_atomic_check(struct drm_device *dev, } static int intel_atomic_prepare_commit(struct drm_device *dev, - struct drm_atomic_state *state, - bool nonblock) + struct drm_atomic_state *state) { struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_plane_state *plane_state; struct drm_crtc_state *crtc_state; - struct drm_plane *plane; struct drm_crtc *crtc; int i, ret; @@ -14183,30 +14159,6 @@ static int intel_atomic_prepare_commit(struct drm_device *dev, ret = drm_atomic_helper_prepare_planes(dev, state); mutex_unlock(&dev->struct_mutex); - if (!ret && !nonblock) { - for_each_plane_in_state(state, plane, plane_state, i) { - struct intel_plane_state *intel_plane_state = - to_intel_plane_state(plane_state); - long timeout; - - if (!intel_plane_state->wait_req) - continue; - - timeout = i915_wait_request(intel_plane_state->wait_req, - I915_WAIT_INTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT); - if (timeout < 0) { - /* Any hang should be swallowed by the wait */ - WARN_ON(timeout == -EIO); - mutex_lock(&dev->struct_mutex); - drm_atomic_helper_cleanup_planes(dev, state); - mutex_unlock(&dev->struct_mutex); - ret = timeout; - break; - } - } - } - return ret; } @@ -14400,26 +14352,11 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) struct drm_crtc_state *old_crtc_state; struct drm_crtc *crtc; struct intel_crtc_state *intel_cstate; - struct drm_plane *plane; - struct drm_plane_state *plane_state; bool hw_check = intel_state->modeset; unsigned long put_domains[I915_MAX_PIPES] = {}; unsigned crtc_vblank_mask = 0; int i; - for_each_plane_in_state(state, plane, plane_state, i) { - struct intel_plane_state *intel_plane_state = - to_intel_plane_state(plane->state); - - if (!intel_plane_state->wait_req) - continue; - - /* EIO should be eaten, and we can't get interrupted in the - * worker, and blocking commits have waited already. 
*/ - WARN_ON(i915_wait_request(intel_plane_state->wait_req, - 0, MAX_SCHEDULE_TIMEOUT) < 0); - } - drm_atomic_helper_wait_for_dependencies(state); if (intel_state->modeset) { @@ -14626,7 +14563,7 @@ static int intel_atomic_commit(struct drm_device *dev, INIT_WORK(&state->commit_work, intel_atomic_commit_work); - ret = intel_atomic_prepare_commit(dev, state, nonblock); + ret = intel_atomic_prepare_commit(dev, state); if (ret) { DRM_DEBUG_ATOMIC("Preparing state failed with %i\n", ret); return ret; @@ -14759,7 +14696,7 @@ intel_prepare_plane_fb(struct drm_plane *plane, struct drm_framebuffer *fb = new_state->fb; struct drm_i915_gem_object *obj = intel_fb_obj(fb); struct drm_i915_gem_object *old_obj = intel_fb_obj(plane->state->fb); - struct reservation_object *resv; + long lret; int ret = 0; if (!obj && !old_obj) @@ -14797,39 +14734,34 @@ intel_prepare_plane_fb(struct drm_plane *plane, return 0; /* For framebuffer backed by dmabuf, wait for fence */ - resv = i915_gem_object_get_dmabuf_resv(obj); - if (resv) { - long lret; + lret = i915_gem_object_wait(obj, + I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED, + MAX_SCHEDULE_TIMEOUT, + NULL); + if (lret == -ERESTARTSYS) + return lret; - lret = reservation_object_wait_timeout_rcu(resv, false, true, - MAX_SCHEDULE_TIMEOUT); - if (lret == -ERESTARTSYS) - return lret; - - WARN(lret < 0, "waiting returns %li\n", lret); - } + WARN(lret < 0, "waiting returns %li\n", lret); if (plane->type == DRM_PLANE_TYPE_CURSOR && INTEL_INFO(dev)->cursor_needs_physical) { int align = IS_I830(dev_priv) ? 16 * 1024 : 256; ret = i915_gem_object_attach_phys(obj, align); - if (ret) + if (ret) { DRM_DEBUG_KMS("failed to attach phys object\n"); + return ret; + } } else { struct i915_vma *vma; vma = intel_pin_and_fence_fb_obj(fb, new_state->rotation); - if (IS_ERR(vma)) - ret = PTR_ERR(vma); - } - - if (ret == 0) { - to_intel_plane_state(new_state)->wait_req = - i915_gem_active_get(&obj->last_write, - &obj->base.dev->struct_mutex); + if (IS_ERR(vma)) { + DRM_DEBUG_KMS("failed to pin object\n"); + return PTR_ERR(vma); + } } - return ret; + return 0; } /** @@ -14847,7 +14779,6 @@ intel_cleanup_plane_fb(struct drm_plane *plane, { struct drm_device *dev = plane->dev; struct intel_plane_state *old_intel_state; - struct intel_plane_state *intel_state = to_intel_plane_state(plane->state); struct drm_i915_gem_object *old_obj = intel_fb_obj(old_state->fb); struct drm_i915_gem_object *obj = intel_fb_obj(plane->state->fb); @@ -14859,9 +14790,6 @@ intel_cleanup_plane_fb(struct drm_plane *plane, if (old_obj && (plane->type != DRM_PLANE_TYPE_CURSOR || !INTEL_INFO(dev)->cursor_needs_physical)) intel_unpin_fb_obj(old_state->fb, old_state->rotation); - - i915_gem_request_assign(&intel_state->wait_req, NULL); - i915_gem_request_assign(&old_intel_state->wait_req, NULL); } int diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index ace222c..ec7fa592 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -401,9 +401,6 @@ struct intel_plane_state { int scaler_id; struct drm_intel_sprite_colorkey ckey; - - /* async flip related structures */ - struct drm_i915_gem_request *wait_req; }; struct intel_initial_plane_config { -- cgit v1.1 From c004a90b7263e3d5811f6ec7de0abb143f3099b3 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:45 +0100 Subject: drm/i915: Restore nonblocking awaits for modesetting After combining the dma-buf reservation object and the GEM reservation object, we lost the ability to do a nonblocking 
wait on the i915 request (as we blocked upon the reservation object during prepare_fb). We can instead convert the reservation object into a fence upon which we can asynchronously wait (including a forced timeout in case the DMA fence is never signaled). Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-22-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_display.c | 93 +++++++++++++++++++++++++----------- drivers/gpu/drm/i915/intel_drv.h | 2 + 2 files changed, 66 insertions(+), 29 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 622f733..304073a 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14510,12 +14510,33 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) static void intel_atomic_commit_work(struct work_struct *work) { - struct drm_atomic_state *state = container_of(work, - struct drm_atomic_state, - commit_work); + struct drm_atomic_state *state = + container_of(work, struct drm_atomic_state, commit_work); + intel_atomic_commit_tail(state); } +static int __i915_sw_fence_call +intel_atomic_commit_ready(struct i915_sw_fence *fence, + enum i915_sw_fence_notify notify) +{ + struct intel_atomic_state *state = + container_of(fence, struct intel_atomic_state, commit_ready); + + switch (notify) { + case FENCE_COMPLETE: + if (state->base.commit_work.func) + queue_work(system_unbound_wq, &state->base.commit_work); + break; + + case FENCE_FREE: + drm_atomic_state_put(&state->base); + break; + } + + return NOTIFY_DONE; +} + static void intel_atomic_track_fbs(struct drm_atomic_state *state) { struct drm_plane_state *old_plane_state; @@ -14561,11 +14582,14 @@ static int intel_atomic_commit(struct drm_device *dev, if (ret) return ret; - INIT_WORK(&state->commit_work, intel_atomic_commit_work); + drm_atomic_state_get(state); + i915_sw_fence_init(&intel_state->commit_ready, + intel_atomic_commit_ready); ret = intel_atomic_prepare_commit(dev, state); if (ret) { DRM_DEBUG_ATOMIC("Preparing state failed with %i\n", ret); + i915_sw_fence_commit(&intel_state->commit_ready); return ret; } @@ -14576,10 +14600,14 @@ static int intel_atomic_commit(struct drm_device *dev, intel_atomic_track_fbs(state); drm_atomic_state_get(state); - if (nonblock) - queue_work(system_unbound_wq, &state->commit_work); - else + INIT_WORK(&state->commit_work, + nonblock ? 
intel_atomic_commit_work : NULL); + + i915_sw_fence_commit(&intel_state->commit_ready); + if (!nonblock) { + i915_sw_fence_wait(&intel_state->commit_ready); intel_atomic_commit_tail(state); + } return 0; } @@ -14691,20 +14719,22 @@ int intel_prepare_plane_fb(struct drm_plane *plane, struct drm_plane_state *new_state) { + struct intel_atomic_state *intel_state = + to_intel_atomic_state(new_state->state); struct drm_device *dev = plane->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct drm_framebuffer *fb = new_state->fb; struct drm_i915_gem_object *obj = intel_fb_obj(fb); struct drm_i915_gem_object *old_obj = intel_fb_obj(plane->state->fb); - long lret; - int ret = 0; + int ret; if (!obj && !old_obj) return 0; if (old_obj) { struct drm_crtc_state *crtc_state = - drm_atomic_get_existing_crtc_state(new_state->state, plane->state->crtc); + drm_atomic_get_existing_crtc_state(new_state->state, + plane->state->crtc); /* Big Hammer, we also need to ensure that any pending * MI_WAIT_FOR_EVENT inside a user batch buffer on the @@ -14717,31 +14747,36 @@ intel_prepare_plane_fb(struct drm_plane *plane, * This should only fail upon a hung GPU, in which case we * can safely continue. */ - if (needs_modeset(crtc_state)) - ret = i915_gem_object_wait(old_obj, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT, - NULL); - if (ret) { - /* GPU hangs should have been swallowed by the wait */ - WARN_ON(ret == -EIO); - return ret; + if (needs_modeset(crtc_state)) { + ret = i915_sw_fence_await_reservation(&intel_state->commit_ready, + old_obj->resv, NULL, + false, 0, + GFP_KERNEL); + if (ret < 0) + return ret; } } + if (new_state->fence) { /* explicit fencing */ + ret = i915_sw_fence_await_dma_fence(&intel_state->commit_ready, + new_state->fence, + I915_FENCE_TIMEOUT, + GFP_KERNEL); + if (ret < 0) + return ret; + } + if (!obj) return 0; - /* For framebuffer backed by dmabuf, wait for fence */ - lret = i915_gem_object_wait(obj, - I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED, - MAX_SCHEDULE_TIMEOUT, - NULL); - if (lret == -ERESTARTSYS) - return lret; - - WARN(lret < 0, "waiting returns %li\n", lret); + if (!new_state->fence) { /* implicit fencing */ + ret = i915_sw_fence_await_reservation(&intel_state->commit_ready, + obj->resv, NULL, + false, I915_FENCE_TIMEOUT, + GFP_KERNEL); + if (ret < 0) + return ret; + } if (plane->type == DRM_PLANE_TYPE_CURSOR && INTEL_INFO(dev)->cursor_needs_physical) { diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index ec7fa592..2616d92 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -365,6 +365,8 @@ struct intel_atomic_state { /* Gen9+ only */ struct skl_wm_values wm_results; + + struct i915_sw_fence commit_ready; }; struct intel_plane_state { -- cgit v1.1 From 73cb97010d4fdd2a29f00cac14d206c7641c23d2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:46 +0100 Subject: drm/i915: Combine seqno + tracking into a global timeline struct Our timelines are more than just a seqno. They also provide an ordered list of requests to be executed. Due to the restriction of handling individual address spaces, we are limited to a timeline per address space but we use a fence context per engine within. Our first step to introducing independent timelines per context (i.e. to allow each context to have a queue of requests to execute that have a defined set of dependencies on other requests) is to provide a timeline abstraction for the global execution queue. 
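To make the shape of that abstraction concrete, the sketch below models it in stand-alone C. It is illustrative only and not part of the patch: the *_model types, the NUM_ENGINES value and the fake_dma_fence_context_alloc() helper are invented stand-ins for the kernel's i915_gem_timeline, I915_NUM_ENGINES and dma_fence_context_alloc(). The point is the layout the patch introduces: one driver-wide timeline owning the shared seqno space, with a per-engine sub-timeline carrying its own fence context.

#include <stdint.h>
#include <stdio.h>

#define NUM_ENGINES 5			/* stand-in for I915_NUM_ENGINES */

struct intel_timeline_model {
	uint64_t fence_context;		/* distinct dma-fence context per engine */
	uint32_t last_submitted_seqno;
};

struct i915_gem_timeline_model {
	const char *name;
	uint32_t next_seqno;		/* seqno space shared by all engines */
	struct intel_timeline_model engine[NUM_ENGINES];
};

/* Stand-in for dma_fence_context_alloc(): hand out n consecutive contexts. */
static uint64_t fake_dma_fence_context_alloc(unsigned int n)
{
	static uint64_t next_context = 1;
	uint64_t base = next_context;

	next_context += n;
	return base;
}

static void timeline_init_model(struct i915_gem_timeline_model *tl,
				const char *name)
{
	uint64_t ctx = fake_dma_fence_context_alloc(NUM_ENGINES);
	unsigned int i;

	tl->name = name;
	tl->next_seqno = 1;
	for (i = 0; i < NUM_ENGINES; i++) {
		tl->engine[i].fence_context = ctx + i;
		tl->engine[i].last_submitted_seqno = 0;
	}
}

int main(void)
{
	struct i915_gem_timeline_model global;
	unsigned int i;

	timeline_init_model(&global, "[execution]");

	/* Requests on different engines draw from the same seqno space but
	 * signal on distinct fence contexts, so cross-engine ordering must
	 * be expressed explicitly via fence waits. */
	for (i = 0; i < NUM_ENGINES; i++)
		printf("engine %u: fence context %llu\n", i,
		       (unsigned long long)global.engine[i].fence_context);
	return 0;
}

Under this layout a request is stamped with its engine's timeline context rather than a per-engine global, which is why the i915_gem_request.c hunk below switches dma_fence_init() from engine->fence_context to req->timeline->fence_context.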
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-23-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_debugfs.c | 33 +++++------- drivers/gpu/drm/i915/i915_drv.c | 6 ++- drivers/gpu/drm/i915/i915_drv.h | 9 ++-- drivers/gpu/drm/i915/i915_gem.c | 72 ++++++++++++++++++++------ drivers/gpu/drm/i915/i915_gem.h | 2 + drivers/gpu/drm/i915/i915_gem_request.c | 81 ++++++++++++++++++------------ drivers/gpu/drm/i915/i915_gem_request.h | 1 + drivers/gpu/drm/i915/i915_gem_timeline.c | 64 +++++++++++++++++++++++ drivers/gpu/drm/i915/i915_gem_timeline.h | 70 ++++++++++++++++++++++++++ drivers/gpu/drm/i915/i915_gpu_error.c | 6 +-- drivers/gpu/drm/i915/i915_guc_submission.c | 3 +- drivers/gpu/drm/i915/i915_irq.c | 2 +- drivers/gpu/drm/i915/intel_engine_cs.c | 15 +++--- drivers/gpu/drm/i915/intel_ringbuffer.h | 36 ++----------- 15 files changed, 286 insertions(+), 115 deletions(-) create mode 100644 drivers/gpu/drm/i915/i915_gem_timeline.c create mode 100644 drivers/gpu/drm/i915/i915_gem_timeline.h (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 7faa04c..240ce9a 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -42,6 +42,7 @@ i915-y += i915_cmd_parser.o \ i915_gem_shrinker.o \ i915_gem_stolen.o \ i915_gem_tiling.o \ + i915_gem_timeline.o \ i915_gem_userptr.o \ i915_trace_points.o \ intel_breadcrumbs.o \ diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index b632506..3a0ea5e 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -552,7 +552,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data) seq_printf(m, "Flip queued on %s at seqno %x, next seqno %x [current breadcrumb %x], completed? %d\n", engine->name, i915_gem_request_get_seqno(work->flip_queued_req), - dev_priv->next_seqno, + dev_priv->gt.global_timeline.next_seqno, intel_engine_get_seqno(engine), i915_gem_request_completed(work->flip_queued_req)); } else @@ -662,13 +662,13 @@ static int i915_gem_request_info(struct seq_file *m, void *data) int count; count = 0; - list_for_each_entry(req, &engine->request_list, link) + list_for_each_entry(req, &engine->timeline->requests, link) count++; if (count == 0) continue; seq_printf(m, "%s requests: %d\n", engine->name, count); - list_for_each_entry(req, &engine->request_list, link) + list_for_each_entry(req, &engine->timeline->requests, link) print_request(m, req, " "); any++; @@ -1052,15 +1052,8 @@ static int i915_next_seqno_get(void *data, u64 *val) { struct drm_i915_private *dev_priv = data; - int ret; - - ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex); - if (ret) - return ret; - - *val = dev_priv->next_seqno; - mutex_unlock(&dev_priv->drm.struct_mutex); + *val = READ_ONCE(dev_priv->gt.global_timeline.next_seqno); return 0; } @@ -1075,7 +1068,7 @@ i915_next_seqno_set(void *data, u64 val) if (ret) return ret; - ret = i915_gem_set_seqno(dev, val); + ret = i915_gem_set_global_seqno(dev, val); mutex_unlock(&dev->struct_mutex); return ret; @@ -1364,7 +1357,7 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) seq_printf(m, "\tseqno = %x [current %x, last %x]\n", engine->hangcheck.seqno, seqno[id], - engine->last_submitted_seqno); + engine->timeline->last_submitted_seqno); seq_printf(m, "\twaiters? %s, fake irq active? 
%s\n", yesno(intel_engine_has_waiter(engine)), yesno(test_bit(engine->id, @@ -3181,7 +3174,7 @@ static int i915_engine_info(struct seq_file *m, void *unused) seq_printf(m, "%s\n", engine->name); seq_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [score %d]\n", intel_engine_get_seqno(engine), - engine->last_submitted_seqno, + engine->timeline->last_submitted_seqno, engine->hangcheck.seqno, engine->hangcheck.score); @@ -3189,14 +3182,14 @@ static int i915_engine_info(struct seq_file *m, void *unused) seq_printf(m, "\tRequests:\n"); - rq = list_first_entry(&engine->request_list, - struct drm_i915_gem_request, link); - if (&rq->link != &engine->request_list) + rq = list_first_entry(&engine->timeline->requests, + struct drm_i915_gem_request, link); + if (&rq->link != &engine->timeline->requests) print_request(m, rq, "\t\tfirst "); - rq = list_last_entry(&engine->request_list, - struct drm_i915_gem_request, link); - if (&rq->link != &engine->request_list) + rq = list_last_entry(&engine->timeline->requests, + struct drm_i915_gem_request, link); + if (&rq->link != &engine->timeline->requests) print_request(m, rq, "\t\tlast "); rq = i915_gem_find_active_request(engine); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 91cd7b29..839ce2a 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -831,7 +831,9 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, intel_init_display_hooks(dev_priv); intel_init_clock_gating_hooks(dev_priv); intel_init_audio_hooks(dev_priv); - i915_gem_load_init(&dev_priv->drm); + ret = i915_gem_load_init(&dev_priv->drm); + if (ret < 0) + goto err_gvt; intel_display_crc_init(dev_priv); @@ -841,6 +843,8 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, return 0; +err_gvt: + intel_gvt_cleanup(dev_priv); err_workqueues: i915_workqueues_cleanup(dev_priv); return ret; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 693ee69..f0f68f6 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -63,6 +63,7 @@ #include "i915_gem_gtt.h" #include "i915_gem_render_state.h" #include "i915_gem_request.h" +#include "i915_gem_timeline.h" #include "intel_gvt.h" @@ -1830,7 +1831,6 @@ struct drm_i915_private { struct i915_gem_context *kernel_context; struct intel_engine_cs *engine[I915_NUM_ENGINES]; struct i915_vma *semaphore; - u32 next_seqno; struct drm_dma_handle *status_page_dmah; struct resource mch_res; @@ -2090,6 +2090,9 @@ struct drm_i915_private { void (*resume)(struct drm_i915_private *); void (*cleanup_engine)(struct intel_engine_cs *engine); + struct list_head timelines; + struct i915_gem_timeline global_timeline; + /** * Is the GPU currently considered idle, or busy executing * userspace requests? 
Whilst idle, we allow runtime power @@ -3175,7 +3178,7 @@ int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); -void i915_gem_load_init(struct drm_device *dev); +int i915_gem_load_init(struct drm_device *dev); void i915_gem_load_cleanup(struct drm_device *dev); void i915_gem_load_init_fences(struct drm_i915_private *dev_priv); int i915_gem_freeze(struct drm_i915_private *dev_priv); @@ -3347,7 +3350,7 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old, struct drm_i915_gem_object *new, unsigned frontbuffer_bits); -int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno); +int __must_check i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno); struct drm_i915_gem_request * i915_gem_find_active_request(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1161a21..5253602 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -371,7 +371,7 @@ out: if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq)) i915_gem_request_retire_upto(rq); - if (rps && rq->fence.seqno == rq->engine->last_submitted_seqno) { + if (rps && rq->fence.seqno == rq->timeline->last_submitted_seqno) { /* The GPU is now idle and this client has stalled. * Since no other client has submitted a request in the * meantime, assume that this client is the only one @@ -2563,7 +2563,7 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) * extra delay for a recent interrupt is pointless. Hence, we do * not need an engine->irq_seqno_barrier() before the seqno reads. */ - list_for_each_entry(request, &engine->request_list, link) { + list_for_each_entry(request, &engine->timeline->requests, link) { if (i915_gem_request_completed(request)) continue; @@ -2632,7 +2632,7 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine) if (i915_gem_context_is_default(incomplete_ctx)) return; - list_for_each_entry_continue(request, &engine->request_list, link) + list_for_each_entry_continue(request, &engine->timeline->requests, link) if (request->ctx == incomplete_ctx) reset_request(request); } @@ -2671,7 +2671,8 @@ static void i915_gem_cleanup_engine(struct intel_engine_cs *engine) * (lockless) lookup doesn't try and wait upon the request as we * reset it. 
*/ - intel_engine_init_seqno(engine, engine->last_submitted_seqno); + intel_engine_init_global_seqno(engine, + engine->timeline->last_submitted_seqno); /* * Clear the execlists queue up before freeing the requests, as those @@ -2979,18 +2980,26 @@ destroy: return 0; } -int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, - unsigned int flags) +static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags) { - struct intel_engine_cs *engine; - enum intel_engine_id id; - int ret; + int ret, i; - for_each_engine(engine, dev_priv, id) { - if (engine->last_context == NULL) - continue; + for (i = 0; i < ARRAY_SIZE(tl->engine); i++) { + ret = i915_gem_active_wait(&tl->engine[i].last_request, flags); + if (ret) + return ret; + } - ret = intel_engine_idle(engine, flags); + return 0; +} + +int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) +{ + struct i915_gem_timeline *tl; + int ret; + + list_for_each_entry(tl, &i915->gt.timelines, link) { + ret = wait_for_timeline(tl, flags); if (ret) return ret; } @@ -4680,21 +4689,32 @@ i915_gem_load_init_fences(struct drm_i915_private *dev_priv) i915_gem_detect_bit_6_swizzle(dev); } -void +int i915_gem_load_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); + int err; dev_priv->objects = kmem_cache_create("i915_gem_object", sizeof(struct drm_i915_gem_object), 0, SLAB_HWCACHE_ALIGN, NULL); + if (!dev_priv->objects) { + err = -ENOMEM; + goto err_out; + } + dev_priv->vmas = kmem_cache_create("i915_gem_vma", sizeof(struct i915_vma), 0, SLAB_HWCACHE_ALIGN, NULL); + if (!dev_priv->vmas) { + err = -ENOMEM; + goto err_objects; + } + dev_priv->requests = kmem_cache_create("i915_gem_request", sizeof(struct drm_i915_gem_request), 0, @@ -4702,6 +4722,19 @@ i915_gem_load_init(struct drm_device *dev) SLAB_RECLAIM_ACCOUNT | SLAB_DESTROY_BY_RCU, NULL); + if (!dev_priv->requests) { + err = -ENOMEM; + goto err_vmas; + } + + mutex_lock(&dev_priv->drm.struct_mutex); + INIT_LIST_HEAD(&dev_priv->gt.timelines); + err = i915_gem_timeline_init(dev_priv, + &dev_priv->gt.global_timeline, + "[execution]"); + mutex_unlock(&dev_priv->drm.struct_mutex); + if (err) + goto err_requests; INIT_LIST_HEAD(&dev_priv->context_list); INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work); @@ -4726,6 +4759,17 @@ i915_gem_load_init(struct drm_device *dev) atomic_set(&dev_priv->mm.bsd_engine_dispatch_index, 0); spin_lock_init(&dev_priv->fb_tracking.lock); + + return 0; + +err_requests: + kmem_cache_destroy(dev_priv->requests); +err_vmas: + kmem_cache_destroy(dev_priv->vmas); +err_objects: + kmem_cache_destroy(dev_priv->objects); +err_out: + return err; } void i915_gem_load_cleanup(struct drm_device *dev) diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index 8292e79..735580d 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -31,4 +31,6 @@ #define GEM_BUG_ON(expr) #endif +#define I915_NUM_ENGINES 5 + #endif /* __I915_GEM_H__ */ diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 01a7fa5..16d38f8 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -40,7 +40,7 @@ static const char *i915_fence_get_timeline_name(struct dma_fence *fence) * multiple execution contexts (fence contexts) as we allow * engines within a single timeline to execute in parallel. 
*/ - return "global"; + return to_request(fence)->timeline->common->name; } static bool i915_fence_signaled(struct dma_fence *fence) @@ -211,7 +211,7 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req) return; do { - tmp = list_first_entry(&engine->request_list, + tmp = list_first_entry(&engine->timeline->requests, typeof(*tmp), link); i915_gem_request_retire(tmp); @@ -238,37 +238,39 @@ static int i915_gem_check_wedge(struct drm_i915_private *dev_priv) return 0; } -static int i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno) +static int i915_gem_init_global_seqno(struct drm_i915_private *dev_priv, + u32 seqno) { + struct i915_gem_timeline *timeline = &dev_priv->gt.global_timeline; struct intel_engine_cs *engine; enum intel_engine_id id; int ret; /* Carefully retire all requests without writing to the rings */ - for_each_engine(engine, dev_priv, id) { - ret = intel_engine_idle(engine, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED); - if (ret) - return ret; - } + ret = i915_gem_wait_for_idle(dev_priv, + I915_WAIT_INTERRUPTIBLE | + I915_WAIT_LOCKED); + if (ret) + return ret; + i915_gem_retire_requests(dev_priv); /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ - if (!i915_seqno_passed(seqno, dev_priv->next_seqno)) { + if (!i915_seqno_passed(seqno, timeline->next_seqno)) { while (intel_kick_waiters(dev_priv) || intel_kick_signalers(dev_priv)) yield(); + yield(); } /* Finally reset hw state */ for_each_engine(engine, dev_priv, id) - intel_engine_init_seqno(engine, seqno); + intel_engine_init_global_seqno(engine, seqno); return 0; } -int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) +int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno) { struct drm_i915_private *dev_priv = to_i915(dev); int ret; @@ -281,28 +283,31 @@ int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) /* HWS page needs to be set less than what we * will inject to ring */ - ret = i915_gem_init_seqno(dev_priv, seqno - 1); + ret = i915_gem_init_global_seqno(dev_priv, seqno - 1); if (ret) return ret; - dev_priv->next_seqno = seqno; + dev_priv->gt.global_timeline.next_seqno = seqno; return 0; } -static int i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno) +static int i915_gem_get_global_seqno(struct drm_i915_private *dev_priv, + u32 *seqno) { + struct i915_gem_timeline *tl = &dev_priv->gt.global_timeline; + /* reserve 0 for non-seqno */ - if (unlikely(dev_priv->next_seqno == 0)) { + if (unlikely(tl->next_seqno == 0)) { int ret; - ret = i915_gem_init_seqno(dev_priv, 0); + ret = i915_gem_init_global_seqno(dev_priv, 0); if (ret) return ret; - dev_priv->next_seqno = 1; + tl->next_seqno = 1; } - *seqno = dev_priv->next_seqno++; + *seqno = tl->next_seqno++; return 0; } @@ -311,13 +316,14 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { struct drm_i915_gem_request *request = container_of(fence, typeof(*request), submit); + struct intel_engine_cs *engine = request->engine; /* Will be called from irq-context when using foreign DMA fences */ switch (state) { case FENCE_COMPLETE: - request->engine->last_submitted_seqno = request->fence.seqno; - request->engine->submit_request(request); + engine->timeline->last_submitted_seqno = request->fence.seqno; + engine->submit_request(request); break; case FENCE_FREE: @@ -357,7 +363,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, return ERR_PTR(ret); /* Move the oldest request to the slab-cache (if not in use!) 
*/ - req = list_first_entry_or_null(&engine->request_list, + req = list_first_entry_or_null(&engine->timeline->requests, typeof(*req), link); if (req && i915_gem_request_completed(req)) i915_gem_request_retire(req); @@ -394,15 +400,17 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, if (!req) return ERR_PTR(-ENOMEM); - ret = i915_gem_get_seqno(dev_priv, &seqno); + ret = i915_gem_get_global_seqno(dev_priv, &seqno); if (ret) goto err; + req->timeline = engine->timeline; + spin_lock_init(&req->lock); dma_fence_init(&req->fence, &i915_fence_ops, &req->lock, - engine->fence_context, + req->timeline->fence_context, seqno); i915_sw_fence_init(&req->submit, submit_notify); @@ -457,9 +465,16 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to, GEM_BUG_ON(to == from); - if (to->engine == from->engine) + if (to->timeline == from->timeline) return 0; + if (to->engine == from->engine) { + ret = i915_sw_fence_await_sw_fence_gfp(&to->submit, + &from->submit, + GFP_KERNEL); + return ret < 0 ? ret : 0; + } + idx = intel_engine_sync_index(from->engine, to->engine); if (from->fence.seqno <= from->engine->semaphore.sync_seqno[idx]) return 0; @@ -622,6 +637,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) { struct intel_engine_cs *engine = request->engine; struct intel_ring *ring = request->ring; + struct intel_timeline *timeline = request->timeline; struct drm_i915_gem_request *prev; u32 request_start; u32 reserved_tail; @@ -679,17 +695,17 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) * see a more recent value in the hws than we are tracking. */ - prev = i915_gem_active_raw(&engine->last_request, + prev = i915_gem_active_raw(&timeline->last_request, &request->i915->drm.struct_mutex); if (prev) i915_sw_fence_await_sw_fence(&request->submit, &prev->submit, &request->submitq); request->emitted_jiffies = jiffies; - request->previous_seqno = engine->last_pending_seqno; - engine->last_pending_seqno = request->fence.seqno; - i915_gem_active_set(&engine->last_request, request); - list_add_tail(&request->link, &engine->request_list); + request->previous_seqno = timeline->last_pending_seqno; + timeline->last_pending_seqno = request->fence.seqno; + i915_gem_active_set(&timeline->last_request, request); + list_add_tail(&request->link, &timeline->requests); list_add_tail(&request->ring_link, &ring->request_list); i915_gem_mark_busy(engine); @@ -899,7 +915,8 @@ static bool engine_retire_requests(struct intel_engine_cs *engine) { struct drm_i915_gem_request *request, *next; - list_for_each_entry_safe(request, next, &engine->request_list, link) { + list_for_each_entry_safe(request, next, + &engine->timeline->requests, link) { if (!i915_gem_request_completed(request)) return false; diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index a51d596..4ac30ae 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -81,6 +81,7 @@ struct drm_i915_gem_request { struct i915_gem_context *ctx; struct intel_engine_cs *engine; struct intel_ring *ring; + struct intel_timeline *timeline; struct intel_signal_node signaling; struct i915_sw_fence submit; diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.c b/drivers/gpu/drm/i915/i915_gem_timeline.c new file mode 100644 index 0000000..a1bd03d --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_timeline.c @@ -0,0 +1,64 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of 
charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "i915_drv.h" + +int i915_gem_timeline_init(struct drm_i915_private *i915, + struct i915_gem_timeline *timeline, + const char *name) +{ + unsigned int i; + u64 fences; + + lockdep_assert_held(&i915->drm.struct_mutex); + + timeline->i915 = i915; + timeline->name = kstrdup(name ?: "[kernel]", GFP_KERNEL); + if (!timeline->name) + return -ENOMEM; + + list_add(&timeline->link, &i915->gt.timelines); + + /* Called during early_init before we know how many engines there are */ + fences = dma_fence_context_alloc(ARRAY_SIZE(timeline->engine)); + for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) { + struct intel_timeline *tl = &timeline->engine[i]; + + tl->fence_context = fences++; + tl->common = timeline; + + init_request_active(&tl->last_request, NULL); + INIT_LIST_HEAD(&tl->requests); + } + + return 0; +} + +void i915_gem_timeline_fini(struct i915_gem_timeline *tl) +{ + lockdep_assert_held(&tl->i915->drm.struct_mutex); + + list_del(&tl->link); + kfree(tl->name); +} diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h new file mode 100644 index 0000000..bfdf033 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_timeline.h @@ -0,0 +1,70 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#ifndef I915_GEM_TIMELINE_H +#define I915_GEM_TIMELINE_H + +#include <linux/list.h> + +#include "i915_gem_request.h" + +struct i915_gem_timeline; + +struct intel_timeline { + u64 fence_context; + u32 last_submitted_seqno; + u32 last_pending_seqno; + + /** + * List of breadcrumbs associated with GPU requests currently + * outstanding. + */ + struct list_head requests; + + /* Contains an RCU guarded pointer to the last request. No reference is + * held to the request, users must carefully acquire a reference to + * the request using i915_gem_active_get_request_rcu(), or hold the + * struct_mutex. + */ + struct i915_gem_active last_request; + + struct i915_gem_timeline *common; +}; + +struct i915_gem_timeline { + struct list_head link; + u32 next_seqno; + + struct drm_i915_private *i915; + const char *name; + + struct intel_timeline engine[I915_NUM_ENGINES]; +}; + +int i915_gem_timeline_init(struct drm_i915_private *i915, + struct i915_gem_timeline *tl, + const char *name); +void i915_gem_timeline_fini(struct i915_gem_timeline *tl); + +#endif diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index aa8dadc..12fea57 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1110,7 +1110,7 @@ static void error_record_engine_registers(struct drm_i915_error_state *error, ee->instpm = I915_READ(RING_INSTPM(engine->mmio_base)); ee->acthd = intel_engine_get_active_head(engine); ee->seqno = intel_engine_get_seqno(engine); - ee->last_seqno = engine->last_submitted_seqno; + ee->last_seqno = engine->timeline->last_submitted_seqno; ee->start = I915_READ_START(engine); ee->head = I915_READ_HEAD(engine); ee->tail = I915_READ_TAIL(engine); @@ -1195,7 +1195,7 @@ static void engine_record_requests(struct intel_engine_cs *engine, count = 0; request = first; - list_for_each_entry_from(request, &engine->request_list, link) + list_for_each_entry_from(request, &engine->timeline->requests, link) count++; if (!count) return; @@ -1208,7 +1208,7 @@ static void engine_record_requests(struct intel_engine_cs *engine, count = 0; request = first; - list_for_each_entry_from(request, &engine->request_list, link) { + list_for_each_entry_from(request, &engine->timeline->requests, link) { if (count >= ee->num_requests) { /* * If the ring request list was changed in diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 74235ea..cca250e 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -1522,7 +1522,8 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) engine->submit_request = i915_guc_submit; /* Replay the current set of previously submitted requests */ - list_for_each_entry(request, &engine->request_list, link) { + list_for_each_entry(request, + &engine->timeline->requests, link) { client->wq_rsvd += sizeof(struct guc_wq_item); if (i915_sw_fence_done(&request->submit)) i915_guc_submit(request); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 88239e1..90d0905 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3168,7 +3168,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work) acthd = intel_engine_get_active_head(engine); seqno = intel_engine_get_seqno(engine); - submit = READ_ONCE(engine->last_submitted_seqno); + submit = READ_ONCE(engine->timeline->last_submitted_seqno); if (engine->hangcheck.seqno == seqno) { if (i915_seqno_passed(seqno,
submit)) { diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index fd55182..6a31055 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -174,7 +174,7 @@ cleanup: return ret; } -void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno) +void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno) { struct drm_i915_private *dev_priv = engine->i915; @@ -210,7 +210,9 @@ void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno) intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); if (engine->irq_seqno_barrier) engine->irq_seqno_barrier(engine); - engine->last_submitted_seqno = seqno; + + GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request)); + engine->timeline->last_submitted_seqno = seqno; engine->hangcheck.seqno = seqno; @@ -225,10 +227,9 @@ void intel_engine_init_hangcheck(struct intel_engine_cs *engine) memset(&engine->hangcheck, 0, sizeof(engine->hangcheck)); } -static void intel_engine_init_requests(struct intel_engine_cs *engine) +static void intel_engine_init_timeline(struct intel_engine_cs *engine) { - init_request_active(&engine->last_request, NULL); - INIT_LIST_HEAD(&engine->request_list); + engine->timeline = &engine->i915->gt.global_timeline.engine[engine->id]; } /** @@ -245,9 +246,7 @@ void intel_engine_setup_common(struct intel_engine_cs *engine) INIT_LIST_HEAD(&engine->execlist_queue); spin_lock_init(&engine->execlist_lock); - engine->fence_context = dma_fence_context_alloc(1); - - intel_engine_init_requests(engine); + intel_engine_init_timeline(engine); intel_engine_init_hangcheck(engine); i915_gem_batch_pool_init(engine, &engine->batch_pool); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index cb6e96c..a62e396 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -4,6 +4,7 @@ #include <linux/hashtable.h> #include "i915_gem_batch_pool.h" #include "i915_gem_request.h" +#include "i915_gem_timeline.h" #define I915_CMD_HASH_ORDER 9 @@ -169,7 +170,6 @@ struct intel_engine_cs { VCS2, /* Keep instances of the same type engine together. */ VECS } id; -#define I915_NUM_ENGINES 5 #define _VCS(n) (VCS + (n)) unsigned int exec_id; enum intel_engine_hw_id { @@ -180,10 +180,10 @@ struct intel_engine_cs { VCS2_HW } hw_id; enum intel_engine_hw_id guc_id; /* XXX same as hw_id? */ - u64 fence_context; u32 mmio_base; unsigned int irq_shift; struct intel_ring *buffer; + struct intel_timeline *timeline; struct intel_render_state *render_state; @@ -346,27 +346,6 @@ struct intel_engine_cs { bool preempt_wa; u32 ctx_desc_template; - /** - * List of breadcrumbs associated with GPU requests currently - * outstanding. - */ - struct list_head request_list; - - /** - * Seqno of request most recently submitted to request_list. - * Used exclusively by hang checker to avoid grabbing lock while - * inspecting request list. - */ - u32 last_submitted_seqno; - u32 last_pending_seqno; - - /* An RCU guarded pointer to the last request. No reference is - * held to the request, users must carefully acquire a reference to - * the request using i915_gem_active_get_rcu(), or hold the - * struct_mutex.
- */ - struct i915_gem_active last_request; - struct i915_gem_context *last_context; struct intel_engine_hangcheck hangcheck; @@ -516,20 +495,13 @@ static inline u32 intel_ring_offset(struct intel_ring *ring, u32 value) int __intel_ring_space(int head, int tail, int size); void intel_ring_update_space(struct intel_ring *ring); -void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno); +void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno); void intel_engine_setup_common(struct intel_engine_cs *engine); int intel_engine_init_common(struct intel_engine_cs *engine); int intel_engine_create_scratch(struct intel_engine_cs *engine, int size); void intel_engine_cleanup_common(struct intel_engine_cs *engine); -static inline int intel_engine_idle(struct intel_engine_cs *engine, - unsigned int flags) -{ - /* Wait upon the last request to be completed */ - return i915_gem_active_wait(&engine->last_request, flags); -} - int intel_init_render_ring_buffer(struct intel_engine_cs *engine); int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine); int intel_init_bsd2_ring_buffer(struct intel_engine_cs *engine); @@ -619,7 +591,7 @@ unsigned int intel_kick_signalers(struct drm_i915_private *i915); static inline bool intel_engine_is_active(struct intel_engine_cs *engine) { - return i915_gem_active_isset(&engine->last_request); + return i915_gem_active_isset(&engine->timeline->last_request); } #endif /* _INTEL_RINGBUFFER_H_ */ -- cgit v1.1 From 3033acab07f95c089fb871896406657ad4000ed6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:47 +0100 Subject: drm/i915: Queue the idling context switch after all other timelines Before suspend, we wait for the switch to the kernel context. In order for all the other context images to be complete upon suspend, that switch must be the last operation by the GPU (i.e. this idling request must not overtake any pending requests). To make this request execute last, we make it depend on every other inflight request. 
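In outline, the mechanism is the loop added in the diff below: before the idling request is submitted, it collects a submit-fence dependency on the last request of every timeline. A minimal sketch, using the timeline and sw-fence helpers introduced earlier in this series:

/* Sketch: make the kernel-context request "req" on "engine" await the
 * last request of every timeline, so that it cannot overtake any of them.
 */
list_for_each_entry(timeline, &dev_priv->gt.timelines, link) {
	struct intel_timeline *tl = &timeline->engine[engine->id];
	struct drm_i915_gem_request *prev;

	prev = i915_gem_active_raw(&tl->last_request,
				   &dev_priv->drm.struct_mutex);
	if (prev)
		i915_sw_fence_await_sw_fence_gfp(&req->submit,
						 &prev->submit,
						 GFP_KERNEL);
}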
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-24-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem.c | 10 ++++++++++ drivers/gpu/drm/i915/i915_gem_context.c | 23 +++++++++++++++++------ 2 files changed, 27 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5253602..5ea46a7 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4362,6 +4362,15 @@ void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj) i915_gem_object_put(obj); } +static void assert_kernel_context_is_current(struct drm_i915_private *dev_priv) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, dev_priv, id) + GEM_BUG_ON(engine->last_context != dev_priv->kernel_context); +} + int i915_gem_suspend(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -4391,6 +4400,7 @@ int i915_gem_suspend(struct drm_device *dev) i915_gem_retire_requests(dev_priv); + assert_kernel_context_is_current(dev_priv); i915_gem_context_lost(dev_priv); mutex_unlock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 47e888c..a2acb8b 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -931,22 +931,33 @@ int i915_switch_context(struct drm_i915_gem_request *req) int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; + struct i915_gem_timeline *timeline; enum intel_engine_id id; + lockdep_assert_held(&dev_priv->drm.struct_mutex); + for_each_engine(engine, dev_priv, id) { struct drm_i915_gem_request *req; int ret; - if (engine->last_context == NULL) - continue; - - if (engine->last_context == dev_priv->kernel_context) - continue; - req = i915_gem_request_alloc(engine, dev_priv->kernel_context); if (IS_ERR(req)) return PTR_ERR(req); + /* Queue this switch after all other activity */ + list_for_each_entry(timeline, &dev_priv->gt.timelines, link) { + struct drm_i915_gem_request *prev; + struct intel_timeline *tl; + + tl = &timeline->engine[engine->id]; + prev = i915_gem_active_raw(&tl->last_request, + &dev_priv->drm.struct_mutex); + if (prev) + i915_sw_fence_await_sw_fence_gfp(&req->submit, + &prev->submit, + GFP_KERNEL); + } + ret = i915_switch_context(req); i915_add_request_no_flush(req); if (ret) -- cgit v1.1 From 4680816be3362bdf6ac712cbdc6098c76febe78f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:48 +0100 Subject: drm/i915: Wait first for submission, before waiting for request completion In future patches, we will no longer be able to wait on a static global seqno and instead have to break our wait up into phases. First we wait for the global seqno assignment (upon submission to hardware), and once submitted we wait for the hardware to complete. 
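Schematically, the wait splits into two phases. A sketch only: reset handling is elided, and wait_for_breadcrumb() is a hypothetical name standing in for the existing seqno-based wait:

/* Phase 1: wait for the request to be submitted to hardware, i.e. for
 * its submit fence to signal and a global seqno to be assigned.
 */
if (!i915_sw_fence_done(&req->submit)) {
	timeout = __i915_request_wait_for_submit(req, flags, timeout);
	if (timeout < 0)
		return timeout;
}
GEM_BUG_ON(!i915_sw_fence_done(&req->submit));

/* Phase 2: wait for the hardware to execute past the breadcrumb
 * (wait_for_breadcrumb() is a placeholder for that existing code).
 */
return wait_for_breadcrumb(req, flags, timeout);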
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-25-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_request.c | 51 +++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 16d38f8..03ae85a 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -789,6 +789,49 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req, return false; } +static long +__i915_request_wait_for_submit(struct drm_i915_gem_request *request, + unsigned int flags, + long timeout) +{ + const int state = flags & I915_WAIT_INTERRUPTIBLE ? + TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; + wait_queue_head_t *q = &request->i915->gpu_error.wait_queue; + DEFINE_WAIT(reset); + DEFINE_WAIT(wait); + + if (flags & I915_WAIT_LOCKED) + add_wait_queue(q, &reset); + + do { + prepare_to_wait(&request->submit.wait, &wait, state); + + if (i915_sw_fence_done(&request->submit)) + break; + + if (flags & I915_WAIT_LOCKED && + i915_reset_in_progress(&request->i915->gpu_error)) { + __set_current_state(TASK_RUNNING); + i915_reset(request->i915); + reset_wait_queue(q, &reset); + continue; + } + + if (signal_pending_state(state, current)) { + timeout = -ERESTARTSYS; + break; + } + + timeout = io_schedule_timeout(timeout); + } while (timeout); + finish_wait(&request->submit.wait, &wait); + + if (flags & I915_WAIT_LOCKED) + remove_wait_queue(q, &reset); + + return timeout; +} + /** * i915_wait_request - wait until execution of request has finished * @req: the request to wait upon @@ -833,6 +876,14 @@ long i915_wait_request(struct drm_i915_gem_request *req, trace_i915_gem_request_wait_begin(req); + if (!i915_sw_fence_done(&req->submit)) { + timeout = __i915_request_wait_for_submit(req, flags, timeout); + if (timeout < 0) + goto complete; + + GEM_BUG_ON(!i915_sw_fence_done(&req->submit)); + } + /* Optimistic short spin before touching IRQs */ if (i915_spin_request(req, state, 5)) goto complete; -- cgit v1.1 From 65e4760e3920c21073a9d737929dc36df561380f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:49 +0100 Subject: drm/i915: Introduce a global_seqno for each request Though we will have multiple timelines, we still have a single timeline of execution. This we can use to provide an execution and retirement order of requests. This keeps tracking the execution of requests simple, and is vital for preserving a single waiter (i.e. so that we can order the waiters so that only the earliest to wake up need be woken). To accomplish this we distinguish the seqno used to order requests per-context (external) from that used internally for execution.
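The distinction shows up most clearly in the completion check; a sketch mirroring the i915_gem_request_completed() rework in the diff below (the function name here is illustrative only):

static inline bool
request_completed_sketch(const struct drm_i915_gem_request *req)
{
	/* No global seqno yet: the request has not been submitted to the
	 * hardware, so it cannot possibly have completed.
	 */
	if (!req->global_seqno)
		return false;

	/* Otherwise compare against the seqno the engine has executed. */
	return i915_seqno_passed(intel_engine_get_seqno(req->engine),
				 req->global_seqno);
}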
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-26-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- drivers/gpu/drm/i915/i915_gem.c | 2 +- drivers/gpu/drm/i915/i915_gem_request.c | 19 +++++++++++++----- drivers/gpu/drm/i915/i915_gem_request.h | 32 +++++++++++++++++++++++++----- drivers/gpu/drm/i915/i915_gpu_error.c | 2 +- drivers/gpu/drm/i915/i915_guc_submission.c | 4 ++-- drivers/gpu/drm/i915/i915_trace.h | 8 ++++---- drivers/gpu/drm/i915/intel_breadcrumbs.c | 8 +++++--- drivers/gpu/drm/i915/intel_lrc.c | 4 ++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 14 ++++++------- 11 files changed, 66 insertions(+), 33 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 3a0ea5e..90bc4a8 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -637,7 +637,7 @@ static void print_request(struct seq_file *m, rcu_read_lock(); task = pid ? pid_task(pid, PIDTYPE_PID) : NULL; seq_printf(m, "%s%x [%x:%x] @ %d: %s [%d]\n", prefix, - rq->fence.seqno, rq->ctx->hw_id, rq->fence.seqno, + rq->global_seqno, rq->ctx->hw_id, rq->fence.seqno, jiffies_to_msecs(jiffies - rq->emitted_jiffies), task ? task->comm : "", task ? task->pid : -1); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index f0f68f6..217674b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -4050,7 +4050,7 @@ __i915_request_irq_complete(struct drm_i915_gem_request *req) /* Before we do the heavier coherent read of the seqno, * check the value (hopefully) in the CPU cacheline. */ - if (i915_gem_request_completed(req)) + if (__i915_gem_request_completed(req)) return true; /* Ensure our read of the seqno is coherent so that we @@ -4101,7 +4101,7 @@ __i915_request_irq_complete(struct drm_i915_gem_request *req) wake_up_process(tsk); rcu_read_unlock(); - if (i915_gem_request_completed(req)) + if (__i915_gem_request_completed(req)) return true; } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5ea46a7..f4cfb88 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2615,7 +2615,7 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine) return; DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n", - engine->name, request->fence.seqno); + engine->name, request->global_seqno); /* Setup the CS to resume from the breadcrumb of the hung request */ engine->reset_hw(engine, request); diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 03ae85a..311cf3f 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -376,7 +376,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, * of being read by __i915_gem_active_get_rcu(). As such, * we have to be very careful when overwriting the contents. During * the RCU lookup, we change chase the request->engine pointer, - * read the request->fence.seqno and increment the reference count. + * read the request->global_seqno and increment the reference count. * * The reference count is incremented atomically. If it is zero, * the lookup knows the request is unallocated and complete. 
Otherwise, @@ -418,6 +418,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, INIT_LIST_HEAD(&req->active_list); req->i915 = dev_priv; req->engine = engine; + req->global_seqno = seqno; req->ctx = i915_gem_context_get(ctx); /* No zalloc, must clear what we need by hand */ @@ -475,8 +476,15 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to, return ret < 0 ? ret : 0; } + if (!from->global_seqno) { + ret = i915_sw_fence_await_dma_fence(&to->submit, + &from->fence, 0, + GFP_KERNEL); + return ret < 0 ? ret : 0; + } + idx = intel_engine_sync_index(from->engine, to->engine); - if (from->fence.seqno <= from->engine->semaphore.sync_seqno[idx]) + if (from->global_seqno <= from->engine->semaphore.sync_seqno[idx]) return 0; trace_i915_gem_ring_sync_to(to, from); @@ -494,7 +502,7 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to, return ret; } - from->engine->semaphore.sync_seqno[idx] = from->fence.seqno; + from->engine->semaphore.sync_seqno[idx] = from->global_seqno; return 0; } @@ -774,7 +782,7 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req, timeout_us += local_clock_us(&cpu); do { - if (i915_gem_request_completed(req)) + if (__i915_gem_request_completed(req)) return true; if (signal_pending_state(state, current)) @@ -883,6 +891,7 @@ long i915_wait_request(struct drm_i915_gem_request *req, GEM_BUG_ON(!i915_sw_fence_done(&req->submit)); } + GEM_BUG_ON(!req->global_seqno); /* Optimistic short spin before touching IRQs */ if (i915_spin_request(req, state, 5)) @@ -892,7 +901,7 @@ long i915_wait_request(struct drm_i915_gem_request *req, if (flags & I915_WAIT_LOCKED) add_wait_queue(&req->i915->gpu_error.wait_queue, &reset); - intel_wait_init(&wait, req->fence.seqno); + intel_wait_init(&wait, req->global_seqno); if (intel_engine_add_wait(req->engine, &wait)) /* In order to check that we haven't missed the interrupt * as we enabled it, we need to kick ourselves to do a diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 4ac30ae..75f8360 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -87,6 +87,8 @@ struct drm_i915_gem_request { struct i915_sw_fence submit; wait_queue_t submitq; + u32 global_seqno; + /** GEM sequence number associated with the previous request, * when the HWS breadcrumb is equal to this the GPU is processing * this request. @@ -163,7 +165,7 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req); static inline u32 i915_gem_request_get_seqno(struct drm_i915_gem_request *req) { - return req ? req->fence.seqno : 0; + return req ? 
req->global_seqno : 0; } static inline struct intel_engine_cs * @@ -248,17 +250,37 @@ static inline bool i915_seqno_passed(u32 seq1, u32 seq2) } static inline bool -i915_gem_request_started(const struct drm_i915_gem_request *req) +__i915_gem_request_started(const struct drm_i915_gem_request *req) { + GEM_BUG_ON(!req->global_seqno); return i915_seqno_passed(intel_engine_get_seqno(req->engine), req->previous_seqno); } static inline bool -i915_gem_request_completed(const struct drm_i915_gem_request *req) +i915_gem_request_started(const struct drm_i915_gem_request *req) { + if (!req->global_seqno) + return false; + + return __i915_gem_request_started(req); +} + +static inline bool +__i915_gem_request_completed(const struct drm_i915_gem_request *req) +{ + GEM_BUG_ON(!req->global_seqno); return i915_seqno_passed(intel_engine_get_seqno(req->engine), - req->fence.seqno); + req->global_seqno); +} + +static inline bool +i915_gem_request_completed(const struct drm_i915_gem_request *req) +{ + if (!req->global_seqno) + return false; + + return __i915_gem_request_completed(req); } bool __i915_spin_request(const struct drm_i915_gem_request *request, @@ -266,7 +288,7 @@ bool __i915_spin_request(const struct drm_i915_gem_request *request, static inline bool i915_spin_request(const struct drm_i915_gem_request *request, int state, unsigned long timeout_us) { - return (i915_gem_request_started(request) && + return (__i915_gem_request_started(request) && __i915_spin_request(request, state, timeout_us)); } diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 12fea57..9aa197c 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1176,7 +1176,7 @@ static void record_request(struct drm_i915_gem_request *request, struct drm_i915_error_request *erq) { erq->context = request->ctx->hw_id; - erq->seqno = request->fence.seqno; + erq->seqno = request->global_seqno; erq->jiffies = request->emitted_jiffies; erq->head = request->head; erq->tail = request->tail; diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index cca250e..857ef91 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -554,7 +554,7 @@ static void guc_wq_item_append(struct i915_guc_client *gc, wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, engine); wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT; - wqi->fence_id = rq->fence.seqno; + wqi->fence_id = rq->global_seqno; kunmap_atomic(base); } @@ -655,7 +655,7 @@ static void i915_guc_submit(struct drm_i915_gem_request *rq) client->b_fail += 1; guc->submissions[engine_id] += 1; - guc->last_seqno[engine_id] = rq->fence.seqno; + guc->last_seqno[engine_id] = rq->global_seqno; spin_unlock(&client->wq_lock); } diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 5c912c2..c5d210e 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -466,7 +466,7 @@ TRACE_EVENT(i915_gem_ring_sync_to, __entry->dev = from->i915->drm.primary->index; __entry->sync_from = from->engine->id; __entry->sync_to = to->engine->id; - __entry->seqno = from->fence.seqno; + __entry->seqno = from->global_seqno; ), TP_printk("dev=%u, sync-from=%u, sync-to=%u, seqno=%u", @@ -489,7 +489,7 @@ TRACE_EVENT(i915_gem_ring_dispatch, TP_fast_assign( __entry->dev = req->i915->drm.primary->index; __entry->ring = req->engine->id; - __entry->seqno = req->fence.seqno; + __entry->seqno = 
req->global_seqno; __entry->flags = flags; dma_fence_enable_sw_signaling(&req->fence); ), @@ -534,7 +534,7 @@ DECLARE_EVENT_CLASS(i915_gem_request, TP_fast_assign( __entry->dev = req->i915->drm.primary->index; __entry->ring = req->engine->id; - __entry->seqno = req->fence.seqno; + __entry->seqno = req->global_seqno; ), TP_printk("dev=%u, ring=%u, seqno=%u", @@ -596,7 +596,7 @@ TRACE_EVENT(i915_gem_request_wait_begin, TP_fast_assign( __entry->dev = req->i915->drm.primary->index; __entry->ring = req->engine->id; - __entry->seqno = req->fence.seqno; + __entry->seqno = req->global_seqno; __entry->blocking = mutex_is_locked(&req->i915->drm.struct_mutex); ), diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 56efcc5..0d5def0 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -504,9 +504,11 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) /* locked by dma_fence_enable_sw_signaling() */ assert_spin_locked(&request->lock); + if (!request->global_seqno) + return; request->signaling.wait.tsk = b->signaler; - request->signaling.wait.seqno = request->fence.seqno; + request->signaling.wait.seqno = request->global_seqno; i915_gem_request_get(request); spin_lock(&b->lock); @@ -530,8 +532,8 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) p = &b->signals.rb_node; while (*p) { parent = *p; - if (i915_seqno_passed(request->fence.seqno, - to_signaler(parent)->fence.seqno)) { + if (i915_seqno_passed(request->global_seqno, + to_signaler(parent)->global_seqno)) { p = &parent->rb_right; first = false; } else { diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index cb30549..e0a9bf8 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1584,7 +1584,7 @@ static int gen8_emit_request(struct drm_i915_gem_request *request) intel_hws_seqno_address(request->engine) | MI_FLUSH_DW_USE_GTT); intel_ring_emit(ring, 0); - intel_ring_emit(ring, request->fence.seqno); + intel_ring_emit(ring, request->global_seqno); intel_ring_emit(ring, MI_USER_INTERRUPT); intel_ring_emit(ring, MI_NOOP); return intel_logical_ring_advance(request); @@ -1613,7 +1613,7 @@ static int gen8_emit_request_render(struct drm_i915_gem_request *request) PIPE_CONTROL_QW_WRITE)); intel_ring_emit(ring, intel_hws_seqno_address(request->engine)); intel_ring_emit(ring, 0); - intel_ring_emit(ring, i915_gem_request_get_seqno(request)); + intel_ring_emit(ring, request->global_seqno); /* We're thrashing one dword of HWS. 
*/ intel_ring_emit(ring, 0); intel_ring_emit(ring, MI_USER_INTERRUPT); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index aaa46d9..76c6b70 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1238,7 +1238,7 @@ static int gen8_rcs_signal(struct drm_i915_gem_request *req) PIPE_CONTROL_CS_STALL); intel_ring_emit(ring, lower_32_bits(gtt_offset)); intel_ring_emit(ring, upper_32_bits(gtt_offset)); - intel_ring_emit(ring, req->fence.seqno); + intel_ring_emit(ring, req->global_seqno); intel_ring_emit(ring, 0); intel_ring_emit(ring, MI_SEMAPHORE_SIGNAL | @@ -1274,7 +1274,7 @@ static int gen8_xcs_signal(struct drm_i915_gem_request *req) lower_32_bits(gtt_offset) | MI_FLUSH_DW_USE_GTT); intel_ring_emit(ring, upper_32_bits(gtt_offset)); - intel_ring_emit(ring, req->fence.seqno); + intel_ring_emit(ring, req->global_seqno); intel_ring_emit(ring, MI_SEMAPHORE_SIGNAL | MI_SEMAPHORE_TARGET(waiter->hw_id)); @@ -1308,7 +1308,7 @@ static int gen6_signal(struct drm_i915_gem_request *req) if (i915_mmio_reg_valid(mbox_reg)) { intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); intel_ring_emit_reg(ring, mbox_reg); - intel_ring_emit(ring, req->fence.seqno); + intel_ring_emit(ring, req->global_seqno); } } @@ -1339,7 +1339,7 @@ static int i9xx_emit_request(struct drm_i915_gem_request *req) intel_ring_emit(ring, MI_STORE_DWORD_INDEX); intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); - intel_ring_emit(ring, req->fence.seqno); + intel_ring_emit(ring, req->global_seqno); intel_ring_emit(ring, MI_USER_INTERRUPT); intel_ring_advance(ring); @@ -1389,7 +1389,7 @@ static int gen8_render_emit_request(struct drm_i915_gem_request *req) PIPE_CONTROL_QW_WRITE)); intel_ring_emit(ring, intel_hws_seqno_address(engine)); intel_ring_emit(ring, 0); - intel_ring_emit(ring, i915_gem_request_get_seqno(req)); + intel_ring_emit(ring, req->global_seqno); /* We're thrashing one dword of HWS. */ intel_ring_emit(ring, 0); intel_ring_emit(ring, MI_USER_INTERRUPT); @@ -1427,7 +1427,7 @@ gen8_ring_sync_to(struct drm_i915_gem_request *req, MI_SEMAPHORE_WAIT | MI_SEMAPHORE_GLOBAL_GTT | MI_SEMAPHORE_SAD_GTE_SDD); - intel_ring_emit(ring, signal->fence.seqno); + intel_ring_emit(ring, signal->global_seqno); intel_ring_emit(ring, lower_32_bits(offset)); intel_ring_emit(ring, upper_32_bits(offset)); intel_ring_advance(ring); @@ -1465,7 +1465,7 @@ gen6_ring_sync_to(struct drm_i915_gem_request *req, * seqno is >= the last seqno executed. However for hardware the * comparison is strictly greater than. */ - intel_ring_emit(ring, signal->fence.seqno - 1); + intel_ring_emit(ring, signal->global_seqno - 1); intel_ring_emit(ring, 0); intel_ring_emit(ring, MI_NOOP); intel_ring_advance(ring); -- cgit v1.1 From 9b81d556b11fe58827dcd87bc5deaf8da2f716ae Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:50 +0100 Subject: drm/i915: Rename ->emit_request to ->emit_breadcrumb Now that the emission of the request tail and its submission to hardware are two separate steps, engine->emit_request() is confusing. engine->emit_request() is called to emit the breadcrumb commands for the request into the ring, name it such (engine->emit_breadcrumb). 
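Schematically, closing a request is now two separate steps; a sketch of the call sequence only, using the vfunc signatures as of this patch:

/* Step 1: at request construction, write the breadcrumb commands
 * (seqno store plus user interrupt) into the ring.
 */
engine->emit_breadcrumb(request);

/* Step 2: later, hand the request tail over to the hardware queue. */
engine->submit_request(request);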
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-27-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_request.c | 4 ++-- drivers/gpu/drm/i915/intel_lrc.c | 10 +++++----- drivers/gpu/drm/i915/intel_ringbuffer.c | 16 ++++++++-------- drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +- 4 files changed, 16 insertions(+), 16 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 311cf3f..a626b26 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -685,8 +685,8 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) request->postfix = ring->tail; /* Not allowed to fail! */ - ret = engine->emit_request(request); - WARN(ret, "(%s)->emit_request failed: %d!\n", engine->name, ret); + ret = engine->emit_breadcrumb(request); + WARN(ret, "(%s)->emit_breadcrumb failed: %d!\n", engine->name, ret); /* Sanity check that the reserved size was large enough. */ ret = ring->tail - request_start; diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index e0a9bf8..57dba45 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -440,7 +440,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (last) /* WaIdleLiteRestore:bdw,skl * Apply the wa NOOPs to prevent ring:HEAD == req:TAIL - * as we resubmit the request. See gen8_emit_request() + * as we resubmit the request. See gen8_emit_breadcrumb() * for where we prepare the padding after the end of the * request. */ @@ -1567,7 +1567,7 @@ static void bxt_a_seqno_barrier(struct intel_engine_cs *engine) * restore with HEAD==TAIL (WaIdleLiteRestore). 
*/ -static int gen8_emit_request(struct drm_i915_gem_request *request) +static int gen8_emit_breadcrumb(struct drm_i915_gem_request *request) { struct intel_ring *ring = request->ring; int ret; @@ -1590,7 +1590,7 @@ static int gen8_emit_request(struct drm_i915_gem_request *request) return intel_logical_ring_advance(request); } -static int gen8_emit_request_render(struct drm_i915_gem_request *request) +static int gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request) { struct intel_ring *ring = request->ring; int ret; @@ -1694,7 +1694,7 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) engine->init_hw = gen8_init_common_ring; engine->reset_hw = reset_common_ring; engine->emit_flush = gen8_emit_flush; - engine->emit_request = gen8_emit_request; + engine->emit_breadcrumb = gen8_emit_breadcrumb; engine->submit_request = execlists_submit_request; engine->irq_enable = gen8_logical_ring_enable_irq; @@ -1816,7 +1816,7 @@ int logical_render_ring_init(struct intel_engine_cs *engine) engine->init_hw = gen8_init_render_ring; engine->init_context = gen8_init_rcs_context; engine->emit_flush = gen8_emit_flush_render; - engine->emit_request = gen8_emit_request_render; + engine->emit_breadcrumb = gen8_emit_breadcrumb_render; ret = intel_engine_create_scratch(engine, 4096); if (ret) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 76c6b70..54c3981 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1328,7 +1328,7 @@ static void i9xx_submit_request(struct drm_i915_gem_request *request) intel_ring_offset(request->ring, request->tail)); } -static int i9xx_emit_request(struct drm_i915_gem_request *req) +static int i9xx_emit_breadcrumb(struct drm_i915_gem_request *req) { struct intel_ring *ring = req->ring; int ret; @@ -1349,14 +1349,14 @@ static int i9xx_emit_request(struct drm_i915_gem_request *req) } /** - * gen6_sema_emit_request - Update the semaphore mailbox registers + * gen6_sema_emit_breadcrumb - Update the semaphore mailbox registers * * @request - request to write to the ring * * Update the mailbox registers in the *other* rings with the current seqno. * This acts like a signal in the canonical semaphore. 
*/ -static int gen6_sema_emit_request(struct drm_i915_gem_request *req) +static int gen6_sema_emit_breadcrumb(struct drm_i915_gem_request *req) { int ret; @@ -1364,10 +1364,10 @@ static int gen6_sema_emit_request(struct drm_i915_gem_request *req) if (ret) return ret; - return i9xx_emit_request(req); + return i9xx_emit_breadcrumb(req); } -static int gen8_render_emit_request(struct drm_i915_gem_request *req) +static int gen8_render_emit_breadcrumb(struct drm_i915_gem_request *req) { struct intel_engine_cs *engine = req->engine; struct intel_ring *ring = req->ring; @@ -2637,9 +2637,9 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, engine->init_hw = init_ring_common; engine->reset_hw = reset_ring_common; - engine->emit_request = i9xx_emit_request; + engine->emit_breadcrumb = i9xx_emit_breadcrumb; if (i915.semaphores) - engine->emit_request = gen6_sema_emit_request; + engine->emit_breadcrumb = gen6_sema_emit_breadcrumb; engine->submit_request = i9xx_submit_request; if (INTEL_GEN(dev_priv) >= 8) @@ -2666,7 +2666,7 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine) if (INTEL_GEN(dev_priv) >= 8) { engine->init_context = intel_rcs_ctx_init; - engine->emit_request = gen8_render_emit_request; + engine->emit_breadcrumb = gen8_render_emit_breadcrumb; engine->emit_flush = gen8_render_ring_flush; if (i915.semaphores) engine->semaphore.signal = gen8_rcs_signal; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index a62e396..a5ced16 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -255,7 +255,7 @@ struct intel_engine_cs { #define I915_DISPATCH_SECURE BIT(0) #define I915_DISPATCH_PINNED BIT(1) #define I915_DISPATCH_RS BIT(2) - int (*emit_request)(struct drm_i915_gem_request *req); + int (*emit_breadcrumb)(struct drm_i915_gem_request *req); /* Pass the request to the hardware queue (e.g. directly into * the legacy ringbuffer or to the end of an execlist). -- cgit v1.1 From 98f29e8d908f2b9e3d966f6f7d63cd69b4aaf0a2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:51 +0100 Subject: drm/i915: Record space required for breadcrumb emission In the next patch, we will use deferred breadcrumb emission. That requires reserving sufficient space in the ringbuffer to emit the breadcrumb, which first requires us to know how large the breadcrumb is. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-28-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_request.c | 1 + drivers/gpu/drm/i915/intel_lrc.c | 6 ++++++ drivers/gpu/drm/i915/intel_ringbuffer.c | 29 +++++++++++++++++++++++++++-- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + 4 files changed, 35 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index a626b26..be9e23b 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -434,6 +434,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, * away, e.g. because a GPU scheduler has deferred it. 
*/ req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST; + GEM_BUG_ON(req->reserved_space < engine->emit_breadcrumb_sz); if (i915.enable_execlists) ret = intel_logical_ring_alloc_request_extras(req); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 57dba45..8229bae 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1590,6 +1590,8 @@ static int gen8_emit_breadcrumb(struct drm_i915_gem_request *request) return intel_logical_ring_advance(request); } +static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS; + static int gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request) { struct intel_ring *ring = request->ring; @@ -1621,6 +1623,8 @@ static int gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request) return intel_logical_ring_advance(request); } +static const int gen8_emit_breadcrumb_render_sz = 8 + WA_TAIL_DWORDS; + static int gen8_init_rcs_context(struct drm_i915_gem_request *req) { int ret; @@ -1695,6 +1699,7 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) engine->reset_hw = reset_common_ring; engine->emit_flush = gen8_emit_flush; engine->emit_breadcrumb = gen8_emit_breadcrumb; + engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_sz; engine->submit_request = execlists_submit_request; engine->irq_enable = gen8_logical_ring_enable_irq; @@ -1817,6 +1822,7 @@ int logical_render_ring_init(struct intel_engine_cs *engine) engine->init_context = gen8_init_rcs_context; engine->emit_flush = gen8_emit_flush_render; engine->emit_breadcrumb = gen8_emit_breadcrumb_render; + engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_render_sz; ret = intel_engine_create_scratch(engine, 4096); if (ret) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 54c3981..ae9cf6b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1348,6 +1348,8 @@ static int i9xx_emit_breadcrumb(struct drm_i915_gem_request *req) return 0; } +static const int i9xx_emit_breadcrumb_sz = 4; + /** * gen6_sema_emit_breadcrumb - Update the semaphore mailbox registers * @@ -1401,6 +1403,8 @@ static int gen8_render_emit_breadcrumb(struct drm_i915_gem_request *req) return 0; } +static const int gen8_render_emit_breadcrumb_sz = 8; + /** * intel_ring_sync - sync the waiter to the signaller on seqno * @@ -2638,8 +2642,21 @@ static void intel_ring_default_vfuncs(struct drm_i915_private *dev_priv, engine->reset_hw = reset_ring_common; engine->emit_breadcrumb = i9xx_emit_breadcrumb; - if (i915.semaphores) + engine->emit_breadcrumb_sz = i9xx_emit_breadcrumb_sz; + if (i915.semaphores) { + int num_rings; + engine->emit_breadcrumb = gen6_sema_emit_breadcrumb; + + num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask) - 1; + if (INTEL_GEN(dev_priv) >= 8) { + engine->emit_breadcrumb_sz += num_rings * 6; + } else { + engine->emit_breadcrumb_sz += num_rings * 3; + if (num_rings & 1) + engine->emit_breadcrumb_sz++; + } + } engine->submit_request = i9xx_submit_request; if (INTEL_GEN(dev_priv) >= 8) @@ -2667,9 +2684,17 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine) if (INTEL_GEN(dev_priv) >= 8) { engine->init_context = intel_rcs_ctx_init; engine->emit_breadcrumb = gen8_render_emit_breadcrumb; + engine->emit_breadcrumb_sz = gen8_render_emit_breadcrumb_sz; engine->emit_flush = gen8_render_ring_flush; - if (i915.semaphores) + if (i915.semaphores) { + int num_rings; + engine->semaphore.signal = gen8_rcs_signal; + + num_rings = + 
hweight32(INTEL_INFO(dev_priv)->ring_mask) - 1; + engine->emit_breadcrumb_sz += num_rings * 6; + } } else if (INTEL_GEN(dev_priv) >= 6) { engine->init_context = intel_rcs_ctx_init; engine->emit_flush = gen7_render_ring_flush; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index a5ced16..7b7aaaf 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -256,6 +256,7 @@ struct intel_engine_cs { #define I915_DISPATCH_PINNED BIT(1) #define I915_DISPATCH_RS BIT(2) int (*emit_breadcrumb)(struct drm_i915_gem_request *req); + int emit_breadcrumb_sz; /* Pass the request to the hardware queue (e.g. directly into * the legacy ringbuffer or to the end of an execlist). -- cgit v1.1 From caddfe7192f5e74d65ebcfdae614f99e8fd87222 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:52 +0100 Subject: drm/i915: Defer breadcrumb emission Move the actual emission of the breadcrumb for closing the request from i915_add_request() to the submit callback. (It can be moved later when required.) This allows us to defer the allocation of the global_seqno from request construction to actual submission, allowing us to emit the requests out of order (wrt the order of their construction; they will still only be executed once all of their dependencies are resolved, including that all earlier requests on their timeline have been submitted). We have to specialise how we then emit the request in order to write into the preallocated space, rather than at the tail of the ringbuffer (which will have been advanced by the addition of new requests). Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-29-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_request.c | 41 +++----- drivers/gpu/drm/i915/intel_lrc.c | 120 ++++++++--------------- drivers/gpu/drm/i915/intel_ringbuffer.c | 169 +++++++++++--------------------- drivers/gpu/drm/i915/intel_ringbuffer.h | 10 +- 4 files changed, 118 insertions(+), 222 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index be9e23b..06daa4d 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -318,17 +318,16 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) container_of(fence, typeof(*request), submit); struct intel_engine_cs *engine = request->engine; + if (state != FENCE_COMPLETE) + return NOTIFY_DONE; + /* Will be called from irq-context when using foreign DMA fences */ - switch (state) { - case FENCE_COMPLETE: - engine->timeline->last_submitted_seqno = request->fence.seqno; - engine->submit_request(request); - break; + engine->timeline->last_submitted_seqno = request->fence.seqno; - case FENCE_FREE: - break; - } + engine->emit_breadcrumb(request, + request->ring->vaddr + request->postfix); + engine->submit_request(request); return NOTIFY_DONE; } @@ -648,9 +647,7 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) struct intel_ring *ring = request->ring; struct intel_timeline *timeline = request->timeline; struct drm_i915_gem_request *prev; - u32 request_start; - u32 reserved_tail; - int ret; + int err; lockdep_assert_held(&request->i915->drm.struct_mutex); trace_i915_gem_request_add(request); @@ -660,8 +657,6 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) * should
already have been reserved in the ring buffer. Let the ring * know that it is time to use that space up. */ - request_start = ring->tail; - reserved_tail = request->reserved_space; request->reserved_space = 0; /* @@ -672,10 +667,10 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) * what. */ if (flush_caches) { - ret = engine->emit_flush(request, EMIT_FLUSH); + err = engine->emit_flush(request, EMIT_FLUSH); /* Not allowed to fail! */ - WARN(ret, "engine->emit_flush() failed: %d!\n", ret); + WARN(err, "engine->emit_flush() failed: %d!\n", err); } /* Record the position of the start of the breadcrumb so that @@ -683,20 +678,10 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) * GPU processing the request, we never over-estimate the * position of the ring's HEAD. */ + err = intel_ring_begin(request, engine->emit_breadcrumb_sz); + GEM_BUG_ON(err); request->postfix = ring->tail; - - /* Not allowed to fail! */ - ret = engine->emit_breadcrumb(request); - WARN(ret, "(%s)->emit_breadcrumb failed: %d!\n", engine->name, ret); - - /* Sanity check that the reserved size was large enough. */ - ret = ring->tail - request_start; - if (ret < 0) - ret += ring->size; - WARN_ONCE(ret > reserved_tail, - "Not enough space reserved (%d bytes) " - "for adding the request (%d bytes)\n", - reserved_tail, ret); + ring->tail += engine->emit_breadcrumb_sz * sizeof(u32); /* Seal the request and mark it as pending execution. Note that * we may inspect this state, without holding any locks, during diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 8229bae..fa3012c 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -365,7 +365,7 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq) struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt; u32 *reg_state = ce->lrc_reg_state; - reg_state[CTX_RING_TAIL+1] = intel_ring_offset(rq->ring, rq->tail); + reg_state[CTX_RING_TAIL+1] = rq->tail; /* True 32b PPGTT with dynamic page allocation: update PDP * registers and point the unallocated PDPs to scratch page. @@ -599,6 +599,15 @@ static void execlists_submit_request(struct drm_i915_gem_request *request) spin_lock_irqsave(&engine->execlist_lock, flags); + /* We keep the previous context alive until we retire the following + * request. This ensures that any the context object is still pinned + * for any residual writes the HW makes into it on the context switch + * into the next object following the breadcrumb. Otherwise, we may + * retire the context too early. + */ + request->previous_context = engine->last_context; + engine->last_context = request->ctx; + list_add_tail(&request->execlist_link, &engine->execlist_queue); if (execlists_elsp_idle(engine)) tasklet_hi_schedule(&engine->irq_tasklet); @@ -671,46 +680,6 @@ err_unpin: return ret; } -/* - * intel_logical_ring_advance() - advance the tail and prepare for submission - * @request: Request to advance the logical ringbuffer of. - * - * The tail is updated in our logical ringbuffer struct, not in the actual context. What - * really happens during submission is that the context and current tail will be placed - * on a queue waiting for the ELSP to be ready to accept a new context submission. At that - * point, the tail *inside* the context is updated and the ELSP written to. 
- */ -static int -intel_logical_ring_advance(struct drm_i915_gem_request *request) -{ - struct intel_ring *ring = request->ring; - struct intel_engine_cs *engine = request->engine; - - intel_ring_advance(ring); - request->tail = ring->tail; - - /* - * Here we add two extra NOOPs as padding to avoid - * lite restore of a context with HEAD==TAIL. - * - * Caller must reserve WA_TAIL_DWORDS for us! - */ - intel_ring_emit(ring, MI_NOOP); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); - request->wa_tail = ring->tail; - - /* We keep the previous context alive until we retire the following - * request. This ensures that any the context object is still pinned - * for any residual writes the HW makes into it on the context switch - * into the next object following the breadcrumb. Otherwise, we may - * retire the context too early. - */ - request->previous_context = engine->last_context; - engine->last_context = request->ctx; - return 0; -} - static int intel_lr_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { @@ -1566,41 +1535,35 @@ static void bxt_a_seqno_barrier(struct intel_engine_cs *engine) * used as a workaround for not being allowed to do lite * restore with HEAD==TAIL (WaIdleLiteRestore). */ - -static int gen8_emit_breadcrumb(struct drm_i915_gem_request *request) +static void gen8_emit_wa_tail(struct drm_i915_gem_request *request, u32 *out) { - struct intel_ring *ring = request->ring; - int ret; - - ret = intel_ring_begin(request, 6 + WA_TAIL_DWORDS); - if (ret) - return ret; + *out++ = MI_NOOP; + *out++ = MI_NOOP; + request->wa_tail = intel_ring_offset(request->ring, out); +} +static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, + u32 *out) +{ /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ BUILD_BUG_ON(I915_GEM_HWS_INDEX_ADDR & (1 << 5)); - intel_ring_emit(ring, (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW); - intel_ring_emit(ring, - intel_hws_seqno_address(request->engine) | - MI_FLUSH_DW_USE_GTT); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, request->global_seqno); - intel_ring_emit(ring, MI_USER_INTERRUPT); - intel_ring_emit(ring, MI_NOOP); - return intel_logical_ring_advance(request); + *out++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; + *out++ = intel_hws_seqno_address(request->engine) | MI_FLUSH_DW_USE_GTT; + *out++ = 0; + *out++ = request->global_seqno; + *out++ = MI_USER_INTERRUPT; + *out++ = MI_NOOP; + request->tail = intel_ring_offset(request->ring, out); + + gen8_emit_wa_tail(request, out); } static const int gen8_emit_breadcrumb_sz = 6 + WA_TAIL_DWORDS; -static int gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request) +static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request, + u32 *out) { - struct intel_ring *ring = request->ring; - int ret; - - ret = intel_ring_begin(request, 8 + WA_TAIL_DWORDS); - if (ret) - return ret; - /* We're using qword write, seqno should be aligned to 8 bytes. */ BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1); @@ -1608,19 +1571,20 @@ static int gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request) * need a prior CS_STALL, which is emitted by the flush * following the batch. 
*/ - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); - intel_ring_emit(ring, - (PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE)); - intel_ring_emit(ring, intel_hws_seqno_address(request->engine)); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, request->global_seqno); + *out++ = GFX_OP_PIPE_CONTROL(6); + *out++ = (PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_QW_WRITE); + *out++ = intel_hws_seqno_address(request->engine); + *out++ = 0; + *out++ = request->global_seqno; /* We're thrashing one dword of HWS. */ - intel_ring_emit(ring, 0); - intel_ring_emit(ring, MI_USER_INTERRUPT); - intel_ring_emit(ring, MI_NOOP); - return intel_logical_ring_advance(request); + *out++ = 0; + *out++ = MI_USER_INTERRUPT; + *out++ = MI_NOOP; + request->tail = intel_ring_offset(request->ring, out); + + gen8_emit_wa_tail(request, out); } static const int gen8_emit_breadcrumb_render_sz = 8 + WA_TAIL_DWORDS; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index ae9cf6b..1624477 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1213,90 +1213,62 @@ static void render_ring_cleanup(struct intel_engine_cs *engine) i915_vma_unpin_and_release(&dev_priv->semaphore); } -static int gen8_rcs_signal(struct drm_i915_gem_request *req) +static u32 *gen8_rcs_signal(struct drm_i915_gem_request *req, u32 *out) { - struct intel_ring *ring = req->ring; struct drm_i915_private *dev_priv = req->i915; struct intel_engine_cs *waiter; enum intel_engine_id id; - int ret, num_rings; - - num_rings = INTEL_INFO(dev_priv)->num_rings; - ret = intel_ring_begin(req, (num_rings-1) * 8); - if (ret) - return ret; for_each_engine(waiter, dev_priv, id) { u64 gtt_offset = req->engine->semaphore.signal_ggtt[id]; if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) continue; - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); - intel_ring_emit(ring, - PIPE_CONTROL_GLOBAL_GTT_IVB | - PIPE_CONTROL_QW_WRITE | - PIPE_CONTROL_CS_STALL); - intel_ring_emit(ring, lower_32_bits(gtt_offset)); - intel_ring_emit(ring, upper_32_bits(gtt_offset)); - intel_ring_emit(ring, req->global_seqno); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, - MI_SEMAPHORE_SIGNAL | - MI_SEMAPHORE_TARGET(waiter->hw_id)); - intel_ring_emit(ring, 0); + *out++ = GFX_OP_PIPE_CONTROL(6); + *out++ = (PIPE_CONTROL_GLOBAL_GTT_IVB | + PIPE_CONTROL_QW_WRITE | + PIPE_CONTROL_CS_STALL); + *out++ = lower_32_bits(gtt_offset); + *out++ = upper_32_bits(gtt_offset); + *out++ = req->global_seqno; + *out++ = 0; + *out++ = (MI_SEMAPHORE_SIGNAL | + MI_SEMAPHORE_TARGET(waiter->hw_id)); + *out++ = 0; } - intel_ring_advance(ring); - return 0; + return out; } -static int gen8_xcs_signal(struct drm_i915_gem_request *req) +static u32 *gen8_xcs_signal(struct drm_i915_gem_request *req, u32 *out) { - struct intel_ring *ring = req->ring; struct drm_i915_private *dev_priv = req->i915; struct intel_engine_cs *waiter; enum intel_engine_id id; - int ret, num_rings; - - num_rings = INTEL_INFO(dev_priv)->num_rings; - ret = intel_ring_begin(req, (num_rings-1) * 6); - if (ret) - return ret; for_each_engine(waiter, dev_priv, id) { u64 gtt_offset = req->engine->semaphore.signal_ggtt[id]; if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID) continue; - intel_ring_emit(ring, - (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW); - intel_ring_emit(ring, - lower_32_bits(gtt_offset) | - MI_FLUSH_DW_USE_GTT); - intel_ring_emit(ring, upper_32_bits(gtt_offset)); - intel_ring_emit(ring, 
req->global_seqno); - intel_ring_emit(ring, - MI_SEMAPHORE_SIGNAL | - MI_SEMAPHORE_TARGET(waiter->hw_id)); - intel_ring_emit(ring, 0); + *out++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW; + *out++ = lower_32_bits(gtt_offset) | MI_FLUSH_DW_USE_GTT; + *out++ = upper_32_bits(gtt_offset); + *out++ = req->global_seqno; + *out++ = (MI_SEMAPHORE_SIGNAL | + MI_SEMAPHORE_TARGET(waiter->hw_id)); + *out++ = 0; } - intel_ring_advance(ring); - return 0; + return out; } -static int gen6_signal(struct drm_i915_gem_request *req) +static u32 *gen6_signal(struct drm_i915_gem_request *req, u32 *out) { - struct intel_ring *ring = req->ring; struct drm_i915_private *dev_priv = req->i915; struct intel_engine_cs *engine; enum intel_engine_id id; - int ret, num_rings; - - num_rings = INTEL_INFO(dev_priv)->num_rings; - ret = intel_ring_begin(req, round_up((num_rings-1) * 3, 2)); - if (ret) - return ret; + int num_rings = 0; for_each_engine(engine, dev_priv, id) { i915_reg_t mbox_reg; @@ -1306,46 +1278,34 @@ static int gen6_signal(struct drm_i915_gem_request *req) mbox_reg = req->engine->semaphore.mbox.signal[engine->hw_id]; if (i915_mmio_reg_valid(mbox_reg)) { - intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); - intel_ring_emit_reg(ring, mbox_reg); - intel_ring_emit(ring, req->global_seqno); + *out++ = MI_LOAD_REGISTER_IMM(1); + *out++ = i915_mmio_reg_offset(mbox_reg); + *out++ = req->global_seqno; + num_rings++; } } + if (num_rings & 1) + *out++ = MI_NOOP; - /* If num_dwords was rounded, make sure the tail pointer is correct */ - if (num_rings % 2 == 0) - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); - - return 0; + return out; } static void i9xx_submit_request(struct drm_i915_gem_request *request) { struct drm_i915_private *dev_priv = request->i915; - I915_WRITE_TAIL(request->engine, - intel_ring_offset(request->ring, request->tail)); + I915_WRITE_TAIL(request->engine, request->tail); } -static int i9xx_emit_breadcrumb(struct drm_i915_gem_request *req) +static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, + u32 *out) { - struct intel_ring *ring = req->ring; - int ret; - - ret = intel_ring_begin(req, 4); - if (ret) - return ret; - - intel_ring_emit(ring, MI_STORE_DWORD_INDEX); - intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); - intel_ring_emit(ring, req->global_seqno); - intel_ring_emit(ring, MI_USER_INTERRUPT); - intel_ring_advance(ring); - - req->tail = ring->tail; + *out++ = MI_STORE_DWORD_INDEX; + *out++ = I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT; + *out++ = req->global_seqno; + *out++ = MI_USER_INTERRUPT; - return 0; + req->tail = intel_ring_offset(req->ring, out); } static const int i9xx_emit_breadcrumb_sz = 4; @@ -1358,49 +1318,34 @@ static const int i9xx_emit_breadcrumb_sz = 4; * Update the mailbox registers in the *other* rings with the current seqno. * This acts like a signal in the canonical semaphore. 
*/ -static int gen6_sema_emit_breadcrumb(struct drm_i915_gem_request *req) +static void gen6_sema_emit_breadcrumb(struct drm_i915_gem_request *req, + u32 *out) { - int ret; - - ret = req->engine->semaphore.signal(req); - if (ret) - return ret; - - return i9xx_emit_breadcrumb(req); + return i9xx_emit_breadcrumb(req, + req->engine->semaphore.signal(req, out)); } -static int gen8_render_emit_breadcrumb(struct drm_i915_gem_request *req) +static void gen8_render_emit_breadcrumb(struct drm_i915_gem_request *req, + u32 *out) { struct intel_engine_cs *engine = req->engine; - struct intel_ring *ring = req->ring; - int ret; - if (engine->semaphore.signal) { - ret = engine->semaphore.signal(req); - if (ret) - return ret; - } - - ret = intel_ring_begin(req, 8); - if (ret) - return ret; + if (engine->semaphore.signal) + out = engine->semaphore.signal(req, out); - intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); - intel_ring_emit(ring, (PIPE_CONTROL_GLOBAL_GTT_IVB | + *out++ = GFX_OP_PIPE_CONTROL(6); + *out++ = (PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_QW_WRITE)); - intel_ring_emit(ring, intel_hws_seqno_address(engine)); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, req->global_seqno); + PIPE_CONTROL_QW_WRITE); + *out++ = intel_hws_seqno_address(engine); + *out++ = 0; + *out++ = req->global_seqno; /* We're thrashing one dword of HWS. */ - intel_ring_emit(ring, 0); - intel_ring_emit(ring, MI_USER_INTERRUPT); - intel_ring_emit(ring, MI_NOOP); - intel_ring_advance(ring); - - req->tail = ring->tail; + *out++ = 0; + *out++ = MI_USER_INTERRUPT; + *out++ = MI_NOOP; - return 0; + req->tail = intel_ring_offset(req->ring, out); } static const int gen8_render_emit_breadcrumb_sz = 8; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 7b7aaaf..9d228be 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -255,7 +255,8 @@ struct intel_engine_cs { #define I915_DISPATCH_SECURE BIT(0) #define I915_DISPATCH_PINNED BIT(1) #define I915_DISPATCH_RS BIT(2) - int (*emit_breadcrumb)(struct drm_i915_gem_request *req); + void (*emit_breadcrumb)(struct drm_i915_gem_request *req, + u32 *out); int emit_breadcrumb_sz; /* Pass the request to the hardware queue (e.g. directly into @@ -331,7 +332,7 @@ struct intel_engine_cs { /* AKA wait() */ int (*sync_to)(struct drm_i915_gem_request *req, struct drm_i915_gem_request *signal); - int (*signal)(struct drm_i915_gem_request *req); + u32 *(*signal)(struct drm_i915_gem_request *req, u32 *out); } semaphore; /* Execlists */ @@ -487,10 +488,11 @@ static inline void intel_ring_advance(struct intel_ring *ring) */ } -static inline u32 intel_ring_offset(struct intel_ring *ring, u32 value) +static inline u32 intel_ring_offset(struct intel_ring *ring, void *addr) { /* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */ - return value & (ring->size - 1); + u32 offset = addr - ring->vaddr; + return offset & (ring->size - 1); } int __intel_ring_space(int head, int tail, int size); -- cgit v1.1 From 85e17f5974b357bc4a127be09de71b430be265e0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:53 +0100 Subject: drm/i915: Move the global sync optimisation to the timeline Currently we try to reduce the number of synchronisations (now the number of requests we need to wait upon) by noting that if we have earlier waited upon a request, all subsequent requests in the timeline will be after the wait. 
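In outline, the bookkeeping moved by this patch looks as follows (a
condensed sketch of the i915_gem_request_await_request() hunk below,
not a literal quote):

    /* Have we already waited for an equal-or-later request from
     * this engine on this timeline? Then ordering is already
     * guaranteed and the new wait can be elided.
     */
    if (from->global_seqno <= to->timeline->sync_seqno[from->engine->id])
            return 0;

    /* ... otherwise emit the actual wait ... */

    /* Record what we waited for, so that later requests on the
     * same timeline can skip an equivalent wait.
     */
    to->timeline->sync_seqno[from->engine->id] = from->global_seqno;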
This only applies to requests in this timeline, as other timelines will not be ordered by that waiter. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-30-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 9 ------ drivers/gpu/drm/i915/i915_drv.h | 1 - drivers/gpu/drm/i915/i915_gem_request.c | 29 +++++++++++-------- drivers/gpu/drm/i915/i915_gem_timeline.h | 1 + drivers/gpu/drm/i915/i915_gpu_error.c | 48 +++++++++++++++++++------------- drivers/gpu/drm/i915/intel_engine_cs.c | 2 -- drivers/gpu/drm/i915/intel_ringbuffer.c | 3 -- drivers/gpu/drm/i915/intel_ringbuffer.h | 23 --------------- 8 files changed, 47 insertions(+), 69 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 90bc4a8..4655227 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3347,15 +3347,6 @@ static int i915_semaphore_status(struct seq_file *m, void *unused) seq_putc(m, '\n'); } - seq_puts(m, "\nSync seqno:\n"); - for_each_engine(engine, dev_priv, id) { - for (j = 0; j < num_rings; j++) - seq_printf(m, " 0x%08x ", - engine->semaphore.sync_seqno[j]); - seq_putc(m, '\n'); - } - seq_putc(m, '\n'); - intel_runtime_pm_put(dev_priv); mutex_unlock(&dev->struct_mutex); return 0; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 217674b..6bf40276a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -802,7 +802,6 @@ struct drm_i915_error_state { u32 cpu_ring_tail; u32 last_seqno; - u32 semaphore_seqno[I915_NUM_ENGINES - 1]; /* Register state */ u32 start; diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 06daa4d..9c34a4c 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -238,35 +238,41 @@ static int i915_gem_check_wedge(struct drm_i915_private *dev_priv) return 0; } -static int i915_gem_init_global_seqno(struct drm_i915_private *dev_priv, - u32 seqno) +static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno) { - struct i915_gem_timeline *timeline = &dev_priv->gt.global_timeline; + struct i915_gem_timeline *timeline = &i915->gt.global_timeline; struct intel_engine_cs *engine; enum intel_engine_id id; int ret; /* Carefully retire all requests without writing to the rings */ - ret = i915_gem_wait_for_idle(dev_priv, + ret = i915_gem_wait_for_idle(i915, I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED); if (ret) return ret; - i915_gem_retire_requests(dev_priv); + i915_gem_retire_requests(i915); /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ if (!i915_seqno_passed(seqno, timeline->next_seqno)) { - while (intel_kick_waiters(dev_priv) || - intel_kick_signalers(dev_priv)) + while (intel_kick_waiters(i915) || intel_kick_signalers(i915)) yield(); yield(); } /* Finally reset hw state */ - for_each_engine(engine, dev_priv, id) + for_each_engine(engine, i915, id) intel_engine_init_global_seqno(engine, seqno); + list_for_each_entry(timeline, &i915->gt.timelines, link) { + for_each_engine(engine, i915, id) { + struct intel_timeline *tl = &timeline->engine[id]; + + memset(tl->sync_seqno, 0, sizeof(tl->sync_seqno)); + } + } + return 0; } @@ -462,7 +468,7 @@ static int i915_gem_request_await_request(struct drm_i915_gem_request *to, struct drm_i915_gem_request *from) { - int idx, ret; + int ret; GEM_BUG_ON(to == from); 
@@ -483,8 +489,7 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to, return ret < 0 ? ret : 0; } - idx = intel_engine_sync_index(from->engine, to->engine); - if (from->global_seqno <= from->engine->semaphore.sync_seqno[idx]) + if (from->global_seqno <= to->timeline->sync_seqno[from->engine->id]) return 0; trace_i915_gem_ring_sync_to(to, from); @@ -502,7 +507,7 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to, return ret; } - from->engine->semaphore.sync_seqno[idx] = from->global_seqno; + to->timeline->sync_seqno[from->engine->id] = from->global_seqno; return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h index bfdf033..767b239 100644 --- a/drivers/gpu/drm/i915/i915_gem_timeline.h +++ b/drivers/gpu/drm/i915/i915_gem_timeline.h @@ -48,6 +48,7 @@ struct intel_timeline { * struct_mutex. */ struct i915_gem_active last_request; + u32 sync_seqno[I915_NUM_ENGINES]; struct i915_gem_timeline *common; }; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 9aa197c..ef36981 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -415,17 +415,13 @@ static void error_print_engine(struct drm_i915_error_state_buf *m, if (INTEL_GEN(m->i915) >= 6) { err_printf(m, " RC PSMI: 0x%08x\n", ee->rc_psmi); err_printf(m, " FAULT_REG: 0x%08x\n", ee->fault_reg); - err_printf(m, " SYNC_0: 0x%08x [last synced 0x%08x]\n", - ee->semaphore_mboxes[0], - ee->semaphore_seqno[0]); - err_printf(m, " SYNC_1: 0x%08x [last synced 0x%08x]\n", - ee->semaphore_mboxes[1], - ee->semaphore_seqno[1]); - if (HAS_VEBOX(m->i915)) { - err_printf(m, " SYNC_2: 0x%08x [last synced 0x%08x]\n", - ee->semaphore_mboxes[2], - ee->semaphore_seqno[2]); - } + err_printf(m, " SYNC_0: 0x%08x\n", + ee->semaphore_mboxes[0]); + err_printf(m, " SYNC_1: 0x%08x\n", + ee->semaphore_mboxes[1]); + if (HAS_VEBOX(m->i915)) + err_printf(m, " SYNC_2: 0x%08x\n", + ee->semaphore_mboxes[2]); } if (USES_PPGTT(m->i915)) { err_printf(m, " GFX_MODE: 0x%08x\n", ee->vm_info.gfx_mode); @@ -972,6 +968,26 @@ static void i915_gem_record_fences(struct drm_i915_private *dev_priv, } } +static inline u32 +gen8_engine_sync_index(struct intel_engine_cs *engine, + struct intel_engine_cs *other) +{ + int idx; + + /* + * rcs -> 0 = vcs, 1 = bcs, 2 = vecs, 3 = vcs2; + * vcs -> 0 = bcs, 1 = vecs, 2 = vcs2, 3 = rcs; + * bcs -> 0 = vecs, 1 = vcs2. 
2 = rcs, 3 = vcs; + * vecs -> 0 = vcs2, 1 = rcs, 2 = vcs, 3 = bcs; + * vcs2 -> 0 = rcs, 1 = vcs, 2 = bcs, 3 = vecs; + */ + + idx = (other - engine) - 1; + if (idx < 0) + idx += I915_NUM_ENGINES; + + return idx; +} static void gen8_record_semaphore_state(struct drm_i915_error_state *error, struct intel_engine_cs *engine, @@ -995,10 +1011,9 @@ static void gen8_record_semaphore_state(struct drm_i915_error_state *error, signal_offset = (GEN8_SIGNAL_OFFSET(engine, id) & (PAGE_SIZE - 1)) / 4; tmp = error->semaphore->pages[0]; - idx = intel_engine_sync_index(engine, to); + idx = gen8_engine_sync_index(engine, to); ee->semaphore_mboxes[idx] = tmp[signal_offset]; - ee->semaphore_seqno[idx] = engine->semaphore.sync_seqno[idx]; } } @@ -1009,14 +1024,9 @@ static void gen6_record_semaphore_state(struct intel_engine_cs *engine, ee->semaphore_mboxes[0] = I915_READ(RING_SYNC_0(engine->mmio_base)); ee->semaphore_mboxes[1] = I915_READ(RING_SYNC_1(engine->mmio_base)); - ee->semaphore_seqno[0] = engine->semaphore.sync_seqno[0]; - ee->semaphore_seqno[1] = engine->semaphore.sync_seqno[1]; - - if (HAS_VEBOX(dev_priv)) { + if (HAS_VEBOX(dev_priv)) ee->semaphore_mboxes[2] = I915_READ(RING_SYNC_2(engine->mmio_base)); - ee->semaphore_seqno[2] = engine->semaphore.sync_seqno[2]; - } } static void error_record_engine_waiters(struct intel_engine_cs *engine, diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 6a31055..94de3d6 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -204,8 +204,6 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno) I915_NUM_ENGINES * gen8_semaphore_seqno_size); kunmap(page); } - memset(engine->semaphore.sync_seqno, 0, - sizeof(engine->semaphore.sync_seqno)); intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); if (engine->irq_seqno_barrier) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 1624477..188fdec 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2003,9 +2003,6 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) intel_engine_setup_common(engine); - memset(engine->semaphore.sync_seqno, 0, - sizeof(engine->semaphore.sync_seqno)); - ret = intel_engine_init_common(engine); if (ret) goto error; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 9d228be..891629c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -314,8 +314,6 @@ struct intel_engine_cs { * ie. transpose of f(x, y) */ struct { - u32 sync_seqno[I915_NUM_ENGINES-1]; - union { #define GEN6_SEMAPHORE_LAST VECS_HW #define GEN6_NUM_SEMAPHORES (GEN6_SEMAPHORE_LAST + 1) @@ -385,27 +383,6 @@ intel_engine_flag(const struct intel_engine_cs *engine) return 1 << engine->id; } -static inline u32 -intel_engine_sync_index(struct intel_engine_cs *engine, - struct intel_engine_cs *other) -{ - int idx; - - /* - * rcs -> 0 = vcs, 1 = bcs, 2 = vecs, 3 = vcs2; - * vcs -> 0 = bcs, 1 = vecs, 2 = vcs2, 3 = rcs; - * bcs -> 0 = vecs, 1 = vcs2. 
2 = rcs, 3 = vcs; - * vecs -> 0 = vcs2, 1 = rcs, 2 = vcs, 3 = bcs; - * vcs2 -> 0 = rcs, 1 = vcs, 2 = bcs, 3 = vecs; - */ - - idx = (other->id - engine->id) - 1; - if (idx < 0) - idx += I915_NUM_ENGINES; - - return idx; -} - static inline void intel_flush_status_page(struct intel_engine_cs *engine, int reg) { -- cgit v1.1 From 562f5d45508bc376cf537d414cd944cff5e453f7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:54 +0100 Subject: drm/i915: Create a unique name for the context This will be used for communicating issues with this context to userspace, so we want to identify the parent process and the individual context. Note that the name isn't quite unique, it makes the presumption of there only being a single device fd per process. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-31-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 11 ++--------- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem_context.c | 23 ++++++++++++++++++----- 3 files changed, 21 insertions(+), 14 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 4655227..f8604a0 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -631,17 +631,10 @@ static void print_request(struct seq_file *m, struct drm_i915_gem_request *rq, const char *prefix) { - struct pid *pid = rq->ctx->pid; - struct task_struct *task; - - rcu_read_lock(); - task = pid ? pid_task(pid, PIDTYPE_PID) : NULL; - seq_printf(m, "%s%x [%x:%x] @ %d: %s [%d]\n", prefix, + seq_printf(m, "%s%x [%x:%x] @ %d: %s\n", prefix, rq->global_seqno, rq->ctx->hw_id, rq->fence.seqno, jiffies_to_msecs(jiffies - rq->emitted_jiffies), - task ? task->comm : "", - task ? 
task->pid : -1); - rcu_read_unlock(); + rq->timeline->common->name); } static int i915_gem_request_info(struct seq_file *m, void *data) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6bf40276a..8b987f7 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -937,6 +937,7 @@ struct i915_gem_context { struct drm_i915_file_private *file_priv; struct i915_hw_ppgtt *ppgtt; struct pid *pid; + const char *name; struct i915_ctx_hang_stats hang_stats; diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index a2acb8b..d3118db 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -158,6 +158,7 @@ void i915_gem_context_free(struct kref *ctx_ref) __i915_gem_object_release_unless_active(ce->state->obj); } + kfree(ctx->name); put_pid(ctx->pid); list_del(&ctx->link); @@ -303,19 +304,28 @@ __create_hw_context(struct drm_device *dev, } /* Default context will never have a file_priv */ - if (file_priv != NULL) { + ret = DEFAULT_CONTEXT_HANDLE; + if (file_priv) { ret = idr_alloc(&file_priv->context_idr, ctx, DEFAULT_CONTEXT_HANDLE, 0, GFP_KERNEL); if (ret < 0) goto err_out; - } else - ret = DEFAULT_CONTEXT_HANDLE; + } + ctx->user_handle = ret; ctx->file_priv = file_priv; - if (file_priv) + if (file_priv) { ctx->pid = get_task_pid(current, PIDTYPE_PID); + ctx->name = kasprintf(GFP_KERNEL, "%s[%d]/%x", + current->comm, + pid_nr(ctx->pid), + ctx->user_handle); + if (!ctx->name) { + ret = -ENOMEM; + goto err_pid; + } + } - ctx->user_handle = ret; /* NB: Mark all slices as needing a remap so that when the context first * loads it will restore whatever remap state already exists. If there * is no remap info, it will be a NOP. */ @@ -329,6 +339,9 @@ __create_hw_context(struct drm_device *dev, return ctx; +err_pid: + put_pid(ctx->pid); + idr_remove(&file_priv->context_idr, ctx->user_handle); err_out: context_close(ctx); return ERR_PTR(ret); -- cgit v1.1 From f6168e330438a264123d2e0b502526f06594bb51 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:55 +0100 Subject: drm/i915: Convert breadcrumbs spinlock to be irqsafe The breadcrumbs are about to be used from within IRQ context sections (e.g. nouveau signals a fence from an interrupt handler causing us to submit a new request) and/or from bottom-half tasklets (i.e. intel_lrc_irq_handler), therefore we need to employ the irqsafe spinlock variants. For example, deferring the request submission to the intel_lrc_irq_handler generates this trace: [ 66.388639] ================================= [ 66.388650] [ INFO: inconsistent lock state ] [ 66.388663] 4.9.0-rc2+ #56 Not tainted [ 66.388672] --------------------------------- [ 66.388682] inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage. 
[ 66.388695] swapper/1/0 [HC0[0]:SC1[1]:HE0:SE0] takes: [ 66.388706] (&(&b->lock)->rlock){+.?...} , at: [] intel_engine_enable_signaling+0x78/0x150 [ 66.388761] {SOFTIRQ-ON-W} state was registered at: [ 66.388772] [ 66.388783] [] __lock_acquire+0x682/0x1870 [ 66.388795] [ 66.388803] [] lock_acquire+0x6c/0xb0 [ 66.388814] [ 66.388824] [] _raw_spin_lock+0x2a/0x40 [ 66.388835] [ 66.388845] [] intel_engine_reset_breadcrumbs+0x21/0xb0 [ 66.388857] [ 66.388866] [] gen8_init_common_ring+0x67/0x100 [ 66.388878] [ 66.388887] [] gen8_init_render_ring+0x12/0x60 [ 66.388903] [ 66.388912] [] i915_gem_init_hw+0xf7/0x2a0 [ 66.388927] [ 66.388936] [] i915_gem_init+0xbb/0xf0 [ 66.388950] [ 66.388959] [] i915_driver_load+0x7e0/0x1330 [ 66.388978] [ 66.388988] [] i915_pci_probe+0x28/0x40 [ 66.389003] [ 66.389013] [] pci_device_probe+0x8b/0xf0 [ 66.389028] [ 66.389037] [] driver_probe_device+0x21e/0x430 [ 66.389056] [ 66.389065] [] __driver_attach+0xde/0xe0 [ 66.389080] [ 66.389090] [] bus_for_each_dev+0x5d/0x90 [ 66.389105] [ 66.389113] [] driver_attach+0x19/0x20 [ 66.389134] [ 66.389144] [] bus_add_driver+0x15d/0x260 [ 66.389159] [ 66.389168] [] driver_register+0x5b/0xd0 [ 66.389183] [ 66.389281] [] __pci_register_driver+0x5b/0x60 [ 66.389301] [ 66.389312] [] i915_init+0x3e/0x45 [ 66.389326] [ 66.389336] [] do_one_initcall+0x8b/0x118 [ 66.389350] [ 66.389359] [] kernel_init_freeable+0x1b3/0x23b [ 66.389378] [ 66.389387] [] kernel_init+0x9/0x100 [ 66.389402] [ 66.389411] [] ret_from_fork+0x27/0x40 [ 66.389426] irq event stamp: 315865 [ 66.389438] hardirqs last enabled at (315864): [] _raw_spin_unlock_irqrestore+0x31/0x50 [ 66.389469] hardirqs last disabled at (315865): [] _raw_spin_lock_irqsave+0x13/0x50 [ 66.389499] softirqs last enabled at (315818): [] _local_bh_enable+0x1c/0x50 [ 66.389530] softirqs last disabled at (315819): [] irq_exit+0xbe/0xd0 [ 66.389559] [ 66.389559] other info that might help us debug this: [ 66.389580] Possible unsafe locking scenario: [ 66.389580] [ 66.389598] CPU0 [ 66.389609] ---- [ 66.389620] lock(&(&b->lock)->rlock); [ 66.389650] [ 66.389661] lock(&(&b->lock)->rlock); [ 66.389690] [ 66.389690] *** DEADLOCK *** [ 66.389690] [ 66.389715] 2 locks held by swapper/1/0: [ 66.389728] #0: (&(&tl->lock)->rlock){..-...}, at: [] intel_lrc_irq_handler+0x201/0x3c0 [ 66.389785] #1: (&(&req->lock)->rlock/1){..-...}, at: [] __i915_gem_request_submit+0x8f/0x170 [ 66.389854] [ 66.389854] stack backtrace: [ 66.389959] CPU: 1 PID: 0 Comm: swapper/1 Not tainted 4.9.0-rc2+ #56 [ 66.389976] Hardware name: / , BIOS PYBSWCEL.86A.0027.2015.0507.1758 05/07/2015 [ 66.389999] ffff88027fd03c58 ffffffff812beae5 ffff88027696e680 ffffffff822afe20 [ 66.390036] ffff88027fd03ca8 ffffffff810bb420 0000000000000001 0000000000000000 [ 66.390070] 0000000000000000 0000000000000006 0000000000000004 ffff88027696ee10 [ 66.390104] Call Trace: [ 66.390117] [ 66.390128] [] dump_stack+0x68/0x93 [ 66.390147] [] print_usage_bug+0x1d0/0x1e0 [ 66.390164] [] mark_lock+0x470/0x4f0 [ 66.390181] [] ? print_shortest_lock_dependencies+0x1b0/0x1b0 [ 66.390203] [] __lock_acquire+0x59d/0x1870 [ 66.390221] [] lock_acquire+0x6c/0xb0 [ 66.390237] [] ? lock_acquire+0x6c/0xb0 [ 66.390255] [] ? intel_engine_enable_signaling+0x78/0x150 [ 66.390273] [] _raw_spin_lock+0x2a/0x40 [ 66.390291] [] ? 
intel_engine_enable_signaling+0x78/0x150 [ 66.390309] [] intel_engine_enable_signaling+0x78/0x150 [ 66.390327] [] __i915_gem_request_submit+0x150/0x170 [ 66.390345] [] intel_lrc_irq_handler+0x28b/0x3c0 [ 66.390363] [] tasklet_action+0x57/0xc0 [ 66.390380] [] __do_softirq+0x119/0x240 [ 66.390396] [] irq_exit+0xbe/0xd0 [ 66.390414] [] do_IRQ+0x65/0x110 [ 66.390431] [] common_interrupt+0x86/0x86 [ 66.390446] [ 66.390457] [] ? cpuidle_enter_state+0x151/0x200 [ 66.390480] [] cpuidle_enter+0x12/0x20 [ 66.390498] [] call_cpuidle+0x1e/0x40 [ 66.390516] [] cpu_startup_entry+0x10e/0x1f0 [ 66.390534] [] start_secondary+0x103/0x130 (This is split out of the defer global seqno allocation patch due to realisation that we need a more complete conversion if we want to defer request submission even further.) v2: lockdep was warning about mixed SOFTIRQ contexts not HARDIRQ contexts so we only need to use spin_lock_bh and not disable interrupts. v3: We need full irq protection as we may be called from a third party interrupt handler (via fences). Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Reviewed-by: Joonas Lahtinen Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-32-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 12 +++++------ drivers/gpu/drm/i915/i915_gpu_error.c | 8 ++++---- drivers/gpu/drm/i915/intel_breadcrumbs.c | 35 ++++++++++++++++++++------------ drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +- 4 files changed, 33 insertions(+), 24 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index f8604a0..1723a1f 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -683,14 +683,14 @@ static void i915_ring_seqno_info(struct seq_file *m, seq_printf(m, "Current sequence (%s): %x\n", engine->name, intel_engine_get_seqno(engine)); - spin_lock(&b->lock); + spin_lock_irq(&b->lock); for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { struct intel_wait *w = container_of(rb, typeof(*w), node); seq_printf(m, "Waiting (%s): %s [%d] on %x\n", engine->name, w->tsk->comm, w->tsk->pid, w->seqno); } - spin_unlock(&b->lock); + spin_unlock_irq(&b->lock); } static int i915_gem_seqno_info(struct seq_file *m, void *data) @@ -1355,14 +1355,14 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) yesno(intel_engine_has_waiter(engine)), yesno(test_bit(engine->id, &dev_priv->gpu_error.missed_irq_rings))); - spin_lock(&b->lock); + spin_lock_irq(&b->lock); for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { struct intel_wait *w = container_of(rb, typeof(*w), node); seq_printf(m, "\t%s [%d] waiting for %x\n", w->tsk->comm, w->tsk->pid, w->seqno); } - spin_unlock(&b->lock); + spin_unlock_irq(&b->lock); seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n", (long long)engine->hangcheck.acthd, @@ -3265,14 +3265,14 @@ static int i915_engine_info(struct seq_file *m, void *unused) I915_READ(RING_PP_DIR_DCLV(engine))); } - spin_lock(&b->lock); + spin_lock_irq(&b->lock); for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { struct intel_wait *w = container_of(rb, typeof(*w), node); seq_printf(m, "\t%s [%d] waiting for %x\n", w->tsk->comm, w->tsk->pid, w->seqno); } - spin_unlock(&b->lock); + spin_unlock_irq(&b->lock); seq_puts(m, "\n"); } diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index ef36981..7ba4048 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ 
b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1043,7 +1043,7 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine, if (RB_EMPTY_ROOT(&b->waiters)) return; - if (!spin_trylock(&b->lock)) { + if (!spin_trylock_irq(&b->lock)) { ee->waiters = ERR_PTR(-EDEADLK); return; } @@ -1051,7 +1051,7 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine, count = 0; for (rb = rb_first(&b->waiters); rb != NULL; rb = rb_next(rb)) count++; - spin_unlock(&b->lock); + spin_unlock_irq(&b->lock); waiter = NULL; if (count) @@ -1061,7 +1061,7 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine, if (!waiter) return; - if (!spin_trylock(&b->lock)) { + if (!spin_trylock_irq(&b->lock)) { kfree(waiter); ee->waiters = ERR_PTR(-EDEADLK); return; @@ -1079,7 +1079,7 @@ static void error_record_engine_waiters(struct intel_engine_cs *engine, if (++ee->num_waiters == count) break; } - spin_unlock(&b->lock); + spin_unlock_irq(&b->lock); } static void error_record_engine_registers(struct drm_i915_error_state *error, diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 0d5def0..c410d3d 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -83,16 +83,18 @@ static void irq_enable(struct intel_engine_cs *engine) */ engine->breadcrumbs.irq_posted = true; - spin_lock_irq(&engine->i915->irq_lock); + /* Caller disables interrupts */ + spin_lock(&engine->i915->irq_lock); engine->irq_enable(engine); - spin_unlock_irq(&engine->i915->irq_lock); + spin_unlock(&engine->i915->irq_lock); } static void irq_disable(struct intel_engine_cs *engine) { - spin_lock_irq(&engine->i915->irq_lock); + /* Caller disables interrupts */ + spin_lock(&engine->i915->irq_lock); engine->irq_disable(engine); - spin_unlock_irq(&engine->i915->irq_lock); + spin_unlock(&engine->i915->irq_lock); engine->breadcrumbs.irq_posted = false; } @@ -293,9 +295,9 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine, struct intel_breadcrumbs *b = &engine->breadcrumbs; bool first; - spin_lock(&b->lock); + spin_lock_irq(&b->lock); first = __intel_engine_add_wait(engine, wait); - spin_unlock(&b->lock); + spin_unlock_irq(&b->lock); return first; } @@ -326,7 +328,7 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine, if (RB_EMPTY_NODE(&wait->node)) return; - spin_lock(&b->lock); + spin_lock_irq(&b->lock); if (RB_EMPTY_NODE(&wait->node)) goto out_unlock; @@ -400,7 +402,7 @@ out_unlock: GEM_BUG_ON(rb_first(&b->waiters) != (b->first_wait ? &b->first_wait->node : NULL)); GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh) ^ RB_EMPTY_ROOT(&b->waiters)); - spin_unlock(&b->lock); + spin_unlock_irq(&b->lock); } static bool signal_complete(struct drm_i915_gem_request *request) @@ -473,14 +475,14 @@ static int intel_breadcrumbs_signaler(void *arg) * we just completed - so double check we are still * the oldest before picking the next one. */ - spin_lock(&b->lock); + spin_lock_irq(&b->lock); if (request == b->first_signal) { struct rb_node *rb = rb_next(&request->signaling.node); b->first_signal = rb ? 
to_signaler(rb) : NULL; } rb_erase(&request->signaling.node, &b->signals); - spin_unlock(&b->lock); + spin_unlock_irq(&b->lock); i915_gem_request_put(request); } else { @@ -502,7 +504,14 @@ void intel_engine_enable_signaling(struct drm_i915_gem_request *request) struct rb_node *parent, **p; bool first, wakeup; - /* locked by dma_fence_enable_sw_signaling() */ + /* Note that we may be called from an interrupt handler on another + * device (e.g. nouveau signaling a fence completion causing us + * to submit a request, and so enable signaling). As such, + * we need to make sure that all other users of b->lock protect + * against interrupts, i.e. use spin_lock_irqsave. + */ + + /* locked by dma_fence_enable_sw_signaling() (irqsafe fence->lock) */ assert_spin_locked(&request->lock); if (!request->global_seqno) return; @@ -594,7 +603,7 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) struct intel_breadcrumbs *b = &engine->breadcrumbs; cancel_fake_irq(engine); - spin_lock(&b->lock); + spin_lock_irq(&b->lock); __intel_breadcrumbs_disable_irq(b); if (intel_engine_has_waiter(engine)) { @@ -607,7 +616,7 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) irq_disable(engine); } - spin_unlock(&b->lock); + spin_unlock_irq(&b->lock); } void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 891629c..d16c74a 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -207,7 +207,7 @@ struct intel_engine_cs { struct task_struct __rcu *irq_seqno_bh; /* bh for interrupts */ bool irq_posted; - spinlock_t lock; /* protects the lists of requests */ + spinlock_t lock; /* protects the lists of requests; irqsafe */ struct rb_root waiters; /* sorted by retirement, priority */ struct rb_root signals; /* sorted by retirement */ struct intel_wait *first_wait; /* oldest waiter by retirement */ -- cgit v1.1 From 28176ef4cfa510e5f1498bbf39ff1e4afd0b085d Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:56 +0100 Subject: drm/i915: Reserve space in the global seqno during request allocation A restriction on our global seqno is that they cannot wrap, and that we cannot use the value 0. This allows us to detect when a request has not yet been submitted, its global seqno is still 0, and ensures that hardware semaphores are monotonic as required by older hardware. To meet these restrictions when we defer the assignment of the global seqno, we must check that we have an available slot in the global seqno space during request construction. If that test fails, we wait for all requests to be completed and reset the hardware back to 0. 
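The wrap test in reserve_global_seqno() below is a plain unsigned
overflow check on the u32 seqno space. A worked example, with
illustrative values only:

    next_seqno      = 0xfffffffe
    active_requests = 4
    next_seqno + active_requests = 0x00000002   (mod 2^32)

Since 0x00000002 > 0xfffffffe is false, the likely() fast path is not
taken and we fall into i915_gem_init_global_seqno(i915, 0), which
idles the GPU and restarts the timeline. Seqnos are subsequently
handed out with atomic_inc_return(), so the first value after a reset
is 1 and the reserved value 0 is never assigned to a submitted
request.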
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-33-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 10 ++-- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem.c | 7 ++- drivers/gpu/drm/i915/i915_gem_request.c | 86 +++++++++++++++++--------------- drivers/gpu/drm/i915/i915_gem_timeline.h | 2 +- 5 files changed, 55 insertions(+), 52 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 1723a1f..9bef6f5 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -552,7 +552,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, void *data) seq_printf(m, "Flip queued on %s at seqno %x, next seqno %x [current breadcrumb %x], completed? %d\n", engine->name, i915_gem_request_get_seqno(work->flip_queued_req), - dev_priv->gt.global_timeline.next_seqno, + atomic_read(&dev_priv->gt.global_timeline.next_seqno), intel_engine_get_seqno(engine), i915_gem_request_completed(work->flip_queued_req)); } else @@ -1046,7 +1046,7 @@ i915_next_seqno_get(void *data, u64 *val) { struct drm_i915_private *dev_priv = data; - *val = READ_ONCE(dev_priv->gt.global_timeline.next_seqno); + *val = atomic_read(&dev_priv->gt.global_timeline.next_seqno); return 0; } @@ -2277,8 +2277,8 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) struct drm_file *file; seq_printf(m, "RPS enabled? %d\n", dev_priv->rps.enabled); - seq_printf(m, "GPU busy? %s [%x]\n", - yesno(dev_priv->gt.awake), dev_priv->gt.active_engines); + seq_printf(m, "GPU busy? %s [%d requests]\n", + yesno(dev_priv->gt.awake), dev_priv->gt.active_requests); seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv)); seq_printf(m, "Frequency requested %d\n", intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq)); @@ -2313,7 +2313,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) if (INTEL_GEN(dev_priv) >= 6 && dev_priv->rps.enabled && - dev_priv->gt.active_engines) { + dev_priv->gt.active_requests) { u32 rpup, rpupei; u32 rpdown, rpdownei; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8b987f7..eacb144 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2092,6 +2092,7 @@ struct drm_i915_private { struct list_head timelines; struct i915_gem_timeline global_timeline; + u32 active_requests; /** * Is the GPU currently considered idle, or busy executing @@ -2100,7 +2101,6 @@ struct drm_i915_private { * In order to reduce the effect on performance, there * is a slight delay before we do so. 
*/ - unsigned int active_engines; bool awake; /** diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f4cfb88..8a5d207 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2688,8 +2688,6 @@ static void i915_gem_cleanup_engine(struct intel_engine_cs *engine) memset(engine->execlist_port, 0, sizeof(engine->execlist_port)); spin_unlock(&engine->execlist_lock); } - - engine->i915->gt.active_engines &= ~intel_engine_flag(engine); } void i915_gem_set_wedged(struct drm_i915_private *dev_priv) @@ -2746,7 +2744,7 @@ i915_gem_idle_work_handler(struct work_struct *work) if (!READ_ONCE(dev_priv->gt.awake)) return; - if (READ_ONCE(dev_priv->gt.active_engines)) + if (READ_ONCE(dev_priv->gt.active_requests)) return; rearm_hangcheck = @@ -2760,7 +2758,7 @@ i915_gem_idle_work_handler(struct work_struct *work) goto out_rearm; } - if (dev_priv->gt.active_engines) + if (dev_priv->gt.active_requests) goto out_unlock; for_each_engine(engine, dev_priv, id) @@ -4399,6 +4397,7 @@ int i915_gem_suspend(struct drm_device *dev) goto err; i915_gem_retire_requests(dev_priv); + GEM_BUG_ON(dev_priv->gt.active_requests); assert_kernel_context_is_current(dev_priv); i915_gem_context_lost(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 9c34a4c..9b22f66 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -159,6 +159,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) */ list_del(&request->ring_link); request->ring->last_retired_head = request->postfix; + request->i915->gt.active_requests--; /* Walk through the active list, calling retire on each. This allows * objects to track their GPU activity and mark themselves as idle @@ -253,13 +254,15 @@ static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno) return ret; i915_gem_retire_requests(i915); + GEM_BUG_ON(i915->gt.active_requests > 1); /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ - if (!i915_seqno_passed(seqno, timeline->next_seqno)) { + if (!i915_seqno_passed(seqno, atomic_read(&timeline->next_seqno))) { while (intel_kick_waiters(i915) || intel_kick_signalers(i915)) yield(); yield(); } + atomic_set(&timeline->next_seqno, seqno); /* Finally reset hw state */ for_each_engine(engine, i915, id) @@ -279,7 +282,6 @@ static int i915_gem_init_global_seqno(struct drm_i915_private *i915, u32 seqno) int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno) { struct drm_i915_private *dev_priv = to_i915(dev); - int ret; lockdep_assert_held(&dev_priv->drm.struct_mutex); @@ -289,34 +291,33 @@ int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno) /* HWS page needs to be set less than what we * will inject to ring */ - ret = i915_gem_init_global_seqno(dev_priv, seqno - 1); - if (ret) - return ret; - - dev_priv->gt.global_timeline.next_seqno = seqno; - return 0; + return i915_gem_init_global_seqno(dev_priv, seqno - 1); } -static int i915_gem_get_global_seqno(struct drm_i915_private *dev_priv, - u32 *seqno) +static int reserve_global_seqno(struct drm_i915_private *i915) { - struct i915_gem_timeline *tl = &dev_priv->gt.global_timeline; - - /* reserve 0 for non-seqno */ - if (unlikely(tl->next_seqno == 0)) { - int ret; + u32 active_requests = ++i915->gt.active_requests; + u32 next_seqno = atomic_read(&i915->gt.global_timeline.next_seqno); + int ret; - ret = i915_gem_init_global_seqno(dev_priv, 0); - if (ret) - return ret; + 
/* Reservation is fine until we need to wrap around */ + if (likely(next_seqno + active_requests > next_seqno)) + return 0; - tl->next_seqno = 1; + ret = i915_gem_init_global_seqno(i915, 0); + if (ret) { + i915->gt.active_requests--; + return ret; } - *seqno = tl->next_seqno++; return 0; } +static u32 timeline_get_seqno(struct i915_gem_timeline *tl) +{ + return atomic_inc_return(&tl->next_seqno); +} + static int __i915_sw_fence_call submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) { @@ -356,9 +357,10 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, { struct drm_i915_private *dev_priv = engine->i915; struct drm_i915_gem_request *req; - u32 seqno; int ret; + lockdep_assert_held(&dev_priv->drm.struct_mutex); + /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex * and restart. @@ -367,6 +369,10 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, if (ret) return ERR_PTR(ret); + ret = reserve_global_seqno(dev_priv); + if (ret) + return ERR_PTR(ret); + /* Move the oldest request to the slab-cache (if not in use!) */ req = list_first_entry_or_null(&engine->timeline->requests, typeof(*req), link); @@ -402,12 +408,10 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, * Do not use kmem_cache_zalloc() here! */ req = kmem_cache_alloc(dev_priv->requests, GFP_KERNEL); - if (!req) - return ERR_PTR(-ENOMEM); - - ret = i915_gem_get_global_seqno(dev_priv, &seqno); - if (ret) - goto err; + if (!req) { + ret = -ENOMEM; + goto err_unreserve; + } req->timeline = engine->timeline; @@ -416,14 +420,14 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, &i915_fence_ops, &req->lock, req->timeline->fence_context, - seqno); + timeline_get_seqno(req->timeline->common)); i915_sw_fence_init(&req->submit, submit_notify); INIT_LIST_HEAD(&req->active_list); req->i915 = dev_priv; req->engine = engine; - req->global_seqno = seqno; + req->global_seqno = req->fence.seqno; req->ctx = i915_gem_context_get(ctx); /* No zalloc, must clear what we need by hand */ @@ -459,8 +463,9 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, err_ctx: i915_gem_context_put(ctx); -err: kmem_cache_free(dev_priv->requests, req); +err_unreserve: + dev_priv->gt.active_requests--; return ERR_PTR(ret); } @@ -624,7 +629,6 @@ static void i915_gem_mark_busy(const struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - dev_priv->gt.active_engines |= intel_engine_flag(engine); if (dev_priv->gt.awake) return; @@ -700,6 +704,9 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) i915_sw_fence_await_sw_fence(&request->submit, &prev->submit, &request->submitq); + GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, + request->fence.seqno)); + request->emitted_jiffies = jiffies; request->previous_seqno = timeline->last_pending_seqno; timeline->last_pending_seqno = request->fence.seqno; @@ -962,38 +969,35 @@ complete: return timeout; } -static bool engine_retire_requests(struct intel_engine_cs *engine) +static void engine_retire_requests(struct intel_engine_cs *engine) { struct drm_i915_gem_request *request, *next; list_for_each_entry_safe(request, next, &engine->timeline->requests, link) { if (!i915_gem_request_completed(request)) - return false; + return; i915_gem_request_retire(request); } - - return true; } void i915_gem_retire_requests(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; - unsigned int tmp; + enum intel_engine_id 
id; lockdep_assert_held(&dev_priv->drm.struct_mutex); - if (dev_priv->gt.active_engines == 0) + if (!dev_priv->gt.active_requests) return; GEM_BUG_ON(!dev_priv->gt.awake); - for_each_engine_masked(engine, dev_priv, dev_priv->gt.active_engines, tmp) - if (engine_retire_requests(engine)) - dev_priv->gt.active_engines &= ~intel_engine_flag(engine); + for_each_engine(engine, dev_priv, id) + engine_retire_requests(engine); - if (dev_priv->gt.active_engines == 0) + if (!dev_priv->gt.active_requests) queue_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, msecs_to_jiffies(100)); diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h index 767b239..18e6039 100644 --- a/drivers/gpu/drm/i915/i915_gem_timeline.h +++ b/drivers/gpu/drm/i915/i915_gem_timeline.h @@ -55,7 +55,7 @@ struct intel_timeline { struct i915_gem_timeline { struct list_head link; - u32 next_seqno; + atomic_t next_seqno; struct drm_i915_private *i915; const char *name; -- cgit v1.1 From f2d13290e3275df34c0cd625fbc665965af08c67 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:57 +0100 Subject: drm/i915: Defer setting of global seqno on request to submission Defer the assignment of the global seqno on a request to its submission. In the next patch, we will only allocate the global seqno at that time, here we are just enabling the wait-for-submission before wait-for-seqno paths. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-34-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_request.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 9b22f66..7499e3b 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -324,14 +324,32 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) struct drm_i915_gem_request *request = container_of(fence, typeof(*request), submit); struct intel_engine_cs *engine = request->engine; + struct intel_timeline *timeline; + u32 seqno; if (state != FENCE_COMPLETE) return NOTIFY_DONE; /* Will be called from irq-context when using foreign DMA fences */ - engine->timeline->last_submitted_seqno = request->fence.seqno; + timeline = request->timeline; + seqno = request->fence.seqno; + GEM_BUG_ON(!seqno); + GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno)); + + GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, seqno)); + request->previous_seqno = timeline->last_submitted_seqno; + timeline->last_submitted_seqno = seqno; + + /* We may be recursing from the signal callback of another i915 fence */ + spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); + request->global_seqno = seqno; + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) + intel_engine_enable_signaling(request); + spin_unlock(&request->lock); + + GEM_BUG_ON(!request->global_seqno); engine->emit_breadcrumb(request, request->ring->vaddr + request->postfix); engine->submit_request(request); @@ -427,10 +445,10 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, INIT_LIST_HEAD(&req->active_list); req->i915 = dev_priv; req->engine = engine; - req->global_seqno = req->fence.seqno; req->ctx = i915_gem_context_get(ctx); /* No zalloc, must clear what we need by hand */ + req->global_seqno = 0; req->previous_context = NULL; 
req->file_priv = NULL; req->batch = NULL; @@ -704,15 +722,13 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) i915_sw_fence_await_sw_fence(&request->submit, &prev->submit, &request->submitq); - GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, - request->fence.seqno)); + list_add_tail(&request->link, &timeline->requests); - request->emitted_jiffies = jiffies; - request->previous_seqno = timeline->last_pending_seqno; timeline->last_pending_seqno = request->fence.seqno; i915_gem_active_set(&timeline->last_request, request); - list_add_tail(&request->link, &timeline->requests); + list_add_tail(&request->ring_link, &ring->request_list); + request->emitted_jiffies = jiffies; i915_gem_mark_busy(engine); -- cgit v1.1 From 80b204bce8f27b52cd65839e0e6144b4452ae3de Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Oct 2016 13:58:58 +0100 Subject: drm/i915: Enable multiple timelines With the infrastructure converted over to tracking multiple timelines in the GEM API whilst preserving the efficiency of using a single execution timeline internally, we can now assign a separate timeline to every context with full-ppgtt. v2: Add a comment to indicate the xfer between timelines upon submission. Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161028125858.23563-35-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 10 ++++++ drivers/gpu/drm/i915/i915_gem.c | 10 +++--- drivers/gpu/drm/i915/i915_gem_context.c | 4 +-- drivers/gpu/drm/i915/i915_gem_evict.c | 11 +++--- drivers/gpu/drm/i915/i915_gem_gtt.c | 19 ++++++---- drivers/gpu/drm/i915/i915_gem_gtt.h | 4 ++- drivers/gpu/drm/i915/i915_gem_request.c | 61 +++++++++++++++++--------------- drivers/gpu/drm/i915/i915_gem_timeline.c | 1 + drivers/gpu/drm/i915/i915_gem_timeline.h | 3 +- drivers/gpu/drm/i915/intel_ringbuffer.h | 5 --- 10 files changed, 77 insertions(+), 51 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index eacb144..42a4996 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3549,6 +3549,16 @@ static inline void i915_gem_context_put(struct i915_gem_context *ctx) kref_put(&ctx->ref, i915_gem_context_free); } +static inline struct intel_timeline * +i915_gem_context_lookup_timeline(struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + struct i915_address_space *vm; + + vm = ctx->ppgtt ? &ctx->ppgtt->base : &ctx->i915->ggtt.base; + return &vm->timeline.engine[engine->id]; +} + static inline bool i915_gem_context_is_default(const struct i915_gem_context *c) { return c->user_handle == DEFAULT_CONTEXT_HANDLE; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 8a5d207..1e5d2bf 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2564,12 +2564,9 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) * not need an engine->irq_seqno_barrier() before the seqno reads. 
*/ list_for_each_entry(request, &engine->timeline->requests, link) { - if (i915_gem_request_completed(request)) + if (__i915_gem_request_completed(request)) continue; - if (!i915_sw_fence_done(&request->submit)) - break; - return request; } @@ -2597,6 +2594,7 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine) { struct drm_i915_gem_request *request; struct i915_gem_context *incomplete_ctx; + struct intel_timeline *timeline; bool ring_hung; if (engine->irq_seqno_barrier) @@ -2635,6 +2633,10 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine) list_for_each_entry_continue(request, &engine->timeline->requests, link) if (request->ctx == incomplete_ctx) reset_request(request); + + timeline = i915_gem_context_lookup_timeline(incomplete_ctx, engine); + list_for_each_entry(request, &timeline->requests, link) + reset_request(request); } void i915_gem_reset(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index d3118db..461aece 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -365,9 +365,9 @@ i915_gem_create_context(struct drm_device *dev, return ctx; if (USES_FULL_PPGTT(dev)) { - struct i915_hw_ppgtt *ppgtt = - i915_ppgtt_create(to_i915(dev), file_priv); + struct i915_hw_ppgtt *ppgtt; + ppgtt = i915_ppgtt_create(to_i915(dev), file_priv, ctx->name); if (IS_ERR(ppgtt)) { DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n", PTR_ERR(ppgtt)); diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index 79b9641..bd08814 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -33,14 +33,17 @@ #include "intel_drv.h" #include "i915_trace.h" -static bool -gpu_is_idle(struct drm_i915_private *dev_priv) +static bool ggtt_is_idle(struct drm_i915_private *dev_priv) { + struct i915_ggtt *ggtt = &dev_priv->ggtt; struct intel_engine_cs *engine; enum intel_engine_id id; for_each_engine(engine, dev_priv, id) { - if (intel_engine_is_active(engine)) + struct intel_timeline *tl; + + tl = &ggtt->base.timeline.engine[engine->id]; + if (i915_gem_active_isset(&tl->last_request)) return false; } @@ -154,7 +157,7 @@ search_again: if (!i915_is_ggtt(vm) || flags & PIN_NONBLOCK) return -ENOSPC; - if (gpu_is_idle(dev_priv)) { + if (ggtt_is_idle(dev_priv)) { /* If we still have pending pageflip completions, drop * back to userspace to give our workqueues time to * acquire our locks and unpin the old scanouts. 
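The "xfer between timelines upon submission" noted in v2 reduces to a
two-step scheme (condensed from the i915_gem_request.c hunks further
below; simplified, not a literal quote):

    /* construction: the request joins its context's timeline and
     * gets a per-context seqno (struct_mutex serialised)
     */
    req->timeline = i915_gem_context_lookup_timeline(ctx, engine);
    dma_fence_init(&req->fence, ...,
                   __timeline_get_seqno(req->timeline->common));

    /* submission: the request is moved onto the engine's single
     * execution timeline and only now receives its global seqno
     */
    spin_lock_irqsave(&engine->timeline->lock, flags);
    req->global_seqno = timeline_get_seqno(engine->timeline->common);
    list_move_tail(&req->link, &engine->timeline->requests);
    spin_unlock_irqrestore(&engine->timeline->lock, flags);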
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 1b1a459..e7afad5 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2185,8 +2185,10 @@ static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt, } static void i915_address_space_init(struct i915_address_space *vm, - struct drm_i915_private *dev_priv) + struct drm_i915_private *dev_priv, + const char *name) { + i915_gem_timeline_init(dev_priv, &vm->timeline, name); drm_mm_init(&vm->mm, vm->start, vm->total); INIT_LIST_HEAD(&vm->active_list); INIT_LIST_HEAD(&vm->inactive_list); @@ -2215,14 +2217,15 @@ static void gtt_write_workarounds(struct drm_device *dev) static int i915_ppgtt_init(struct i915_hw_ppgtt *ppgtt, struct drm_i915_private *dev_priv, - struct drm_i915_file_private *file_priv) + struct drm_i915_file_private *file_priv, + const char *name) { int ret; ret = __hw_ppgtt_init(ppgtt, dev_priv); if (ret == 0) { kref_init(&ppgtt->ref); - i915_address_space_init(&ppgtt->base, dev_priv); + i915_address_space_init(&ppgtt->base, dev_priv, name); ppgtt->base.file = file_priv; } @@ -2258,7 +2261,8 @@ int i915_ppgtt_init_hw(struct drm_device *dev) struct i915_hw_ppgtt * i915_ppgtt_create(struct drm_i915_private *dev_priv, - struct drm_i915_file_private *fpriv) + struct drm_i915_file_private *fpriv, + const char *name) { struct i915_hw_ppgtt *ppgtt; int ret; @@ -2267,7 +2271,7 @@ i915_ppgtt_create(struct drm_i915_private *dev_priv, if (!ppgtt) return ERR_PTR(-ENOMEM); - ret = i915_ppgtt_init(ppgtt, dev_priv, fpriv); + ret = i915_ppgtt_init(ppgtt, dev_priv, fpriv, name); if (ret) { kfree(ppgtt); return ERR_PTR(ret); @@ -2290,6 +2294,7 @@ void i915_ppgtt_release(struct kref *kref) WARN_ON(!list_empty(&ppgtt->base.inactive_list)); WARN_ON(!list_empty(&ppgtt->base.unbound_list)); + i915_gem_timeline_fini(&ppgtt->base.timeline); list_del(&ppgtt->base.global_link); drm_mm_takedown(&ppgtt->base.mm); @@ -3232,11 +3237,13 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) /* Subtract the guard page before address space initialization to * shrink the range used by drm_mm. */ + mutex_lock(&dev_priv->drm.struct_mutex); ggtt->base.total -= PAGE_SIZE; - i915_address_space_init(&ggtt->base, dev_priv); + i915_address_space_init(&ggtt->base, dev_priv, "[global]"); ggtt->base.total += PAGE_SIZE; if (!HAS_LLC(dev_priv)) ggtt->base.mm.color_adjust = i915_gtt_color_adjust; + mutex_unlock(&dev_priv->drm.struct_mutex); if (!io_mapping_init_wc(&dev_priv->ggtt.mappable, dev_priv->ggtt.mappable_base, diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 9f0327e..518e75b 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -342,6 +342,7 @@ struct i915_pml4 { struct i915_address_space { struct drm_mm mm; + struct i915_gem_timeline timeline; struct drm_device *dev; /* Every address space belongs to a struct file - except for the global * GTT that is owned by the driver (and so @file is set to NULL). 
In @@ -613,7 +614,8 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv); int i915_ppgtt_init_hw(struct drm_device *dev); void i915_ppgtt_release(struct kref *kref); struct i915_hw_ppgtt *i915_ppgtt_create(struct drm_i915_private *dev_priv, - struct drm_i915_file_private *fpriv); + struct drm_i915_file_private *fpriv, + const char *name); static inline void i915_ppgtt_get(struct i915_hw_ppgtt *ppgtt) { if (ppgtt) diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 7499e3b..79b0046 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -34,12 +34,6 @@ static const char *i915_fence_get_driver_name(struct dma_fence *fence) static const char *i915_fence_get_timeline_name(struct dma_fence *fence) { - /* Timelines are bound by eviction to a VM. However, since - * we only have a global seqno at the moment, we only have - * a single timeline. Note that each timeline will have - * multiple execution contexts (fence contexts) as we allow - * engines within a single timeline to execute in parallel. - */ return to_request(fence)->timeline->common->name; } @@ -64,18 +58,6 @@ static signed long i915_fence_wait(struct dma_fence *fence, return i915_wait_request(to_request(fence), interruptible, timeout); } -static void i915_fence_value_str(struct dma_fence *fence, char *str, int size) -{ - snprintf(str, size, "%u", fence->seqno); -} - -static void i915_fence_timeline_value_str(struct dma_fence *fence, char *str, - int size) -{ - snprintf(str, size, "%u", - intel_engine_get_seqno(to_request(fence)->engine)); -} - static void i915_fence_release(struct dma_fence *fence) { struct drm_i915_gem_request *req = to_request(fence); @@ -90,8 +72,6 @@ const struct dma_fence_ops i915_fence_ops = { .signaled = i915_fence_signaled, .wait = i915_fence_wait, .release = i915_fence_release, - .fence_value_str = i915_fence_value_str, - .timeline_value_str = i915_fence_timeline_value_str, }; int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, @@ -147,7 +127,10 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request) GEM_BUG_ON(!i915_gem_request_completed(request)); trace_i915_gem_request_retire(request); + + spin_lock_irq(&request->engine->timeline->lock); list_del_init(&request->link); + spin_unlock_irq(&request->engine->timeline->lock); /* We know the GPU must have read the request to have * sent us the seqno + interrupt, so use the position @@ -313,6 +296,12 @@ static int reserve_global_seqno(struct drm_i915_private *i915) return 0; } +static u32 __timeline_get_seqno(struct i915_gem_timeline *tl) +{ + /* next_seqno only incremented under a mutex */ + return ++tl->next_seqno.counter; +} + static u32 timeline_get_seqno(struct i915_gem_timeline *tl) { return atomic_inc_return(&tl->next_seqno); @@ -325,16 +314,20 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) container_of(fence, typeof(*request), submit); struct intel_engine_cs *engine = request->engine; struct intel_timeline *timeline; + unsigned long flags; u32 seqno; if (state != FENCE_COMPLETE) return NOTIFY_DONE; - /* Will be called from irq-context when using foreign DMA fences */ + /* Transfer from per-context onto the global per-engine timeline */ + timeline = engine->timeline; + GEM_BUG_ON(timeline == request->timeline); - timeline = request->timeline; + /* Will be called from irq-context when using foreign DMA fences */ + spin_lock_irqsave(&timeline->lock, flags); - seqno = 
request->fence.seqno; + seqno = timeline_get_seqno(timeline->common); GEM_BUG_ON(!seqno); GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno)); @@ -354,6 +347,12 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state) request->ring->vaddr + request->postfix); engine->submit_request(request); + spin_lock_nested(&request->timeline->lock, SINGLE_DEPTH_NESTING); + list_move_tail(&request->link, &timeline->requests); + spin_unlock(&request->timeline->lock); + + spin_unlock_irqrestore(&timeline->lock, flags); + return NOTIFY_DONE; } @@ -394,7 +393,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, /* Move the oldest request to the slab-cache (if not in use!) */ req = list_first_entry_or_null(&engine->timeline->requests, typeof(*req), link); - if (req && i915_gem_request_completed(req)) + if (req && __i915_gem_request_completed(req)) i915_gem_request_retire(req); /* Beware: Dragons be flying overhead. @@ -431,14 +430,15 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, goto err_unreserve; } - req->timeline = engine->timeline; + req->timeline = i915_gem_context_lookup_timeline(ctx, engine); + GEM_BUG_ON(req->timeline == engine->timeline); spin_lock_init(&req->lock); dma_fence_init(&req->fence, &i915_fence_ops, &req->lock, req->timeline->fence_context, - timeline_get_seqno(req->timeline->common)); + __timeline_get_seqno(req->timeline->common)); i915_sw_fence_init(&req->submit, submit_notify); @@ -722,9 +722,14 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) i915_sw_fence_await_sw_fence(&request->submit, &prev->submit, &request->submitq); + spin_lock_irq(&timeline->lock); list_add_tail(&request->link, &timeline->requests); + spin_unlock_irq(&timeline->lock); + + GEM_BUG_ON(i915_seqno_passed(timeline->last_submitted_seqno, + request->fence.seqno)); - timeline->last_pending_seqno = request->fence.seqno; + timeline->last_submitted_seqno = request->fence.seqno; i915_gem_active_set(&timeline->last_request, request); list_add_tail(&request->ring_link, &ring->request_list); @@ -991,7 +996,7 @@ static void engine_retire_requests(struct intel_engine_cs *engine) list_for_each_entry_safe(request, next, &engine->timeline->requests, link) { - if (!i915_gem_request_completed(request)) + if (!__i915_gem_request_completed(request)) return; i915_gem_request_retire(request); diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.c b/drivers/gpu/drm/i915/i915_gem_timeline.c index a1bd03d..fc8f13a 100644 --- a/drivers/gpu/drm/i915/i915_gem_timeline.c +++ b/drivers/gpu/drm/i915/i915_gem_timeline.c @@ -48,6 +48,7 @@ int i915_gem_timeline_init(struct drm_i915_private *i915, tl->fence_context = fences++; tl->common = timeline; + spin_lock_init(&tl->lock); init_request_active(&tl->last_request, NULL); INIT_LIST_HEAD(&tl->requests); } diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_gem_timeline.h index 18e6039..f2bf7b1 100644 --- a/drivers/gpu/drm/i915/i915_gem_timeline.h +++ b/drivers/gpu/drm/i915/i915_gem_timeline.h @@ -34,7 +34,8 @@ struct i915_gem_timeline; struct intel_timeline { u64 fence_context; u32 last_submitted_seqno; - u32 last_pending_seqno; + + spinlock_t lock; /** * List of breadcrumbs associated with GPU requests currently diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index d16c74a..43f61aa 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -569,9 +569,4 @@ void 
intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); unsigned int intel_kick_waiters(struct drm_i915_private *i915); unsigned int intel_kick_signalers(struct drm_i915_private *i915); -static inline bool intel_engine_is_active(struct intel_engine_cs *engine) -{ - return i915_gem_active_isset(&engine->timeline->last_request); -} - #endif /* _INTEL_RINGBUFFER_H_ */ -- cgit v1.1 From b64b540931483cca3200d98756bed6ad0e01d75c Mon Sep 17 00:00:00 2001 From: Lyude Date: Wed, 26 Oct 2016 12:36:09 -0400 Subject: drm/i915/vlv: Prevent enabling hpd polling in late suspend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit One of the CI machines began to run into issues with the hpd poller suddenly waking up in the midst of the late suspend phase. It looks like this is getting caused by the fact we now deinitialize power wells in late suspend, which means that intel_hpd_poll_init() gets called in late suspend causing polling to get re-enabled. So, when deinitializing power wells on valleyview we now refrain from enabling polling in the midst of suspend. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98040 Fixes: 19625e85c6ec ("drm/i915: Enable polling when we don't have hpd") Signed-off-by: Lyude Cc: Ville Syrjälä Cc: Jani Saarinen Cc: Petry Latvala Reviewed-by: Ville Syrjälä Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1477499769-1966-1-git-send-email-lyude@redhat.com --- drivers/gpu/drm/i915/intel_runtime_pm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 95034a0..9a3a745 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -1113,7 +1113,9 @@ static void vlv_display_power_well_deinit(struct drm_i915_private *dev_priv) intel_power_sequencer_reset(dev_priv); - intel_hpd_poll_init(dev_priv); + /* Prevent us from re-enabling polling on accident in late suspend */ + if (!dev_priv->drm.dev->power.is_suspended) + intel_hpd_poll_init(dev_priv); } static void vlv_display_power_well_enable(struct drm_i915_private *dev_priv, -- cgit v1.1 From 7b7a119e8546e27227a7969883a3c34ed7dbb0cf Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 31 Oct 2016 12:40:48 +0000 Subject: drm/i915: Mark up obj->mm.lock for shrinker As we may allocate from within the obj->mm.lock we may enter the shrinker for direct reclaim. Operating on the current object is prevented by checking for obj->mm.pages (which is only set as the last operation in the allocation path). However, we need to identify the single recursion of accessing another object's obj->mm.lock as the two locks have identical class and so appear to be the same to lockdep, convincing it that a deadlock is possible. Use mutex_lock_nested() to remove the false positive. [ 2165.945734] ================================= [ 2165.945749] [ INFO: inconsistent lock state ] [ 2165.945765] 4.9.0-rc2+ #2 Tainted: G W [ 2165.945781] --------------------------------- [ 2165.945796] inconsistent {RECLAIM_FS-ON-W} -> {IN-RECLAIM_FS-W} usage. 
[ 2165.945816] kswapd0/62 [HC0[0]:SC0[0]:HE1:SE1] takes: (&obj->mm.lock){+.+.?.}, at: [] i915_gem_shrink+0x29f/0x500 [i915] [ 2165.945904] {RECLAIM_FS-ON-W} state was registered at: [ 2165.945931] [] mark_held_locks+0x6f/0xa0 [ 2165.945956] [] lockdep_trace_alloc+0x69/0xc0 [ 2165.945982] [] kmem_cache_alloc_trace+0x33/0x2a0 [ 2165.946019] [] i915_gem_object_get_pages_stolen+0x6a/0xd0 [i915] [ 2165.946060] [] ____i915_gem_object_get_pages+0x20/0x60 [i915] [ 2165.946098] [] __i915_gem_object_get_pages+0x58/0x70 [i915] [ 2165.946138] [] _i915_gem_object_create_stolen+0xec/0x120 [i915] [ 2165.946177] [] i915_gem_object_create_stolen_for_preallocated+0xf3/0x3f0 [i915] [ 2165.946222] [] intel_alloc_initial_plane_obj.isra.125+0xd3/0x200 [i915] [ 2165.946266] [] intel_modeset_init+0x931/0x1530 [i915] [ 2165.946301] [] i915_driver_load+0xa14/0x14a0 [i915] [ 2165.946335] [] i915_pci_probe+0x4f/0x70 [i915] [ 2165.946362] [] local_pci_probe+0x42/0xa0 [ 2165.946386] [] pci_device_probe+0x103/0x150 [ 2165.946411] [] driver_probe_device+0x223/0x430 [ 2165.946436] [] __driver_attach+0xe3/0xf0 [ 2165.946461] [] bus_for_each_dev+0x73/0xc0 [ 2165.946485] [] driver_attach+0x1e/0x20 [ 2165.946508] [] bus_add_driver+0x173/0x270 [ 2165.946533] [] driver_register+0x60/0xe0 [ 2165.946557] [] __pci_register_driver+0x5d/0x60 [ 2165.946606] [] soundcore_open+0x17/0x230 [soundcore] [ 2165.946636] [] do_one_initcall+0x50/0x180 [ 2165.946661] [] do_init_module+0x5f/0x1f1 [ 2165.946685] [] load_module+0x2174/0x2a80 [ 2165.946709] [] SYSC_finit_module+0xdf/0x110 [ 2165.946734] [] SyS_finit_module+0xe/0x10 [ 2165.946758] [] entry_SYSCALL_64_fastpath+0x18/0xad [ 2165.946776] irq event stamp: 90871 [ 2165.946788] hardirqs last enabled at (90871): [ 2165.946805] [] __mutex_unlock_slowpath+0x11a/0x1c0 [ 2165.946823] hardirqs last disabled at (90870): [ 2165.946839] [] __mutex_unlock_slowpath+0x5b/0x1c0 [ 2165.946856] softirqs last enabled at (90858): [ 2165.946872] [] __do_softirq+0x39a/0x4c6 [ 2165.946887] softirqs last disabled at (90671): [ 2165.946902] [] irq_exit+0xea/0xf0 [ 2165.946916] other info that might help us debug this: [ 2165.946936] Possible unsafe locking scenario: [ 2165.946955] CPU0 [ 2165.946965] ---- [ 2165.946975] lock(&obj->mm.lock); [ 2165.947000] [ 2165.947010] lock(&obj->mm.lock); [ 2165.947035] *** DEADLOCK *** [ 2165.947054] 2 locks held by kswapd0/62: [ 2165.947067] #0: (shrinker_rwsem){++++..}, at: [] shrink_slab.part.40+0x5e/0x5d0 [ 2165.947120] #1: (&dev->struct_mutex){+.+.+.}, at: [] i915_gem_shrinker_lock+0x1b/0x60 [i915] [ 2165.948909] stack backtrace: [ 2165.950650] CPU: 2 PID: 62 Comm: kswapd0 Tainted: G W 4.9.0-rc2+ #2 [ 2165.951587] Hardware name: LENOVO 80MX/Lenovo E31-80, BIOS DCCN34WW(V2.03) 12/01/2015 [ 2165.952484] ffffc90000b5f8c8 ffffffffb137f645 ffff88016c5a2700 ffffffffb25f20a0 [ 2165.953395] ffffc90000b5f918 ffffffffb10bcecd 0000000000000000 ffff880100000001 [ 2165.954305] 0000000000000001 000000000000000a ffff88016c5a2fd0 ffff88016c5a2700 [ 2165.955240] Call Trace: [ 2165.956170] [] dump_stack+0x68/0x93 [ 2165.957071] [] print_usage_bug+0x1dd/0x1f0 [ 2165.957979] [] mark_lock+0x559/0x5c0 [ 2165.958875] [] ? print_shortest_lock_dependencies+0x1b0/0x1b0 [ 2165.959829] [] __lock_acquire+0x66d/0x12a0 [ 2165.960729] [] ? __slab_free+0xa1/0x340 [ 2165.961625] [] ? debug_lockdep_rcu_enabled+0x1d/0x20 [ 2165.962530] [] ? mark_held_locks+0x6f/0xa0 [ 2165.963457] [] lock_acquire+0xf0/0x1f0 [ 2165.964368] [] ? i915_gem_shrink+0x29f/0x500 [i915] [ 2165.965269] [] ? 
i915_gem_shrink+0x29f/0x500 [i915] [ 2165.966150] [] mutex_lock_nested+0x77/0x420 [ 2165.967030] [] ? i915_gem_shrink+0x29f/0x500 [i915] [ 2165.967952] [] ? __i915_gem_object_put_pages.part.58+0x161/0x1b0 [i915] [ 2165.968835] [] i915_gem_shrink+0x29f/0x500 [i915] [ 2165.969712] [] i915_gem_shrinker_scan+0x70/0xb0 [i915] [ 2165.970591] [] shrink_slab.part.40+0x1fe/0x5d0 [ 2165.971504] [] shrink_node+0x22c/0x320 [ 2165.972371] [] kswapd+0x38b/0x9b0 [ 2165.973238] [] ? mem_cgroup_shrink_node+0x330/0x330 [ 2165.974068] [] kthread+0xff/0x120 [ 2165.974929] [] ? kthread_park+0x60/0x60 [ 2165.975847] [] ret_from_fork+0x27/0x40 Reported-by: Tvrtko Ursulin Fixes: 1233e2db199d ("drm/i915: Move object backing storage manipulation...") Testcase: igt/gem_ctx_create/maximum-swap Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161031124048.30355-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_shrinker.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 0241658..0daa09ca 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -223,7 +223,9 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, continue; if (unsafe_drop_pages(obj)) { - mutex_lock(&obj->mm.lock); + /* May arrive from get_pages on another bo */ + mutex_lock_nested(&obj->mm.lock, + SINGLE_DEPTH_NESTING); if (!obj->mm.pages) { __i915_gem_object_invalidate(obj); count += obj->base.size >> PAGE_SHIFT; -- cgit v1.1 From 33edc24d12c42710e23f1c2cef434937af73e245 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 25 Oct 2016 18:58:00 +0300 Subject: drm/i915: Don't try to initialize sprite planes on pre-ilk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't currently implement support for sprite planes on pre-ilk platforms, so let's leave num_sprites at 0 so that we don't get spurious errors during driver init. 
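As a rough user-space sketch of the resulting per-pipe sprite counts (the helper and its flags below are illustrative stand-ins for the real intel_device_info code; the BXT special case is omitted):

#include <stdio.h>

/* toy model: 0 sprites below gen5, 1 from ILK on, 2 on VLV/CHV */
static int num_sprites(int gen, int is_vlv_or_chv)
{
	if (is_vlv_or_chv)
		return 2;
	if (gen >= 5)
		return 1;
	return 0;	/* pre-ilk: nothing to init, so no spurious errors */
}

int main(void)
{
	printf("gen4: %d\n", num_sprites(4, 0));	/* 0 */
	printf("gen5: %d\n", num_sprites(5, 0));	/* 1 */
	printf("chv:  %d\n", num_sprites(8, 1));	/* 2 */
	return 0;
}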
Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1477411083-19255-2-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_device_info.c | 5 +++-- drivers/gpu/drm/i915/intel_sprite.c | 3 --- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index d6a8f11..185e3bb 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -282,12 +282,13 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) info->num_sprites[PIPE_A] = 2; info->num_sprites[PIPE_B] = 2; info->num_sprites[PIPE_C] = 1; - } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { for_each_pipe(dev_priv, pipe) info->num_sprites[pipe] = 2; - else + } else if (INTEL_GEN(dev_priv) >= 5) { for_each_pipe(dev_priv, pipe) info->num_sprites[pipe] = 1; + } if (i915.disable_display) { DRM_INFO("Display disabled (module parameter)\n"); diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 43d0350..ae1e3d4 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -1054,9 +1054,6 @@ intel_plane_init(struct drm_device *dev, enum pipe pipe, int plane) int num_plane_formats; int ret; - if (INTEL_INFO(dev)->gen < 5) - return -ENODEV; - intel_plane = kzalloc(sizeof(*intel_plane), GFP_KERNEL); if (!intel_plane) { ret = -ENOMEM; -- cgit v1.1 From a81d6fa0966b88b1ddd79302d7d351d493c4db9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 25 Oct 2016 18:58:01 +0300 Subject: drm/i915: Initialize planes in a reasonable order MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The zpos magic sorting uses the object ID to solve conflicting zpos values. Let's initialize our planes in an order that makes the object IDs agree with the normal primary->sprites->cursor z order. 
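The tie-break can be sketched with an ordinary sort: when zpos values collide, comparing object IDs makes creation order decide the stacking. The plane names and IDs below are invented for illustration:

#include <stdio.h>
#include <stdlib.h>

struct plane { const char *name; unsigned int obj_id; unsigned int zpos; };

static int cmp_zpos(const void *a, const void *b)
{
	const struct plane *pa = a, *pb = b;

	if (pa->zpos != pb->zpos)
		return pa->zpos < pb->zpos ? -1 : 1;
	/* conflicting zpos: the lower object ID (created earlier) sorts below */
	return pa->obj_id < pb->obj_id ? -1 : 1;
}

int main(void)
{
	/* all at zpos 0: stacking is decided purely by allocation order */
	struct plane planes[] = {
		{ "cursor",  32, 0 },	/* created last  -> ends up on top */
		{ "primary", 30, 0 },	/* created first -> ends up at the bottom */
		{ "sprite",  31, 0 },
	};

	qsort(planes, 3, sizeof(planes[0]), cmp_zpos);
	for (int i = 0; i < 3; i++)
		printf("z=%d: %s (id %u)\n", i, planes[i].name, planes[i].obj_id);
	return 0;
}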
Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1477411083-19255-3-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 304073a..b9692f6 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -15241,7 +15241,7 @@ static void intel_crtc_init(struct drm_device *dev, int pipe) struct intel_crtc_state *crtc_state = NULL; struct drm_plane *primary = NULL; struct drm_plane *cursor = NULL; - int ret; + int sprite, ret; intel_crtc = kzalloc(sizeof(*intel_crtc), GFP_KERNEL); if (intel_crtc == NULL) @@ -15268,6 +15268,13 @@ static void intel_crtc_init(struct drm_device *dev, int pipe) if (!primary) goto fail; + for_each_sprite(dev_priv, pipe, sprite) { + ret = intel_plane_init(dev, pipe, sprite); + if (ret) + DRM_DEBUG_KMS("pipe %c sprite %c init failed: %d\n", + pipe_name(pipe), sprite_name(pipe, sprite), ret); + } + cursor = intel_cursor_plane_create(dev, pipe); if (!cursor) goto fail; @@ -16392,7 +16399,6 @@ void intel_modeset_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); struct i915_ggtt *ggtt = &dev_priv->ggtt; - int sprite, ret; enum pipe pipe; struct intel_crtc *crtc; @@ -16463,12 +16469,6 @@ void intel_modeset_init(struct drm_device *dev) for_each_pipe(dev_priv, pipe) { intel_crtc_init(dev, pipe); - for_each_sprite(dev_priv, pipe, sprite) { - ret = intel_plane_init(dev, pipe, sprite); - if (ret) - DRM_DEBUG_KMS("pipe %c sprite %c init failed: %d\n", - pipe_name(pipe), sprite_name(pipe, sprite), ret); - } } intel_update_czclk(dev_priv); -- cgit v1.1 From b079bd17e301708da044b4ea114f44c4aa47160c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 25 Oct 2016 18:58:02 +0300 Subject: drm/i915: Bail if plane/crtc init fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Due to the plane->index not getting readjusted in drm_plane_cleanup(), we can't continue initialization if some plane/crtc init fails. Well, we sort of could I suppose if we left all initialized planes on the list, but that would expose those planes to userspace as well. But for crtcs the situation is even worse since we assume that pipe==crtc index occasionally, so we can't really deal with a partially initialized set of crtcs. So it seems safest to just abort the entire thing if anything goes wrong. All the failure paths here are kmalloc()s anyway, so it seems unlikely we'd get very far if these start failing. 
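The ERR_PTR error-flow this rework adopts can be sketched in user space as follows (the err.h helpers are simplified, and plane_create()/crtc_init() are invented stand-ins, not the driver functions):

#include <stdio.h>
#include <errno.h>
#include <stdint.h>
#include <stdlib.h>

#define MAX_ERRNO 4095
static void *ERR_PTR(long err) { return (void *)err; }
static long PTR_ERR(const void *p) { return (long)p; }
static int IS_ERR(const void *p) { return (uintptr_t)p >= (uintptr_t)-MAX_ERRNO; }

struct plane { int pipe; };

static struct plane *plane_create(int pipe, int simulate_oom)
{
	struct plane *p;

	if (simulate_oom)
		return ERR_PTR(-ENOMEM);	/* encode errno in the pointer */
	p = malloc(sizeof(*p));
	if (!p)
		return ERR_PTR(-ENOMEM);
	p->pipe = pipe;
	return p;
}

static int crtc_init(int pipe, int simulate_oom)
{
	struct plane *primary = plane_create(pipe, simulate_oom);

	if (IS_ERR(primary))
		return (int)PTR_ERR(primary);	/* bail: keep no partial init */
	free(primary);
	return 0;
}

int main(void)
{
	printf("ok:  %d\n", crtc_init(0, 0));	/* 0 */
	printf("oom: %d\n", crtc_init(0, 1));	/* -12 */
	return 0;
}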
v2: Add (enum plane) case to silence gcc Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1477411083-19255-4-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_drv.c | 4 +- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/intel_display.c | 103 ++++++++++++++++++++++------------- drivers/gpu/drm/i915/intel_drv.h | 3 +- drivers/gpu/drm/i915/intel_sprite.c | 8 +-- 5 files changed, 76 insertions(+), 44 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 839ce2a..6a99544 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -595,7 +595,9 @@ static int i915_load_modeset_init(struct drm_device *dev) /* Important: The output setup functions called by modeset_init need * working irqs for e.g. gmbus and dp aux transfers. */ - intel_modeset_init(dev); + ret = intel_modeset_init(dev); + if (ret) + goto cleanup_irq; intel_guc_init(dev); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 42a4996..51360d1 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3799,7 +3799,7 @@ void intel_device_info_dump(struct drm_i915_private *dev_priv); /* modesetting */ extern void intel_modeset_init_hw(struct drm_device *dev); -extern void intel_modeset_init(struct drm_device *dev); +extern int intel_modeset_init(struct drm_device *dev); extern void intel_modeset_gem_init(struct drm_device *dev); extern void intel_modeset_cleanup(struct drm_device *dev); extern int intel_connector_register(struct drm_connector *); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index b9692f6..740c688 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14944,9 +14944,6 @@ static void intel_finish_crtc_commit(struct drm_crtc *crtc, */ void intel_plane_destroy(struct drm_plane *plane) { - if (!plane) - return; - drm_plane_cleanup(plane); kfree(to_intel_plane(plane)); } @@ -14960,11 +14957,10 @@ const struct drm_plane_funcs intel_plane_funcs = { .atomic_set_property = intel_plane_atomic_set_property, .atomic_duplicate_state = intel_plane_duplicate_state, .atomic_destroy_state = intel_plane_destroy_state, - }; -static struct drm_plane *intel_primary_plane_create(struct drm_device *dev, - int pipe) +static struct intel_plane * +intel_primary_plane_create(struct drm_device *dev, enum pipe pipe) { struct drm_i915_private *dev_priv = to_i915(dev); struct intel_plane *primary = NULL; @@ -14975,12 +14971,17 @@ static struct drm_plane *intel_primary_plane_create(struct drm_device *dev, int ret; primary = kzalloc(sizeof(*primary), GFP_KERNEL); - if (!primary) + if (!primary) { + ret = -ENOMEM; goto fail; + } state = intel_create_plane_state(&primary->base); - if (!state) + if (!state) { + ret = -ENOMEM; goto fail; + } + primary->base.state = &state->base; primary->can_scale = false; @@ -15061,13 +15062,13 @@ static struct drm_plane *intel_primary_plane_create(struct drm_device *dev, drm_plane_helper_add(&primary->base, &intel_plane_helper_funcs); - return &primary->base; + return primary; fail: kfree(state); kfree(primary); - return NULL; + return ERR_PTR(ret); } static int @@ -15163,8 +15164,8 @@ intel_update_cursor_plane(struct drm_plane *plane, intel_crtc_update_cursor(crtc, state); } -static struct drm_plane *intel_cursor_plane_create(struct drm_device *dev, - int pipe) +static struct 
intel_plane * +intel_cursor_plane_create(struct drm_device *dev, enum pipe pipe) { struct drm_i915_private *dev_priv = to_i915(dev); struct intel_plane *cursor = NULL; @@ -15172,12 +15173,17 @@ static struct drm_plane *intel_cursor_plane_create(struct drm_device *dev, int ret; cursor = kzalloc(sizeof(*cursor), GFP_KERNEL); - if (!cursor) + if (!cursor) { + ret = -ENOMEM; goto fail; + } state = intel_create_plane_state(&cursor->base); - if (!state) + if (!state) { + ret = -ENOMEM; goto fail; + } + cursor->base.state = &state->base; cursor->can_scale = false; @@ -15209,13 +15215,13 @@ static struct drm_plane *intel_cursor_plane_create(struct drm_device *dev, drm_plane_helper_add(&cursor->base, &intel_plane_helper_funcs); - return &cursor->base; + return cursor; fail: kfree(state); kfree(cursor); - return NULL; + return ERR_PTR(ret); } static void skl_init_scalers(struct drm_device *dev, struct intel_crtc *intel_crtc, @@ -15234,22 +15240,24 @@ static void skl_init_scalers(struct drm_device *dev, struct intel_crtc *intel_cr scaler_state->scaler_id = -1; } -static void intel_crtc_init(struct drm_device *dev, int pipe) +static int intel_crtc_init(struct drm_device *dev, enum pipe pipe) { struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc; struct intel_crtc_state *crtc_state = NULL; - struct drm_plane *primary = NULL; - struct drm_plane *cursor = NULL; + struct intel_plane *primary = NULL; + struct intel_plane *cursor = NULL; int sprite, ret; intel_crtc = kzalloc(sizeof(*intel_crtc), GFP_KERNEL); - if (intel_crtc == NULL) - return; + if (!intel_crtc) + return -ENOMEM; crtc_state = kzalloc(sizeof(*crtc_state), GFP_KERNEL); - if (!crtc_state) + if (!crtc_state) { + ret = -ENOMEM; goto fail; + } intel_crtc->config = crtc_state; intel_crtc->base.state = &crtc_state->base; crtc_state->base.crtc = &intel_crtc->base; @@ -15265,22 +15273,30 @@ static void intel_crtc_init(struct drm_device *dev, int pipe) } primary = intel_primary_plane_create(dev, pipe); - if (!primary) + if (IS_ERR(primary)) { + ret = PTR_ERR(primary); goto fail; + } for_each_sprite(dev_priv, pipe, sprite) { - ret = intel_plane_init(dev, pipe, sprite); - if (ret) - DRM_DEBUG_KMS("pipe %c sprite %c init failed: %d\n", - pipe_name(pipe), sprite_name(pipe, sprite), ret); + struct intel_plane *plane; + + plane = intel_sprite_plane_create(dev, pipe, sprite); + if (!plane) { + ret = PTR_ERR(plane); + goto fail; + } } cursor = intel_cursor_plane_create(dev, pipe); - if (!cursor) + if (!cursor) { + ret = PTR_ERR(cursor); goto fail; + } - ret = drm_crtc_init_with_planes(dev, &intel_crtc->base, primary, - cursor, &intel_crtc_funcs, + ret = drm_crtc_init_with_planes(dev, &intel_crtc->base, + &primary->base, &cursor->base, + &intel_crtc_funcs, "pipe %c", pipe_name(pipe)); if (ret) goto fail; @@ -15290,7 +15306,7 @@ static void intel_crtc_init(struct drm_device *dev, int pipe) * is hooked to pipe B. Hence we want plane A feeding pipe B. */ intel_crtc->pipe = pipe; - intel_crtc->plane = pipe; + intel_crtc->plane = (enum plane) pipe; if (HAS_FBC(dev) && INTEL_INFO(dev)->gen < 4) { DRM_DEBUG_KMS("swapping pipes & planes for FBC\n"); intel_crtc->plane = !pipe; @@ -15312,13 +15328,18 @@ static void intel_crtc_init(struct drm_device *dev, int pipe) intel_color_init(&intel_crtc->base); WARN_ON(drm_crtc_index(&intel_crtc->base) != intel_crtc->pipe); - return; + + return 0; fail: - intel_plane_destroy(primary); - intel_plane_destroy(cursor); + /* + * drm_mode_config_cleanup() will free up any + * crtcs/planes already initialized. 
+ */ kfree(crtc_state); kfree(intel_crtc); + + return ret; } enum pipe intel_get_pipe_from_connector(struct intel_connector *connector) @@ -16395,7 +16416,7 @@ fail: drm_modeset_acquire_fini(&ctx); } -void intel_modeset_init(struct drm_device *dev) +int intel_modeset_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); struct i915_ggtt *ggtt = &dev_priv->ggtt; @@ -16419,7 +16440,7 @@ void intel_modeset_init(struct drm_device *dev) intel_init_pm(dev); if (INTEL_INFO(dev)->num_pipes == 0) - return; + return 0; /* * There may be no VBT; and if the BIOS enabled SSC we can @@ -16468,7 +16489,13 @@ void intel_modeset_init(struct drm_device *dev) INTEL_INFO(dev)->num_pipes > 1 ? "s" : ""); for_each_pipe(dev_priv, pipe) { - intel_crtc_init(dev, pipe); + int ret; + + ret = intel_crtc_init(dev, pipe); + if (ret) { + drm_mode_config_cleanup(dev); + return ret; + } } intel_update_czclk(dev_priv); @@ -16516,6 +16543,8 @@ void intel_modeset_init(struct drm_device *dev) * since the watermark calculation done here will use pstate->fb. */ sanitize_watermarks(dev); + + return 0; } static void intel_enable_pipe_a(struct drm_device *dev) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 2616d92..4623335 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1778,7 +1778,8 @@ bool intel_sdvo_init(struct drm_device *dev, /* intel_sprite.c */ int intel_usecs_to_scanlines(const struct drm_display_mode *adjusted_mode, int usecs); -int intel_plane_init(struct drm_device *dev, enum pipe pipe, int plane); +struct intel_plane *intel_sprite_plane_create(struct drm_device *dev, + enum pipe pipe, int plane); int intel_sprite_set_colorkey(struct drm_device *dev, void *data, struct drm_file *file_priv); void intel_pipe_update_start(struct intel_crtc *crtc); diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index ae1e3d4..41ae7f5 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -1042,8 +1042,8 @@ static uint32_t skl_plane_formats[] = { DRM_FORMAT_VYUY, }; -int -intel_plane_init(struct drm_device *dev, enum pipe pipe, int plane) +struct intel_plane * +intel_sprite_plane_create(struct drm_device *dev, enum pipe pipe, int plane) { struct drm_i915_private *dev_priv = to_i915(dev); struct intel_plane *intel_plane = NULL; @@ -1160,11 +1160,11 @@ intel_plane_init(struct drm_device *dev, enum pipe pipe, int plane) drm_plane_helper_add(&intel_plane->base, &intel_plane_helper_funcs); - return 0; + return intel_plane; fail: kfree(state); kfree(intel_plane); - return ret; + return ERR_PTR(ret); } -- cgit v1.1 From 1890ae64841a7f304ed9aa6cc400da6102853cbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 25 Oct 2016 18:58:03 +0300 Subject: drm/i915: Reorganize sprite init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Kill the switch statement from the sprite init code and replace with a more straightforward if ladder. Now each significant evolution of the sprite hardware is in its own neat box. 
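The shape of the resulting ladder, checking the newest hardware first so that each evolution sits in one branch, could be sketched like so (the ops structs and gen cutoffs are abbreviated from the diff below; the IVB vs HSW scaling split is elided):

#include <stdio.h>

struct sprite_ops { const char *name; int can_scale; int max_downscale; };

static const struct sprite_ops skl_ops = { "skl", 1, 0 };
static const struct sprite_ops vlv_ops = { "vlv", 0, 1 };
static const struct sprite_ops ivb_ops = { "ivb", 1, 2 };
static const struct sprite_ops ilk_ops = { "ilk", 1, 16 };

static const struct sprite_ops *pick_ops(int gen, int is_vlv_or_chv)
{
	if (gen >= 9)
		return &skl_ops;	/* newest first */
	if (is_vlv_or_chv)
		return &vlv_ops;	/* VLV/CHV: no sprite scaling */
	if (gen >= 7)
		return &ivb_ops;	/* real code further splits IVB vs HSW */
	return &ilk_ops;		/* gen5/6 */
}

int main(void)
{
	printf("gen9 -> %s\n", pick_ops(9, 0)->name);
	printf("chv  -> %s\n", pick_ops(8, 1)->name);
	printf("gen6 -> %s\n", pick_ops(6, 0)->name);
	return 0;
}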
Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1477411083-19255-5-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_sprite.c | 70 ++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 39 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 41ae7f5..70b50a2 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -1067,25 +1067,25 @@ intel_sprite_plane_create(struct drm_device *dev, enum pipe pipe, int plane) } intel_plane->base.state = &state->base; - switch (INTEL_INFO(dev)->gen) { - case 5: - case 6: + if (INTEL_GEN(dev_priv) >= 9) { intel_plane->can_scale = true; - intel_plane->max_downscale = 16; - intel_plane->update_plane = ilk_update_plane; - intel_plane->disable_plane = ilk_disable_plane; + state->scaler_id = -1; - if (IS_GEN6(dev_priv)) { - plane_formats = snb_plane_formats; - num_plane_formats = ARRAY_SIZE(snb_plane_formats); - } else { - plane_formats = ilk_plane_formats; - num_plane_formats = ARRAY_SIZE(ilk_plane_formats); - } - break; + intel_plane->update_plane = skl_update_plane; + intel_plane->disable_plane = skl_disable_plane; + + plane_formats = skl_plane_formats; + num_plane_formats = ARRAY_SIZE(skl_plane_formats); + } else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { + intel_plane->can_scale = false; + intel_plane->max_downscale = 1; + + intel_plane->update_plane = vlv_update_plane; + intel_plane->disable_plane = vlv_disable_plane; - case 7: - case 8: + plane_formats = vlv_plane_formats; + num_plane_formats = ARRAY_SIZE(vlv_plane_formats); + } else if (INTEL_GEN(dev_priv) >= 7) { if (IS_IVYBRIDGE(dev_priv)) { intel_plane->can_scale = true; intel_plane->max_downscale = 2; @@ -1094,33 +1094,25 @@ intel_sprite_plane_create(struct drm_device *dev, enum pipe pipe, int plane) intel_plane->max_downscale = 1; } - if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { - intel_plane->update_plane = vlv_update_plane; - intel_plane->disable_plane = vlv_disable_plane; + intel_plane->update_plane = ivb_update_plane; + intel_plane->disable_plane = ivb_disable_plane; - plane_formats = vlv_plane_formats; - num_plane_formats = ARRAY_SIZE(vlv_plane_formats); - } else { - intel_plane->update_plane = ivb_update_plane; - intel_plane->disable_plane = ivb_disable_plane; + plane_formats = snb_plane_formats; + num_plane_formats = ARRAY_SIZE(snb_plane_formats); + } else { + intel_plane->can_scale = true; + intel_plane->max_downscale = 16; + + intel_plane->update_plane = ilk_update_plane; + intel_plane->disable_plane = ilk_disable_plane; + if (IS_GEN6(dev_priv)) { plane_formats = snb_plane_formats; num_plane_formats = ARRAY_SIZE(snb_plane_formats); + } else { + plane_formats = ilk_plane_formats; + num_plane_formats = ARRAY_SIZE(ilk_plane_formats); } - break; - case 9: - intel_plane->can_scale = true; - intel_plane->update_plane = skl_update_plane; - intel_plane->disable_plane = skl_disable_plane; - state->scaler_id = -1; - - plane_formats = skl_plane_formats; - num_plane_formats = ARRAY_SIZE(skl_plane_formats); - break; - default: - MISSING_CASE(INTEL_INFO(dev)->gen); - ret = -ENODEV; - goto fail; } if (INTEL_GEN(dev_priv) >= 9) { @@ -1139,7 +1131,7 @@ intel_sprite_plane_create(struct drm_device *dev, enum pipe pipe, int plane) possible_crtcs = (1 << pipe); - if (INTEL_INFO(dev)->gen >= 9) + if (INTEL_GEN(dev_priv) >= 9) ret = drm_universal_plane_init(dev, 
&intel_plane->base, possible_crtcs, &intel_plane_funcs, plane_formats, num_plane_formats, -- cgit v1.1 From 7d5d59e527b43d4efb00467234efbd2a0d5e5a86 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 1 Nov 2016 08:48:41 +0000 Subject: drm/i915: Use the full hammer when shutting down the rcu tasks To flush all call_rcu() tasks (here from i915_gem_free_object()) we need to call rcu_barrier() (not synchronize_rcu()). If we don't then we may still have objects being freed as we continue to teardown the driver - in particular, the recently released rings may race with the memory manager shutdown resulting in sporadic: [ 142.217186] WARNING: CPU: 7 PID: 6185 at drivers/gpu/drm/drm_mm.c:932 drm_mm_takedown+0x2e/0x40 [ 142.217187] Memory manager not clean during takedown. [ 142.217187] Modules linked in: i915(-) x86_pkg_temp_thermal intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel lpc_ich snd_hda_codec_realtek snd_hda_codec_generic mei_me mei snd_hda_codec_hdmi snd_hda_codec snd_hwdep snd_hda_core snd_pcm e1000e ptp pps_core [last unloaded: snd_hda_intel] [ 142.217199] CPU: 7 PID: 6185 Comm: rmmod Not tainted 4.9.0-rc2-CI-Trybot_242+ #1 [ 142.217199] Hardware name: LENOVO 10AGS00601/SHARKBAY, BIOS FBKT34AUS 04/24/2013 [ 142.217200] ffffc90002ecfce0 ffffffff8142dd65 ffffc90002ecfd30 0000000000000000 [ 142.217202] ffffc90002ecfd20 ffffffff8107e4e6 000003a40778c2a8 ffff880401355c48 [ 142.217204] ffff88040778c2a8 ffffffffa040f3c0 ffffffffa040f4a0 00005621fbf8b1f0 [ 142.217206] Call Trace: [ 142.217209] [] dump_stack+0x67/0x92 [ 142.217211] [] __warn+0xc6/0xe0 [ 142.217213] [] warn_slowpath_fmt+0x4a/0x50 [ 142.217214] [] drm_mm_takedown+0x2e/0x40 [ 142.217236] [] i915_gem_cleanup_stolen+0x1a/0x20 [i915] [ 142.217246] [] i915_ggtt_cleanup_hw+0x31/0xb0 [i915] [ 142.217253] [] i915_driver_cleanup_hw+0x31/0x40 [i915] [ 142.217260] [] i915_driver_unload+0x141/0x1a0 [i915] [ 142.217268] [] i915_pci_remove+0x14/0x20 [i915] [ 142.217269] [] pci_device_remove+0x34/0xb0 [ 142.217271] [] __device_release_driver+0x9c/0x150 [ 142.217272] [] driver_detach+0xb6/0xc0 [ 142.217273] [] bus_remove_driver+0x53/0xd0 [ 142.217274] [] driver_unregister+0x27/0x50 [ 142.217276] [] pci_unregister_driver+0x25/0x70 [ 142.217287] [] i915_exit+0x1a/0x71 [i915] [ 142.217289] [] SyS_delete_module+0x193/0x1e0 [ 142.217291] [] entry_SYSCALL_64_fastpath+0x1c/0xb1 [ 142.217292] ---[ end trace 6fd164859c154772 ]--- [ 142.217505] [drm:show_leaks] *ERROR* node [6b6b6b6b6b6b6b6b + 6b6b6b6b6b6b6b6b]: inserted at [] save_stack.isra.1+0x53/0xa0 [] drm_mm_insert_node_in_range_generic+0x2ad/0x360 [] i915_gem_stolen_insert_node_in_range+0x93/0xe0 [i915] [] i915_gem_object_create_stolen+0x75/0xb0 [i915] [] intel_engine_create_ring+0x9a/0x140 [i915] [] intel_init_ring_buffer+0xf1/0x440 [i915] [] intel_init_render_ring_buffer+0xab/0x1b0 [i915] [] intel_engines_init+0xc8/0x210 [i915] [] i915_gem_init+0xac/0xf0 [i915] [] i915_driver_load+0x9c4/0x1430 [i915] [] i915_pci_probe+0x28/0x40 [i915] [] pci_device_probe+0x85/0xf0 [] driver_probe_device+0x21f/0x430 [] __driver_attach+0xde/0xe0 In particular note that the node was being poisoned as we inspected the list, a clear indication that the object is being freed as we make the assertion. v2: Don't loop, just assert that we do all the work required as that will be better at detecting further errors. 
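The difference being leaned on: synchronize_rcu() waits only for a grace period, whereas rcu_barrier() additionally waits for callbacks already queued with call_rcu() to finish running. A loose user-space analogy using an explicit callback queue and a worker thread (not real RCU; all names are invented):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct cb { void (*fn)(void *); void *arg; struct cb *next; };

static struct cb *head, *tail;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;

static void enqueue(void (*fn)(void *), void *arg)	/* like call_rcu() */
{
	struct cb *cb = calloc(1, sizeof(*cb));

	cb->fn = fn;
	cb->arg = arg;
	pthread_mutex_lock(&lock);
	if (tail)
		tail->next = cb;
	else
		head = cb;
	tail = cb;
	pthread_cond_signal(&cond);
	pthread_mutex_unlock(&lock);
}

static void *worker(void *unused)
{
	(void)unused;
	for (;;) {
		pthread_mutex_lock(&lock);
		while (!head)
			pthread_cond_wait(&cond, &lock);
		struct cb *cb = head;
		head = cb->next;
		if (!head)
			tail = NULL;
		pthread_mutex_unlock(&lock);

		if (!cb->fn) {		/* sentinel: every earlier callback ran */
			free(cb);
			return NULL;
		}
		cb->fn(cb->arg);
		free(cb);
	}
}

static void say(void *arg) { printf("freed %s\n", (const char *)arg); }

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, worker, NULL);
	enqueue(say, "ring A");
	enqueue(say, "ring B");
	/* the "barrier": queue a sentinel and wait for it, guaranteeing all
	 * previously queued frees have executed before teardown continues;
	 * waiting for a grace period alone would not run these callbacks */
	enqueue(NULL, NULL);
	pthread_join(t, NULL);
	printf("safe to tear down drm_mm now\n");
	return 0;
}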
Fixes: fbbd37b36fa5 ("drm/i915: Move object release to a freelist + worker") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20161101084843.3961-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.c | 2 +- drivers/gpu/drm/i915/i915_gem.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 6a99544..3b9bfd2 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -544,7 +544,7 @@ static void i915_gem_fini(struct drm_i915_private *dev_priv) i915_gem_context_fini(&dev_priv->drm); mutex_unlock(&dev_priv->drm.struct_mutex); - synchronize_rcu(); + rcu_barrier(); flush_work(&dev_priv->mm.free_work); WARN_ON(!list_empty(&dev_priv->context_list)); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1e5d2bf..b512745 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4787,6 +4787,8 @@ void i915_gem_load_cleanup(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); + WARN_ON(!llist_empty(&dev_priv->mm.free_list)); + kmem_cache_destroy(dev_priv->requests); kmem_cache_destroy(dev_priv->vmas); kmem_cache_destroy(dev_priv->objects); -- cgit v1.1 From 415981623f80a08ae36e9757cd4fa25da140b877 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 1 Nov 2016 08:48:42 +0000 Subject: drm/i915: Discard objects from mm global_list after being shrunk In the shrinker, we can safely remove an empty object (obj->mm.pages == NULL) after having discarded the pages because we are holding the struct_mutex. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20161101084843.3961-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_shrinker.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 0daa09ca..9ace5f9 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -228,6 +228,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, SINGLE_DEPTH_NESTING); if (!obj->mm.pages) { __i915_gem_object_invalidate(obj); + list_del_init(&obj->global_list); count += obj->base.size >> PAGE_SHIFT; } mutex_unlock(&obj->mm.lock); -- cgit v1.1 From 535972771d8c99dd53471b54c4dd6a4d22313d84 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 1 Nov 2016 08:48:43 +0000 Subject: drm/i915: Move the recently scanned objects to the tail after shrinking During shrinking, we walk over the list of objects searching for victims. Any that are not removed are put back into the global list. Currently, they are put back in order (at the front) which means they will be first to be scanned again. If we instead move them to the rear of the list, we will scan new potential victims on the next pass and waste less time rescanning unshrinkable objects. Normally the lists are kept in rough order for shrinking (with the least frequently used objects at the start); by moving just-scanned objects to the rear we are acknowledging that they are still in use. 
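The pattern can be modelled with a toy list in user space (the objects, the pinned flag, and the hand-rolled list helpers are illustrative; the kernel uses list.h and list_splice_tail()):

#include <stdio.h>
#include <stdlib.h>

struct obj { const char *name; int pinned; struct obj *next; };

struct list { struct obj *head, *tail; };

static void push_tail(struct list *l, struct obj *o)
{
	o->next = NULL;
	if (l->tail)
		l->tail->next = o;
	else
		l->head = o;
	l->tail = o;
}

static struct obj *pop_head(struct list *l)
{
	struct obj *o = l->head;

	if (o) {
		l->head = o->next;
		if (!l->head)
			l->tail = NULL;
	}
	return o;
}

static void shrink(struct list *bound)
{
	struct list still = { NULL, NULL };
	struct obj *o;

	while ((o = pop_head(bound))) {
		if (o->pinned) {
			push_tail(&still, o);	/* survivor: keep for later */
			continue;
		}
		printf("reclaimed %s\n", o->name);
	}
	/* the list_splice_tail() step: survivors go to the rear, so the
	 * next pass scans fresh victims first */
	while ((o = pop_head(&still)))
		push_tail(bound, o);
}

int main(void)
{
	struct list bound = { NULL, NULL };
	struct obj a = { "a", 1 }, b = { "b", 0 }, c = { "c", 1 };

	push_tail(&bound, &a);
	push_tail(&bound, &b);
	push_tail(&bound, &c);
	shrink(&bound);
	for (struct obj *o = bound.head; o; o = o->next)
		printf("still bound: %s\n", o->name);
	return 0;
}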
Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161101084843.3961-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_gem_shrinker.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 9ace5f9..0993afc 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -234,7 +234,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, mutex_unlock(&obj->mm.lock); } } - list_splice(&still_in_list, phase->list); + list_splice_tail(&still_in_list, phase->list); } if (flags & I915_SHRINK_BOUND) -- cgit v1.1 From cb399eabc41ae0e67fe87fbebee4cb03ab1b5de9 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 1 Nov 2016 10:03:16 +0000 Subject: drm/i915: Avoid accessing request->timeline outside of its lifetime Whilst waiting on a request, we may do so without holding any locks or any guards beyond a reference to the request. In order to avoid taking locks within request deallocation, we drop references to its timeline (via the context and ppgtt) upon retirement. We should avoid chasing such pointers outside of their control, in particular we inspect the request->timeline to see if we may restore the RPS waitboost for a client. If we instead look at the engine->timeline, we will have similar behaviour on both full-ppgtt and !full-ppgtt systems and reduce the amount of reward we give towards stalling clients (i.e. only if the client stalls and the GPU is uncontended does it reclaim its boost). This restores behaviour back to pre-timelines, whilst fixing: [ 645.078485] BUG: KASAN: use-after-free in i915_gem_object_wait_fence+0x1ee/0x2e0 at addr ffff8802335643a0 [ 645.078577] Read of size 4 by task gem_exec_schedu/28408 [ 645.078638] CPU: 1 PID: 28408 Comm: gem_exec_schedu Not tainted 4.9.0-rc2+ #64 [ 645.078724] Hardware name: / , BIOS PYBSWCEL.86A.0027.2015.0507.1758 05/07/2015 [ 645.078816] ffff88022daef9a0 ffffffff8143d059 ffff880235402a80 ffff880233564200 [ 645.078998] ffff88022daef9c8 ffffffff81229c5c ffff88022daefa48 ffff880233564200 [ 645.079172] ffff880235402a80 ffff88022daefa38 ffffffff81229ef0 000000008110a796 [ 645.079345] Call Trace: [ 645.079404] [] dump_stack+0x68/0x9f [ 645.079467] [] kasan_object_err+0x1c/0x70 [ 645.079534] [] kasan_report_error+0x1f0/0x4b0 [ 645.079601] [] kasan_report+0x34/0x40 [ 645.079676] [] ? i915_gem_object_wait_fence+0x1ee/0x2e0 [ 645.079741] [] __asan_load4+0x61/0x80 [ 645.079807] [] i915_gem_object_wait_fence+0x1ee/0x2e0 [ 645.079876] [] i915_gem_object_wait+0x19f/0x590 [ 645.079944] [] ? i915_gem_object_wait_priority+0x500/0x500 [ 645.080016] [] ? debug_show_all_locks+0x1e0/0x1e0 [ 645.080084] [] ? check_chain_key+0x14c/0x210 [ 645.080157] [] ? __lock_is_held+0x46/0xc0 [ 645.080226] [] ? i915_gem_set_domain_ioctl+0x141/0x690 [ 645.080296] [] i915_gem_set_domain_ioctl+0x1a2/0x690 [ 645.080366] [] ? __might_fault+0x75/0xe0 [ 645.080433] [] drm_ioctl+0x327/0x640 [ 645.080508] [] ? i915_gem_obj_prepare_shmem_write+0x3a0/0x3a0 [ 645.080603] [] ? drm_ioctl_permit+0x120/0x120 [ 645.080670] [] ? check_chain_key+0x14c/0x210 [ 645.080738] [] do_vfs_ioctl+0x127/0xa20 [ 645.080804] [] ? do_mmap+0x47c/0x580 [ 645.080871] [] ? vm_mmap_pgoff+0x117/0x140 [ 645.080938] [] ? ioctl_preallocate+0x150/0x150 [ 645.081011] [] ? up_write+0x23/0x50 [ 645.081078] [] ? vm_mmap_pgoff+0x117/0x140 [ 645.081145] [] ? 
vma_is_stack_for_current+0x90/0x90 [ 645.081214] [] ? mark_held_locks+0x23/0xc0 [ 645.082030] [] ? __fget+0x168/0x250 [ 645.082106] [] ? entry_SYSCALL_64_fastpath+0x5/0xb1 [ 645.082176] [] ? __fget_light+0xa2/0xc0 [ 645.082242] [] SyS_ioctl+0x3c/0x70 [ 645.082309] [] entry_SYSCALL_64_fastpath+0x1c/0xb1 [ 645.082374] Object at ffff880233564200, in cache kmalloc-8192 size: 8192 [ 645.082431] Allocated: [ 645.082480] PID = 28408 [ 645.082535] [ 645.082566] [] save_stack_trace+0x16/0x20 [ 645.082623] [ 645.082656] [] save_stack+0x46/0xd0 [ 645.082716] [ 645.082756] [] kasan_kmalloc+0xad/0xe0 [ 645.082817] [ 645.082848] [] i915_ppgtt_create+0x52/0x220 [ 645.082908] [ 645.082941] [] i915_gem_create_context+0x396/0x560 [ 645.083027] [ 645.083059] [] i915_gem_context_create_ioctl+0x97/0xf0 [ 645.083152] [ 645.083183] [] drm_ioctl+0x327/0x640 [ 645.083243] [ 645.083274] [] do_vfs_ioctl+0x127/0xa20 [ 645.083334] [ 645.083372] [] SyS_ioctl+0x3c/0x70 [ 645.083432] [ 645.083464] [] entry_SYSCALL_64_fastpath+0x1c/0xb1 [ 645.083551] Freed: [ 645.083599] PID = 27629 [ 645.083648] [ 645.083676] [] save_stack_trace+0x16/0x20 [ 645.083738] [ 645.083770] [] save_stack+0x46/0xd0 [ 645.083830] [ 645.083862] [] kasan_slab_free+0x73/0xc0 [ 645.083922] [ 645.083961] [] kfree+0xa9/0x170 [ 645.084021] [ 645.084053] [] i915_ppgtt_release+0x100/0x180 [ 645.084139] [ 645.084171] [] i915_gem_context_free+0x1b4/0x230 [ 645.084257] [ 645.084288] [] intel_lr_context_unpin+0x192/0x230 [ 645.084380] [ 645.084413] [] i915_gem_request_retire+0x620/0x630 [ 645.084500] [ 645.085226] [] i915_gem_retire_requests+0x181/0x280 [ 645.085313] [ 645.085352] [] i915_gem_retire_work_handler+0xca/0xe0 [ 645.085440] [ 645.085471] [] process_one_work+0x4fb/0x920 [ 645.085532] [ 645.085562] [] worker_thread+0x8d/0x840 [ 645.085622] [ 645.085653] [] kthread+0x185/0x1b0 [ 645.085718] [ 645.085750] [] ret_from_fork+0x27/0x40 [ 645.085811] Memory state around the buggy address: [ 645.085869] ffff880233564280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 645.085956] ffff880233564300: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 645.086053] >ffff880233564380: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 645.086138] ^ [ 645.086193] ffff880233564400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 645.086283] ffff880233564480: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb v2: Add a comment to document the hint like nature of intel_engine_last_submit() Fixes: 73cb97010d4f ("drm/i915: Combine seqno + tracking into a global timeline struct") Fixes: 80b204bce8f2 ("drm/i915: Enable multiple timelines") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20161101100317.11129-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 7 +++---- drivers/gpu/drm/i915/i915_gem.c | 4 ++-- drivers/gpu/drm/i915/i915_gpu_error.c | 2 +- drivers/gpu/drm/i915/i915_irq.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.h | 12 ++++++++++++ 5 files changed, 19 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 9bef6f5..bf19192 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -1348,9 +1348,8 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) seq_printf(m, "%s:\n", engine->name); seq_printf(m, "\tseqno = %x [current %x, last %x]\n", - engine->hangcheck.seqno, - seqno[id], - 
engine->timeline->last_submitted_seqno); + engine->hangcheck.seqno, seqno[id], + intel_engine_last_submit(engine)); seq_printf(m, "\twaiters? %s, fake irq active? %s\n", yesno(intel_engine_has_waiter(engine)), yesno(test_bit(engine->id, @@ -3167,7 +3166,7 @@ static int i915_engine_info(struct seq_file *m, void *unused) seq_printf(m, "%s\n", engine->name); seq_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [score %d]\n", intel_engine_get_seqno(engine), - engine->timeline->last_submitted_seqno, + intel_engine_last_submit(engine), engine->hangcheck.seqno, engine->hangcheck.score); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b512745..6c51b21 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -371,7 +371,7 @@ out: if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq)) i915_gem_request_retire_upto(rq); - if (rps && rq->fence.seqno == rq->timeline->last_submitted_seqno) { + if (rps && rq->global_seqno == intel_engine_last_submit(rq->engine)) { /* The GPU is now idle and this client has stalled. * Since no other client has submitted a request in the * meantime, assume that this client is the only one @@ -2674,7 +2674,7 @@ static void i915_gem_cleanup_engine(struct intel_engine_cs *engine) * reset it. */ intel_engine_init_global_seqno(engine, - engine->timeline->last_submitted_seqno); + intel_engine_last_submit(engine)); /* * Clear the execlists queue up before freeing the requests, as those diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 7ba4048..204093f 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1120,7 +1120,7 @@ static void error_record_engine_registers(struct drm_i915_error_state *error, ee->instpm = I915_READ(RING_INSTPM(engine->mmio_base)); ee->acthd = intel_engine_get_active_head(engine); ee->seqno = intel_engine_get_seqno(engine); - ee->last_seqno = engine->timeline->last_submitted_seqno; + ee->last_seqno = intel_engine_last_submit(engine); ee->start = I915_READ_START(engine); ee->head = I915_READ_HEAD(engine); ee->tail = I915_READ_TAIL(engine); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 90d0905..4dda2b1 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -3168,7 +3168,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work) acthd = intel_engine_get_active_head(engine); seqno = intel_engine_get_seqno(engine); - submit = READ_ONCE(engine->timeline->last_submitted_seqno); + submit = intel_engine_last_submit(engine); if (engine->hangcheck.seqno == seqno) { if (i915_seqno_passed(seqno, submit)) { diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 43f61aa..642b546 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -496,6 +496,18 @@ static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine) return intel_read_status_page(engine, I915_GEM_HWS_INDEX); } +static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine) +{ + /* We are only peeking at the tail of the submit queue (and not the + * queue itself) in order to gain a hint as to the current active + * state of the engine. Callers are not expected to be taking + * engine->timeline->lock, nor are they expected to be concerned + * with serialising this hint with anything, so document it as + * a hint and nothing more. 
+ */ + return READ_ONCE(engine->timeline->last_submitted_seqno); +} + int init_workarounds_ring(struct intel_engine_cs *engine); void intel_engine_get_instdone(struct intel_engine_cs *engine, -- cgit v1.1 From bc0629a76726991f97ee88c65b87e99dd94c1dc4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 1 Nov 2016 10:03:17 +0000 Subject: drm/i915: Track pages pinned due to swizzling quirk If we have a tiled object and an unknown CPU swizzle pattern, we pin the pages to prevent the object from being swapped out (and us corrupting the contents as we do not know the access pattern and so cannot convert it to linear and back to tiled on reuse). This requires us to remember to drop the extra pinning when freeing the object, or else we trigger warnings about the pin leak. In commit fbbd37b36fa5 ("drm/i915: Move object release to a freelist + worker"), the object free path was deferred to a worker, but the unpinning of the quirk, along with marking the object as reclaimable, was left on the immediate path (so that if required we could reclaim the pages under memory pressure as early as possible). However, this split introduced a bug where the pages were no longer being unpinned if they were marked as unneeded. [ 231.800401] WARNING: CPU: 1 PID: 90 at drivers/gpu/drm/i915/i915_gem.c:4275 __i915_gem_free_objects+0x326/0x3c0 [i915] [ 231.800403] WARN_ON(i915_gem_object_has_pinned_pages(obj)) [ 231.800405] Modules linked in: [ 231.800406] snd_hda_intel i915 snd_hda_codec_generic mei_me snd_hda_codec coretemp snd_hwdep mei lpc_ich snd_hda_core snd_pcm e1000e ptp pps_core [last unloaded: i915] [ 231.800426] CPU: 1 PID: 90 Comm: kworker/1:4 Tainted: G U 4.9.0-rc2-CI-CI_DRM_1780+ #1 [ 231.800428] Hardware name: LENOVO 7465CTO/7465CTO, BIOS 6DET44WW (2.08 ) 04/22/2009 [ 231.800456] Workqueue: events __i915_gem_free_work [i915] [ 231.800459] ffffc9000034fc80 ffffffff8142dd65 ffffc9000034fcd0 0000000000000000 [ 231.800465] ffffc9000034fcc0 ffffffff8107e4e6 000010b300000001 0000000000001000 [ 231.800469] ffff88011d3db740 ffff880130ef0000 0000000000000000 ffff880130ef5ea0 [ 231.800474] Call Trace: [ 231.800479] [] dump_stack+0x67/0x92 [ 231.800484] [] __warn+0xc6/0xe0 [ 231.800487] [] warn_slowpath_fmt+0x4a/0x50 [ 231.800491] [] ? kmem_cache_free+0x2dc/0x340 [ 231.800520] [] __i915_gem_free_objects+0x326/0x3c0 [i915] [ 231.800548] [] __i915_gem_free_work+0x2e/0x50 [i915] [ 231.800552] [] process_one_work+0x1ec/0x6b0 [ 231.800555] [] ? process_one_work+0x166/0x6b0 [ 231.800558] [] worker_thread+0x49/0x490 [ 231.800561] [] ? process_one_work+0x6b0/0x6b0 [ 231.800563] [] ? process_one_work+0x6b0/0x6b0 [ 231.800566] [] kthread+0xeb/0x110 [ 231.800569] [] ? kthread_park+0x60/0x60 [ 231.800573] [] ret_from_fork+0x27/0x40 Moving to a separate flag for tracking the quirked pin is overkill for the bug (since we only have to interchange the two tests in i915_gem_free_object) but it does reduce a complicated test on all objects and provide a sanitycheck for uncommon code paths. 
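The invariant the new flag maintains, one extra pin taken while the quirk applies and dropped exactly once on free, can be sketched with plain counters (the toy object and helpers below are illustrative, not the driver's API):

#include <assert.h>
#include <stdio.h>
#include <stdbool.h>

struct obj { int pin_count; bool quirked; };

static void pin(struct obj *o)   { o->pin_count++; }
static void unpin(struct obj *o) { assert(o->pin_count > 0); o->pin_count--; }

/* called whenever tiling or madv changes make the quirk (un)necessary */
static void set_quirk(struct obj *o, bool need)
{
	if (need && !o->quirked) {
		pin(o);
		o->quirked = true;
	} else if (!need && o->quirked) {
		unpin(o);
		o->quirked = false;
	}
}

static void free_object(struct obj *o)
{
	if (o->quirked)			/* the fix: always drop the quirk pin */
		set_quirk(o, false);
	assert(o->pin_count == 0);	/* the WARN_ON from the backtrace above */
	printf("freed cleanly\n");
}

int main(void)
{
	struct obj o = { 0, false };

	set_quirk(&o, true);	/* tiled + unknown swizzle: pin the pages */
	free_object(&o);	/* without the flag this pin would leak */
	return 0;
}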
Fixes: fbbd37b36fa5 ("drm/i915: Move object release to a freelist + worker") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20161101100317.11129-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 6 ++++++ drivers/gpu/drm/i915/i915_gem.c | 21 +++++++++++++-------- drivers/gpu/drm/i915/i915_gem_tiling.c | 9 +++++++-- 3 files changed, 26 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 51360d1..539106a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2304,6 +2304,12 @@ struct drm_i915_gem_object { * pages were last acquired. */ bool dirty:1; + + /** + * This is set if the object has been pinned due to unknown + * swizzling. + */ + bool quirked:1; } mm; /** Breadcrumb of last rendering to the buffer. diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6c51b21..c9e52f7 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2324,8 +2324,10 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) i915_gem_object_do_bit_17_swizzle(obj, st); if (i915_gem_object_is_tiled(obj) && - dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) + dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { __i915_gem_object_pin_pages(obj); + obj->mm.quirked = true; + } return st; @@ -4091,10 +4093,15 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, if (obj->mm.pages && i915_gem_object_is_tiled(obj) && dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { - if (obj->mm.madv == I915_MADV_WILLNEED) + if (obj->mm.madv == I915_MADV_WILLNEED) { + GEM_BUG_ON(!obj->mm.quirked); __i915_gem_object_unpin_pages(obj); - if (args->madv == I915_MADV_WILLNEED) + obj->mm.quirked = false; + } + if (args->madv == I915_MADV_WILLNEED) { __i915_gem_object_pin_pages(obj); + obj->mm.quirked = true; + } } if (obj->mm.madv != __I915_MADV_PURGED) @@ -4335,14 +4342,12 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj) { struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); + if (obj->mm.quirked) + __i915_gem_object_unpin_pages(obj); + if (discard_backing_storage(obj)) obj->mm.madv = I915_MADV_DONTNEED; - if (obj->mm.pages && obj->mm.madv == I915_MADV_WILLNEED && - to_i915(obj->base.dev)->quirks & QUIRK_PIN_SWIZZLED_PAGES && - i915_gem_object_is_tiled(obj)) - __i915_gem_object_unpin_pages(obj); - /* Before we free the object, make sure any pure RCU-only * read-side critical sections are complete, e.g. * i915_gem_busy_ioctl(). 
For the corresponding synchronized diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 6395e62..1577e78 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -263,10 +263,15 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, if (obj->mm.pages && obj->mm.madv == I915_MADV_WILLNEED && dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { - if (args->tiling_mode == I915_TILING_NONE) + if (args->tiling_mode == I915_TILING_NONE) { + GEM_BUG_ON(!obj->mm.quirked); __i915_gem_object_unpin_pages(obj); - if (!i915_gem_object_is_tiled(obj)) + obj->mm.quirked = false; + } + if (!i915_gem_object_is_tiled(obj)) { + __i915_gem_object_pin_pages(obj); + obj->mm.quirked = true; + } } mutex_unlock(&obj->mm.lock); -- cgit v1.1 From db6c2b4151f2915fe1695cdcac43b32e73d1ad32 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 1 Nov 2016 11:54:00 +0000 Subject: drm/i915: Store the vma in an rbtree under the object With full-ppgtt one of the main bottlenecks is the lookup of the VMA underneath the object. For execbuf there is merit in having a very fast direct lookup of ctx:handle to the vma using a hashtree, but that still leaves a large number of other lookups. One way to speed up the lookup would be to use a rhashtable, but that requires extra allocations and may exhibit poor worst-case behaviour. An alternative is to use an embedded rbtree, i.e. no extra allocations and deterministic behaviour, but at the slight cost of O(lgN) lookups (instead of O(1) for rhashtable). The majority of such a tree will be very shallow and so not much slower, and it still scales much, much better than the current unsorted list. v2: Bump vma_compare() to return a long, as we return the result of comparing two pointers. References: https://bugs.freedesktop.org/show_bug.cgi?id=87726 Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20161101115400.15647-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 2 + drivers/gpu/drm/i915/i915_gem_gtt.c | 80 +++++++++++++++++++++++++------------ drivers/gpu/drm/i915/i915_gem_gtt.h | 1 + 4 files changed, 59 insertions(+), 25 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 539106a..9143129 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2230,6 +2230,7 @@ struct drm_i915_gem_object { /** List of VMAs backed by this object */ struct list_head vma_list; + struct rb_root vma_tree; /** Stolen memory for this object, instead of being backed by shmem. 
*/ struct drm_mm_node *stolen; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c9e52f7..1568f67 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4266,6 +4266,8 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, vma->flags &= ~I915_VMA_PIN_MASK; i915_vma_close(vma); } + GEM_BUG_ON(!list_empty(&obj->vma_list)); + GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree)); list_del(&obj->global_list); } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index e7afad5..00606a2 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3399,6 +3399,7 @@ void i915_vma_destroy(struct i915_vma *vma) GEM_BUG_ON(!i915_vma_is_closed(vma)); GEM_BUG_ON(vma->fence); + rb_erase(&vma->obj_node, &vma->obj->vma_tree); list_del(&vma->vm_link); if (!i915_vma_is_ggtt(vma)) i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); @@ -3416,12 +3417,33 @@ void i915_vma_close(struct i915_vma *vma) WARN_ON(i915_vma_unbind(vma)); } +static inline long vma_compare(struct i915_vma *vma, + struct i915_address_space *vm, + const struct i915_ggtt_view *view) +{ + GEM_BUG_ON(view && !i915_vma_is_ggtt(vma)); + + if (vma->vm != vm) + return vma->vm - vm; + + if (!view) + return vma->ggtt_view.type; + + if (vma->ggtt_view.type != view->type) + return vma->ggtt_view.type - view->type; + + return memcmp(&vma->ggtt_view.params, + &view->params, + sizeof(view->params)); +} + static struct i915_vma * __i915_vma_create(struct drm_i915_gem_object *obj, struct i915_address_space *vm, const struct i915_ggtt_view *view) { struct i915_vma *vma; + struct rb_node *rb, **p; int i; GEM_BUG_ON(vm->closed); @@ -3455,33 +3477,28 @@ __i915_vma_create(struct drm_i915_gem_object *obj, if (i915_is_ggtt(vm)) { vma->flags |= I915_VMA_GGTT; + list_add(&vma->obj_link, &obj->vma_list); } else { i915_ppgtt_get(i915_vm_to_ppgtt(vm)); + list_add_tail(&vma->obj_link, &obj->vma_list); } - list_add_tail(&vma->obj_link, &obj->vma_list); - return vma; -} + rb = NULL; + p = &obj->vma_tree.rb_node; + while (*p) { + struct i915_vma *pos; -static inline bool vma_matches(struct i915_vma *vma, - struct i915_address_space *vm, - const struct i915_ggtt_view *view) -{ - if (vma->vm != vm) - return false; - - if (!i915_vma_is_ggtt(vma)) - return true; - - if (!view) - return vma->ggtt_view.type == 0; - - if (vma->ggtt_view.type != view->type) - return false; + rb = *p; + pos = rb_entry(rb, struct i915_vma, obj_node); + if (vma_compare(pos, vm, view) < 0) + p = &rb->rb_right; + else + p = &rb->rb_left; + } + rb_link_node(&vma->obj_node, rb, p); + rb_insert_color(&vma->obj_node, &obj->vma_tree); - return memcmp(&vma->ggtt_view.params, - &view->params, - sizeof(view->params)) == 0; + return vma; } struct i915_vma * @@ -3501,12 +3518,23 @@ i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, struct i915_address_space *vm, const struct i915_ggtt_view *view) { - struct i915_vma *vma; + struct rb_node *rb; + + rb = obj->vma_tree.rb_node; + while (rb) { + struct i915_vma *vma = rb_entry(rb, struct i915_vma, obj_node); + long cmp; - list_for_each_entry_reverse(vma, &obj->vma_list, obj_link) - if (vma_matches(vma, vm, view)) + cmp = vma_compare(vma, vm, view); + if (cmp == 0) return vma; + if (cmp < 0) + rb = rb->rb_right; + else + rb = rb->rb_left; + } + return NULL; } @@ -3521,8 +3549,10 @@ i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, GEM_BUG_ON(view && !i915_is_ggtt(vm)); vma = i915_gem_obj_to_vma(obj, vm, view); - if 
(!vma) + if (!vma) { vma = __i915_vma_create(obj, vm, view); + GEM_BUG_ON(vma != i915_gem_obj_to_vma(obj, vm, view)); + } GEM_BUG_ON(i915_vma_is_closed(vma)); return vma; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 518e75b..c23ef9d 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -227,6 +227,7 @@ struct i915_vma { struct list_head vm_link; struct list_head obj_link; /* Link in the object's VMA list */ + struct rb_node obj_node; /** This vma's place in the batchbuffer or on the eviction list */ struct list_head exec_list; -- cgit v1.1 From 548625ee8fde571c32e7976879854410689c4a84 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 1 Nov 2016 12:11:34 +0000 Subject: drm/i915: Improve lockdep tracking for obj->mm.lock The shrinker may appear to recurse into obj->mm.lock as the shrinker may be called from a direct reclaim path whilst handling get_pages. We filter out recursing on the same obj->mm.lock by inspecting obj->mm.pages, but we do want to take the lock on a second object in order to reap their pages. lockdep spots the recursion on the same lockclass and needs annotation to avoid a false positive. To keep the two paths distinct, create an enum to indicate which subclass of obj->mm.lock we are using. This removes the false positive and avoids masking real bugs. Suggested-by: Joonas Lahtinen Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/20161101121134.27504-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_drv.h | 8 +++++++- drivers/gpu/drm/i915/i915_gem.c | 9 +++++---- drivers/gpu/drm/i915/i915_gem_shrinker.c | 4 ++-- drivers/gpu/drm/i915/i915_gem_userptr.c | 2 +- 4 files changed, 15 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9143129..086396e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3287,7 +3287,13 @@ i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj) __i915_gem_object_unpin_pages(obj); } -void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj); +enum i915_mm_subclass { /* lockdep subclass for obj->mm.lock */ + I915_MM_NORMAL = 0, + I915_MM_SHRINKER +}; + +void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, + enum i915_mm_subclass subclass); void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj); enum i915_map_type { diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1568f67..cbbfaa7 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -491,7 +491,7 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, if (ret) return ret; - __i915_gem_object_put_pages(obj); + __i915_gem_object_put_pages(obj, I915_MM_NORMAL); if (obj->mm.pages) return -EBUSY; @@ -2181,7 +2181,8 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) radix_tree_delete(&obj->mm.get_page.radix, iter.index); } -void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj) +void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, + enum i915_mm_subclass subclass) { struct sg_table *pages; @@ -2193,7 +2194,7 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj) return; /* May be called by shrinker from within get_pages() (on another bo) */ - mutex_lock_nested(&obj->mm.lock, SINGLE_DEPTH_NESTING); + 
mutex_lock_nested(&obj->mm.lock, subclass); if (unlikely(atomic_read(&obj->mm.pages_pin_count))) goto unlock; @@ -4283,7 +4284,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, if (WARN_ON(i915_gem_object_has_pinned_pages(obj))) atomic_set(&obj->mm.pages_pin_count, 0); - __i915_gem_object_put_pages(obj); + __i915_gem_object_put_pages(obj, I915_MM_NORMAL); GEM_BUG_ON(obj->mm.pages); if (obj->base.import_attach) diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 0993afc..f988652 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -111,7 +111,7 @@ static bool can_release_pages(struct drm_i915_gem_object *obj) static bool unsafe_drop_pages(struct drm_i915_gem_object *obj) { if (i915_gem_object_unbind(obj) == 0) - __i915_gem_object_put_pages(obj); + __i915_gem_object_put_pages(obj, I915_MM_SHRINKER); return !READ_ONCE(obj->mm.pages); } @@ -225,7 +225,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, if (unsafe_drop_pages(obj)) { /* May arrive from get_pages on another bo */ mutex_lock_nested(&obj->mm.lock, - SINGLE_DEPTH_NESTING); + I915_MM_SHRINKER); if (!obj->mm.pages) { __i915_gem_object_invalidate(obj); list_del_init(&obj->global_list); diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index c30d04f..9bf44b5 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -75,7 +75,7 @@ static void cancel_userptr(struct work_struct *work) /* We are inside a kthread context and can't be interrupted */ if (i915_gem_object_unbind(obj) == 0) - __i915_gem_object_put_pages(obj); + __i915_gem_object_put_pages(obj, I915_MM_NORMAL); WARN_ONCE(obj->mm.pages, "Failed to release pages: bind_count=%d, pages_pin_count=%d, pin_display=%d\n", obj->bind_count, -- cgit v1.1 From c8fe32c13543759fe5de7c4dcf3882f6693b8349 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 26 Oct 2016 15:41:29 +0200 Subject: drm/i915/skl+: Prepare for removing data rate from skl watermark state, v2. Caching is not required, drm_atomic_crtc_state_for_each_plane_state can be used to inspect the states of all planes assigned to the CRTC even if they are not part of _state, so we can just recalculate every time. Changes since v1: - Remove plane->pipe checks, they're implied by the macros. - Split unrelated changes to a separate commit. 
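[Editorial note: the following sketch is not part of the patch. It illustrates the recalculation pattern this series moves to: drm_atomic_crtc_state_for_each_plane_state() visits only the planes attached to the given CRTC state, so the per-plane rates can be recomputed on demand instead of cached. The function name skl_total_data_rate_sketch is hypothetical; the macro and skl_plane_relative_data_rate() are the helpers named in the patch below.]

    static unsigned int
    skl_total_data_rate_sketch(struct intel_crtc_state *cstate)
    {
            struct drm_plane *plane;
            const struct drm_plane_state *pstate;
            unsigned int total = 0;

            /* No plane->pipe filtering needed: the iterator only yields
             * planes assigned to this CRTC, whether or not they are part
             * of the atomic state being committed. */
            drm_atomic_crtc_state_for_each_plane_state(plane, pstate, &cstate->base) {
                    total += skl_plane_relative_data_rate(cstate, pstate, 0); /* packed/uv */
                    total += skl_plane_relative_data_rate(cstate, pstate, 1); /* y-plane */
            }

            return total;
    }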
Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/1477489299-25777-2-git-send-email-maarten.lankhorst@linux.intel.com Reviewed-by: Paulo Zanoni Reviewed-by: Matt Roper --- drivers/gpu/drm/i915/intel_pm.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index b544248..87959d9 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -31,6 +31,7 @@ #include "intel_drv.h" #include "../../../platform/x86/intel_ips.h" #include +#include /** * DOC: RC6 */ @@ -3269,24 +3270,20 @@ skl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate) struct drm_crtc *crtc = cstate->crtc; struct drm_device *dev = crtc->dev; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - const struct drm_plane *plane; + struct drm_plane *plane; const struct intel_plane *intel_plane; - struct drm_plane_state *pstate; + const struct drm_plane_state *pstate; unsigned int rate, total_data_rate = 0; int id; - int i; if (WARN_ON(!state)) return 0; /* Calculate and cache data rate for each plane */ - for_each_plane_in_state(state, plane, pstate, i) { + drm_atomic_crtc_state_for_each_plane_state(plane, pstate, cstate) { id = skl_wm_plane_id(to_intel_plane(plane)); intel_plane = to_intel_plane(plane); - if (intel_plane->pipe != intel_crtc->pipe) - continue; - /* packed/uv */ rate = skl_plane_relative_data_rate(intel_cstate, pstate, 0); @@ -3383,7 +3380,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_plane *intel_plane; struct drm_plane *plane; - struct drm_plane_state *pstate; + const struct drm_plane_state *pstate; enum pipe pipe = intel_crtc->pipe; struct skl_ddb_entry *alloc = &cstate->wm.skl.ddb; uint16_t alloc_size, start, cursor_blocks; @@ -3419,14 +3416,11 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, alloc_size -= cursor_blocks; /* 1. Allocate the mininum required blocks for each active plane */ - for_each_plane_in_state(state, plane, pstate, i) { + drm_atomic_crtc_state_for_each_plane_state(plane, pstate, &cstate->base) { intel_plane = to_intel_plane(plane); id = skl_wm_plane_id(intel_plane); - if (intel_plane->pipe != pipe) - continue; - - if (!to_intel_plane_state(pstate)->base.visible) { + if (!pstate->visible) { minimum[id] = 0; y_minimum[id] = 0; continue; -- cgit v1.1 From 220b0965219ec68fcd28bea603dbfe84281245ff Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 26 Oct 2016 15:41:30 +0200 Subject: drm/i915/gen9+: Use cstate plane mask instead of crtc->state. I'm planning on getting rid of all obj->state dereferences and replacing them with accessor functions. Remove this one early; they're equivalent because removed planes are already part of the state, else they could not have been removed.
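[Editorial note: a minimal sketch, not part of the patch, of the mask-based iteration used below in skl_ddb_add_affected_planes(); dev and cstate are assumed to be in scope, and drm_for_each_plane_mask() is the iterator from drm_crtc.h.]

    /* Walk the planes recorded in this crtc state's own mask rather
     * than dereferencing crtc->state; a plane being removed is already
     * part of the state, so the mask still covers it. */
    drm_for_each_plane_mask(plane, dev, cstate->base.plane_mask) {
            int id = skl_wm_plane_id(to_intel_plane(plane));

            /* ... compare old and new DDB entries for plane 'id' ... */
    }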
Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/1477489299-25777-3-git-send-email-maarten.lankhorst@linux.intel.com Reviewed-by: Matt Roper --- drivers/gpu/drm/i915/intel_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 87959d9..e6e9cc5 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3975,7 +3975,7 @@ skl_ddb_add_affected_planes(struct intel_crtc_state *cstate) WARN_ON(!drm_atomic_get_existing_crtc_state(state, crtc)); - drm_for_each_plane_mask(plane, dev, crtc->state->plane_mask) { + drm_for_each_plane_mask(plane, dev, cstate->base.plane_mask) { id = skl_wm_plane_id(to_intel_plane(plane)); if (skl_ddb_entry_equal(&cur_ddb->plane[pipe][id], -- cgit v1.1 From 7570498efb693eeee44f58dcb5cb64fdee87f17d Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Tue, 1 Nov 2016 12:04:10 +0100 Subject: drm/i915/gen9+: Use for_each_intel_plane_on_crtc in skl_print_wm_changes, v2. Using for_each_intel_plane_on_crtc will allow us to find all allocations that may have changed, not just the one added by the atomic state. This will print changes to plane allocations for crtc's when some planes are not added to the atomic state. Changes since v1: - Rephrase commit message. (Ville) - Use plane->base.id and plane->name to kill off cursor special case. (Ville) - Add intel_crtc to prevent a line wrap. (Paulo) - Line wrap debug messages. Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/c9f7dc1a-d23a-7c16-b2b7-1c23dd07ed35@linux.intel.com Reviewed-by: Matt Roper Reviewed-by: Paulo Zanoni --- drivers/gpu/drm/i915/intel_pm.c | 32 +++++++++----------------------- 1 file changed, 9 insertions(+), 23 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index e6e9cc5..56372f4 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4090,45 +4090,31 @@ skl_print_wm_changes(const struct drm_atomic_state *state) to_intel_atomic_state(state); const struct drm_crtc *crtc; const struct drm_crtc_state *cstate; - const struct drm_plane *plane; const struct intel_plane *intel_plane; - const struct drm_plane_state *pstate; const struct skl_ddb_allocation *old_ddb = &dev_priv->wm.skl_hw.ddb; const struct skl_ddb_allocation *new_ddb = &intel_state->wm_results.ddb; - enum pipe pipe; int id; - int i, j; + int i; for_each_crtc_in_state(state, crtc, cstate, i) { - pipe = to_intel_crtc(crtc)->pipe; + const struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + enum pipe pipe = intel_crtc->pipe; - for_each_plane_in_state(state, plane, pstate, j) { + for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) { const struct skl_ddb_entry *old, *new; - intel_plane = to_intel_plane(plane); id = skl_wm_plane_id(intel_plane); old = &old_ddb->plane[pipe][id]; new = &new_ddb->plane[pipe][id]; - if (intel_plane->pipe != pipe) - continue; - if (skl_ddb_entry_equal(old, new)) continue; - if (id != PLANE_CURSOR) { - DRM_DEBUG_ATOMIC("[PLANE:%d:plane %d%c] ddb (%d - %d) -> (%d - %d)\n", - plane->base.id, id + 1, - pipe_name(pipe), - old->start, old->end, - new->start, new->end); - } else { - DRM_DEBUG_ATOMIC("[PLANE:%d:cursor %c] ddb (%d - %d) -> (%d - %d)\n", - plane->base.id, - pipe_name(pipe), - old->start, old->end, - new->start, new->end); - } + DRM_DEBUG_ATOMIC("[PLANE:%d:%s] ddb (%d - %d) -> (%d - %d)\n", + 
intel_plane->base.base.id, + intel_plane->base.name, + old->start, old->end, + new->start, new->end); } } } -- cgit v1.1 From 1e6ee542262d611a07e15eee0abb4516d7378b93 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 26 Oct 2016 15:41:32 +0200 Subject: drm/i915/skl+: Remove data_rate from watermark struct, v2. It's only used in one function, and can be calculated without caching it in the global struct by using drm_atomic_crtc_state_for_each_plane_state. There are loops over all planes, including planes that don't exist. This is harmless, because data_rate will always be 0 for them and we never program them when updating watermarks. Changes since v1: - Rename rate back to data_rate, and change array name to plane_data_rate. (Matt) - Remove whitespace. (Paulo) Signed-off-by: Maarten Lankhorst Reviewed-by: Matt Roper Cc: Paulo Zanoni Link: http://patchwork.freedesktop.org/patch/msgid/1477489299-25777-5-git-send-email-maarten.lankhorst@linux.intel.com Reviewed-by: Paulo Zanoni --- drivers/gpu/drm/i915/intel_drv.h | 4 ---- drivers/gpu/drm/i915/intel_pm.c | 35 ++++++++++++++++------------------- 2 files changed, 16 insertions(+), 23 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 4623335..66a1965 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -501,10 +501,6 @@ struct intel_crtc_wm_state { struct skl_pipe_wm optimal; struct skl_ddb_entry ddb; - /* cached plane data rate */ - unsigned plane_data_rate[I915_MAX_PLANES]; - unsigned plane_y_data_rate[I915_MAX_PLANES]; - /* minimum block allocation */ uint16_t minimum_blocks[I915_MAX_PLANES]; uint16_t minimum_y_blocks[I915_MAX_PLANES]; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 56372f4..909e1b1 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3263,13 +3263,12 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate, * 3 * 4096 * 8192 * 4 < 2^32 */ static unsigned int -skl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate) +skl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate, + unsigned *plane_data_rate, + unsigned *plane_y_data_rate) { struct drm_crtc_state *cstate = &intel_cstate->base; struct drm_atomic_state *state = cstate->state; - struct drm_crtc *crtc = cstate->crtc; - struct drm_device *dev = crtc->dev; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct drm_plane *plane; const struct intel_plane *intel_plane; const struct drm_plane_state *pstate; @@ -3287,21 +3286,16 @@ skl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate) /* packed/uv */ rate = skl_plane_relative_data_rate(intel_cstate, pstate, 0); - intel_cstate->wm.skl.plane_data_rate[id] = rate; + plane_data_rate[id] = rate; + + total_data_rate += rate; /* y-plane */ rate = skl_plane_relative_data_rate(intel_cstate, pstate, 1); - intel_cstate->wm.skl.plane_y_data_rate[id] = rate; - } + plane_y_data_rate[id] = rate; - /* Calculate CRTC's total data rate from cached values */ - for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) { - int id = skl_wm_plane_id(intel_plane); - - /* packed/uv */ - total_data_rate += intel_cstate->wm.skl.plane_data_rate[id]; - total_data_rate += intel_cstate->wm.skl.plane_y_data_rate[id]; + total_data_rate += rate; } return total_data_rate; @@ -3389,6 +3383,8 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, unsigned int total_data_rate; int num_active; int id, 
i; + unsigned plane_data_rate[I915_MAX_PLANES] = {}; + unsigned plane_y_data_rate[I915_MAX_PLANES] = {}; /* Clear the partitioning for disabled planes. */ memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe])); @@ -3446,17 +3442,18 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, * * FIXME: we may not allocate every single block here. */ - total_data_rate = skl_get_total_relative_data_rate(cstate); + total_data_rate = skl_get_total_relative_data_rate(cstate, + plane_data_rate, + plane_y_data_rate); if (total_data_rate == 0) return 0; start = alloc->start; - for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) { + for (id = 0; id < I915_MAX_PLANES; id++) { unsigned int data_rate, y_data_rate; uint16_t plane_blocks, y_plane_blocks = 0; - int id = skl_wm_plane_id(intel_plane); - data_rate = cstate->wm.skl.plane_data_rate[id]; + data_rate = plane_data_rate[id]; /* * allocation for (packed formats) or (uv-plane part of planar format): @@ -3478,7 +3475,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, /* * allocation for y_plane part of planar format: */ - y_data_rate = cstate->wm.skl.plane_y_data_rate[id]; + y_data_rate = plane_y_data_rate[id]; y_plane_blocks = y_minimum[id]; y_plane_blocks += div_u64((uint64_t)alloc_size * y_data_rate, -- cgit v1.1 From fefdd8104d6f574677221c4ba2912483c66ee43c Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 26 Oct 2016 15:41:33 +0200 Subject: drm/i915/skl+: Remove minimum block allocation from crtc state. This is not required any more now that we get fresh state from drm_atomic_crtc_state_for_each_plane_state. Zero all state in advance. Signed-off-by: Maarten Lankhorst Reviewed-by: Matt Roper Reviewed-by: Paulo Zanoni Link: http://patchwork.freedesktop.org/patch/msgid/1477489299-25777-6-git-send-email-maarten.lankhorst@linux.intel.com --- drivers/gpu/drm/i915/intel_drv.h | 4 ---- drivers/gpu/drm/i915/intel_pm.c | 15 +++++---------- 2 files changed, 5 insertions(+), 14 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 66a1965..35a74fe 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -500,10 +500,6 @@ struct intel_crtc_wm_state { /* gen9+ only needs 1-step wm programming */ struct skl_pipe_wm optimal; struct skl_ddb_entry ddb; - - /* minimum block allocation */ - uint16_t minimum_blocks[I915_MAX_PLANES]; - uint16_t minimum_y_blocks[I915_MAX_PLANES]; } skl; }; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 909e1b1..086bf9e 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3378,8 +3378,8 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, enum pipe pipe = intel_crtc->pipe; struct skl_ddb_entry *alloc = &cstate->wm.skl.ddb; uint16_t alloc_size, start, cursor_blocks; - uint16_t *minimum = cstate->wm.skl.minimum_blocks; - uint16_t *y_minimum = cstate->wm.skl.minimum_y_blocks; + uint16_t minimum[I915_MAX_PLANES] = {}; + uint16_t y_minimum[I915_MAX_PLANES] = {}; unsigned int total_data_rate; int num_active; int id, i; @@ -3416,16 +3416,11 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, intel_plane = to_intel_plane(plane); id = skl_wm_plane_id(intel_plane); - if (!pstate->visible) { - minimum[id] = 0; - y_minimum[id] = 0; + if (!pstate->visible) continue; - } - if (plane->type == DRM_PLANE_TYPE_CURSOR) { - minimum[id] = 0; - y_minimum[id] = 0; + + if (plane->type == DRM_PLANE_TYPE_CURSOR) continue; - } minimum[id] = 
skl_ddb_min_alloc(pstate, 0); y_minimum[id] = skl_ddb_min_alloc(pstate, 1); -- cgit v1.1 From 49845a7aff5406e10420de08aa34cf55f98f2a3b Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 26 Oct 2016 15:41:34 +0200 Subject: drm/i915/skl+: Clean up minimum allocations, v2. Move calculating minimum allocations to a helper, which cleans up the code some more. The cursor is still allocated in advance because it doesn't count towards data rate and should always be reserved. changes since v1: - Change comment to have a extra opening line. (Matt) - Rebase to remove unused plane->pipe == pipe, handled by the iterator now. (Paulo) Reviewed-by: Matt Roper Signed-off-by: Maarten Lankhorst Cc: Paulo Zanoni Link: http://patchwork.freedesktop.org/patch/msgid/1477489299-25777-7-git-send-email-maarten.lankhorst@linux.intel.com Reviewed-by: Paulo Zanoni --- drivers/gpu/drm/i915/intel_pm.c | 62 +++++++++++++++++++++++++---------------- 1 file changed, 38 insertions(+), 24 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 086bf9e..edd37e7 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3364,6 +3364,30 @@ skl_ddb_min_alloc(const struct drm_plane_state *pstate, return DIV_ROUND_UP((4 * src_w * plane_bpp), 512) * min_scanlines/4 + 3; } +static void +skl_ddb_calc_min(const struct intel_crtc_state *cstate, int num_active, + uint16_t *minimum, uint16_t *y_minimum) +{ + const struct drm_plane_state *pstate; + struct drm_plane *plane; + + drm_atomic_crtc_state_for_each_plane_state(plane, pstate, &cstate->base) { + struct intel_plane *intel_plane = to_intel_plane(plane); + int id = skl_wm_plane_id(intel_plane); + + if (id == PLANE_CURSOR) + continue; + + if (!pstate->visible) + continue; + + minimum[id] = skl_ddb_min_alloc(pstate, 0); + y_minimum[id] = skl_ddb_min_alloc(pstate, 1); + } + + minimum[PLANE_CURSOR] = skl_cursor_allocation(num_active); +} + static int skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, struct skl_ddb_allocation *ddb /* out */) @@ -3372,12 +3396,9 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, struct drm_crtc *crtc = cstate->base.crtc; struct drm_device *dev = crtc->dev; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct intel_plane *intel_plane; - struct drm_plane *plane; - const struct drm_plane_state *pstate; enum pipe pipe = intel_crtc->pipe; struct skl_ddb_entry *alloc = &cstate->wm.skl.ddb; - uint16_t alloc_size, start, cursor_blocks; + uint16_t alloc_size, start; uint16_t minimum[I915_MAX_PLANES] = {}; uint16_t y_minimum[I915_MAX_PLANES] = {}; unsigned int total_data_rate; @@ -3405,32 +3426,22 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, return 0; } - cursor_blocks = skl_cursor_allocation(num_active); - ddb->plane[pipe][PLANE_CURSOR].start = alloc->end - cursor_blocks; - ddb->plane[pipe][PLANE_CURSOR].end = alloc->end; - - alloc_size -= cursor_blocks; - - /* 1. Allocate the mininum required blocks for each active plane */ - drm_atomic_crtc_state_for_each_plane_state(plane, pstate, &cstate->base) { - intel_plane = to_intel_plane(plane); - id = skl_wm_plane_id(intel_plane); - - if (!pstate->visible) - continue; - - if (plane->type == DRM_PLANE_TYPE_CURSOR) - continue; + skl_ddb_calc_min(cstate, num_active, minimum, y_minimum); - minimum[id] = skl_ddb_min_alloc(pstate, 0); - y_minimum[id] = skl_ddb_min_alloc(pstate, 1); - } + /* + * 1. 
Allocate the mininum required blocks for each active plane + * and allocate the cursor, it doesn't require extra allocation + * proportional to the data rate. + */ - for (i = 0; i < PLANE_CURSOR; i++) { + for (i = 0; i < I915_MAX_PLANES; i++) { alloc_size -= minimum[i]; alloc_size -= y_minimum[i]; } + ddb->plane[pipe][PLANE_CURSOR].start = alloc->end - minimum[PLANE_CURSOR]; + ddb->plane[pipe][PLANE_CURSOR].end = alloc->end; + /* * 2. Distribute the remaining space in proportion to the amount of * data each plane needs to fetch from memory. @@ -3448,6 +3459,9 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, unsigned int data_rate, y_data_rate; uint16_t plane_blocks, y_plane_blocks = 0; + if (id == PLANE_CURSOR) + continue; + data_rate = plane_data_rate[id]; /* -- cgit v1.1 From 03af79e0b801fa5ea817f71a22a932d4b5b62b4b Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Wed, 26 Oct 2016 15:41:36 +0200 Subject: drm/i915/gen9+: Use the watermarks from crtc_state for everything, v2. There's no need to keep a duplicate skl_pipe_wm around any more, everything can be discovered from crtc_state, which we pass around correctly now even in case of plane disable. The copy in intel_crtc->wm.skl.active is equal to crtc_state->wm.skl.optimal after the atomic commit completes. It's useful for two-step watermark programming, but not required for gen9+ which does it in a single step. We can pull the old allocation from old_crtc_state. Signed-off-by: Maarten Lankhorst Cc: Matt Roper Reviewed-by: Paulo Zanoni Link: http://patchwork.freedesktop.org/patch/msgid/1477489299-25777-9-git-send-email-maarten.lankhorst@linux.intel.com Reviewed-by: Matt Roper --- drivers/gpu/drm/i915/intel_display.c | 2 +- drivers/gpu/drm/i915/intel_drv.h | 1 - drivers/gpu/drm/i915/intel_pm.c | 18 ++++++++---------- 3 files changed, 9 insertions(+), 12 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 740c688..5a87dbf 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -13458,7 +13458,7 @@ static void verify_wm_state(struct drm_crtc *crtc, return; skl_pipe_wm_get_hw_state(crtc, &hw_wm); - sw_wm = &intel_crtc->wm.active.skl; + sw_wm = &to_intel_crtc_state(new_state)->wm.skl.optimal; skl_ddb_get_hw_state(dev_priv, &hw_ddb); sw_ddb = &dev_priv->wm.skl_hw.ddb; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 35a74fe..8c931bc 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -722,7 +722,6 @@ struct intel_crtc { /* watermarks currently being used */ union { struct intel_pipe_wm ilk; - struct skl_pipe_wm skl; } active; /* allow CxSR on this pipe */ diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index edd37e7..9cfc19e 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3930,11 +3930,11 @@ bool skl_ddb_allocation_overlaps(struct drm_atomic_state *state, } static int skl_update_pipe_wm(struct drm_crtc_state *cstate, - struct skl_ddb_allocation *ddb, /* out */ + const struct skl_pipe_wm *old_pipe_wm, struct skl_pipe_wm *pipe_wm, /* out */ + struct skl_ddb_allocation *ddb, /* out */ bool *changed /* out */) { - struct intel_crtc *intel_crtc = to_intel_crtc(cstate->crtc); struct intel_crtc_state *intel_cstate = to_intel_crtc_state(cstate); int ret; @@ -3942,7 +3942,7 @@ static int skl_update_pipe_wm(struct drm_crtc_state *cstate, if (ret) return ret; - if 
(!memcmp(&intel_crtc->wm.active.skl, pipe_wm, sizeof(*pipe_wm))) + if (!memcmp(old_pipe_wm, pipe_wm, sizeof(*pipe_wm))) *changed = false; else *changed = true; @@ -4169,10 +4169,12 @@ skl_compute_wm(struct drm_atomic_state *state) for_each_crtc_in_state(state, crtc, cstate, i) { struct intel_crtc_state *intel_cstate = to_intel_crtc_state(cstate); + const struct skl_pipe_wm *old_pipe_wm = + &to_intel_crtc_state(crtc->state)->wm.skl.optimal; pipe_wm = &intel_cstate->wm.skl.optimal; - ret = skl_update_pipe_wm(cstate, &results->ddb, pipe_wm, - &changed); + ret = skl_update_pipe_wm(cstate, old_pipe_wm, pipe_wm, + &results->ddb, &changed); if (ret) return ret; @@ -4205,8 +4207,6 @@ static void skl_update_wm(struct drm_crtc *crtc) if ((results->dirty_pipes & drm_crtc_mask(crtc)) == 0) return; - intel_crtc->wm.active.skl = *pipe_wm; - mutex_lock(&dev_priv->wm.wm_mutex); /* @@ -4374,10 +4374,8 @@ void skl_wm_get_hw_state(struct drm_device *dev) skl_pipe_wm_get_hw_state(crtc, &cstate->wm.skl.optimal); - if (intel_crtc->active) { + if (intel_crtc->active) hw->dirty_pipes |= drm_crtc_mask(crtc); - intel_crtc->wm.active.skl = cstate->wm.skl.optimal; - } } if (dev_priv->active_crtcs) { -- cgit v1.1 From 07c9a21a0d594c3acc0983e5e0a25d2364188d51 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 30 Oct 2016 13:28:20 +0000 Subject: drm/i915: Export a function to flush the context upon pinning For legacy contexts we employ an optimisation to only flush the context when binding into the global GTT. This avoids stalling on the GPU when reloading an active context. Wrap this detail up into a helper and export it for a potential third user. (Longer term, context pinning needs to be reworked as the current handling of switch context pins too late and so risks eviction and corrupting the request. Plans, plans, plans.) v2: Expand the comment explaining the optimisation for avoiding the stall on active contexts. 
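[Editorial note: a usage sketch, not from the patch, modelled on the two callers converted below; the surrounding function is hypothetical and error handling follows the kernel's ERR_PTR convention.]

    struct i915_vma *vma;

    /* Pin the legacy context image into the global GTT. The CPU cache
     * flush happens only on the first bind, so re-pinning a context
     * that is already active on the GPU does not stall. */
    vma = i915_gem_context_pin_legacy(ctx, PIN_HIGH);
    if (IS_ERR(vma))
            return PTR_ERR(vma);

    /* ... emit commands that reference the pinned context ... */

    i915_vma_unpin(vma);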
Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20161030132820.32163-1-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/i915_drv.h | 3 +++ drivers/gpu/drm/i915/i915_gem_context.c | 39 +++++++++++++++++++++++---------- drivers/gpu/drm/i915/intel_ringbuffer.c | 11 +++++----- 3 files changed, 36 insertions(+), 17 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 086396e..b5bfc4c6 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3529,6 +3529,9 @@ int i915_gem_context_open(struct drm_device *dev, struct drm_file *file); void i915_gem_context_close(struct drm_device *dev, struct drm_file *file); int i915_switch_context(struct drm_i915_gem_request *req); int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv); +struct i915_vma * +i915_gem_context_pin_legacy(struct i915_gem_context *ctx, + unsigned int flags); void i915_gem_context_free(struct kref *ctx_ref); struct drm_i915_gem_object * i915_gem_alloc_context_obj(struct drm_device *dev, size_t size); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 461aece..6dd4757 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -764,12 +764,36 @@ needs_pd_load_post(struct i915_hw_ppgtt *ppgtt, return false; } +struct i915_vma * +i915_gem_context_pin_legacy(struct i915_gem_context *ctx, + unsigned int flags) +{ + struct i915_vma *vma = ctx->engine[RCS].state; + int ret; + + /* Clear this page out of any CPU caches for coherent swap-in/out. + * We only want to do this on the first bind so that we do not stall + * on an active context (which by nature is already on the GPU). + */ + if (!(vma->flags & I915_VMA_GLOBAL_BIND)) { + ret = i915_gem_object_set_to_gtt_domain(vma->obj, false); + if (ret) + return ERR_PTR(ret); + } + + ret = i915_vma_pin(vma, 0, ctx->ggtt_alignment, PIN_GLOBAL | flags); + if (ret) + return ERR_PTR(ret); + + return vma; +} + static int do_rcs_switch(struct drm_i915_gem_request *req) { struct i915_gem_context *to = req->ctx; struct intel_engine_cs *engine = req->engine; struct i915_hw_ppgtt *ppgtt = to->ppgtt ?: req->i915->mm.aliasing_ppgtt; - struct i915_vma *vma = to->engine[RCS].state; + struct i915_vma *vma; struct i915_gem_context *from; u32 hw_flags; int ret, i; @@ -777,17 +801,10 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) if (skip_rcs_switch(ppgtt, engine, to)) return 0; - /* Clear this page out of any CPU caches for coherent swap-in/out. */ - if (!(vma->flags & I915_VMA_GLOBAL_BIND)) { - ret = i915_gem_object_set_to_gtt_domain(vma->obj, false); - if (ret) - return ret; - } - /* Trying to pin first makes error handling easier. 
*/ - ret = i915_vma_pin(vma, 0, to->ggtt_alignment, PIN_GLOBAL); - if (ret) - return ret; + vma = i915_gem_context_pin_legacy(to, 0); + if (IS_ERR(vma)) + return PTR_ERR(vma); /* * Pin can switch back to the default context if we end up calling into diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 188fdec..700e93d 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1949,14 +1949,13 @@ static int intel_ring_context_pin(struct i915_gem_context *ctx, return 0; if (ce->state) { - ret = i915_gem_object_set_to_gtt_domain(ce->state->obj, false); - if (ret) - goto error; + struct i915_vma *vma; - ret = i915_vma_pin(ce->state, 0, ctx->ggtt_alignment, - PIN_GLOBAL | PIN_HIGH); - if (ret) + vma = i915_gem_context_pin_legacy(ctx, PIN_HIGH); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); goto error; + } } /* The kernel context is only used as a placeholder for flushing the -- cgit v1.1 From 580503c7c52a8605c3be008a93a572be840a4f16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 31 Oct 2016 22:37:00 +0200 Subject: drm/i915: Pass dev_priv to plane constructors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unify our approach to things by passing around dev_priv instead of dev. Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1477946245-14134-2-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/intel_display.c | 40 +++++++++++++++++------------------- drivers/gpu/drm/i915/intel_drv.h | 2 +- drivers/gpu/drm/i915/intel_sprite.c | 12 +++++------ 4 files changed, 27 insertions(+), 29 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b5bfc4c6..ea1cfc6 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -185,7 +185,7 @@ enum plane { }; #define plane_name(p) ((p) + 'A') -#define sprite_name(p, s) ((p) * INTEL_INFO(dev)->num_sprites[(p)] + (s) + 'A') +#define sprite_name(p, s) ((p) * INTEL_INFO(dev_priv)->num_sprites[(p)] + (s) + 'A') enum port { PORT_NONE = -1, diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 5a87dbf..e486567 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -14960,9 +14960,8 @@ const struct drm_plane_funcs intel_plane_funcs = { }; static struct intel_plane * -intel_primary_plane_create(struct drm_device *dev, enum pipe pipe) +intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) { - struct drm_i915_private *dev_priv = to_i915(dev); struct intel_plane *primary = NULL; struct intel_plane_state *state = NULL; const uint32_t *intel_primary_formats; @@ -14986,7 +14985,7 @@ intel_primary_plane_create(struct drm_device *dev, enum pipe pipe) primary->can_scale = false; primary->max_downscale = 1; - if (INTEL_INFO(dev)->gen >= 9) { + if (INTEL_GEN(dev_priv) >= 9) { primary->can_scale = true; state->scaler_id = -1; } @@ -14994,10 +14993,10 @@ intel_primary_plane_create(struct drm_device *dev, enum pipe pipe) primary->plane = pipe; primary->frontbuffer_bit = INTEL_FRONTBUFFER_PRIMARY(pipe); primary->check_plane = intel_check_primary_plane; - if (HAS_FBC(dev) && INTEL_INFO(dev)->gen < 4) + if (HAS_FBC(dev_priv) && INTEL_GEN(dev_priv) < 4) primary->plane = !pipe; - if (INTEL_INFO(dev)->gen >= 9) { + 
if (INTEL_GEN(dev_priv) >= 9) { intel_primary_formats = skl_primary_formats; num_formats = ARRAY_SIZE(skl_primary_formats); @@ -15009,7 +15008,7 @@ intel_primary_plane_create(struct drm_device *dev, enum pipe pipe) primary->update_plane = ironlake_update_primary_plane; primary->disable_plane = i9xx_disable_primary_plane; - } else if (INTEL_INFO(dev)->gen >= 4) { + } else if (INTEL_GEN(dev_priv) >= 4) { intel_primary_formats = i965_primary_formats; num_formats = ARRAY_SIZE(i965_primary_formats); @@ -15023,21 +15022,21 @@ intel_primary_plane_create(struct drm_device *dev, enum pipe pipe) primary->disable_plane = i9xx_disable_primary_plane; } - if (INTEL_INFO(dev)->gen >= 9) - ret = drm_universal_plane_init(dev, &primary->base, 0, - &intel_plane_funcs, + if (INTEL_GEN(dev_priv) >= 9) + ret = drm_universal_plane_init(&dev_priv->drm, &primary->base, + 0, &intel_plane_funcs, intel_primary_formats, num_formats, DRM_PLANE_TYPE_PRIMARY, "plane 1%c", pipe_name(pipe)); else if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) - ret = drm_universal_plane_init(dev, &primary->base, 0, - &intel_plane_funcs, + ret = drm_universal_plane_init(&dev_priv->drm, &primary->base, + 0, &intel_plane_funcs, intel_primary_formats, num_formats, DRM_PLANE_TYPE_PRIMARY, "primary %c", pipe_name(pipe)); else - ret = drm_universal_plane_init(dev, &primary->base, 0, - &intel_plane_funcs, + ret = drm_universal_plane_init(&dev_priv->drm, &primary->base, + 0, &intel_plane_funcs, intel_primary_formats, num_formats, DRM_PLANE_TYPE_PRIMARY, "plane %c", plane_name(primary->plane)); @@ -15165,9 +15164,8 @@ intel_update_cursor_plane(struct drm_plane *plane, } static struct intel_plane * -intel_cursor_plane_create(struct drm_device *dev, enum pipe pipe) +intel_cursor_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) { - struct drm_i915_private *dev_priv = to_i915(dev); struct intel_plane *cursor = NULL; struct intel_plane_state *state = NULL; int ret; @@ -15195,8 +15193,8 @@ intel_cursor_plane_create(struct drm_device *dev, enum pipe pipe) cursor->update_plane = intel_update_cursor_plane; cursor->disable_plane = intel_disable_cursor_plane; - ret = drm_universal_plane_init(dev, &cursor->base, 0, - &intel_plane_funcs, + ret = drm_universal_plane_init(&dev_priv->drm, &cursor->base, + 0, &intel_plane_funcs, intel_cursor_formats, ARRAY_SIZE(intel_cursor_formats), DRM_PLANE_TYPE_CURSOR, @@ -15210,7 +15208,7 @@ intel_cursor_plane_create(struct drm_device *dev, enum pipe pipe) DRM_ROTATE_0 | DRM_ROTATE_180); - if (INTEL_INFO(dev)->gen >=9) + if (INTEL_GEN(dev_priv) >= 9) state->scaler_id = -1; drm_plane_helper_add(&cursor->base, &intel_plane_helper_funcs); @@ -15272,7 +15270,7 @@ static int intel_crtc_init(struct drm_device *dev, enum pipe pipe) skl_init_scalers(dev, intel_crtc, crtc_state); } - primary = intel_primary_plane_create(dev, pipe); + primary = intel_primary_plane_create(dev_priv, pipe); if (IS_ERR(primary)) { ret = PTR_ERR(primary); goto fail; @@ -15281,14 +15279,14 @@ static int intel_crtc_init(struct drm_device *dev, enum pipe pipe) for_each_sprite(dev_priv, pipe, sprite) { struct intel_plane *plane; - plane = intel_sprite_plane_create(dev, pipe, sprite); + plane = intel_sprite_plane_create(dev_priv, pipe, sprite); if (!plane) { ret = PTR_ERR(plane); goto fail; } } - cursor = intel_cursor_plane_create(dev, pipe); + cursor = intel_cursor_plane_create(dev_priv, pipe); if (!cursor) { ret = PTR_ERR(cursor); goto fail; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 8c931bc..caa91d7 
100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1769,7 +1769,7 @@ bool intel_sdvo_init(struct drm_device *dev, /* intel_sprite.c */ int intel_usecs_to_scanlines(const struct drm_display_mode *adjusted_mode, int usecs); -struct intel_plane *intel_sprite_plane_create(struct drm_device *dev, +struct intel_plane *intel_sprite_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe, int plane); int intel_sprite_set_colorkey(struct drm_device *dev, void *data, struct drm_file *file_priv); diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 70b50a2..df0fbb4 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -1043,9 +1043,9 @@ static uint32_t skl_plane_formats[] = { }; struct intel_plane * -intel_sprite_plane_create(struct drm_device *dev, enum pipe pipe, int plane) +intel_sprite_plane_create(struct drm_i915_private *dev_priv, + enum pipe pipe, int plane) { - struct drm_i915_private *dev_priv = to_i915(dev); struct intel_plane *intel_plane = NULL; struct intel_plane_state *state = NULL; unsigned long possible_crtcs; @@ -1132,14 +1132,14 @@ intel_sprite_plane_create(struct drm_device *dev, enum pipe pipe, int plane) possible_crtcs = (1 << pipe); if (INTEL_GEN(dev_priv) >= 9) - ret = drm_universal_plane_init(dev, &intel_plane->base, possible_crtcs, - &intel_plane_funcs, + ret = drm_universal_plane_init(&dev_priv->drm, &intel_plane->base, + possible_crtcs, &intel_plane_funcs, plane_formats, num_plane_formats, DRM_PLANE_TYPE_OVERLAY, "plane %d%c", plane + 2, pipe_name(pipe)); else - ret = drm_universal_plane_init(dev, &intel_plane->base, possible_crtcs, - &intel_plane_funcs, + ret = drm_universal_plane_init(&dev_priv->drm, &intel_plane->base, + possible_crtcs, &intel_plane_funcs, plane_formats, num_plane_formats, DRM_PLANE_TYPE_OVERLAY, "sprite %c", sprite_name(pipe, plane)); -- cgit v1.1 From 65edcccef3b5ea5a67beebe9a6beeea9a837749f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 31 Oct 2016 22:37:01 +0200 Subject: drm/i915: Pass dev_priv to skl_init_scalers() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unify our approach to things by passing around dev_priv instead of dev. While at it let's do some house cleaning: s/intel_foo/foo/ and move things into tighter scope. 
Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1477946245-14134-3-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_display.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e486567..0beee9e 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -115,8 +115,9 @@ static void chv_prepare_pll(struct intel_crtc *crtc, const struct intel_crtc_state *pipe_config); static void intel_begin_crtc_commit(struct drm_crtc *, struct drm_crtc_state *); static void intel_finish_crtc_commit(struct drm_crtc *, struct drm_crtc_state *); -static void skl_init_scalers(struct drm_device *dev, struct intel_crtc *intel_crtc, - struct intel_crtc_state *crtc_state); +static void skl_init_scalers(struct drm_i915_private *dev_priv, + struct intel_crtc *crtc, + struct intel_crtc_state *crtc_state); static void skylake_pfit_enable(struct intel_crtc *crtc); static void ironlake_pfit_disable(struct intel_crtc *crtc, bool force); static void ironlake_pfit_enable(struct intel_crtc *crtc); @@ -10735,10 +10736,8 @@ static bool haswell_get_pipe_config(struct intel_crtc *crtc, I915_READ(GAMMA_MODE(crtc->pipe)) & GAMMA_MODE_MODE_MASK; if (INTEL_INFO(dev)->gen >= 9) { - skl_init_scalers(dev, crtc, pipe_config); - } + skl_init_scalers(dev_priv, crtc, pipe_config); - if (INTEL_INFO(dev)->gen >= 9) { pipe_config->scaler_state.scaler_id = -1; pipe_config->scaler_state.scaler_users &= ~(1 << SKL_CRTC_INDEX); } @@ -15222,17 +15221,19 @@ fail: return ERR_PTR(ret); } -static void skl_init_scalers(struct drm_device *dev, struct intel_crtc *intel_crtc, - struct intel_crtc_state *crtc_state) +static void skl_init_scalers(struct drm_i915_private *dev_priv, + struct intel_crtc *crtc, + struct intel_crtc_state *crtc_state) { + struct intel_crtc_scaler_state *scaler_state = + &crtc_state->scaler_state; int i; - struct intel_scaler *intel_scaler; - struct intel_crtc_scaler_state *scaler_state = &crtc_state->scaler_state; - for (i = 0; i < intel_crtc->num_scalers; i++) { - intel_scaler = &scaler_state->scalers[i]; - intel_scaler->in_use = 0; - intel_scaler->mode = PS_SCALER_MODE_DYN; + for (i = 0; i < crtc->num_scalers; i++) { + struct intel_scaler *scaler = &scaler_state->scalers[i]; + + scaler->in_use = 0; + scaler->mode = PS_SCALER_MODE_DYN; } scaler_state->scaler_id = -1; @@ -15267,7 +15268,7 @@ static int intel_crtc_init(struct drm_device *dev, enum pipe pipe) else intel_crtc->num_scalers = SKL_NUM_SCALERS; - skl_init_scalers(dev, intel_crtc, crtc_state); + skl_init_scalers(dev_priv, intel_crtc, crtc_state); } primary = intel_primary_plane_create(dev_priv, pipe); -- cgit v1.1 From 525b9311be957e319680025e06f9f722ccfd54b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 31 Oct 2016 22:37:02 +0200 Subject: drm/i915: Pass intel_crtc to intel_crtc_active() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unify our approach to things by passing around intel_crtc instead of drm_crtc. 
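[Editorial note: a sketch, not from the patch, of a converted call site; crtc is an assumed drm_crtc pointer, as in the hunks below.]

    /* The check now reads straight off intel_crtc: the pipe counts as
     * live only with crtc->active set, a primary framebuffer attached
     * and a valid mode clock, guarding against partial hw readout. */
    if (intel_crtc_active(to_intel_crtc(crtc))) {
            /* ... compute watermarks for this live pipe ... */
    }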
Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1477946245-14134-4-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_display.c | 8 +++----- drivers/gpu/drm/i915/intel_drv.h | 2 +- drivers/gpu/drm/i915/intel_fbc.c | 2 +- drivers/gpu/drm/i915/intel_pm.c | 8 ++++---- 4 files changed, 9 insertions(+), 11 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 0beee9e..00784d9 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1008,10 +1008,8 @@ bool bxt_find_best_dpll(struct intel_crtc_state *crtc_state, int target_clock, target_clock, refclk, NULL, best_clock); } -bool intel_crtc_active(struct drm_crtc *crtc) +bool intel_crtc_active(struct intel_crtc *crtc) { - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - /* Be paranoid as we can arrive here with only partial * state retrieved from the hardware during setup. * @@ -1025,8 +1023,8 @@ bool intel_crtc_active(struct drm_crtc *crtc) * crtc->state->active once we have proper CRTC states wired up * for atomic. */ - return intel_crtc->active && crtc->primary->state->fb && - intel_crtc->config->base.adjusted_mode.crtc_clock; + return crtc->active && crtc->base.primary->state->fb && + crtc->config->base.adjusted_mode.crtc_clock; } enum transcoder intel_pipe_to_cpu_transcoder(struct drm_i915_private *dev_priv, diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index caa91d7..67b9052 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1349,7 +1349,7 @@ bool bxt_find_best_dpll(struct intel_crtc_state *crtc_state, int target_clock, struct dpll *best_clock); int chv_calc_dpll_params(int refclk, struct dpll *pll_clock); -bool intel_crtc_active(struct drm_crtc *crtc); +bool intel_crtc_active(struct intel_crtc *crtc); void hsw_enable_ips(struct intel_crtc *crtc); void hsw_disable_ips(struct intel_crtc *crtc); enum intel_display_power_domain diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index cbe2ebd..e230d48 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -1306,7 +1306,7 @@ void intel_fbc_init_pipe_state(struct drm_i915_private *dev_priv) return; for_each_intel_crtc(&dev_priv->drm, crtc) - if (intel_crtc_active(&crtc->base) && + if (intel_crtc_active(crtc) && to_intel_plane_state(crtc->base.primary->state)->base.visible) dev_priv->fbc.visible_pipes_mask |= (1 << crtc->pipe); } diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 9cfc19e..419c5eb 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -630,7 +630,7 @@ static struct drm_crtc *single_enabled_crtc(struct drm_device *dev) struct drm_crtc *crtc, *enabled = NULL; for_each_crtc(dev, crtc) { - if (intel_crtc_active(crtc)) { + if (intel_crtc_active(to_intel_crtc(crtc))) { if (enabled) return NULL; enabled = crtc; @@ -725,7 +725,7 @@ static bool g4x_compute_wm0(struct drm_device *dev, int entries, tlb_miss; crtc = intel_get_crtc_for_plane(dev, plane); - if (!intel_crtc_active(crtc)) { + if (!intel_crtc_active(to_intel_crtc(crtc))) { *cursor_wm = cursor->guard_size; *plane_wm = display->guard_size; return false; @@ -1538,7 +1538,7 @@ static void i9xx_update_wm(struct drm_crtc *unused_crtc) fifo_size = dev_priv->display.get_fifo_size(dev, 0); crtc = intel_get_crtc_for_plane(dev, 0); - if 
(intel_crtc_active(crtc)) { + if (intel_crtc_active(to_intel_crtc(crtc))) { const struct drm_display_mode *adjusted_mode; int cpp = drm_format_plane_cpp(crtc->primary->state->fb->pixel_format, 0); if (IS_GEN2(dev_priv)) @@ -1560,7 +1560,7 @@ static void i9xx_update_wm(struct drm_crtc *unused_crtc) fifo_size = dev_priv->display.get_fifo_size(dev, 1); crtc = intel_get_crtc_for_plane(dev, 1); - if (intel_crtc_active(crtc)) { + if (intel_crtc_active(to_intel_crtc(crtc))) { const struct drm_display_mode *adjusted_mode; int cpp = drm_format_plane_cpp(crtc->primary->state->fb->pixel_format, 0); if (IS_GEN2(dev_priv)) -- cgit v1.1 From 432081bcbf9a605ea072f6aa3dd0b0420344878c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 31 Oct 2016 22:37:03 +0200 Subject: drm/i915: Pass intel_crtc to update_wm functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unify our approach to things by passing around intel_crtc instead of drm_crtc. Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1477946245-14134-5-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/intel_display.c | 14 +++++----- drivers/gpu/drm/i915/intel_drv.h | 2 +- drivers/gpu/drm/i915/intel_pm.c | 50 +++++++++++++++++------------------- 4 files changed, 33 insertions(+), 35 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ea1cfc6..98f5986 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -503,7 +503,7 @@ struct drm_i915_display_funcs { void (*initial_watermarks)(struct intel_crtc_state *cstate); void (*optimize_watermarks)(struct intel_crtc_state *cstate); int (*compute_global_watermarks)(struct drm_atomic_state *state); - void (*update_wm)(struct drm_crtc *crtc); + void (*update_wm)(struct intel_crtc *crtc); int (*modeset_calc_cdclk)(struct drm_atomic_state *state); void (*modeset_commit_cdclk)(struct drm_atomic_state *state); /* Returns the active state of the crtc, and if the crtc is active, diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 00784d9..936870d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5073,7 +5073,7 @@ static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state) crtc->wm.cxsr_allowed = true; if (pipe_config->update_wm_post && pipe_config->base.active) - intel_update_watermarks(&crtc->base); + intel_update_watermarks(crtc); if (old_pri_state) { struct intel_plane_state *primary_state = @@ -5171,7 +5171,7 @@ static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state) if (dev_priv->display.initial_watermarks != NULL) dev_priv->display.initial_watermarks(pipe_config); else if (pipe_config->update_wm_pre) - intel_update_watermarks(&crtc->base); + intel_update_watermarks(crtc); } static void intel_crtc_disable_planes(struct drm_crtc *crtc, unsigned plane_mask) @@ -5491,7 +5491,7 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, if (dev_priv->display.initial_watermarks != NULL) dev_priv->display.initial_watermarks(pipe_config); else - intel_update_watermarks(crtc); + intel_update_watermarks(intel_crtc); /* XXX: Do the pipe assertions at the right place for BXT DSI. 
*/ if (!transcoder_is_dsi(cpu_transcoder)) @@ -6744,7 +6744,7 @@ static void valleyview_crtc_enable(struct intel_crtc_state *pipe_config, intel_color_load_luts(&pipe_config->base); - intel_update_watermarks(crtc); + intel_update_watermarks(intel_crtc); intel_enable_pipe(intel_crtc); assert_vblank_disabled(crtc); @@ -6797,7 +6797,7 @@ static void i9xx_crtc_enable(struct intel_crtc_state *pipe_config, intel_color_load_luts(&pipe_config->base); - intel_update_watermarks(crtc); + intel_update_watermarks(intel_crtc); intel_enable_pipe(intel_crtc); assert_vblank_disabled(crtc); @@ -6913,7 +6913,7 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc) encoder->base.crtc = NULL; intel_fbc_disable(intel_crtc); - intel_update_watermarks(crtc); + intel_update_watermarks(intel_crtc); intel_disable_shared_dpll(intel_crtc); domains = intel_crtc->enabled_power_domains; @@ -14397,7 +14397,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) intel_check_pch_fifo_underruns(dev_priv); if (!crtc->state->active) - intel_update_watermarks(crtc); + intel_update_watermarks(intel_crtc); } } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 67b9052..8ae13a6 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1709,7 +1709,7 @@ bool chv_phy_powergate_ch(struct drm_i915_private *dev_priv, enum dpio_phy phy, void intel_init_clock_gating(struct drm_device *dev); void intel_suspend_hw(struct drm_device *dev); int ilk_wm_max_level(const struct drm_i915_private *dev_priv); -void intel_update_watermarks(struct drm_crtc *crtc); +void intel_update_watermarks(struct intel_crtc *crtc); void intel_init_pm(struct drm_device *dev); void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv); void intel_pm_setup(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 419c5eb..9bcb15a 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -640,9 +640,9 @@ static struct drm_crtc *single_enabled_crtc(struct drm_device *dev) return enabled; } -static void pineview_update_wm(struct drm_crtc *unused_crtc) +static void pineview_update_wm(struct intel_crtc *unused_crtc) { - struct drm_device *dev = unused_crtc->dev; + struct drm_device *dev = unused_crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct drm_crtc *crtc; const struct cxsr_latency *latency; @@ -1330,20 +1330,19 @@ static void vlv_merge_wm(struct drm_device *dev, } } -static void vlv_update_wm(struct drm_crtc *crtc) +static void vlv_update_wm(struct intel_crtc *crtc) { - struct drm_device *dev = crtc->dev; + struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - enum pipe pipe = intel_crtc->pipe; + enum pipe pipe = crtc->pipe; struct vlv_wm_values wm = {}; - vlv_compute_wm(intel_crtc); + vlv_compute_wm(crtc); vlv_merge_wm(dev, &wm); if (memcmp(&dev_priv->wm.vlv, &wm, sizeof(wm)) == 0) { /* FIXME should be part of crtc atomic commit */ - vlv_pipe_set_fifo_size(intel_crtc); + vlv_pipe_set_fifo_size(crtc); return; } @@ -1359,9 +1358,9 @@ static void vlv_update_wm(struct drm_crtc *crtc) intel_set_memory_cxsr(dev_priv, false); /* FIXME should be part of crtc atomic commit */ - vlv_pipe_set_fifo_size(intel_crtc); + vlv_pipe_set_fifo_size(crtc); - vlv_write_wm_values(intel_crtc, &wm); + vlv_write_wm_values(crtc, &wm); DRM_DEBUG_KMS("Setting FIFO watermarks - %c: plane=%d, 
cursor=%d, " "sprite0=%d, sprite1=%d, SR: plane=%d, cursor=%d level=%d cxsr=%d\n", @@ -1385,9 +1384,9 @@ static void vlv_update_wm(struct drm_crtc *crtc) #define single_plane_enabled(mask) is_power_of_2(mask) -static void g4x_update_wm(struct drm_crtc *crtc) +static void g4x_update_wm(struct intel_crtc *crtc) { - struct drm_device *dev = crtc->dev; + struct drm_device *dev = crtc->base.dev; static const int sr_latency_ns = 12000; struct drm_i915_private *dev_priv = to_i915(dev); int planea_wm, planeb_wm, cursora_wm, cursorb_wm; @@ -1443,9 +1442,9 @@ static void g4x_update_wm(struct drm_crtc *crtc) intel_set_memory_cxsr(dev_priv, true); } -static void i965_update_wm(struct drm_crtc *unused_crtc) +static void i965_update_wm(struct intel_crtc *unused_crtc) { - struct drm_device *dev = unused_crtc->dev; + struct drm_device *dev = unused_crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct drm_crtc *crtc; int srwm = 1; @@ -1517,9 +1516,9 @@ static void i965_update_wm(struct drm_crtc *unused_crtc) #undef FW_WM -static void i9xx_update_wm(struct drm_crtc *unused_crtc) +static void i9xx_update_wm(struct intel_crtc *unused_crtc) { - struct drm_device *dev = unused_crtc->dev; + struct drm_device *dev = unused_crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); const struct intel_watermark_params *wm_info; uint32_t fwater_lo; @@ -1650,9 +1649,9 @@ static void i9xx_update_wm(struct drm_crtc *unused_crtc) intel_set_memory_cxsr(dev_priv, true); } -static void i845_update_wm(struct drm_crtc *unused_crtc) +static void i845_update_wm(struct intel_crtc *unused_crtc) { - struct drm_device *dev = unused_crtc->dev; + struct drm_device *dev = unused_crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct drm_crtc *crtc; const struct drm_display_mode *adjusted_mode; @@ -4193,18 +4192,17 @@ skl_compute_wm(struct drm_atomic_state *state) return 0; } -static void skl_update_wm(struct drm_crtc *crtc) +static void skl_update_wm(struct intel_crtc *intel_crtc) { - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); - struct drm_device *dev = crtc->dev; + struct drm_device *dev = intel_crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct skl_wm_values *results = &dev_priv->wm.skl_results; struct skl_wm_values *hw_vals = &dev_priv->wm.skl_hw; - struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state); + struct intel_crtc_state *cstate = to_intel_crtc_state(intel_crtc->base.state); struct skl_pipe_wm *pipe_wm = &cstate->wm.skl.optimal; enum pipe pipe = intel_crtc->pipe; - if ((results->dirty_pipes & drm_crtc_mask(crtc)) == 0) + if ((results->dirty_pipes & drm_crtc_mask(&intel_crtc->base)) == 0) return; mutex_lock(&dev_priv->wm.wm_mutex); @@ -4215,7 +4213,7 @@ static void skl_update_wm(struct drm_crtc *crtc) * the pipe's shut off, just do so here. Already active pipes will have * their watermarks updated once we update their planes. */ - if (crtc->state->active_changed) { + if (intel_crtc->base.state->active_changed) { int plane; for_each_universal_plane(dev_priv, pipe, plane) @@ -4654,9 +4652,9 @@ void ilk_wm_get_hw_state(struct drm_device *dev) * We don't use the sprite, so we can ignore that. And on Crestline we have * to set the non-SR watermarks to 8. 
*/ -void intel_update_watermarks(struct drm_crtc *crtc) +void intel_update_watermarks(struct intel_crtc *crtc) { - struct drm_i915_private *dev_priv = to_i915(crtc->dev); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); if (dev_priv->display.update_wm) dev_priv->display.update_wm(crtc); -- cgit v1.1 From efc2611e6ecbb14572b55f038a62439b845e3250 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 31 Oct 2016 22:37:04 +0200 Subject: drm/i915: Use struct intel_crtc in legacy platform wm code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unify our approach to things by using intel_crtc instead of drm_crtc. Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1477946245-14134-6-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_pm.c | 109 ++++++++++++++++++++++++---------------- 1 file changed, 67 insertions(+), 42 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 9bcb15a..f3bd220 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -625,12 +625,12 @@ static unsigned long intel_calculate_wm(unsigned long clock_in_khz, return wm_size; } -static struct drm_crtc *single_enabled_crtc(struct drm_device *dev) +static struct intel_crtc *single_enabled_crtc(struct drm_device *dev) { - struct drm_crtc *crtc, *enabled = NULL; + struct intel_crtc *crtc, *enabled = NULL; - for_each_crtc(dev, crtc) { - if (intel_crtc_active(to_intel_crtc(crtc))) { + for_each_intel_crtc(dev, crtc) { + if (intel_crtc_active(crtc)) { if (enabled) return NULL; enabled = crtc; @@ -644,7 +644,7 @@ static void pineview_update_wm(struct intel_crtc *unused_crtc) { struct drm_device *dev = unused_crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_crtc *crtc; + struct intel_crtc *crtc; const struct cxsr_latency *latency; u32 reg; unsigned long wm; @@ -661,8 +661,11 @@ static void pineview_update_wm(struct intel_crtc *unused_crtc) crtc = single_enabled_crtc(dev); if (crtc) { - const struct drm_display_mode *adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode; - int cpp = drm_format_plane_cpp(crtc->primary->state->fb->pixel_format, 0); + const struct drm_display_mode *adjusted_mode = + &crtc->config->base.adjusted_mode; + const struct drm_framebuffer *fb = + crtc->base.primary->state->fb; + int cpp = drm_format_plane_cpp(fb->pixel_format, 0); int clock = adjusted_mode->crtc_clock; /* Display SR */ @@ -718,24 +721,26 @@ static bool g4x_compute_wm0(struct drm_device *dev, int *plane_wm, int *cursor_wm) { - struct drm_crtc *crtc; + struct intel_crtc *crtc; const struct drm_display_mode *adjusted_mode; + const struct drm_framebuffer *fb; int htotal, hdisplay, clock, cpp; int line_time_us, line_count; int entries, tlb_miss; - crtc = intel_get_crtc_for_plane(dev, plane); - if (!intel_crtc_active(to_intel_crtc(crtc))) { + crtc = to_intel_crtc(intel_get_crtc_for_plane(dev, plane)); + if (!intel_crtc_active(crtc)) { *cursor_wm = cursor->guard_size; *plane_wm = display->guard_size; return false; } - adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode; + adjusted_mode = &crtc->config->base.adjusted_mode; + fb = crtc->base.primary->state->fb; clock = adjusted_mode->crtc_clock; htotal = adjusted_mode->crtc_htotal; - hdisplay = to_intel_crtc(crtc)->config->pipe_src_w; - cpp = 
drm_format_plane_cpp(crtc->primary->state->fb->pixel_format, 0); + hdisplay = crtc->config->pipe_src_w; + cpp = drm_format_plane_cpp(fb->pixel_format, 0); /* Use the small buffer method to calculate plane watermark */ entries = ((clock * cpp / 1000) * display_latency_ns) / 1000; @@ -750,7 +755,7 @@ static bool g4x_compute_wm0(struct drm_device *dev, /* Use the large buffer method to calculate cursor watermark */ line_time_us = max(htotal * 1000 / clock, 1); line_count = (cursor_latency_ns / line_time_us + 1000) / 1000; - entries = line_count * crtc->cursor->state->crtc_w * cpp; + entries = line_count * crtc->base.cursor->state->crtc_w * cpp; tlb_miss = cursor->fifo_size*cursor->cacheline_size - hdisplay * 8; if (tlb_miss > 0) entries += tlb_miss; @@ -804,8 +809,9 @@ static bool g4x_compute_srwm(struct drm_device *dev, const struct intel_watermark_params *cursor, int *display_wm, int *cursor_wm) { - struct drm_crtc *crtc; + struct intel_crtc *crtc; const struct drm_display_mode *adjusted_mode; + const struct drm_framebuffer *fb; int hdisplay, htotal, cpp, clock; unsigned long line_time_us; int line_count, line_size; @@ -817,12 +823,13 @@ static bool g4x_compute_srwm(struct drm_device *dev, return false; } - crtc = intel_get_crtc_for_plane(dev, plane); - adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode; + crtc = to_intel_crtc(intel_get_crtc_for_plane(dev, plane)); + adjusted_mode = &crtc->config->base.adjusted_mode; + fb = crtc->base.primary->state->fb; clock = adjusted_mode->crtc_clock; htotal = adjusted_mode->crtc_htotal; - hdisplay = to_intel_crtc(crtc)->config->pipe_src_w; - cpp = drm_format_plane_cpp(crtc->primary->state->fb->pixel_format, 0); + hdisplay = crtc->config->pipe_src_w; + cpp = drm_format_plane_cpp(fb->pixel_format, 0); line_time_us = max(htotal * 1000 / clock, 1); line_count = (latency_ns / line_time_us + 1000) / 1000; @@ -836,7 +843,7 @@ static bool g4x_compute_srwm(struct drm_device *dev, *display_wm = entries + display->guard_size; /* calculate the self-refresh watermark for display cursor */ - entries = line_count * cpp * crtc->cursor->state->crtc_w; + entries = line_count * cpp * crtc->base.cursor->state->crtc_w; entries = DIV_ROUND_UP(entries, cursor->cacheline_size); *cursor_wm = entries + cursor->guard_size; @@ -1446,7 +1453,7 @@ static void i965_update_wm(struct intel_crtc *unused_crtc) { struct drm_device *dev = unused_crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_crtc *crtc; + struct intel_crtc *crtc; int srwm = 1; int cursor_sr = 16; bool cxsr_enabled; @@ -1456,11 +1463,14 @@ static void i965_update_wm(struct intel_crtc *unused_crtc) if (crtc) { /* self-refresh has much higher latency */ static const int sr_latency_ns = 12000; - const struct drm_display_mode *adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode; + const struct drm_display_mode *adjusted_mode = + &crtc->config->base.adjusted_mode; + const struct drm_framebuffer *fb = + crtc->base.primary->state->fb; int clock = adjusted_mode->crtc_clock; int htotal = adjusted_mode->crtc_htotal; - int hdisplay = to_intel_crtc(crtc)->config->pipe_src_w; - int cpp = drm_format_plane_cpp(crtc->primary->state->fb->pixel_format, 0); + int hdisplay = crtc->config->pipe_src_w; + int cpp = drm_format_plane_cpp(fb->pixel_format, 0); unsigned long line_time_us; int entries; @@ -1478,7 +1488,7 @@ static void i965_update_wm(struct intel_crtc *unused_crtc) entries, srwm); entries = (((sr_latency_ns / line_time_us) + 1000) / 1000) * - cpp * 
crtc->cursor->state->crtc_w; + cpp * crtc->base.cursor->state->crtc_w; entries = DIV_ROUND_UP(entries, i965_cursor_wm_info.cacheline_size); cursor_sr = i965_cursor_wm_info.fifo_size - @@ -1526,7 +1536,7 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc) int cwm, srwm = 1; int fifo_size; int planea_wm, planeb_wm; - struct drm_crtc *crtc, *enabled = NULL; + struct intel_crtc *crtc, *enabled = NULL; if (IS_I945GM(dev)) wm_info = &i945_wm_info; @@ -1536,14 +1546,19 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc) wm_info = &i830_a_wm_info; fifo_size = dev_priv->display.get_fifo_size(dev, 0); - crtc = intel_get_crtc_for_plane(dev, 0); - if (intel_crtc_active(to_intel_crtc(crtc))) { - const struct drm_display_mode *adjusted_mode; - int cpp = drm_format_plane_cpp(crtc->primary->state->fb->pixel_format, 0); + crtc = to_intel_crtc(intel_get_crtc_for_plane(dev, 0)); + if (intel_crtc_active(crtc)) { + const struct drm_display_mode *adjusted_mode = + &crtc->config->base.adjusted_mode; + const struct drm_framebuffer *fb = + crtc->base.primary->state->fb; + int cpp; + if (IS_GEN2(dev_priv)) cpp = 4; + else + cpp = drm_format_plane_cpp(fb->pixel_format, 0); - adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode; planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock, wm_info, fifo_size, cpp, pessimal_latency_ns); @@ -1558,14 +1573,19 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc) wm_info = &i830_bc_wm_info; fifo_size = dev_priv->display.get_fifo_size(dev, 1); - crtc = intel_get_crtc_for_plane(dev, 1); - if (intel_crtc_active(to_intel_crtc(crtc))) { - const struct drm_display_mode *adjusted_mode; - int cpp = drm_format_plane_cpp(crtc->primary->state->fb->pixel_format, 0); + crtc = to_intel_crtc(intel_get_crtc_for_plane(dev, 1)); + if (intel_crtc_active(crtc)) { + const struct drm_display_mode *adjusted_mode = + &crtc->config->base.adjusted_mode; + const struct drm_framebuffer *fb = + crtc->base.primary->state->fb; + int cpp; + if (IS_GEN2(dev_priv)) cpp = 4; + else + cpp = drm_format_plane_cpp(fb->pixel_format, 0); - adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode; planeb_wm = intel_calculate_wm(adjusted_mode->crtc_clock, wm_info, fifo_size, cpp, pessimal_latency_ns); @@ -1584,7 +1604,7 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc) if (IS_I915GM(dev_priv) && enabled) { struct drm_i915_gem_object *obj; - obj = intel_fb_obj(enabled->primary->state->fb); + obj = intel_fb_obj(enabled->base.primary->state->fb); /* self-refresh seems busted with untiled */ if (!i915_gem_object_is_tiled(obj)) @@ -1603,16 +1623,21 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc) if (HAS_FW_BLC(dev) && enabled) { /* self-refresh has much higher latency */ static const int sr_latency_ns = 6000; - const struct drm_display_mode *adjusted_mode = &to_intel_crtc(enabled)->config->base.adjusted_mode; + const struct drm_display_mode *adjusted_mode = + &enabled->config->base.adjusted_mode; + const struct drm_framebuffer *fb = + enabled->base.primary->state->fb; int clock = adjusted_mode->crtc_clock; int htotal = adjusted_mode->crtc_htotal; - int hdisplay = to_intel_crtc(enabled)->config->pipe_src_w; - int cpp = drm_format_plane_cpp(enabled->primary->state->fb->pixel_format, 0); + int hdisplay = enabled->config->pipe_src_w; + int cpp; unsigned long line_time_us; int entries; if (IS_I915GM(dev_priv) || IS_I945GM(dev_priv)) cpp = 4; + else + cpp = drm_format_plane_cpp(fb->pixel_format, 0); line_time_us = max(htotal * 1000 / clock, 1); @@ -1653,7 
+1678,7 @@ static void i845_update_wm(struct intel_crtc *unused_crtc) { struct drm_device *dev = unused_crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_crtc *crtc; + struct intel_crtc *crtc; const struct drm_display_mode *adjusted_mode; uint32_t fwater_lo; int planea_wm; @@ -1662,7 +1687,7 @@ static void i845_update_wm(struct intel_crtc *unused_crtc) if (crtc == NULL) return; - adjusted_mode = &to_intel_crtc(crtc)->config->base.adjusted_mode; + adjusted_mode = &crtc->config->base.adjusted_mode; planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock, &i845_wm_info, dev_priv->display.get_fifo_size(dev, 0), -- cgit v1.1 From e2af48c66b41fecbec5bc1b07640c2419a724c34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 31 Oct 2016 22:37:05 +0200 Subject: drm/i915: Store struct intel_crtc * in {pipe,plane}_to_crtc_mapping[] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A lot of users of the {pipe,plane}_to_crtc_mapping[] will end up casting the result to intel_crtc, so let's just store the intel_crtc pointer in the first place. Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1477946245-14134-7-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_debugfs.c | 9 ++-- drivers/gpu/drm/i915/i915_drv.h | 4 +- drivers/gpu/drm/i915/i915_irq.c | 12 +++--- drivers/gpu/drm/i915/intel_display.c | 68 ++++++++++++++---------------- drivers/gpu/drm/i915/intel_drv.h | 7 ++- drivers/gpu/drm/i915/intel_dvo.c | 4 +- drivers/gpu/drm/i915/intel_fifo_underrun.c | 22 +++++----- drivers/gpu/drm/i915/intel_lvds.c | 4 +- drivers/gpu/drm/i915/intel_pm.c | 10 ++--- 9 files changed, 64 insertions(+), 76 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index bf19192..1696f7b 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4065,8 +4065,7 @@ static void hsw_trans_edp_pipe_A_crc_wa(struct drm_i915_private *dev_priv, bool enable) { struct drm_device *dev = &dev_priv->drm; - struct intel_crtc *crtc = - to_intel_crtc(dev_priv->pipe_to_crtc_mapping[PIPE_A]); + struct intel_crtc *crtc = dev_priv->pipe_to_crtc_mapping[PIPE_A]; struct intel_crtc_state *pipe_config; struct drm_atomic_state *state; int ret = 0; @@ -4134,8 +4133,7 @@ static int pipe_crc_set_source(struct drm_i915_private *dev_priv, { struct drm_device *dev = &dev_priv->drm; struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[pipe]; - struct intel_crtc *crtc = - to_intel_crtc(intel_get_crtc_for_pipe(dev, pipe)); + struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev, pipe); enum intel_display_power_domain power_domain; u32 val = 0; /* shut up gcc */ int ret; @@ -4206,8 +4204,7 @@ static int pipe_crc_set_source(struct drm_i915_private *dev_priv, /* real source -> none transition */ if (source == INTEL_PIPE_CRC_SOURCE_NONE) { struct intel_pipe_crc_entry *entries; - struct intel_crtc *crtc = - to_intel_crtc(dev_priv->pipe_to_crtc_mapping[pipe]); + struct intel_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe]; DRM_DEBUG_DRIVER("stopping CRCs for pipe %c\n", pipe_name(pipe)); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 98f5986..eeed752 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1935,8 +1935,8 @@ struct drm_i915_private { /* Kernel Modesetting */ - struct drm_crtc 
*plane_to_crtc_mapping[I915_MAX_PIPES]; - struct drm_crtc *pipe_to_crtc_mapping[I915_MAX_PIPES]; + struct intel_crtc *plane_to_crtc_mapping[I915_MAX_PIPES]; + struct intel_crtc *pipe_to_crtc_mapping[I915_MAX_PIPES]; wait_queue_head_t pending_flip_queue; #ifdef CONFIG_DEBUG_FS diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 4dda2b1..696dc6f 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -725,8 +725,7 @@ static u32 i915_get_vblank_counter(struct drm_device *dev, unsigned int pipe) struct drm_i915_private *dev_priv = to_i915(dev); i915_reg_t high_frame, low_frame; u32 high1, high2, low, pixel, vbl_start, hsync_start, htotal; - struct intel_crtc *intel_crtc = - to_intel_crtc(dev_priv->pipe_to_crtc_mapping[pipe]); + struct intel_crtc *intel_crtc = dev_priv->pipe_to_crtc_mapping[pipe]; const struct drm_display_mode *mode = &intel_crtc->base.hwmode; htotal = mode->crtc_htotal; @@ -831,8 +830,7 @@ static int i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, const struct drm_display_mode *mode) { struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe]; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct intel_crtc *intel_crtc = dev_priv->pipe_to_crtc_mapping[pipe]; int position; int vbl_start, vbl_end, hsync_start, htotal, vtotal; bool in_vbl = true; @@ -967,7 +965,7 @@ static int i915_get_vblank_timestamp(struct drm_device *dev, unsigned int pipe, struct timeval *vblank_time, unsigned flags) { - struct drm_crtc *crtc; + struct intel_crtc *crtc; if (pipe >= INTEL_INFO(dev)->num_pipes) { DRM_ERROR("Invalid crtc %u\n", pipe); @@ -981,7 +979,7 @@ static int i915_get_vblank_timestamp(struct drm_device *dev, unsigned int pipe, return -EINVAL; } - if (!crtc->hwmode.crtc_clock) { + if (!crtc->base.hwmode.crtc_clock) { DRM_DEBUG_KMS("crtc %u is disabled\n", pipe); return -EBUSY; } @@ -989,7 +987,7 @@ static int i915_get_vblank_timestamp(struct drm_device *dev, unsigned int pipe, /* Helper routine in DRM core does all the work: */ return drm_calc_vbltimestamp_from_scanoutpos(dev, pipe, max_error, vblank_time, flags, - &crtc->hwmode); + &crtc->base.hwmode); } static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 936870d..bed201a 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1030,10 +1030,9 @@ bool intel_crtc_active(struct intel_crtc *crtc) enum transcoder intel_pipe_to_cpu_transcoder(struct drm_i915_private *dev_priv, enum pipe pipe) { - struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe]; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct intel_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe]; - return intel_crtc->config->cpu_transcoder; + return crtc->config->cpu_transcoder; } static bool pipe_dsl_stopped(struct drm_device *dev, enum pipe pipe) @@ -1786,8 +1785,7 @@ void vlv_wait_port_ready(struct drm_i915_private *dev_priv, static void ironlake_enable_pch_transcoder(struct drm_i915_private *dev_priv, enum pipe pipe) { - struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe]; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct intel_crtc *intel_crtc = dev_priv->pipe_to_crtc_mapping[pipe]; i915_reg_t reg; uint32_t val, pipeconf_val; @@ -7073,7 +7071,7 @@ static int ironlake_check_fdi_lanes(struct drm_device *dev, enum pipe pipe, if 
(pipe_config->fdi_lanes <= 2) return 0; - other_crtc = to_intel_crtc(intel_get_crtc_for_pipe(dev, PIPE_C)); + other_crtc = intel_get_crtc_for_pipe(dev, PIPE_C); other_crtc_state = intel_atomic_get_crtc_state(state, other_crtc); if (IS_ERR(other_crtc_state)) @@ -7092,7 +7090,7 @@ static int ironlake_check_fdi_lanes(struct drm_device *dev, enum pipe pipe, return -EINVAL; } - other_crtc = to_intel_crtc(intel_get_crtc_for_pipe(dev, PIPE_B)); + other_crtc = intel_get_crtc_for_pipe(dev, PIPE_B); other_crtc_state = intel_atomic_get_crtc_state(state, other_crtc); if (IS_ERR(other_crtc_state)) @@ -8107,8 +8105,7 @@ static void chv_prepare_pll(struct intel_crtc *crtc, int vlv_force_pll_on(struct drm_device *dev, enum pipe pipe, const struct dpll *dpll) { - struct intel_crtc *crtc = - to_intel_crtc(intel_get_crtc_for_pipe(dev, pipe)); + struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev, pipe); struct intel_crtc_state *pipe_config; pipe_config = kzalloc(sizeof(*pipe_config), GFP_KERNEL); @@ -11663,8 +11660,7 @@ static bool pageflip_finished(struct intel_crtc *crtc, void intel_finish_page_flip_cs(struct drm_i915_private *dev_priv, int pipe) { struct drm_device *dev = &dev_priv->drm; - struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe]; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct intel_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe]; struct intel_flip_work *work; unsigned long flags; @@ -11677,12 +11673,12 @@ void intel_finish_page_flip_cs(struct drm_i915_private *dev_priv, int pipe) * lost pageflips) so needs the full irqsave spinlocks. */ spin_lock_irqsave(&dev->event_lock, flags); - work = intel_crtc->flip_work; + work = crtc->flip_work; if (work != NULL && !is_mmio_work(work) && - pageflip_finished(intel_crtc, work)) - page_flip_completed(intel_crtc); + pageflip_finished(crtc, work)) + page_flip_completed(crtc); spin_unlock_irqrestore(&dev->event_lock, flags); } @@ -11690,8 +11686,7 @@ void intel_finish_page_flip_cs(struct drm_i915_private *dev_priv, int pipe) void intel_finish_page_flip_mmio(struct drm_i915_private *dev_priv, int pipe) { struct drm_device *dev = &dev_priv->drm; - struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe]; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct intel_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe]; struct intel_flip_work *work; unsigned long flags; @@ -11704,12 +11699,12 @@ void intel_finish_page_flip_mmio(struct drm_i915_private *dev_priv, int pipe) * lost pageflips) so needs the full irqsave spinlocks. 
*/ spin_lock_irqsave(&dev->event_lock, flags); - work = intel_crtc->flip_work; + work = crtc->flip_work; if (work != NULL && is_mmio_work(work) && - pageflip_finished(intel_crtc, work)) - page_flip_completed(intel_crtc); + pageflip_finished(crtc, work)) + page_flip_completed(crtc); spin_unlock_irqrestore(&dev->event_lock, flags); } @@ -12121,8 +12116,7 @@ static bool __pageflip_stall_check_cs(struct drm_i915_private *dev_priv, void intel_check_page_flip(struct drm_i915_private *dev_priv, int pipe) { struct drm_device *dev = &dev_priv->drm; - struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe]; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct intel_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe]; struct intel_flip_work *work; WARN_ON(!in_interrupt()); @@ -12131,19 +12125,19 @@ void intel_check_page_flip(struct drm_i915_private *dev_priv, int pipe) return; spin_lock(&dev->event_lock); - work = intel_crtc->flip_work; + work = crtc->flip_work; if (work != NULL && !is_mmio_work(work) && - __pageflip_stall_check_cs(dev_priv, intel_crtc, work)) { + __pageflip_stall_check_cs(dev_priv, crtc, work)) { WARN_ONCE(1, "Kicking stuck page flip: queued at %d, now %d\n", - work->flip_queued_vblank, intel_crtc_get_vblank_counter(intel_crtc)); - page_flip_completed(intel_crtc); + work->flip_queued_vblank, intel_crtc_get_vblank_counter(crtc)); + page_flip_completed(crtc); work = NULL; } if (work != NULL && !is_mmio_work(work) && - intel_crtc_get_vblank_counter(intel_crtc) - work->flip_queued_vblank > 1) + intel_crtc_get_vblank_counter(crtc) - work->flip_queued_vblank > 1) intel_queue_rps_boost_for_request(work->flip_queued_req); spin_unlock(&dev->event_lock); } @@ -14181,22 +14175,22 @@ static void intel_atomic_wait_for_vblanks(struct drm_device *dev, return; for_each_pipe(dev_priv, pipe) { - struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe]; + struct intel_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe]; if (!((1 << pipe) & crtc_mask)) continue; - ret = drm_crtc_vblank_get(crtc); + ret = drm_crtc_vblank_get(&crtc->base); if (WARN_ON(ret != 0)) { crtc_mask &= ~(1 << pipe); continue; } - last_vblank_count[pipe] = drm_crtc_vblank_count(crtc); + last_vblank_count[pipe] = drm_crtc_vblank_count(&crtc->base); } for_each_pipe(dev_priv, pipe) { - struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe]; + struct intel_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe]; long lret; if (!((1 << pipe) & crtc_mask)) @@ -14204,12 +14198,12 @@ static void intel_atomic_wait_for_vblanks(struct drm_device *dev, lret = wait_event_timeout(dev->vblank[pipe].queue, last_vblank_count[pipe] != - drm_crtc_vblank_count(crtc), + drm_crtc_vblank_count(&crtc->base), msecs_to_jiffies(50)); WARN(!lret, "pipe %c vblank wait timed out\n", pipe_name(pipe)); - drm_crtc_vblank_put(crtc); + drm_crtc_vblank_put(&crtc->base); } } @@ -15317,8 +15311,8 @@ static int intel_crtc_init(struct drm_device *dev, enum pipe pipe) BUG_ON(pipe >= ARRAY_SIZE(dev_priv->plane_to_crtc_mapping) || dev_priv->plane_to_crtc_mapping[intel_crtc->plane] != NULL); - dev_priv->plane_to_crtc_mapping[intel_crtc->plane] = &intel_crtc->base; - dev_priv->pipe_to_crtc_mapping[intel_crtc->pipe] = &intel_crtc->base; + dev_priv->plane_to_crtc_mapping[intel_crtc->plane] = intel_crtc; + dev_priv->pipe_to_crtc_mapping[intel_crtc->pipe] = intel_crtc; drm_crtc_helper_add(&intel_crtc->base, &intel_helper_funcs); @@ -16871,7 +16865,8 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) pipe = 0; if (encoder->get_hw_state(encoder, 
&pipe)) { - crtc = to_intel_crtc(dev_priv->pipe_to_crtc_mapping[pipe]); + crtc = dev_priv->pipe_to_crtc_mapping[pipe]; + encoder->base.crtc = &crtc->base; crtc->config->output_types |= 1 << encoder->type; encoder->get_config(encoder, crtc->config); @@ -16972,7 +16967,8 @@ intel_modeset_setup_hw_state(struct drm_device *dev) } for_each_pipe(dev_priv, pipe) { - crtc = to_intel_crtc(dev_priv->pipe_to_crtc_mapping[pipe]); + crtc = dev_priv->pipe_to_crtc_mapping[pipe]; + intel_sanitize_crtc(crtc); intel_dump_pipe_config(crtc, crtc->config, "[setup_hw_state]"); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 8ae13a6..58e33c3 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1028,14 +1028,14 @@ vlv_pipe_to_channel(enum pipe pipe) } } -static inline struct drm_crtc * +static inline struct intel_crtc * intel_get_crtc_for_pipe(struct drm_device *dev, int pipe) { struct drm_i915_private *dev_priv = to_i915(dev); return dev_priv->pipe_to_crtc_mapping[pipe]; } -static inline struct drm_crtc * +static inline struct intel_crtc * intel_get_crtc_for_plane(struct drm_device *dev, int plane) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -1251,8 +1251,7 @@ intel_wait_for_vblank(struct drm_device *dev, int pipe) static inline void intel_wait_for_vblank_if_active(struct drm_device *dev, int pipe) { - const struct intel_crtc *crtc = - to_intel_crtc(intel_get_crtc_for_pipe(dev, pipe)); + const struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev, pipe); if (crtc->active) intel_wait_for_vblank(dev, pipe); diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c index cd57490..61d78fc 100644 --- a/drivers/gpu/drm/i915/intel_dvo.c +++ b/drivers/gpu/drm/i915/intel_dvo.c @@ -393,12 +393,12 @@ intel_dvo_get_current_mode(struct drm_connector *connector) * its timings to get how the BIOS set up the panel. */ if (dvo_val & DVO_ENABLE) { - struct drm_crtc *crtc; + struct intel_crtc *crtc; int pipe = (dvo_val & DVO_PIPE_B_SELECT) ? 
1 : 0; crtc = intel_get_crtc_for_pipe(dev, pipe); if (crtc) { - mode = intel_crtc_mode_get(dev, crtc); + mode = intel_crtc_mode_get(dev, &crtc->base); if (mode) { mode->type |= DRM_MODE_TYPE_PREFERRED; if (dvo_val & DVO_HSYNC_ACTIVE_HIGH) diff --git a/drivers/gpu/drm/i915/intel_fifo_underrun.c b/drivers/gpu/drm/i915/intel_fifo_underrun.c index 3018f4f..34f93f1 100644 --- a/drivers/gpu/drm/i915/intel_fifo_underrun.c +++ b/drivers/gpu/drm/i915/intel_fifo_underrun.c @@ -57,7 +57,7 @@ static bool ivb_can_enable_err_int(struct drm_device *dev) assert_spin_locked(&dev_priv->irq_lock); for_each_pipe(dev_priv, pipe) { - crtc = to_intel_crtc(dev_priv->pipe_to_crtc_mapping[pipe]); + crtc = dev_priv->pipe_to_crtc_mapping[pipe]; if (crtc->cpu_fifo_underrun_disabled) return false; @@ -75,7 +75,7 @@ static bool cpt_can_enable_serr_int(struct drm_device *dev) assert_spin_locked(&dev_priv->irq_lock); for_each_pipe(dev_priv, pipe) { - crtc = to_intel_crtc(dev_priv->pipe_to_crtc_mapping[pipe]); + crtc = dev_priv->pipe_to_crtc_mapping[pipe]; if (crtc->pch_fifo_underrun_disabled) return false; @@ -245,14 +245,13 @@ static bool __intel_set_cpu_fifo_underrun_reporting(struct drm_device *dev, enum pipe pipe, bool enable) { struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe]; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct intel_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe]; bool old; assert_spin_locked(&dev_priv->irq_lock); - old = !intel_crtc->cpu_fifo_underrun_disabled; - intel_crtc->cpu_fifo_underrun_disabled = !enable; + old = !crtc->cpu_fifo_underrun_disabled; + crtc->cpu_fifo_underrun_disabled = !enable; if (HAS_GMCH_DISPLAY(dev_priv)) i9xx_set_fifo_underrun_reporting(dev, pipe, enable, old); @@ -314,8 +313,7 @@ bool intel_set_pch_fifo_underrun_reporting(struct drm_i915_private *dev_priv, enum transcoder pch_transcoder, bool enable) { - struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pch_transcoder]; - struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct intel_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pch_transcoder]; unsigned long flags; bool old; @@ -330,8 +328,8 @@ bool intel_set_pch_fifo_underrun_reporting(struct drm_i915_private *dev_priv, spin_lock_irqsave(&dev_priv->irq_lock, flags); - old = !intel_crtc->pch_fifo_underrun_disabled; - intel_crtc->pch_fifo_underrun_disabled = !enable; + old = !crtc->pch_fifo_underrun_disabled; + crtc->pch_fifo_underrun_disabled = !enable; if (HAS_PCH_IBX(dev_priv)) ibx_set_fifo_underrun_reporting(&dev_priv->drm, @@ -358,7 +356,7 @@ bool intel_set_pch_fifo_underrun_reporting(struct drm_i915_private *dev_priv, void intel_cpu_fifo_underrun_irq_handler(struct drm_i915_private *dev_priv, enum pipe pipe) { - struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe]; + struct intel_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe]; /* We may be called too early in init, thanks BIOS! */ if (crtc == NULL) @@ -366,7 +364,7 @@ void intel_cpu_fifo_underrun_irq_handler(struct drm_i915_private *dev_priv, /* GMCH can't disable fifo underruns, filter them. 
*/ if (HAS_GMCH_DISPLAY(dev_priv) && - to_intel_crtc(crtc)->cpu_fifo_underrun_disabled) + crtc->cpu_fifo_underrun_disabled) return; if (intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false)) diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 199b90c..81ed32b 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -985,7 +985,7 @@ void intel_lvds_init(struct drm_device *dev) struct drm_display_mode *fixed_mode = NULL; struct drm_display_mode *downclock_mode = NULL; struct edid *edid; - struct drm_crtc *crtc; + struct intel_crtc *crtc; i915_reg_t lvds_reg; u32 lvds; int pipe; @@ -1166,7 +1166,7 @@ void intel_lvds_init(struct drm_device *dev) crtc = intel_get_crtc_for_pipe(dev, pipe); if (crtc && (lvds & LVDS_PORT_EN)) { - fixed_mode = intel_crtc_mode_get(dev, crtc); + fixed_mode = intel_crtc_mode_get(dev, &crtc->base); if (fixed_mode) { DRM_DEBUG_KMS("using current (BIOS) mode: "); drm_mode_debug_printmodeline(fixed_mode); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index f3bd220..8d1040c 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -728,7 +728,7 @@ static bool g4x_compute_wm0(struct drm_device *dev, int line_time_us, line_count; int entries, tlb_miss; - crtc = to_intel_crtc(intel_get_crtc_for_plane(dev, plane)); + crtc = intel_get_crtc_for_plane(dev, plane); if (!intel_crtc_active(crtc)) { *cursor_wm = cursor->guard_size; *plane_wm = display->guard_size; @@ -823,7 +823,7 @@ static bool g4x_compute_srwm(struct drm_device *dev, return false; } - crtc = to_intel_crtc(intel_get_crtc_for_plane(dev, plane)); + crtc = intel_get_crtc_for_plane(dev, plane); adjusted_mode = &crtc->config->base.adjusted_mode; fb = crtc->base.primary->state->fb; clock = adjusted_mode->crtc_clock; @@ -1546,7 +1546,7 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc) wm_info = &i830_a_wm_info; fifo_size = dev_priv->display.get_fifo_size(dev, 0); - crtc = to_intel_crtc(intel_get_crtc_for_plane(dev, 0)); + crtc = intel_get_crtc_for_plane(dev, 0); if (intel_crtc_active(crtc)) { const struct drm_display_mode *adjusted_mode = &crtc->config->base.adjusted_mode; @@ -1573,7 +1573,7 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc) wm_info = &i830_bc_wm_info; fifo_size = dev_priv->display.get_fifo_size(dev, 1); - crtc = to_intel_crtc(intel_get_crtc_for_plane(dev, 1)); + crtc = intel_get_crtc_for_plane(dev, 1); if (intel_crtc_active(crtc)) { const struct drm_display_mode *adjusted_mode = &crtc->config->base.adjusted_mode; @@ -3066,7 +3066,7 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state) /* Since we're now guaranteed to only have one active CRTC... */ pipe = ffs(intel_state->active_crtcs) - 1; - crtc = to_intel_crtc(dev_priv->pipe_to_crtc_mapping[pipe]); + crtc = dev_priv->pipe_to_crtc_mapping[pipe]; cstate = to_intel_crtc_state(crtc->base.state); if (crtc->base.state->adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE) -- cgit v1.1 From 0f0f74bc83aa2902914d6c4ecee3d5d37338f591 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 31 Oct 2016 22:37:06 +0200 Subject: drm/i915: Pass dev_priv to intel_wait_for_vblank() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unify our approach to things by passing around dev_priv instead of dev. 
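For illustration, the helper itself becomes a one-line wrapper once it takes dev_priv; this is the shape of the intel_drv.h result, excerpted from the hunk below:

	static inline void
	intel_wait_for_vblank(struct drm_i915_private *dev_priv, enum pipe pipe)
	{
		/* reach the drm_device through dev_priv only where the DRM core wants it */
		drm_wait_one_vblank(&dev_priv->drm, pipe);
	}

Callers that previously had to dig up a struct drm_device (often via &dev_priv->drm) now pass dev_priv straight through, and pipe picks up its proper enum pipe type along the way.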
Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1477946245-14134-8-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_debugfs.c | 2 +- drivers/gpu/drm/i915/intel_crt.c | 2 +- drivers/gpu/drm/i915/intel_display.c | 30 ++++++++++++++++-------------- drivers/gpu/drm/i915/intel_dp.c | 18 +++++++++--------- drivers/gpu/drm/i915/intel_drv.h | 10 +++++----- drivers/gpu/drm/i915/intel_hdmi.c | 2 +- drivers/gpu/drm/i915/intel_sdvo.c | 4 ++-- drivers/gpu/drm/i915/intel_tv.c | 6 +++--- 8 files changed, 38 insertions(+), 36 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 1696f7b..ce6aa7e 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4211,7 +4211,7 @@ static int pipe_crc_set_source(struct drm_i915_private *dev_priv, drm_modeset_lock(&crtc->base.mutex, NULL); if (crtc->base.state->active) - intel_wait_for_vblank(dev, pipe); + intel_wait_for_vblank(dev_priv, pipe); drm_modeset_unlock(&crtc->base.mutex); spin_lock_irq(&pipe_crc->lock); diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index a97151f..30eb95b 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -573,7 +573,7 @@ intel_crt_load_detect(struct intel_crt *crt, uint32_t pipe) POSTING_READ(pipeconf_reg); /* Wait for next Vblank to substitue * border color for Color info */ - intel_wait_for_vblank(dev, pipe); + intel_wait_for_vblank(dev_priv, pipe); st00 = I915_READ8(_VGA_MSR_WRITE); status = ((st00 & (1 << 4)) != 0) ? connector_status_connected : diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index bed201a..50901b0 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4249,6 +4249,7 @@ static void ironlake_fdi_disable(struct drm_crtc *crtc) bool intel_has_pending_fb_unpin(struct drm_device *dev) { + struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *crtc; /* Note that we don't need to be called with mode_config.lock here @@ -4263,7 +4264,7 @@ bool intel_has_pending_fb_unpin(struct drm_device *dev) continue; if (crtc->flip_work) - intel_wait_for_vblank(dev, crtc->pipe); + intel_wait_for_vblank(dev_priv, crtc->pipe); return true; } @@ -4940,7 +4941,7 @@ void hsw_disable_ips(struct intel_crtc *crtc) } /* We need to wait for a vblank before we can disable the plane. 
*/ - intel_wait_for_vblank(dev, crtc->pipe); + intel_wait_for_vblank(dev_priv, crtc->pipe); } static void intel_crtc_dpms_overlay_disable(struct intel_crtc *intel_crtc) @@ -5052,7 +5053,7 @@ intel_pre_disable_primary_noatomic(struct drm_crtc *crtc) if (HAS_GMCH_DISPLAY(dev_priv)) { intel_set_memory_cxsr(dev_priv, false); dev_priv->wm.vlv.cxsr = false; - intel_wait_for_vblank(dev, pipe); + intel_wait_for_vblank(dev_priv, pipe); } } @@ -5129,7 +5130,7 @@ static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state) if (old_crtc_state->base.active) { intel_set_memory_cxsr(dev_priv, false); dev_priv->wm.vlv.cxsr = false; - intel_wait_for_vblank(dev, crtc->pipe); + intel_wait_for_vblank(dev_priv, crtc->pipe); } } @@ -5142,7 +5143,7 @@ static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state) */ if (pipe_config->disable_lp_wm) { ilk_disable_lp_wm(dev); - intel_wait_for_vblank(dev, crtc->pipe); + intel_wait_for_vblank(dev_priv, crtc->pipe); } /* @@ -5397,7 +5398,7 @@ static void ironlake_crtc_enable(struct intel_crtc_state *pipe_config, /* Must wait for vblank to avoid spurious PCH FIFO underruns */ if (intel_crtc->config->has_pch_encoder) - intel_wait_for_vblank(dev, pipe); + intel_wait_for_vblank(dev_priv, pipe); intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); intel_set_pch_fifo_underrun_reporting(dev_priv, pipe, true); } @@ -5507,8 +5508,8 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, intel_encoders_enable(crtc, pipe_config, old_state); if (intel_crtc->config->has_pch_encoder) { - intel_wait_for_vblank(dev, pipe); - intel_wait_for_vblank(dev, pipe); + intel_wait_for_vblank(dev_priv, pipe); + intel_wait_for_vblank(dev_priv, pipe); intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true); intel_set_pch_fifo_underrun_reporting(dev_priv, TRANSCODER_A, true); @@ -5518,8 +5519,8 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, * to change the workaround. */ hsw_workaround_pipe = pipe_config->hsw_workaround_pipe; if (IS_HASWELL(dev_priv) && hsw_workaround_pipe != INVALID_PIPE) { - intel_wait_for_vblank(dev, hsw_workaround_pipe); - intel_wait_for_vblank(dev, hsw_workaround_pipe); + intel_wait_for_vblank(dev_priv, hsw_workaround_pipe); + intel_wait_for_vblank(dev_priv, hsw_workaround_pipe); } } @@ -6833,7 +6834,7 @@ static void i9xx_crtc_disable(struct intel_crtc_state *old_crtc_state, * wait for planes to fully turn off before disabling the pipe. 
*/ if (IS_GEN2(dev_priv)) - intel_wait_for_vblank(dev, pipe); + intel_wait_for_vblank(dev_priv, pipe); intel_encoders_disable(crtc, old_crtc_state, old_state); @@ -11129,6 +11130,7 @@ bool intel_get_load_detect_pipe(struct drm_connector *connector, struct drm_encoder *encoder = &intel_encoder->base; struct drm_crtc *crtc = NULL; struct drm_device *dev = encoder->dev; + struct drm_i915_private *dev_priv = to_i915(dev); struct drm_framebuffer *fb; struct drm_mode_config *config = &dev->mode_config; struct drm_atomic_state *state = NULL, *restore_state = NULL; @@ -11281,7 +11283,7 @@ found: old->restore_state = restore_state; /* let the connector get through one full cycle before testing */ - intel_wait_for_vblank(dev, intel_crtc->pipe); + intel_wait_for_vblank(dev_priv, intel_crtc->pipe); return true; fail: @@ -14277,7 +14279,7 @@ static void intel_update_crtcs(struct drm_atomic_state *state, static void skl_update_crtcs(struct drm_atomic_state *state, unsigned int *crtc_vblank_mask) { - struct drm_device *dev = state->dev; + struct drm_i915_private *dev_priv = to_i915(state->dev); struct intel_atomic_state *intel_state = to_intel_atomic_state(state); struct drm_crtc *crtc; struct intel_crtc *intel_crtc; @@ -14328,7 +14330,7 @@ static void skl_update_crtcs(struct drm_atomic_state *state, crtc_vblank_mask); if (vbl_wait) - intel_wait_for_vblank(dev, pipe); + intel_wait_for_vblank(dev_priv, pipe); progress = true; } diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 1063afe..9c1921a 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -2364,7 +2364,7 @@ static void ironlake_edp_pll_on(struct intel_dp *intel_dp, * 2. Program DP PLL enable */ if (IS_GEN5(dev_priv)) - intel_wait_for_vblank_if_active(&dev_priv->drm, !crtc->pipe); + intel_wait_for_vblank_if_active(dev_priv, !crtc->pipe); intel_dp->DP |= DP_PLL_ENABLE; @@ -3487,7 +3487,7 @@ intel_dp_link_down(struct intel_dp *intel_dp) I915_WRITE(intel_dp->output_reg, DP); POSTING_READ(intel_dp->output_reg); - intel_wait_for_vblank_if_active(&dev_priv->drm, PIPE_A); + intel_wait_for_vblank_if_active(dev_priv, PIPE_A); intel_set_cpu_fifo_underrun_reporting(dev_priv, PIPE_A, true); intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, true); } @@ -3667,7 +3667,7 @@ intel_dp_configure_mst(struct intel_dp *intel_dp) static int intel_dp_sink_crc_stop(struct intel_dp *intel_dp) { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct drm_device *dev = dig_port->base.base.dev; + struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); struct intel_crtc *intel_crtc = to_intel_crtc(dig_port->base.base.crtc); u8 buf; int ret = 0; @@ -3688,7 +3688,7 @@ static int intel_dp_sink_crc_stop(struct intel_dp *intel_dp) } do { - intel_wait_for_vblank(dev, intel_crtc->pipe); + intel_wait_for_vblank(dev_priv, intel_crtc->pipe); if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK_MISC, &buf) < 0) { @@ -3711,7 +3711,7 @@ static int intel_dp_sink_crc_stop(struct intel_dp *intel_dp) static int intel_dp_sink_crc_start(struct intel_dp *intel_dp) { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct drm_device *dev = dig_port->base.base.dev; + struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); struct intel_crtc *intel_crtc = to_intel_crtc(dig_port->base.base.crtc); u8 buf; int ret; @@ -3739,14 +3739,14 @@ static int intel_dp_sink_crc_start(struct intel_dp *intel_dp) return -EIO; } - intel_wait_for_vblank(dev, intel_crtc->pipe); + 
intel_wait_for_vblank(dev_priv, intel_crtc->pipe); return 0; } int intel_dp_sink_crc(struct intel_dp *intel_dp, u8 *crc) { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct drm_device *dev = dig_port->base.base.dev; + struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); struct intel_crtc *intel_crtc = to_intel_crtc(dig_port->base.base.crtc); u8 buf; int count, ret; @@ -3757,7 +3757,7 @@ int intel_dp_sink_crc(struct intel_dp *intel_dp, u8 *crc) return ret; do { - intel_wait_for_vblank(dev, intel_crtc->pipe); + intel_wait_for_vblank(dev_priv, intel_crtc->pipe); if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK_MISC, &buf) < 0) { @@ -3990,7 +3990,7 @@ intel_dp_retrain_link(struct intel_dp *intel_dp) intel_dp_stop_link_train(intel_dp); /* Keep underrun reporting disabled until things are stable */ - intel_wait_for_vblank(&dev_priv->drm, crtc->pipe); + intel_wait_for_vblank(dev_priv, crtc->pipe); intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, true); if (crtc->config->has_pch_encoder) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 58e33c3..6c096e4 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1244,17 +1244,17 @@ intel_crtc_has_dp_encoder(const struct intel_crtc_state *crtc_state) (1 << INTEL_OUTPUT_EDP)); } static inline void -intel_wait_for_vblank(struct drm_device *dev, int pipe) +intel_wait_for_vblank(struct drm_i915_private *dev_priv, enum pipe pipe) { - drm_wait_one_vblank(dev, pipe); + drm_wait_one_vblank(&dev_priv->drm, pipe); } static inline void -intel_wait_for_vblank_if_active(struct drm_device *dev, int pipe) +intel_wait_for_vblank_if_active(struct drm_i915_private *dev_priv, int pipe) { - const struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev, pipe); + const struct intel_crtc *crtc = intel_get_crtc_for_pipe(&dev_priv->drm, pipe); if (crtc->active) - intel_wait_for_vblank(dev, pipe); + intel_wait_for_vblank(dev_priv, pipe); } u32 intel_crtc_get_vblank_counter(struct intel_crtc *crtc); diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index af8715f..35ada4e 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1164,7 +1164,7 @@ static void intel_disable_hdmi(struct intel_encoder *encoder, I915_WRITE(intel_hdmi->hdmi_reg, temp); POSTING_READ(intel_hdmi->hdmi_reg); - intel_wait_for_vblank_if_active(&dev_priv->drm, PIPE_A); + intel_wait_for_vblank_if_active(dev_priv, PIPE_A); intel_set_cpu_fifo_underrun_reporting(dev_priv, PIPE_A, true); intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, true); } diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 49fb95d..5d59a48 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -1472,7 +1472,7 @@ static void intel_disable_sdvo(struct intel_encoder *encoder, temp &= ~SDVO_ENABLE; intel_sdvo_write_sdvox(intel_sdvo, temp); - intel_wait_for_vblank_if_active(&dev_priv->drm, PIPE_A); + intel_wait_for_vblank_if_active(dev_priv, PIPE_A); intel_set_cpu_fifo_underrun_reporting(dev_priv, PIPE_A, true); intel_set_pch_fifo_underrun_reporting(dev_priv, PIPE_A, true); } @@ -1509,7 +1509,7 @@ static void intel_enable_sdvo(struct intel_encoder *encoder, intel_sdvo_write_sdvox(intel_sdvo, temp); for (i = 0; i < 2; i++) - intel_wait_for_vblank(dev, intel_crtc->pipe); + intel_wait_for_vblank(dev_priv, intel_crtc->pipe); success = intel_sdvo_get_trained_inputs(intel_sdvo, &input1, 
&input2); /* Warn if the device reported failure to sync. diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index 7118fb55..9212f00 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -856,7 +856,7 @@ intel_enable_tv(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(dev); /* Prevents vblank waits from timing out in intel_tv_detect_type() */ - intel_wait_for_vblank(encoder->base.dev, + intel_wait_for_vblank(dev_priv, to_intel_crtc(encoder->base.crtc)->pipe); I915_WRITE(TV_CTL, I915_READ(TV_CTL) | TV_ENC_ENABLE); @@ -1238,7 +1238,7 @@ intel_tv_detect_type(struct intel_tv *intel_tv, I915_WRITE(TV_DAC, tv_dac); POSTING_READ(TV_DAC); - intel_wait_for_vblank(dev, intel_crtc->pipe); + intel_wait_for_vblank(dev_priv, intel_crtc->pipe); type = -1; tv_dac = I915_READ(TV_DAC); @@ -1268,7 +1268,7 @@ intel_tv_detect_type(struct intel_tv *intel_tv, POSTING_READ(TV_CTL); /* For unknown reasons the hw barfs if we don't do this vblank wait. */ - intel_wait_for_vblank(dev, intel_crtc->pipe); + intel_wait_for_vblank(dev_priv, intel_crtc->pipe); /* Restore interrupt config */ if (connector->polled & DRM_CONNECTOR_POLL_HPD) { -- cgit v1.1 From 30ad9814d5ce93423a0f1e6206ae94f15ed95a24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 31 Oct 2016 22:37:07 +0200 Subject: drm/i915: Pass dev_priv to vlv force pll functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unify our approach to things by passing around dev_priv instead of dev. Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1477946245-14134-9-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_display.c | 14 +++++++------- drivers/gpu/drm/i915/intel_dp.c | 7 +++---- drivers/gpu/drm/i915/intel_drv.h | 4 ++-- 3 files changed, 12 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 50901b0..419c6a2 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -8103,10 +8103,10 @@ static void chv_prepare_pll(struct intel_crtc *crtc, * in cases where we need the PLL enabled even when @pipe is not going to * be enabled. */ -int vlv_force_pll_on(struct drm_device *dev, enum pipe pipe, +int vlv_force_pll_on(struct drm_i915_private *dev_priv, enum pipe pipe, const struct dpll *dpll) { - struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev, pipe); + struct intel_crtc *crtc = intel_get_crtc_for_pipe(&dev_priv->drm, pipe); struct intel_crtc_state *pipe_config; pipe_config = kzalloc(sizeof(*pipe_config), GFP_KERNEL); @@ -8117,7 +8117,7 @@ int vlv_force_pll_on(struct drm_device *dev, enum pipe pipe, pipe_config->pixel_multiplier = 1; pipe_config->dpll = *dpll; - if (IS_CHERRYVIEW(to_i915(dev))) { + if (IS_CHERRYVIEW(dev_priv)) { chv_compute_dpll(crtc, pipe_config); chv_prepare_pll(crtc, pipe_config); chv_enable_pll(crtc, pipe_config); @@ -8140,12 +8140,12 @@ int vlv_force_pll_on(struct drm_device *dev, enum pipe pipe, * Disable the PLL for @pipe. To be used in cases where we need * the PLL enabled even when @pipe is not going to be enabled. 
*/ -void vlv_force_pll_off(struct drm_device *dev, enum pipe pipe) +void vlv_force_pll_off(struct drm_i915_private *dev_priv, enum pipe pipe) { - if (IS_CHERRYVIEW(to_i915(dev))) - chv_disable_pll(to_i915(dev), pipe); + if (IS_CHERRYVIEW(dev_priv)) + chv_disable_pll(dev_priv, pipe); else - vlv_disable_pll(to_i915(dev), pipe); + vlv_disable_pll(dev_priv, pipe); } static void i9xx_compute_dpll(struct intel_crtc *crtc, diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 9c1921a..9df331b 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -395,8 +395,7 @@ static void vlv_power_sequencer_kick(struct intel_dp *intel_dp) { struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); - struct drm_device *dev = intel_dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); enum pipe pipe = intel_dp->pps_pipe; bool pll_enabled, release_cl_override = false; enum dpio_phy phy = DPIO_PHY(pipe); @@ -434,7 +433,7 @@ vlv_power_sequencer_kick(struct intel_dp *intel_dp) release_cl_override = IS_CHERRYVIEW(dev_priv) && !chv_phy_powergate_ch(dev_priv, phy, ch, true); - if (vlv_force_pll_on(dev, pipe, IS_CHERRYVIEW(dev_priv) ? + if (vlv_force_pll_on(dev_priv, pipe, IS_CHERRYVIEW(dev_priv) ? &chv_dpll[0].dpll : &vlv_dpll[0].dpll)) { DRM_ERROR("Failed to force on pll for pipe %c!\n", pipe_name(pipe)); @@ -458,7 +457,7 @@ vlv_power_sequencer_kick(struct intel_dp *intel_dp) POSTING_READ(intel_dp->output_reg); if (!pll_enabled) { - vlv_force_pll_off(dev, pipe); + vlv_force_pll_off(dev_priv, pipe); if (release_cl_override) chv_phy_powergate_ch(dev_priv, phy, ch, false); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 6c096e4..7c0af57 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1301,9 +1301,9 @@ unsigned int intel_tile_height(const struct drm_i915_private *dev_priv, void assert_pch_transcoder_disabled(struct drm_i915_private *dev_priv, enum pipe pipe); -int vlv_force_pll_on(struct drm_device *dev, enum pipe pipe, +int vlv_force_pll_on(struct drm_i915_private *dev_priv, enum pipe pipe, const struct dpll *dpll); -void vlv_force_pll_off(struct drm_device *dev, enum pipe pipe); +void vlv_force_pll_off(struct drm_i915_private *dev_priv, enum pipe pipe); int lpt_get_iclkip(struct drm_i915_private *dev_priv); /* modesetting asserts */ -- cgit v1.1 From f0ce23104012d7a153a079890d6070c2a184f8fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 31 Oct 2016 22:37:08 +0200 Subject: drm/i915: Pass dev_priv to g4x wm functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unify our approach to things by passing around dev_priv instead of dev. 
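The shape of the conversion, condensed from the intel_pm.c hunks below (unchanged parameters elided here for brevity):

	-static bool g4x_compute_wm0(struct drm_device *dev,
	+static bool g4x_compute_wm0(struct drm_i915_private *dev_priv,
	 			    int plane,
	...
	-	crtc = intel_get_crtc_for_plane(dev, plane);
	+	crtc = intel_get_crtc_for_plane(&dev_priv->drm, plane);

Note that intel_get_crtc_for_plane() still takes a struct drm_device at this point, hence the temporary &dev_priv->drm; the lookup helpers themselves are converted in the following patch.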
Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1477946245-14134-10-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_pm.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 8d1040c..c9990a9 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -712,7 +712,7 @@ static void pineview_update_wm(struct intel_crtc *unused_crtc) } } -static bool g4x_compute_wm0(struct drm_device *dev, +static bool g4x_compute_wm0(struct drm_i915_private *dev_priv, int plane, const struct intel_watermark_params *display, int display_latency_ns, @@ -728,7 +728,7 @@ static bool g4x_compute_wm0(struct drm_device *dev, int line_time_us, line_count; int entries, tlb_miss; - crtc = intel_get_crtc_for_plane(dev, plane); + crtc = intel_get_crtc_for_plane(&dev_priv->drm, plane); if (!intel_crtc_active(crtc)) { *cursor_wm = cursor->guard_size; *plane_wm = display->guard_size; @@ -774,7 +774,7 @@ static bool g4x_compute_wm0(struct drm_device *dev, * can be programmed into the associated watermark register, that watermark * must be disabled. */ -static bool g4x_check_srwm(struct drm_device *dev, +static bool g4x_check_srwm(struct drm_i915_private *dev_priv, int display_wm, int cursor_wm, const struct intel_watermark_params *display, const struct intel_watermark_params *cursor) @@ -802,7 +802,7 @@ static bool g4x_check_srwm(struct drm_device *dev, return true; } -static bool g4x_compute_srwm(struct drm_device *dev, +static bool g4x_compute_srwm(struct drm_i915_private *dev_priv, int plane, int latency_ns, const struct intel_watermark_params *display, @@ -823,7 +823,7 @@ static bool g4x_compute_srwm(struct drm_device *dev, return false; } - crtc = intel_get_crtc_for_plane(dev, plane); + crtc = intel_get_crtc_for_plane(&dev_priv->drm, plane); adjusted_mode = &crtc->config->base.adjusted_mode; fb = crtc->base.primary->state->fb; clock = adjusted_mode->crtc_clock; @@ -847,7 +847,7 @@ static bool g4x_compute_srwm(struct drm_device *dev, entries = DIV_ROUND_UP(entries, cursor->cacheline_size); *cursor_wm = entries + cursor->guard_size; - return g4x_check_srwm(dev, + return g4x_check_srwm(dev_priv, *display_wm, *cursor_wm, display, cursor); } @@ -1401,20 +1401,20 @@ static void g4x_update_wm(struct intel_crtc *crtc) unsigned int enabled = 0; bool cxsr_enabled; - if (g4x_compute_wm0(dev, PIPE_A, + if (g4x_compute_wm0(dev_priv, PIPE_A, &g4x_wm_info, pessimal_latency_ns, &g4x_cursor_wm_info, pessimal_latency_ns, &planea_wm, &cursora_wm)) enabled |= 1 << PIPE_A; - if (g4x_compute_wm0(dev, PIPE_B, + if (g4x_compute_wm0(dev_priv, PIPE_B, &g4x_wm_info, pessimal_latency_ns, &g4x_cursor_wm_info, pessimal_latency_ns, &planeb_wm, &cursorb_wm)) enabled |= 1 << PIPE_B; if (single_plane_enabled(enabled) && - g4x_compute_srwm(dev, ffs(enabled) - 1, + g4x_compute_srwm(dev_priv, ffs(enabled) - 1, sr_latency_ns, &g4x_wm_info, &g4x_cursor_wm_info, -- cgit v1.1 From b91eb5cce65047eced9d13e8e15b46dced0c3c53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 31 Oct 2016 22:37:09 +0200 Subject: drm/i915: Pass dev_priv to intel_get_crtc_for_pipe() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unify our approach to things by passing around dev_priv instead of dev. 
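With this change the lookup helpers no longer rediscover dev_priv via to_i915(), and the pipe/plane parameters gain their proper enum types; the resulting intel_drv.h helpers (from the hunk below) reduce to plain array lookups:

	static inline struct intel_crtc *
	intel_get_crtc_for_pipe(struct drm_i915_private *dev_priv, enum pipe pipe)
	{
		/* direct mapping lookup, no to_i915(dev) round trip */
		return dev_priv->pipe_to_crtc_mapping[pipe];
	}

	static inline struct intel_crtc *
	intel_get_crtc_for_plane(struct drm_i915_private *dev_priv, enum plane plane)
	{
		return dev_priv->plane_to_crtc_mapping[plane];
	}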
Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1477946245-14134-11-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_debugfs.c | 3 +-- drivers/gpu/drm/i915/i915_irq.c | 5 +++-- drivers/gpu/drm/i915/intel_display.c | 6 +++--- drivers/gpu/drm/i915/intel_drv.h | 8 +++----- drivers/gpu/drm/i915/intel_dvo.c | 2 +- drivers/gpu/drm/i915/intel_lvds.c | 2 +- drivers/gpu/drm/i915/intel_pm.c | 11 +++++------ 7 files changed, 17 insertions(+), 20 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index ce6aa7e..008f91d 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4131,9 +4131,8 @@ static int pipe_crc_set_source(struct drm_i915_private *dev_priv, enum pipe pipe, enum intel_pipe_crc_source source) { - struct drm_device *dev = &dev_priv->drm; struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[pipe]; - struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev, pipe); + struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); enum intel_display_power_domain power_domain; u32 val = 0; /* shut up gcc */ int ret; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 696dc6f..48bb5de 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -965,15 +965,16 @@ static int i915_get_vblank_timestamp(struct drm_device *dev, unsigned int pipe, struct timeval *vblank_time, unsigned flags) { + struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *crtc; - if (pipe >= INTEL_INFO(dev)->num_pipes) { + if (pipe >= INTEL_INFO(dev_priv)->num_pipes) { DRM_ERROR("Invalid crtc %u\n", pipe); return -EINVAL; } /* Get drm_crtc to timestamp: */ - crtc = intel_get_crtc_for_pipe(dev, pipe); + crtc = intel_get_crtc_for_pipe(dev_priv, pipe); if (crtc == NULL) { DRM_ERROR("Invalid crtc %u\n", pipe); return -EINVAL; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 419c6a2..3cb4133 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -7072,7 +7072,7 @@ static int ironlake_check_fdi_lanes(struct drm_device *dev, enum pipe pipe, if (pipe_config->fdi_lanes <= 2) return 0; - other_crtc = intel_get_crtc_for_pipe(dev, PIPE_C); + other_crtc = intel_get_crtc_for_pipe(dev_priv, PIPE_C); other_crtc_state = intel_atomic_get_crtc_state(state, other_crtc); if (IS_ERR(other_crtc_state)) @@ -7091,7 +7091,7 @@ static int ironlake_check_fdi_lanes(struct drm_device *dev, enum pipe pipe, return -EINVAL; } - other_crtc = intel_get_crtc_for_pipe(dev, PIPE_B); + other_crtc = intel_get_crtc_for_pipe(dev_priv, PIPE_B); other_crtc_state = intel_atomic_get_crtc_state(state, other_crtc); if (IS_ERR(other_crtc_state)) @@ -8106,7 +8106,7 @@ static void chv_prepare_pll(struct intel_crtc *crtc, int vlv_force_pll_on(struct drm_i915_private *dev_priv, enum pipe pipe, const struct dpll *dpll) { - struct intel_crtc *crtc = intel_get_crtc_for_pipe(&dev_priv->drm, pipe); + struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); struct intel_crtc_state *pipe_config; pipe_config = kzalloc(sizeof(*pipe_config), GFP_KERNEL); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 7c0af57..0fdd546 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1029,16 +1029,14 @@ vlv_pipe_to_channel(enum pipe pipe) } static inline 
struct intel_crtc * -intel_get_crtc_for_pipe(struct drm_device *dev, int pipe) +intel_get_crtc_for_pipe(struct drm_i915_private *dev_priv, enum pipe pipe) { - struct drm_i915_private *dev_priv = to_i915(dev); return dev_priv->pipe_to_crtc_mapping[pipe]; } static inline struct intel_crtc * -intel_get_crtc_for_plane(struct drm_device *dev, int plane) +intel_get_crtc_for_plane(struct drm_i915_private *dev_priv, enum plane plane) { - struct drm_i915_private *dev_priv = to_i915(dev); return dev_priv->plane_to_crtc_mapping[plane]; } @@ -1251,7 +1249,7 @@ intel_wait_for_vblank(struct drm_i915_private *dev_priv, enum pipe pipe) static inline void intel_wait_for_vblank_if_active(struct drm_i915_private *dev_priv, int pipe) { - const struct intel_crtc *crtc = intel_get_crtc_for_pipe(&dev_priv->drm, pipe); + const struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); if (crtc->active) intel_wait_for_vblank(dev_priv, pipe); diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c index 61d78fc..7086454 100644 --- a/drivers/gpu/drm/i915/intel_dvo.c +++ b/drivers/gpu/drm/i915/intel_dvo.c @@ -396,7 +396,7 @@ intel_dvo_get_current_mode(struct drm_connector *connector) struct intel_crtc *crtc; int pipe = (dvo_val & DVO_PIPE_B_SELECT) ? 1 : 0; - crtc = intel_get_crtc_for_pipe(dev, pipe); + crtc = intel_get_crtc_for_pipe(dev_priv, pipe); if (crtc) { mode = intel_crtc_mode_get(dev, &crtc->base); if (mode) { diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 81ed32b..de7b3e6 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -1163,7 +1163,7 @@ void intel_lvds_init(struct drm_device *dev) goto failed; pipe = (lvds & LVDS_PIPEB_SELECT) ? 1 : 0; - crtc = intel_get_crtc_for_pipe(dev, pipe); + crtc = intel_get_crtc_for_pipe(dev_priv, pipe); if (crtc && (lvds & LVDS_PORT_EN)) { fixed_mode = intel_crtc_mode_get(dev, &crtc->base); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index c9990a9..976d66a 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -728,7 +728,7 @@ static bool g4x_compute_wm0(struct drm_i915_private *dev_priv, int line_time_us, line_count; int entries, tlb_miss; - crtc = intel_get_crtc_for_plane(&dev_priv->drm, plane); + crtc = intel_get_crtc_for_plane(dev_priv, plane); if (!intel_crtc_active(crtc)) { *cursor_wm = cursor->guard_size; *plane_wm = display->guard_size; @@ -823,7 +823,7 @@ static bool g4x_compute_srwm(struct drm_i915_private *dev_priv, return false; } - crtc = intel_get_crtc_for_plane(&dev_priv->drm, plane); + crtc = intel_get_crtc_for_plane(dev_priv, plane); adjusted_mode = &crtc->config->base.adjusted_mode; fb = crtc->base.primary->state->fb; clock = adjusted_mode->crtc_clock; @@ -1393,9 +1393,8 @@ static void vlv_update_wm(struct intel_crtc *crtc) static void g4x_update_wm(struct intel_crtc *crtc) { - struct drm_device *dev = crtc->base.dev; + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); static const int sr_latency_ns = 12000; - struct drm_i915_private *dev_priv = to_i915(dev); int planea_wm, planeb_wm, cursora_wm, cursorb_wm; int plane_sr, cursor_sr; unsigned int enabled = 0; @@ -1546,7 +1545,7 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc) wm_info = &i830_a_wm_info; fifo_size = dev_priv->display.get_fifo_size(dev, 0); - crtc = intel_get_crtc_for_plane(dev, 0); + crtc = intel_get_crtc_for_plane(dev_priv, 0); if (intel_crtc_active(crtc)) { const struct drm_display_mode 
*adjusted_mode = &crtc->config->base.adjusted_mode; @@ -1573,7 +1572,7 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc) wm_info = &i830_bc_wm_info; fifo_size = dev_priv->display.get_fifo_size(dev, 1); - crtc = intel_get_crtc_for_plane(dev, 1); + crtc = intel_get_crtc_for_plane(dev_priv, 1); if (intel_crtc_active(crtc)) { const struct drm_display_mode *adjusted_mode = &crtc->config->base.adjusted_mode; -- cgit v1.1 From 98187836fc75b7a003db101e19cfa8685da78905 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 31 Oct 2016 22:37:10 +0200 Subject: drm/i915: Always use intel_get_crtc_for_pipe() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the open coded dev_priv->pipe_to_crtc_mapping[] usage with intel_get_crtc_for_pipe(). Mostly done with coccinelle, with a few manual tweaks @@ expression E1, E2; @@ ( - E1->pipe_to_crtc_mapping[E2] + intel_get_crtc_for_pipe(E1, E2) | - E1->plane_to_crtc_mapping[E2] + intel_get_crtc_for_plane(E1, E2) ) Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1477946245-14134-12-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_debugfs.c | 5 +++-- drivers/gpu/drm/i915/i915_irq.c | 6 ++++-- drivers/gpu/drm/i915/intel_display.c | 21 ++++++++++++--------- drivers/gpu/drm/i915/intel_fifo_underrun.c | 11 ++++++----- drivers/gpu/drm/i915/intel_pm.c | 2 +- 5 files changed, 26 insertions(+), 19 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 008f91d..a13ff47 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4065,7 +4065,7 @@ static void hsw_trans_edp_pipe_A_crc_wa(struct drm_i915_private *dev_priv, bool enable) { struct drm_device *dev = &dev_priv->drm; - struct intel_crtc *crtc = dev_priv->pipe_to_crtc_mapping[PIPE_A]; + struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, PIPE_A); struct intel_crtc_state *pipe_config; struct drm_atomic_state *state; int ret = 0; @@ -4203,7 +4203,8 @@ static int pipe_crc_set_source(struct drm_i915_private *dev_priv, /* real source -> none transition */ if (source == INTEL_PIPE_CRC_SOURCE_NONE) { struct intel_pipe_crc_entry *entries; - struct intel_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe]; + struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, + pipe); DRM_DEBUG_DRIVER("stopping CRCs for pipe %c\n", pipe_name(pipe)); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 48bb5de..ecd06d3 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -725,7 +725,8 @@ static u32 i915_get_vblank_counter(struct drm_device *dev, unsigned int pipe) struct drm_i915_private *dev_priv = to_i915(dev); i915_reg_t high_frame, low_frame; u32 high1, high2, low, pixel, vbl_start, hsync_start, htotal; - struct intel_crtc *intel_crtc = dev_priv->pipe_to_crtc_mapping[pipe]; + struct intel_crtc *intel_crtc = intel_get_crtc_for_pipe(dev_priv, + pipe); const struct drm_display_mode *mode = &intel_crtc->base.hwmode; htotal = mode->crtc_htotal; @@ -830,7 +831,8 @@ static int i915_get_crtc_scanoutpos(struct drm_device *dev, unsigned int pipe, const struct drm_display_mode *mode) { struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *intel_crtc = dev_priv->pipe_to_crtc_mapping[pipe]; + struct intel_crtc *intel_crtc = intel_get_crtc_for_pipe(dev_priv, + pipe); int 
position; int vbl_start, vbl_end, hsync_start, htotal, vtotal; bool in_vbl = true; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 3cb4133..b21dc27 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1030,7 +1030,7 @@ bool intel_crtc_active(struct intel_crtc *crtc) enum transcoder intel_pipe_to_cpu_transcoder(struct drm_i915_private *dev_priv, enum pipe pipe) { - struct intel_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe]; + struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); return crtc->config->cpu_transcoder; } @@ -1785,7 +1785,8 @@ void vlv_wait_port_ready(struct drm_i915_private *dev_priv, static void ironlake_enable_pch_transcoder(struct drm_i915_private *dev_priv, enum pipe pipe) { - struct intel_crtc *intel_crtc = dev_priv->pipe_to_crtc_mapping[pipe]; + struct intel_crtc *intel_crtc = intel_get_crtc_for_pipe(dev_priv, + pipe); i915_reg_t reg; uint32_t val, pipeconf_val; @@ -11662,7 +11663,7 @@ static bool pageflip_finished(struct intel_crtc *crtc, void intel_finish_page_flip_cs(struct drm_i915_private *dev_priv, int pipe) { struct drm_device *dev = &dev_priv->drm; - struct intel_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe]; + struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); struct intel_flip_work *work; unsigned long flags; @@ -11688,7 +11689,7 @@ void intel_finish_page_flip_cs(struct drm_i915_private *dev_priv, int pipe) void intel_finish_page_flip_mmio(struct drm_i915_private *dev_priv, int pipe) { struct drm_device *dev = &dev_priv->drm; - struct intel_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe]; + struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); struct intel_flip_work *work; unsigned long flags; @@ -12118,7 +12119,7 @@ static bool __pageflip_stall_check_cs(struct drm_i915_private *dev_priv, void intel_check_page_flip(struct drm_i915_private *dev_priv, int pipe) { struct drm_device *dev = &dev_priv->drm; - struct intel_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe]; + struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); struct intel_flip_work *work; WARN_ON(!in_interrupt()); @@ -14177,7 +14178,8 @@ static void intel_atomic_wait_for_vblanks(struct drm_device *dev, return; for_each_pipe(dev_priv, pipe) { - struct intel_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe]; + struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, + pipe); if (!((1 << pipe) & crtc_mask)) continue; @@ -14192,7 +14194,8 @@ static void intel_atomic_wait_for_vblanks(struct drm_device *dev, } for_each_pipe(dev_priv, pipe) { - struct intel_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe]; + struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, + pipe); long lret; if (!((1 << pipe) & crtc_mask)) @@ -16867,7 +16870,7 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) pipe = 0; if (encoder->get_hw_state(encoder, &pipe)) { - crtc = dev_priv->pipe_to_crtc_mapping[pipe]; + crtc = intel_get_crtc_for_pipe(dev_priv, pipe); encoder->base.crtc = &crtc->base; crtc->config->output_types |= 1 << encoder->type; @@ -16969,7 +16972,7 @@ intel_modeset_setup_hw_state(struct drm_device *dev) } for_each_pipe(dev_priv, pipe) { - crtc = dev_priv->pipe_to_crtc_mapping[pipe]; + crtc = intel_get_crtc_for_pipe(dev_priv, pipe); intel_sanitize_crtc(crtc); intel_dump_pipe_config(crtc, crtc->config, diff --git a/drivers/gpu/drm/i915/intel_fifo_underrun.c b/drivers/gpu/drm/i915/intel_fifo_underrun.c index 34f93f1..e660d8b 100644 --- 
a/drivers/gpu/drm/i915/intel_fifo_underrun.c +++ b/drivers/gpu/drm/i915/intel_fifo_underrun.c @@ -57,7 +57,7 @@ static bool ivb_can_enable_err_int(struct drm_device *dev) assert_spin_locked(&dev_priv->irq_lock); for_each_pipe(dev_priv, pipe) { - crtc = dev_priv->pipe_to_crtc_mapping[pipe]; + crtc = intel_get_crtc_for_pipe(dev_priv, pipe); if (crtc->cpu_fifo_underrun_disabled) return false; @@ -75,7 +75,7 @@ static bool cpt_can_enable_serr_int(struct drm_device *dev) assert_spin_locked(&dev_priv->irq_lock); for_each_pipe(dev_priv, pipe) { - crtc = dev_priv->pipe_to_crtc_mapping[pipe]; + crtc = intel_get_crtc_for_pipe(dev_priv, pipe); if (crtc->pch_fifo_underrun_disabled) return false; @@ -245,7 +245,7 @@ static bool __intel_set_cpu_fifo_underrun_reporting(struct drm_device *dev, enum pipe pipe, bool enable) { struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe]; + struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); bool old; assert_spin_locked(&dev_priv->irq_lock); @@ -313,7 +313,8 @@ bool intel_set_pch_fifo_underrun_reporting(struct drm_i915_private *dev_priv, enum transcoder pch_transcoder, bool enable) { - struct intel_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pch_transcoder]; + struct intel_crtc *crtc = + intel_get_crtc_for_pipe(dev_priv, (enum pipe) pch_transcoder); unsigned long flags; bool old; @@ -356,7 +357,7 @@ bool intel_set_pch_fifo_underrun_reporting(struct drm_i915_private *dev_priv, void intel_cpu_fifo_underrun_irq_handler(struct drm_i915_private *dev_priv, enum pipe pipe) { - struct intel_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe]; + struct intel_crtc *crtc = intel_get_crtc_for_pipe(dev_priv, pipe); /* We may be called too early in init, thanks BIOS! */ if (crtc == NULL) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 976d66a..4f51f86 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3065,7 +3065,7 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state) /* Since we're now guaranteed to only have one active CRTC... */ pipe = ffs(intel_state->active_crtcs) - 1; - crtc = dev_priv->pipe_to_crtc_mapping[pipe]; + crtc = intel_get_crtc_for_pipe(dev_priv, pipe); cstate = to_intel_crtc_state(crtc->base.state); if (crtc->base.state->adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE) -- cgit v1.1 From 5ab0d85b6bf01a9e47e74ebc2876534115444c6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 31 Oct 2016 22:37:11 +0200 Subject: drm/i915: Pass dev_priv to intel_crtc_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unify our approach to things by passing around dev_priv instead of dev. 
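(Sketch, not from the patch: the modeset init loop now needs only dev_priv. example_init_crtcs() is hypothetical, and cleaning up via &dev_priv->drm is an assumption made for self-containment; the real caller still has dev in scope.)

static int example_init_crtcs(struct drm_i915_private *dev_priv)
{
	enum pipe pipe;

	for_each_pipe(dev_priv, pipe) {
		int ret = intel_crtc_init(dev_priv, pipe);

		if (ret) {
			drm_mode_config_cleanup(&dev_priv->drm);
			return ret;
		}
	}

	return 0;
}
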
Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1477946245-14134-13-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_display.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index b21dc27..71a2fc5 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -15236,9 +15236,8 @@ static void skl_init_scalers(struct drm_i915_private *dev_priv, scaler_state->scaler_id = -1; } -static int intel_crtc_init(struct drm_device *dev, enum pipe pipe) +static int intel_crtc_init(struct drm_i915_private *dev_priv, enum pipe pipe) { - struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc; struct intel_crtc_state *crtc_state = NULL; struct intel_plane *primary = NULL; @@ -15259,7 +15258,7 @@ static int intel_crtc_init(struct drm_device *dev, enum pipe pipe) crtc_state->base.crtc = &intel_crtc->base; /* initialize shared scalers */ - if (INTEL_INFO(dev)->gen >= 9) { + if (INTEL_GEN(dev_priv) >= 9) { if (pipe == PIPE_C) intel_crtc->num_scalers = 1; else @@ -15290,7 +15289,7 @@ static int intel_crtc_init(struct drm_device *dev, enum pipe pipe) goto fail; } - ret = drm_crtc_init_with_planes(dev, &intel_crtc->base, + ret = drm_crtc_init_with_planes(&dev_priv->drm, &intel_crtc->base, &primary->base, &cursor->base, &intel_crtc_funcs, "pipe %c", pipe_name(pipe)); @@ -15303,7 +15302,7 @@ static int intel_crtc_init(struct drm_device *dev, enum pipe pipe) */ intel_crtc->pipe = pipe; intel_crtc->plane = (enum plane) pipe; - if (HAS_FBC(dev) && INTEL_INFO(dev)->gen < 4) { + if (HAS_FBC(dev_priv) && INTEL_GEN(dev_priv) < 4) { DRM_DEBUG_KMS("swapping pipes & planes for FBC\n"); intel_crtc->plane = !pipe; } @@ -16487,7 +16486,7 @@ int intel_modeset_init(struct drm_device *dev) for_each_pipe(dev_priv, pipe) { int ret; - ret = intel_crtc_init(dev, pipe); + ret = intel_crtc_init(dev_priv, pipe); if (ret) { drm_mode_config_cleanup(dev); return ret; -- cgit v1.1 From 4c75b9405ea34c9223890d6470b809126b45c173 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 31 Oct 2016 22:37:12 +0200 Subject: drm/i915: Pass dev_priv to cdclk update funcs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unify our approach to things by passing around dev_priv instead of dev. 
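(Sketch, not from the patch: dev_priv-only paths such as the sanitize/resume code can now call the cdclk helpers directly instead of detouring through &dev_priv->drm. example_refresh_cdclk() is a hypothetical wrapper; both callees are as in the diff.)

static void example_refresh_cdclk(struct drm_i915_private *dev_priv)
{
	/* re-read the current cdclk state from hardware */
	intel_update_cdclk(dev_priv);

	if (dev_priv->max_cdclk_freq == 0)
		intel_update_max_cdclk(dev_priv);
}
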
Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1477946245-14134-14-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_display.c | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 71a2fc5..3c26ea0 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5842,10 +5842,8 @@ static int intel_compute_max_dotclk(struct drm_i915_private *dev_priv) static int skl_calc_cdclk(int max_pixclk, int vco); -static void intel_update_max_cdclk(struct drm_device *dev) +static void intel_update_max_cdclk(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { u32 limit = I915_READ(SKL_DFSM) & SKL_DFSM_CDCLK_LIMIT_MASK; int max_cdclk, vco; @@ -5903,11 +5901,9 @@ static void intel_update_max_cdclk(struct drm_device *dev) dev_priv->max_dotclk_freq); } -static void intel_update_cdclk(struct drm_device *dev) +static void intel_update_cdclk(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); - - dev_priv->cdclk_freq = dev_priv->display.get_display_clock_speed(dev); + dev_priv->cdclk_freq = dev_priv->display.get_display_clock_speed(&dev_priv->drm); if (INTEL_GEN(dev_priv) >= 9) DRM_DEBUG_DRIVER("Current CD clock rate: %d kHz, VCO: %d kHz, ref: %d kHz\n", @@ -6068,14 +6064,14 @@ static void bxt_set_cdclk(struct drm_i915_private *dev_priv, int cdclk) return; } - intel_update_cdclk(&dev_priv->drm); + intel_update_cdclk(dev_priv); } static void bxt_sanitize_cdclk(struct drm_i915_private *dev_priv) { u32 cdctl, expected; - intel_update_cdclk(&dev_priv->drm); + intel_update_cdclk(dev_priv); if (dev_priv->cdclk_pll.vco == 0 || dev_priv->cdclk_freq == dev_priv->cdclk_pll.ref) @@ -6208,7 +6204,7 @@ void skl_set_preferred_cdclk_vco(struct drm_i915_private *dev_priv, int vco) dev_priv->skl_preferred_vco_freq = vco; if (changed) - intel_update_max_cdclk(&dev_priv->drm); + intel_update_max_cdclk(dev_priv); } static void @@ -6294,7 +6290,6 @@ static bool skl_cdclk_wait_for_pcu_ready(struct drm_i915_private *dev_priv) static void skl_set_cdclk(struct drm_i915_private *dev_priv, int cdclk, int vco) { - struct drm_device *dev = &dev_priv->drm; u32 freq_select, pcu_ack; WARN_ON((cdclk == 24000) != (vco == 0)); @@ -6345,7 +6340,7 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv, int cdclk, int vco) sandybridge_pcode_write(dev_priv, SKL_PCODE_CDCLK_CONTROL, pcu_ack); mutex_unlock(&dev_priv->rps.hw_lock); - intel_update_cdclk(dev); + intel_update_cdclk(dev_priv); } static void skl_sanitize_cdclk(struct drm_i915_private *dev_priv); @@ -6392,7 +6387,7 @@ static void skl_sanitize_cdclk(struct drm_i915_private *dev_priv) if ((I915_READ(SWF_ILK(0x18)) & 0x00FFFFFF) == 0) goto sanitize; - intel_update_cdclk(&dev_priv->drm); + intel_update_cdclk(dev_priv); /* Is PLL enabled and locked ? 
*/ if (dev_priv->cdclk_pll.vco == 0 || dev_priv->cdclk_freq == dev_priv->cdclk_pll.ref) @@ -6483,7 +6478,7 @@ static void valleyview_set_cdclk(struct drm_device *dev, int cdclk) mutex_unlock(&dev_priv->sb_lock); - intel_update_cdclk(dev); + intel_update_cdclk(dev_priv); } static void cherryview_set_cdclk(struct drm_device *dev, int cdclk) @@ -6524,7 +6519,7 @@ static void cherryview_set_cdclk(struct drm_device *dev, int cdclk) } mutex_unlock(&dev_priv->rps.hw_lock); - intel_update_cdclk(dev); + intel_update_cdclk(dev_priv); } static int valleyview_calc_cdclk(struct drm_i915_private *dev_priv, @@ -10188,7 +10183,7 @@ static void hsw_restore_lcpll(struct drm_i915_private *dev_priv) } intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); - intel_update_cdclk(&dev_priv->drm); + intel_update_cdclk(dev_priv); } /* @@ -10368,7 +10363,7 @@ static void broadwell_set_cdclk(struct drm_device *dev, int cdclk) I915_WRITE(CDCLK_FREQ, DIV_ROUND_CLOSEST(cdclk, 1000) - 1); - intel_update_cdclk(dev); + intel_update_cdclk(dev_priv); WARN(cdclk != dev_priv->cdclk_freq, "cdclk requested %d kHz but got %d kHz\n", @@ -16323,7 +16318,7 @@ void intel_modeset_init_hw(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); - intel_update_cdclk(dev); + intel_update_cdclk(dev_priv); dev_priv->atomic_cdclk_freq = dev_priv->cdclk_freq; @@ -16494,12 +16489,12 @@ int intel_modeset_init(struct drm_device *dev) } intel_update_czclk(dev_priv); - intel_update_cdclk(dev); + intel_update_cdclk(dev_priv); intel_shared_dpll_init(dev); if (dev_priv->max_cdclk_freq == 0) - intel_update_max_cdclk(dev); + intel_update_max_cdclk(dev_priv); /* Just disable it once at startup */ i915_disable_vga(dev); -- cgit v1.1 From 1353c4fb1803cc094bb0194bb317b090ab08d83c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 31 Oct 2016 22:37:13 +0200 Subject: drm/i915: Pass dev_priv to .get_display_clock_speed() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unify our approach to things by passing around dev_priv instead of dev. 
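(Sketch, not from the patch: with the vfunc signature changed, reading the current cdclk from any dev_priv context is a single indirect call. example_read_cdclk() is a hypothetical name; the vfunc invocation is exactly what intel_update_cdclk() does in the diff.)

static int example_read_cdclk(struct drm_i915_private *dev_priv)
{
	/* per-platform implementation selected at init time */
	return dev_priv->display.get_display_clock_speed(dev_priv);
}
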
Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1477946245-14134-15-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/intel_display.c | 77 +++++++++++++++------------------ drivers/gpu/drm/i915/intel_runtime_pm.c | 2 +- 3 files changed, 38 insertions(+), 43 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index eeed752..60c937d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -494,7 +494,7 @@ struct intel_limit; struct dpll; struct drm_i915_display_funcs { - int (*get_display_clock_speed)(struct drm_device *dev); + int (*get_display_clock_speed)(struct drm_i915_private *dev_priv); int (*get_fifo_size)(struct drm_device *dev, int plane); int (*compute_pipe_wm)(struct intel_crtc_state *cstate); int (*compute_intermediate_wm)(struct drm_device *dev, diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 3c26ea0..b670ae7 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -5903,7 +5903,7 @@ static void intel_update_max_cdclk(struct drm_i915_private *dev_priv) static void intel_update_cdclk(struct drm_i915_private *dev_priv) { - dev_priv->cdclk_freq = dev_priv->display.get_display_clock_speed(&dev_priv->drm); + dev_priv->cdclk_freq = dev_priv->display.get_display_clock_speed(dev_priv); if (INTEL_GEN(dev_priv) >= 9) DRM_DEBUG_DRIVER("Current CD clock rate: %d kHz, VCO: %d kHz, ref: %d kHz\n", @@ -6421,7 +6421,7 @@ static void valleyview_set_cdclk(struct drm_device *dev, int cdclk) struct drm_i915_private *dev_priv = to_i915(dev); u32 val, cmd; - WARN_ON(dev_priv->display.get_display_clock_speed(dev) + WARN_ON(dev_priv->display.get_display_clock_speed(dev_priv) != dev_priv->cdclk_freq); if (cdclk >= 320000) /* jump to highest voltage for 400MHz too */ @@ -6486,7 +6486,7 @@ static void cherryview_set_cdclk(struct drm_device *dev, int cdclk) struct drm_i915_private *dev_priv = to_i915(dev); u32 val, cmd; - WARN_ON(dev_priv->display.get_display_clock_speed(dev) + WARN_ON(dev_priv->display.get_display_clock_speed(dev_priv) != dev_priv->cdclk_freq); switch (cdclk) { @@ -7245,10 +7245,9 @@ static int intel_crtc_compute_config(struct intel_crtc *crtc, return 0; } -static int skylake_get_display_clock_speed(struct drm_device *dev) +static int skylake_get_display_clock_speed(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); - uint32_t cdctl; + u32 cdctl; skl_dpll0_update(dev_priv); @@ -7307,9 +7306,8 @@ static void bxt_de_pll_update(struct drm_i915_private *dev_priv) dev_priv->cdclk_pll.ref; } -static int broxton_get_display_clock_speed(struct drm_device *dev) +static int broxton_get_display_clock_speed(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); u32 divider; int div, vco; @@ -7342,9 +7340,8 @@ static int broxton_get_display_clock_speed(struct drm_device *dev) return DIV_ROUND_CLOSEST(vco, div); } -static int broadwell_get_display_clock_speed(struct drm_device *dev) +static int broadwell_get_display_clock_speed(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); uint32_t lcpll = I915_READ(LCPLL_CTL); uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK; @@ -7362,9 +7359,8 @@ static int broadwell_get_display_clock_speed(struct drm_device *dev) return 675000; } -static int 
haswell_get_display_clock_speed(struct drm_device *dev) +static int haswell_get_display_clock_speed(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); uint32_t lcpll = I915_READ(LCPLL_CTL); uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK; @@ -7380,35 +7376,35 @@ static int haswell_get_display_clock_speed(struct drm_device *dev) return 540000; } -static int valleyview_get_display_clock_speed(struct drm_device *dev) +static int valleyview_get_display_clock_speed(struct drm_i915_private *dev_priv) { - return vlv_get_cck_clock_hpll(to_i915(dev), "cdclk", + return vlv_get_cck_clock_hpll(dev_priv, "cdclk", CCK_DISPLAY_CLOCK_CONTROL); } -static int ilk_get_display_clock_speed(struct drm_device *dev) +static int ilk_get_display_clock_speed(struct drm_i915_private *dev_priv) { return 450000; } -static int i945_get_display_clock_speed(struct drm_device *dev) +static int i945_get_display_clock_speed(struct drm_i915_private *dev_priv) { return 400000; } -static int i915_get_display_clock_speed(struct drm_device *dev) +static int i915_get_display_clock_speed(struct drm_i915_private *dev_priv) { return 333333; } -static int i9xx_misc_get_display_clock_speed(struct drm_device *dev) +static int i9xx_misc_get_display_clock_speed(struct drm_i915_private *dev_priv) { return 200000; } -static int pnv_get_display_clock_speed(struct drm_device *dev) +static int pnv_get_display_clock_speed(struct drm_i915_private *dev_priv) { - struct pci_dev *pdev = dev->pdev; + struct pci_dev *pdev = dev_priv->drm.pdev; u16 gcfgc = 0; pci_read_config_word(pdev, GCFGC, &gcfgc); @@ -7431,9 +7427,9 @@ static int pnv_get_display_clock_speed(struct drm_device *dev) } } -static int i915gm_get_display_clock_speed(struct drm_device *dev) +static int i915gm_get_display_clock_speed(struct drm_i915_private *dev_priv) { - struct pci_dev *pdev = dev->pdev; + struct pci_dev *pdev = dev_priv->drm.pdev; u16 gcfgc = 0; pci_read_config_word(pdev, GCFGC, &gcfgc); @@ -7451,14 +7447,14 @@ static int i915gm_get_display_clock_speed(struct drm_device *dev) } } -static int i865_get_display_clock_speed(struct drm_device *dev) +static int i865_get_display_clock_speed(struct drm_i915_private *dev_priv) { return 266667; } -static int i85x_get_display_clock_speed(struct drm_device *dev) +static int i85x_get_display_clock_speed(struct drm_i915_private *dev_priv) { - struct pci_dev *pdev = dev->pdev; + struct pci_dev *pdev = dev_priv->drm.pdev; u16 hpllcc = 0; /* @@ -7494,14 +7490,13 @@ static int i85x_get_display_clock_speed(struct drm_device *dev) return 0; } -static int i830_get_display_clock_speed(struct drm_device *dev) +static int i830_get_display_clock_speed(struct drm_i915_private *dev_priv) { return 133333; } -static unsigned int intel_hpll_vco(struct drm_device *dev) +static unsigned int intel_hpll_vco(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); static const unsigned int blb_vco[8] = { [0] = 3200000, [1] = 4000000, @@ -7548,16 +7543,16 @@ static unsigned int intel_hpll_vco(struct drm_device *dev) vco_table = ctg_vco; else if (IS_G4X(dev_priv)) vco_table = elk_vco; - else if (IS_CRESTLINE(dev)) + else if (IS_CRESTLINE(dev_priv)) vco_table = cl_vco; - else if (IS_PINEVIEW(dev)) + else if (IS_PINEVIEW(dev_priv)) vco_table = pnv_vco; - else if (IS_G33(dev)) + else if (IS_G33(dev_priv)) vco_table = blb_vco; else return 0; - tmp = I915_READ(IS_MOBILE(dev) ? HPLLVCO_MOBILE : HPLLVCO); + tmp = I915_READ(IS_MOBILE(dev_priv) ? 
HPLLVCO_MOBILE : HPLLVCO); vco = vco_table[tmp & 0x7]; if (vco == 0) @@ -7568,10 +7563,10 @@ static unsigned int intel_hpll_vco(struct drm_device *dev) return vco; } -static int gm45_get_display_clock_speed(struct drm_device *dev) +static int gm45_get_display_clock_speed(struct drm_i915_private *dev_priv) { - struct pci_dev *pdev = dev->pdev; - unsigned int cdclk_sel, vco = intel_hpll_vco(dev); + struct pci_dev *pdev = dev_priv->drm.pdev; + unsigned int cdclk_sel, vco = intel_hpll_vco(dev_priv); uint16_t tmp = 0; pci_read_config_word(pdev, GCFGC, &tmp); @@ -7591,14 +7586,14 @@ static int gm45_get_display_clock_speed(struct drm_device *dev) } } -static int i965gm_get_display_clock_speed(struct drm_device *dev) +static int i965gm_get_display_clock_speed(struct drm_i915_private *dev_priv) { - struct pci_dev *pdev = dev->pdev; + struct pci_dev *pdev = dev_priv->drm.pdev; static const uint8_t div_3200[] = { 16, 10, 8 }; static const uint8_t div_4000[] = { 20, 12, 10 }; static const uint8_t div_5333[] = { 24, 16, 14 }; const uint8_t *div_table; - unsigned int cdclk_sel, vco = intel_hpll_vco(dev); + unsigned int cdclk_sel, vco = intel_hpll_vco(dev_priv); uint16_t tmp = 0; pci_read_config_word(pdev, GCFGC, &tmp); @@ -7629,15 +7624,15 @@ fail: return 200000; } -static int g33_get_display_clock_speed(struct drm_device *dev) +static int g33_get_display_clock_speed(struct drm_i915_private *dev_priv) { - struct pci_dev *pdev = dev->pdev; + struct pci_dev *pdev = dev_priv->drm.pdev; static const uint8_t div_3200[] = { 12, 10, 8, 7, 5, 16 }; static const uint8_t div_4000[] = { 14, 12, 10, 8, 6, 20 }; static const uint8_t div_4800[] = { 20, 14, 12, 10, 8, 24 }; static const uint8_t div_5333[] = { 20, 16, 12, 12, 8, 28 }; const uint8_t *div_table; - unsigned int cdclk_sel, vco = intel_hpll_vco(dev); + unsigned int cdclk_sel, vco = intel_hpll_vco(dev_priv); uint16_t tmp = 0; pci_read_config_word(pdev, GCFGC, &tmp); diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 9a3a745..0599408 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -907,7 +907,7 @@ static void gen9_dc_off_power_well_enable(struct drm_i915_private *dev_priv, gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); WARN_ON(dev_priv->cdclk_freq != - dev_priv->display.get_display_clock_speed(&dev_priv->drm)); + dev_priv->display.get_display_clock_speed(dev_priv)); gen9_assert_dbuf_enabled(dev_priv); -- cgit v1.1 From 646d57720969f1abf92f6358af0fe91b04201ba9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 31 Oct 2016 22:37:14 +0200 Subject: drm/i915: Pass dev_priv to IS_MOBILE() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unify our approach to things by passing around dev_priv instead of dev. 
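(Sketch, not from the patch, mirroring has_edp_a() in the diff below: platform checks and mmio reads now key off the same dev_priv argument. example_wants_edp_a() is a hypothetical name.)

static bool example_wants_edp_a(struct drm_i915_private *dev_priv)
{
	if (!IS_MOBILE(dev_priv))
		return false;

	/* eDP A strap is only meaningful on mobile parts */
	return (I915_READ(DP_A) & DP_DETECTED) != 0;
}
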
Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1477946245-14134-16-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/intel_display.c | 8 +++----- drivers/gpu/drm/i915/intel_sdvo.c | 4 ++-- 3 files changed, 6 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 60c937d..474c728 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2749,7 +2749,7 @@ struct drm_i915_cmd_table { #define IS_SKYLAKE(dev_priv) ((dev_priv)->info.is_skylake) #define IS_BROXTON(dev_priv) ((dev_priv)->info.is_broxton) #define IS_KABYLAKE(dev_priv) ((dev_priv)->info.is_kabylake) -#define IS_MOBILE(dev) (INTEL_INFO(dev)->is_mobile) +#define IS_MOBILE(dev_priv) ((dev_priv)->info.is_mobile) #define IS_HSW_EARLY_SDV(dev_priv) (IS_HASWELL(dev_priv) && \ (INTEL_DEVID(dev_priv) & 0xFF00) == 0x0C00) #define IS_BDW_ULT(dev_priv) (IS_BROADWELL(dev_priv) && \ diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index b670ae7..50261c5 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -15374,11 +15374,9 @@ static int intel_encoder_clones(struct intel_encoder *encoder) return index_mask; } -static bool has_edp_a(struct drm_device *dev) +static bool has_edp_a(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); - - if (!IS_MOBILE(dev)) + if (!IS_MOBILE(dev_priv)) return false; if ((I915_READ(DP_A) & DP_DETECTED) == 0) @@ -15518,7 +15516,7 @@ static void intel_setup_outputs(struct drm_device *dev) int found; dpd_is_edp = intel_dp_is_edp(dev, PORT_D); - if (has_edp_a(dev)) + if (has_edp_a(dev_priv)) intel_dp_init(dev, DP_A, PORT_A); if (I915_READ(PCH_HDMIB) & SDVO_DETECTED) { diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 5d59a48..3990c80 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -2411,10 +2411,10 @@ static void intel_sdvo_add_hdmi_properties(struct intel_sdvo *intel_sdvo, struct intel_sdvo_connector *connector) { - struct drm_device *dev = connector->base.base.dev; + struct drm_i915_private *dev_priv = to_i915(connector->base.base.dev); intel_attach_force_audio_property(&connector->base.base); - if (INTEL_INFO(dev)->gen >= 4 && IS_MOBILE(dev)) { + if (INTEL_GEN(dev_priv) >= 4 && IS_MOBILE(dev_priv)) { intel_attach_broadcast_rgb_property(&connector->base.base); intel_sdvo->color_range_auto = true; } -- cgit v1.1 From 9b1e14f4d892e568c0b53aaa53eec6029b750e5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 31 Oct 2016 22:37:15 +0200 Subject: drm/i915: Pass dev_priv to IS_PINEVIEW() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unify our approach to things by passing around dev_priv instead of dev. 
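(Sketch, not from the patch: PLL divider computation selected by platform, as i9xx_update_pll_dividers() does in the diff below. example_compute_fp() is hypothetical, and i9xx_dpll_compute_fp() is the assumed non-Pineview counterpart of pnv_dpll_compute_fp().)

static u32 example_compute_fp(struct drm_i915_private *dev_priv,
			      struct dpll *clock)
{
	if (IS_PINEVIEW(dev_priv))
		return pnv_dpll_compute_fp(clock);

	return i9xx_dpll_compute_fp(clock);
}
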
Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1477946245-14134-17-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/intel_display.c | 19 +++++++++---------- drivers/gpu/drm/i915/intel_pm.c | 9 ++++----- 3 files changed, 14 insertions(+), 16 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 474c728..8b35cfa 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2735,7 +2735,7 @@ struct drm_i915_cmd_table { #define IS_G4X(dev_priv) ((dev_priv)->info.is_g4x) #define IS_PINEVIEW_G(dev_priv) (INTEL_DEVID(dev_priv) == 0xa001) #define IS_PINEVIEW_M(dev_priv) (INTEL_DEVID(dev_priv) == 0xa011) -#define IS_PINEVIEW(dev) (INTEL_INFO(dev)->is_pineview) +#define IS_PINEVIEW(dev_priv) ((dev_priv)->info.is_pineview) #define IS_G33(dev) (INTEL_INFO(dev)->is_g33) #define IS_IRONLAKE_M(dev_priv) (INTEL_DEVID(dev_priv) == 0x0046) #define IS_IVYBRIDGE(dev_priv) ((dev_priv)->info.is_ivybridge) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 50261c5..53e84615 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -7721,10 +7721,10 @@ static void i9xx_update_pll_dividers(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, struct dpll *reduced_clock) { - struct drm_device *dev = crtc->base.dev; + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); u32 fp, fp2 = 0; - if (IS_PINEVIEW(dev)) { + if (IS_PINEVIEW(dev_priv)) { fp = pnv_dpll_compute_fp(&crtc_state->dpll); if (reduced_clock) fp2 = pnv_dpll_compute_fp(reduced_clock); @@ -8143,8 +8143,7 @@ static void i9xx_compute_dpll(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, struct dpll *reduced_clock) { - struct drm_device *dev = crtc->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); u32 dpll; struct dpll *clock = &crtc_state->dpll; @@ -8170,7 +8169,7 @@ static void i9xx_compute_dpll(struct intel_crtc *crtc, dpll |= DPLL_SDVO_HIGH_SPEED; /* compute bitmask from p1 value */ - if (IS_PINEVIEW(dev)) + if (IS_PINEVIEW(dev_priv)) dpll |= (1 << (clock->p1 - 1)) << DPLL_FPA01_P1_POST_DIV_SHIFT_PINEVIEW; else { dpll |= (1 << (clock->p1 - 1)) << DPLL_FPA01_P1_POST_DIV_SHIFT; @@ -8191,7 +8190,7 @@ static void i9xx_compute_dpll(struct intel_crtc *crtc, dpll |= DPLLB_LVDS_P2_CLOCK_DIV_14; break; } - if (INTEL_INFO(dev)->gen >= 4) + if (INTEL_GEN(dev_priv) >= 4) dpll |= (6 << PLL_LOAD_PULSE_PHASE_SHIFT); if (crtc_state->sdvo_tv_clock) @@ -8205,7 +8204,7 @@ static void i9xx_compute_dpll(struct intel_crtc *crtc, dpll |= DPLL_VCO_ENABLE; crtc_state->dpll_hw_state.dpll = dpll; - if (INTEL_INFO(dev)->gen >= 4) { + if (INTEL_GEN(dev_priv) >= 4) { u32 dpll_md = (crtc_state->pixel_multiplier - 1) << DPLL_MD_UDI_MULTIPLIER_SHIFT; crtc_state->dpll_hw_state.dpll_md = dpll_md; @@ -11353,7 +11352,7 @@ static void i9xx_crtc_clock_get(struct intel_crtc *crtc, fp = pipe_config->dpll_hw_state.fp1; clock.m1 = (fp & FP_M1_DIV_MASK) >> FP_M1_DIV_SHIFT; - if (IS_PINEVIEW(dev)) { + if (IS_PINEVIEW(dev_priv)) { clock.n = ffs((fp & FP_N_PINEVIEW_DIV_MASK) >> FP_N_DIV_SHIFT) - 1; clock.m2 = (fp & FP_M2_PINEVIEW_DIV_MASK) >> FP_M2_DIV_SHIFT; } else { @@ -11362,7 +11361,7 @@ static void i9xx_crtc_clock_get(struct intel_crtc *crtc, } if (!IS_GEN2(dev_priv)) { - if (IS_PINEVIEW(dev)) + 
if (IS_PINEVIEW(dev_priv)) clock.p1 = ffs((dpll & DPLL_FPA01_P1_POST_DIV_MASK_PINEVIEW) >> DPLL_FPA01_P1_POST_DIV_SHIFT_PINEVIEW); else @@ -11384,7 +11383,7 @@ static void i9xx_crtc_clock_get(struct intel_crtc *crtc, return; } - if (IS_PINEVIEW(dev)) + if (IS_PINEVIEW(dev_priv)) port_clock = pnv_calc_dpll_params(refclk, &clock); else port_clock = i9xx_calc_dpll_params(refclk, &clock); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 4f51f86..11bbc35 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -320,7 +320,6 @@ static void chv_set_memory_pm5(struct drm_i915_private *dev_priv, bool enable) void intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable) { - struct drm_device *dev = &dev_priv->drm; u32 val; if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { @@ -330,7 +329,7 @@ void intel_set_memory_cxsr(struct drm_i915_private *dev_priv, bool enable) } else if (IS_G4X(dev_priv) || IS_CRESTLINE(dev_priv)) { I915_WRITE(FW_BLC_SELF, enable ? FW_BLC_SELF_EN : 0); POSTING_READ(FW_BLC_SELF); - } else if (IS_PINEVIEW(dev)) { + } else if (IS_PINEVIEW(dev_priv)) { val = I915_READ(DSPFW3) & ~PINEVIEW_SELF_REFRESH_EN; val |= enable ? PINEVIEW_SELF_REFRESH_EN : 0; I915_WRITE(DSPFW3, val); @@ -7628,7 +7627,7 @@ static void gen3_init_clock_gating(struct drm_device *dev) DSTATE_DOT_CLOCK_GATING; I915_WRITE(D_STATE, dstate); - if (IS_PINEVIEW(dev)) + if (IS_PINEVIEW(dev_priv)) I915_WRITE(ECOSKPD, _MASKED_BIT_ENABLE(ECO_GATING_CX_ONLY)); /* IIR "flip pending" means done if this bit is set */ @@ -7744,7 +7743,7 @@ void intel_init_pm(struct drm_device *dev) intel_fbc_init(dev_priv); /* For cxsr */ - if (IS_PINEVIEW(dev)) + if (IS_PINEVIEW(dev_priv)) i915_pineview_get_mem_freq(dev); else if (IS_GEN5(dev_priv)) i915_ironlake_get_mem_freq(dev); @@ -7778,7 +7777,7 @@ void intel_init_pm(struct drm_device *dev) } else if (IS_VALLEYVIEW(dev_priv)) { vlv_setup_wm_latency(dev); dev_priv->display.update_wm = vlv_update_wm; - } else if (IS_PINEVIEW(dev)) { + } else if (IS_PINEVIEW(dev_priv)) { if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv), dev_priv->is_ddr3, dev_priv->fsb_freq, -- cgit v1.1 From 148ac1f375fe9625f08a531d198e2d8559cbdd79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 31 Oct 2016 22:37:16 +0200 Subject: drm/i915: Pass dev_priv to i915_pineview_get_mem_freq() and i915_ironlake_get_mem_freq() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unify our approach to things by passing around dev_priv instead of dev. 
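(Sketch, not from the patch: both probes now share the dev_priv signature, so the early dispatch in intel_init_pm() stays uniform. example_probe_mem_freq() is a hypothetical wrapper around the two calls shown in the diff below.)

static void example_probe_mem_freq(struct drm_i915_private *dev_priv)
{
	/* cxsr latency lookup depends on the probed memory clock */
	if (IS_PINEVIEW(dev_priv))
		i915_pineview_get_mem_freq(dev_priv);
	else if (IS_GEN5(dev_priv))
		i915_ironlake_get_mem_freq(dev_priv);
}
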
Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1477946245-14134-18-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_pm.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 11bbc35..d423fc8 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -108,9 +108,8 @@ static void bxt_init_clock_gating(struct drm_device *dev) PWM1_GATING_DIS | PWM2_GATING_DIS); } -static void i915_pineview_get_mem_freq(struct drm_device *dev) +static void i915_pineview_get_mem_freq(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); u32 tmp; tmp = I915_READ(CLKCFG); @@ -147,9 +146,8 @@ static void i915_pineview_get_mem_freq(struct drm_device *dev) dev_priv->is_ddr3 = (tmp & CSHRDDR3CTL_DDR3) ? 1 : 0; } -static void i915_ironlake_get_mem_freq(struct drm_device *dev) +static void i915_ironlake_get_mem_freq(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); u16 ddrpll, csipll; ddrpll = I915_READ16(DDRMPLL1); @@ -7744,9 +7742,9 @@ void intel_init_pm(struct drm_device *dev) /* For cxsr */ if (IS_PINEVIEW(dev_priv)) - i915_pineview_get_mem_freq(dev); + i915_pineview_get_mem_freq(dev_priv); else if (IS_GEN5(dev_priv)) - i915_ironlake_get_mem_freq(dev); + i915_ironlake_get_mem_freq(dev_priv); /* For FIFO watermark updates */ if (INTEL_INFO(dev)->gen >= 9) { -- cgit v1.1 From ef0f5e93bd1ffced24370399b2b6bbe600a840bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 31 Oct 2016 22:37:17 +0200 Subject: drm/i915: Pass dev_priv to .get_fifo_size() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unify our approach to things by passing around dev_priv instead of dev. 
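(Sketch, not from the patch: FIFO sizing goes through the display vfunc, now dev_priv-based. example_plane_fifo() is a hypothetical name; the invocation matches the i9xx_update_wm() call sites in the diff.)

static int example_plane_fifo(struct drm_i915_private *dev_priv, int plane)
{
	/* per-platform DSPARB decoding behind one signature */
	return dev_priv->display.get_fifo_size(dev_priv, plane);
}
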
Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1477946245-14134-19-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/intel_pm.c | 22 +++++++++------------- 2 files changed, 10 insertions(+), 14 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8b35cfa..fd08559 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -495,7 +495,7 @@ struct dpll; struct drm_i915_display_funcs { int (*get_display_clock_speed)(struct drm_i915_private *dev_priv); - int (*get_fifo_size)(struct drm_device *dev, int plane); + int (*get_fifo_size)(struct drm_i915_private *dev_priv, int plane); int (*compute_pipe_wm)(struct intel_crtc_state *cstate); int (*compute_intermediate_wm)(struct drm_device *dev, struct intel_crtc *intel_crtc, diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index d423fc8..83391c2 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -375,10 +375,9 @@ static const int pessimal_latency_ns = 5000; #define VLV_FIFO_START(dsparb, dsparb2, lo_shift, hi_shift) \ ((((dsparb) >> (lo_shift)) & 0xff) | ((((dsparb2) >> (hi_shift)) & 0x1) << 8)) -static int vlv_get_fifo_size(struct drm_device *dev, +static int vlv_get_fifo_size(struct drm_i915_private *dev_priv, enum pipe pipe, int plane) { - struct drm_i915_private *dev_priv = to_i915(dev); int sprite0_start, sprite1_start, size; switch (pipe) { @@ -427,9 +426,8 @@ static int vlv_get_fifo_size(struct drm_device *dev, return size; } -static int i9xx_get_fifo_size(struct drm_device *dev, int plane) +static int i9xx_get_fifo_size(struct drm_i915_private *dev_priv, int plane) { - struct drm_i915_private *dev_priv = to_i915(dev); uint32_t dsparb = I915_READ(DSPARB); int size; @@ -443,9 +441,8 @@ static int i9xx_get_fifo_size(struct drm_device *dev, int plane) return size; } -static int i830_get_fifo_size(struct drm_device *dev, int plane) +static int i830_get_fifo_size(struct drm_i915_private *dev_priv, int plane) { - struct drm_i915_private *dev_priv = to_i915(dev); uint32_t dsparb = I915_READ(DSPARB); int size; @@ -460,9 +457,8 @@ static int i830_get_fifo_size(struct drm_device *dev, int plane) return size; } -static int i845_get_fifo_size(struct drm_device *dev, int plane) +static int i845_get_fifo_size(struct drm_i915_private *dev_priv, int plane) { - struct drm_i915_private *dev_priv = to_i915(dev); uint32_t dsparb = I915_READ(DSPARB); int size; @@ -1541,7 +1537,7 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc) else wm_info = &i830_a_wm_info; - fifo_size = dev_priv->display.get_fifo_size(dev, 0); + fifo_size = dev_priv->display.get_fifo_size(dev_priv, 0); crtc = intel_get_crtc_for_plane(dev_priv, 0); if (intel_crtc_active(crtc)) { const struct drm_display_mode *adjusted_mode = @@ -1568,7 +1564,7 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc) if (IS_GEN2(dev_priv)) wm_info = &i830_bc_wm_info; - fifo_size = dev_priv->display.get_fifo_size(dev, 1); + fifo_size = dev_priv->display.get_fifo_size(dev_priv, 1); crtc = intel_get_crtc_for_plane(dev_priv, 1); if (intel_crtc_active(crtc)) { const struct drm_display_mode *adjusted_mode = @@ -1686,7 +1682,7 @@ static void i845_update_wm(struct intel_crtc *unused_crtc) adjusted_mode = &crtc->config->base.adjusted_mode; planea_wm = intel_calculate_wm(adjusted_mode->crtc_clock, &i845_wm_info, - 
dev_priv->display.get_fifo_size(dev, 0), + dev_priv->display.get_fifo_size(dev_priv, 0), 4, pessimal_latency_ns); fwater_lo = I915_READ(FW_BLC) & ~0xfff; fwater_lo |= (3<<8) | planea_wm; @@ -4556,11 +4552,11 @@ void vlv_wm_get_hw_state(struct drm_device *dev) plane->wm.fifo_size = 63; break; case DRM_PLANE_TYPE_PRIMARY: - plane->wm.fifo_size = vlv_get_fifo_size(dev, plane->pipe, 0); + plane->wm.fifo_size = vlv_get_fifo_size(dev_priv, plane->pipe, 0); break; case DRM_PLANE_TYPE_OVERLAY: sprite = plane->plane; - plane->wm.fifo_size = vlv_get_fifo_size(dev, plane->pipe, sprite + 1); + plane->wm.fifo_size = vlv_get_fifo_size(dev_priv, plane->pipe, sprite + 1); break; } } -- cgit v1.1 From 03427fcb75ef0b31f883a2b6075e33e56e668241 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 31 Oct 2016 22:37:18 +0200 Subject: drm/i915: Pass dev_priv to HAS_FW_BLC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unify our approach to things by passing around dev_priv instead of dev. Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1477946245-14134-20-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/intel_pm.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index fd08559..fdfb1d2 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2893,7 +2893,7 @@ struct drm_i915_cmd_table { #define SUPPORTS_TV(dev) (INTEL_INFO(dev)->supports_tv) #define I915_HAS_HOTPLUG(dev) (INTEL_INFO(dev)->has_hotplug) -#define HAS_FW_BLC(dev) (INTEL_INFO(dev)->gen > 2) +#define HAS_FW_BLC(dev_priv) (INTEL_GEN(dev_priv) > 2) #define HAS_PIPE_CXSR(dev) (INTEL_INFO(dev)->has_pipe_cxsr) #define HAS_FBC(dev) (INTEL_INFO(dev)->has_fbc) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 83391c2..afd3bab 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1612,7 +1612,7 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc) intel_set_memory_cxsr(dev_priv, false); /* Calc sr entries for one plane configs */ - if (HAS_FW_BLC(dev) && enabled) { + if (HAS_FW_BLC(dev_priv) && enabled) { /* self-refresh has much higher latency */ static const int sr_latency_ns = 6000; const struct drm_display_mode *adjusted_mode = -- cgit v1.1 From a26e52392191f51605af9017058cf5e43334cd5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 31 Oct 2016 22:37:19 +0200 Subject: drm/i915: Pass dev_priv to IS_BROADWATER/IS_CRESTLINE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unify our approach to things by passing around dev_priv instead of dev. 
Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1477946245-14134-21-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.c | 3 +-- drivers/gpu/drm/i915/i915_drv.h | 4 ++-- drivers/gpu/drm/i915/i915_gem.c | 3 ++- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 3b9bfd2..77ca07f7 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -981,7 +981,6 @@ static void intel_sanitize_options(struct drm_i915_private *dev_priv) static int i915_driver_init_hw(struct drm_i915_private *dev_priv) { struct pci_dev *pdev = dev_priv->drm.pdev; - struct drm_device *dev = &dev_priv->drm; int ret; if (i915_inject_load_failure()) @@ -1039,7 +1038,7 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) * behaviour if any general state is accessed within a page above 4GB, * which also needs to be handled carefully. */ - if (IS_BROADWATER(dev) || IS_CRESTLINE(dev)) { + if (IS_BROADWATER(dev_priv) || IS_CRESTLINE(dev_priv)) { ret = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)); if (ret) { diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index fdfb1d2..888ddf8 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2729,8 +2729,8 @@ struct drm_i915_cmd_table { #define IS_I915GM(dev_priv) (INTEL_DEVID(dev_priv) == 0x2592) #define IS_I945G(dev_priv) (INTEL_DEVID(dev_priv) == 0x2772) #define IS_I945GM(dev) (INTEL_INFO(dev)->is_i945gm) -#define IS_BROADWATER(dev) (INTEL_INFO(dev)->is_broadwater) -#define IS_CRESTLINE(dev) (INTEL_INFO(dev)->is_crestline) +#define IS_BROADWATER(dev_priv) ((dev_priv)->info.is_broadwater) +#define IS_CRESTLINE(dev_priv) ((dev_priv)->info.is_crestline) #define IS_GM45(dev_priv) (INTEL_DEVID(dev_priv) == 0x2A42) #define IS_G4X(dev_priv) ((dev_priv)->info.is_g4x) #define IS_PINEVIEW_G(dev_priv) (INTEL_DEVID(dev_priv) == 0xa001) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index cbbfaa7..207b8ef 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4158,6 +4158,7 @@ static const struct drm_i915_gem_object_ops i915_gem_object_ops = { struct drm_i915_gem_object * i915_gem_object_create(struct drm_device *dev, u64 size) { + struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_object *obj; struct address_space *mapping; gfp_t mask; @@ -4183,7 +4184,7 @@ i915_gem_object_create(struct drm_device *dev, u64 size) goto fail; mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; - if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) { + if (IS_CRESTLINE(dev_priv) || IS_BROADWATER(dev_priv)) { /* 965gm cannot relocate objects above 4GiB. */ mask &= ~__GFP_HIGHMEM; mask |= __GFP_DMA32; -- cgit v1.1 From a9097be4f980c1b6757bab3c89609f81d390b478 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 31 Oct 2016 22:37:20 +0200 Subject: drm/i915: Pass dev_priv to rest of IS_FOO() macros for the old platforms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unify our approach to things by passing around dev_priv instead of dev. 
Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1477946245-14134-22-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 8 ++++---- drivers/gpu/drm/i915/i915_gem_stolen.c | 4 ++-- drivers/gpu/drm/i915/intel_pm.c | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 888ddf8..63672c3 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2723,12 +2723,12 @@ struct drm_i915_cmd_table { #define IS_I830(dev_priv) (INTEL_DEVID(dev_priv) == 0x3577) #define IS_845G(dev_priv) (INTEL_DEVID(dev_priv) == 0x2562) -#define IS_I85X(dev) (INTEL_INFO(dev)->is_i85x) +#define IS_I85X(dev_priv) ((dev_priv)->info.is_i85x) #define IS_I865G(dev_priv) (INTEL_DEVID(dev_priv) == 0x2572) -#define IS_I915G(dev) (INTEL_INFO(dev)->is_i915g) +#define IS_I915G(dev_priv) ((dev_priv)->info.is_i915g) #define IS_I915GM(dev_priv) (INTEL_DEVID(dev_priv) == 0x2592) #define IS_I945G(dev_priv) (INTEL_DEVID(dev_priv) == 0x2772) -#define IS_I945GM(dev) (INTEL_INFO(dev)->is_i945gm) +#define IS_I945GM(dev_priv) ((dev_priv)->info.is_i945gm) #define IS_BROADWATER(dev_priv) ((dev_priv)->info.is_broadwater) #define IS_CRESTLINE(dev_priv) ((dev_priv)->info.is_crestline) #define IS_GM45(dev_priv) (INTEL_DEVID(dev_priv) == 0x2A42) @@ -2736,7 +2736,7 @@ struct drm_i915_cmd_table { #define IS_PINEVIEW_G(dev_priv) (INTEL_DEVID(dev_priv) == 0xa001) #define IS_PINEVIEW_M(dev_priv) (INTEL_DEVID(dev_priv) == 0xa011) #define IS_PINEVIEW(dev_priv) ((dev_priv)->info.is_pineview) -#define IS_G33(dev) (INTEL_INFO(dev)->is_g33) +#define IS_G33(dev_priv) ((dev_priv)->info.is_g33) #define IS_IRONLAKE_M(dev_priv) (INTEL_DEVID(dev_priv) == 0x0046) #define IS_IVYBRIDGE(dev_priv) ((dev_priv)->info.is_ivybridge) #define IS_IVB_GT1(dev_priv) (INTEL_DEVID(dev_priv) == 0x0156 || \ diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 1a63ffa..3725a1d 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -109,7 +109,7 @@ static unsigned long i915_stolen_to_physical(struct drm_device *dev) * */ base = 0; - if (INTEL_INFO(dev)->gen >= 3) { + if (INTEL_GEN(dev_priv) >= 3) { u32 bsm; pci_read_config_dword(pdev, INTEL_BSM, &bsm); @@ -138,7 +138,7 @@ static unsigned long i915_stolen_to_physical(struct drm_device *dev) I865_TOUD, &toud); base = (toud << 16) + tseg_size; - } else if (IS_I85X(dev)) { + } else if (IS_I85X(dev_priv)) { u32 tseg_size = 0; u32 tom; u8 tmp; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index afd3bab..7199f85 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -1530,7 +1530,7 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc) int planea_wm, planeb_wm; struct intel_crtc *crtc, *enabled = NULL; - if (IS_I945GM(dev)) + if (IS_I945GM(dev_priv)) wm_info = &i945_wm_info; else if (!IS_GEN2(dev_priv)) wm_info = &i915_wm_info; -- cgit v1.1 From ffc7a76b8bce780aa21b9979198da8ecd8303544 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 31 Oct 2016 22:37:21 +0200 Subject: drm/i915: Pass dev_priv to single_enabled_crtc() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unify our approach to things by passing around dev_priv instead of dev. 
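(Sketch, not from the patch: iteration helpers take dev_priv and reach the drm_device only where the iterator macro needs it, exactly as single_enabled_crtc() now does. example_first_active_crtc() is a hypothetical variant.)

static struct intel_crtc *
example_first_active_crtc(struct drm_i915_private *dev_priv)
{
	struct intel_crtc *crtc;

	for_each_intel_crtc(&dev_priv->drm, crtc) {
		if (intel_crtc_active(crtc))
			return crtc;
	}

	return NULL;
}
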
Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1477946245-14134-23-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_pm.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 7199f85..cb518c5 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -618,11 +618,11 @@ static unsigned long intel_calculate_wm(unsigned long clock_in_khz, return wm_size; } -static struct intel_crtc *single_enabled_crtc(struct drm_device *dev) +static struct intel_crtc *single_enabled_crtc(struct drm_i915_private *dev_priv) { struct intel_crtc *crtc, *enabled = NULL; - for_each_intel_crtc(dev, crtc) { + for_each_intel_crtc(&dev_priv->drm, crtc) { if (intel_crtc_active(crtc)) { if (enabled) return NULL; @@ -635,8 +635,7 @@ static struct intel_crtc *single_enabled_crtc(struct drm_device *dev) static void pineview_update_wm(struct intel_crtc *unused_crtc) { - struct drm_device *dev = unused_crtc->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev); struct intel_crtc *crtc; const struct cxsr_latency *latency; u32 reg; @@ -652,7 +651,7 @@ static void pineview_update_wm(struct intel_crtc *unused_crtc) return; } - crtc = single_enabled_crtc(dev); + crtc = single_enabled_crtc(dev_priv); if (crtc) { const struct drm_display_mode *adjusted_mode = &crtc->config->base.adjusted_mode; @@ -1443,15 +1442,14 @@ static void g4x_update_wm(struct intel_crtc *crtc) static void i965_update_wm(struct intel_crtc *unused_crtc) { - struct drm_device *dev = unused_crtc->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev); struct intel_crtc *crtc; int srwm = 1; int cursor_sr = 16; bool cxsr_enabled; /* Calc sr entries for one plane configs */ - crtc = single_enabled_crtc(dev); + crtc = single_enabled_crtc(dev_priv); if (crtc) { /* self-refresh has much higher latency */ static const int sr_latency_ns = 12000; @@ -1520,8 +1518,7 @@ static void i965_update_wm(struct intel_crtc *unused_crtc) static void i9xx_update_wm(struct intel_crtc *unused_crtc) { - struct drm_device *dev = unused_crtc->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev); const struct intel_watermark_params *wm_info; uint32_t fwater_lo; uint32_t fwater_hi; @@ -1668,14 +1665,13 @@ static void i9xx_update_wm(struct intel_crtc *unused_crtc) static void i845_update_wm(struct intel_crtc *unused_crtc) { - struct drm_device *dev = unused_crtc->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev); struct intel_crtc *crtc; const struct drm_display_mode *adjusted_mode; uint32_t fwater_lo; int planea_wm; - crtc = single_enabled_crtc(dev); + crtc = single_enabled_crtc(dev_priv); if (crtc == NULL) return; -- cgit v1.1 From 46f16e631aa2001cd4a4899b4797d4f0b847ad10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 31 Oct 2016 22:37:22 +0200 Subject: drm/i915: Pass dev_priv to init_clock_gating MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unify our approach to things by passing around dev_priv instead of dev. 
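(Sketch, not from the patch: once every gating hook shares the dev_priv signature, platform variants compose by plain calls, as bxt_init_clock_gating() does with gen9_init_clock_gating() in the diff below. example_gating() is a hypothetical name.)

static void example_gating(struct drm_i915_private *dev_priv)
{
	/* shared gen9 gating first, then platform-specific workarounds */
	gen9_init_clock_gating(dev_priv);
}
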
Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1477946245-14134-24-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.c | 3 +- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/intel_display.c | 2 +- drivers/gpu/drm/i915/intel_drv.h | 2 +- drivers/gpu/drm/i915/intel_pm.c | 129 ++++++++++++----------------------- 5 files changed, 49 insertions(+), 89 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 77ca07f7..f83dde9 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -2264,7 +2264,6 @@ err1: static int vlv_resume_prepare(struct drm_i915_private *dev_priv, bool rpm_resume) { - struct drm_device *dev = &dev_priv->drm; int err; int ret; @@ -2289,7 +2288,7 @@ static int vlv_resume_prepare(struct drm_i915_private *dev_priv, vlv_check_no_gt_access(dev_priv); if (rpm_resume) - intel_init_clock_gating(dev); + intel_init_clock_gating(dev_priv); return ret; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 63672c3..b8e72cd 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -525,7 +525,7 @@ struct drm_i915_display_funcs { const struct drm_display_mode *adjusted_mode); void (*audio_codec_disable)(struct intel_encoder *encoder); void (*fdi_link_train)(struct drm_crtc *crtc); - void (*init_clock_gating)(struct drm_device *dev); + void (*init_clock_gating)(struct drm_i915_private *dev_priv); int (*queue_flip)(struct drm_device *dev, struct drm_crtc *crtc, struct drm_framebuffer *fb, struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 53e84615..a46922a 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -16314,7 +16314,7 @@ void intel_modeset_init_hw(struct drm_device *dev) dev_priv->atomic_cdclk_freq = dev_priv->cdclk_freq; - intel_init_clock_gating(dev); + intel_init_clock_gating(dev_priv); } /* diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 0fdd546..3a9ca27 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1703,7 +1703,7 @@ bool chv_phy_powergate_ch(struct drm_i915_private *dev_priv, enum dpio_phy phy, /* intel_pm.c */ -void intel_init_clock_gating(struct drm_device *dev); +void intel_init_clock_gating(struct drm_i915_private *dev_priv); void intel_suspend_hw(struct drm_device *dev); int ilk_wm_max_level(const struct drm_i915_private *dev_priv); void intel_update_watermarks(struct intel_crtc *crtc); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index cb518c5..3e9bc2a 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -56,10 +56,8 @@ #define INTEL_RC6p_ENABLE (1<<1) #define INTEL_RC6pp_ENABLE (1<<2) -static void gen9_init_clock_gating(struct drm_device *dev) +static void gen9_init_clock_gating(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; - /* See Bspec note for PSR2_CTL bit 31, Wa#828:skl,bxt,kbl */ I915_WRITE(CHICKEN_PAR1_1, I915_READ(CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP); @@ -82,11 +80,9 @@ static void gen9_init_clock_gating(struct drm_device *dev) ILK_DPFC_DISABLE_DUMMY0); } -static void bxt_init_clock_gating(struct drm_device *dev) +static void bxt_init_clock_gating(struct drm_i915_private *dev_priv) { - 
struct drm_i915_private *dev_priv = to_i915(dev); - - gen9_init_clock_gating(dev); + gen9_init_clock_gating(dev_priv); /* WaDisableSDEUnitClockGating:bxt */ I915_WRITE(GEN8_UCGCTL6, I915_READ(GEN8_UCGCTL6) | @@ -6857,10 +6853,8 @@ void intel_autoenable_gt_powersave(struct drm_i915_private *dev_priv) } } -static void ibx_init_clock_gating(struct drm_device *dev) +static void ibx_init_clock_gating(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); - /* * On Ibex Peak and Cougar Point, we need to disable clock * gating for the panel power sequencer or it will fail to @@ -6869,9 +6863,8 @@ static void ibx_init_clock_gating(struct drm_device *dev) I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE); } -static void g4x_disable_trickle_feed(struct drm_device *dev) +static void g4x_disable_trickle_feed(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); enum pipe pipe; for_each_pipe(dev_priv, pipe) { @@ -6884,10 +6877,8 @@ static void g4x_disable_trickle_feed(struct drm_device *dev) } } -static void ilk_init_lp_watermarks(struct drm_device *dev) +static void ilk_init_lp_watermarks(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); - I915_WRITE(WM3_LP_ILK, I915_READ(WM3_LP_ILK) & ~WM1_LP_SR_EN); I915_WRITE(WM2_LP_ILK, I915_READ(WM2_LP_ILK) & ~WM1_LP_SR_EN); I915_WRITE(WM1_LP_ILK, I915_READ(WM1_LP_ILK) & ~WM1_LP_SR_EN); @@ -6898,9 +6889,8 @@ static void ilk_init_lp_watermarks(struct drm_device *dev) */ } -static void ironlake_init_clock_gating(struct drm_device *dev) +static void ironlake_init_clock_gating(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; /* @@ -6932,7 +6922,7 @@ static void ironlake_init_clock_gating(struct drm_device *dev) (I915_READ(DISP_ARB_CTL) | DISP_FBC_WM_DIS)); - ilk_init_lp_watermarks(dev); + ilk_init_lp_watermarks(dev_priv); /* * Based on the document from hardware guys the following bits @@ -6967,14 +6957,13 @@ static void ironlake_init_clock_gating(struct drm_device *dev) /* WaDisable_RenderCache_OperationalFlush:ilk */ I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); - g4x_disable_trickle_feed(dev); + g4x_disable_trickle_feed(dev_priv); - ibx_init_clock_gating(dev); + ibx_init_clock_gating(dev_priv); } -static void cpt_init_clock_gating(struct drm_device *dev) +static void cpt_init_clock_gating(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); int pipe; uint32_t val; @@ -7009,9 +6998,8 @@ static void cpt_init_clock_gating(struct drm_device *dev) } } -static void gen6_check_mch_setup(struct drm_device *dev) +static void gen6_check_mch_setup(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); uint32_t tmp; tmp = I915_READ(MCH_SSKPD); @@ -7020,9 +7008,8 @@ static void gen6_check_mch_setup(struct drm_device *dev) tmp); } -static void gen6_init_clock_gating(struct drm_device *dev) +static void gen6_init_clock_gating(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); uint32_t dspclk_gate = ILK_VRHUNIT_CLOCK_GATE_DISABLE; I915_WRITE(ILK_DSPCLK_GATE_D, dspclk_gate); @@ -7049,7 +7036,7 @@ static void gen6_init_clock_gating(struct drm_device *dev) I915_WRITE(GEN6_GT_MODE, _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4)); - ilk_init_lp_watermarks(dev); + ilk_init_lp_watermarks(dev_priv); I915_WRITE(CACHE_MODE_0, 
_MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB)); @@ -7110,11 +7097,11 @@ static void gen6_init_clock_gating(struct drm_device *dev) ILK_DPARBUNIT_CLOCK_GATE_ENABLE | ILK_DPFDUNIT_CLOCK_GATE_ENABLE); - g4x_disable_trickle_feed(dev); + g4x_disable_trickle_feed(dev_priv); - cpt_init_clock_gating(dev); + cpt_init_clock_gating(dev_priv); - gen6_check_mch_setup(dev); + gen6_check_mch_setup(dev_priv); } static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv) @@ -7135,10 +7122,8 @@ static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv) I915_WRITE(GEN7_FF_THREAD_MODE, reg); } -static void lpt_init_clock_gating(struct drm_device *dev) +static void lpt_init_clock_gating(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); - /* * TODO: this bit should only be enabled when really needed, then * disabled when not needed anymore in order to save power. @@ -7189,11 +7174,9 @@ static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv, I915_WRITE(GEN7_MISCCPCTL, misccpctl); } -static void kabylake_init_clock_gating(struct drm_device *dev) +static void kabylake_init_clock_gating(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; - - gen9_init_clock_gating(dev); + gen9_init_clock_gating(dev_priv); /* WaDisableSDEUnitClockGating:kbl */ if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0)) @@ -7210,11 +7193,9 @@ static void kabylake_init_clock_gating(struct drm_device *dev) ILK_DPFC_NUKE_ON_ANY_MODIFICATION); } -static void skylake_init_clock_gating(struct drm_device *dev) +static void skylake_init_clock_gating(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = dev->dev_private; - - gen9_init_clock_gating(dev); + gen9_init_clock_gating(dev_priv); /* WAC6entrylatency:skl */ I915_WRITE(FBC_LLC_READ_CTRL, I915_READ(FBC_LLC_READ_CTRL) | @@ -7225,12 +7206,11 @@ static void skylake_init_clock_gating(struct drm_device *dev) ILK_DPFC_NUKE_ON_ANY_MODIFICATION); } -static void broadwell_init_clock_gating(struct drm_device *dev) +static void broadwell_init_clock_gating(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); enum pipe pipe; - ilk_init_lp_watermarks(dev); + ilk_init_lp_watermarks(dev_priv); /* WaSwitchSolVfFArbitrationPriority:bdw */ I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); @@ -7273,14 +7253,12 @@ static void broadwell_init_clock_gating(struct drm_device *dev) I915_WRITE(CHICKEN_PAR2_1, I915_READ(CHICKEN_PAR2_1) | KVM_CONFIG_CHANGE_NOTIFICATION_SELECT); - lpt_init_clock_gating(dev); + lpt_init_clock_gating(dev_priv); } -static void haswell_init_clock_gating(struct drm_device *dev) +static void haswell_init_clock_gating(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); - - ilk_init_lp_watermarks(dev); + ilk_init_lp_watermarks(dev_priv); /* L3 caching of data atomics doesn't work -- disable it. 
*/ I915_WRITE(HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE); @@ -7329,15 +7307,14 @@ static void haswell_init_clock_gating(struct drm_device *dev) I915_WRITE(CHICKEN_PAR1_1, I915_READ(CHICKEN_PAR1_1) | FORCE_ARB_IDLE_PLANES); - lpt_init_clock_gating(dev); + lpt_init_clock_gating(dev_priv); } -static void ivybridge_init_clock_gating(struct drm_device *dev) +static void ivybridge_init_clock_gating(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); uint32_t snpcr; - ilk_init_lp_watermarks(dev); + ilk_init_lp_watermarks(dev_priv); I915_WRITE(ILK_DSPCLK_GATE_D, ILK_VRHUNIT_CLOCK_GATE_DISABLE); @@ -7394,7 +7371,7 @@ static void ivybridge_init_clock_gating(struct drm_device *dev) I915_READ(GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) | GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB); - g4x_disable_trickle_feed(dev); + g4x_disable_trickle_feed(dev_priv); gen7_setup_fixed_func_scheduler(dev_priv); @@ -7425,15 +7402,13 @@ static void ivybridge_init_clock_gating(struct drm_device *dev) I915_WRITE(GEN6_MBCUNIT_SNPCR, snpcr); if (!HAS_PCH_NOP(dev_priv)) - cpt_init_clock_gating(dev); + cpt_init_clock_gating(dev_priv); - gen6_check_mch_setup(dev); + gen6_check_mch_setup(dev_priv); } -static void valleyview_init_clock_gating(struct drm_device *dev) +static void valleyview_init_clock_gating(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); - /* WaDisableEarlyCull:vlv */ I915_WRITE(_3D_CHICKEN3, _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_DISABLE_OBJEND_CULL)); @@ -7512,10 +7487,8 @@ static void valleyview_init_clock_gating(struct drm_device *dev) I915_WRITE(VLV_GUNIT_CLOCK_GATE, GCFG_DIS); } -static void cherryview_init_clock_gating(struct drm_device *dev) +static void cherryview_init_clock_gating(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); - /* WaVSRefCountFullforceMissDisable:chv */ /* WaDSRefCountFullforceMissDisable:chv */ I915_WRITE(GEN7_FF_THREAD_MODE, @@ -7548,9 +7521,8 @@ static void cherryview_init_clock_gating(struct drm_device *dev) I915_WRITE(HSW_GTT_CACHE_EN, GTT_CACHE_EN_ALL); } -static void g4x_init_clock_gating(struct drm_device *dev) +static void g4x_init_clock_gating(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); uint32_t dspclk_gate; I915_WRITE(RENCLK_GATE_D1, 0); @@ -7572,13 +7544,11 @@ static void g4x_init_clock_gating(struct drm_device *dev) /* WaDisable_RenderCache_OperationalFlush:g4x */ I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); - g4x_disable_trickle_feed(dev); + g4x_disable_trickle_feed(dev_priv); } -static void crestline_init_clock_gating(struct drm_device *dev) +static void crestline_init_clock_gating(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); - I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE); I915_WRITE(RENCLK_GATE_D2, 0); I915_WRITE(DSPCLK_GATE_D, 0); @@ -7591,10 +7561,8 @@ static void crestline_init_clock_gating(struct drm_device *dev) I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); } -static void broadwater_init_clock_gating(struct drm_device *dev) +static void broadwater_init_clock_gating(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); - I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE | I965_RCC_CLOCK_GATE_DISABLE | I965_RCPB_CLOCK_GATE_DISABLE | @@ -7608,9 +7576,8 @@ static void broadwater_init_clock_gating(struct drm_device *dev) I915_WRITE(CACHE_MODE_0, _MASKED_BIT_DISABLE(RC_OP_FLUSH_ENABLE)); } 
-static void gen3_init_clock_gating(struct drm_device *dev) +static void gen3_init_clock_gating(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); u32 dstate = I915_READ(D_STATE); dstate |= DSTATE_PLL_D3_OFF | DSTATE_GFX_CLOCK_GATING | @@ -7633,10 +7600,8 @@ static void gen3_init_clock_gating(struct drm_device *dev) _MASKED_BIT_ENABLE(MI_ARB_DISPLAY_TRICKLE_FEED_DISABLE)); } -static void i85x_init_clock_gating(struct drm_device *dev) +static void i85x_init_clock_gating(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); - I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE); /* interrupts should cause a wake up from C3 */ @@ -7647,10 +7612,8 @@ static void i85x_init_clock_gating(struct drm_device *dev) _MASKED_BIT_ENABLE(MEM_DISPLAY_TRICKLE_FEED_DISABLE)); } -static void i830_init_clock_gating(struct drm_device *dev) +static void i830_init_clock_gating(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); - I915_WRITE(DSPCLK_GATE_D, OVRUNIT_CLOCK_GATE_DISABLE); I915_WRITE(MEM_MODE, @@ -7658,11 +7621,9 @@ static void i830_init_clock_gating(struct drm_device *dev) _MASKED_BIT_ENABLE(MEM_DISPLAY_B_TRICKLE_FEED_DISABLE)); } -void intel_init_clock_gating(struct drm_device *dev) +void intel_init_clock_gating(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); - - dev_priv->display.init_clock_gating(dev); + dev_priv->display.init_clock_gating(dev_priv); } void intel_suspend_hw(struct drm_device *dev) @@ -7671,7 +7632,7 @@ void intel_suspend_hw(struct drm_device *dev) lpt_suspend_hw(dev); } -static void nop_init_clock_gating(struct drm_device *dev) +static void nop_init_clock_gating(struct drm_i915_private *dev_priv) { DRM_DEBUG_KMS("No clock gating settings or workarounds applied.\n"); } -- cgit v1.1 From 712bf36449ef6eb7a130153db9cb13fe897dc2e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 31 Oct 2016 22:37:23 +0200 Subject: drm/i915: Pass dev_priv to intel_suspend_hw() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unify our approach to things by passing around dev_priv instead of dev. 
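Condensed sketch of the resulting call chain (illustrative only,
assembled from the hunks below): the drm_device is converted exactly
once at the suspend entry point, and the callee stays in dev_priv terms.

	/* in i915_drm_suspend(struct drm_device *dev) */
	struct drm_i915_private *dev_priv = to_i915(dev);

	intel_suspend_hw(dev_priv);	/* was intel_suspend_hw(dev) */

	/* callee, after the change */
	void intel_suspend_hw(struct drm_i915_private *dev_priv)
	{
		if (HAS_PCH_LPT(dev_priv))
			lpt_suspend_hw(dev_priv);
	}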
Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1477946245-14134-25-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.c | 2 +- drivers/gpu/drm/i915/intel_drv.h | 2 +- drivers/gpu/drm/i915/intel_pm.c | 10 ++++------ 3 files changed, 6 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index f83dde9..48f4d21 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1435,7 +1435,7 @@ static int i915_drm_suspend(struct drm_device *dev) intel_suspend_encoders(dev_priv); - intel_suspend_hw(dev); + intel_suspend_hw(dev_priv); i915_gem_suspend_gtt_mappings(dev); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 3a9ca27..8765f8d 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1704,7 +1704,7 @@ bool chv_phy_powergate_ch(struct drm_i915_private *dev_priv, enum dpio_phy phy, /* intel_pm.c */ void intel_init_clock_gating(struct drm_i915_private *dev_priv); -void intel_suspend_hw(struct drm_device *dev); +void intel_suspend_hw(struct drm_i915_private *dev_priv); int ilk_wm_max_level(const struct drm_i915_private *dev_priv); void intel_update_watermarks(struct intel_crtc *crtc); void intel_init_pm(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 3e9bc2a..8c5fb8f 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7139,10 +7139,8 @@ static void lpt_init_clock_gating(struct drm_i915_private *dev_priv) TRANS_CHICKEN1_DP0UNIT_GC_DISABLE); } -static void lpt_suspend_hw(struct drm_device *dev) +static void lpt_suspend_hw(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); - if (HAS_PCH_LPT_LP(dev_priv)) { uint32_t val = I915_READ(SOUTH_DSPCLK_GATE_D); @@ -7626,10 +7624,10 @@ void intel_init_clock_gating(struct drm_i915_private *dev_priv) dev_priv->display.init_clock_gating(dev_priv); } -void intel_suspend_hw(struct drm_device *dev) +void intel_suspend_hw(struct drm_i915_private *dev_priv) { - if (HAS_PCH_LPT(to_i915(dev))) - lpt_suspend_hw(dev); + if (HAS_PCH_LPT(dev_priv)) + lpt_suspend_hw(dev_priv); } static void nop_init_clock_gating(struct drm_i915_private *dev_priv) -- cgit v1.1 From bb7265197a86bd68078825723b572599fcc60d2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 31 Oct 2016 22:37:24 +0200 Subject: drm/i915: Pass dev_priv to ilk_setup_wm_latency() & co. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unify our approach to things by passing around dev_priv instead of dev. 
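For reference, a condensed sketch of the converted setup path (assembled
from the hunks below; the latency print/fixup steps are elided):

	static void ilk_setup_wm_latency(struct drm_i915_private *dev_priv)
	{
		intel_read_wm_latency(dev_priv, dev_priv->wm.pri_latency);

		/* sprite/cursor latencies start as copies of the primary set */
		memcpy(dev_priv->wm.spr_latency, dev_priv->wm.pri_latency,
		       sizeof(dev_priv->wm.pri_latency));
		memcpy(dev_priv->wm.cur_latency, dev_priv->wm.pri_latency,
		       sizeof(dev_priv->wm.pri_latency));

		if (IS_GEN6(dev_priv))
			snb_wm_latency_quirk(dev_priv);
	}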
Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1477946245-14134-26-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_pm.c | 38 +++++++++++++++----------------------- 1 file changed, 15 insertions(+), 23 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 8c5fb8f..d2320b1 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -935,10 +935,8 @@ static unsigned int vlv_wm_method2(unsigned int pixel_rate, return ret; } -static void vlv_setup_wm_latency(struct drm_device *dev) +static void vlv_setup_wm_latency(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); - /* all latencies in usec */ dev_priv->wm.pri_latency[VLV_WM_LEVEL_PM2] = 3; @@ -2087,10 +2085,9 @@ hsw_compute_linetime_wm(const struct intel_crtc_state *cstate) PIPE_WM_LINETIME_TIME(linetime); } -static void intel_read_wm_latency(struct drm_device *dev, uint16_t wm[8]) +static void intel_read_wm_latency(struct drm_i915_private *dev_priv, + uint16_t wm[8]) { - struct drm_i915_private *dev_priv = to_i915(dev); - if (IS_GEN9(dev_priv)) { uint32_t val; int ret, i; @@ -2176,14 +2173,14 @@ static void intel_read_wm_latency(struct drm_device *dev, uint16_t wm[8]) wm[2] = (sskpd >> 12) & 0xFF; wm[3] = (sskpd >> 20) & 0x1FF; wm[4] = (sskpd >> 32) & 0x1FF; - } else if (INTEL_INFO(dev)->gen >= 6) { + } else if (INTEL_GEN(dev_priv) >= 6) { uint32_t sskpd = I915_READ(MCH_SSKPD); wm[0] = (sskpd >> SSKPD_WM0_SHIFT) & SSKPD_WM_MASK; wm[1] = (sskpd >> SSKPD_WM1_SHIFT) & SSKPD_WM_MASK; wm[2] = (sskpd >> SSKPD_WM2_SHIFT) & SSKPD_WM_MASK; wm[3] = (sskpd >> SSKPD_WM3_SHIFT) & SSKPD_WM_MASK; - } else if (INTEL_INFO(dev)->gen >= 5) { + } else if (INTEL_GEN(dev_priv) >= 5) { uint32_t mltr = I915_READ(MLTR_ILK); /* ILK primary LP0 latency is 700 ns */ @@ -2271,9 +2268,8 @@ static bool ilk_increase_wm_latency(struct drm_i915_private *dev_priv, return true; } -static void snb_wm_latency_quirk(struct drm_device *dev) +static void snb_wm_latency_quirk(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); bool changed; /* @@ -2293,11 +2289,9 @@ static void snb_wm_latency_quirk(struct drm_device *dev) intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency); } -static void ilk_setup_wm_latency(struct drm_device *dev) +static void ilk_setup_wm_latency(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); - - intel_read_wm_latency(dev, dev_priv->wm.pri_latency); + intel_read_wm_latency(dev_priv, dev_priv->wm.pri_latency); memcpy(dev_priv->wm.spr_latency, dev_priv->wm.pri_latency, sizeof(dev_priv->wm.pri_latency)); @@ -2312,14 +2306,12 @@ static void ilk_setup_wm_latency(struct drm_device *dev) intel_print_wm_latency(dev_priv, "Cursor", dev_priv->wm.cur_latency); if (IS_GEN6(dev_priv)) - snb_wm_latency_quirk(dev); + snb_wm_latency_quirk(dev_priv); } -static void skl_setup_wm_latency(struct drm_device *dev) +static void skl_setup_wm_latency(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); - - intel_read_wm_latency(dev, dev_priv->wm.skl_latency); + intel_read_wm_latency(dev_priv, dev_priv->wm.skl_latency); intel_print_wm_latency(dev_priv, "Gen9 Plane", dev_priv->wm.skl_latency); } @@ -7699,11 +7691,11 @@ void intel_init_pm(struct drm_device *dev) /* For FIFO watermark updates */ if (INTEL_INFO(dev)->gen >= 9) { - 
skl_setup_wm_latency(dev); + skl_setup_wm_latency(dev_priv); dev_priv->display.update_wm = skl_update_wm; dev_priv->display.compute_global_watermarks = skl_compute_wm; } else if (HAS_PCH_SPLIT(dev_priv)) { - ilk_setup_wm_latency(dev); + ilk_setup_wm_latency(dev_priv); if ((IS_GEN5(dev_priv) && dev_priv->wm.pri_latency[1] && dev_priv->wm.spr_latency[1] && dev_priv->wm.cur_latency[1]) || @@ -7721,10 +7713,10 @@ void intel_init_pm(struct drm_device *dev) "Disable CxSR\n"); } } else if (IS_CHERRYVIEW(dev_priv)) { - vlv_setup_wm_latency(dev); + vlv_setup_wm_latency(dev_priv); dev_priv->display.update_wm = vlv_update_wm; } else if (IS_VALLEYVIEW(dev_priv)) { - vlv_setup_wm_latency(dev); + vlv_setup_wm_latency(dev_priv); dev_priv->display.update_wm = vlv_update_wm; } else if (IS_PINEVIEW(dev_priv)) { if (!intel_get_cxsr_latency(IS_PINEVIEW_G(dev_priv), -- cgit v1.1 From 62d75df7b00fab9487ba6f8012581c1d88a05abb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 31 Oct 2016 22:37:25 +0200 Subject: drm/i915: Pass dev_priv to intel_init_pm() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unify our approach to things by passing around dev_priv instead of dev. Signed-off-by: Ville Syrjälä Link: http://patchwork.freedesktop.org/patch/msgid/1477946245-14134-27-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/intel_display.c | 2 +- drivers/gpu/drm/i915/intel_drv.h | 2 +- drivers/gpu/drm/i915/intel_pm.c | 8 +++----- 3 files changed, 5 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index a46922a..9758910 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -16419,7 +16419,7 @@ int intel_modeset_init(struct drm_device *dev) intel_init_quirks(dev); - intel_init_pm(dev); + intel_init_pm(dev_priv); if (INTEL_INFO(dev)->num_pipes == 0) return 0; diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 8765f8d..398195b 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1707,7 +1707,7 @@ void intel_init_clock_gating(struct drm_i915_private *dev_priv); void intel_suspend_hw(struct drm_i915_private *dev_priv); int ilk_wm_max_level(const struct drm_i915_private *dev_priv); void intel_update_watermarks(struct intel_crtc *crtc); -void intel_init_pm(struct drm_device *dev); +void intel_init_pm(struct drm_i915_private *dev_priv); void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv); void intel_pm_setup(struct drm_device *dev); void intel_gpu_ips_init(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index d2320b1..cc9e0c0 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7677,10 +7677,8 @@ void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv) } /* Set up chip specific power management-related functions */ -void intel_init_pm(struct drm_device *dev) +void intel_init_pm(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); - intel_fbc_init(dev_priv); /* For cxsr */ @@ -7690,7 +7688,7 @@ void intel_init_pm(struct drm_device *dev) i915_ironlake_get_mem_freq(dev_priv); /* For FIFO watermark updates */ - if (INTEL_INFO(dev)->gen >= 9) { + if (INTEL_GEN(dev_priv) >= 9) { skl_setup_wm_latency(dev_priv); dev_priv->display.update_wm = 
skl_update_wm;
		dev_priv->display.compute_global_watermarks = skl_compute_wm;
@@ -7741,7 +7739,7 @@ void intel_init_pm(struct drm_device *dev)
 		dev_priv->display.update_wm = i9xx_update_wm;
 		dev_priv->display.get_fifo_size = i9xx_get_fifo_size;
 	} else if (IS_GEN2(dev_priv)) {
-		if (INTEL_INFO(dev)->num_pipes == 1) {
+		if (INTEL_INFO(dev_priv)->num_pipes == 1) {
 			dev_priv->display.update_wm = i845_update_wm;
 			dev_priv->display.get_fifo_size = i845_get_fifo_size;
 		} else {
--
cgit v1.1


From 3599a91cc8d07542dbeef44034a70ed2d1d1ba98 Mon Sep 17 00:00:00 2001
From: Tvrtko Ursulin
Date: Tue, 1 Nov 2016 14:44:10 +0000
Subject: drm/i915: Allow shrinking of userptr objects once again

Commit 1bec9b0bda3d ("drm/i915/shrinker: Only shmemfs objects are
backed by swap") stopped considering userptr objects in shrinker
callbacks. Restore that so idle userptr objects can be discarded in
order to free up memory.

A further change relative to 1bec9b0bda3d is that userptr objects are
now also considered in the oom path, which should likewise be correct.

v2: Introduce I915_GEM_OBJECT_IS_SHRINKABLE. (Chris Wilson)

Signed-off-by: Tvrtko Ursulin
Fixes: 1bec9b0bda3d ("drm/i915/shrinker: Only shmemfs objects are backed by swap")
Cc: Chris Wilson
Cc: Joonas Lahtinen
Cc: Mika Kuoppala
Cc:
Link: http://patchwork.freedesktop.org/patch/msgid/1478011450-6634-1-git-send-email-tvrtko.ursulin@linux.intel.com
---
 drivers/gpu/drm/i915/i915_drv.h          | 7 +++++++
 drivers/gpu/drm/i915/i915_gem.c          | 3 ++-
 drivers/gpu/drm/i915/i915_gem_internal.c | 3 ++-
 drivers/gpu/drm/i915/i915_gem_shrinker.c | 4 ++--
 drivers/gpu/drm/i915/i915_gem_userptr.c  | 3 ++-
 5 files changed, 15 insertions(+), 5 deletions(-)

(limited to 'drivers/gpu/drm')

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b8e72cd..f20e24b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2181,6 +2181,7 @@ enum hdmi_force_audio {
 struct drm_i915_gem_object_ops {
 	unsigned int flags;
 #define I915_GEM_OBJECT_HAS_STRUCT_PAGE 0x1
+#define I915_GEM_OBJECT_IS_SHRINKABLE 0x2

 	/* Interface between the GEM object and its backing storage.
	 * get_pages() is called once prior to the use of the associated set
@@ -2430,6 +2431,12 @@ i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj)
 }

 static inline bool
+i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj)
+{
+	return obj->ops->flags & I915_GEM_OBJECT_IS_SHRINKABLE;
+}
+
+static inline bool
 i915_gem_object_is_active(const struct drm_i915_gem_object *obj)
 {
 	return obj->active_count;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 207b8ef..a97fdfa 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4146,7 +4146,8 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
 }

 static const struct drm_i915_gem_object_ops i915_gem_object_ops = {
-	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE,
+	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
+		 I915_GEM_OBJECT_IS_SHRINKABLE,
 	.get_pages = i915_gem_object_get_pages_gtt,
 	.put_pages = i915_gem_object_put_pages_gtt,
 };
diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c
index 1b0607a..4b3ff3e 100644
--- a/drivers/gpu/drm/i915/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/i915_gem_internal.c
@@ -132,7 +132,8 @@ static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj,
 }

 static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = {
-	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE,
+	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
+		 I915_GEM_OBJECT_IS_SHRINKABLE,
 	.get_pages = i915_gem_object_get_pages_internal,
 	.put_pages = i915_gem_object_put_pages_internal,
 };
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index f988652..87dd27d 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -83,8 +83,8 @@ static bool can_release_pages(struct drm_i915_gem_object *obj)
 	if (!obj->mm.pages)
 		return false;

-	/* Only shmemfs objects are backed by swap */
-	if (!obj->base.filp)
+	/* Consider only shrinkable objects. */
+	if (!i915_gem_object_is_shrinkable(obj))
 		return false;

 	/* Only report true if by unbinding the object and putting its pages
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 9bf44b5..6426163 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -707,7 +707,8 @@ i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj)
 }

 static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
-	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE,
+	.flags = I915_GEM_OBJECT_HAS_STRUCT_PAGE |
+		 I915_GEM_OBJECT_IS_SHRINKABLE,
 	.get_pages = i915_gem_userptr_get_pages,
 	.put_pages = i915_gem_userptr_put_pages,
 	.dmabuf_export = i915_gem_userptr_dmabuf_export,
--
cgit v1.1


From 0a379e27db5164a7e49a3717b16a59469d944b5d Mon Sep 17 00:00:00 2001
From: Ander Conselvan de Oliveira
Date: Wed, 2 Nov 2016 08:44:56 +0200
Subject: drm/i915/bxt: Don't set OCL2_LDOFUSE_PWR_DIS bit in phy init sequence

Hardware engineers confirmed that writing to it has no effect, as
implied by the FIXME comment.

v2: Also remove comment from bxt_ddi_phy_verify_state().
    (Imre)

Cc: Imre Deak
Signed-off-by: Ander Conselvan de Oliveira
Reviewed-by: Imre Deak
Link: http://patchwork.freedesktop.org/patch/msgid/1478069096-11209-1-git-send-email-ander.conselvan.de.oliveira@intel.com
---
 drivers/gpu/drm/i915/intel_dpio_phy.c | 21 ---------------------
 1 file changed, 21 deletions(-)

(limited to 'drivers/gpu/drm')

diff --git a/drivers/gpu/drm/i915/intel_dpio_phy.c b/drivers/gpu/drm/i915/intel_dpio_phy.c
index 4a6164a..7a8e82d 100644
--- a/drivers/gpu/drm/i915/intel_dpio_phy.c
+++ b/drivers/gpu/drm/i915/intel_dpio_phy.c
@@ -365,22 +365,6 @@ static void _bxt_ddi_phy_init(struct drm_i915_private *dev_priv,
 		I915_WRITE(BXT_PORT_CL2CM_DW6(phy), val);
 	}

-	val = I915_READ(BXT_PORT_CL1CM_DW30(phy));
-	val &= ~OCL2_LDOFUSE_PWR_DIS;
-	/*
-	 * On PHY1 disable power on the second channel, since no port is
-	 * connected there. On PHY0 both channels have a port, so leave it
-	 * enabled.
-	 * TODO: port C is only connected on BXT-P, so on BXT0/1 we should
-	 * power down the second channel on PHY0 as well.
-	 *
-	 * FIXME: Clarify programming of the following, the register is
-	 * read-only with bit 6 fixed at 0 at least in stepping A.
-	 */
-	if (!phy_info->dual_channel)
-		val |= OCL2_LDOFUSE_PWR_DIS;
-	I915_WRITE(BXT_PORT_CL1CM_DW30(phy), val);
-
 	if (phy_info->rcomp_phy != -1) {
 		uint32_t grc_code;
 		/*
@@ -508,11 +492,6 @@ bool bxt_ddi_phy_verify_state(struct drm_i915_private *dev_priv,
 		     DW6_OLDO_DYN_PWR_DOWN_EN, DW6_OLDO_DYN_PWR_DOWN_EN,
 		     "BXT_PORT_CL2CM_DW6(%d)", phy);

-	/*
-	 * TODO: Verify BXT_PORT_CL1CM_DW30 bit OCL2_LDOFUSE_PWR_DIS,
-	 * at least on stepping A this bit is read-only and fixed at 0.
-	 */
-
 	if (phy_info->rcomp_phy != -1) {
 		u32 grc_code = dev_priv->bxt_phy_grc;
--
cgit v1.1


From 37c6393431bf526d6f465e095c1201c1b890dd51 Mon Sep 17 00:00:00 2001
From: Mika Kuoppala
Date: Tue, 1 Nov 2016 15:27:36 +0200
Subject: drm/i915/gtt: Fix pte clear range
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Comparing the pte index against the number of entries is wrong when
clearing a range of pte entries. Use a 'one past the end' marker
instead, so that the correct number of ptes is pointed at the scratch
page.
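Concretely (a worked example, not from the patch): take a range whose
first pte lands at index 510 of its page table and covers two entries.
The old loop, for (pte = pte_start; pte < num_entries; pte++), compares
510 against 2 and never executes, so neither pte is rewritten. With an
exclusive end marker the loop writes exactly ptes 510 and 511:

	unsigned int pte = gen8_pte_index(start);	/* 510 here */
	unsigned int pte_end = pte + gen8_pte_count(start, length); /* 512 */

	GEM_BUG_ON(pte_end > GEN8_PTES);

	while (pte < pte_end)
		pt_vaddr[pte++] = scratch_pte;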
v2: assert early instead of warning late (Chris)
v3: removed consts (Joonas)

Fixes: d209b9c3cd28 ("drm/i915/gtt: Split gen8_ppgtt_clear_pte_range")
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98282
Cc: Chris Wilson
Cc: Joonas Lahtinen
Cc: Michel Thierry
Cc: Michał Winiarski
Reported-by: Mike Lothian
Signed-off-by: Mika Kuoppala
Reviewed-by: Chris Wilson
Tested-by: Mike Lothian
Reviewed-by: Joonas Lahtinen
Signed-off-by: Mika Kuoppala
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'drivers/gpu/drm')

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 00606a2..dc279ca 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -716,9 +716,9 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
 				uint64_t length)
 {
 	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-	unsigned int pte_start = gen8_pte_index(start);
 	unsigned int num_entries = gen8_pte_count(start, length);
-	uint64_t pte;
+	unsigned int pte = gen8_pte_index(start);
+	unsigned int pte_end = pte + num_entries;
 	gen8_pte_t *pt_vaddr;
 	gen8_pte_t scratch_pte = gen8_pte_encode(vm->scratch_page.daddr,
 						 I915_CACHE_LLC);
@@ -726,7 +726,9 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
 	if (WARN_ON(!px_page(pt)))
 		return false;

-	bitmap_clear(pt->used_ptes, pte_start, num_entries);
+	GEM_BUG_ON(pte_end > GEN8_PTES);
+
+	bitmap_clear(pt->used_ptes, pte, num_entries);

 	if (bitmap_empty(pt->used_ptes, GEN8_PTES)) {
 		free_pt(vm->dev, pt);
@@ -735,8 +737,8 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,

 	pt_vaddr = kmap_px(pt);

-	for (pte = pte_start; pte < num_entries; pte++)
-		pt_vaddr[pte] = scratch_pte;
+	while (pte < pte_end)
+		pt_vaddr[pte++] = scratch_pte;

 	kunmap_px(ppgtt, pt_vaddr);
--
cgit v1.1


From fce937559ef04aa2e29719842ec0d56b14eaf8c8 Mon Sep 17 00:00:00 2001
From: Mika Kuoppala
Date: Mon, 31 Oct 2016 17:24:46 +0200
Subject: drm/i915/gtt: Mark tlbs dirty on clear

Now that clearing ptes can modify the upper-level pdps, we need to mark
those dirty so that they will be flushed correctly.

Reviewed-by: Chris Wilson
Signed-off-by: Mika Kuoppala
Link: http://patchwork.freedesktop.org/patch/msgid/1478006856-8313-1-git-send-email-mika.kuoppala@intel.com
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

(limited to 'drivers/gpu/drm')

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index dc279ca..67606bf 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -707,6 +707,16 @@ static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt,
 	return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4));
 }

+/* PDE TLBs are a pain to invalidate on GEN8+. When we modify
+ * the page table structures, we mark them dirty so that
+ * context switching/execlist queuing code takes extra steps
+ * to ensure that tlbs are flushed.
+ */
+static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
+{
+	ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask;
+}
+
 /* Removes entries from a single page table, releasing it if it's empty.
  * Caller can use the return value to update higher-level entries.
 */
@@ -809,6 +819,8 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
 		}
 	}

+	mark_tlbs_dirty(ppgtt);
+
 	if (USES_FULL_48BIT_PPGTT(vm->dev) &&
 	    bitmap_empty(pdp->used_pdpes, I915_PDPES_PER_PDP(vm->dev))) {
 		free_pdp(vm->dev, pdp);
@@ -1283,16 +1295,6 @@ err_out:
 	return -ENOMEM;
 }

-/* PDE TLBs are a pain to invalidate on GEN8+. When we modify
- * the page table structures, we mark them dirty so that
- * context switching/execlist queuing code takes extra steps
- * to ensure that tlbs are flushed.
- */
-static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
-{
-	ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask;
-}
-
 static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
 				    struct i915_page_directory_pointer *pdp,
 				    uint64_t start,
--
cgit v1.1


From 3ac168a70b2417c602fb86d48cb0cd12102890e1 Mon Sep 17 00:00:00 2001
From: Mika Kuoppala
Date: Tue, 1 Nov 2016 18:43:03 +0200
Subject: drm/i915: Move hangcheck code out from i915_irq.c

Create a new file for hangcheck-specific code, intel_hangcheck.c, and
move all related code into it.

v2: s/intel_engine_hangcheck/intel_engine (Chris)

No functional changes.

Cc: Chris Wilson
Cc: Joonas Lahtinen
Cc: Tvrtko Ursulin
Signed-off-by: Mika Kuoppala
Reviewed-by: Chris Wilson
Signed-off-by: Mika Kuoppala
Link: http://patchwork.freedesktop.org/patch/msgid/1478018583-5816-1-git-send-email-mika.kuoppala@intel.com
---
 drivers/gpu/drm/i915/Makefile          |   1 +
 drivers/gpu/drm/i915/i915_drv.c        |   1 +
 drivers/gpu/drm/i915/i915_drv.h        |   1 +
 drivers/gpu/drm/i915/i915_irq.c        | 417 ------------------------------
 drivers/gpu/drm/i915/intel_engine_cs.c |   5 -
 drivers/gpu/drm/i915/intel_hangcheck.c | 450 +++++++++++++++++++++++++++++++++
 6 files changed, 453 insertions(+), 422 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/intel_hangcheck.c

(limited to 'drivers/gpu/drm')

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 240ce9a..0857e50 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -47,6 +47,7 @@ i915-y += i915_cmd_parser.o \
 	  i915_trace_points.o \
 	  intel_breadcrumbs.o \
 	  intel_engine_cs.o \
+	  intel_hangcheck.o \
 	  intel_lrc.o \
 	  intel_mocs.o \
 	  intel_ringbuffer.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 48f4d21..9e5a547 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -830,6 +830,7 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
 	intel_init_dpio(dev_priv);
 	intel_power_domains_init(dev_priv);
 	intel_irq_init(dev_priv);
+	intel_hangcheck_init(dev_priv);
 	intel_init_display_hooks(dev_priv);
 	intel_init_clock_gating_hooks(dev_priv);
 	intel_init_audio_hooks(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index f20e24b..d868979 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3009,6 +3009,7 @@ extern bool intel_has_gpu_reset(struct drm_i915_private *dev_priv);
 extern void i915_reset(struct drm_i915_private *dev_priv);
 extern int intel_guc_reset(struct drm_i915_private *dev_priv);
 extern void intel_engine_init_hangcheck(struct intel_engine_cs *engine);
+extern void intel_hangcheck_init(struct drm_i915_private *dev_priv);
 extern unsigned long i915_chipset_val(struct drm_i915_private *dev_priv);
 extern unsigned long i915_mch_val(struct drm_i915_private *dev_priv);
 extern unsigned long i915_gfx_val(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 
ecd06d3..6d7505b 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -2848,420 +2848,6 @@ static void gen8_disable_vblank(struct drm_device *dev, unsigned int pipe) spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); } -static bool -ipehr_is_semaphore_wait(struct intel_engine_cs *engine, u32 ipehr) -{ - if (INTEL_GEN(engine->i915) >= 8) { - return (ipehr >> 23) == 0x1c; - } else { - ipehr &= ~MI_SEMAPHORE_SYNC_MASK; - return ipehr == (MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE | - MI_SEMAPHORE_REGISTER); - } -} - -static struct intel_engine_cs * -semaphore_wait_to_signaller_ring(struct intel_engine_cs *engine, u32 ipehr, - u64 offset) -{ - struct drm_i915_private *dev_priv = engine->i915; - struct intel_engine_cs *signaller; - enum intel_engine_id id; - - if (INTEL_GEN(dev_priv) >= 8) { - for_each_engine(signaller, dev_priv, id) { - if (engine == signaller) - continue; - - if (offset == signaller->semaphore.signal_ggtt[engine->hw_id]) - return signaller; - } - } else { - u32 sync_bits = ipehr & MI_SEMAPHORE_SYNC_MASK; - - for_each_engine(signaller, dev_priv, id) { - if(engine == signaller) - continue; - - if (sync_bits == signaller->semaphore.mbox.wait[engine->hw_id]) - return signaller; - } - } - - DRM_DEBUG_DRIVER("No signaller ring found for %s, ipehr 0x%08x, offset 0x%016llx\n", - engine->name, ipehr, offset); - - return ERR_PTR(-ENODEV); -} - -static struct intel_engine_cs * -semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno) -{ - struct drm_i915_private *dev_priv = engine->i915; - void __iomem *vaddr; - u32 cmd, ipehr, head; - u64 offset = 0; - int i, backwards; - - /* - * This function does not support execlist mode - any attempt to - * proceed further into this function will result in a kernel panic - * when dereferencing ring->buffer, which is not set up in execlist - * mode. - * - * The correct way of doing it would be to derive the currently - * executing ring buffer from the current context, which is derived - * from the currently running request. Unfortunately, to get the - * current request we would have to grab the struct_mutex before doing - * anything else, which would be ill-advised since some other thread - * might have grabbed it already and managed to hang itself, causing - * the hang checker to deadlock. - * - * Therefore, this function does not support execlist mode in its - * current form. Just return NULL and move on. - */ - if (engine->buffer == NULL) - return NULL; - - ipehr = I915_READ(RING_IPEHR(engine->mmio_base)); - if (!ipehr_is_semaphore_wait(engine, ipehr)) - return NULL; - - /* - * HEAD is likely pointing to the dword after the actual command, - * so scan backwards until we find the MBOX. But limit it to just 3 - * or 4 dwords depending on the semaphore wait command size. - * Note that we don't care about ACTHD here since that might - * point at at batch, and semaphores are always emitted into the - * ringbuffer itself. - */ - head = I915_READ_HEAD(engine) & HEAD_ADDR; - backwards = (INTEL_GEN(dev_priv) >= 8) ? 5 : 4; - vaddr = (void __iomem *)engine->buffer->vaddr; - - for (i = backwards; i; --i) { - /* - * Be paranoid and presume the hw has gone off into the wild - - * our ring is smaller than what the hardware (and hence - * HEAD_ADDR) allows. Also handles wrap-around. 
- */ - head &= engine->buffer->size - 1; - - /* This here seems to blow up */ - cmd = ioread32(vaddr + head); - if (cmd == ipehr) - break; - - head -= 4; - } - - if (!i) - return NULL; - - *seqno = ioread32(vaddr + head + 4) + 1; - if (INTEL_GEN(dev_priv) >= 8) { - offset = ioread32(vaddr + head + 12); - offset <<= 32; - offset |= ioread32(vaddr + head + 8); - } - return semaphore_wait_to_signaller_ring(engine, ipehr, offset); -} - -static int semaphore_passed(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - struct intel_engine_cs *signaller; - u32 seqno; - - engine->hangcheck.deadlock++; - - signaller = semaphore_waits_for(engine, &seqno); - if (signaller == NULL) - return -1; - - if (IS_ERR(signaller)) - return 0; - - /* Prevent pathological recursion due to driver bugs */ - if (signaller->hangcheck.deadlock >= I915_NUM_ENGINES) - return -1; - - if (i915_seqno_passed(intel_engine_get_seqno(signaller), seqno)) - return 1; - - /* cursory check for an unkickable deadlock */ - if (I915_READ_CTL(signaller) & RING_WAIT_SEMAPHORE && - semaphore_passed(signaller) < 0) - return -1; - - return 0; -} - -static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine(engine, dev_priv, id) - engine->hangcheck.deadlock = 0; -} - -static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone) -{ - u32 tmp = current_instdone | *old_instdone; - bool unchanged; - - unchanged = tmp == *old_instdone; - *old_instdone |= tmp; - - return unchanged; -} - -static bool subunits_stuck(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - struct intel_instdone instdone; - struct intel_instdone *accu_instdone = &engine->hangcheck.instdone; - bool stuck; - int slice; - int subslice; - - if (engine->id != RCS) - return true; - - intel_engine_get_instdone(engine, &instdone); - - /* There might be unstable subunit states even when - * actual head is not moving. Filter out the unstable ones by - * accumulating the undone -> done transitions and only - * consider those as progress. - */ - stuck = instdone_unchanged(instdone.instdone, - &accu_instdone->instdone); - stuck &= instdone_unchanged(instdone.slice_common, - &accu_instdone->slice_common); - - for_each_instdone_slice_subslice(dev_priv, slice, subslice) { - stuck &= instdone_unchanged(instdone.sampler[slice][subslice], - &accu_instdone->sampler[slice][subslice]); - stuck &= instdone_unchanged(instdone.row[slice][subslice], - &accu_instdone->row[slice][subslice]); - } - - return stuck; -} - -static enum intel_engine_hangcheck_action -head_stuck(struct intel_engine_cs *engine, u64 acthd) -{ - if (acthd != engine->hangcheck.acthd) { - - /* Clear subunit states on head movement */ - memset(&engine->hangcheck.instdone, 0, - sizeof(engine->hangcheck.instdone)); - - return HANGCHECK_ACTIVE; - } - - if (!subunits_stuck(engine)) - return HANGCHECK_ACTIVE; - - return HANGCHECK_HUNG; -} - -static enum intel_engine_hangcheck_action -engine_stuck(struct intel_engine_cs *engine, u64 acthd) -{ - struct drm_i915_private *dev_priv = engine->i915; - enum intel_engine_hangcheck_action ha; - u32 tmp; - - ha = head_stuck(engine, acthd); - if (ha != HANGCHECK_HUNG) - return ha; - - if (IS_GEN2(dev_priv)) - return HANGCHECK_HUNG; - - /* Is the chip hanging on a WAIT_FOR_EVENT? - * If so we can simply poke the RB_WAIT bit - * and break the hang. This should work on - * all but the second generation chipsets. 
- */ - tmp = I915_READ_CTL(engine); - if (tmp & RING_WAIT) { - i915_handle_error(dev_priv, 0, - "Kicking stuck wait on %s", - engine->name); - I915_WRITE_CTL(engine, tmp); - return HANGCHECK_KICK; - } - - if (INTEL_GEN(dev_priv) >= 6 && tmp & RING_WAIT_SEMAPHORE) { - switch (semaphore_passed(engine)) { - default: - return HANGCHECK_HUNG; - case 1: - i915_handle_error(dev_priv, 0, - "Kicking stuck semaphore on %s", - engine->name); - I915_WRITE_CTL(engine, tmp); - return HANGCHECK_KICK; - case 0: - return HANGCHECK_WAIT; - } - } - - return HANGCHECK_HUNG; -} - -/* - * This is called when the chip hasn't reported back with completed - * batchbuffers in a long time. We keep track per ring seqno progress and - * if there are no progress, hangcheck score for that ring is increased. - * Further, acthd is inspected to see if the ring is stuck. On stuck case - * we kick the ring. If we see no progress on three subsequent calls - * we assume chip is wedged and try to fix it by resetting the chip. - */ -static void i915_hangcheck_elapsed(struct work_struct *work) -{ - struct drm_i915_private *dev_priv = - container_of(work, typeof(*dev_priv), - gpu_error.hangcheck_work.work); - struct intel_engine_cs *engine; - enum intel_engine_id id; - unsigned int hung = 0, stuck = 0; - int busy_count = 0; -#define BUSY 1 -#define KICK 5 -#define HUNG 20 -#define ACTIVE_DECAY 15 - - if (!i915.enable_hangcheck) - return; - - if (!READ_ONCE(dev_priv->gt.awake)) - return; - - /* As enabling the GPU requires fairly extensive mmio access, - * periodically arm the mmio checker to see if we are triggering - * any invalid access. - */ - intel_uncore_arm_unclaimed_mmio_detection(dev_priv); - - for_each_engine(engine, dev_priv, id) { - bool busy = intel_engine_has_waiter(engine); - u64 acthd; - u32 seqno; - u32 submit; - - semaphore_clear_deadlocks(dev_priv); - - /* We don't strictly need an irq-barrier here, as we are not - * serving an interrupt request, be paranoid in case the - * barrier has side-effects (such as preventing a broken - * cacheline snoop) and so be sure that we can see the seqno - * advance. If the seqno should stick, due to a stale - * cacheline, we would erroneously declare the GPU hung. - */ - if (engine->irq_seqno_barrier) - engine->irq_seqno_barrier(engine); - - acthd = intel_engine_get_active_head(engine); - seqno = intel_engine_get_seqno(engine); - submit = intel_engine_last_submit(engine); - - if (engine->hangcheck.seqno == seqno) { - if (i915_seqno_passed(seqno, submit)) { - engine->hangcheck.action = HANGCHECK_IDLE; - } else { - /* We always increment the hangcheck score - * if the engine is busy and still processing - * the same request, so that no single request - * can run indefinitely (such as a chain of - * batches). The only time we do not increment - * the hangcheck score on this ring, if this - * engine is in a legitimate wait for another - * engine. In that case the waiting engine is a - * victim and we want to be sure we catch the - * right culprit. Then every time we do kick - * the ring, add a small increment to the - * score so that we can catch a batch that is - * being repeatedly kicked and so responsible - * for stalling the machine. 
- */ - engine->hangcheck.action = - engine_stuck(engine, acthd); - - switch (engine->hangcheck.action) { - case HANGCHECK_IDLE: - case HANGCHECK_WAIT: - break; - case HANGCHECK_ACTIVE: - engine->hangcheck.score += BUSY; - break; - case HANGCHECK_KICK: - engine->hangcheck.score += KICK; - break; - case HANGCHECK_HUNG: - engine->hangcheck.score += HUNG; - break; - } - } - - if (engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG) { - hung |= intel_engine_flag(engine); - if (engine->hangcheck.action != HANGCHECK_HUNG) - stuck |= intel_engine_flag(engine); - } - } else { - engine->hangcheck.action = HANGCHECK_ACTIVE; - - /* Gradually reduce the count so that we catch DoS - * attempts across multiple batches. - */ - if (engine->hangcheck.score > 0) - engine->hangcheck.score -= ACTIVE_DECAY; - if (engine->hangcheck.score < 0) - engine->hangcheck.score = 0; - - /* Clear head and subunit states on seqno movement */ - acthd = 0; - - memset(&engine->hangcheck.instdone, 0, - sizeof(engine->hangcheck.instdone)); - } - - engine->hangcheck.seqno = seqno; - engine->hangcheck.acthd = acthd; - busy_count += busy; - } - - if (hung) { - char msg[80]; - unsigned int tmp; - int len; - - /* If some rings hung but others were still busy, only - * blame the hanging rings in the synopsis. - */ - if (stuck != hung) - hung &= ~stuck; - len = scnprintf(msg, sizeof(msg), - "%s on ", stuck == hung ? "No progress" : "Hang"); - for_each_engine_masked(engine, dev_priv, hung, tmp) - len += scnprintf(msg + len, sizeof(msg) - len, - "%s, ", engine->name); - msg[len-2] = '\0'; - - return i915_handle_error(dev_priv, hung, msg); - } - - /* Reset timer in case GPU hangs without another request being added */ - if (busy_count) - i915_queue_hangcheck(dev_priv); -} - static void ibx_irq_reset(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -4583,9 +4169,6 @@ void intel_irq_init(struct drm_i915_private *dev_priv) if (INTEL_INFO(dev_priv)->gen >= 8) dev_priv->rps.pm_intr_keep |= GEN8_PMINTR_REDIRECT_TO_GUC; - INIT_DELAYED_WORK(&dev_priv->gpu_error.hangcheck_work, - i915_hangcheck_elapsed); - if (IS_GEN2(dev_priv)) { /* Gen2 doesn't have a hardware frame counter */ dev->max_vblank_count = 0; diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 94de3d6..841f8d1 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -220,11 +220,6 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno) intel_engine_wakeup(engine); } -void intel_engine_init_hangcheck(struct intel_engine_cs *engine) -{ - memset(&engine->hangcheck, 0, sizeof(engine->hangcheck)); -} - static void intel_engine_init_timeline(struct intel_engine_cs *engine) { engine->timeline = &engine->i915->gt.global_timeline.engine[engine->id]; diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c new file mode 100644 index 0000000..53df5b1 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -0,0 +1,450 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following 
conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "i915_drv.h" + +static bool +ipehr_is_semaphore_wait(struct intel_engine_cs *engine, u32 ipehr) +{ + if (INTEL_GEN(engine->i915) >= 8) { + return (ipehr >> 23) == 0x1c; + } else { + ipehr &= ~MI_SEMAPHORE_SYNC_MASK; + return ipehr == (MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE | + MI_SEMAPHORE_REGISTER); + } +} + +static struct intel_engine_cs * +semaphore_wait_to_signaller_ring(struct intel_engine_cs *engine, u32 ipehr, + u64 offset) +{ + struct drm_i915_private *dev_priv = engine->i915; + struct intel_engine_cs *signaller; + enum intel_engine_id id; + + if (INTEL_GEN(dev_priv) >= 8) { + for_each_engine(signaller, dev_priv, id) { + if (engine == signaller) + continue; + + if (offset == signaller->semaphore.signal_ggtt[engine->hw_id]) + return signaller; + } + } else { + u32 sync_bits = ipehr & MI_SEMAPHORE_SYNC_MASK; + + for_each_engine(signaller, dev_priv, id) { + if(engine == signaller) + continue; + + if (sync_bits == signaller->semaphore.mbox.wait[engine->hw_id]) + return signaller; + } + } + + DRM_DEBUG_DRIVER("No signaller ring found for %s, ipehr 0x%08x, offset 0x%016llx\n", + engine->name, ipehr, offset); + + return ERR_PTR(-ENODEV); +} + +static struct intel_engine_cs * +semaphore_waits_for(struct intel_engine_cs *engine, u32 *seqno) +{ + struct drm_i915_private *dev_priv = engine->i915; + void __iomem *vaddr; + u32 cmd, ipehr, head; + u64 offset = 0; + int i, backwards; + + /* + * This function does not support execlist mode - any attempt to + * proceed further into this function will result in a kernel panic + * when dereferencing ring->buffer, which is not set up in execlist + * mode. + * + * The correct way of doing it would be to derive the currently + * executing ring buffer from the current context, which is derived + * from the currently running request. Unfortunately, to get the + * current request we would have to grab the struct_mutex before doing + * anything else, which would be ill-advised since some other thread + * might have grabbed it already and managed to hang itself, causing + * the hang checker to deadlock. + * + * Therefore, this function does not support execlist mode in its + * current form. Just return NULL and move on. + */ + if (engine->buffer == NULL) + return NULL; + + ipehr = I915_READ(RING_IPEHR(engine->mmio_base)); + if (!ipehr_is_semaphore_wait(engine, ipehr)) + return NULL; + + /* + * HEAD is likely pointing to the dword after the actual command, + * so scan backwards until we find the MBOX. But limit it to just 3 + * or 4 dwords depending on the semaphore wait command size. + * Note that we don't care about ACTHD here since that might + * point at at batch, and semaphores are always emitted into the + * ringbuffer itself. + */ + head = I915_READ_HEAD(engine) & HEAD_ADDR; + backwards = (INTEL_GEN(dev_priv) >= 8) ? 
5 : 4; + vaddr = (void __iomem *)engine->buffer->vaddr; + + for (i = backwards; i; --i) { + /* + * Be paranoid and presume the hw has gone off into the wild - + * our ring is smaller than what the hardware (and hence + * HEAD_ADDR) allows. Also handles wrap-around. + */ + head &= engine->buffer->size - 1; + + /* This here seems to blow up */ + cmd = ioread32(vaddr + head); + if (cmd == ipehr) + break; + + head -= 4; + } + + if (!i) + return NULL; + + *seqno = ioread32(vaddr + head + 4) + 1; + if (INTEL_GEN(dev_priv) >= 8) { + offset = ioread32(vaddr + head + 12); + offset <<= 32; + offset |= ioread32(vaddr + head + 8); + } + return semaphore_wait_to_signaller_ring(engine, ipehr, offset); +} + +static int semaphore_passed(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + struct intel_engine_cs *signaller; + u32 seqno; + + engine->hangcheck.deadlock++; + + signaller = semaphore_waits_for(engine, &seqno); + if (signaller == NULL) + return -1; + + if (IS_ERR(signaller)) + return 0; + + /* Prevent pathological recursion due to driver bugs */ + if (signaller->hangcheck.deadlock >= I915_NUM_ENGINES) + return -1; + + if (i915_seqno_passed(intel_engine_get_seqno(signaller), seqno)) + return 1; + + /* cursory check for an unkickable deadlock */ + if (I915_READ_CTL(signaller) & RING_WAIT_SEMAPHORE && + semaphore_passed(signaller) < 0) + return -1; + + return 0; +} + +static void semaphore_clear_deadlocks(struct drm_i915_private *dev_priv) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, dev_priv, id) + engine->hangcheck.deadlock = 0; +} + +static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone) +{ + u32 tmp = current_instdone | *old_instdone; + bool unchanged; + + unchanged = tmp == *old_instdone; + *old_instdone |= tmp; + + return unchanged; +} + +static bool subunits_stuck(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + struct intel_instdone instdone; + struct intel_instdone *accu_instdone = &engine->hangcheck.instdone; + bool stuck; + int slice; + int subslice; + + if (engine->id != RCS) + return true; + + intel_engine_get_instdone(engine, &instdone); + + /* There might be unstable subunit states even when + * actual head is not moving. Filter out the unstable ones by + * accumulating the undone -> done transitions and only + * consider those as progress. 
+ */ + stuck = instdone_unchanged(instdone.instdone, + &accu_instdone->instdone); + stuck &= instdone_unchanged(instdone.slice_common, + &accu_instdone->slice_common); + + for_each_instdone_slice_subslice(dev_priv, slice, subslice) { + stuck &= instdone_unchanged(instdone.sampler[slice][subslice], + &accu_instdone->sampler[slice][subslice]); + stuck &= instdone_unchanged(instdone.row[slice][subslice], + &accu_instdone->row[slice][subslice]); + } + + return stuck; +} + +static enum intel_engine_hangcheck_action +head_stuck(struct intel_engine_cs *engine, u64 acthd) +{ + if (acthd != engine->hangcheck.acthd) { + + /* Clear subunit states on head movement */ + memset(&engine->hangcheck.instdone, 0, + sizeof(engine->hangcheck.instdone)); + + return HANGCHECK_ACTIVE; + } + + if (!subunits_stuck(engine)) + return HANGCHECK_ACTIVE; + + return HANGCHECK_HUNG; +} + +static enum intel_engine_hangcheck_action +engine_stuck(struct intel_engine_cs *engine, u64 acthd) +{ + struct drm_i915_private *dev_priv = engine->i915; + enum intel_engine_hangcheck_action ha; + u32 tmp; + + ha = head_stuck(engine, acthd); + if (ha != HANGCHECK_HUNG) + return ha; + + if (IS_GEN2(dev_priv)) + return HANGCHECK_HUNG; + + /* Is the chip hanging on a WAIT_FOR_EVENT? + * If so we can simply poke the RB_WAIT bit + * and break the hang. This should work on + * all but the second generation chipsets. + */ + tmp = I915_READ_CTL(engine); + if (tmp & RING_WAIT) { + i915_handle_error(dev_priv, 0, + "Kicking stuck wait on %s", + engine->name); + I915_WRITE_CTL(engine, tmp); + return HANGCHECK_KICK; + } + + if (INTEL_GEN(dev_priv) >= 6 && tmp & RING_WAIT_SEMAPHORE) { + switch (semaphore_passed(engine)) { + default: + return HANGCHECK_HUNG; + case 1: + i915_handle_error(dev_priv, 0, + "Kicking stuck semaphore on %s", + engine->name); + I915_WRITE_CTL(engine, tmp); + return HANGCHECK_KICK; + case 0: + return HANGCHECK_WAIT; + } + } + + return HANGCHECK_HUNG; +} + +/* + * This is called when the chip hasn't reported back with completed + * batchbuffers in a long time. We keep track of per-ring seqno progress and, + * if there is no progress, the hangcheck score for that ring is increased. + * Further, acthd is inspected to see if the ring is stuck. In the stuck case + * we kick the ring. If we see no progress on three subsequent calls + * we assume the chip is wedged and try to fix it by resetting the chip. + */ +static void i915_hangcheck_elapsed(struct work_struct *work) +{ + struct drm_i915_private *dev_priv = + container_of(work, typeof(*dev_priv), + gpu_error.hangcheck_work.work); + struct intel_engine_cs *engine; + enum intel_engine_id id; + unsigned int hung = 0, stuck = 0; + int busy_count = 0; +#define BUSY 1 +#define KICK 5 +#define HUNG 20 +#define ACTIVE_DECAY 15 + + if (!i915.enable_hangcheck) + return; + + if (!READ_ONCE(dev_priv->gt.awake)) + return; + + /* As enabling the GPU requires fairly extensive mmio access, + * periodically arm the mmio checker to see if we are triggering + * any invalid access. + */ + intel_uncore_arm_unclaimed_mmio_detection(dev_priv); + + for_each_engine(engine, dev_priv, id) { + bool busy = intel_engine_has_waiter(engine); + u64 acthd; + u32 seqno; + u32 submit; + + semaphore_clear_deadlocks(dev_priv); + + /* We don't strictly need an irq-barrier here, as we are not + * serving an interrupt request, but be paranoid in case the + * barrier has side-effects (such as preventing a broken + * cacheline snoop) and so be sure that we can see the seqno + * advance.
If the seqno should stick, due to a stale + * cacheline, we would erroneously declare the GPU hung. + */ + if (engine->irq_seqno_barrier) + engine->irq_seqno_barrier(engine); + + acthd = intel_engine_get_active_head(engine); + seqno = intel_engine_get_seqno(engine); + submit = intel_engine_last_submit(engine); + + if (engine->hangcheck.seqno == seqno) { + if (i915_seqno_passed(seqno, submit)) { + engine->hangcheck.action = HANGCHECK_IDLE; + } else { + /* We always increment the hangcheck score + * if the engine is busy and still processing + * the same request, so that no single request + * can run indefinitely (such as a chain of + * batches). The only time we do not increment + * the hangcheck score on this ring is if this + * engine is in a legitimate wait for another + * engine. In that case the waiting engine is a + * victim and we want to be sure we catch the + * right culprit. Then every time we do kick + * the ring, add a small increment to the + * score so that we can catch a batch that is + * being repeatedly kicked and so responsible + * for stalling the machine. + */ + engine->hangcheck.action = + engine_stuck(engine, acthd); + + switch (engine->hangcheck.action) { + case HANGCHECK_IDLE: + case HANGCHECK_WAIT: + break; + case HANGCHECK_ACTIVE: + engine->hangcheck.score += BUSY; + break; + case HANGCHECK_KICK: + engine->hangcheck.score += KICK; + break; + case HANGCHECK_HUNG: + engine->hangcheck.score += HUNG; + break; + } + } + + if (engine->hangcheck.score >= HANGCHECK_SCORE_RING_HUNG) { + hung |= intel_engine_flag(engine); + if (engine->hangcheck.action != HANGCHECK_HUNG) + stuck |= intel_engine_flag(engine); + } + } else { + engine->hangcheck.action = HANGCHECK_ACTIVE; + + /* Gradually reduce the count so that we catch DoS + * attempts across multiple batches. + */ + if (engine->hangcheck.score > 0) + engine->hangcheck.score -= ACTIVE_DECAY; + if (engine->hangcheck.score < 0) + engine->hangcheck.score = 0; + + /* Clear head and subunit states on seqno movement */ + acthd = 0; + + memset(&engine->hangcheck.instdone, 0, + sizeof(engine->hangcheck.instdone)); + } + + engine->hangcheck.seqno = seqno; + engine->hangcheck.acthd = acthd; + busy_count += busy; + } + + if (hung) { + char msg[80]; + unsigned int tmp; + int len; + + /* If some rings hung but others were still busy, only + * blame the hanging rings in the synopsis. + */ + if (stuck != hung) + hung &= ~stuck; + len = scnprintf(msg, sizeof(msg), + "%s on ", stuck == hung ?
"No progress" : "Hang"); + for_each_engine_masked(engine, dev_priv, hung, tmp) + len += scnprintf(msg + len, sizeof(msg) - len, + "%s, ", engine->name); + msg[len-2] = '\0'; + + return i915_handle_error(dev_priv, hung, msg); + } + + /* Reset timer in case GPU hangs without another request being added */ + if (busy_count) + i915_queue_hangcheck(dev_priv); +} + +void intel_engine_init_hangcheck(struct intel_engine_cs *engine) +{ + memset(&engine->hangcheck, 0, sizeof(engine->hangcheck)); +} + +void intel_hangcheck_init(struct drm_i915_private *i915) +{ + INIT_DELAYED_WORK(&i915->gpu_error.hangcheck_work, + i915_hangcheck_elapsed); +} -- cgit v1.1 From 56cea32382bcad7eea0a1af5c448089c08ec9f1f Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Wed, 2 Nov 2016 12:16:04 +0200 Subject: drm/i915: Unify global_list into global_link $ sed -i -r 's/\bglobal_list\b/global_link/g' *.c *.h Cc: Chris Wilson Signed-off-by: Joonas Lahtinen Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1478081764-8058-1-git-send-email-joonas.lahtinen@linux.intel.com --- drivers/gpu/drm/i915/i915_debugfs.c | 10 +++++----- drivers/gpu/drm/i915/i915_drv.h | 2 +- drivers/gpu/drm/i915/i915_gem.c | 12 ++++++------ drivers/gpu/drm/i915/i915_gem_gtt.c | 4 ++-- drivers/gpu/drm/i915/i915_gem_shrinker.c | 16 ++++++++-------- drivers/gpu/drm/i915/i915_gem_stolen.c | 2 +- 6 files changed, 23 insertions(+), 23 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index a13ff47..bc9c0cd 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -217,7 +217,7 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data) return ret; total_obj_size = total_gtt_size = count = 0; - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { + list_for_each_entry(obj, &dev_priv->mm.bound_list, global_link) { if (obj->stolen == NULL) continue; @@ -227,7 +227,7 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data) total_gtt_size += i915_gem_obj_total_ggtt_size(obj); count++; } - list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) { + list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_link) { if (obj->stolen == NULL) continue; @@ -390,7 +390,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data) size = count = 0; mapped_size = mapped_count = 0; purgeable_size = purgeable_count = 0; - list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) { + list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_link) { size += obj->base.size; ++count; @@ -407,7 +407,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data) seq_printf(m, "%u unbound objects, %llu bytes\n", count, size); size = count = dpy_size = dpy_count = 0; - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { + list_for_each_entry(obj, &dev_priv->mm.bound_list, global_link) { size += obj->base.size; ++count; @@ -493,7 +493,7 @@ static int i915_gem_gtt_info(struct seq_file *m, void *data) return ret; total_obj_size = total_gtt_size = count = 0; - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { + list_for_each_entry(obj, &dev_priv->mm.bound_list, global_link) { if (show_pin_display_only && !obj->pin_display) continue; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d868979..eaa01da 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2235,7 +2235,7 @@ 
struct drm_i915_gem_object { /** Stolen memory for this object, instead of being backed by shmem. */ struct drm_mm_node *stolen; - struct list_head global_list; + struct list_head global_link; union { struct rcu_head rcu; struct llist_node freed; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a97fdfa..5839beb 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1446,7 +1446,7 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) i915 = to_i915(obj->base.dev); list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list; - list_move_tail(&obj->global_list, list); + list_move_tail(&obj->global_link, list); } /** @@ -2967,7 +2967,7 @@ int i915_vma_unbind(struct i915_vma *vma) /* Since the unbound list is global, only move to that list if * no more VMAs exist. */ if (--obj->bind_count == 0) - list_move_tail(&obj->global_list, + list_move_tail(&obj->global_link, &to_i915(obj->base.dev)->mm.unbound_list); /* And finally now the object is completely decoupled from this vma, @@ -3164,7 +3164,7 @@ search_free: } GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level)); - list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); + list_move_tail(&obj->global_link, &dev_priv->mm.bound_list); list_move_tail(&vma->vm_link, &vma->vm->inactive_list); obj->bind_count++; @@ -4125,7 +4125,7 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, { mutex_init(&obj->mm.lock); - INIT_LIST_HEAD(&obj->global_list); + INIT_LIST_HEAD(&obj->global_link); INIT_LIST_HEAD(&obj->userfault_link); INIT_LIST_HEAD(&obj->obj_exec_link); INIT_LIST_HEAD(&obj->vma_list); @@ -4272,7 +4272,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, GEM_BUG_ON(!list_empty(&obj->vma_list)); GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree)); - list_del(&obj->global_list); + list_del(&obj->global_link); } intel_runtime_pm_put(i915); mutex_unlock(&i915->drm.struct_mutex); @@ -4847,7 +4847,7 @@ int i915_gem_freeze_late(struct drm_i915_private *dev_priv) i915_gem_shrink(dev_priv, -1UL, I915_SHRINK_UNBOUND); for (p = phases; *p; p++) { - list_for_each_entry(obj, *p, global_list) { + list_for_each_entry(obj, *p, global_link) { obj->base.read_domains = I915_GEM_DOMAIN_CPU; obj->base.write_domain = I915_GEM_DOMAIN_CPU; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 67606bf..0c1f5b4 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3296,7 +3296,7 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) /* clflush objects bound into the GGTT and rebind them. */ list_for_each_entry_safe(obj, on, - &dev_priv->mm.bound_list, global_list) { + &dev_priv->mm.bound_list, global_link) { bool ggtt_bound = false; struct i915_vma *vma; @@ -3376,7 +3376,7 @@ i915_vma_retire(struct i915_gem_active *active, * (unless we are forced to ofc!) 
*/ if (obj->bind_count) - list_move_tail(&obj->global_list, &rq->i915->mm.bound_list); + list_move_tail(&obj->global_link, &rq->i915->mm.bound_list); obj->mm.dirty = true; /* be paranoid */ diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 87dd27d..a6fc1bd 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -199,10 +199,10 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, while (count < target && (obj = list_first_entry_or_null(phase->list, typeof(*obj), - global_list))) { - list_move_tail(&obj->global_list, &still_in_list); + global_link))) { + list_move_tail(&obj->global_link, &still_in_list); if (!obj->mm.pages) { - list_del_init(&obj->global_list); + list_del_init(&obj->global_link); continue; } @@ -228,7 +228,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, I915_MM_SHRINKER); if (!obj->mm.pages) { __i915_gem_object_invalidate(obj); - list_del_init(&obj->global_list); + list_del_init(&obj->global_link); count += obj->base.size >> PAGE_SHIFT; } mutex_unlock(&obj->mm.lock); @@ -293,11 +293,11 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) i915_gem_retire_requests(dev_priv); count = 0; - list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) + list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_link) if (can_release_pages(obj)) count += obj->base.size >> PAGE_SHIFT; - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { + list_for_each_entry(obj, &dev_priv->mm.bound_list, global_link) { if (!i915_gem_object_is_active(obj) && can_release_pages(obj)) count += obj->base.size >> PAGE_SHIFT; } @@ -398,7 +398,7 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) * being pointed to by hardware. */ unbound = bound = unevictable = 0; - list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) { + list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_link) { if (!obj->mm.pages) continue; @@ -407,7 +407,7 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) else unbound += obj->base.size >> PAGE_SHIFT; } - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { + list_for_each_entry(obj, &dev_priv->mm.bound_list, global_link) { if (!obj->mm.pages) continue; diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 3725a1d..b1d367d 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -723,7 +723,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev, vma->flags |= I915_VMA_GLOBAL_BIND; __i915_vma_set_map_and_fenceable(vma); list_move_tail(&vma->vm_link, &ggtt->base.inactive_list); - list_move_tail(&obj->global_list, &dev_priv->mm.bound_list); + list_move_tail(&obj->global_link, &dev_priv->mm.bound_list); obj->bind_count++; return obj; -- cgit v1.1 From c8247c067159782a27c4f2c371c9e26827131a78 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 27 Oct 2016 01:03:43 +0100 Subject: drm/i915: Show the execlist queue in debugfs/i915_engine_info When looking at freezes whilst working on execlists, knowing the order of the pending requests in the driver is useful. 
Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20161027000348.4641-2-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_debugfs.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index bc9c0cd..c9465fb 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -3255,6 +3255,12 @@ static int i915_engine_info(struct seq_file *m, void *unused) else seq_printf(m, "\t\tELSP[1] idle\n"); rcu_read_unlock(); + + spin_lock_irq(&engine->execlist_lock); + list_for_each_entry(rq, &engine->execlist_queue, execlist_link) { + print_request(m, rq, "\t\tQ "); + } + spin_unlock_irq(&engine->execlist_lock); } else if (INTEL_GEN(dev_priv) > 6) { seq_printf(m, "\tPP_DIR_BASE: 0x%08x\n", I915_READ(RING_PP_DIR_BASE(engine))); -- cgit v1.1 From dfc5148fb3f935936e47b55f270cf10fcaf08487 Mon Sep 17 00:00:00 2001 From: Joonas Lahtinen Date: Thu, 3 Nov 2016 10:39:46 +0200 Subject: drm/i915: Introduce HAS_64BIT_RELOC Move has_64bit_reloc into dev_priv->info. This will make it visible in the feature listing debug output. v2: - Keep the struct member to keep GCC fragile but happy (Chris) v3: - More detailed commit message (Chris) - Include forgotten CHV and BXT (Chris) Cc: Chris Wilson Signed-off-by: Joonas Lahtinen Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1478162386-5018-1-git-send-email-joonas.lahtinen@linux.intel.com --- drivers/gpu/drm/i915/i915_drv.h | 3 +++ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 ++- drivers/gpu/drm/i915/i915_gem_render_state.c | 3 +-- drivers/gpu/drm/i915/i915_pci.c | 5 ++++- 4 files changed, 10 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index eaa01da..ae0217d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -670,6 +670,7 @@ struct intel_csr { func(is_kabylake); \ func(is_preliminary); \ /* Keep has_* in alphabetical order */ \ + func(has_64bit_reloc); \ func(has_csr); \ func(has_ddi); \ func(has_dp_mst); \ @@ -2917,6 +2918,8 @@ struct drm_i915_cmd_table { #define HAS_CSR(dev) (INTEL_INFO(dev)->has_csr) #define HAS_RUNTIME_PM(dev_priv) ((dev_priv)->info.has_runtime_pm) +#define HAS_64BIT_RELOC(dev_priv) ((dev_priv)->info.has_64bit_reloc) + /* * For now, anything with a GuC requires uCode loading, and then supports * command submission once loaded. But these are logically independent diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index c35e847..322c580 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -331,7 +331,8 @@ static void reloc_cache_init(struct reloc_cache *cache, cache->page = -1; cache->vaddr = 0; cache->i915 = i915; - cache->use_64bit_reloc = INTEL_GEN(cache->i915) >= 8; + /* Must be a variable in the struct to allow GCC to unroll. 
*/ + cache->use_64bit_reloc = HAS_64BIT_RELOC(i915); cache->node.allocated = false; } diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index 57918f2..5af19b0 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -74,7 +74,6 @@ static int render_state_setup(struct intel_render_state *so, struct drm_i915_private *i915) { const struct intel_renderstate_rodata *rodata = so->rodata; - const bool has_64bit_reloc = INTEL_GEN(i915) >= 8; struct drm_i915_gem_object *obj = so->vma->obj; unsigned int i = 0, reloc_index = 0; unsigned int needs_clflush; @@ -93,7 +92,7 @@ static int render_state_setup(struct intel_render_state *so, if (i * 4 == rodata->reloc[reloc_index]) { u64 r = s + so->vma->node.start; s = lower_32_bits(r); - if (has_64bit_reloc) { + if (HAS_64BIT_RELOC(i915)) { if (i + 1 >= rodata->batch_items || rodata->batch[i + 1] != 0) goto err; diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 31e6edd..2a41950 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -288,7 +288,8 @@ static const struct intel_device_info intel_haswell_info = { #define BDW_FEATURES \ HSW_FEATURES, \ BDW_COLORS, \ - .has_logical_ring_contexts = 1 + .has_logical_ring_contexts = 1, \ + .has_64bit_reloc = 1 static const struct intel_device_info intel_broadwell_info = { BDW_FEATURES, @@ -308,6 +309,7 @@ static const struct intel_device_info intel_cherryview_info = { .has_hotplug = 1, .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, .is_cherryview = 1, + .has_64bit_reloc = 1, .has_psr = 1, .has_runtime_pm = 1, .has_resource_streamer = 1, @@ -347,6 +349,7 @@ static const struct intel_device_info intel_broxton_info = { .has_hotplug = 1, .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, .num_pipes = 3, + .has_64bit_reloc = 1, .has_ddi = 1, .has_fpga_dbg = 1, .has_fbc = 1, -- cgit v1.1 From a933568eb61d6c0b12af4264ae1ba316447a2189 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Wed, 2 Nov 2016 15:14:59 +0000 Subject: drm/i915: Tidy slab cache allocations We can use the preferred KMEM_CACHE helper for brevity. Also simplify the error unwind by only setting the ENOMEM error code once. v2: Add forgotten changes.
(Joonas Lahtinen) Signed-off-by: Tvrtko Ursulin Reviewed-by: Joonas Lahtinen (v1) Cc: Joonas Lahtinen Reviewed-by: Joonas Lahtinen Link: http://patchwork.freedesktop.org/patch/msgid/1478099699-28652-1-git-send-email-tvrtko.ursulin@linux.intel.com --- drivers/gpu/drm/i915/i915_gem.c | 37 ++++++++++--------------------------- 1 file changed, 10 insertions(+), 27 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5839beb..1f995ce 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4714,39 +4714,22 @@ int i915_gem_load_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); - int err; + int err = -ENOMEM; - dev_priv->objects = - kmem_cache_create("i915_gem_object", - sizeof(struct drm_i915_gem_object), 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!dev_priv->objects) { - err = -ENOMEM; + dev_priv->objects = KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN); + if (!dev_priv->objects) goto err_out; - } - dev_priv->vmas = - kmem_cache_create("i915_gem_vma", - sizeof(struct i915_vma), 0, - SLAB_HWCACHE_ALIGN, - NULL); - if (!dev_priv->vmas) { - err = -ENOMEM; + dev_priv->vmas = KMEM_CACHE(i915_vma, SLAB_HWCACHE_ALIGN); + if (!dev_priv->vmas) goto err_objects; - } - dev_priv->requests = - kmem_cache_create("i915_gem_request", - sizeof(struct drm_i915_gem_request), 0, - SLAB_HWCACHE_ALIGN | - SLAB_RECLAIM_ACCOUNT | - SLAB_DESTROY_BY_RCU, - NULL); - if (!dev_priv->requests) { - err = -ENOMEM; + dev_priv->requests = KMEM_CACHE(drm_i915_gem_request, + SLAB_HWCACHE_ALIGN | + SLAB_RECLAIM_ACCOUNT | + SLAB_DESTROY_BY_RCU); + if (!dev_priv->requests) goto err_vmas; - } mutex_lock(&dev_priv->drm.struct_mutex); INIT_LIST_HEAD(&dev_priv->gt.timelines); -- cgit v1.1 From 72aa0d899a3ea7e14436ac0b869cf3333277b92e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 2 Nov 2016 17:50:47 +0000 Subject: drm/i915/guc: Cache the client mapping Use i915_gem_object_pin_map() for the guc client's lifetime to replace the persistent kmap + frequent kmap_atomic with a permanent vmapping. This avoids taking the obj->mm.lock mutex whilst inside irq context later.
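[Editor's note] The essence of the change, as a sketch (error handling and the surrounding client setup are elided; the calls shown are the ones used in the diff below):

    /* once, at client creation: a permanent WB vmapping of the object */
    void *vaddr = i915_gem_object_pin_map(client->vma->obj, I915_MAP_WB);
    if (IS_ERR(vaddr))
            return PTR_ERR(vaddr);
    client->vaddr = vaddr;

    /* on every access, including from irq context: plain pointer
     * arithmetic, no kmap_atomic and no sleeping obj->mm.lock */
    struct guc_process_desc *desc = client->vaddr + client->proc_desc_offset;

    /* once, at client teardown */
    i915_gem_object_unpin_map(client->vma->obj);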
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98571 Fixes: 96d776345277 ("drm/i915: Use a radixtree for random access..."); Signed-off-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/20161102175051.29163-9-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_guc_submission.c | 38 +++++++++++++++--------------- drivers/gpu/drm/i915/intel_guc.h | 2 +- 2 files changed, 20 insertions(+), 20 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 857ef91..666dab7 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -220,7 +220,7 @@ static int guc_update_doorbell_id(struct intel_guc *guc, struct guc_context_desc desc; size_t len; - doorbell = client->client_base + client->doorbell_offset; + doorbell = client->vaddr + client->doorbell_offset; if (client->doorbell_id != GUC_INVALID_DOORBELL_ID && test_bit(client->doorbell_id, doorbell_bitmap)) { @@ -326,7 +326,7 @@ static void guc_proc_desc_init(struct intel_guc *guc, { struct guc_process_desc *desc; - desc = client->client_base + client->proc_desc_offset; + desc = client->vaddr + client->proc_desc_offset; memset(desc, 0, sizeof(*desc)); @@ -413,8 +413,8 @@ static void guc_ctx_desc_init(struct intel_guc *guc, gfx_addr = i915_ggtt_offset(client->vma); desc.db_trigger_phy = sg_dma_address(client->vma->pages->sgl) + client->doorbell_offset; - desc.db_trigger_cpu = (uintptr_t)client->client_base + - client->doorbell_offset; + desc.db_trigger_cpu = + (uintptr_t)client->vaddr + client->doorbell_offset; desc.db_trigger_uk = gfx_addr + client->doorbell_offset; desc.process_desc = gfx_addr + client->proc_desc_offset; desc.wq_addr = gfx_addr + client->wq_offset; @@ -465,7 +465,7 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request) { const size_t wqi_size = sizeof(struct guc_wq_item); struct i915_guc_client *gc = request->i915->guc.execbuf_client; - struct guc_process_desc *desc = gc->client_base + gc->proc_desc_offset; + struct guc_process_desc *desc = gc->vaddr + gc->proc_desc_offset; u32 freespace; int ret; @@ -506,10 +506,9 @@ static void guc_wq_item_append(struct i915_guc_client *gc, struct intel_engine_cs *engine = rq->engine; struct guc_process_desc *desc; struct guc_wq_item *wqi; - void *base; - u32 freespace, tail, wq_off, wq_page; + u32 freespace, tail, wq_off; - desc = gc->client_base + gc->proc_desc_offset; + desc = gc->vaddr + gc->proc_desc_offset; /* Free space is guaranteed, see i915_guc_wq_reserve() above */ freespace = CIRC_SPACE(gc->wq_tail, desc->head, gc->wq_size); @@ -539,10 +538,7 @@ static void guc_wq_item_append(struct i915_guc_client *gc, gc->wq_rsvd -= wqi_size; /* WQ starts from the page after doorbell / process_desc */ - wq_page = (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT; - wq_off &= PAGE_SIZE - 1; - base = kmap_atomic(i915_gem_object_get_page(gc->vma->obj, wq_page)); - wqi = (struct guc_wq_item *)((char *)base + wq_off); + wqi = gc->vaddr + wq_off + GUC_DB_SIZE; /* Now fill in the 4-word work queue item */ wqi->header = WQ_TYPE_INORDER | @@ -555,8 +551,6 @@ static void guc_wq_item_append(struct i915_guc_client *gc, wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT; wqi->fence_id = rq->global_seqno; - - kunmap_atomic(base); } static int guc_ring_doorbell(struct i915_guc_client *gc) @@ -566,7 +560,7 @@ static int guc_ring_doorbell(struct i915_guc_client *gc) union guc_doorbell_qw *db; int attempt = 2, ret = -EAGAIN; - desc = 
gc->client_base + gc->proc_desc_offset; + desc = gc->vaddr + gc->proc_desc_offset; /* Update the tail so it is visible to GuC */ desc->tail = gc->wq_tail; @@ -582,7 +576,7 @@ static int guc_ring_doorbell(struct i915_guc_client *gc) db_exc.cookie = 1; /* pointer of current doorbell cacheline */ - db = gc->client_base + gc->doorbell_offset; + db = gc->vaddr + gc->doorbell_offset; while (attempt--) { /* lets ring the doorbell */ @@ -724,14 +718,14 @@ guc_client_free(struct drm_i915_private *dev_priv, * Be sure to drop any locks */ - if (client->client_base) { + if (client->vaddr) { /* * If we got as far as setting up a doorbell, make sure we * shut it down before unmapping & deallocating the memory. */ guc_disable_doorbell(guc, client); - kunmap(kmap_to_page(client->client_base)); + i915_gem_object_unpin_map(client->vma->obj); } i915_vma_unpin_and_release(&client->vma); @@ -820,6 +814,7 @@ guc_client_alloc(struct drm_i915_private *dev_priv, struct i915_guc_client *client; struct intel_guc *guc = &dev_priv->guc; struct i915_vma *vma; + void *vaddr; uint16_t db_id; client = kzalloc(sizeof(*client), GFP_KERNEL); @@ -846,7 +841,12 @@ guc_client_alloc(struct drm_i915_private *dev_priv, /* We'll keep just the first (doorbell/proc) page permanently kmap'd. */ client->vma = vma; - client->client_base = kmap(i915_vma_first_page(vma)); + + vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); + if (IS_ERR(vaddr)) + goto err; + + client->vaddr = vaddr; spin_lock_init(&client->wq_lock); client->wq_offset = GUC_DB_SIZE; diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 49ced0b..0053258 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -64,7 +64,7 @@ struct drm_i915_gem_request; */ struct i915_guc_client { struct i915_vma *vma; - void *client_base; /* first page (only) of above */ + void *vaddr; struct i915_gem_context *owner; struct intel_guc *guc; -- cgit v1.1 From a44342acde304425fa70fd4f06c0e662a79ba5aa Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 3 Nov 2016 20:08:52 +0000 Subject: drm/i915: Fix test on inputs for vma_compare() When supplying a view to vma_compare() it is required that the supplied i915_address_space is the global GTT. I tested the VMA instead (which is the current position in the rbtree and maybe from any address space). 
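[Editor's note] The bug class is worth spelling out, since the fix is a one-liner that is easy to misread: a comparator walking a shared tree must assert the invariant on its input (the lookup key supplied by the caller), never on the node it happens to be visiting, because nodes that merely fail the comparison are expected during the walk. A self-contained illustration (all names here are hypothetical, not the driver's):

    #include <assert.h>

    struct key  { int space; long addr; };
    struct node { int space; long addr; };

    static long node_compare(const struct node *n, const struct key *k)
    {
            assert(k->space == 0);  /* caller's invariant: check the input */
            /* do NOT assert on n: visiting a node from another space is a
             * normal miss during the walk, not a bug */
            if (n->space != k->space)
                    return n->space - k->space;
            return n->addr - k->addr;
    }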
Reported-by: Matthew Auld Tested-by: Matthew Auld Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98579 Fixes: db6c2b4151f2 ("drm/i915: Store the vma in an rbtree...") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Matthew Auld Link: http://patchwork.freedesktop.org/patch/msgid/20161103200852.23431-1-chris@chris-wilson.co.uk Reviewed-by: Matthew Auld --- drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 0c1f5b4..cad6de6 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3425,7 +3425,7 @@ static inline long vma_compare(struct i915_vma *vma, struct i915_address_space *vm, const struct i915_ggtt_view *view) { - GEM_BUG_ON(view && !i915_vma_is_ggtt(vma)); + GEM_BUG_ON(view && !i915_is_ggtt(vm)); if (vma->vm != vm) return vma->vm - vm; -- cgit v1.1 From 2c3a3f44dc13a7c964e93385e1c1ca848656bed0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 4 Nov 2016 10:30:01 +0000 Subject: drm/i915: Fix pages pin counting around swizzle quirk commit bc0629a76726 ("drm/i915: Track pages pinned due to swizzling quirk") fixed one problem, but revealed a whole lot more. The root cause of the pin count mismatch for the swizzle quirk (for L-shaped memory on gen3/4) was that we were incrementing the pages_pin_count upon getting the backing pages but then overwriting the pages_pin_count to set it to 1 afterwards. With a little bit of adjustment to satisfy the GEM_BUG_ON sanity checks, the fix is to replace the explicit atomic_set with an atomic_inc. v2: Consistently use atomics (not mix atomics and helpers) within the low-level get_pages routines. This makes the atomic operations much clearer.
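[Editor's note] A standalone C11 illustration of the accounting bug described above (the names are invented for the example; in the driver the counter is obj->mm.pages_pin_count):

    #include <assert.h>
    #include <stdatomic.h>

    int main(void)
    {
            atomic_int buggy = 0;
            atomic_fetch_add(&buggy, 1);      /* quirk pins the pages in get_pages */
            atomic_store(&buggy, 1);          /* BUG: set-to-1 silently erases that pin */
            atomic_fetch_sub(&buggy, 1);      /* first user unpins... */
            assert(atomic_load(&buggy) == 0); /* ...and the quirk's pin is gone, so the
                                               * pages can be released while still needed */

            atomic_int fixed = 0;
            atomic_fetch_add(&fixed, 1);      /* quirk's pin */
            atomic_fetch_add(&fixed, 1);      /* fix: the caller increments instead */
            atomic_fetch_sub(&fixed, 1);      /* first user unpins */
            assert(atomic_load(&fixed) == 1); /* quirk's pin survives */
            return 0;
    }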
Fixes: 1233e2db199d ("drm/i915: Move object backing storage manipulation") Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20161104103001.27643-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem.c | 47 +++++++++++++++++++--------------- drivers/gpu/drm/i915/i915_gem_gtt.c | 7 +++++ drivers/gpu/drm/i915/i915_gem_tiling.c | 1 + 3 files changed, 34 insertions(+), 21 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1f995ce..0dbf38c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2324,12 +2324,6 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) if (i915_gem_object_needs_bit17_swizzle(obj)) i915_gem_object_do_bit_17_swizzle(obj, st); - if (i915_gem_object_is_tiled(obj) && - dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { - __i915_gem_object_pin_pages(obj); - obj->mm.quirked = true; - } - return st; err_pages: @@ -2362,12 +2356,21 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, obj->mm.get_page.sg_idx = 0; obj->mm.pages = pages; + + if (i915_gem_object_is_tiled(obj) && + to_i915(obj->base.dev)->quirks & QUIRK_PIN_SWIZZLED_PAGES) { + GEM_BUG_ON(obj->mm.quirked); + __i915_gem_object_pin_pages(obj); + obj->mm.quirked = true; + } } static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj) { struct sg_table *pages; + GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); + if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) { DRM_DEBUG("Attempting to obtain a purgeable object\n"); return -EFAULT; @@ -2396,16 +2399,14 @@ int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj) if (err) return err; - if (likely(obj->mm.pages)) { - __i915_gem_object_pin_pages(obj); - goto unlock; - } + if (unlikely(!obj->mm.pages)) { + err = ____i915_gem_object_get_pages(obj); + if (err) + goto unlock; - GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); - - err = ____i915_gem_object_get_pages(obj); - if (!err) - atomic_set_release(&obj->mm.pages_pin_count, 1); + smp_mb__before_atomic(); + } + atomic_inc(&obj->mm.pages_pin_count); unlock: mutex_unlock(&obj->mm.lock); @@ -2476,12 +2477,14 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, pinned = true; if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) { - ret = ____i915_gem_object_get_pages(obj); - if (ret) - goto err_unlock; + if (unlikely(!obj->mm.pages)) { + ret = ____i915_gem_object_get_pages(obj); + if (ret) + goto err_unlock; - GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count)); - atomic_set_release(&obj->mm.pages_pin_count, 1); + smp_mb__before_atomic(); + } + atomic_inc(&obj->mm.pages_pin_count); pinned = false; } GEM_BUG_ON(!obj->mm.pages); @@ -2933,7 +2936,7 @@ int i915_vma_unbind(struct i915_vma *vma) goto destroy; GEM_BUG_ON(obj->bind_count == 0); - GEM_BUG_ON(!obj->mm.pages); + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); if (i915_vma_is_map_and_fenceable(vma)) { /* release the fence reg _after_ flushing */ @@ -3167,6 +3170,7 @@ search_free: list_move_tail(&obj->global_link, &dev_priv->mm.bound_list); list_move_tail(&vma->vm_link, &vma->vm->inactive_list); obj->bind_count++; + GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count); return 0; @@ -4100,6 +4104,7 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, obj->mm.quirked = false; } if (args->madv == I915_MADV_WILLNEED) { + GEM_BUG_ON(obj->mm.quirked); 
__i915_gem_object_pin_pages(obj); obj->mm.quirked = true; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index cad6de6..7531bca 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3711,6 +3711,13 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma) { int ret = 0; + /* The vma->pages are only valid within the lifespan of the borrowed + * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so + * must be the vma->pages. A simple rule is that vma->pages must only + * be accessed when the obj->mm.pages are pinned. + */ + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj)); + if (vma->pages) return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 1577e78..251d51b 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -269,6 +269,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, obj->mm.quirked = false; } if (!i915_gem_object_is_tiled(obj)) { + GEM_BUG_ON(!obj->mm.quirked); __i915_gem_object_pin_pages(obj); obj->mm.quirked = true; } -- cgit v1.1 From b30ce9e0552aa017ac6f2243f3c2d8e36fe52e69 Mon Sep 17 00:00:00 2001 From: Dhinakaran Pandiyan Date: Tue, 1 Nov 2016 11:47:59 -0700 Subject: drm/i915/dp: BDW cdclk fix for DP audio MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to BSpec, cdclk for BDW has to be not less than 432 MHz with DP audio enabled, port width x4, and link rate HBR2 (5.4 GHz). With cdclk less than 432 MHz, enabling audio leads to pipe FIFO underruns and displays cycling on/off. From BSpec: "Display» BDW-SKL» dpr» [Register] DP_TP_CTL [BDW+,EXCLUDE(CHV)] Workaround : Do not use DisplayPort with CDCLK less than 432 MHz, audio enabled, port width x4, and link rate HBR2 (5.4 GHz), or else there may be audio corruption or screen corruption." Since some DP configurations (e.g., MST) use port width x4 and HBR2 link rate, let's increase the cdclk to >= 432 MHz to enable audio for those cases. v4: Changed commit message v3: Combine BDW pixel rate adjustments into a function (Jani) v2: Restrict fix to BDW Retain the set cdclk across modesets (Ville) Cc: stable@vger.kernel.org Signed-off-by: Dhinakaran Pandiyan Reviewed-by: Ville Syrjälä Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1478026080-2925-1-git-send-email-dhinakaran.pandiyan@intel.com --- drivers/gpu/drm/i915/intel_display.c | 27 ++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 9758910..7f9970a 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -10247,6 +10247,27 @@ static void bxt_modeset_commit_cdclk(struct drm_atomic_state *old_state) bxt_set_cdclk(to_i915(dev), req_cdclk); } +static int bdw_adjust_min_pipe_pixel_rate(struct intel_crtc_state *crtc_state, + int pixel_rate) +{ + /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */ + if (crtc_state->ips_enabled) + pixel_rate = DIV_ROUND_UP(pixel_rate * 100, 95); + + /* BSpec says "Do not use DisplayPort with CDCLK less than + * 432 MHz, audio enabled, port width x4, and link rate + * HBR2 (5.4 GHz), or else there may be audio corruption or + * screen corruption."
+ */ + if (intel_crtc_has_dp_encoder(crtc_state) && + crtc_state->has_audio && + crtc_state->port_clock >= 540000 && + crtc_state->lane_count == 4) + pixel_rate = max(432000, pixel_rate); + + return pixel_rate; +} + /* compute the max rate for new configuration */ static int ilk_max_pixel_rate(struct drm_atomic_state *state) { @@ -10272,9 +10293,9 @@ static int ilk_max_pixel_rate(struct drm_atomic_state *state) pixel_rate = ilk_pipe_pixel_rate(crtc_state); - /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */ - if (IS_BROADWELL(dev_priv) && crtc_state->ips_enabled) - pixel_rate = DIV_ROUND_UP(pixel_rate * 100, 95); + if (IS_BROADWELL(dev_priv)) + pixel_rate = bdw_adjust_min_pipe_pixel_rate(crtc_state, + pixel_rate); intel_state->min_pixclk[i] = pixel_rate; } -- cgit v1.1 From 9c7540241885838cfc7fa58c4a8bd75be0303ed1 Mon Sep 17 00:00:00 2001 From: Dhinakaran Pandiyan Date: Wed, 2 Nov 2016 13:13:21 -0700 Subject: drm/i915/dp: Extend BDW DP audio workaround to GEN9 platforms According to BSpec, cdclk for BDW has to be not less than 432 MHz with DP audio enabled, port width x4, and link rate HBR2 (5.4 GHz). With cdclk less than 432 MHz, enabling audio leads to pipe FIFO underruns and displays cycling on/off. Let's apply this workaround to GEN9 platforms too, as it fixes the same issue. v2: Move drm_device to drm_i915_private conversion Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97907 Cc: stable@vger.kernel.org Cc: Libin Yang Signed-off-by: Dhinakaran Pandiyan Reviewed-by: Jani Nikula Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1478117601-19122-1-git-send-email-dhinakaran.pandiyan@intel.com --- drivers/gpu/drm/i915/intel_display.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 7f9970a..92ab01f 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -10250,8 +10250,10 @@ static void bxt_modeset_commit_cdclk(struct drm_atomic_state *old_state) static int bdw_adjust_min_pipe_pixel_rate(struct intel_crtc_state *crtc_state, int pixel_rate) { + struct drm_i915_private *dev_priv = to_i915(crtc_state->base.crtc->dev); + /* pixel rate mustn't exceed 95% of cdclk with IPS on BDW */ - if (crtc_state->ips_enabled) + if (IS_BROADWELL(dev_priv) && crtc_state->ips_enabled) pixel_rate = DIV_ROUND_UP(pixel_rate * 100, 95); /* BSpec says "Do not use DisplayPort with CDCLK less than @@ -10293,7 +10295,7 @@ static int ilk_max_pixel_rate(struct drm_atomic_state *state) pixel_rate = ilk_pipe_pixel_rate(crtc_state); - if (IS_BROADWELL(dev_priv)) + if (IS_BROADWELL(dev_priv) || IS_GEN9(dev_priv)) pixel_rate = bdw_adjust_min_pipe_pixel_rate(crtc_state, pixel_rate); -- cgit v1.1 From f97f193613dc7b723fa1b7e187da0ba585a7f2de Mon Sep 17 00:00:00 2001 From: Lyude Date: Tue, 1 Nov 2016 17:31:32 -0400 Subject: drm/i915: Remove redundant reprobe in i915_drm_resume Weine's investigation into benchmarking the suspend/resume process pointed out that a lot of the time in suspend/resume is spent reprobing. While the reprobing process is a lengthy one for good reason, we don't need to hold up the entire suspend/resume process while we wait for it to finish. Luckily, as it turns out, we already trigger a full connector reprobe in i915_hpd_poll_init_work(), so we can just ditch reprobing in i915_drm_resume() entirely.
This won't lead to less time spent resuming just yet since now the bottleneck will be waiting for the mode_config lock in drm_kms_helper_poll_enable(), since that will be held as long as i915_hpd_poll_init_work() is reprobing all of the connectors. But we'll address that in the next patch. Signed-off-by: Lyude Tested-by: David Weinehall Reviewed-by: David Weinehall Testcase: analyze_suspend.py -config config/suspend-callgraph.cfg -filter i915 --- drivers/gpu/drm/i915/i915_drv.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 9e5a547..0e45665 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1607,8 +1607,6 @@ static int i915_drm_resume(struct drm_device *dev) * notifications. * */ intel_hpd_init(dev_priv); - /* Config may have changed between suspend and resume */ - drm_helper_hpd_irq_event(dev); intel_opregion_register(dev_priv); -- cgit v1.1 From e0b70061404f64f592d6d082a63c504b66d65c6e Mon Sep 17 00:00:00 2001 From: Lyude Date: Tue, 1 Nov 2016 21:06:30 -0400 Subject: drm/i915: Reinit polling before hpd when resuming Now that we don't run the connector reprobing from i915_drm_resume(), we need to make it so we don't have to wait for reprobing to finish so that we actually speed things up. In order to do this, we need to make sure that i915_drm_resume() doesn't get blocked by i915_hpd_poll_init_work() while trying to acquire the mode_config lock that drm_kms_helper_poll_enable() needs to acquire. The easiest way to do this is to just enable polling before hpd. This shouldn't break anything since at that point we have everything else we need for polling enabled. As well, this should result in a rather significant improvement in how quickly we can resume the system. Signed-off-by: Lyude Tested-by: David Weinehall Reviewed-by: David Weinehall Testcase: analyze_suspend.py -config config/suspend-callgraph.cfg -filter i915 --- drivers/gpu/drm/i915/i915_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 0e45665..b72c24f 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1600,6 +1600,8 @@ static int i915_drm_resume(struct drm_device *dev) intel_display_resume(dev); + drm_kms_helper_poll_enable(dev); + /* * ... but also need to make sure that hotplug processing * doesn't cause havoc. Like in the driver load code we don't @@ -1619,7 +1621,6 @@ static int i915_drm_resume(struct drm_device *dev) intel_opregion_notify_adapter(dev_priv, PCI_D0); intel_autoenable_gt_powersave(dev_priv); - drm_kms_helper_poll_enable(dev); enable_rpm_wakeref_asserts(dev_priv); -- cgit v1.1 From dfd2812e6d961c868ece32c733bc081ed5a9d3a7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 4 Nov 2016 16:12:41 +0000 Subject: drm/i915: Remove the vma from the object list upon close Currently, the vma is being unlink from the object lookup on destroy. However, we are meant to be decoupling it upon close so that the user cannot access the closed vma whilst it remains active on the GPU. [ 34.074858] kernel BUG at drivers/gpu/drm/i915/i915_gem_gtt.c:3561! 
[ 34.074875] invalid opcode: 0000 [#1] PREEMPT SMP [ 34.074888] Modules linked in: snd_hda_intel i915 x86_pkg_temp_thermal coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel lpc_ich mei_me mei snd_hda_codec_realtek snd_hda_codec_generic snd_hda_codec_hdmi snd_hda_codec snd_hwdep snd_hda_core i2c_designware_platform i2c_designware_core snd_pcm e1000e ptp pps_core sdhci_acpi sdhci mmc_core i2c_hid [last unloaded: i915] [ 34.075010] CPU: 1 PID: 6224 Comm: gem_close_race Tainted: G U 4.9.0-rc3-CI-CI_DRM_1800+ #1 [ 34.075034] Hardware name: /NUC5i7RYB, BIOS RYBDWi35.86A.0355.2016.0224.1501 02/24/2016 [ 34.075057] task: ffff8802459a8040 task.stack: ffffc90000524000 [ 34.075074] RIP: 0010:[] [] i915_gem_obj_lookup_or_create_vma+0x8c/0xc0 [i915] [ 34.075118] RSP: 0018:ffffc90000527b68 EFLAGS: 00010202 [ 34.075135] RAX: ffff8802426c5e40 RBX: 0000000000000000 RCX: ffff8802447fc2a8 [ 34.075158] RDX: 0000000000000000 RSI: ffff8802447fc2a8 RDI: ffff880248a4a880 [ 34.075181] RBP: ffffc90000527b88 R08: 0000000000000008 R09: 0000000000000000 [ 34.075203] R10: 0000000000000001 R11: 0000000000000000 R12: ffff880248a4a880 [ 34.075225] R13: ffff8802447fc2a8 R14: ffff880243e9afa8 R15: ffff880248a4a9c8 [ 34.075248] FS: 00007f9b43e59740(0000) GS:ffff880256c80000(0000) knlGS:0000000000000000 [ 34.075273] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 34.075292] CR2: 00007f9b43419140 CR3: 000000024455d000 CR4: 00000000003406e0 [ 34.075314] Stack: [ 34.075323] 0000000000000000 ffffc90000527bd0 ffff880243cb8008 ffff880243e9afa8 [ 34.075353] ffffc90000527c08 ffffffffa03874c7 ffffc90000527bb8 ffff880243e9afa8 [ 34.075383] ffff880243e9afb0 ffffc90000527e10 ffff8802447fc2a8 ffff880243cb8040 [ 34.075414] Call Trace: [ 34.075435] [] eb_lookup_vmas.isra.7+0x247/0x330 [i915] [ 34.075468] [] i915_gem_do_execbuffer.isra.15+0x604/0x1a10 [i915] [ 34.075507] [] ? i915_gem_object_get_sg+0x347/0x380 [i915] [ 34.075532] [] ? __might_fault+0x3e/0x90 [ 34.075562] [] i915_gem_execbuffer2+0xc0/0x250 [i915] [ 34.075585] [] drm_ioctl+0x1f6/0x480 [ 34.075604] [] ? trace_hardirqs_on_thunk+0x1a/0x1c [ 34.075635] [] ? i915_gem_execbuffer+0x330/0x330 [i915] [ 34.075658] [] do_vfs_ioctl+0x8e/0x690 [ 34.075677] [] ? _raw_spin_unlock_irqrestore+0x3d/0x60 [ 34.075700] [] ? SyS_timer_settime+0x141/0x1e0 [ 34.075721] [] ? 
trace_hardirqs_on_caller+0x122/0x1b0 [ 34.075742] [] SyS_ioctl+0x3c/0x70 [ 34.075760] [] entry_SYSCALL_64_fastpath+0x1c/0xb1 [ 34.075781] Code: 44 a0 48 c7 c2 9a 7e 43 a0 be e0 0d 00 00 48 c7 c7 a0 45 44 a0 e8 55 b8 ce e0 48 85 db 74 a3 49 83 bd f8 03 00 00 00 74 99 0f 0b <0f> 0b 48 89 da 4c 89 ee 4c 89 e7 e8 04 a9 ff ff 48 89 da 49 89 [ 34.075955] RIP [] i915_gem_obj_lookup_or_create_vma+0x8c/0xc0 [i915] [ 34.075994] RSP Testcase: igt/gem_close_race/basic-threads Fixes: db6c2b4151f2 ("drm/i915: Store the vma in an rbtree...") Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/20161104161241.25871-1-chris@chris-wilson.co.uk Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_gem_gtt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 7531bca..a5fafa3 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3403,7 +3403,6 @@ void i915_vma_destroy(struct i915_vma *vma) GEM_BUG_ON(!i915_vma_is_closed(vma)); GEM_BUG_ON(vma->fence); - rb_erase(&vma->obj_node, &vma->obj->vma_tree); list_del(&vma->vm_link); if (!i915_vma_is_ggtt(vma)) i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); @@ -3416,7 +3415,9 @@ void i915_vma_close(struct i915_vma *vma) GEM_BUG_ON(i915_vma_is_closed(vma)); vma->flags |= I915_VMA_CLOSED; - list_del_init(&vma->obj_link); + list_del(&vma->obj_link); + rb_erase(&vma->obj_node, &vma->obj->vma_tree); + if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma)) WARN_ON(i915_vma_unbind(vma)); } -- cgit v1.1 From 0ef723cbceb6dce8116e75d44c5b8679b2eba69a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 7 Nov 2016 10:54:43 +0000 Subject: drm/i915: Round tile chunks up for constructing partial VMAs When we split a large object up into chunks for GTT faulting (because we can't fit the whole object into the aperture) we have to align our cuts with the fence registers. Each partial VMA must cover a complete set of tile rows or the offset into each partial VMA is not aligned with the whole image. Currently we enforce a minimum size on each partial VMA, but this minimum size itself was not aligned to the tile row causing distortion. 
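[Editor's note] The arithmetic behind the one-word fix, with illustrative numbers (256 and 96 are made up; the driver derives the real values from MIN_CHUNK_PAGES and the object's tiling):

    #include <stdio.h>

    #define max(a, b)     ((a) > (b) ? (a) : (b))
    #define roundup(x, y) ((((x) + (y) - 1) / (y)) * (y))

    int main(void)
    {
            unsigned int min_chunk = 256; /* minimum partial-view size, in pages */
            unsigned int tile_row  = 96;  /* pages per tile row */

            /* old: big enough, but can end mid tile row, so rows misalign */
            printf("max():     %u (%u pages into a row)\n",
                   max(min_chunk, tile_row), max(min_chunk, tile_row) % tile_row);
            /* new: whole tile rows only, matching the fence layout */
            printf("roundup(): %u (%u pages into a row)\n",
                   roundup(min_chunk, tile_row), roundup(min_chunk, tile_row) % tile_row);
            return 0;
    }

With these numbers max() yields 256, which stops 64 pages into a tile row, while roundup() yields 288, a whole number of rows.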
Reported-by: Andreas Reis Reported-by: Chris Clayton Reported-by: Norbert Preining Tested-by: Norbert Preining Tested-by: Chris Clayton Fixes: 03af84fe7f48 ("drm/i915: Choose partial chunksize based on tile row size") Fixes: a61007a83a46 ("drm/i915: Fix partial GGTT faulting") # enabling patch Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98402 Testcase: igt/gem_mmap_gtt/medium-copy-odd Signed-off-by: Chris Wilson Cc: Joonas Lahtinen Cc: Jani Nikula Cc: # v4.9-rc1+ Link: http://patchwork.freedesktop.org/patch/msgid/20161107105443.27855-1-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen --- drivers/gpu/drm/i915/i915_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 0dbf38c..33445ff 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1779,7 +1779,7 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf) /* Use a partial view if it is bigger than available space */ chunk_size = MIN_CHUNK_PAGES; if (i915_gem_object_is_tiled(obj)) - chunk_size = max(chunk_size, tile_row_pages(obj)); + chunk_size = roundup(chunk_size, tile_row_pages(obj)); memset(&view, 0, sizeof(view)); view.type = I915_GGTT_VIEW_PARTIAL; -- cgit v1.1 From 767a222e47cc13239d38018887f911fec06169ea Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 7 Nov 2016 11:01:28 +0000 Subject: drm/i915: Limit Valleyview and earlier to only using mappable scanout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Valleyview appears to be limited to only scanning out from the first 512MiB of the Global GTT. Let's presume that this behaviour was inherited from the display block copied from g4x (not Ironlake) and all earlier generations are similarly affected, though testing suggests different symptoms. For simplicity, impose that these platforms must scanout from the mappable region. (For extra simplicity, use HAS_GMCH_DISPLAY even though this catches Cherryview which does not appear to be limited to the low aperture for its scanout.) v2: Use HAS_GMCH_DISPLAY() to more clearly convey my intent about limiting this workaround to the old style of display engine. v3: Update changelog to reflect testing by Ville Syrjälä v4: Include the changes to the comments as well Reported-by: Luis Botello Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98036 Fixes: 2efb813d5388 ("drm/i915: Fallback to using unmappable memory for scanout") Signed-off-by: Chris Wilson Cc: Akash Goel Cc: Joonas Lahtinen Cc: Ville Syrjälä Cc: # v4.9-rc1+ Link: http://patchwork.freedesktop.org/patch/msgid/20161107110128.28762-1-chris@chris-wilson.co.uk Reviewed-by: Ville Syrjälä if (view->type == I915_GGTT_VIEW_NORMAL) vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, PIN_MAPPABLE | PIN_NONBLOCK); - if (IS_ERR(vma)) - vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 0); + if (IS_ERR(vma)) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); + unsigned int flags; + + /* Valleyview is definitely limited to scanning out the first + * 512MiB. Let's presume this behaviour was inherited from the + * g4x display engine and that all earlier gen are similarly + * limited. Testing suggests that it is a little more + * complicated than this. For example, Cherryview appears quite + * happy to scanout from anywhere within its global aperture.
+ */ + flags = 0; + if (HAS_GMCH_DISPLAY(i915)) + flags = PIN_MAPPABLE; + vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, flags); + } if (IS_ERR(vma)) goto err_unpin_display; -- cgit v1.1 From 5bd11a34e46afa1048bd5330673fb1508183f6a5 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 7 Nov 2016 11:20:02 +0200 Subject: drm/i915: Avoid early GPU idling due to already pending idle work At the moment, if an idle work handler is already pending but hasn't yet started to run, retiring a new request will not extend the active period as required, but simply leaves the pending idle work to be scheduled at the original expiration time. This may lead to idling the GPU too early. Fix this by using the delayed-work scheduler alternative, mod_delayed_work(), which makes sure the handler's expiration time is extended in this case. Cc: Chris Wilson Requested-by: Chris Wilson Signed-off-by: Imre Deak Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1478510405-11799-1-git-send-email-imre.deak@intel.com --- drivers/gpu/drm/i915/i915_gem_request.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 79b0046..0b3b051 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -1019,7 +1019,7 @@ void i915_gem_retire_requests(struct drm_i915_private *dev_priv) engine_retire_requests(engine); if (!dev_priv->gt.active_requests) - queue_delayed_work(dev_priv->wq, - &dev_priv->gt.idle_work, - msecs_to_jiffies(100)); + mod_delayed_work(dev_priv->wq, + &dev_priv->gt.idle_work, + msecs_to_jiffies(100)); } -- cgit v1.1 From 93c97dc17ffc4a0651b5a939d1ef34897513b252 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 7 Nov 2016 11:20:03 +0200 Subject: drm/i915: Avoid early GPU idling due to race with new request There is a small race where a new request can be submitted and retired after the idle worker has started to run, which leads to idling the GPU too early. Fix this by deferring the idling to the pending instance of the worker. This scenario was pointed out by Chris. Cc: Chris Wilson Signed-off-by: Imre Deak Reviewed-by: Chris Wilson Link: http://patchwork.freedesktop.org/patch/msgid/1478510405-11799-2-git-send-email-imre.deak@intel.com --- drivers/gpu/drm/i915/i915_gem.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/gpu/drm') diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 490fd30..82170bc 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2766,6 +2766,13 @@ i915_gem_idle_work_handler(struct work_struct *work) goto out_rearm; } + /* + * New request retired after this work handler started, extend active + * period until next instance of the work. + */ + if (work_pending(work)) + goto out_unlock; + if (dev_priv->gt.active_requests) goto out_unlock; -- cgit v1.1 From 0cb5670baa3aad40aeec0c264a1ac1d89d67c6d7 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 7 Nov 2016 11:20:04 +0200 Subject: drm/i915: Make sure engines are idle during GPU idling in LR mode We assume that the GPU is idle once receiving the seqno via the last request's user interrupt. In execlist mode, though, the corresponding context-complete interrupt can be delayed, and until this latter interrupt arrives we consider the request to be pending on the ELSP submit port.
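[Editor's note] The two idleness signals that the following paragraphs distinguish can be written as one predicate. A sketch only; both expressions are taken from the diff below, while the helper name is invented here:

    /* illustrative: "idle" needs both halves to hold */
    static bool gpu_truly_idle(struct drm_i915_private *dev_priv)
    {
            /* seqno side: the last request's user interrupt was seen */
            if (READ_ONCE(dev_priv->gt.active_requests))
                    return false;
            /* execlist side: context-complete drained the ELSP ports */
            return intel_execlists_idle(dev_priv);
    }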
This can cause a problem during system suspend, where this last request
will be seen by the resume code as still pending. Such pending requests
are normally replayed after a GPU reset, but during resume we reset
both the SW and HW tracking of the ring head/tail pointers, so
replaying the pending request with its stale tail pointer will leave
the ring in an inconsistent state. A subsequent request submission can
then lead to the GPU executing from an uninitialized area of the ring
behind the stale tail pointer.

Fix this by making sure any pending request on the ELSP port is
completed before suspending. I used a polling wait since the completion
time I measured was <1ms and since normally we only need to wait during
system suspend. GPU idling during runtime suspend is scheduled with a
delay (currently 50-100ms) after the retirement of the last request, at
which point the context-completed interrupt must already have arrived.

The chance of this bug was increased by

commit 1c777c5d1dcdf8fa0223fcff35fb387b5bb9517a
Author: Imre Deak
Date:   Wed Oct 12 17:46:37 2016 +0300

    drm/i915/hsw: Fix GPU hang during resume from S3-devices state

but it could happen even without the explicit GPU reset, since we
disable interrupts afterwards during the suspend sequence.

v2:
- Do an unlocked poll-wait first. (Chris)
v3-4:
- s/intel_lr_engines_idle/intel_execlists_idle/ and move
  i915.enable_execlists check to the new helper. (Chris)

Cc: Chris Wilson
Cc: Mika Kuoppala
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98470
Signed-off-by: Imre Deak
Reviewed-by: Chris Wilson
Link: http://patchwork.freedesktop.org/patch/msgid/1478510405-11799-3-git-send-email-imre.deak@intel.com
---
 drivers/gpu/drm/i915/i915_gem.c  | 10 ++++++++++
 drivers/gpu/drm/i915/intel_lrc.c | 22 ++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_lrc.h |  1 +
 3 files changed, 33 insertions(+)

(limited to 'drivers/gpu/drm')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 82170bc..ae03cc9 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2752,6 +2752,13 @@ i915_gem_idle_work_handler(struct work_struct *work)
 	if (!READ_ONCE(dev_priv->gt.awake))
 		return;
 
+	/*
+	 * Wait for last execlists context complete, but bail out in case a
+	 * new request is submitted.
+	 */
+	wait_for(READ_ONCE(dev_priv->gt.active_requests) ||
+		 intel_execlists_idle(dev_priv), 10);
+
 	if (READ_ONCE(dev_priv->gt.active_requests))
 		return;
 
@@ -2776,6 +2783,9 @@ i915_gem_idle_work_handler(struct work_struct *work)
 	if (dev_priv->gt.active_requests)
 		goto out_unlock;
 
+	if (wait_for(intel_execlists_idle(dev_priv), 10))
+		DRM_ERROR("Timeout waiting for engines to idle\n");
+
 	for_each_engine(engine, dev_priv, id)
 		i915_gem_batch_pool_fini(&engine->batch_pool);
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index fa3012c..dde04b764 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -522,6 +522,28 @@ static bool execlists_elsp_idle(struct intel_engine_cs *engine)
 	return !engine->execlist_port[0].request;
 }
 
+/**
+ * intel_execlists_idle() - Determine if all engine submission ports are idle
+ * @dev_priv: i915 device private
+ *
+ * Return true if there are no requests pending on any of the submission ports
+ * of any engines.
+ */
+bool intel_execlists_idle(struct drm_i915_private *dev_priv)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	if (!i915.enable_execlists)
+		return true;
+
+	for_each_engine(engine, dev_priv, id)
+		if (!execlists_elsp_idle(engine))
+			return false;
+
+	return true;
+}
+
 static bool execlists_elsp_ready(struct intel_engine_cs *engine)
 {
 	int port;
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index 4fed816..c1f5461 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -95,5 +95,6 @@ uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx,
 int intel_sanitize_enable_execlists(struct drm_i915_private *dev_priv,
 				    int enable_execlists);
 void intel_execlists_enable_submission(struct drm_i915_private *dev_priv);
+bool intel_execlists_idle(struct drm_i915_private *dev_priv);
 
 #endif /* _INTEL_LRC_H_ */
-- cgit v1.1


From 31ab49abde41c4056b17ca3d63456a62964834c0 Mon Sep 17 00:00:00 2001
From: Imre Deak
Date: Mon, 7 Nov 2016 11:20:05 +0200
Subject: drm/i915: Add assert for no pending GPU requests during suspend/resume in LR mode

During resume we will reset the SW/HW tracking for each ring's
head/tail pointers and so are not prepared to replay any pending
requests (as opposed to GPU reset time). Add an assert for this to both
the suspend and the resume code.

v2:
- Check for ELSP port idle already during suspend and check !gt.awake
  during resume. (Chris)
v3:
- Move the !gt.awake check to i915_gem_resume().
v4:
- s/intel_lr_engines_idle/intel_execlists_idle/ (Chris)

Cc: Chris Wilson
Cc: Mika Kuoppala
Signed-off-by: Imre Deak
Reviewed-by: Chris Wilson
Link: http://patchwork.freedesktop.org/patch/msgid/1478510405-11799-4-git-send-email-imre.deak@intel.com
---
 drivers/gpu/drm/i915/i915_gem.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'drivers/gpu/drm')

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ae03cc9..41e697e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4460,6 +4460,7 @@ int i915_gem_suspend(struct drm_device *dev)
 	 * reset the GPU back to its idle, low power state.
 	 */
 	WARN_ON(dev_priv->gt.awake);
+	WARN_ON(!intel_execlists_idle(dev_priv));
 
 	/*
 	 * Neither the BIOS, ourselves or any other kernel
@@ -4496,6 +4497,8 @@ void i915_gem_resume(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
 
+	WARN_ON(dev_priv->gt.awake);
+
 	mutex_lock(&dev->struct_mutex);
 	i915_gem_restore_gtt_mappings(dev);
 
-- cgit v1.1


From 7aa6ca61ee5546d74b76610894924cdb0d4a1af0 Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Mon, 7 Nov 2016 16:52:04 +0000
Subject: drm/i915: Mark CPU cache as dirty when used for rendering
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On LLC, or even snooped, machines, data rendered via the GPU ends up in
the CPU cache. This cacheline dirt also needs to be flushed to main
memory when moving to an incoherent domain, such as the display's
scanout engine. Mostly, this flush already happens because either the
object is marked as dirty from its first use or the issue is avoided by
setting the object into the display domain from the start.

v2: Treat WT as not requiring a clflush prior to use on the display
engine as well.

Fixes: 0f71979ab7fb ("drm/i915: Performed deferred clflush inside set-cache-level")
References: https://bugs.freedesktop.org/show_bug.cgi?id=95414
Signed-off-by: Chris Wilson
Cc: Jani Nikula
Cc: Ville Syrjälä
Cc: # v4.0+
Reviewed-by: Ville Syrjälä
Link: http://patchwork.freedesktop.org/patch/msgid/20161107165204.7008-1-chris@chris-wilson.co.uk
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'drivers/gpu/drm')

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 322c580..fb5b443 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1247,6 +1247,12 @@ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
 	return ctx;
 }
 
+static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
+{
+	return !(obj->cache_level == I915_CACHE_NONE ||
+		 obj->cache_level == I915_CACHE_WT);
+}
+
 void i915_vma_move_to_active(struct i915_vma *vma,
 			     struct drm_i915_gem_request *req,
 			     unsigned int flags)
@@ -1276,6 +1282,8 @@ void i915_vma_move_to_active(struct i915_vma *vma,
 		/* update for the implicit flush after a batch */
 		obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
+		if (!obj->cache_dirty && gpu_write_needs_clflush(obj))
+			obj->cache_dirty = true;
 	}
 
 	if (flags & EXEC_OBJECT_NEEDS_FENCE)
-- cgit v1.1


From 58e197d631d44f9f4817b8198b43132a40de1164 Mon Sep 17 00:00:00 2001
From: Daniel Vetter
Date: Tue, 8 Nov 2016 07:51:35 +0100
Subject: drm/i915: Update DRIVER_DATE to 20161108

Signed-off-by: Daniel Vetter
---
 drivers/gpu/drm/i915/i915_drv.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu/drm')

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ae0217d..80775b8 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -72,8 +72,8 @@
 #define DRIVER_NAME		"i915"
 #define DRIVER_DESC		"Intel Graphics"
-#define DRIVER_DATE		"20161024"
-#define DRIVER_TIMESTAMP	1477290335
+#define DRIVER_DATE		"20161108"
+#define DRIVER_TIMESTAMP	1478587895
 
 #undef WARN_ON
 /* Many gcc seem to no see through this and fall over :( */
-- cgit v1.1
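
The two idle-work patches above reduce to a small generic pattern:
extend the idle deadline on every retirement with mod_delayed_work(),
and let a handler that has been overtaken by a newer retirement defer
to the pending instance via work_pending(). Below is a minimal
standalone sketch of that pattern; all identifiers (demo_gt,
demo_request_retired, demo_idle_handler) are invented for illustration
and none of this is i915 code.

#include <linux/atomic.h>
#include <linux/jiffies.h>
#include <linux/workqueue.h>

struct demo_gt {
	atomic_t active_requests;
	struct delayed_work idle_work;	/* runs demo_idle_handler() */
};

static void demo_request_retired(struct demo_gt *gt)
{
	if (atomic_dec_and_test(&gt->active_requests)) {
		/*
		 * Unlike queue_delayed_work(), mod_delayed_work() pushes
		 * back the deadline of an already pending work item, so a
		 * retirement near the old deadline still grants a full
		 * idle interval before power-down.
		 */
		mod_delayed_work(system_wq, &gt->idle_work,
				 msecs_to_jiffies(100));
	}
}

static void demo_idle_handler(struct work_struct *work)
{
	struct demo_gt *gt =
		container_of(work, struct demo_gt, idle_work.work);

	/*
	 * A retirement after this handler was queued re-armed the work;
	 * defer idling to that pending instance instead of acting on
	 * stale state.
	 */
	if (work_pending(work))
		return;

	if (atomic_read(&gt->active_requests))
		return;

	/* ... it is now safe to power down the GPU ... */
}

The execlists-idle fix additionally leans on a bounded poll (the
driver's wait_for() macro). A rough standalone equivalent, again with
an invented name and simplified timeout handling compared to the real
macro, could look like:

#include <linux/delay.h>
#include <linux/ktime.h>

/* Poll cond() until it holds or timeout_ms expires; invented helper. */
static bool demo_wait_for(bool (*cond)(void *data), void *data,
			  unsigned int timeout_ms)
{
	const ktime_t end = ktime_add_ms(ktime_get(), timeout_ms);

	for (;;) {
		if (cond(data))
			return true;
		if (ktime_after(ktime_get(), end))
			return cond(data);	/* final check on timeout */
		usleep_range(50, 100);		/* back off between samples */
	}
}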