diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-12-30 10:00:37 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-12-30 10:00:37 -0800 |
commit | 4a490b78cb7e0e5efa44425df72a9fedc1c36366 (patch) | |
tree | 8a867e39c4e555e4ba10772748b0bde8fe789e20 | |
parent | 8d91a42e54eebc43f4d8f6064751ccba73528275 (diff) | |
parent | d5757dbe79870d825d0dec30074d48683e1d7e9a (diff) | |
download | op-kernel-dev-4a490b78cb7e0e5efa44425df72a9fedc1c36366.zip op-kernel-dev-4a490b78cb7e0e5efa44425df72a9fedc1c36366.tar.gz |
Merge branch 'drm-next' of git://people.freedesktop.org/~airlied/linux
Pull DRM update from Dave Airlie:
"This is a bit larger due to me not bothering to do anything since
before Xmas, and other people working too hard after I had clearly
given up.
It's got the 3 main x86 driver fixes pulls, and a bunch of tegra
fixes, doesn't fix the Ironlake bug yet, but that does seem to be
getting closer.
- radeon: gpu reset fixes and userspace packet support
- i915: watermark fixes, workarounds, i830/845 fix,
- nouveau: nvd9/kepler microcode fixes, accel is now enabled and
working, gk106 support
- tegra: misc fixes."
* 'drm-next' of git://people.freedesktop.org/~airlied/linux: (34 commits)
Revert "drm: tegra: protect DC register access with mutex"
drm: tegra: program only one window during modeset
drm: tegra: clean out old gem prototypes
drm: tegra: remove redundant tegra2_tmds_config entry
drm: tegra: protect DC register access with mutex
drm: tegra: don't leave clients host1x member uninitialized
drm: tegra: fix front_porch <-> back_porch mixup
drm/nve0/graph: fix fuc, and enable acceleration on all known chipsets
drm/nvc0/graph: fix fuc, and enable acceleration on GF119
drm/nouveau/bios: cache ramcfg strap on later chipsets
drm/nouveau/mxm: silence output if no bios data
drm/nouveau/bios: parse/display extra version component
drm/nouveau/bios: implement opcode 0xa9
drm/nouveau/bios: update gpio parsing apis to match current design
drm/nouveau: initial support for GK106
drm/radeon: add WAIT_UNTIL to evergreen VM safe reg list
drm/i915: disable shrinker lock stealing for create_mmap_offset
drm/i915: optionally disable shrinker lock stealing
drm/i915: fix flags in dma buf exporting
drm/radeon: add support for MEM_WRITE packet
...
46 files changed, 880 insertions, 397 deletions
diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index 0761a03..2bf9670 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -184,19 +184,27 @@ EXPORT_SYMBOL(drm_mm_get_block_generic); * -ENOSPC if no suitable free area is available. The preallocated memory node * must be cleared. */ -int drm_mm_insert_node(struct drm_mm *mm, struct drm_mm_node *node, - unsigned long size, unsigned alignment) +int drm_mm_insert_node_generic(struct drm_mm *mm, struct drm_mm_node *node, + unsigned long size, unsigned alignment, + unsigned long color) { struct drm_mm_node *hole_node; - hole_node = drm_mm_search_free(mm, size, alignment, false); + hole_node = drm_mm_search_free_generic(mm, size, alignment, + color, 0); if (!hole_node) return -ENOSPC; - drm_mm_insert_helper(hole_node, node, size, alignment, 0); - + drm_mm_insert_helper(hole_node, node, size, alignment, color); return 0; } +EXPORT_SYMBOL(drm_mm_insert_node_generic); + +int drm_mm_insert_node(struct drm_mm *mm, struct drm_mm_node *node, + unsigned long size, unsigned alignment) +{ + return drm_mm_insert_node_generic(mm, node, size, alignment, 0); +} EXPORT_SYMBOL(drm_mm_insert_node); static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node, @@ -275,22 +283,31 @@ EXPORT_SYMBOL(drm_mm_get_block_range_generic); * -ENOSPC if no suitable free area is available. This is for range * restricted allocations. The preallocated memory node must be cleared. */ -int drm_mm_insert_node_in_range(struct drm_mm *mm, struct drm_mm_node *node, - unsigned long size, unsigned alignment, - unsigned long start, unsigned long end) +int drm_mm_insert_node_in_range_generic(struct drm_mm *mm, struct drm_mm_node *node, + unsigned long size, unsigned alignment, unsigned long color, + unsigned long start, unsigned long end) { struct drm_mm_node *hole_node; - hole_node = drm_mm_search_free_in_range(mm, size, alignment, - start, end, false); + hole_node = drm_mm_search_free_in_range_generic(mm, + size, alignment, color, + start, end, 0); if (!hole_node) return -ENOSPC; - drm_mm_insert_helper_range(hole_node, node, size, alignment, 0, + drm_mm_insert_helper_range(hole_node, node, + size, alignment, color, start, end); - return 0; } +EXPORT_SYMBOL(drm_mm_insert_node_in_range_generic); + +int drm_mm_insert_node_in_range(struct drm_mm *mm, struct drm_mm_node *node, + unsigned long size, unsigned alignment, + unsigned long start, unsigned long end) +{ + return drm_mm_insert_node_in_range_generic(mm, node, size, alignment, 0, start, end); +} EXPORT_SYMBOL(drm_mm_insert_node_in_range); /** diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 8f63cd5..99daa89 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -989,6 +989,9 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_SECURE_BATCHES: value = capable(CAP_SYS_ADMIN); break; + case I915_PARAM_HAS_PINNED_BATCHES: + value = 1; + break; default: DRM_DEBUG_DRIVER("Unknown parameter %d\n", param->param); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 557843d..ed30595 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -780,6 +780,7 @@ typedef struct drm_i915_private { struct i915_hw_ppgtt *aliasing_ppgtt; struct shrinker inactive_shrinker; + bool shrinker_no_lock_stealing; /** * List of objects currently involved in rendering. @@ -1100,6 +1101,7 @@ struct drm_i915_gem_object { */ atomic_t pending_flip; }; +#define to_gem_object(obj) (&((struct drm_i915_gem_object *)(obj))->base) #define to_intel_bo(x) container_of(x, struct drm_i915_gem_object, base) @@ -1166,6 +1168,9 @@ struct drm_i915_file_private { #define IS_IVB_GT1(dev) ((dev)->pci_device == 0x0156 || \ (dev)->pci_device == 0x0152 || \ (dev)->pci_device == 0x015a) +#define IS_SNB_GT1(dev) ((dev)->pci_device == 0x0102 || \ + (dev)->pci_device == 0x0106 || \ + (dev)->pci_device == 0x010A) #define IS_VALLEYVIEW(dev) (INTEL_INFO(dev)->is_valleyview) #define IS_HASWELL(dev) (INTEL_INFO(dev)->is_haswell) #define IS_MOBILE(dev) (INTEL_INFO(dev)->is_mobile) @@ -1196,6 +1201,9 @@ struct drm_i915_file_private { #define HAS_OVERLAY(dev) (INTEL_INFO(dev)->has_overlay) #define OVERLAY_NEEDS_PHYSICAL(dev) (INTEL_INFO(dev)->overlay_needs_physical) +/* Early gen2 have a totally busted CS tlb and require pinned batches. */ +#define HAS_BROKEN_CS_TLB(dev) (IS_I830(dev) || IS_845G(dev)) + /* With the 945 and later, Y tiling got adjusted so that it was 32 128-byte * rows, which changed the alignment requirements and fence programming. */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 742206e..da3c82e3 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1517,9 +1517,11 @@ static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) if (obj->base.map_list.map) return 0; + dev_priv->mm.shrinker_no_lock_stealing = true; + ret = drm_gem_create_mmap_offset(&obj->base); if (ret != -ENOSPC) - return ret; + goto out; /* Badly fragmented mmap space? The only way we can recover * space is by destroying unwanted objects. We can't randomly release @@ -1531,10 +1533,14 @@ static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj) i915_gem_purge(dev_priv, obj->base.size >> PAGE_SHIFT); ret = drm_gem_create_mmap_offset(&obj->base); if (ret != -ENOSPC) - return ret; + goto out; i915_gem_shrink_all(dev_priv); - return drm_gem_create_mmap_offset(&obj->base); + ret = drm_gem_create_mmap_offset(&obj->base); +out: + dev_priv->mm.shrinker_no_lock_stealing = false; + + return ret; } static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj) @@ -2890,7 +2896,7 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, { struct drm_device *dev = obj->base.dev; drm_i915_private_t *dev_priv = dev->dev_private; - struct drm_mm_node *free_space; + struct drm_mm_node *node; u32 size, fence_size, fence_alignment, unfenced_alignment; bool mappable, fenceable; int ret; @@ -2936,66 +2942,54 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj, i915_gem_object_pin_pages(obj); + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (node == NULL) { + i915_gem_object_unpin_pages(obj); + return -ENOMEM; + } + search_free: if (map_and_fenceable) - free_space = drm_mm_search_free_in_range_color(&dev_priv->mm.gtt_space, - size, alignment, obj->cache_level, - 0, dev_priv->mm.gtt_mappable_end, - false); + ret = drm_mm_insert_node_in_range_generic(&dev_priv->mm.gtt_space, node, + size, alignment, obj->cache_level, + 0, dev_priv->mm.gtt_mappable_end); else - free_space = drm_mm_search_free_color(&dev_priv->mm.gtt_space, - size, alignment, obj->cache_level, - false); - - if (free_space != NULL) { - if (map_and_fenceable) - free_space = - drm_mm_get_block_range_generic(free_space, - size, alignment, obj->cache_level, - 0, dev_priv->mm.gtt_mappable_end, - false); - else - free_space = - drm_mm_get_block_generic(free_space, - size, alignment, obj->cache_level, - false); - } - if (free_space == NULL) { + ret = drm_mm_insert_node_generic(&dev_priv->mm.gtt_space, node, + size, alignment, obj->cache_level); + if (ret) { ret = i915_gem_evict_something(dev, size, alignment, obj->cache_level, map_and_fenceable, nonblocking); - if (ret) { - i915_gem_object_unpin_pages(obj); - return ret; - } + if (ret == 0) + goto search_free; - goto search_free; + i915_gem_object_unpin_pages(obj); + kfree(node); + return ret; } - if (WARN_ON(!i915_gem_valid_gtt_space(dev, - free_space, - obj->cache_level))) { + if (WARN_ON(!i915_gem_valid_gtt_space(dev, node, obj->cache_level))) { i915_gem_object_unpin_pages(obj); - drm_mm_put_block(free_space); + drm_mm_put_block(node); return -EINVAL; } ret = i915_gem_gtt_prepare_object(obj); if (ret) { i915_gem_object_unpin_pages(obj); - drm_mm_put_block(free_space); + drm_mm_put_block(node); return ret; } list_move_tail(&obj->gtt_list, &dev_priv->mm.bound_list); list_add_tail(&obj->mm_list, &dev_priv->mm.inactive_list); - obj->gtt_space = free_space; - obj->gtt_offset = free_space->start; + obj->gtt_space = node; + obj->gtt_offset = node->start; fenceable = - free_space->size == fence_size && - (free_space->start & (fence_alignment - 1)) == 0; + node->size == fence_size && + (node->start & (fence_alignment - 1)) == 0; mappable = obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end; @@ -4392,6 +4386,9 @@ i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc) if (!mutex_is_locked_by(&dev->struct_mutex, current)) return 0; + if (dev_priv->mm.shrinker_no_lock_stealing) + return 0; + unlock = false; } diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index 773ef77..7be4241 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -226,7 +226,7 @@ struct dma_buf *i915_gem_prime_export(struct drm_device *dev, { struct drm_i915_gem_object *obj = to_intel_bo(gem_obj); - return dma_buf_export(obj, &i915_dmabuf_ops, obj->base.size, 0600); + return dma_buf_export(obj, &i915_dmabuf_ops, obj->base.size, flags); } static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index ee8f97f..d6a994a 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -808,6 +808,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, flags |= I915_DISPATCH_SECURE; } + if (args->flags & I915_EXEC_IS_PINNED) + flags |= I915_DISPATCH_PINNED; switch (args->flags & I915_EXEC_RING_MASK) { case I915_EXEC_DEFAULT: diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index a4dc97f..2220dec 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1087,6 +1087,18 @@ i915_error_first_batchbuffer(struct drm_i915_private *dev_priv, if (!ring->get_seqno) return NULL; + if (HAS_BROKEN_CS_TLB(dev_priv->dev)) { + u32 acthd = I915_READ(ACTHD); + + if (WARN_ON(ring->id != RCS)) + return NULL; + + obj = ring->private; + if (acthd >= obj->gtt_offset && + acthd < obj->gtt_offset + obj->base.size) + return i915_error_object_create(dev_priv, obj); + } + seqno = ring->get_seqno(ring, false); list_for_each_entry(obj, &dev_priv->mm.active_list, mm_list) { if (obj->ring != ring) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 3f75cfa..186ee5c 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -517,6 +517,7 @@ * the enables for writing to the corresponding low bit. */ #define _3D_CHICKEN 0x02084 +#define _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB (1 << 10) #define _3D_CHICKEN2 0x0208c /* Disables pipelining of read flushes past the SF-WIZ interface. * Required on all Ironlake steppings according to the B-Spec, but the @@ -532,7 +533,8 @@ # define MI_FLUSH_ENABLE (1 << 12) #define GEN6_GT_MODE 0x20d0 -#define GEN6_GT_MODE_HI (1 << 9) +#define GEN6_GT_MODE_HI (1 << 9) +#define GEN6_TD_FOUR_ROW_DISPATCH_DISABLE (1 << 5) #define GFX_MODE 0x02520 #define GFX_MODE_GEN7 0x0229c diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 5d127e0..a9fb046 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -8144,10 +8144,6 @@ intel_modeset_stage_output_state(struct drm_device *dev, DRM_DEBUG_KMS("encoder changed, full mode switch\n"); config->mode_changed = true; } - - /* Disable all disconnected encoders. */ - if (connector->base.status == connector_status_disconnected) - connector->new_encoder = NULL; } /* connector->new_encoder is now updated for all connectors. */ @@ -9167,6 +9163,23 @@ static void intel_sanitize_encoder(struct intel_encoder *encoder) * the crtc fixup. */ } +static void i915_redisable_vga(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + u32 vga_reg; + + if (HAS_PCH_SPLIT(dev)) + vga_reg = CPU_VGACNTRL; + else + vga_reg = VGACNTRL; + + if (I915_READ(vga_reg) != VGA_DISP_DISABLE) { + DRM_DEBUG_KMS("Something enabled VGA plane, disabling it\n"); + I915_WRITE(vga_reg, VGA_DISP_DISABLE); + POSTING_READ(vga_reg); + } +} + /* Scan out the current hw modeset state, sanitizes it and maps it into the drm * and i915 state tracking structures. */ void intel_modeset_setup_hw_state(struct drm_device *dev, @@ -9275,6 +9288,8 @@ void intel_modeset_setup_hw_state(struct drm_device *dev, intel_set_mode(&crtc->base, &crtc->base.mode, crtc->base.x, crtc->base.y, crtc->base.fb); } + + i915_redisable_vga(dev); } else { intel_modeset_update_staged_output_state(dev); } diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 496caa7..e6f54ff 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -405,7 +405,7 @@ void intel_update_fbc(struct drm_device *dev) * - going to an unsupported config (interlace, pixel multiply, etc.) */ list_for_each_entry(tmp_crtc, &dev->mode_config.crtc_list, head) { - if (tmp_crtc->enabled && + if (to_intel_crtc(tmp_crtc)->active && !to_intel_crtc(tmp_crtc)->primary_disabled && tmp_crtc->fb) { if (crtc) { @@ -992,7 +992,7 @@ static struct drm_crtc *single_enabled_crtc(struct drm_device *dev) struct drm_crtc *crtc, *enabled = NULL; list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { - if (crtc->enabled && crtc->fb) { + if (to_intel_crtc(crtc)->active && crtc->fb) { if (enabled) return NULL; enabled = crtc; @@ -1086,7 +1086,7 @@ static bool g4x_compute_wm0(struct drm_device *dev, int entries, tlb_miss; crtc = intel_get_crtc_for_plane(dev, plane); - if (crtc->fb == NULL || !crtc->enabled) { + if (crtc->fb == NULL || !to_intel_crtc(crtc)->active) { *cursor_wm = cursor->guard_size; *plane_wm = display->guard_size; return false; @@ -1215,7 +1215,7 @@ static bool vlv_compute_drain_latency(struct drm_device *dev, int entries; crtc = intel_get_crtc_for_plane(dev, plane); - if (crtc->fb == NULL || !crtc->enabled) + if (crtc->fb == NULL || !to_intel_crtc(crtc)->active) return false; clock = crtc->mode.clock; /* VESA DOT Clock */ @@ -1286,6 +1286,7 @@ static void valleyview_update_wm(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; int planea_wm, planeb_wm, cursora_wm, cursorb_wm; int plane_sr, cursor_sr; + int ignore_plane_sr, ignore_cursor_sr; unsigned int enabled = 0; vlv_update_drain_latency(dev); @@ -1302,17 +1303,23 @@ static void valleyview_update_wm(struct drm_device *dev) &planeb_wm, &cursorb_wm)) enabled |= 2; - plane_sr = cursor_sr = 0; if (single_plane_enabled(enabled) && g4x_compute_srwm(dev, ffs(enabled) - 1, sr_latency_ns, &valleyview_wm_info, &valleyview_cursor_wm_info, - &plane_sr, &cursor_sr)) + &plane_sr, &ignore_cursor_sr) && + g4x_compute_srwm(dev, ffs(enabled) - 1, + 2*sr_latency_ns, + &valleyview_wm_info, + &valleyview_cursor_wm_info, + &ignore_plane_sr, &cursor_sr)) { I915_WRITE(FW_BLC_SELF_VLV, FW_CSPWRDWNEN); - else + } else { I915_WRITE(FW_BLC_SELF_VLV, I915_READ(FW_BLC_SELF_VLV) & ~FW_CSPWRDWNEN); + plane_sr = cursor_sr = 0; + } DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n", planea_wm, cursora_wm, @@ -1352,17 +1359,18 @@ static void g4x_update_wm(struct drm_device *dev) &planeb_wm, &cursorb_wm)) enabled |= 2; - plane_sr = cursor_sr = 0; if (single_plane_enabled(enabled) && g4x_compute_srwm(dev, ffs(enabled) - 1, sr_latency_ns, &g4x_wm_info, &g4x_cursor_wm_info, - &plane_sr, &cursor_sr)) + &plane_sr, &cursor_sr)) { I915_WRITE(FW_BLC_SELF, FW_BLC_SELF_EN); - else + } else { I915_WRITE(FW_BLC_SELF, I915_READ(FW_BLC_SELF) & ~FW_BLC_SELF_EN); + plane_sr = cursor_sr = 0; + } DRM_DEBUG_KMS("Setting FIFO watermarks - A: plane=%d, cursor=%d, B: plane=%d, cursor=%d, SR: plane=%d, cursor=%d\n", planea_wm, cursora_wm, @@ -1468,7 +1476,7 @@ static void i9xx_update_wm(struct drm_device *dev) fifo_size = dev_priv->display.get_fifo_size(dev, 0); crtc = intel_get_crtc_for_plane(dev, 0); - if (crtc->enabled && crtc->fb) { + if (to_intel_crtc(crtc)->active && crtc->fb) { int cpp = crtc->fb->bits_per_pixel / 8; if (IS_GEN2(dev)) cpp = 4; @@ -1482,7 +1490,7 @@ static void i9xx_update_wm(struct drm_device *dev) fifo_size = dev_priv->display.get_fifo_size(dev, 1); crtc = intel_get_crtc_for_plane(dev, 1); - if (crtc->enabled && crtc->fb) { + if (to_intel_crtc(crtc)->active && crtc->fb) { int cpp = crtc->fb->bits_per_pixel / 8; if (IS_GEN2(dev)) cpp = 4; @@ -1811,8 +1819,110 @@ static void sandybridge_update_wm(struct drm_device *dev) enabled |= 2; } - if ((dev_priv->num_pipe == 3) && - g4x_compute_wm0(dev, 2, + /* + * Calculate and update the self-refresh watermark only when one + * display plane is used. + * + * SNB support 3 levels of watermark. + * + * WM1/WM2/WM2 watermarks have to be enabled in the ascending order, + * and disabled in the descending order + * + */ + I915_WRITE(WM3_LP_ILK, 0); + I915_WRITE(WM2_LP_ILK, 0); + I915_WRITE(WM1_LP_ILK, 0); + + if (!single_plane_enabled(enabled) || + dev_priv->sprite_scaling_enabled) + return; + enabled = ffs(enabled) - 1; + + /* WM1 */ + if (!ironlake_compute_srwm(dev, 1, enabled, + SNB_READ_WM1_LATENCY() * 500, + &sandybridge_display_srwm_info, + &sandybridge_cursor_srwm_info, + &fbc_wm, &plane_wm, &cursor_wm)) + return; + + I915_WRITE(WM1_LP_ILK, + WM1_LP_SR_EN | + (SNB_READ_WM1_LATENCY() << WM1_LP_LATENCY_SHIFT) | + (fbc_wm << WM1_LP_FBC_SHIFT) | + (plane_wm << WM1_LP_SR_SHIFT) | + cursor_wm); + + /* WM2 */ + if (!ironlake_compute_srwm(dev, 2, enabled, + SNB_READ_WM2_LATENCY() * 500, + &sandybridge_display_srwm_info, + &sandybridge_cursor_srwm_info, + &fbc_wm, &plane_wm, &cursor_wm)) + return; + + I915_WRITE(WM2_LP_ILK, + WM2_LP_EN | + (SNB_READ_WM2_LATENCY() << WM1_LP_LATENCY_SHIFT) | + (fbc_wm << WM1_LP_FBC_SHIFT) | + (plane_wm << WM1_LP_SR_SHIFT) | + cursor_wm); + + /* WM3 */ + if (!ironlake_compute_srwm(dev, 3, enabled, + SNB_READ_WM3_LATENCY() * 500, + &sandybridge_display_srwm_info, + &sandybridge_cursor_srwm_info, + &fbc_wm, &plane_wm, &cursor_wm)) + return; + + I915_WRITE(WM3_LP_ILK, + WM3_LP_EN | + (SNB_READ_WM3_LATENCY() << WM1_LP_LATENCY_SHIFT) | + (fbc_wm << WM1_LP_FBC_SHIFT) | + (plane_wm << WM1_LP_SR_SHIFT) | + cursor_wm); +} + +static void ivybridge_update_wm(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + int latency = SNB_READ_WM0_LATENCY() * 100; /* In unit 0.1us */ + u32 val; + int fbc_wm, plane_wm, cursor_wm; + int ignore_fbc_wm, ignore_plane_wm, ignore_cursor_wm; + unsigned int enabled; + + enabled = 0; + if (g4x_compute_wm0(dev, 0, + &sandybridge_display_wm_info, latency, + &sandybridge_cursor_wm_info, latency, + &plane_wm, &cursor_wm)) { + val = I915_READ(WM0_PIPEA_ILK); + val &= ~(WM0_PIPE_PLANE_MASK | WM0_PIPE_CURSOR_MASK); + I915_WRITE(WM0_PIPEA_ILK, val | + ((plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm)); + DRM_DEBUG_KMS("FIFO watermarks For pipe A -" + " plane %d, " "cursor: %d\n", + plane_wm, cursor_wm); + enabled |= 1; + } + + if (g4x_compute_wm0(dev, 1, + &sandybridge_display_wm_info, latency, + &sandybridge_cursor_wm_info, latency, + &plane_wm, &cursor_wm)) { + val = I915_READ(WM0_PIPEB_ILK); + val &= ~(WM0_PIPE_PLANE_MASK | WM0_PIPE_CURSOR_MASK); + I915_WRITE(WM0_PIPEB_ILK, val | + ((plane_wm << WM0_PIPE_PLANE_SHIFT) | cursor_wm)); + DRM_DEBUG_KMS("FIFO watermarks For pipe B -" + " plane %d, cursor: %d\n", + plane_wm, cursor_wm); + enabled |= 2; + } + + if (g4x_compute_wm0(dev, 2, &sandybridge_display_wm_info, latency, &sandybridge_cursor_wm_info, latency, &plane_wm, &cursor_wm)) { @@ -1875,12 +1985,17 @@ static void sandybridge_update_wm(struct drm_device *dev) (plane_wm << WM1_LP_SR_SHIFT) | cursor_wm); - /* WM3 */ + /* WM3, note we have to correct the cursor latency */ if (!ironlake_compute_srwm(dev, 3, enabled, SNB_READ_WM3_LATENCY() * 500, &sandybridge_display_srwm_info, &sandybridge_cursor_srwm_info, - &fbc_wm, &plane_wm, &cursor_wm)) + &fbc_wm, &plane_wm, &ignore_cursor_wm) || + !ironlake_compute_srwm(dev, 3, enabled, + 2 * SNB_READ_WM3_LATENCY() * 500, + &sandybridge_display_srwm_info, + &sandybridge_cursor_srwm_info, + &ignore_fbc_wm, &ignore_plane_wm, &cursor_wm)) return; I915_WRITE(WM3_LP_ILK, @@ -1929,7 +2044,7 @@ sandybridge_compute_sprite_wm(struct drm_device *dev, int plane, int entries, tlb_miss; crtc = intel_get_crtc_for_plane(dev, plane); - if (crtc->fb == NULL || !crtc->enabled) { + if (crtc->fb == NULL || !to_intel_crtc(crtc)->active) { *sprite_wm = display->guard_size; return false; } @@ -3471,6 +3586,15 @@ static void gen6_init_clock_gating(struct drm_device *dev) I915_READ(ILK_DISPLAY_CHICKEN2) | ILK_ELPIN_409_SELECT); + /* WaDisableHiZPlanesWhenMSAAEnabled */ + I915_WRITE(_3D_CHICKEN, + _MASKED_BIT_ENABLE(_3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB)); + + /* WaSetupGtModeTdRowDispatch */ + if (IS_SNB_GT1(dev)) + I915_WRITE(GEN6_GT_MODE, + _MASKED_BIT_ENABLE(GEN6_TD_FOUR_ROW_DISPATCH_DISABLE)); + I915_WRITE(WM3_LP_ILK, 0); I915_WRITE(WM2_LP_ILK, 0); I915_WRITE(WM1_LP_ILK, 0); @@ -3999,7 +4123,7 @@ void intel_init_pm(struct drm_device *dev) } else if (IS_IVYBRIDGE(dev)) { /* FIXME: detect B0+ stepping and use auto training */ if (SNB_READ_WM0_LATENCY()) { - dev_priv->display.update_wm = sandybridge_update_wm; + dev_priv->display.update_wm = ivybridge_update_wm; dev_priv->display.update_sprite_wm = sandybridge_update_sprite_wm; } else { DRM_DEBUG_KMS("Failed to read display plane latency. " diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 2346b92..ae253e0 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -547,9 +547,14 @@ static int init_render_ring(struct intel_ring_buffer *ring) static void render_ring_cleanup(struct intel_ring_buffer *ring) { + struct drm_device *dev = ring->dev; + if (!ring->private) return; + if (HAS_BROKEN_CS_TLB(dev)) + drm_gem_object_unreference(to_gem_object(ring->private)); + cleanup_pipe_control(ring); } @@ -969,6 +974,8 @@ i965_dispatch_execbuffer(struct intel_ring_buffer *ring, return 0; } +/* Just userspace ABI convention to limit the wa batch bo to a resonable size */ +#define I830_BATCH_LIMIT (256*1024) static int i830_dispatch_execbuffer(struct intel_ring_buffer *ring, u32 offset, u32 len, @@ -976,15 +983,47 @@ i830_dispatch_execbuffer(struct intel_ring_buffer *ring, { int ret; - ret = intel_ring_begin(ring, 4); - if (ret) - return ret; + if (flags & I915_DISPATCH_PINNED) { + ret = intel_ring_begin(ring, 4); + if (ret) + return ret; - intel_ring_emit(ring, MI_BATCH_BUFFER); - intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE)); - intel_ring_emit(ring, offset + len - 8); - intel_ring_emit(ring, 0); - intel_ring_advance(ring); + intel_ring_emit(ring, MI_BATCH_BUFFER); + intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE)); + intel_ring_emit(ring, offset + len - 8); + intel_ring_emit(ring, MI_NOOP); + intel_ring_advance(ring); + } else { + struct drm_i915_gem_object *obj = ring->private; + u32 cs_offset = obj->gtt_offset; + + if (len > I830_BATCH_LIMIT) + return -ENOSPC; + + ret = intel_ring_begin(ring, 9+3); + if (ret) + return ret; + /* Blit the batch (which has now all relocs applied) to the stable batch + * scratch bo area (so that the CS never stumbles over its tlb + * invalidation bug) ... */ + intel_ring_emit(ring, XY_SRC_COPY_BLT_CMD | + XY_SRC_COPY_BLT_WRITE_ALPHA | + XY_SRC_COPY_BLT_WRITE_RGB); + intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_GXCOPY | 4096); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, (DIV_ROUND_UP(len, 4096) << 16) | 1024); + intel_ring_emit(ring, cs_offset); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 4096); + intel_ring_emit(ring, offset); + intel_ring_emit(ring, MI_FLUSH); + + /* ... and execute it. */ + intel_ring_emit(ring, MI_BATCH_BUFFER); + intel_ring_emit(ring, cs_offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE)); + intel_ring_emit(ring, cs_offset + len - 8); + intel_ring_advance(ring); + } return 0; } @@ -1596,6 +1635,27 @@ int intel_init_render_ring_buffer(struct drm_device *dev) ring->init = init_render_ring; ring->cleanup = render_ring_cleanup; + /* Workaround batchbuffer to combat CS tlb bug. */ + if (HAS_BROKEN_CS_TLB(dev)) { + struct drm_i915_gem_object *obj; + int ret; + + obj = i915_gem_alloc_object(dev, I830_BATCH_LIMIT); + if (obj == NULL) { + DRM_ERROR("Failed to allocate batch bo\n"); + return -ENOMEM; + } + + ret = i915_gem_object_pin(obj, 0, true, false); + if (ret != 0) { + drm_gem_object_unreference(&obj->base); + DRM_ERROR("Failed to ping batch bo\n"); + return ret; + } + + ring->private = obj; + } + return intel_init_ring_buffer(dev, ring); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 526182e..6af87cd 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -94,6 +94,7 @@ struct intel_ring_buffer { u32 offset, u32 length, unsigned flags); #define I915_DISPATCH_SECURE 0x1 +#define I915_DISPATCH_PINNED 0x2 void (*cleanup)(struct intel_ring_buffer *ring); int (*sync_to)(struct intel_ring_buffer *ring, struct intel_ring_buffer *to, diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnve0.fuc b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnve0.fuc index 7b715fd..62ab231 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnve0.fuc +++ b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnve0.fuc @@ -57,6 +57,11 @@ chipsets: .b16 #nve4_gpc_mmio_tail .b16 #nve4_tpc_mmio_head .b16 #nve4_tpc_mmio_tail +.b8 0xe6 0 0 0 +.b16 #nve4_gpc_mmio_head +.b16 #nve4_gpc_mmio_tail +.b16 #nve4_tpc_mmio_head +.b16 #nve4_tpc_mmio_tail .b8 0 0 0 0 // GPC mmio lists diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnve0.fuc.h b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnve0.fuc.h index 26c2165..09ee470 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnve0.fuc.h +++ b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/gpcnve0.fuc.h @@ -34,13 +34,16 @@ uint32_t nve0_grgpc_data[] = { 0x00000000, /* 0x0064: chipsets */ 0x000000e4, - 0x01040080, - 0x014c0104, + 0x0110008c, + 0x01580110, 0x000000e7, - 0x01040080, - 0x014c0104, + 0x0110008c, + 0x01580110, + 0x000000e6, + 0x0110008c, + 0x01580110, 0x00000000, -/* 0x0080: nve4_gpc_mmio_head */ +/* 0x008c: nve4_gpc_mmio_head */ 0x00000380, 0x04000400, 0x0800040c, @@ -74,8 +77,8 @@ uint32_t nve0_grgpc_data[] = { 0x14003100, 0x000031d0, 0x040031e0, -/* 0x0104: nve4_gpc_mmio_tail */ -/* 0x0104: nve4_tpc_mmio_head */ +/* 0x0110: nve4_gpc_mmio_tail */ +/* 0x0110: nve4_tpc_mmio_head */ 0x00000048, 0x00000064, 0x00000088, diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnvc0.fuc b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnvc0.fuc index acfc457..0bcfa4d 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnvc0.fuc +++ b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnvc0.fuc @@ -754,6 +754,16 @@ ctx_mmio_exec: // on load it means: "a save preceeded this load" // ctx_xfer: + // according to mwk, some kind of wait for idle + mov $r15 0xc00 + shl b32 $r15 6 + mov $r14 4 + iowr I[$r15 + 0x200] $r14 + ctx_xfer_idle: + iord $r14 I[$r15 + 0x000] + and $r14 0x2000 + bra ne #ctx_xfer_idle + bra not $p1 #ctx_xfer_pre bra $p2 #ctx_xfer_pre_load ctx_xfer_pre: diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnvc0.fuc.h b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnvc0.fuc.h index 85a8d55..bb03d2a 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnvc0.fuc.h +++ b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnvc0.fuc.h @@ -799,79 +799,80 @@ uint32_t nvc0_grhub_code[] = { 0x01fa0613, 0xf803f806, /* 0x0829: ctx_xfer */ - 0x0611f400, -/* 0x082f: ctx_xfer_pre */ - 0xf01102f4, - 0x21f510f7, - 0x21f50698, - 0x11f40631, -/* 0x083d: ctx_xfer_pre_load */ - 0x02f7f01c, - 0x065721f5, - 0x066621f5, - 0x067821f5, - 0x21f5f4bd, - 0x21f50657, -/* 0x0856: ctx_xfer_exec */ - 0x019806b8, - 0x1427f116, - 0x0624b604, - 0xf10020d0, - 0xf0a500e7, - 0x1fb941e3, - 0x8d21f402, - 0xf004e0b6, - 0x2cf001fc, - 0x0124b602, - 0xf405f2fd, - 0x17f18d21, - 0x13f04afc, - 0x0c27f002, - 0xf50012d0, - 0xf1020721, - 0xf047fc27, - 0x20d00223, - 0x012cf000, - 0xd00320b6, - 0xacf00012, - 0x06a5f001, - 0x9800b7f0, - 0x0d98140c, - 0x00e7f015, - 0x015c21f5, - 0xf508a7f0, - 0xf5010321, - 0xf4020721, - 0xa7f02201, - 0xc921f40c, - 0x0a1017f1, - 0xf00614b6, - 0x12d00527, -/* 0x08dd: ctx_xfer_post_save_wait */ - 0x0012cf00, - 0xf40522fd, - 0x02f4fa1b, -/* 0x08e9: ctx_xfer_post */ - 0x02f7f032, - 0x065721f5, - 0x21f5f4bd, - 0x21f50698, - 0x21f50226, - 0xf4bd0666, - 0x065721f5, - 0x981011f4, - 0x11fd8001, - 0x070bf405, - 0x07df21f5, -/* 0x0914: ctx_xfer_no_post_mmio */ - 0x064921f5, -/* 0x0918: ctx_xfer_done */ - 0x000000f8, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, + 0x00f7f100, + 0x06f4b60c, + 0xd004e7f0, +/* 0x0836: ctx_xfer_idle */ + 0xfecf80fe, + 0x00e4f100, + 0xf91bf420, + 0xf40611f4, +/* 0x0846: ctx_xfer_pre */ + 0xf7f01102, + 0x9821f510, + 0x3121f506, + 0x1c11f406, +/* 0x0854: ctx_xfer_pre_load */ + 0xf502f7f0, + 0xf5065721, + 0xf5066621, + 0xbd067821, + 0x5721f5f4, + 0xb821f506, +/* 0x086d: ctx_xfer_exec */ + 0x16019806, + 0x041427f1, + 0xd00624b6, + 0xe7f10020, + 0xe3f0a500, + 0x021fb941, + 0xb68d21f4, + 0xfcf004e0, + 0x022cf001, + 0xfd0124b6, + 0x21f405f2, + 0xfc17f18d, + 0x0213f04a, + 0xd00c27f0, + 0x21f50012, + 0x27f10207, + 0x23f047fc, + 0x0020d002, + 0xb6012cf0, + 0x12d00320, + 0x01acf000, + 0xf006a5f0, + 0x0c9800b7, + 0x150d9814, + 0xf500e7f0, + 0xf0015c21, + 0x21f508a7, + 0x21f50103, + 0x01f40207, + 0x0ca7f022, + 0xf1c921f4, + 0xb60a1017, + 0x27f00614, + 0x0012d005, +/* 0x08f4: ctx_xfer_post_save_wait */ + 0xfd0012cf, + 0x1bf40522, + 0x3202f4fa, +/* 0x0900: ctx_xfer_post */ + 0xf502f7f0, + 0xbd065721, + 0x9821f5f4, + 0x2621f506, + 0x6621f502, + 0xf5f4bd06, + 0xf4065721, + 0x01981011, + 0x0511fd80, + 0xf5070bf4, +/* 0x092b: ctx_xfer_no_post_mmio */ + 0xf507df21, +/* 0x092f: ctx_xfer_done */ + 0xf8064921, 0x00000000, 0x00000000, 0x00000000, diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnve0.fuc b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnve0.fuc index 138eeaa..7fe9d7c 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnve0.fuc +++ b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnve0.fuc @@ -44,6 +44,9 @@ chipsets: .b8 0xe7 0 0 0 .b16 #nve4_hub_mmio_head .b16 #nve4_hub_mmio_tail +.b8 0xe6 0 0 0 +.b16 #nve4_hub_mmio_head +.b16 #nve4_hub_mmio_tail .b8 0 0 0 0 nve4_hub_mmio_head: @@ -680,6 +683,16 @@ ctx_mmio_exec: // on load it means: "a save preceeded this load" // ctx_xfer: + // according to mwk, some kind of wait for idle + mov $r15 0xc00 + shl b32 $r15 6 + mov $r14 4 + iowr I[$r15 + 0x200] $r14 + ctx_xfer_idle: + iord $r14 I[$r15 + 0x000] + and $r14 0x2000 + bra ne #ctx_xfer_idle + bra not $p1 #ctx_xfer_pre bra $p2 #ctx_xfer_pre_load ctx_xfer_pre: diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnve0.fuc.h b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnve0.fuc.h index decf0c6..e3421af 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnve0.fuc.h +++ b/drivers/gpu/drm/nouveau/core/engine/graph/fuc/hubnve0.fuc.h @@ -30,11 +30,13 @@ uint32_t nve0_grhub_data[] = { 0x00000000, /* 0x005c: chipsets */ 0x000000e4, - 0x013c0070, + 0x01440078, 0x000000e7, - 0x013c0070, + 0x01440078, + 0x000000e6, + 0x01440078, 0x00000000, -/* 0x0070: nve4_hub_mmio_head */ +/* 0x0078: nve4_hub_mmio_head */ 0x0417e91c, 0x04400204, 0x18404010, @@ -86,9 +88,7 @@ uint32_t nve0_grhub_data[] = { 0x00408840, 0x08408900, 0x00408980, -/* 0x013c: nve4_hub_mmio_tail */ - 0x00000000, - 0x00000000, +/* 0x0144: nve4_hub_mmio_tail */ 0x00000000, 0x00000000, 0x00000000, @@ -781,77 +781,78 @@ uint32_t nve0_grhub_code[] = { 0x0613f002, 0xf80601fa, /* 0x07fb: ctx_xfer */ - 0xf400f803, - 0x02f40611, -/* 0x0801: ctx_xfer_pre */ - 0x10f7f00d, - 0x067221f5, -/* 0x080b: ctx_xfer_pre_load */ - 0xf01c11f4, - 0x21f502f7, - 0x21f50631, - 0x21f50640, - 0xf4bd0652, - 0x063121f5, - 0x069221f5, -/* 0x0824: ctx_xfer_exec */ - 0xf1160198, - 0xb6041427, - 0x20d00624, - 0x00e7f100, - 0x41e3f0a5, - 0xf4021fb9, - 0xe0b68d21, - 0x01fcf004, - 0xb6022cf0, - 0xf2fd0124, - 0x8d21f405, - 0x4afc17f1, - 0xf00213f0, - 0x12d00c27, - 0x0721f500, - 0xfc27f102, - 0x0223f047, - 0xf00020d0, - 0x20b6012c, - 0x0012d003, - 0xf001acf0, - 0xb7f006a5, - 0x140c9800, - 0xf0150d98, - 0x21f500e7, - 0xa7f0015c, - 0x0321f508, - 0x0721f501, - 0x2201f402, - 0xf40ca7f0, - 0x17f1c921, - 0x14b60a10, - 0x0527f006, -/* 0x08ab: ctx_xfer_post_save_wait */ - 0xcf0012d0, - 0x22fd0012, - 0xfa1bf405, -/* 0x08b7: ctx_xfer_post */ - 0xf02e02f4, - 0x21f502f7, - 0xf4bd0631, - 0x067221f5, - 0x022621f5, - 0x064021f5, - 0x21f5f4bd, - 0x11f40631, - 0x80019810, - 0xf40511fd, - 0x21f5070b, -/* 0x08e2: ctx_xfer_no_post_mmio */ -/* 0x08e2: ctx_xfer_done */ - 0x00f807b1, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, - 0x00000000, + 0xf100f803, + 0xb60c00f7, + 0xe7f006f4, + 0x80fed004, +/* 0x0808: ctx_xfer_idle */ + 0xf100fecf, + 0xf42000e4, + 0x11f4f91b, + 0x0d02f406, +/* 0x0818: ctx_xfer_pre */ + 0xf510f7f0, + 0xf4067221, +/* 0x0822: ctx_xfer_pre_load */ + 0xf7f01c11, + 0x3121f502, + 0x4021f506, + 0x5221f506, + 0xf5f4bd06, + 0xf5063121, +/* 0x083b: ctx_xfer_exec */ + 0x98069221, + 0x27f11601, + 0x24b60414, + 0x0020d006, + 0xa500e7f1, + 0xb941e3f0, + 0x21f4021f, + 0x04e0b68d, + 0xf001fcf0, + 0x24b6022c, + 0x05f2fd01, + 0xf18d21f4, + 0xf04afc17, + 0x27f00213, + 0x0012d00c, + 0x020721f5, + 0x47fc27f1, + 0xd00223f0, + 0x2cf00020, + 0x0320b601, + 0xf00012d0, + 0xa5f001ac, + 0x00b7f006, + 0x98140c98, + 0xe7f0150d, + 0x5c21f500, + 0x08a7f001, + 0x010321f5, + 0x020721f5, + 0xf02201f4, + 0x21f40ca7, + 0x1017f1c9, + 0x0614b60a, + 0xd00527f0, +/* 0x08c2: ctx_xfer_post_save_wait */ + 0x12cf0012, + 0x0522fd00, + 0xf4fa1bf4, +/* 0x08ce: ctx_xfer_post */ + 0xf7f02e02, + 0x3121f502, + 0xf5f4bd06, + 0xf5067221, + 0xf5022621, + 0xbd064021, + 0x3121f5f4, + 0x1011f406, + 0xfd800198, + 0x0bf40511, + 0xb121f507, +/* 0x08f9: ctx_xfer_no_post_mmio */ +/* 0x08f9: ctx_xfer_done */ + 0x0000f807, 0x00000000, }; diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c b/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c index 47a0208..45aff5f 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c +++ b/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.c @@ -516,18 +516,9 @@ nvc0_graph_ctor(struct nouveau_object *parent, struct nouveau_object *engine, { struct nouveau_device *device = nv_device(parent); struct nvc0_graph_priv *priv; - bool enable = true; int ret, i; - switch (device->chipset) { - case 0xd9: /* known broken without binary driver firmware */ - enable = false; - break; - default: - break; - } - - ret = nouveau_graph_create(parent, engine, oclass, enable, &priv); + ret = nouveau_graph_create(parent, engine, oclass, true, &priv); *pobject = nv_object(priv); if (ret) return ret; diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.h b/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.h index 18d2210..a1e78de 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.h +++ b/drivers/gpu/drm/nouveau/core/engine/graph/nvc0.h @@ -121,6 +121,7 @@ nvc0_graph_class(void *obj) return 0x9297; case 0xe4: case 0xe7: + case 0xe6: return 0xa097; default: return 0; diff --git a/drivers/gpu/drm/nouveau/core/engine/graph/nve0.c b/drivers/gpu/drm/nouveau/core/engine/graph/nve0.c index 539d4c7..9f82e97 100644 --- a/drivers/gpu/drm/nouveau/core/engine/graph/nve0.c +++ b/drivers/gpu/drm/nouveau/core/engine/graph/nve0.c @@ -203,7 +203,7 @@ nve0_graph_ctor(struct nouveau_object *parent, struct nouveau_object *engine, struct nvc0_graph_priv *priv; int ret, i; - ret = nouveau_graph_create(parent, engine, oclass, false, &priv); + ret = nouveau_graph_create(parent, engine, oclass, true, &priv); *pobject = nv_object(priv); if (ret) return ret; @@ -252,6 +252,7 @@ nve0_graph_ctor(struct nouveau_object *parent, struct nouveau_object *engine, priv->magic_not_rop_nr = 1; break; case 0xe7: + case 0xe6: priv->magic_not_rop_nr = 1; break; default: diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/bios.h b/drivers/gpu/drm/nouveau/core/include/subdev/bios.h index d145b25..5bd1ca8 100644 --- a/drivers/gpu/drm/nouveau/core/include/subdev/bios.h +++ b/drivers/gpu/drm/nouveau/core/include/subdev/bios.h @@ -17,6 +17,7 @@ struct nouveau_bios { u8 chip; u8 minor; u8 micro; + u8 patch; } version; }; diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/bios/gpio.h b/drivers/gpu/drm/nouveau/core/include/subdev/bios/gpio.h index 2bf1780..e6563b5 100644 --- a/drivers/gpu/drm/nouveau/core/include/subdev/bios/gpio.h +++ b/drivers/gpu/drm/nouveau/core/include/subdev/bios/gpio.h @@ -25,9 +25,11 @@ struct dcb_gpio_func { u8 param; }; -u16 dcb_gpio_table(struct nouveau_bios *); -u16 dcb_gpio_entry(struct nouveau_bios *, int idx, int ent, u8 *ver); -int dcb_gpio_parse(struct nouveau_bios *, int idx, u8 func, u8 line, +u16 dcb_gpio_table(struct nouveau_bios *, u8 *ver, u8 *hdr, u8 *cnt, u8 *len); +u16 dcb_gpio_entry(struct nouveau_bios *, int idx, int ent, u8 *ver, u8 *len); +u16 dcb_gpio_parse(struct nouveau_bios *, int idx, int ent, u8 *ver, u8 *len, struct dcb_gpio_func *); +u16 dcb_gpio_match(struct nouveau_bios *, int idx, u8 func, u8 line, + u8 *ver, u8 *len, struct dcb_gpio_func *); #endif diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/bios/init.h b/drivers/gpu/drm/nouveau/core/include/subdev/bios/init.h index e69a8bd..ca2f6bf 100644 --- a/drivers/gpu/drm/nouveau/core/include/subdev/bios/init.h +++ b/drivers/gpu/drm/nouveau/core/include/subdev/bios/init.h @@ -13,6 +13,7 @@ struct nvbios_init { u32 nested; u16 repeat; u16 repend; + u32 ramcfg; }; int nvbios_exec(struct nvbios_init *); diff --git a/drivers/gpu/drm/nouveau/core/include/subdev/gpio.h b/drivers/gpu/drm/nouveau/core/include/subdev/gpio.h index 9ea2b12..b75e8f1 100644 --- a/drivers/gpu/drm/nouveau/core/include/subdev/gpio.h +++ b/drivers/gpu/drm/nouveau/core/include/subdev/gpio.h @@ -11,7 +11,7 @@ struct nouveau_gpio { struct nouveau_subdev base; /* hardware interfaces */ - void (*reset)(struct nouveau_gpio *); + void (*reset)(struct nouveau_gpio *, u8 func); int (*drive)(struct nouveau_gpio *, int line, int dir, int out); int (*sense)(struct nouveau_gpio *, int line); void (*irq_enable)(struct nouveau_gpio *, int line, bool); diff --git a/drivers/gpu/drm/nouveau/core/subdev/bios/base.c b/drivers/gpu/drm/nouveau/core/subdev/bios/base.c index dd11194..f621f69 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/bios/base.c +++ b/drivers/gpu/drm/nouveau/core/subdev/bios/base.c @@ -447,6 +447,7 @@ nouveau_bios_ctor(struct nouveau_object *parent, bios->version.chip = nv_ro08(bios, bit_i.offset + 2); bios->version.minor = nv_ro08(bios, bit_i.offset + 1); bios->version.micro = nv_ro08(bios, bit_i.offset + 0); + bios->version.patch = nv_ro08(bios, bit_i.offset + 4); } else if (bmp_version(bios)) { bios->version.major = nv_ro08(bios, bios->bmp_offset + 13); @@ -455,9 +456,9 @@ nouveau_bios_ctor(struct nouveau_object *parent, bios->version.micro = nv_ro08(bios, bios->bmp_offset + 10); } - nv_info(bios, "version %02x.%02x.%02x.%02x\n", + nv_info(bios, "version %02x.%02x.%02x.%02x.%02x\n", bios->version.major, bios->version.chip, - bios->version.minor, bios->version.micro); + bios->version.minor, bios->version.micro, bios->version.patch); return 0; } diff --git a/drivers/gpu/drm/nouveau/core/subdev/bios/gpio.c b/drivers/gpu/drm/nouveau/core/subdev/bios/gpio.c index c90d4aa..c84e93f 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/bios/gpio.c +++ b/drivers/gpu/drm/nouveau/core/subdev/bios/gpio.c @@ -27,84 +27,105 @@ #include <subdev/bios/gpio.h> u16 -dcb_gpio_table(struct nouveau_bios *bios) +dcb_gpio_table(struct nouveau_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len) { - u8 ver, hdr, cnt, len; - u16 dcb = dcb_table(bios, &ver, &hdr, &cnt, &len); + u16 data = 0x0000; + u16 dcb = dcb_table(bios, ver, hdr, cnt, len); if (dcb) { - if (ver >= 0x30 && hdr >= 0x0c) - return nv_ro16(bios, dcb + 0x0a); - if (ver >= 0x22 && nv_ro08(bios, dcb - 1) >= 0x13) - return nv_ro16(bios, dcb - 0x0f); + if (*ver >= 0x30 && *hdr >= 0x0c) + data = nv_ro16(bios, dcb + 0x0a); + else + if (*ver >= 0x22 && nv_ro08(bios, dcb - 1) >= 0x13) + data = nv_ro16(bios, dcb - 0x0f); + + if (data) { + *ver = nv_ro08(bios, data + 0x00); + if (*ver < 0x30) { + *hdr = 3; + *cnt = nv_ro08(bios, data + 0x02); + *len = nv_ro08(bios, data + 0x01); + } else + if (*ver <= 0x41) { + *hdr = nv_ro08(bios, data + 0x01); + *cnt = nv_ro08(bios, data + 0x02); + *len = nv_ro08(bios, data + 0x03); + } else { + data = 0x0000; + } + } } - return 0x0000; + return data; } u16 -dcb_gpio_entry(struct nouveau_bios *bios, int idx, int ent, u8 *ver) +dcb_gpio_entry(struct nouveau_bios *bios, int idx, int ent, u8 *ver, u8 *len) { - u16 gpio = dcb_gpio_table(bios); - if (gpio) { - *ver = nv_ro08(bios, gpio); - if (*ver < 0x30 && ent < nv_ro08(bios, gpio + 2)) - return gpio + 3 + (ent * nv_ro08(bios, gpio + 1)); - else if (ent < nv_ro08(bios, gpio + 2)) - return gpio + nv_ro08(bios, gpio + 1) + - (ent * nv_ro08(bios, gpio + 3)); - } + u8 hdr, cnt; + u16 gpio = !idx ? dcb_gpio_table(bios, ver, &hdr, &cnt, len) : 0x0000; + if (gpio && ent < cnt) + return gpio + hdr + (ent * *len); return 0x0000; } -int -dcb_gpio_parse(struct nouveau_bios *bios, int idx, u8 func, u8 line, +u16 +dcb_gpio_parse(struct nouveau_bios *bios, int idx, int ent, u8 *ver, u8 *len, struct dcb_gpio_func *gpio) { - u8 ver, hdr, cnt, len; - u16 entry; - int i = -1; - - while ((entry = dcb_gpio_entry(bios, idx, ++i, &ver))) { - if (ver < 0x40) { - u16 data = nv_ro16(bios, entry); + u16 data = dcb_gpio_entry(bios, idx, ent, ver, len); + if (data) { + if (*ver < 0x40) { + u16 info = nv_ro16(bios, data); *gpio = (struct dcb_gpio_func) { - .line = (data & 0x001f) >> 0, - .func = (data & 0x07e0) >> 5, - .log[0] = (data & 0x1800) >> 11, - .log[1] = (data & 0x6000) >> 13, - .param = !!(data & 0x8000), + .line = (info & 0x001f) >> 0, + .func = (info & 0x07e0) >> 5, + .log[0] = (info & 0x1800) >> 11, + .log[1] = (info & 0x6000) >> 13, + .param = !!(info & 0x8000), }; } else - if (ver < 0x41) { - u32 data = nv_ro32(bios, entry); + if (*ver < 0x41) { + u32 info = nv_ro32(bios, data); *gpio = (struct dcb_gpio_func) { - .line = (data & 0x0000001f) >> 0, - .func = (data & 0x0000ff00) >> 8, - .log[0] = (data & 0x18000000) >> 27, - .log[1] = (data & 0x60000000) >> 29, - .param = !!(data & 0x80000000), + .line = (info & 0x0000001f) >> 0, + .func = (info & 0x0000ff00) >> 8, + .log[0] = (info & 0x18000000) >> 27, + .log[1] = (info & 0x60000000) >> 29, + .param = !!(info & 0x80000000), }; } else { - u32 data = nv_ro32(bios, entry + 0); - u8 data1 = nv_ro32(bios, entry + 4); + u32 info = nv_ro32(bios, data + 0); + u8 info1 = nv_ro32(bios, data + 4); *gpio = (struct dcb_gpio_func) { - .line = (data & 0x0000003f) >> 0, - .func = (data & 0x0000ff00) >> 8, - .log[0] = (data1 & 0x30) >> 4, - .log[1] = (data1 & 0xc0) >> 6, - .param = !!(data & 0x80000000), + .line = (info & 0x0000003f) >> 0, + .func = (info & 0x0000ff00) >> 8, + .log[0] = (info1 & 0x30) >> 4, + .log[1] = (info1 & 0xc0) >> 6, + .param = !!(info & 0x80000000), }; } + } + + return data; +} +u16 +dcb_gpio_match(struct nouveau_bios *bios, int idx, u8 func, u8 line, + u8 *ver, u8 *len, struct dcb_gpio_func *gpio) +{ + u8 hdr, cnt, i = 0; + u16 data; + + while ((data = dcb_gpio_parse(bios, idx, i++, ver, len, gpio))) { if ((line == 0xff || line == gpio->line) && (func == 0xff || func == gpio->func)) - return 0; + return data; } /* DCB 2.2, fixed TVDAC GPIO data */ - if ((entry = dcb_table(bios, &ver, &hdr, &cnt, &len))) { - if (ver >= 0x22 && ver < 0x30 && func == DCB_GPIO_TVDAC0) { - u8 conf = nv_ro08(bios, entry - 5); - u8 addr = nv_ro08(bios, entry - 4); + if ((data = dcb_table(bios, ver, &hdr, &cnt, len))) { + if (*ver >= 0x22 && *ver < 0x30 && func == DCB_GPIO_TVDAC0) { + u8 conf = nv_ro08(bios, data - 5); + u8 addr = nv_ro08(bios, data - 4); if (conf & 0x01) { *gpio = (struct dcb_gpio_func) { .func = DCB_GPIO_TVDAC0, @@ -112,10 +133,11 @@ dcb_gpio_parse(struct nouveau_bios *bios, int idx, u8 func, u8 line, .log[0] = !!(conf & 0x02), .log[1] = !(conf & 0x02), }; - return 0; + *ver = 0x00; + return data; } } } - return -EINVAL; + return 0x0000; } diff --git a/drivers/gpu/drm/nouveau/core/subdev/bios/init.c b/drivers/gpu/drm/nouveau/core/subdev/bios/init.c index ae168bb..2917d55 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/bios/init.c +++ b/drivers/gpu/drm/nouveau/core/subdev/bios/init.c @@ -2,11 +2,12 @@ #include <core/device.h> #include <subdev/bios.h> -#include <subdev/bios/conn.h> #include <subdev/bios/bmp.h> #include <subdev/bios/bit.h> +#include <subdev/bios/conn.h> #include <subdev/bios/dcb.h> #include <subdev/bios/dp.h> +#include <subdev/bios/gpio.h> #include <subdev/bios/init.h> #include <subdev/devinit.h> #include <subdev/clock.h> @@ -410,9 +411,25 @@ init_ram_restrict_group_count(struct nvbios_init *init) } static u8 +init_ram_restrict_strap(struct nvbios_init *init) +{ + /* This appears to be the behaviour of the VBIOS parser, and *is* + * important to cache the NV_PEXTDEV_BOOT0 on later chipsets to + * avoid fucking up the memory controller (somehow) by reading it + * on every INIT_RAM_RESTRICT_ZM_GROUP opcode. + * + * Preserving the non-caching behaviour on earlier chipsets just + * in case *not* re-reading the strap causes similar breakage. + */ + if (!init->ramcfg || init->bios->version.major < 0x70) + init->ramcfg = init_rd32(init, 0x101000); + return (init->ramcfg & 0x00000003c) >> 2; +} + +static u8 init_ram_restrict(struct nvbios_init *init) { - u32 strap = (init_rd32(init, 0x101000) & 0x0000003c) >> 2; + u8 strap = init_ram_restrict_strap(init); u16 table = init_ram_restrict_table(init); if (table) return nv_ro08(init->bios, table + strap); @@ -1781,7 +1798,7 @@ init_gpio(struct nvbios_init *init) init->offset += 1; if (init_exec(init) && gpio && gpio->reset) - gpio->reset(gpio); + gpio->reset(gpio, DCB_GPIO_UNUSED); } /** @@ -1995,6 +2012,47 @@ init_i2c_long_if(struct nvbios_init *init) init_exec_set(init, false); } +/** + * INIT_GPIO_NE - opcode 0xa9 + * + */ +static void +init_gpio_ne(struct nvbios_init *init) +{ + struct nouveau_bios *bios = init->bios; + struct nouveau_gpio *gpio = nouveau_gpio(bios); + struct dcb_gpio_func func; + u8 count = nv_ro08(bios, init->offset + 1); + u8 idx = 0, ver, len; + u16 data, i; + + trace("GPIO_NE\t"); + init->offset += 2; + + for (i = init->offset; i < init->offset + count; i++) + cont("0x%02x ", nv_ro08(bios, i)); + cont("\n"); + + while ((data = dcb_gpio_parse(bios, 0, idx++, &ver, &len, &func))) { + if (func.func != DCB_GPIO_UNUSED) { + for (i = init->offset; i < init->offset + count; i++) { + if (func.func == nv_ro08(bios, i)) + break; + } + + trace("\tFUNC[0x%02x]", func.func); + if (i == (init->offset + count)) { + cont(" *"); + if (init_exec(init) && gpio && gpio->reset) + gpio->reset(gpio, func.func); + } + cont("\n"); + } + } + + init->offset += count; +} + static struct nvbios_init_opcode { void (*exec)(struct nvbios_init *); } init_opcode[] = { @@ -2059,6 +2117,7 @@ static struct nvbios_init_opcode { [0x98] = { init_auxch }, [0x99] = { init_zm_auxch }, [0x9a] = { init_i2c_long_if }, + [0xa9] = { init_gpio_ne }, }; #define init_opcode_nr (sizeof(init_opcode) / sizeof(init_opcode[0])) diff --git a/drivers/gpu/drm/nouveau/core/subdev/device/nve0.c b/drivers/gpu/drm/nouveau/core/subdev/device/nve0.c index 9b7881e..03a6528 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/device/nve0.c +++ b/drivers/gpu/drm/nouveau/core/subdev/device/nve0.c @@ -109,6 +109,34 @@ nve0_identify(struct nouveau_device *device) device->oclass[NVDEV_ENGINE_VP ] = &nve0_vp_oclass; device->oclass[NVDEV_ENGINE_PPP ] = &nvc0_ppp_oclass; break; + case 0xe6: + device->cname = "GK106"; + device->oclass[NVDEV_SUBDEV_VBIOS ] = &nouveau_bios_oclass; + device->oclass[NVDEV_SUBDEV_GPIO ] = &nvd0_gpio_oclass; + device->oclass[NVDEV_SUBDEV_I2C ] = &nouveau_i2c_oclass; + device->oclass[NVDEV_SUBDEV_CLOCK ] = &nvc0_clock_oclass; + device->oclass[NVDEV_SUBDEV_THERM ] = &nv50_therm_oclass; + device->oclass[NVDEV_SUBDEV_MXM ] = &nv50_mxm_oclass; + device->oclass[NVDEV_SUBDEV_DEVINIT] = &nv50_devinit_oclass; + device->oclass[NVDEV_SUBDEV_MC ] = &nvc0_mc_oclass; + device->oclass[NVDEV_SUBDEV_TIMER ] = &nv04_timer_oclass; + device->oclass[NVDEV_SUBDEV_FB ] = &nvc0_fb_oclass; + device->oclass[NVDEV_SUBDEV_LTCG ] = &nvc0_ltcg_oclass; + device->oclass[NVDEV_SUBDEV_IBUS ] = &nve0_ibus_oclass; + device->oclass[NVDEV_SUBDEV_INSTMEM] = &nv50_instmem_oclass; + device->oclass[NVDEV_SUBDEV_VM ] = &nvc0_vmmgr_oclass; + device->oclass[NVDEV_SUBDEV_BAR ] = &nvc0_bar_oclass; + device->oclass[NVDEV_ENGINE_DMAOBJ ] = &nvd0_dmaeng_oclass; + device->oclass[NVDEV_ENGINE_FIFO ] = &nve0_fifo_oclass; + device->oclass[NVDEV_ENGINE_SW ] = &nvc0_software_oclass; + device->oclass[NVDEV_ENGINE_GR ] = &nve0_graph_oclass; + device->oclass[NVDEV_ENGINE_DISP ] = &nve0_disp_oclass; + device->oclass[NVDEV_ENGINE_COPY0 ] = &nve0_copy0_oclass; + device->oclass[NVDEV_ENGINE_COPY1 ] = &nve0_copy1_oclass; + device->oclass[NVDEV_ENGINE_BSP ] = &nve0_bsp_oclass; + device->oclass[NVDEV_ENGINE_VP ] = &nve0_vp_oclass; + device->oclass[NVDEV_ENGINE_PPP ] = &nvc0_ppp_oclass; + break; default: nv_fatal(device, "unknown Kepler chipset\n"); return -EINVAL; diff --git a/drivers/gpu/drm/nouveau/core/subdev/gpio/base.c b/drivers/gpu/drm/nouveau/core/subdev/gpio/base.c index acf818c..9fb0f9b 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/gpio/base.c +++ b/drivers/gpu/drm/nouveau/core/subdev/gpio/base.c @@ -43,10 +43,15 @@ static int nouveau_gpio_find(struct nouveau_gpio *gpio, int idx, u8 tag, u8 line, struct dcb_gpio_func *func) { + struct nouveau_bios *bios = nouveau_bios(gpio); + u8 ver, len; + u16 data; + if (line == 0xff && tag == 0xff) return -EINVAL; - if (!dcb_gpio_parse(nouveau_bios(gpio), idx, tag, line, func)) + data = dcb_gpio_match(bios, idx, tag, line, &ver, &len, func); + if (data) return 0; /* Apple iMac G4 NV18 */ @@ -265,7 +270,7 @@ nouveau_gpio_init(struct nouveau_gpio *gpio) int ret = nouveau_subdev_init(&gpio->base); if (ret == 0 && gpio->reset) { if (dmi_check_system(gpio_reset_ids)) - gpio->reset(gpio); + gpio->reset(gpio, DCB_GPIO_UNUSED); } return ret; } diff --git a/drivers/gpu/drm/nouveau/core/subdev/gpio/nv50.c b/drivers/gpu/drm/nouveau/core/subdev/gpio/nv50.c index f3502c9..bf13a12 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/gpio/nv50.c +++ b/drivers/gpu/drm/nouveau/core/subdev/gpio/nv50.c @@ -29,15 +29,15 @@ struct nv50_gpio_priv { }; static void -nv50_gpio_reset(struct nouveau_gpio *gpio) +nv50_gpio_reset(struct nouveau_gpio *gpio, u8 match) { struct nouveau_bios *bios = nouveau_bios(gpio); struct nv50_gpio_priv *priv = (void *)gpio; + u8 ver, len; u16 entry; - u8 ver; int ent = -1; - while ((entry = dcb_gpio_entry(bios, 0, ++ent, &ver))) { + while ((entry = dcb_gpio_entry(bios, 0, ++ent, &ver, &len))) { static const u32 regs[] = { 0xe100, 0xe28c }; u32 data = nv_ro32(bios, entry); u8 line = (data & 0x0000001f); @@ -48,7 +48,8 @@ nv50_gpio_reset(struct nouveau_gpio *gpio) u32 val = (unk1 << 16) | unk0; u32 reg = regs[line >> 4]; line &= 0x0f; - if (func == 0xff) + if ( func == DCB_GPIO_UNUSED || + (match != DCB_GPIO_UNUSED && match != func)) continue; gpio->set(gpio, 0, func, line, defs); diff --git a/drivers/gpu/drm/nouveau/core/subdev/gpio/nvd0.c b/drivers/gpu/drm/nouveau/core/subdev/gpio/nvd0.c index 8d18fca..83e8b8f 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/gpio/nvd0.c +++ b/drivers/gpu/drm/nouveau/core/subdev/gpio/nvd0.c @@ -29,15 +29,15 @@ struct nvd0_gpio_priv { }; static void -nvd0_gpio_reset(struct nouveau_gpio *gpio) +nvd0_gpio_reset(struct nouveau_gpio *gpio, u8 match) { struct nouveau_bios *bios = nouveau_bios(gpio); struct nvd0_gpio_priv *priv = (void *)gpio; + u8 ver, len; u16 entry; - u8 ver; int ent = -1; - while ((entry = dcb_gpio_entry(bios, 0, ++ent, &ver))) { + while ((entry = dcb_gpio_entry(bios, 0, ++ent, &ver, &len))) { u32 data = nv_ro32(bios, entry); u8 line = (data & 0x0000003f); u8 defs = !!(data & 0x00000080); @@ -45,7 +45,8 @@ nvd0_gpio_reset(struct nouveau_gpio *gpio) u8 unk0 = (data & 0x00ff0000) >> 16; u8 unk1 = (data & 0x1f000000) >> 24; - if (func == 0xff) + if ( func == DCB_GPIO_UNUSED || + (match != DCB_GPIO_UNUSED && match != func)) continue; gpio->set(gpio, 0, func, line, defs); diff --git a/drivers/gpu/drm/nouveau/core/subdev/mxm/base.c b/drivers/gpu/drm/nouveau/core/subdev/mxm/base.c index 93e3ddf..e286e13 100644 --- a/drivers/gpu/drm/nouveau/core/subdev/mxm/base.c +++ b/drivers/gpu/drm/nouveau/core/subdev/mxm/base.c @@ -260,7 +260,7 @@ nouveau_mxm_create_(struct nouveau_object *parent, data = mxm_table(bios, &ver, &len); if (!data || !(ver = nv_ro08(bios, data))) { - nv_info(mxm, "no VBIOS data, nothing to do\n"); + nv_debug(mxm, "no VBIOS data, nothing to do\n"); return 0; } diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index 74c6b42..7a44566 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -2654,6 +2654,35 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p, ib[idx+4] = upper_32_bits(offset) & 0xff; } break; + case PACKET3_MEM_WRITE: + { + u64 offset; + + if (pkt->count != 3) { + DRM_ERROR("bad MEM_WRITE (invalid count)\n"); + return -EINVAL; + } + r = evergreen_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("bad MEM_WRITE (missing reloc)\n"); + return -EINVAL; + } + offset = radeon_get_ib_value(p, idx+0); + offset += ((u64)(radeon_get_ib_value(p, idx+1) & 0xff)) << 32UL; + if (offset & 0x7) { + DRM_ERROR("bad MEM_WRITE (address not qwords aligned)\n"); + return -EINVAL; + } + if ((offset + 8) > radeon_bo_size(reloc->robj)) { + DRM_ERROR("bad MEM_WRITE bo too small: 0x%llx, 0x%lx\n", + offset + 8, radeon_bo_size(reloc->robj)); + return -EINVAL; + } + offset += reloc->lobj.gpu_offset; + ib[idx+0] = offset; + ib[idx+1] = upper_32_bits(offset) & 0xff; + break; + } case PACKET3_COPY_DW: if (pkt->count != 4) { DRM_ERROR("bad COPY_DW (invalid count)\n"); @@ -3287,6 +3316,7 @@ static bool evergreen_vm_reg_valid(u32 reg) /* check config regs */ switch (reg) { + case WAIT_UNTIL: case GRBM_GFX_INDEX: case CP_STRMOUT_CNTL: case CP_COHER_CNTL: diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c index 0be768b..9ea13d0 100644 --- a/drivers/gpu/drm/radeon/r600_cs.c +++ b/drivers/gpu/drm/radeon/r600_cs.c @@ -2294,6 +2294,35 @@ static int r600_packet3_check(struct radeon_cs_parser *p, ib[idx+4] = upper_32_bits(offset) & 0xff; } break; + case PACKET3_MEM_WRITE: + { + u64 offset; + + if (pkt->count != 3) { + DRM_ERROR("bad MEM_WRITE (invalid count)\n"); + return -EINVAL; + } + r = r600_cs_packet_next_reloc(p, &reloc); + if (r) { + DRM_ERROR("bad MEM_WRITE (missing reloc)\n"); + return -EINVAL; + } + offset = radeon_get_ib_value(p, idx+0); + offset += ((u64)(radeon_get_ib_value(p, idx+1) & 0xff)) << 32UL; + if (offset & 0x7) { + DRM_ERROR("bad MEM_WRITE (address not qwords aligned)\n"); + return -EINVAL; + } + if ((offset + 8) > radeon_bo_size(reloc->robj)) { + DRM_ERROR("bad MEM_WRITE bo too small: 0x%llx, 0x%lx\n", + offset + 8, radeon_bo_size(reloc->robj)); + return -EINVAL; + } + offset += reloc->lobj.gpu_offset; + ib[idx+0] = offset; + ib[idx+1] = upper_32_bits(offset) & 0xff; + break; + } case PACKET3_COPY_DW: if (pkt->count != 4) { DRM_ERROR("bad COPY_DW (invalid count)\n"); diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 5dc744d..9b9422c 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -225,12 +225,13 @@ struct radeon_fence { int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring); int radeon_fence_driver_init(struct radeon_device *rdev); void radeon_fence_driver_fini(struct radeon_device *rdev); +void radeon_fence_driver_force_completion(struct radeon_device *rdev); int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, int ring); void radeon_fence_process(struct radeon_device *rdev, int ring); bool radeon_fence_signaled(struct radeon_fence *fence); int radeon_fence_wait(struct radeon_fence *fence, bool interruptible); int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring); -void radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring); +int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring); int radeon_fence_wait_any(struct radeon_device *rdev, struct radeon_fence **fences, bool intr); diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 49b0659..cd75626 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1164,6 +1164,7 @@ int radeon_suspend_kms(struct drm_device *dev, pm_message_t state) struct drm_crtc *crtc; struct drm_connector *connector; int i, r; + bool force_completion = false; if (dev == NULL || dev->dev_private == NULL) { return -ENODEV; @@ -1206,8 +1207,16 @@ int radeon_suspend_kms(struct drm_device *dev, pm_message_t state) mutex_lock(&rdev->ring_lock); /* wait for gpu to finish processing current batch */ - for (i = 0; i < RADEON_NUM_RINGS; i++) - radeon_fence_wait_empty_locked(rdev, i); + for (i = 0; i < RADEON_NUM_RINGS; i++) { + r = radeon_fence_wait_empty_locked(rdev, i); + if (r) { + /* delay GPU reset to resume */ + force_completion = true; + } + } + if (force_completion) { + radeon_fence_driver_force_completion(rdev); + } mutex_unlock(&rdev->ring_lock); radeon_save_bios_scratch_regs(rdev); @@ -1338,7 +1347,6 @@ retry: } radeon_restore_bios_scratch_regs(rdev); - drm_helper_resume_force_mode(rdev->ddev); if (!r) { for (i = 0; i < RADEON_NUM_RINGS; ++i) { @@ -1358,11 +1366,14 @@ retry: } } } else { + radeon_fence_driver_force_completion(rdev); for (i = 0; i < RADEON_NUM_RINGS; ++i) { kfree(ring_data[i]); } } + drm_helper_resume_force_mode(rdev->ddev); + ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched); if (r) { /* bad news, how to tell it to userspace ? */ diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index 9b1a727..ff75934 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -68,9 +68,10 @@ * 2.25.0 - eg+: new info request for num SE and num SH * 2.26.0 - r600-eg: fix htile size computation * 2.27.0 - r600-SI: Add CS ioctl support for async DMA + * 2.28.0 - r600-eg: Add MEM_WRITE packet support */ #define KMS_DRIVER_MAJOR 2 -#define KMS_DRIVER_MINOR 27 +#define KMS_DRIVER_MINOR 28 #define KMS_DRIVER_PATCHLEVEL 0 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags); int radeon_driver_unload_kms(struct drm_device *dev); diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 410a975..3435625 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -609,26 +609,20 @@ int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring) * Returns 0 if the fences have passed, error for all other cases. * Caller must hold ring lock. */ -void radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring) +int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring) { uint64_t seq = rdev->fence_drv[ring].sync_seq[ring]; + int r; - while(1) { - int r; - r = radeon_fence_wait_seq(rdev, seq, ring, false, false); + r = radeon_fence_wait_seq(rdev, seq, ring, false, false); + if (r) { if (r == -EDEADLK) { - mutex_unlock(&rdev->ring_lock); - r = radeon_gpu_reset(rdev); - mutex_lock(&rdev->ring_lock); - if (!r) - continue; - } - if (r) { - dev_err(rdev->dev, "error waiting for ring to become" - " idle (%d)\n", r); + return -EDEADLK; } - return; + dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%d)\n", + ring, r); } + return 0; } /** @@ -854,13 +848,17 @@ int radeon_fence_driver_init(struct radeon_device *rdev) */ void radeon_fence_driver_fini(struct radeon_device *rdev) { - int ring; + int ring, r; mutex_lock(&rdev->ring_lock); for (ring = 0; ring < RADEON_NUM_RINGS; ring++) { if (!rdev->fence_drv[ring].initialized) continue; - radeon_fence_wait_empty_locked(rdev, ring); + r = radeon_fence_wait_empty_locked(rdev, ring); + if (r) { + /* no need to trigger GPU reset as we are unloading */ + radeon_fence_driver_force_completion(rdev); + } wake_up_all(&rdev->fence_queue); radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg); rdev->fence_drv[ring].initialized = false; @@ -868,6 +866,25 @@ void radeon_fence_driver_fini(struct radeon_device *rdev) mutex_unlock(&rdev->ring_lock); } +/** + * radeon_fence_driver_force_completion - force all fence waiter to complete + * + * @rdev: radeon device pointer + * + * In case of GPU reset failure make sure no process keep waiting on fence + * that will never complete. + */ +void radeon_fence_driver_force_completion(struct radeon_device *rdev) +{ + int ring; + + for (ring = 0; ring < RADEON_NUM_RINGS; ring++) { + if (!rdev->fence_drv[ring].initialized) + continue; + radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring); + } +} + /* * Fence debugfs diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c index aa14dbb..0bfa656 100644 --- a/drivers/gpu/drm/radeon/radeon_pm.c +++ b/drivers/gpu/drm/radeon/radeon_pm.c @@ -234,7 +234,7 @@ static void radeon_set_power_state(struct radeon_device *rdev) static void radeon_pm_set_clocks(struct radeon_device *rdev) { - int i; + int i, r; /* no need to take locks, etc. if nothing's going to change */ if ((rdev->pm.requested_clock_mode_index == rdev->pm.current_clock_mode_index) && @@ -248,8 +248,17 @@ static void radeon_pm_set_clocks(struct radeon_device *rdev) /* wait for the rings to drain */ for (i = 0; i < RADEON_NUM_RINGS; i++) { struct radeon_ring *ring = &rdev->ring[i]; - if (ring->ready) - radeon_fence_wait_empty_locked(rdev, i); + if (!ring->ready) { + continue; + } + r = radeon_fence_wait_empty_locked(rdev, i); + if (r) { + /* needs a GPU reset dont reset here */ + mutex_unlock(&rdev->ring_lock); + up_write(&rdev->pm.mclk_lock); + mutex_unlock(&rdev->ddev->struct_mutex); + return; + } } radeon_unmap_vram_bos(rdev); diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index 0744103..656b2e3 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -102,12 +102,12 @@ static int tegra_dc_set_timings(struct tegra_dc *dc, ((mode->hsync_end - mode->hsync_start) << 0); tegra_dc_writel(dc, value, DC_DISP_SYNC_WIDTH); - value = ((mode->vsync_start - mode->vdisplay) << 16) | - ((mode->hsync_start - mode->hdisplay) << 0); - tegra_dc_writel(dc, value, DC_DISP_BACK_PORCH); - value = ((mode->vtotal - mode->vsync_end) << 16) | ((mode->htotal - mode->hsync_end) << 0); + tegra_dc_writel(dc, value, DC_DISP_BACK_PORCH); + + value = ((mode->vsync_start - mode->vdisplay) << 16) | + ((mode->hsync_start - mode->hdisplay) << 0); tegra_dc_writel(dc, value, DC_DISP_FRONT_PORCH); value = (mode->vdisplay << 16) | mode->hdisplay; @@ -221,8 +221,7 @@ static int tegra_crtc_mode_set(struct drm_crtc *crtc, win.stride = crtc->fb->pitches[0]; /* program window registers */ - value = tegra_dc_readl(dc, DC_CMD_DISPLAY_WINDOW_HEADER); - value |= WINDOW_A_SELECT; + value = WINDOW_A_SELECT; tegra_dc_writel(dc, value, DC_CMD_DISPLAY_WINDOW_HEADER); tegra_dc_writel(dc, win.fmt, DC_WIN_COLOR_DEPTH); diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h index 3a843a7..741b5dc 100644 --- a/drivers/gpu/drm/tegra/drm.h +++ b/drivers/gpu/drm/tegra/drm.h @@ -204,24 +204,6 @@ extern int tegra_output_parse_dt(struct tegra_output *output); extern int tegra_output_init(struct drm_device *drm, struct tegra_output *output); extern int tegra_output_exit(struct tegra_output *output); -/* from gem.c */ -extern struct tegra_gem_object *tegra_gem_alloc(struct drm_device *drm, - size_t size); -extern int tegra_gem_handle_create(struct drm_device *drm, - struct drm_file *file, size_t size, - unsigned long flags, uint32_t *handle); -extern int tegra_gem_dumb_create(struct drm_file *file, struct drm_device *drm, - struct drm_mode_create_dumb *args); -extern int tegra_gem_dumb_map_offset(struct drm_file *file, - struct drm_device *drm, uint32_t handle, - uint64_t *offset); -extern int tegra_gem_dumb_destroy(struct drm_file *file, - struct drm_device *drm, uint32_t handle); -extern int tegra_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma); -extern int tegra_gem_init_object(struct drm_gem_object *obj); -extern void tegra_gem_free_object(struct drm_gem_object *obj); -extern struct vm_operations_struct tegra_gem_vm_ops; - /* from fb.c */ extern int tegra_drm_fb_init(struct drm_device *drm); extern void tegra_drm_fb_exit(struct drm_device *drm); diff --git a/drivers/gpu/drm/tegra/hdmi.c b/drivers/gpu/drm/tegra/hdmi.c index ab40164..e060c7e 100644 --- a/drivers/gpu/drm/tegra/hdmi.c +++ b/drivers/gpu/drm/tegra/hdmi.c @@ -149,7 +149,7 @@ struct tmds_config { }; static const struct tmds_config tegra2_tmds_config[] = { - { /* 480p modes */ + { /* slow pixel clock modes */ .pclk = 27000000, .pll0 = SOR_PLL_BG_V17_S(3) | SOR_PLL_ICHPMP(1) | SOR_PLL_RESISTORSEL | SOR_PLL_VCOCAP(0) | @@ -163,21 +163,8 @@ static const struct tmds_config tegra2_tmds_config[] = { DRIVE_CURRENT_LANE1(DRIVE_CURRENT_7_125_mA) | DRIVE_CURRENT_LANE2(DRIVE_CURRENT_7_125_mA) | DRIVE_CURRENT_LANE3(DRIVE_CURRENT_7_125_mA), - }, { /* 720p modes */ - .pclk = 74250000, - .pll0 = SOR_PLL_BG_V17_S(3) | SOR_PLL_ICHPMP(1) | - SOR_PLL_RESISTORSEL | SOR_PLL_VCOCAP(1) | - SOR_PLL_TX_REG_LOAD(3), - .pll1 = SOR_PLL_TMDS_TERM_ENABLE | SOR_PLL_PE_EN, - .pe_current = PE_CURRENT0(PE_CURRENT_6_0_mA) | - PE_CURRENT1(PE_CURRENT_6_0_mA) | - PE_CURRENT2(PE_CURRENT_6_0_mA) | - PE_CURRENT3(PE_CURRENT_6_0_mA), - .drive_current = DRIVE_CURRENT_LANE0(DRIVE_CURRENT_7_125_mA) | - DRIVE_CURRENT_LANE1(DRIVE_CURRENT_7_125_mA) | - DRIVE_CURRENT_LANE2(DRIVE_CURRENT_7_125_mA) | - DRIVE_CURRENT_LANE3(DRIVE_CURRENT_7_125_mA), - }, { /* 1080p modes */ + }, + { /* high pixel clock modes */ .pclk = UINT_MAX, .pll0 = SOR_PLL_BG_V17_S(3) | SOR_PLL_ICHPMP(1) | SOR_PLL_RESISTORSEL | SOR_PLL_VCOCAP(1) | @@ -479,7 +466,7 @@ static void tegra_hdmi_setup_avi_infoframe(struct tegra_hdmi *hdmi, return; } - h_front_porch = mode->htotal - mode->hsync_end; + h_front_porch = mode->hsync_start - mode->hdisplay; memset(&frame, 0, sizeof(frame)); frame.r = HDMI_AVI_R_SAME; @@ -634,8 +621,8 @@ static int tegra_output_hdmi_enable(struct tegra_output *output) pclk = mode->clock * 1000; h_sync_width = mode->hsync_end - mode->hsync_start; - h_front_porch = mode->htotal - mode->hsync_end; - h_back_porch = mode->hsync_start - mode->hdisplay; + h_back_porch = mode->htotal - mode->hsync_end; + h_front_porch = mode->hsync_start - mode->hdisplay; err = regulator_enable(hdmi->vdd); if (err < 0) { diff --git a/drivers/gpu/drm/tegra/host1x.c b/drivers/gpu/drm/tegra/host1x.c index bdb97a5..5d17b11 100644 --- a/drivers/gpu/drm/tegra/host1x.c +++ b/drivers/gpu/drm/tegra/host1x.c @@ -239,6 +239,8 @@ int host1x_register_client(struct host1x *host1x, struct host1x_client *client) } } + client->host1x = host1x; + return 0; } diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h index 06d7f79..0f4a366 100644 --- a/include/drm/drm_mm.h +++ b/include/drm/drm_mm.h @@ -158,12 +158,29 @@ static inline struct drm_mm_node *drm_mm_get_block_atomic_range( return drm_mm_get_block_range_generic(parent, size, alignment, 0, start, end, 1); } -extern int drm_mm_insert_node(struct drm_mm *mm, struct drm_mm_node *node, - unsigned long size, unsigned alignment); + +extern int drm_mm_insert_node(struct drm_mm *mm, + struct drm_mm_node *node, + unsigned long size, + unsigned alignment); extern int drm_mm_insert_node_in_range(struct drm_mm *mm, struct drm_mm_node *node, - unsigned long size, unsigned alignment, - unsigned long start, unsigned long end); + unsigned long size, + unsigned alignment, + unsigned long start, + unsigned long end); +extern int drm_mm_insert_node_generic(struct drm_mm *mm, + struct drm_mm_node *node, + unsigned long size, + unsigned alignment, + unsigned long color); +extern int drm_mm_insert_node_in_range_generic(struct drm_mm *mm, + struct drm_mm_node *node, + unsigned long size, + unsigned alignment, + unsigned long color, + unsigned long start, + unsigned long end); extern void drm_mm_put_block(struct drm_mm_node *cur); extern void drm_mm_remove_node(struct drm_mm_node *node); extern void drm_mm_replace_node(struct drm_mm_node *old, struct drm_mm_node *new); diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index b746a3c..c4d2e9c 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -307,6 +307,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_HAS_PRIME_VMAP_FLUSH 21 #define I915_PARAM_RSVD_FOR_FUTURE_USE 22 #define I915_PARAM_HAS_SECURE_BATCHES 23 +#define I915_PARAM_HAS_PINNED_BATCHES 24 typedef struct drm_i915_getparam { int param; @@ -677,6 +678,15 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_SECURE (1<<9) +/** Inform the kernel that the batch is and will always be pinned. This + * negates the requirement for a workaround to be performed to avoid + * an incoherent CS (such as can be found on 830/845). If this flag is + * not passed, the kernel will endeavour to make sure the batch is + * coherent with the CS before execution. If this flag is passed, + * userspace assumes the responsibility for ensuring the same. + */ +#define I915_EXEC_IS_PINNED (1<<10) + #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \ (eb2).rsvd1 = context & I915_EXEC_CONTEXT_ID_MASK |