diff options
author | Dave Airlie <airlied@redhat.com> | 2013-11-10 18:35:33 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2013-11-10 18:35:33 +1000 |
commit | ab0169bb5cc4a5c86756dde662087f9d12302eb0 (patch) | |
tree | 495e668337410f6763480ea1f010213f6399e38c | |
parent | 8d0a2215931f1ffd77aef65cae2c0becc3f5d560 (diff) | |
parent | 13b3a0a77625c09c84825ef6ba81d957ec207841 (diff) | |
download | op-kernel-dev-ab0169bb5cc4a5c86756dde662087f9d12302eb0.zip op-kernel-dev-ab0169bb5cc4a5c86756dde662087f9d12302eb0.tar.gz |
Merge tag 'bdw-stage1-2013-11-08-v2' of git://people.freedesktop.org/~danvet/drm-intel into drm-next
So here's the Broadwell pull request. From a kernel driver pov there's
two areas with big changes in Broadwell:
- Completely new enumerated interrupt bits. On the plus side it now looks
fairly unform and sane.
- Completely new pagetable layout.
To ensure minimal impact on existing platforms we've refactored both the
irq and low-level gtt handling code a lot in anticipation of the bdw push.
So now bdw enabling in these areas just plugs in a bunch of vfuncs.
Otherwise it's all fairly harmless adjusting of switch cases and
if-ladders to shovel bdw into the right blocks. So minimized impact on
existing platforms. I've also merged the bdw-stage1 branch into our
-nightly integration branch for the past week to make sure we don't break
anything.
Note that there's still quite a flurry or patches floating around, but
I've figured I'll push this out. I plan to keep the bdw fixes separate
from my usual -fixes stream so that you can reject them easily in case it
still looks like too much churn. Also, bdw is for now hidden behind the
preliminary hw enabling module option. So there's no real pressure to get
follow-up patches all into 3.13.
* tag 'bdw-stage1-2013-11-08-v2' of git://people.freedesktop.org/~danvet/drm-intel: (75 commits)
drm/i915: Mask the vblank interrupt on bdw by default
drm/i915: Wire up cpu fifo underrun reporting support for bdw
drm/i915: Optimize gen8_enable|disable_vblank functions
drm/i915: Wire up pipe CRC support for bdw
drm/i915: Wire up PCH interrupts for bdw
drm/i915: Wire up port A aux channel
drm/i915: Fix up the bdw pipe interrupt enable lists
drm/i915: Optimize pipe irq handling on bdw
drm/i915/bdw: Take render error interrupt out of the mask
drm/i915/bdw: Add BDW PCH check first
drm/i915: Use hsw_crt_get_config on BDW
drm/i915/bdw: Change dp aux timeout to 600us on DDIA
drm/i915/bdw: Enable trickle feed on Broadwell
drm/i915/bdw: WaSingleSubspanDispatchOnAALinesAndPoints
drm/i915/bdw: conservative SBE VUE cache mode
drm/i915/bdw: Limit SDE poly depth FIFO to 2
drm/i915/bdw: Sampler power bypass disable
ddrm/i915/bdw: Disable centroid pixel perf optimization
drm/i915/bdw: BWGTLB clock gate disable
drm/i915/bdw: Implement edp PSR workarounds
...
22 files changed, 1847 insertions, 126 deletions
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index b3cd3eb..96f958d 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -313,6 +313,16 @@ static size_t __init gen6_stolen_size(int num, int slot, int func) return gmch_ctrl << 25; /* 32 MB units */ } +static inline size_t gen8_stolen_size(int num, int slot, int func) +{ + u16 gmch_ctrl; + + gmch_ctrl = read_pci_config_16(num, slot, func, SNB_GMCH_CTRL); + gmch_ctrl >>= BDW_GMCH_GMS_SHIFT; + gmch_ctrl &= BDW_GMCH_GMS_MASK; + return gmch_ctrl << 25; /* 32 MB units */ +} + typedef size_t (*stolen_size_fn)(int num, int slot, int func); static struct pci_device_id intel_stolen_ids[] __initdata = { @@ -336,6 +346,8 @@ static struct pci_device_id intel_stolen_ids[] __initdata = { INTEL_IVB_D_IDS(gen6_stolen_size), INTEL_HSW_D_IDS(gen6_stolen_size), INTEL_HSW_M_IDS(gen6_stolen_size), + INTEL_BDW_M_IDS(gen8_stolen_size), + INTEL_BDW_D_IDS(gen8_stolen_size) }; static void __init intel_graphics_stolen(int num, int slot, int func) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 4386622..6ed45a9 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -586,7 +586,53 @@ static int i915_interrupt_info(struct seq_file *m, void *data) if (ret) return ret; - if (IS_VALLEYVIEW(dev)) { + if (INTEL_INFO(dev)->gen >= 8) { + int i; + seq_printf(m, "Master Interrupt Control:\t%08x\n", + I915_READ(GEN8_MASTER_IRQ)); + + for (i = 0; i < 4; i++) { + seq_printf(m, "GT Interrupt IMR %d:\t%08x\n", + i, I915_READ(GEN8_GT_IMR(i))); + seq_printf(m, "GT Interrupt IIR %d:\t%08x\n", + i, I915_READ(GEN8_GT_IIR(i))); + seq_printf(m, "GT Interrupt IER %d:\t%08x\n", + i, I915_READ(GEN8_GT_IER(i))); + } + + for_each_pipe(i) { + seq_printf(m, "Pipe %c IMR:\t%08x\n", + pipe_name(i), + I915_READ(GEN8_DE_PIPE_IMR(i))); + seq_printf(m, "Pipe %c IIR:\t%08x\n", + pipe_name(i), + I915_READ(GEN8_DE_PIPE_IIR(i))); + seq_printf(m, "Pipe %c IER:\t%08x\n", + pipe_name(i), + I915_READ(GEN8_DE_PIPE_IER(i))); + } + + seq_printf(m, "Display Engine port interrupt mask:\t%08x\n", + I915_READ(GEN8_DE_PORT_IMR)); + seq_printf(m, "Display Engine port interrupt identity:\t%08x\n", + I915_READ(GEN8_DE_PORT_IIR)); + seq_printf(m, "Display Engine port interrupt enable:\t%08x\n", + I915_READ(GEN8_DE_PORT_IER)); + + seq_printf(m, "Display Engine misc interrupt mask:\t%08x\n", + I915_READ(GEN8_DE_MISC_IMR)); + seq_printf(m, "Display Engine misc interrupt identity:\t%08x\n", + I915_READ(GEN8_DE_MISC_IIR)); + seq_printf(m, "Display Engine misc interrupt enable:\t%08x\n", + I915_READ(GEN8_DE_MISC_IER)); + + seq_printf(m, "PCU interrupt mask:\t%08x\n", + I915_READ(GEN8_PCU_IMR)); + seq_printf(m, "PCU interrupt identity:\t%08x\n", + I915_READ(GEN8_PCU_IIR)); + seq_printf(m, "PCU interrupt enable:\t%08x\n", + I915_READ(GEN8_PCU_IER)); + } else if (IS_VALLEYVIEW(dev)) { seq_printf(m, "Display IER:\t%08x\n", I915_READ(VLV_IER)); seq_printf(m, "Display IIR:\t%08x\n", @@ -658,7 +704,7 @@ static int i915_interrupt_info(struct seq_file *m, void *data) seq_printf(m, "Interrupts received: %d\n", atomic_read(&dev_priv->irq_received)); for_each_ring(ring, dev_priv, i) { - if (IS_GEN6(dev) || IS_GEN7(dev)) { + if (INTEL_INFO(dev)->gen >= 6) { seq_printf(m, "Graphics Interrupt mask (%s): %08x\n", ring->name, I915_READ_IMR(ring)); @@ -1577,7 +1623,7 @@ static int i915_swizzle_info(struct seq_file *m, void *data) I915_READ16(C0DRB3)); seq_printf(m, "C1DRB3 = 0x%04x\n", I915_READ16(C1DRB3)); - } else if (IS_GEN6(dev) || IS_GEN7(dev)) { + } else if (INTEL_INFO(dev)->gen >= 6) { seq_printf(m, "MAD_DIMM_C0 = 0x%08x\n", I915_READ(MAD_DIMM_C0)); seq_printf(m, "MAD_DIMM_C1 = 0x%08x\n", @@ -1586,8 +1632,12 @@ static int i915_swizzle_info(struct seq_file *m, void *data) I915_READ(MAD_DIMM_C2)); seq_printf(m, "TILECTL = 0x%08x\n", I915_READ(TILECTL)); - seq_printf(m, "ARB_MODE = 0x%08x\n", - I915_READ(ARB_MODE)); + if (IS_GEN8(dev)) + seq_printf(m, "GAMTARBMODE = 0x%08x\n", + I915_READ(GAMTARBMODE)); + else + seq_printf(m, "ARB_MODE = 0x%08x\n", + I915_READ(ARB_MODE)); seq_printf(m, "DISP_ARB_CTL = 0x%08x\n", I915_READ(DISP_ARB_CTL)); } @@ -1596,18 +1646,37 @@ static int i915_swizzle_info(struct seq_file *m, void *data) return 0; } -static int i915_ppgtt_info(struct seq_file *m, void *data) +static void gen8_ppgtt_info(struct seq_file *m, struct drm_device *dev) { - struct drm_info_node *node = (struct drm_info_node *) m->private; - struct drm_device *dev = node->minor->dev; struct drm_i915_private *dev_priv = dev->dev_private; struct intel_ring_buffer *ring; - int i, ret; + struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; + int unused, i; + if (!ppgtt) + return; + + seq_printf(m, "Page directories: %d\n", ppgtt->num_pd_pages); + seq_printf(m, "Page tables: %d\n", ppgtt->num_pt_pages); + for_each_ring(ring, dev_priv, unused) { + seq_printf(m, "%s\n", ring->name); + for (i = 0; i < 4; i++) { + u32 offset = 0x270 + i * 8; + u64 pdp = I915_READ(ring->mmio_base + offset + 4); + pdp <<= 32; + pdp |= I915_READ(ring->mmio_base + offset); + for (i = 0; i < 4; i++) + seq_printf(m, "\tPDP%d 0x%016llx\n", i, pdp); + } + } +} + +static void gen6_ppgtt_info(struct seq_file *m, struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_ring_buffer *ring; + int i; - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; if (INTEL_INFO(dev)->gen == 6) seq_printf(m, "GFX_MODE: 0x%08x\n", I915_READ(GFX_MODE)); @@ -1626,6 +1695,22 @@ static int i915_ppgtt_info(struct seq_file *m, void *data) seq_printf(m, "pd gtt offset: 0x%08x\n", ppgtt->pd_offset); } seq_printf(m, "ECOCHK: 0x%08x\n", I915_READ(GAM_ECOCHK)); +} + +static int i915_ppgtt_info(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_device *dev = node->minor->dev; + + int ret = mutex_lock_interruptible(&dev->struct_mutex); + if (ret) + return ret; + + if (INTEL_INFO(dev)->gen >= 8) + gen8_ppgtt_info(m, dev); + else if (INTEL_INFO(dev)->gen >= 6) + gen6_ppgtt_info(m, dev); + mutex_unlock(&dev->struct_mutex); return 0; diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index a0804fa..989be12 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -336,6 +336,24 @@ static const struct intel_device_info intel_haswell_m_info = { .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, }; +static const struct intel_device_info intel_broadwell_d_info = { + .is_preliminary = 1, + .gen = 8, .num_pipes = 3, + .need_gfx_hws = 1, .has_hotplug = 1, + .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, + .has_llc = 1, + .has_ddi = 1, +}; + +static const struct intel_device_info intel_broadwell_m_info = { + .is_preliminary = 1, + .gen = 8, .is_mobile = 1, .num_pipes = 3, + .need_gfx_hws = 1, .has_hotplug = 1, + .ring_mask = RENDER_RING | BSD_RING | BLT_RING | VEBOX_RING, + .has_llc = 1, + .has_ddi = 1, +}; + /* * Make sure any device matches here are from most specific to most * general. For example, since the Quanta match is based on the subsystem @@ -367,7 +385,9 @@ static const struct intel_device_info intel_haswell_m_info = { INTEL_HSW_D_IDS(&intel_haswell_d_info), \ INTEL_HSW_M_IDS(&intel_haswell_m_info), \ INTEL_VLV_M_IDS(&intel_valleyview_m_info), \ - INTEL_VLV_D_IDS(&intel_valleyview_d_info) + INTEL_VLV_D_IDS(&intel_valleyview_d_info), \ + INTEL_BDW_M_IDS(&intel_broadwell_m_info), \ + INTEL_BDW_D_IDS(&intel_broadwell_d_info) static const struct pci_device_id pciidlist[] = { /* aka */ INTEL_PCI_IDS, @@ -428,6 +448,12 @@ void intel_detect_pch(struct drm_device *dev) DRM_DEBUG_KMS("Found LynxPoint PCH\n"); WARN_ON(!IS_HASWELL(dev)); WARN_ON(IS_ULT(dev)); + } else if (IS_BROADWELL(dev)) { + dev_priv->pch_type = PCH_LPT; + dev_priv->pch_id = + INTEL_PCH_LPT_LP_DEVICE_ID_TYPE; + DRM_DEBUG_KMS("This is Broadwell, assuming " + "LynxPoint LP PCH\n"); } else if (id == INTEL_PCH_LPT_LP_DEVICE_ID_TYPE) { dev_priv->pch_type = PCH_LPT; DRM_DEBUG_KMS("Found LynxPoint LP PCH\n"); @@ -452,6 +478,12 @@ bool i915_semaphore_is_enabled(struct drm_device *dev) if (INTEL_INFO(dev)->gen < 6) return 0; + /* Until we get further testing... */ + if (IS_GEN8(dev)) { + WARN_ON(!i915_preliminary_hw_support); + return 0; + } + if (i915_semaphores >= 0) return i915_semaphores; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b0dd4ea..8600c31 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -118,6 +118,10 @@ enum intel_display_power_domain { #define HSW_ALWAYS_ON_POWER_DOMAINS ( \ BIT(POWER_DOMAIN_PIPE_A) | \ BIT(POWER_DOMAIN_TRANSCODER_EDP)) +#define BDW_ALWAYS_ON_POWER_DOMAINS ( \ + BIT(POWER_DOMAIN_PIPE_A) | \ + BIT(POWER_DOMAIN_TRANSCODER_EDP) | \ + BIT(POWER_DOMAIN_PIPE_A_PANEL_FITTER)) enum hpd_pin { HPD_NONE = 0, @@ -575,10 +579,21 @@ struct i915_gtt { struct i915_hw_ppgtt { struct i915_address_space base; unsigned num_pd_entries; - struct page **pt_pages; - uint32_t pd_offset; - dma_addr_t *pt_dma_addr; - + union { + struct page **pt_pages; + struct page *gen8_pt_pages; + }; + struct page *pd_pages; + int num_pd_pages; + int num_pt_pages; + union { + uint32_t pd_offset; + dma_addr_t pd_dma_addr[4]; + }; + union { + dma_addr_t *pt_dma_addr; + dma_addr_t *gen8_pt_dma_addr[4]; + }; int (*enable)(struct drm_device *dev); }; @@ -1322,7 +1337,10 @@ typedef struct drm_i915_private { struct mutex dpio_lock; /** Cached value of IMR to avoid reads in updating the bitfield */ - u32 irq_mask; + union { + u32 irq_mask; + u32 de_irq_mask[I915_MAX_PIPES]; + }; u32 gt_irq_mask; u32 pm_irq_mask; @@ -1733,6 +1751,7 @@ struct drm_i915_file_private { (dev)->pdev->device == 0x010A) #define IS_VALLEYVIEW(dev) (INTEL_INFO(dev)->is_valleyview) #define IS_HASWELL(dev) (INTEL_INFO(dev)->is_haswell) +#define IS_BROADWELL(dev) (INTEL_INFO(dev)->gen == 8) #define IS_MOBILE(dev) (INTEL_INFO(dev)->is_mobile) #define IS_HSW_EARLY_SDV(dev) (IS_HASWELL(dev) && \ ((dev)->pdev->device & 0xFF00) == 0x0C00) @@ -1754,6 +1773,7 @@ struct drm_i915_file_private { #define IS_GEN5(dev) (INTEL_INFO(dev)->gen == 5) #define IS_GEN6(dev) (INTEL_INFO(dev)->gen == 6) #define IS_GEN7(dev) (INTEL_INFO(dev)->gen == 7) +#define IS_GEN8(dev) (INTEL_INFO(dev)->gen == 8) #define RENDER_RING (1<<RCS) #define BSD_RING (1<<VCS) @@ -1790,12 +1810,12 @@ struct drm_i915_file_private { #define HAS_PIPE_CXSR(dev) (INTEL_INFO(dev)->has_pipe_cxsr) #define I915_HAS_FBC(dev) (INTEL_INFO(dev)->has_fbc) -#define HAS_IPS(dev) (IS_ULT(dev)) +#define HAS_IPS(dev) (IS_ULT(dev) || IS_BROADWELL(dev)) #define HAS_DDI(dev) (INTEL_INFO(dev)->has_ddi) -#define HAS_POWER_WELL(dev) (IS_HASWELL(dev)) +#define HAS_POWER_WELL(dev) (IS_HASWELL(dev) || IS_BROADWELL(dev)) #define HAS_FPGA_DBG_UNCLAIMED(dev) (INTEL_INFO(dev)->has_fpga_dbg) -#define HAS_PSR(dev) (IS_HASWELL(dev)) +#define HAS_PSR(dev) (IS_HASWELL(dev) || IS_BROADWELL(dev)) #define INTEL_PCH_DEVICE_ID_MASK 0xff00 #define INTEL_PCH_IBX_DEVICE_ID_TYPE 0x3b00 diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e7b39d7..12bbd5e 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2954,6 +2954,7 @@ static void i915_gem_write_fence(struct drm_device *dev, int reg, obj->stride, obj->tiling_mode); switch (INTEL_INFO(dev)->gen) { + case 8: case 7: case 6: case 5: @@ -4361,6 +4362,8 @@ void i915_gem_init_swizzling(struct drm_device *dev) I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_SNB)); else if (IS_GEN7(dev)) I915_WRITE(ARB_MODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_IVB)); + else if (IS_GEN8(dev)) + I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_SWIZZLE_BDW)); else BUG(); } diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index cc619c1..72a3df3 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -117,6 +117,9 @@ static int get_context_size(struct drm_device *dev) else ret = GEN7_CXT_TOTAL_SIZE(reg) * 64; break; + case 8: + ret = GEN8_CXT_TOTAL_SIZE; + break; default: BUG(); } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 0ce0d47..885d595 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -212,6 +212,7 @@ static int relocate_entry_cpu(struct drm_i915_gem_object *obj, struct drm_i915_gem_relocation_entry *reloc) { + struct drm_device *dev = obj->base.dev; uint32_t page_offset = offset_in_page(reloc->offset); char *vaddr; int ret = -EINVAL; @@ -223,6 +224,19 @@ relocate_entry_cpu(struct drm_i915_gem_object *obj, vaddr = kmap_atomic(i915_gem_object_get_page(obj, reloc->offset >> PAGE_SHIFT)); *(uint32_t *)(vaddr + page_offset) = reloc->delta; + + if (INTEL_INFO(dev)->gen >= 8) { + page_offset = offset_in_page(page_offset + sizeof(uint32_t)); + + if (page_offset == 0) { + kunmap_atomic(vaddr); + vaddr = kmap_atomic(i915_gem_object_get_page(obj, + (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT)); + } + + *(uint32_t *)(vaddr + page_offset) = 0; + } + kunmap_atomic(vaddr); return 0; @@ -253,6 +267,21 @@ relocate_entry_gtt(struct drm_i915_gem_object *obj, reloc_entry = (uint32_t __iomem *) (reloc_page + offset_in_page(reloc->offset)); iowrite32(reloc->delta, reloc_entry); + + if (INTEL_INFO(dev)->gen >= 8) { + reloc_entry += 1; + + if (offset_in_page(reloc->offset + sizeof(uint32_t)) == 0) { + io_mapping_unmap_atomic(reloc_page); + reloc_page = io_mapping_map_atomic_wc( + dev_priv->gtt.mappable, + reloc->offset + sizeof(uint32_t)); + reloc_entry = reloc_page; + } + + iowrite32(0, reloc_entry); + } + io_mapping_unmap_atomic(reloc_page); return 0; @@ -323,7 +352,8 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, return 0; /* Check that the relocation address is valid... */ - if (unlikely(reloc->offset > obj->base.size - 4)) { + if (unlikely(reloc->offset > + obj->base.size - (INTEL_INFO(dev)->gen >= 8 ? 8 : 4))) { DRM_DEBUG("Relocation beyond object bounds: " "obj %p target %d offset %d size %d.\n", obj, reloc->target_handle, @@ -1116,8 +1146,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure * batch" bit. Hence we need to pin secure batches into the global gtt. - * hsw should have this fixed, but let's be paranoid and do it - * unconditionally for now. */ + * hsw should have this fixed, but bdw mucks it up again. */ if (flags & I915_DISPATCH_SECURE && !batch_obj->has_global_gtt_mapping) i915_gem_gtt_bind_object(batch_obj, batch_obj->cache_level); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index c4c42e7..3620a1b 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -30,6 +30,8 @@ #define GEN6_PPGTT_PD_ENTRIES 512 #define I915_PPGTT_PT_ENTRIES (PAGE_SIZE / sizeof(gen6_gtt_pte_t)) +typedef uint64_t gen8_gtt_pte_t; +typedef gen8_gtt_pte_t gen8_ppgtt_pde_t; /* PPGTT stuff */ #define GEN6_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0xff0)) @@ -57,6 +59,41 @@ #define HSW_WB_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0xb) #define HSW_WT_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x6) +#define GEN8_PTES_PER_PAGE (PAGE_SIZE / sizeof(gen8_gtt_pte_t)) +#define GEN8_PDES_PER_PAGE (PAGE_SIZE / sizeof(gen8_ppgtt_pde_t)) +#define GEN8_LEGACY_PDPS 4 + +#define PPAT_UNCACHED_INDEX (_PAGE_PWT | _PAGE_PCD) +#define PPAT_CACHED_PDE_INDEX 0 /* WB LLC */ +#define PPAT_CACHED_INDEX _PAGE_PAT /* WB LLCeLLC */ +#define PPAT_DISPLAY_ELLC_INDEX _PAGE_PCD /* WT eLLC */ + +static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr, + enum i915_cache_level level, + bool valid) +{ + gen8_gtt_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0; + pte |= addr; + if (level != I915_CACHE_NONE) + pte |= PPAT_CACHED_INDEX; + else + pte |= PPAT_UNCACHED_INDEX; + return pte; +} + +static inline gen8_ppgtt_pde_t gen8_pde_encode(struct drm_device *dev, + dma_addr_t addr, + enum i915_cache_level level) +{ + gen8_ppgtt_pde_t pde = _PAGE_PRESENT | _PAGE_RW; + pde |= addr; + if (level != I915_CACHE_NONE) + pde |= PPAT_CACHED_PDE_INDEX; + else + pde |= PPAT_UNCACHED_INDEX; + return pde; +} + static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr, enum i915_cache_level level, bool valid) @@ -158,6 +195,257 @@ static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr, return pte; } +/* Broadwell Page Directory Pointer Descriptors */ +static int gen8_write_pdp(struct intel_ring_buffer *ring, unsigned entry, + uint64_t val) +{ + int ret; + + BUG_ON(entry >= 4); + + ret = intel_ring_begin(ring, 6); + if (ret) + return ret; + + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit(ring, GEN8_RING_PDP_UDW(ring, entry)); + intel_ring_emit(ring, (u32)(val >> 32)); + intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); + intel_ring_emit(ring, GEN8_RING_PDP_LDW(ring, entry)); + intel_ring_emit(ring, (u32)(val)); + intel_ring_advance(ring); + + return 0; +} + +static int gen8_ppgtt_enable(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_ring_buffer *ring; + struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; + int i, j, ret; + + /* bit of a hack to find the actual last used pd */ + int used_pd = ppgtt->num_pd_entries / GEN8_PDES_PER_PAGE; + + for_each_ring(ring, dev_priv, j) { + I915_WRITE(RING_MODE_GEN7(ring), + _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE)); + } + + for (i = used_pd - 1; i >= 0; i--) { + dma_addr_t addr = ppgtt->pd_dma_addr[i]; + for_each_ring(ring, dev_priv, j) { + ret = gen8_write_pdp(ring, i, addr); + if (ret) + return ret; + } + } + return 0; +} + +static void gen8_ppgtt_clear_range(struct i915_address_space *vm, + unsigned first_entry, + unsigned num_entries, + bool use_scratch) +{ + struct i915_hw_ppgtt *ppgtt = + container_of(vm, struct i915_hw_ppgtt, base); + gen8_gtt_pte_t *pt_vaddr, scratch_pte; + unsigned act_pt = first_entry / GEN8_PTES_PER_PAGE; + unsigned first_pte = first_entry % GEN8_PTES_PER_PAGE; + unsigned last_pte, i; + + scratch_pte = gen8_pte_encode(ppgtt->base.scratch.addr, + I915_CACHE_LLC, use_scratch); + + while (num_entries) { + struct page *page_table = &ppgtt->gen8_pt_pages[act_pt]; + + last_pte = first_pte + num_entries; + if (last_pte > GEN8_PTES_PER_PAGE) + last_pte = GEN8_PTES_PER_PAGE; + + pt_vaddr = kmap_atomic(page_table); + + for (i = first_pte; i < last_pte; i++) + pt_vaddr[i] = scratch_pte; + + kunmap_atomic(pt_vaddr); + + num_entries -= last_pte - first_pte; + first_pte = 0; + act_pt++; + } +} + +static void gen8_ppgtt_insert_entries(struct i915_address_space *vm, + struct sg_table *pages, + unsigned first_entry, + enum i915_cache_level cache_level) +{ + struct i915_hw_ppgtt *ppgtt = + container_of(vm, struct i915_hw_ppgtt, base); + gen8_gtt_pte_t *pt_vaddr; + unsigned act_pt = first_entry / GEN8_PTES_PER_PAGE; + unsigned act_pte = first_entry % GEN8_PTES_PER_PAGE; + struct sg_page_iter sg_iter; + + pt_vaddr = kmap_atomic(&ppgtt->gen8_pt_pages[act_pt]); + for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) { + dma_addr_t page_addr; + + page_addr = sg_dma_address(sg_iter.sg) + + (sg_iter.sg_pgoffset << PAGE_SHIFT); + pt_vaddr[act_pte] = gen8_pte_encode(page_addr, cache_level, + true); + if (++act_pte == GEN8_PTES_PER_PAGE) { + kunmap_atomic(pt_vaddr); + act_pt++; + pt_vaddr = kmap_atomic(&ppgtt->gen8_pt_pages[act_pt]); + act_pte = 0; + + } + } + kunmap_atomic(pt_vaddr); +} + +static void gen8_ppgtt_cleanup(struct i915_address_space *vm) +{ + struct i915_hw_ppgtt *ppgtt = + container_of(vm, struct i915_hw_ppgtt, base); + int i, j; + + for (i = 0; i < ppgtt->num_pd_pages ; i++) { + if (ppgtt->pd_dma_addr[i]) { + pci_unmap_page(ppgtt->base.dev->pdev, + ppgtt->pd_dma_addr[i], + PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + + for (j = 0; j < GEN8_PDES_PER_PAGE; j++) { + dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j]; + if (addr) + pci_unmap_page(ppgtt->base.dev->pdev, + addr, + PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL); + + } + } + kfree(ppgtt->gen8_pt_dma_addr[i]); + } + + __free_pages(ppgtt->gen8_pt_pages, ppgtt->num_pt_pages << PAGE_SHIFT); + __free_pages(ppgtt->pd_pages, ppgtt->num_pd_pages << PAGE_SHIFT); +} + +/** + * GEN8 legacy ppgtt programming is accomplished through 4 PDP registers with a + * net effect resembling a 2-level page table in normal x86 terms. Each PDP + * represents 1GB of memory + * 4 * 512 * 512 * 4096 = 4GB legacy 32b address space. + * + * TODO: Do something with the size parameter + **/ +static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt, uint64_t size) +{ + struct page *pt_pages; + int i, j, ret = -ENOMEM; + const int max_pdp = DIV_ROUND_UP(size, 1 << 30); + const int num_pt_pages = GEN8_PDES_PER_PAGE * max_pdp; + + if (size % (1<<30)) + DRM_INFO("Pages will be wasted unless GTT size (%llu) is divisible by 1GB\n", size); + + /* FIXME: split allocation into smaller pieces. For now we only ever do + * this once, but with full PPGTT, the multiple contiguous allocations + * will be bad. + */ + ppgtt->pd_pages = alloc_pages(GFP_KERNEL, get_order(max_pdp << PAGE_SHIFT)); + if (!ppgtt->pd_pages) + return -ENOMEM; + + pt_pages = alloc_pages(GFP_KERNEL, get_order(num_pt_pages << PAGE_SHIFT)); + if (!pt_pages) { + __free_pages(ppgtt->pd_pages, get_order(max_pdp << PAGE_SHIFT)); + return -ENOMEM; + } + + ppgtt->gen8_pt_pages = pt_pages; + ppgtt->num_pd_pages = 1 << get_order(max_pdp << PAGE_SHIFT); + ppgtt->num_pt_pages = 1 << get_order(num_pt_pages << PAGE_SHIFT); + ppgtt->num_pd_entries = max_pdp * GEN8_PDES_PER_PAGE; + ppgtt->enable = gen8_ppgtt_enable; + ppgtt->base.clear_range = gen8_ppgtt_clear_range; + ppgtt->base.insert_entries = gen8_ppgtt_insert_entries; + ppgtt->base.cleanup = gen8_ppgtt_cleanup; + + BUG_ON(ppgtt->num_pd_pages > GEN8_LEGACY_PDPS); + + /* + * - Create a mapping for the page directories. + * - For each page directory: + * allocate space for page table mappings. + * map each page table + */ + for (i = 0; i < max_pdp; i++) { + dma_addr_t temp; + temp = pci_map_page(ppgtt->base.dev->pdev, + &ppgtt->pd_pages[i], 0, + PAGE_SIZE, PCI_DMA_BIDIRECTIONAL); + if (pci_dma_mapping_error(ppgtt->base.dev->pdev, temp)) + goto err_out; + + ppgtt->pd_dma_addr[i] = temp; + + ppgtt->gen8_pt_dma_addr[i] = kmalloc(sizeof(dma_addr_t) * GEN8_PDES_PER_PAGE, GFP_KERNEL); + if (!ppgtt->gen8_pt_dma_addr[i]) + goto err_out; + + for (j = 0; j < GEN8_PDES_PER_PAGE; j++) { + struct page *p = &pt_pages[i * GEN8_PDES_PER_PAGE + j]; + temp = pci_map_page(ppgtt->base.dev->pdev, + p, 0, PAGE_SIZE, + PCI_DMA_BIDIRECTIONAL); + + if (pci_dma_mapping_error(ppgtt->base.dev->pdev, temp)) + goto err_out; + + ppgtt->gen8_pt_dma_addr[i][j] = temp; + } + } + + /* For now, the PPGTT helper functions all require that the PDEs are + * plugged in correctly. So we do that now/here. For aliasing PPGTT, we + * will never need to touch the PDEs again */ + for (i = 0; i < max_pdp; i++) { + gen8_ppgtt_pde_t *pd_vaddr; + pd_vaddr = kmap_atomic(&ppgtt->pd_pages[i]); + for (j = 0; j < GEN8_PDES_PER_PAGE; j++) { + dma_addr_t addr = ppgtt->gen8_pt_dma_addr[i][j]; + pd_vaddr[j] = gen8_pde_encode(ppgtt->base.dev, addr, + I915_CACHE_LLC); + } + kunmap_atomic(pd_vaddr); + } + + ppgtt->base.clear_range(&ppgtt->base, 0, + ppgtt->num_pd_entries * GEN8_PTES_PER_PAGE, + true); + + DRM_DEBUG_DRIVER("Allocated %d pages for page directories (%d wasted)\n", + ppgtt->num_pd_pages, ppgtt->num_pd_pages - max_pdp); + DRM_DEBUG_DRIVER("Allocated %d pages for page tables (%lld wasted)\n", + ppgtt->num_pt_pages, + (ppgtt->num_pt_pages - num_pt_pages) + + size % (1<<30)); + return 0; + +err_out: + ppgtt->base.cleanup(&ppgtt->base); + return ret; +} + static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt) { struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private; @@ -410,6 +698,8 @@ static int i915_gem_init_aliasing_ppgtt(struct drm_device *dev) if (INTEL_INFO(dev)->gen < 8) ret = gen6_ppgtt_init(ppgtt); + else if (IS_GEN8(dev)) + ret = gen8_ppgtt_init(ppgtt, dev_priv->gtt.base.total); else BUG(); @@ -573,6 +863,57 @@ int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj) return 0; } +static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte) +{ +#ifdef writeq + writeq(pte, addr); +#else + iowrite32((u32)pte, addr); + iowrite32(pte >> 32, addr + 4); +#endif +} + +static void gen8_ggtt_insert_entries(struct i915_address_space *vm, + struct sg_table *st, + unsigned int first_entry, + enum i915_cache_level level) +{ + struct drm_i915_private *dev_priv = vm->dev->dev_private; + gen8_gtt_pte_t __iomem *gtt_entries = + (gen8_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry; + int i = 0; + struct sg_page_iter sg_iter; + dma_addr_t addr; + + for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) { + addr = sg_dma_address(sg_iter.sg) + + (sg_iter.sg_pgoffset << PAGE_SHIFT); + gen8_set_pte(>t_entries[i], + gen8_pte_encode(addr, level, true)); + i++; + } + + /* + * XXX: This serves as a posting read to make sure that the PTE has + * actually been updated. There is some concern that even though + * registers and PTEs are within the same BAR that they are potentially + * of NUMA access patterns. Therefore, even with the way we assume + * hardware should work, we must keep this posting read for paranoia. + */ + if (i != 0) + WARN_ON(readq(>t_entries[i-1]) + != gen8_pte_encode(addr, level, true)); + +#if 0 /* TODO: Still needed on GEN8? */ + /* This next bit makes the above posting read even more important. We + * want to flush the TLBs only after we're certain all the PTE updates + * have finished. + */ + I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); + POSTING_READ(GFX_FLSH_CNTL_GEN6); +#endif +} + /* * Binds an object into the global gtt with the specified cache level. The object * will be accessible to the GPU via commands whose operands reference offsets @@ -615,6 +956,30 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, POSTING_READ(GFX_FLSH_CNTL_GEN6); } +static void gen8_ggtt_clear_range(struct i915_address_space *vm, + unsigned int first_entry, + unsigned int num_entries, + bool use_scratch) +{ + struct drm_i915_private *dev_priv = vm->dev->dev_private; + gen8_gtt_pte_t scratch_pte, __iomem *gtt_base = + (gen8_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry; + const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry; + int i; + + if (WARN(num_entries > max_entries, + "First entry = %d; Num entries = %d (max=%d)\n", + first_entry, num_entries, max_entries)) + num_entries = max_entries; + + scratch_pte = gen8_pte_encode(vm->scratch.addr, + I915_CACHE_LLC, + use_scratch); + for (i = 0; i < num_entries; i++) + gen8_set_pte(>t_base[i], scratch_pte); + readl(gtt_base); +} + static void gen6_ggtt_clear_range(struct i915_address_space *vm, unsigned int first_entry, unsigned int num_entries, @@ -638,7 +1003,6 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm, readl(gtt_base); } - static void i915_ggtt_insert_entries(struct i915_address_space *vm, struct sg_table *st, unsigned int pg_start, @@ -720,6 +1084,7 @@ static void i915_gtt_color_adjust(struct drm_mm_node *node, *end -= 4096; } } + void i915_gem_setup_global_gtt(struct drm_device *dev, unsigned long start, unsigned long mappable_end, @@ -817,7 +1182,8 @@ void i915_gem_init_global_gtt(struct drm_device *dev) DRM_ERROR("Aliased PPGTT setup failed %d\n", ret); drm_mm_takedown(&dev_priv->gtt.base.mm); - gtt_size += GEN6_PPGTT_PD_ENTRIES * PAGE_SIZE; + if (INTEL_INFO(dev)->gen < 8) + gtt_size += GEN6_PPGTT_PD_ENTRIES*PAGE_SIZE; } i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size); } @@ -867,6 +1233,15 @@ static inline unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) return snb_gmch_ctl << 20; } +static inline unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl) +{ + bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT; + bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK; + if (bdw_gmch_ctl) + bdw_gmch_ctl = 1 << bdw_gmch_ctl; + return bdw_gmch_ctl << 20; +} + static inline size_t gen6_get_stolen_size(u16 snb_gmch_ctl) { snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT; @@ -874,6 +1249,108 @@ static inline size_t gen6_get_stolen_size(u16 snb_gmch_ctl) return snb_gmch_ctl << 25; /* 32 MB units */ } +static inline size_t gen8_get_stolen_size(u16 bdw_gmch_ctl) +{ + bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; + bdw_gmch_ctl &= BDW_GMCH_GMS_MASK; + return bdw_gmch_ctl << 25; /* 32 MB units */ +} + +static int ggtt_probe_common(struct drm_device *dev, + size_t gtt_size) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + phys_addr_t gtt_bus_addr; + int ret; + + /* For Modern GENs the PTEs and register space are split in the BAR */ + gtt_bus_addr = pci_resource_start(dev->pdev, 0) + + (pci_resource_len(dev->pdev, 0) / 2); + + dev_priv->gtt.gsm = ioremap_wc(gtt_bus_addr, gtt_size); + if (!dev_priv->gtt.gsm) { + DRM_ERROR("Failed to map the gtt page table\n"); + return -ENOMEM; + } + + ret = setup_scratch_page(dev); + if (ret) { + DRM_ERROR("Scratch setup failed\n"); + /* iounmap will also get called at remove, but meh */ + iounmap(dev_priv->gtt.gsm); + } + + return ret; +} + +/* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability + * bits. When using advanced contexts each context stores its own PAT, but + * writing this data shouldn't be harmful even in those cases. */ +static void gen8_setup_private_ppat(struct drm_i915_private *dev_priv) +{ +#define GEN8_PPAT_UC (0<<0) +#define GEN8_PPAT_WC (1<<0) +#define GEN8_PPAT_WT (2<<0) +#define GEN8_PPAT_WB (3<<0) +#define GEN8_PPAT_ELLC_OVERRIDE (0<<2) +/* FIXME(BDW): Bspec is completely confused about cache control bits. */ +#define GEN8_PPAT_LLC (1<<2) +#define GEN8_PPAT_LLCELLC (2<<2) +#define GEN8_PPAT_LLCeLLC (3<<2) +#define GEN8_PPAT_AGE(x) (x<<4) +#define GEN8_PPAT(i, x) ((uint64_t) (x) << ((i) * 8)) + uint64_t pat; + + pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */ + GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */ + GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */ + GEN8_PPAT(3, GEN8_PPAT_UC) | /* Uncached objects, mostly for scanout */ + GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) | + GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) | + GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) | + GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3)); + + /* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b + * write would work. */ + I915_WRITE(GEN8_PRIVATE_PAT, pat); + I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32); +} + +static int gen8_gmch_probe(struct drm_device *dev, + size_t *gtt_total, + size_t *stolen, + phys_addr_t *mappable_base, + unsigned long *mappable_end) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + unsigned int gtt_size; + u16 snb_gmch_ctl; + int ret; + + /* TODO: We're not aware of mappable constraints on gen8 yet */ + *mappable_base = pci_resource_start(dev->pdev, 2); + *mappable_end = pci_resource_len(dev->pdev, 2); + + if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39))) + pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39)); + + pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); + + *stolen = gen8_get_stolen_size(snb_gmch_ctl); + + gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl); + *gtt_total = (gtt_size / sizeof(gen8_gtt_pte_t)) << PAGE_SHIFT; + + gen8_setup_private_ppat(dev_priv); + + ret = ggtt_probe_common(dev, gtt_size); + + dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range; + dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries; + + return ret; +} + static int gen6_gmch_probe(struct drm_device *dev, size_t *gtt_total, size_t *stolen, @@ -881,7 +1358,6 @@ static int gen6_gmch_probe(struct drm_device *dev, unsigned long *mappable_end) { struct drm_i915_private *dev_priv = dev->dev_private; - phys_addr_t gtt_bus_addr; unsigned int gtt_size; u16 snb_gmch_ctl; int ret; @@ -901,24 +1377,13 @@ static int gen6_gmch_probe(struct drm_device *dev, if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40))) pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40)); pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); - gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl); *stolen = gen6_get_stolen_size(snb_gmch_ctl); - *gtt_total = (gtt_size / sizeof(gen6_gtt_pte_t)) << PAGE_SHIFT; - - /* For Modern GENs the PTEs and register space are split in the BAR */ - gtt_bus_addr = pci_resource_start(dev->pdev, 0) + - (pci_resource_len(dev->pdev, 0) / 2); - dev_priv->gtt.gsm = ioremap_wc(gtt_bus_addr, gtt_size); - if (!dev_priv->gtt.gsm) { - DRM_ERROR("Failed to map the gtt page table\n"); - return -ENOMEM; - } + gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl); + *gtt_total = (gtt_size / sizeof(gen6_gtt_pte_t)) << PAGE_SHIFT; - ret = setup_scratch_page(dev); - if (ret) - DRM_ERROR("Scratch setup failed\n"); + ret = ggtt_probe_common(dev, gtt_size); dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range; dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries; @@ -972,7 +1437,7 @@ int i915_gem_gtt_init(struct drm_device *dev) if (INTEL_INFO(dev)->gen <= 5) { gtt->gtt_probe = i915_gmch_probe; gtt->base.cleanup = i915_gmch_remove; - } else { + } else if (INTEL_INFO(dev)->gen < 8) { gtt->gtt_probe = gen6_gmch_probe; gtt->base.cleanup = gen6_gmch_remove; if (IS_HASWELL(dev) && dev_priv->ellc_size) @@ -985,6 +1450,9 @@ int i915_gem_gtt_init(struct drm_device *dev) gtt->base.pte_encode = ivb_pte_encode; else gtt->base.pte_encode = snb_pte_encode; + } else { + dev_priv->gtt.gtt_probe = gen8_gmch_probe; + dev_priv->gtt.base.cleanup = gen6_gmch_remove; } ret = gtt->gtt_probe(dev, >t->base.total, >t->stolen_size, diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index a8bb213..79dcb8f 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -624,6 +624,7 @@ static void i915_gem_record_fences(struct drm_device *dev, /* Fences */ switch (INTEL_INFO(dev)->gen) { + case 8: case 7: case 6: for (i = 0; i < dev_priv->num_fence_regs; i++) @@ -1044,6 +1045,7 @@ void i915_get_extra_instdone(struct drm_device *dev, uint32_t *instdone) default: WARN_ONCE(1, "Unsupported platform\n"); case 7: + case 8: instdone[0] = I915_READ(GEN7_INSTDONE_1); instdone[1] = I915_READ(GEN7_SC_INSTDONE); instdone[2] = I915_READ(GEN7_SAMPLER_INSTDONE); diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index d26f652..5d1dedc 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -270,6 +270,21 @@ static void ivybridge_set_fifo_underrun_reporting(struct drm_device *dev, } } +static void broadwell_set_fifo_underrun_reporting(struct drm_device *dev, + enum pipe pipe, bool enable) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + assert_spin_locked(&dev_priv->irq_lock); + + if (enable) + dev_priv->de_irq_mask[pipe] &= ~GEN8_PIPE_FIFO_UNDERRUN; + else + dev_priv->de_irq_mask[pipe] |= GEN8_PIPE_FIFO_UNDERRUN; + I915_WRITE(GEN8_DE_PIPE_IMR(pipe), dev_priv->de_irq_mask[pipe]); + POSTING_READ(GEN8_DE_PIPE_IMR(pipe)); +} + /** * ibx_display_interrupt_update - update SDEIMR * @dev_priv: driver private @@ -382,6 +397,8 @@ bool intel_set_cpu_fifo_underrun_reporting(struct drm_device *dev, ironlake_set_fifo_underrun_reporting(dev, pipe, enable); else if (IS_GEN7(dev)) ivybridge_set_fifo_underrun_reporting(dev, pipe, enable); + else if (IS_GEN8(dev)) + broadwell_set_fifo_underrun_reporting(dev, pipe, enable); done: spin_unlock_irqrestore(&dev_priv->irq_lock, flags); @@ -1151,6 +1168,56 @@ static void snb_gt_irq_handler(struct drm_device *dev, ivybridge_parity_error_irq_handler(dev, gt_iir); } +static irqreturn_t gen8_gt_irq_handler(struct drm_device *dev, + struct drm_i915_private *dev_priv, + u32 master_ctl) +{ + u32 rcs, bcs, vcs; + uint32_t tmp = 0; + irqreturn_t ret = IRQ_NONE; + + if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) { + tmp = I915_READ(GEN8_GT_IIR(0)); + if (tmp) { + ret = IRQ_HANDLED; + rcs = tmp >> GEN8_RCS_IRQ_SHIFT; + bcs = tmp >> GEN8_BCS_IRQ_SHIFT; + if (rcs & GT_RENDER_USER_INTERRUPT) + notify_ring(dev, &dev_priv->ring[RCS]); + if (bcs & GT_RENDER_USER_INTERRUPT) + notify_ring(dev, &dev_priv->ring[BCS]); + I915_WRITE(GEN8_GT_IIR(0), tmp); + } else + DRM_ERROR("The master control interrupt lied (GT0)!\n"); + } + + if (master_ctl & GEN8_GT_VCS1_IRQ) { + tmp = I915_READ(GEN8_GT_IIR(1)); + if (tmp) { + ret = IRQ_HANDLED; + vcs = tmp >> GEN8_VCS1_IRQ_SHIFT; + if (vcs & GT_RENDER_USER_INTERRUPT) + notify_ring(dev, &dev_priv->ring[VCS]); + I915_WRITE(GEN8_GT_IIR(1), tmp); + } else + DRM_ERROR("The master control interrupt lied (GT1)!\n"); + } + + if (master_ctl & GEN8_GT_VECS_IRQ) { + tmp = I915_READ(GEN8_GT_IIR(3)); + if (tmp) { + ret = IRQ_HANDLED; + vcs = tmp >> GEN8_VECS_IRQ_SHIFT; + if (vcs & GT_RENDER_USER_INTERRUPT) + notify_ring(dev, &dev_priv->ring[VECS]); + I915_WRITE(GEN8_GT_IIR(3), tmp); + } else + DRM_ERROR("The master control interrupt lied (GT3)!\n"); + } + + return ret; +} + #define HPD_STORM_DETECT_PERIOD 1000 #define HPD_STORM_THRESHOLD 5 @@ -1724,6 +1791,117 @@ static irqreturn_t ironlake_irq_handler(int irq, void *arg) return ret; } +static irqreturn_t gen8_irq_handler(int irq, void *arg) +{ + struct drm_device *dev = arg; + struct drm_i915_private *dev_priv = dev->dev_private; + u32 master_ctl; + irqreturn_t ret = IRQ_NONE; + uint32_t tmp = 0; + enum pipe pipe; + + atomic_inc(&dev_priv->irq_received); + + master_ctl = I915_READ(GEN8_MASTER_IRQ); + master_ctl &= ~GEN8_MASTER_IRQ_CONTROL; + if (!master_ctl) + return IRQ_NONE; + + I915_WRITE(GEN8_MASTER_IRQ, 0); + POSTING_READ(GEN8_MASTER_IRQ); + + ret = gen8_gt_irq_handler(dev, dev_priv, master_ctl); + + if (master_ctl & GEN8_DE_MISC_IRQ) { + tmp = I915_READ(GEN8_DE_MISC_IIR); + if (tmp & GEN8_DE_MISC_GSE) + intel_opregion_asle_intr(dev); + else if (tmp) + DRM_ERROR("Unexpected DE Misc interrupt\n"); + else + DRM_ERROR("The master control interrupt lied (DE MISC)!\n"); + + if (tmp) { + I915_WRITE(GEN8_DE_MISC_IIR, tmp); + ret = IRQ_HANDLED; + } + } + + if (master_ctl & GEN8_DE_PORT_IRQ) { + tmp = I915_READ(GEN8_DE_PORT_IIR); + if (tmp & GEN8_AUX_CHANNEL_A) + dp_aux_irq_handler(dev); + else if (tmp) + DRM_ERROR("Unexpected DE Port interrupt\n"); + else + DRM_ERROR("The master control interrupt lied (DE PORT)!\n"); + + if (tmp) { + I915_WRITE(GEN8_DE_PORT_IIR, tmp); + ret = IRQ_HANDLED; + } + } + + for_each_pipe(pipe) { + uint32_t pipe_iir; + + if (!(master_ctl & GEN8_DE_PIPE_IRQ(pipe))) + continue; + + pipe_iir = I915_READ(GEN8_DE_PIPE_IIR(pipe)); + if (pipe_iir & GEN8_PIPE_VBLANK) + drm_handle_vblank(dev, pipe); + + if (pipe_iir & GEN8_PIPE_FLIP_DONE) { + intel_prepare_page_flip(dev, pipe); + intel_finish_page_flip_plane(dev, pipe); + } + + if (pipe_iir & GEN8_PIPE_CDCLK_CRC_DONE) + hsw_pipe_crc_irq_handler(dev, pipe); + + if (pipe_iir & GEN8_PIPE_FIFO_UNDERRUN) { + if (intel_set_cpu_fifo_underrun_reporting(dev, pipe, + false)) + DRM_DEBUG_DRIVER("Pipe %c FIFO underrun\n", + pipe_name(pipe)); + } + + if (pipe_iir & GEN8_DE_PIPE_IRQ_FAULT_ERRORS) { + DRM_ERROR("Fault errors on pipe %c\n: 0x%08x", + pipe_name(pipe), + pipe_iir & GEN8_DE_PIPE_IRQ_FAULT_ERRORS); + } + + if (pipe_iir) { + ret = IRQ_HANDLED; + I915_WRITE(GEN8_DE_PIPE_IIR(pipe), pipe_iir); + } else + DRM_ERROR("The master control interrupt lied (DE PIPE)!\n"); + } + + if (!HAS_PCH_NOP(dev) && master_ctl & GEN8_DE_PCH_IRQ) { + /* + * FIXME(BDW): Assume for now that the new interrupt handling + * scheme also closed the SDE interrupt handling race we've seen + * on older pch-split platforms. But this needs testing. + */ + u32 pch_iir = I915_READ(SDEIIR); + + cpt_irq_handler(dev, pch_iir); + + if (pch_iir) { + I915_WRITE(SDEIIR, pch_iir); + ret = IRQ_HANDLED; + } + } + + I915_WRITE(GEN8_MASTER_IRQ, GEN8_MASTER_IRQ_CONTROL); + POSTING_READ(GEN8_MASTER_IRQ); + + return ret; +} + static void i915_error_wake_up(struct drm_i915_private *dev_priv, bool reset_completed) { @@ -2077,6 +2255,22 @@ static int valleyview_enable_vblank(struct drm_device *dev, int pipe) return 0; } +static int gen8_enable_vblank(struct drm_device *dev, int pipe) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + unsigned long irqflags; + + if (!i915_pipe_enabled(dev, pipe)) + return -EINVAL; + + spin_lock_irqsave(&dev_priv->irq_lock, irqflags); + dev_priv->de_irq_mask[pipe] &= ~GEN8_PIPE_VBLANK; + I915_WRITE(GEN8_DE_PIPE_IMR(pipe), dev_priv->de_irq_mask[pipe]); + POSTING_READ(GEN8_DE_PIPE_IMR(pipe)); + spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); + return 0; +} + /* Called from drm generic code, passed 'crtc' which * we use as a pipe index */ @@ -2125,6 +2319,21 @@ static void valleyview_disable_vblank(struct drm_device *dev, int pipe) spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); } +static void gen8_disable_vblank(struct drm_device *dev, int pipe) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + unsigned long irqflags; + + if (!i915_pipe_enabled(dev, pipe)) + return; + + spin_lock_irqsave(&dev_priv->irq_lock, irqflags); + dev_priv->de_irq_mask[pipe] |= GEN8_PIPE_VBLANK; + I915_WRITE(GEN8_DE_PIPE_IMR(pipe), dev_priv->de_irq_mask[pipe]); + POSTING_READ(GEN8_DE_PIPE_IMR(pipe)); + spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); +} + static u32 ring_last_seqno(struct intel_ring_buffer *ring) { @@ -2459,6 +2668,53 @@ static void valleyview_irq_preinstall(struct drm_device *dev) POSTING_READ(VLV_IER); } +static void gen8_irq_preinstall(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + int pipe; + + atomic_set(&dev_priv->irq_received, 0); + + I915_WRITE(GEN8_MASTER_IRQ, 0); + POSTING_READ(GEN8_MASTER_IRQ); + + /* IIR can theoretically queue up two events. Be paranoid */ +#define GEN8_IRQ_INIT_NDX(type, which) do { \ + I915_WRITE(GEN8_##type##_IMR(which), 0xffffffff); \ + POSTING_READ(GEN8_##type##_IMR(which)); \ + I915_WRITE(GEN8_##type##_IER(which), 0); \ + I915_WRITE(GEN8_##type##_IIR(which), 0xffffffff); \ + POSTING_READ(GEN8_##type##_IIR(which)); \ + I915_WRITE(GEN8_##type##_IIR(which), 0xffffffff); \ + } while (0) + +#define GEN8_IRQ_INIT(type) do { \ + I915_WRITE(GEN8_##type##_IMR, 0xffffffff); \ + POSTING_READ(GEN8_##type##_IMR); \ + I915_WRITE(GEN8_##type##_IER, 0); \ + I915_WRITE(GEN8_##type##_IIR, 0xffffffff); \ + POSTING_READ(GEN8_##type##_IIR); \ + I915_WRITE(GEN8_##type##_IIR, 0xffffffff); \ + } while (0) + + GEN8_IRQ_INIT_NDX(GT, 0); + GEN8_IRQ_INIT_NDX(GT, 1); + GEN8_IRQ_INIT_NDX(GT, 2); + GEN8_IRQ_INIT_NDX(GT, 3); + + for_each_pipe(pipe) { + GEN8_IRQ_INIT_NDX(DE_PIPE, pipe); + } + + GEN8_IRQ_INIT(DE_PORT); + GEN8_IRQ_INIT(DE_MISC); + GEN8_IRQ_INIT(PCU); +#undef GEN8_IRQ_INIT +#undef GEN8_IRQ_INIT_NDX + + POSTING_READ(GEN8_PCU_IIR); +} + static void ibx_hpd_irq_setup(struct drm_device *dev) { drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; @@ -2664,6 +2920,117 @@ static int valleyview_irq_postinstall(struct drm_device *dev) return 0; } +static void gen8_gt_irq_postinstall(struct drm_i915_private *dev_priv) +{ + int i; + + /* These are interrupts we'll toggle with the ring mask register */ + uint32_t gt_interrupts[] = { + GT_RENDER_USER_INTERRUPT << GEN8_RCS_IRQ_SHIFT | + GT_RENDER_L3_PARITY_ERROR_INTERRUPT | + GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT, + GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT | + GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT, + 0, + GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT + }; + + for (i = 0; i < ARRAY_SIZE(gt_interrupts); i++) { + u32 tmp = I915_READ(GEN8_GT_IIR(i)); + if (tmp) + DRM_ERROR("Interrupt (%d) should have been masked in pre-install 0x%08x\n", + i, tmp); + I915_WRITE(GEN8_GT_IMR(i), ~gt_interrupts[i]); + I915_WRITE(GEN8_GT_IER(i), gt_interrupts[i]); + } + POSTING_READ(GEN8_GT_IER(0)); +} + +static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) +{ + struct drm_device *dev = dev_priv->dev; + uint32_t de_pipe_masked = GEN8_PIPE_FLIP_DONE | + GEN8_PIPE_CDCLK_CRC_DONE | + GEN8_PIPE_FIFO_UNDERRUN | + GEN8_DE_PIPE_IRQ_FAULT_ERRORS; + uint32_t de_pipe_enables = de_pipe_masked | GEN8_PIPE_VBLANK; + int pipe; + dev_priv->de_irq_mask[PIPE_A] = ~de_pipe_masked; + dev_priv->de_irq_mask[PIPE_B] = ~de_pipe_masked; + dev_priv->de_irq_mask[PIPE_C] = ~de_pipe_masked; + + for_each_pipe(pipe) { + u32 tmp = I915_READ(GEN8_DE_PIPE_IIR(pipe)); + if (tmp) + DRM_ERROR("Interrupt (%d) should have been masked in pre-install 0x%08x\n", + pipe, tmp); + I915_WRITE(GEN8_DE_PIPE_IMR(pipe), dev_priv->de_irq_mask[pipe]); + I915_WRITE(GEN8_DE_PIPE_IER(pipe), de_pipe_enables); + } + POSTING_READ(GEN8_DE_PIPE_ISR(0)); + + I915_WRITE(GEN8_DE_PORT_IMR, ~GEN8_AUX_CHANNEL_A); + I915_WRITE(GEN8_DE_PORT_IER, GEN8_AUX_CHANNEL_A); + POSTING_READ(GEN8_DE_PORT_IER); +} + +static int gen8_irq_postinstall(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + + gen8_gt_irq_postinstall(dev_priv); + gen8_de_irq_postinstall(dev_priv); + + ibx_irq_postinstall(dev); + + I915_WRITE(GEN8_MASTER_IRQ, DE_MASTER_IRQ_CONTROL); + POSTING_READ(GEN8_MASTER_IRQ); + + return 0; +} + +static void gen8_irq_uninstall(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + int pipe; + + if (!dev_priv) + return; + + atomic_set(&dev_priv->irq_received, 0); + + I915_WRITE(GEN8_MASTER_IRQ, 0); + +#define GEN8_IRQ_FINI_NDX(type, which) do { \ + I915_WRITE(GEN8_##type##_IMR(which), 0xffffffff); \ + I915_WRITE(GEN8_##type##_IER(which), 0); \ + I915_WRITE(GEN8_##type##_IIR(which), 0xffffffff); \ + } while (0) + +#define GEN8_IRQ_FINI(type) do { \ + I915_WRITE(GEN8_##type##_IMR, 0xffffffff); \ + I915_WRITE(GEN8_##type##_IER, 0); \ + I915_WRITE(GEN8_##type##_IIR, 0xffffffff); \ + } while (0) + + GEN8_IRQ_FINI_NDX(GT, 0); + GEN8_IRQ_FINI_NDX(GT, 1); + GEN8_IRQ_FINI_NDX(GT, 2); + GEN8_IRQ_FINI_NDX(GT, 3); + + for_each_pipe(pipe) { + GEN8_IRQ_FINI_NDX(DE_PIPE, pipe); + } + + GEN8_IRQ_FINI(DE_PORT); + GEN8_IRQ_FINI(DE_MISC); + GEN8_IRQ_FINI(PCU); +#undef GEN8_IRQ_FINI +#undef GEN8_IRQ_FINI_NDX + + POSTING_READ(GEN8_PCU_IIR); +} + static void valleyview_irq_uninstall(struct drm_device *dev) { drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; @@ -3443,6 +3810,14 @@ void intel_irq_init(struct drm_device *dev) dev->driver->enable_vblank = valleyview_enable_vblank; dev->driver->disable_vblank = valleyview_disable_vblank; dev_priv->display.hpd_irq_setup = i915_hpd_irq_setup; + } else if (IS_GEN8(dev)) { + dev->driver->irq_handler = gen8_irq_handler; + dev->driver->irq_preinstall = gen8_irq_preinstall; + dev->driver->irq_postinstall = gen8_irq_postinstall; + dev->driver->irq_uninstall = gen8_irq_uninstall; + dev->driver->enable_vblank = gen8_enable_vblank; + dev->driver->disable_vblank = gen8_disable_vblank; + dev_priv->display.hpd_irq_setup = ibx_hpd_irq_setup; } else if (HAS_PCH_SPLIT(dev)) { dev->driver->irq_handler = ironlake_irq_handler; dev->driver->irq_preinstall = ironlake_irq_preinstall; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 04896da..f9eafb6 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -110,6 +110,9 @@ #define RING_PP_DIR_DCLV(ring) ((ring)->mmio_base+0x220) #define PP_DIR_DCLV_2G 0xffffffff +#define GEN8_RING_PDP_UDW(ring, n) ((ring)->mmio_base+0x270 + ((n) * 8 + 4)) +#define GEN8_RING_PDP_LDW(ring, n) ((ring)->mmio_base+0x270 + (n) * 8) + #define GAM_ECOCHK 0x4090 #define ECOCHK_SNB_BIT (1<<10) #define HSW_ECOCHK_ARB_PRIO_SOL (1<<6) @@ -247,6 +250,7 @@ #define MI_BATCH_NON_SECURE_HSW (1<<13) #define MI_BATCH_BUFFER_START MI_INSTR(0x31, 0) #define MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */ +#define MI_BATCH_BUFFER_START_GEN8 MI_INSTR(0x31, 1) #define MI_SEMAPHORE_MBOX MI_INSTR(0x16, 1) /* gen6+ */ #define MI_SEMAPHORE_GLOBAL_GTT (1<<22) #define MI_SEMAPHORE_UPDATE (1<<21) @@ -655,6 +659,9 @@ #define ARB_MODE 0x04030 #define ARB_MODE_SWIZZLE_SNB (1<<4) #define ARB_MODE_SWIZZLE_IVB (1<<5) +#define GAMTARBMODE 0x04a08 +#define ARB_MODE_BWGTLB_DISABLE (1<<9) +#define ARB_MODE_SWIZZLE_BDW (1<<1) #define RENDER_HWS_PGA_GEN7 (0x04080) #define RING_FAULT_REG(ring) (0x4094 + 0x100*(ring)->id) #define RING_FAULT_GTTSEL_MASK (1<<11) @@ -662,6 +669,7 @@ #define RING_FAULT_FAULT_TYPE(x) ((x >> 1) & 0x3) #define RING_FAULT_VALID (1<<0) #define DONE_REG 0x40b0 +#define GEN8_PRIVATE_PAT 0x40e0 #define BSD_HWS_PGA_GEN7 (0x04180) #define BLT_HWS_PGA_GEN7 (0x04280) #define VEBOX_HWS_PGA_GEN7 (0x04380) @@ -741,6 +749,7 @@ #define FPGA_DBG_RM_NOCLAIM (1<<31) #define DERRMR 0x44050 +/* Note that HBLANK events are reserved on bdw+ */ #define DERRMR_PIPEA_SCANLINE (1<<0) #define DERRMR_PIPEA_PRI_FLIP_DONE (1<<1) #define DERRMR_PIPEA_SPR_FLIP_DONE (1<<2) @@ -774,6 +783,7 @@ #define _3D_CHICKEN3 0x02090 #define _3D_CHICKEN_SF_DISABLE_OBJEND_CULL (1 << 10) #define _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL (1 << 5) +#define _3D_CHICKEN_SDE_LIMIT_FIFO_POLY_DEPTH(x) ((x)<<1) #define MI_MODE 0x0209c # define VS_TIMER_DISPATCH (1 << 6) @@ -1820,6 +1830,9 @@ * on HSW) - so the final size is 66944 bytes, which rounds to 17 pages. */ #define HSW_CXT_TOTAL_SIZE (17 * PAGE_SIZE) +/* Same as Haswell, but 72064 bytes now. */ +#define GEN8_CXT_TOTAL_SIZE (18 * PAGE_SIZE) + #define VLV_CLK_CTL2 0x101104 #define CLK_CTL2_CZCOUNT_30NS_SHIFT 28 @@ -1950,8 +1963,8 @@ #define BCLRPAT(pipe) _PIPE(pipe, _BCLRPAT_A, _BCLRPAT_B) #define VSYNCSHIFT(trans) _TRANSCODER(trans, _VSYNCSHIFT_A, _VSYNCSHIFT_B) -/* HSW eDP PSR registers */ -#define EDP_PSR_BASE(dev) 0x64800 +/* HSW+ eDP PSR registers */ +#define EDP_PSR_BASE(dev) (IS_HASWELL(dev) ? 0x64800 : 0x6f800) #define EDP_PSR_CTL(dev) (EDP_PSR_BASE(dev) + 0) #define EDP_PSR_ENABLE (1<<31) #define EDP_PSR_LINK_DISABLE (0<<27) @@ -3241,6 +3254,18 @@ #define PIPEFRAMEPIXEL(pipe) _PIPE(pipe, _PIPEAFRAMEPIXEL, _PIPEBFRAMEPIXEL) #define PIPESTAT(pipe) _PIPE(pipe, _PIPEASTAT, _PIPEBSTAT) +#define _PIPE_MISC_A 0x70030 +#define _PIPE_MISC_B 0x71030 +#define PIPEMISC_DITHER_BPC_MASK (7<<5) +#define PIPEMISC_DITHER_8_BPC (0<<5) +#define PIPEMISC_DITHER_10_BPC (1<<5) +#define PIPEMISC_DITHER_6_BPC (2<<5) +#define PIPEMISC_DITHER_12_BPC (3<<5) +#define PIPEMISC_DITHER_ENABLE (1<<4) +#define PIPEMISC_DITHER_TYPE_MASK (3<<2) +#define PIPEMISC_DITHER_TYPE_SP (0<<2) +#define PIPEMISC(pipe) _PIPE(pipe, _PIPE_MISC_A, _PIPE_MISC_B) + #define VLV_DPFLIPSTAT (VLV_DISPLAY_BASE + 0x70028) #define PIPEB_LINE_COMPARE_INT_EN (1<<29) #define PIPEB_HLINE_INT_EN (1<<28) @@ -3371,6 +3396,7 @@ #define WM1_LP_LATENCY_MASK (0x7f<<24) #define WM1_LP_FBC_MASK (0xf<<20) #define WM1_LP_FBC_SHIFT 20 +#define WM1_LP_FBC_SHIFT_BDW 19 #define WM1_LP_SR_MASK (0x7ff<<8) #define WM1_LP_SR_SHIFT 8 #define WM1_LP_CURSOR_MASK (0xff) @@ -4011,6 +4037,71 @@ #define GTIIR 0x44018 #define GTIER 0x4401c +#define GEN8_MASTER_IRQ 0x44200 +#define GEN8_MASTER_IRQ_CONTROL (1<<31) +#define GEN8_PCU_IRQ (1<<30) +#define GEN8_DE_PCH_IRQ (1<<23) +#define GEN8_DE_MISC_IRQ (1<<22) +#define GEN8_DE_PORT_IRQ (1<<20) +#define GEN8_DE_PIPE_C_IRQ (1<<18) +#define GEN8_DE_PIPE_B_IRQ (1<<17) +#define GEN8_DE_PIPE_A_IRQ (1<<16) +#define GEN8_DE_PIPE_IRQ(pipe) (1<<(16+pipe)) +#define GEN8_GT_VECS_IRQ (1<<6) +#define GEN8_GT_VCS2_IRQ (1<<3) +#define GEN8_GT_VCS1_IRQ (1<<2) +#define GEN8_GT_BCS_IRQ (1<<1) +#define GEN8_GT_RCS_IRQ (1<<0) + +#define GEN8_GT_ISR(which) (0x44300 + (0x10 * (which))) +#define GEN8_GT_IMR(which) (0x44304 + (0x10 * (which))) +#define GEN8_GT_IIR(which) (0x44308 + (0x10 * (which))) +#define GEN8_GT_IER(which) (0x4430c + (0x10 * (which))) + +#define GEN8_BCS_IRQ_SHIFT 16 +#define GEN8_RCS_IRQ_SHIFT 0 +#define GEN8_VCS2_IRQ_SHIFT 16 +#define GEN8_VCS1_IRQ_SHIFT 0 +#define GEN8_VECS_IRQ_SHIFT 0 + +#define GEN8_DE_PIPE_ISR(pipe) (0x44400 + (0x10 * (pipe))) +#define GEN8_DE_PIPE_IMR(pipe) (0x44404 + (0x10 * (pipe))) +#define GEN8_DE_PIPE_IIR(pipe) (0x44408 + (0x10 * (pipe))) +#define GEN8_DE_PIPE_IER(pipe) (0x4440c + (0x10 * (pipe))) +#define GEN8_PIPE_FIFO_UNDERRUN (1 << 31) +#define GEN8_PIPE_CDCLK_CRC_ERROR (1 << 29) +#define GEN8_PIPE_CDCLK_CRC_DONE (1 << 28) +#define GEN8_PIPE_CURSOR_FAULT (1 << 10) +#define GEN8_PIPE_SPRITE_FAULT (1 << 9) +#define GEN8_PIPE_PRIMARY_FAULT (1 << 8) +#define GEN8_PIPE_SPRITE_FLIP_DONE (1 << 5) +#define GEN8_PIPE_FLIP_DONE (1 << 4) +#define GEN8_PIPE_SCAN_LINE_EVENT (1 << 2) +#define GEN8_PIPE_VSYNC (1 << 1) +#define GEN8_PIPE_VBLANK (1 << 0) +#define GEN8_DE_PIPE_IRQ_FAULT_ERRORS \ + (GEN8_PIPE_CURSOR_FAULT | \ + GEN8_PIPE_SPRITE_FAULT | \ + GEN8_PIPE_PRIMARY_FAULT) + +#define GEN8_DE_PORT_ISR 0x44440 +#define GEN8_DE_PORT_IMR 0x44444 +#define GEN8_DE_PORT_IIR 0x44448 +#define GEN8_DE_PORT_IER 0x4444c +#define GEN8_PORT_DP_A_HOTPLUG (1 << 3) +#define GEN8_AUX_CHANNEL_A (1 << 0) + +#define GEN8_DE_MISC_ISR 0x44460 +#define GEN8_DE_MISC_IMR 0x44464 +#define GEN8_DE_MISC_IIR 0x44468 +#define GEN8_DE_MISC_IER 0x4446c +#define GEN8_DE_MISC_GSE (1 << 27) + +#define GEN8_PCU_ISR 0x444e0 +#define GEN8_PCU_IMR 0x444e4 +#define GEN8_PCU_IIR 0x444e8 +#define GEN8_PCU_IER 0x444ec + #define ILK_DISPLAY_CHICKEN2 0x42004 /* Required on all Ironlake and Sandybridge according to the B-Spec. */ #define ILK_ELPIN_409_SELECT (1 << 25) @@ -4036,8 +4127,14 @@ # define CHICKEN3_DGMG_DONE_FIX_DISABLE (1 << 2) #define CHICKEN_PAR1_1 0x42080 +#define DPA_MASK_VBLANK_SRD (1 << 15) #define FORCE_ARB_IDLE_PLANES (1 << 14) +#define _CHICKEN_PIPESL_1_A 0x420b0 +#define _CHICKEN_PIPESL_1_B 0x420b4 +#define DPRS_MASK_VBLANK_SRD (1 << 0) +#define CHICKEN_PIPESL_1(pipe) _PIPE(pipe, _CHICKEN_PIPESL_1_A, _CHICKEN_PIPESL_1_B) + #define DISP_ARB_CTL 0x45000 #define DISP_TILE_SURFACE_SWIZZLING (1<<13) #define DISP_FBC_WM_DIS (1<<15) @@ -4048,6 +4145,8 @@ /* GEN7 chicken */ #define GEN7_COMMON_SLICE_CHICKEN1 0x7010 # define GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC ((1<<10) | (1<<26)) +#define COMMON_SLICE_CHICKEN2 0x7014 +# define GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE (1<<0) #define GEN7_L3CNTLREG1 0xB01C #define GEN7_WA_FOR_GEN7_L3_CONTROL 0x3C4FFF8C @@ -4876,6 +4975,7 @@ #define GEN6_PCODE_WRITE_D_COMP 0x11 #define GEN6_ENCODE_RC6_VID(mv) (((mv) - 245) / 5) #define GEN6_DECODE_RC6_VID(vids) (((vids) * 5) + 245) +#define DISPLAY_IPS_CONTROL 0x19 #define GEN6_PCODE_DATA 0x138128 #define GEN6_PCODE_FREQ_IA_RATIO_SHIFT 8 #define GEN6_PCODE_FREQ_RING_RATIO_SHIFT 16 @@ -4913,6 +5013,7 @@ #define GEN7_HALF_SLICE_CHICKEN1 0xe100 /* IVB GT1 + VLV */ #define GEN7_HALF_SLICE_CHICKEN1_GT2 0xf100 #define GEN7_MAX_PS_THREAD_DEP (8<<12) +#define GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE (1<<10) #define GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE (1<<3) #define GEN7_ROW_CHICKEN2 0xe4f4 @@ -4922,6 +5023,10 @@ #define HSW_ROW_CHICKEN3 0xe49c #define HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE (1 << 6) +#define HALF_SLICE_CHICKEN3 0xe184 +#define GEN8_CENTROID_PIXEL_OPT_DIS (1<<8) +#define GEN8_SAMPLER_POWER_BYPASS_DIS (1<<1) + #define G4X_AUD_VID_DID (dev_priv->info->display_mmio_offset + 0x62020) #define INTEL_AUDIO_DEVCL 0x808629FB #define INTEL_AUDIO_DEVBLC 0x80862801 @@ -5139,6 +5244,7 @@ #define DDI_BUF_CTL_B 0x64100 #define DDI_BUF_CTL(port) _PORT(port, DDI_BUF_CTL_A, DDI_BUF_CTL_B) #define DDI_BUF_CTL_ENABLE (1<<31) +/* Haswell */ #define DDI_BUF_EMP_400MV_0DB_HSW (0<<24) /* Sel0 */ #define DDI_BUF_EMP_400MV_3_5DB_HSW (1<<24) /* Sel1 */ #define DDI_BUF_EMP_400MV_6DB_HSW (2<<24) /* Sel2 */ @@ -5148,6 +5254,16 @@ #define DDI_BUF_EMP_600MV_6DB_HSW (6<<24) /* Sel6 */ #define DDI_BUF_EMP_800MV_0DB_HSW (7<<24) /* Sel7 */ #define DDI_BUF_EMP_800MV_3_5DB_HSW (8<<24) /* Sel8 */ +/* Broadwell */ +#define DDI_BUF_EMP_400MV_0DB_BDW (0<<24) /* Sel0 */ +#define DDI_BUF_EMP_400MV_3_5DB_BDW (1<<24) /* Sel1 */ +#define DDI_BUF_EMP_400MV_6DB_BDW (2<<24) /* Sel2 */ +#define DDI_BUF_EMP_600MV_0DB_BDW (3<<24) /* Sel3 */ +#define DDI_BUF_EMP_600MV_3_5DB_BDW (4<<24) /* Sel4 */ +#define DDI_BUF_EMP_600MV_6DB_BDW (5<<24) /* Sel5 */ +#define DDI_BUF_EMP_800MV_0DB_BDW (6<<24) /* Sel6 */ +#define DDI_BUF_EMP_800MV_3_5DB_BDW (7<<24) /* Sel7 */ +#define DDI_BUF_EMP_1200MV_0DB_BDW (8<<24) /* Sel8 */ #define DDI_BUF_EMP_MASK (0xf<<24) #define DDI_BUF_PORT_REVERSAL (1<<16) #define DDI_BUF_IS_IDLE (1<<7) @@ -5257,6 +5373,9 @@ #define LCPLL_PLL_LOCK (1<<30) #define LCPLL_CLK_FREQ_MASK (3<<26) #define LCPLL_CLK_FREQ_450 (0<<26) +#define LCPLL_CLK_FREQ_54O_BDW (1<<26) +#define LCPLL_CLK_FREQ_337_5_BDW (2<<26) +#define LCPLL_CLK_FREQ_675_BDW (3<<26) #define LCPLL_CD_CLOCK_DISABLE (1<<25) #define LCPLL_CD2X_CLOCK_DISABLE (1<<23) #define LCPLL_POWER_DOWN_ALLOW (1<<22) diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 2e01bd3..b5b1b9b 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -822,16 +822,15 @@ void intel_crt_init(struct drm_device *dev) crt->base.mode_set = intel_crt_mode_set; crt->base.disable = intel_disable_crt; crt->base.enable = intel_enable_crt; - if (IS_HASWELL(dev)) - crt->base.get_config = hsw_crt_get_config; - else - crt->base.get_config = intel_crt_get_config; if (I915_HAS_HOTPLUG(dev)) crt->base.hpd_pin = HPD_CRT; - if (HAS_DDI(dev)) + if (HAS_DDI(dev)) { + crt->base.get_config = hsw_crt_get_config; crt->base.get_hw_state = intel_ddi_get_hw_state; - else + } else { + crt->base.get_config = intel_crt_get_config; crt->base.get_hw_state = intel_crt_get_hw_state; + } intel_connector->get_hw_state = intel_connector_get_hw_state; drm_connector_helper_add(connector, &intel_crt_connector_helper_funcs); diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 31f4fe2..1591576 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -72,6 +72,45 @@ static const u32 hsw_ddi_translations_hdmi[] = { 0x80FFFFFF, 0x00030002, /* 11: 1000 1000 0 */ }; +static const u32 bdw_ddi_translations_edp[] = { + 0x00FFFFFF, 0x00000012, /* DP parameters */ + 0x00EBAFFF, 0x00020011, + 0x00C71FFF, 0x0006000F, + 0x00FFFFFF, 0x00020011, + 0x00DB6FFF, 0x0005000F, + 0x00BEEFFF, 0x000A000C, + 0x00FFFFFF, 0x0005000F, + 0x00DB6FFF, 0x000A000C, + 0x00FFFFFF, 0x000A000C, + 0x00FFFFFF, 0x00140006 /* HDMI parameters 800mV 0dB*/ +}; + +static const u32 bdw_ddi_translations_dp[] = { + 0x00FFFFFF, 0x0007000E, /* DP parameters */ + 0x00D75FFF, 0x000E000A, + 0x00BEFFFF, 0x00140006, + 0x00FFFFFF, 0x000E000A, + 0x00D75FFF, 0x00180004, + 0x80CB2FFF, 0x001B0002, + 0x00F7DFFF, 0x00180004, + 0x80D75FFF, 0x001B0002, + 0x80FFFFFF, 0x001B0002, + 0x00FFFFFF, 0x00140006 /* HDMI parameters 800mV 0dB*/ +}; + +static const u32 bdw_ddi_translations_fdi[] = { + 0x00FFFFFF, 0x0001000E, /* FDI parameters */ + 0x00D75FFF, 0x0004000A, + 0x00C30FFF, 0x00070006, + 0x00AAAFFF, 0x000C0000, + 0x00FFFFFF, 0x0004000A, + 0x00D75FFF, 0x00090004, + 0x00C30FFF, 0x000C0000, + 0x00FFFFFF, 0x00070006, + 0x00D75FFF, 0x000C0000, + 0x00FFFFFF, 0x00140006 /* HDMI parameters 800mV 0dB*/ +}; + enum port intel_ddi_get_encoder_port(struct intel_encoder *intel_encoder) { struct drm_encoder *encoder = &intel_encoder->base; @@ -92,8 +131,9 @@ enum port intel_ddi_get_encoder_port(struct intel_encoder *intel_encoder) } } -/* On Haswell, DDI port buffers must be programmed with correct values - * in advance. The buffer values are different for FDI and DP modes, +/* + * Starting with Haswell, DDI port buffers must be programmed with correct + * values in advance. The buffer values are different for FDI and DP modes, * but the HDMI/DVI fields are shared among those. So we program the DDI * in either FDI or DP modes only, as HDMI connections will work with both * of those @@ -103,10 +143,47 @@ static void intel_prepare_ddi_buffers(struct drm_device *dev, enum port port) struct drm_i915_private *dev_priv = dev->dev_private; u32 reg; int i; - const u32 *ddi_translations = (port == PORT_E) ? - hsw_ddi_translations_fdi : - hsw_ddi_translations_dp; int hdmi_level = dev_priv->vbt.ddi_port_info[port].hdmi_level_shift; + const u32 *ddi_translations_fdi; + const u32 *ddi_translations_dp; + const u32 *ddi_translations_edp; + const u32 *ddi_translations; + + if (IS_BROADWELL(dev)) { + ddi_translations_fdi = bdw_ddi_translations_fdi; + ddi_translations_dp = bdw_ddi_translations_dp; + ddi_translations_edp = bdw_ddi_translations_edp; + } else if (IS_HASWELL(dev)) { + ddi_translations_fdi = hsw_ddi_translations_fdi; + ddi_translations_dp = hsw_ddi_translations_dp; + ddi_translations_edp = hsw_ddi_translations_dp; + } else { + WARN(1, "ddi translation table missing\n"); + ddi_translations_edp = bdw_ddi_translations_dp; + ddi_translations_fdi = bdw_ddi_translations_fdi; + ddi_translations_dp = bdw_ddi_translations_dp; + } + + switch (port) { + case PORT_A: + ddi_translations = ddi_translations_edp; + break; + case PORT_B: + case PORT_C: + ddi_translations = ddi_translations_dp; + break; + case PORT_D: + if (intel_dpd_is_edp(dev)) + ddi_translations = ddi_translations_edp; + else + ddi_translations = ddi_translations_dp; + break; + case PORT_E: + ddi_translations = ddi_translations_fdi; + break; + default: + BUG(); + } for (i = 0, reg = DDI_BUF_TRANS(port); i < ARRAY_SIZE(hsw_ddi_translations_fdi); i++) { @@ -756,7 +833,8 @@ void intel_ddi_enable_transcoder_func(struct drm_crtc *crtc) struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_encoder *intel_encoder = intel_ddi_get_crtc_encoder(crtc); struct drm_encoder *encoder = &intel_encoder->base; - struct drm_i915_private *dev_priv = crtc->dev->dev_private; + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = dev->dev_private; enum pipe pipe = intel_crtc->pipe; enum transcoder cpu_transcoder = intel_crtc->config.cpu_transcoder; enum port port = intel_ddi_get_encoder_port(intel_encoder); @@ -792,10 +870,11 @@ void intel_ddi_enable_transcoder_func(struct drm_crtc *crtc) if (cpu_transcoder == TRANSCODER_EDP) { switch (pipe) { case PIPE_A: - /* Can only use the always-on power well for eDP when - * not using the panel fitter, and when not using motion - * blur mitigation (which we don't support). */ - if (intel_crtc->config.pch_pfit.enabled) + /* On Haswell, can only use the always-on power well for + * eDP when not using the panel fitter, and when not + * using motion blur mitigation (which we don't + * support). */ + if (IS_HASWELL(dev) && intel_crtc->config.pch_pfit.enabled) temp |= TRANS_DDI_EDP_INPUT_A_ONOFF; else temp |= TRANS_DDI_EDP_INPUT_A_ON; @@ -1156,18 +1235,29 @@ static void intel_disable_ddi(struct intel_encoder *intel_encoder) int intel_ddi_get_cdclk_freq(struct drm_i915_private *dev_priv) { + struct drm_device *dev = dev_priv->dev; uint32_t lcpll = I915_READ(LCPLL_CTL); + uint32_t freq = lcpll & LCPLL_CLK_FREQ_MASK; - if (lcpll & LCPLL_CD_SOURCE_FCLK) + if (lcpll & LCPLL_CD_SOURCE_FCLK) { return 800000; - else if (I915_READ(HSW_FUSE_STRAP) & HSW_CDCLK_LIMIT) + } else if (I915_READ(HSW_FUSE_STRAP) & HSW_CDCLK_LIMIT) { return 450000; - else if ((lcpll & LCPLL_CLK_FREQ_MASK) == LCPLL_CLK_FREQ_450) + } else if (freq == LCPLL_CLK_FREQ_450) { return 450000; - else if (IS_ULT(dev_priv->dev)) - return 337500; - else - return 540000; + } else if (IS_HASWELL(dev)) { + if (IS_ULT(dev)) + return 337500; + else + return 540000; + } else { + if (freq == LCPLL_CLK_FREQ_54O_BDW) + return 540000; + else if (freq == LCPLL_CLK_FREQ_337_5_BDW) + return 337500; + else + return 675000; + } } void intel_ddi_pll_init(struct drm_device *dev) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index e92f170..3cddd50 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2156,7 +2156,7 @@ static int ironlake_update_plane(struct drm_crtc *crtc, else dspcntr &= ~DISPPLANE_TILED; - if (IS_HASWELL(dev)) + if (IS_HASWELL(dev) || IS_BROADWELL(dev)) dspcntr &= ~DISPPLANE_TRICKLE_FEED_DISABLE; else dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE; @@ -2176,7 +2176,7 @@ static int ironlake_update_plane(struct drm_crtc *crtc, I915_WRITE(DSPSTRIDE(plane), fb->pitches[0]); I915_MODIFY_DISPBASE(DSPSURF(plane), i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset); - if (IS_HASWELL(dev)) { + if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { I915_WRITE(DSPOFFSET(plane), (y << 16) | x); } else { I915_WRITE(DSPTILEOFF(plane), (y << 16) | x); @@ -3393,15 +3393,26 @@ void hsw_enable_ips(struct intel_crtc *crtc) * only after intel_enable_plane. And intel_enable_plane already waits * for a vblank, so all we need to do here is to enable the IPS bit. */ assert_plane_enabled(dev_priv, crtc->plane); - I915_WRITE(IPS_CTL, IPS_ENABLE); - - /* The bit only becomes 1 in the next vblank, so this wait here is - * essentially intel_wait_for_vblank. If we don't have this and don't - * wait for vblanks until the end of crtc_enable, then the HW state - * readout code will complain that the expected IPS_CTL value is not the - * one we read. */ - if (wait_for(I915_READ_NOTRACE(IPS_CTL) & IPS_ENABLE, 50)) - DRM_ERROR("Timed out waiting for IPS enable\n"); + if (IS_BROADWELL(crtc->base.dev)) { + mutex_lock(&dev_priv->rps.hw_lock); + WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL, 0xc0000000)); + mutex_unlock(&dev_priv->rps.hw_lock); + /* Quoting Art Runyan: "its not safe to expect any particular + * value in IPS_CTL bit 31 after enabling IPS through the + * mailbox." Therefore we need to defer waiting on the state + * change. + * TODO: need to fix this for state checker + */ + } else { + I915_WRITE(IPS_CTL, IPS_ENABLE); + /* The bit only becomes 1 in the next vblank, so this wait here + * is essentially intel_wait_for_vblank. If we don't have this + * and don't wait for vblanks until the end of crtc_enable, then + * the HW state readout code will complain that the expected + * IPS_CTL value is not the one we read. */ + if (wait_for(I915_READ_NOTRACE(IPS_CTL) & IPS_ENABLE, 50)) + DRM_ERROR("Timed out waiting for IPS enable\n"); + } } void hsw_disable_ips(struct intel_crtc *crtc) @@ -3413,7 +3424,12 @@ void hsw_disable_ips(struct intel_crtc *crtc) return; assert_plane_enabled(dev_priv, crtc->plane); - I915_WRITE(IPS_CTL, 0); + if (IS_BROADWELL(crtc->base.dev)) { + mutex_lock(&dev_priv->rps.hw_lock); + WARN_ON(sandybridge_pcode_write(dev_priv, DISPLAY_IPS_CONTROL, 0)); + mutex_unlock(&dev_priv->rps.hw_lock); + } else + I915_WRITE(IPS_CTL, 0); POSTING_READ(IPS_CTL); /* We need to wait for a vblank before we can disable the plane. */ @@ -4244,7 +4260,7 @@ static bool ironlake_check_fdi_lanes(struct drm_device *dev, enum pipe pipe, return false; } - if (IS_HASWELL(dev)) { + if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { if (pipe_config->fdi_lanes > 2) { DRM_DEBUG_KMS("only 2 lanes on haswell, required: %i lanes\n", pipe_config->fdi_lanes); @@ -5818,14 +5834,16 @@ static void intel_set_pipe_csc(struct drm_crtc *crtc) static void haswell_set_pipeconf(struct drm_crtc *crtc) { - struct drm_i915_private *dev_priv = crtc->dev->dev_private; + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = dev->dev_private; struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + enum pipe pipe = intel_crtc->pipe; enum transcoder cpu_transcoder = intel_crtc->config.cpu_transcoder; uint32_t val; val = 0; - if (intel_crtc->config.dither) + if (IS_HASWELL(dev) && intel_crtc->config.dither) val |= (PIPECONF_DITHER_EN | PIPECONF_DITHER_TYPE_SP); if (intel_crtc->config.adjusted_mode.flags & DRM_MODE_FLAG_INTERLACE) @@ -5838,6 +5856,33 @@ static void haswell_set_pipeconf(struct drm_crtc *crtc) I915_WRITE(GAMMA_MODE(intel_crtc->pipe), GAMMA_MODE_MODE_8BIT); POSTING_READ(GAMMA_MODE(intel_crtc->pipe)); + + if (IS_BROADWELL(dev)) { + val = 0; + + switch (intel_crtc->config.pipe_bpp) { + case 18: + val |= PIPEMISC_DITHER_6_BPC; + break; + case 24: + val |= PIPEMISC_DITHER_8_BPC; + break; + case 30: + val |= PIPEMISC_DITHER_10_BPC; + break; + case 36: + val |= PIPEMISC_DITHER_12_BPC; + break; + default: + /* Case prevented by pipe_config_set_bpp. */ + BUG(); + } + + if (intel_crtc->config.dither) + val |= PIPEMISC_DITHER_ENABLE | PIPEMISC_DITHER_TYPE_SP; + + I915_WRITE(PIPEMISC(pipe), val); + } } static bool ironlake_compute_clocks(struct drm_crtc *crtc, @@ -7159,7 +7204,7 @@ static void ivb_update_cursor(struct drm_crtc *crtc, u32 base) cntl &= ~(CURSOR_MODE | MCURSOR_GAMMA_ENABLE); cntl |= CURSOR_MODE_DISABLE; } - if (IS_HASWELL(dev)) { + if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { cntl |= CURSOR_PIPE_CSC_ENABLE; cntl &= ~CURSOR_TRICKLE_FEED_DISABLE; } @@ -7215,7 +7260,7 @@ static void intel_crtc_update_cursor(struct drm_crtc *crtc, if (!visible && !intel_crtc->cursor_visible) return; - if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) { + if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev) || IS_BROADWELL(dev)) { I915_WRITE(CURPOS_IVB(pipe), pos); ivb_update_cursor(crtc, base); } else { @@ -10337,7 +10382,7 @@ static void intel_init_display(struct drm_device *dev) dev_priv->display.write_eld = ironlake_write_eld; dev_priv->display.modeset_global_resources = ivb_modeset_global_resources; - } else if (IS_HASWELL(dev)) { + } else if (IS_HASWELL(dev) || IS_GEN8(dev)) { dev_priv->display.fdi_link_train = hsw_fdi_link_train; dev_priv->display.write_eld = haswell_write_eld; dev_priv->display.modeset_global_resources = @@ -10369,6 +10414,7 @@ static void intel_init_display(struct drm_device *dev) dev_priv->display.queue_flip = intel_gen6_queue_flip; break; case 7: + case 8: /* FIXME(BDW): Check that the gen8 RCS flip works. */ dev_priv->display.queue_flip = intel_gen7_queue_flip; break; } diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 045d464..eb8139d 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -405,6 +405,7 @@ intel_dp_aux_ch(struct intel_dp *intel_dp, uint32_t status; int try, precharge, clock = 0; bool has_aux_irq = INTEL_INFO(dev)->gen >= 5 && !IS_VALLEYVIEW(dev); + uint32_t timeout; /* dp aux is extremely sensitive to irq latency, hence request the * lowest possible wakeup latency and so prevent the cpu from going into @@ -419,6 +420,11 @@ intel_dp_aux_ch(struct intel_dp *intel_dp, else precharge = 5; + if (IS_BROADWELL(dev) && ch_ctl == DPA_AUX_CH_CTL) + timeout = DP_AUX_CH_CTL_TIME_OUT_600us; + else + timeout = DP_AUX_CH_CTL_TIME_OUT_400us; + intel_aux_display_runtime_get(dev_priv); /* Try to wait for any previous AUX channel activity */ @@ -454,7 +460,7 @@ intel_dp_aux_ch(struct intel_dp *intel_dp, I915_WRITE(ch_ctl, DP_AUX_CH_CTL_SEND_BUSY | (has_aux_irq ? DP_AUX_CH_CTL_INTERRUPT : 0) | - DP_AUX_CH_CTL_TIME_OUT_400us | + timeout | (send_bytes << DP_AUX_CH_CTL_MESSAGE_SIZE_SHIFT) | (precharge << DP_AUX_CH_CTL_PRECHARGE_2US_SHIFT) | (aux_clock_divider << DP_AUX_CH_CTL_BIT_CLOCK_2X_SHIFT) | @@ -1610,6 +1616,7 @@ static void intel_edp_psr_enable_source(struct intel_dp *intel_dp) uint32_t max_sleep_time = 0x1f; uint32_t idle_frames = 1; uint32_t val = 0x0; + const uint32_t link_entry_time = EDP_PSR_MIN_LINK_ENTRY_TIME_8_LINES; if (intel_dp->psr_dpcd[1] & DP_PSR_NO_TRAIN_ON_EXIT) { val |= EDP_PSR_LINK_STANDBY; @@ -1620,7 +1627,7 @@ static void intel_edp_psr_enable_source(struct intel_dp *intel_dp) val |= EDP_PSR_LINK_DISABLE; I915_WRITE(EDP_PSR_CTL(dev), val | - EDP_PSR_MIN_LINK_ENTRY_TIME_8_LINES | + IS_BROADWELL(dev) ? 0 : link_entry_time | max_sleep_time << EDP_PSR_MAX_SLEEP_TIME_SHIFT | idle_frames << EDP_PSR_IDLE_FRAME_SHIFT | EDP_PSR_ENABLE); @@ -1957,7 +1964,7 @@ intel_dp_voltage_max(struct intel_dp *intel_dp) struct drm_device *dev = intel_dp_to_dev(intel_dp); enum port port = dp_to_dig_port(intel_dp)->port; - if (IS_VALLEYVIEW(dev)) + if (IS_VALLEYVIEW(dev) || IS_BROADWELL(dev)) return DP_TRAIN_VOLTAGE_SWING_1200; else if (IS_GEN7(dev) && port == PORT_A) return DP_TRAIN_VOLTAGE_SWING_800; @@ -1973,7 +1980,18 @@ intel_dp_pre_emphasis_max(struct intel_dp *intel_dp, uint8_t voltage_swing) struct drm_device *dev = intel_dp_to_dev(intel_dp); enum port port = dp_to_dig_port(intel_dp)->port; - if (HAS_DDI(dev)) { + if (IS_BROADWELL(dev)) { + switch (voltage_swing & DP_TRAIN_VOLTAGE_SWING_MASK) { + case DP_TRAIN_VOLTAGE_SWING_400: + case DP_TRAIN_VOLTAGE_SWING_600: + return DP_TRAIN_PRE_EMPHASIS_6; + case DP_TRAIN_VOLTAGE_SWING_800: + return DP_TRAIN_PRE_EMPHASIS_3_5; + case DP_TRAIN_VOLTAGE_SWING_1200: + default: + return DP_TRAIN_PRE_EMPHASIS_0; + } + } else if (IS_HASWELL(dev)) { switch (voltage_swing & DP_TRAIN_VOLTAGE_SWING_MASK) { case DP_TRAIN_VOLTAGE_SWING_400: return DP_TRAIN_PRE_EMPHASIS_9_5; @@ -2285,6 +2303,41 @@ intel_hsw_signal_levels(uint8_t train_set) } } +static uint32_t +intel_bdw_signal_levels(uint8_t train_set) +{ + int signal_levels = train_set & (DP_TRAIN_VOLTAGE_SWING_MASK | + DP_TRAIN_PRE_EMPHASIS_MASK); + switch (signal_levels) { + case DP_TRAIN_VOLTAGE_SWING_400 | DP_TRAIN_PRE_EMPHASIS_0: + return DDI_BUF_EMP_400MV_0DB_BDW; /* Sel0 */ + case DP_TRAIN_VOLTAGE_SWING_400 | DP_TRAIN_PRE_EMPHASIS_3_5: + return DDI_BUF_EMP_400MV_3_5DB_BDW; /* Sel1 */ + case DP_TRAIN_VOLTAGE_SWING_400 | DP_TRAIN_PRE_EMPHASIS_6: + return DDI_BUF_EMP_400MV_6DB_BDW; /* Sel2 */ + + case DP_TRAIN_VOLTAGE_SWING_600 | DP_TRAIN_PRE_EMPHASIS_0: + return DDI_BUF_EMP_600MV_0DB_BDW; /* Sel3 */ + case DP_TRAIN_VOLTAGE_SWING_600 | DP_TRAIN_PRE_EMPHASIS_3_5: + return DDI_BUF_EMP_600MV_3_5DB_BDW; /* Sel4 */ + case DP_TRAIN_VOLTAGE_SWING_600 | DP_TRAIN_PRE_EMPHASIS_6: + return DDI_BUF_EMP_600MV_6DB_BDW; /* Sel5 */ + + case DP_TRAIN_VOLTAGE_SWING_800 | DP_TRAIN_PRE_EMPHASIS_0: + return DDI_BUF_EMP_800MV_0DB_BDW; /* Sel6 */ + case DP_TRAIN_VOLTAGE_SWING_800 | DP_TRAIN_PRE_EMPHASIS_3_5: + return DDI_BUF_EMP_800MV_3_5DB_BDW; /* Sel7 */ + + case DP_TRAIN_VOLTAGE_SWING_1200 | DP_TRAIN_PRE_EMPHASIS_0: + return DDI_BUF_EMP_1200MV_0DB_BDW; /* Sel8 */ + + default: + DRM_DEBUG_KMS("Unsupported voltage swing/pre-emphasis level:" + "0x%x\n", signal_levels); + return DDI_BUF_EMP_400MV_0DB_BDW; /* Sel0 */ + } +} + /* Properly updates "DP" with the correct signal levels. */ static void intel_dp_set_signal_levels(struct intel_dp *intel_dp, uint32_t *DP) @@ -2295,7 +2348,10 @@ intel_dp_set_signal_levels(struct intel_dp *intel_dp, uint32_t *DP) uint32_t signal_levels, mask; uint8_t train_set = intel_dp->train_set[0]; - if (HAS_DDI(dev)) { + if (IS_BROADWELL(dev)) { + signal_levels = intel_bdw_signal_levels(train_set); + mask = DDI_BUF_EMP_MASK; + } else if (IS_HASWELL(dev)) { signal_levels = intel_hsw_signal_levels(train_set); mask = DDI_BUF_EMP_MASK; } else if (IS_VALLEYVIEW(dev)) { diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 51a8336..03f9ca7 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -847,7 +847,7 @@ static int hdmi_portclock_limit(struct intel_hdmi *hdmi) if (IS_G4X(dev)) return 165000; - else if (IS_HASWELL(dev)) + else if (IS_HASWELL(dev) || INTEL_INFO(dev)->gen >= 8) return 300000; else return 225000; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 09ac9e7..0a07d7c 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2291,7 +2291,9 @@ static uint32_t ilk_compute_fbc_wm(const struct hsw_pipe_wm_parameters *params, static unsigned int ilk_display_fifo_size(const struct drm_device *dev) { - if (INTEL_INFO(dev)->gen >= 7) + if (INTEL_INFO(dev)->gen >= 8) + return 3072; + else if (INTEL_INFO(dev)->gen >= 7) return 768; else return 512; @@ -2336,7 +2338,9 @@ static unsigned int ilk_plane_wm_max(const struct drm_device *dev, } /* clamp to max that the registers can hold */ - if (INTEL_INFO(dev)->gen >= 7) + if (INTEL_INFO(dev)->gen >= 8) + max = level == 0 ? 255 : 2047; + else if (INTEL_INFO(dev)->gen >= 7) /* IVB/HSW primary/sprite plane watermarks */ max = level == 0 ? 127 : 1023; else if (!is_sprite) @@ -2366,10 +2370,13 @@ static unsigned int ilk_cursor_wm_max(const struct drm_device *dev, } /* Calculate the maximum FBC watermark */ -static unsigned int ilk_fbc_wm_max(void) +static unsigned int ilk_fbc_wm_max(struct drm_device *dev) { /* max that registers can hold */ - return 15; + if (INTEL_INFO(dev)->gen >= 8) + return 31; + else + return 15; } static void ilk_compute_wm_maximums(struct drm_device *dev, @@ -2381,7 +2388,7 @@ static void ilk_compute_wm_maximums(struct drm_device *dev, max->pri = ilk_plane_wm_max(dev, level, config, ddb_partitioning, false); max->spr = ilk_plane_wm_max(dev, level, config, ddb_partitioning, true); max->cur = ilk_cursor_wm_max(dev, level, config); - max->fbc = ilk_fbc_wm_max(); + max->fbc = ilk_fbc_wm_max(dev); } static bool ilk_validate_wm_level(int level, @@ -2722,10 +2729,18 @@ static void hsw_compute_wm_results(struct drm_device *dev, if (!r->enable) break; - results->wm_lp[wm_lp - 1] = HSW_WM_LP_VAL(level * 2, - r->fbc_val, - r->pri_val, - r->cur_val); + results->wm_lp[wm_lp - 1] = WM3_LP_EN | + ((level * 2) << WM1_LP_LATENCY_SHIFT) | + (r->pri_val << WM1_LP_SR_SHIFT) | + r->cur_val; + + if (INTEL_INFO(dev)->gen >= 8) + results->wm_lp[wm_lp - 1] |= + r->fbc_val << WM1_LP_FBC_SHIFT_BDW; + else + results->wm_lp[wm_lp - 1] |= + r->fbc_val << WM1_LP_FBC_SHIFT; + results->wm_lp_spr[wm_lp - 1] = r->spr_val; } @@ -3747,6 +3762,78 @@ static void gen6_enable_rps_interrupts(struct drm_device *dev) I915_WRITE(GEN6_PMINTRMSK, ~enabled_intrs); } +static void gen8_enable_rps(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_ring_buffer *ring; + uint32_t rc6_mask = 0, rp_state_cap; + int unused; + + /* 1a: Software RC state - RC0 */ + I915_WRITE(GEN6_RC_STATE, 0); + + /* 1c & 1d: Get forcewake during program sequence. Although the driver + * hasn't enabled a state yet where we need forcewake, BIOS may have.*/ + gen6_gt_force_wake_get(dev_priv); + + /* 2a: Disable RC states. */ + I915_WRITE(GEN6_RC_CONTROL, 0); + + rp_state_cap = I915_READ(GEN6_RP_STATE_CAP); + + /* 2b: Program RC6 thresholds.*/ + I915_WRITE(GEN6_RC6_WAKE_RATE_LIMIT, 40 << 16); + I915_WRITE(GEN6_RC_EVALUATION_INTERVAL, 125000); /* 12500 * 1280ns */ + I915_WRITE(GEN6_RC_IDLE_HYSTERSIS, 25); /* 25 * 1280ns */ + for_each_ring(ring, dev_priv, unused) + I915_WRITE(RING_MAX_IDLE(ring->mmio_base), 10); + I915_WRITE(GEN6_RC_SLEEP, 0); + I915_WRITE(GEN6_RC6_THRESHOLD, 50000); /* 50/125ms per EI */ + + /* 3: Enable RC6 */ + if (intel_enable_rc6(dev) & INTEL_RC6_ENABLE) + rc6_mask = GEN6_RC_CTL_RC6_ENABLE; + DRM_INFO("RC6 %s\n", (rc6_mask & GEN6_RC_CTL_RC6_ENABLE) ? "on" : "off"); + I915_WRITE(GEN6_RC_CONTROL, GEN6_RC_CTL_HW_ENABLE | + GEN6_RC_CTL_EI_MODE(1) | + rc6_mask); + + /* 4 Program defaults and thresholds for RPS*/ + I915_WRITE(GEN6_RPNSWREQ, HSW_FREQUENCY(10)); /* Request 500 MHz */ + I915_WRITE(GEN6_RC_VIDEO_FREQ, HSW_FREQUENCY(12)); /* Request 600 MHz */ + /* NB: Docs say 1s, and 1000000 - which aren't equivalent */ + I915_WRITE(GEN6_RP_DOWN_TIMEOUT, 100000000 / 128); /* 1 second timeout */ + + /* Docs recommend 900MHz, and 300 MHz respectively */ + I915_WRITE(GEN6_RP_INTERRUPT_LIMITS, + dev_priv->rps.max_delay << 24 | + dev_priv->rps.min_delay << 16); + + I915_WRITE(GEN6_RP_UP_THRESHOLD, 7600000 / 128); /* 76ms busyness per EI, 90% */ + I915_WRITE(GEN6_RP_DOWN_THRESHOLD, 31300000 / 128); /* 313ms busyness per EI, 70%*/ + I915_WRITE(GEN6_RP_UP_EI, 66000); /* 84.48ms, XXX: random? */ + I915_WRITE(GEN6_RP_DOWN_EI, 350000); /* 448ms, XXX: random? */ + + I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10); + + /* 5: Enable RPS */ + I915_WRITE(GEN6_RP_CONTROL, + GEN6_RP_MEDIA_TURBO | + GEN6_RP_MEDIA_HW_NORMAL_MODE | + GEN6_RP_MEDIA_IS_GFX | + GEN6_RP_ENABLE | + GEN6_RP_UP_BUSY_AVG | + GEN6_RP_DOWN_IDLE_AVG); + + /* 6: Ring frequency + overclocking (our driver does this later */ + + gen6_set_rps(dev, (I915_READ(GEN6_GT_PERF_STATUS) & 0xff00) >> 8); + + gen6_enable_rps_interrupts(dev); + + gen6_gt_force_wake_put(dev_priv); +} + static void gen6_enable_rps(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -3909,7 +3996,10 @@ void gen6_update_ring_freq(struct drm_device *dev) int diff = dev_priv->rps.max_delay - gpu_freq; unsigned int ia_freq = 0, ring_freq = 0; - if (IS_HASWELL(dev)) { + if (INTEL_INFO(dev)->gen >= 8) { + /* max(2 * GT, DDR). NB: GT is 50MHz units */ + ring_freq = max(min_ring_freq, gpu_freq); + } else if (IS_HASWELL(dev)) { ring_freq = mult_frac(gpu_freq, 5, 4); ring_freq = max(min_ring_freq, ring_freq); /* leave ia_freq as the default, chosen by cpufreq */ @@ -4873,6 +4963,9 @@ static void intel_gen6_powersave_work(struct work_struct *work) if (IS_VALLEYVIEW(dev)) { valleyview_enable_rps(dev); + } else if (IS_BROADWELL(dev)) { + gen8_enable_rps(dev); + gen6_update_ring_freq(dev); } else { gen6_enable_rps(dev); gen6_update_ring_freq(dev); @@ -5181,6 +5274,50 @@ static void lpt_suspend_hw(struct drm_device *dev) } } +static void gen8_init_clock_gating(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + enum pipe i; + + I915_WRITE(WM3_LP_ILK, 0); + I915_WRITE(WM2_LP_ILK, 0); + I915_WRITE(WM1_LP_ILK, 0); + + /* FIXME(BDW): Check all the w/a, some might only apply to + * pre-production hw. */ + + WARN(!i915_preliminary_hw_support, + "GEN8_CENTROID_PIXEL_OPT_DIS not be needed for production\n"); + I915_WRITE(HALF_SLICE_CHICKEN3, + _MASKED_BIT_ENABLE(GEN8_CENTROID_PIXEL_OPT_DIS)); + I915_WRITE(HALF_SLICE_CHICKEN3, + _MASKED_BIT_ENABLE(GEN8_SAMPLER_POWER_BYPASS_DIS)); + I915_WRITE(GAMTARBMODE, _MASKED_BIT_ENABLE(ARB_MODE_BWGTLB_DISABLE)); + + I915_WRITE(_3D_CHICKEN3, + _3D_CHICKEN_SDE_LIMIT_FIFO_POLY_DEPTH(2)); + + I915_WRITE(COMMON_SLICE_CHICKEN2, + _MASKED_BIT_ENABLE(GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE)); + + I915_WRITE(GEN7_HALF_SLICE_CHICKEN1, + _MASKED_BIT_ENABLE(GEN7_SINGLE_SUBSCAN_DISPATCH_ENABLE)); + + /* WaSwitchSolVfFArbitrationPriority */ + I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | HSW_ECOCHK_ARB_PRIO_SOL); + + /* WaPsrDPAMaskVBlankInSRD */ + I915_WRITE(CHICKEN_PAR1_1, + I915_READ(CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD); + + /* WaPsrDPRSUnmaskVBlankInSRD */ + for_each_pipe(i) { + I915_WRITE(CHICKEN_PIPESL_1(i), + I915_READ(CHICKEN_PIPESL_1(i) | + DPRS_MASK_VBLANK_SRD)); + } +} + static void haswell_init_clock_gating(struct drm_device *dev) { struct drm_i915_private *dev_priv = dev->dev_private; @@ -5511,7 +5648,9 @@ static bool is_always_on_power_domain(struct drm_device *dev, BUG_ON(BIT(domain) & ~POWER_DOMAIN_MASK); - if (IS_HASWELL(dev)) { + if (IS_BROADWELL(dev)) { + always_on_domains = BDW_ALWAYS_ON_POWER_DOMAINS; + } else if (IS_HASWELL(dev)) { always_on_domains = HSW_ALWAYS_ON_POWER_DOMAINS; } else { WARN_ON(1); @@ -5833,6 +5972,8 @@ void intel_init_pm(struct drm_device *dev) dev_priv->display.update_wm = NULL; } dev_priv->display.init_clock_gating = haswell_init_clock_gating; + } else if (INTEL_INFO(dev)->gen == 8) { + dev_priv->display.init_clock_gating = gen8_init_clock_gating; } else dev_priv->display.update_wm = NULL; } else if (IS_VALLEYVIEW(dev)) { @@ -5995,4 +6136,3 @@ void intel_pm_init(struct drm_device *dev) INIT_DELAYED_WORK(&dev_priv->rps.delayed_resume_work, intel_gen6_powersave_work); } - diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 2dec134..b620337 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -360,6 +360,47 @@ gen7_render_ring_flush(struct intel_ring_buffer *ring, return 0; } +static int +gen8_render_ring_flush(struct intel_ring_buffer *ring, + u32 invalidate_domains, u32 flush_domains) +{ + u32 flags = 0; + u32 scratch_addr = ring->scratch.gtt_offset + 128; + int ret; + + flags |= PIPE_CONTROL_CS_STALL; + + if (flush_domains) { + flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; + flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; + } + if (invalidate_domains) { + flags |= PIPE_CONTROL_TLB_INVALIDATE; + flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_QW_WRITE; + flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + } + + ret = intel_ring_begin(ring, 6); + if (ret) + return ret; + + intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); + intel_ring_emit(ring, flags); + intel_ring_emit(ring, scratch_addr); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_emit(ring, 0); + intel_ring_advance(ring); + + return 0; + +} + static void ring_write_tail(struct intel_ring_buffer *ring, u32 value) { @@ -1066,6 +1107,52 @@ hsw_vebox_put_irq(struct intel_ring_buffer *ring) spin_unlock_irqrestore(&dev_priv->irq_lock, flags); } +static bool +gen8_ring_get_irq(struct intel_ring_buffer *ring) +{ + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + unsigned long flags; + + if (!dev->irq_enabled) + return false; + + spin_lock_irqsave(&dev_priv->irq_lock, flags); + if (ring->irq_refcount++ == 0) { + if (HAS_L3_DPF(dev) && ring->id == RCS) { + I915_WRITE_IMR(ring, + ~(ring->irq_enable_mask | + GT_RENDER_L3_PARITY_ERROR_INTERRUPT)); + } else { + I915_WRITE_IMR(ring, ~ring->irq_enable_mask); + } + POSTING_READ(RING_IMR(ring->mmio_base)); + } + spin_unlock_irqrestore(&dev_priv->irq_lock, flags); + + return true; +} + +static void +gen8_ring_put_irq(struct intel_ring_buffer *ring) +{ + struct drm_device *dev = ring->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + unsigned long flags; + + spin_lock_irqsave(&dev_priv->irq_lock, flags); + if (--ring->irq_refcount == 0) { + if (HAS_L3_DPF(dev) && ring->id == RCS) { + I915_WRITE_IMR(ring, + ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT); + } else { + I915_WRITE_IMR(ring, ~0); + } + POSTING_READ(RING_IMR(ring->mmio_base)); + } + spin_unlock_irqrestore(&dev_priv->irq_lock, flags); +} + static int i965_dispatch_execbuffer(struct intel_ring_buffer *ring, u32 offset, u32 length, @@ -1624,6 +1711,8 @@ static int gen6_bsd_ring_flush(struct intel_ring_buffer *ring, return ret; cmd = MI_FLUSH_DW; + if (INTEL_INFO(ring->dev)->gen >= 8) + cmd += 1; /* * Bspec vol 1c.5 - video engine command streamer: * "If ENABLED, all TLBs will be invalidated once the flush @@ -1635,9 +1724,38 @@ static int gen6_bsd_ring_flush(struct intel_ring_buffer *ring, MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW; intel_ring_emit(ring, cmd); intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT); + if (INTEL_INFO(ring->dev)->gen >= 8) { + intel_ring_emit(ring, 0); /* upper addr */ + intel_ring_emit(ring, 0); /* value */ + } else { + intel_ring_emit(ring, 0); + intel_ring_emit(ring, MI_NOOP); + } + intel_ring_advance(ring); + return 0; +} + +static int +gen8_ring_dispatch_execbuffer(struct intel_ring_buffer *ring, + u32 offset, u32 len, + unsigned flags) +{ + struct drm_i915_private *dev_priv = ring->dev->dev_private; + bool ppgtt = dev_priv->mm.aliasing_ppgtt != NULL && + !(flags & I915_DISPATCH_SECURE); + int ret; + + ret = intel_ring_begin(ring, 4); + if (ret) + return ret; + + /* FIXME(BDW): Address space and security selectors. */ + intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8)); + intel_ring_emit(ring, offset); intel_ring_emit(ring, 0); intel_ring_emit(ring, MI_NOOP); intel_ring_advance(ring); + return 0; } @@ -1697,6 +1815,8 @@ static int gen6_ring_flush(struct intel_ring_buffer *ring, return ret; cmd = MI_FLUSH_DW; + if (INTEL_INFO(ring->dev)->gen >= 8) + cmd += 1; /* * Bspec vol 1c.3 - blitter engine command streamer: * "If ENABLED, all TLBs will be invalidated once the flush @@ -1708,8 +1828,13 @@ static int gen6_ring_flush(struct intel_ring_buffer *ring, MI_FLUSH_DW_OP_STOREDW; intel_ring_emit(ring, cmd); intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT); - intel_ring_emit(ring, 0); - intel_ring_emit(ring, MI_NOOP); + if (INTEL_INFO(ring->dev)->gen >= 8) { + intel_ring_emit(ring, 0); /* upper addr */ + intel_ring_emit(ring, 0); /* value */ + } else { + intel_ring_emit(ring, 0); + intel_ring_emit(ring, MI_NOOP); + } intel_ring_advance(ring); if (IS_GEN7(dev) && flush) @@ -1732,8 +1857,14 @@ int intel_init_render_ring_buffer(struct drm_device *dev) ring->flush = gen7_render_ring_flush; if (INTEL_INFO(dev)->gen == 6) ring->flush = gen6_render_ring_flush; - ring->irq_get = gen6_ring_get_irq; - ring->irq_put = gen6_ring_put_irq; + if (INTEL_INFO(dev)->gen >= 8) { + ring->flush = gen8_render_ring_flush; + ring->irq_get = gen8_ring_get_irq; + ring->irq_put = gen8_ring_put_irq; + } else { + ring->irq_get = gen6_ring_get_irq; + ring->irq_put = gen6_ring_put_irq; + } ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT; ring->get_seqno = gen6_ring_get_seqno; ring->set_seqno = ring_set_seqno; @@ -1775,6 +1906,8 @@ int intel_init_render_ring_buffer(struct drm_device *dev) ring->write_tail = ring_write_tail; if (IS_HASWELL(dev)) ring->dispatch_execbuffer = hsw_ring_dispatch_execbuffer; + else if (IS_GEN8(dev)) + ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; else if (INTEL_INFO(dev)->gen >= 6) ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer; else if (INTEL_INFO(dev)->gen >= 4) @@ -1888,7 +2021,7 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev) ring->id = VCS; ring->write_tail = ring_write_tail; - if (IS_GEN6(dev) || IS_GEN7(dev)) { + if (INTEL_INFO(dev)->gen >= 6) { ring->mmio_base = GEN6_BSD_RING_BASE; /* gen6 bsd needs a special wa for tail updates */ if (IS_GEN6(dev)) @@ -1897,10 +2030,20 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev) ring->add_request = gen6_add_request; ring->get_seqno = gen6_ring_get_seqno; ring->set_seqno = ring_set_seqno; - ring->irq_enable_mask = GT_BSD_USER_INTERRUPT; - ring->irq_get = gen6_ring_get_irq; - ring->irq_put = gen6_ring_put_irq; - ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer; + if (INTEL_INFO(dev)->gen >= 8) { + ring->irq_enable_mask = + GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT; + ring->irq_get = gen8_ring_get_irq; + ring->irq_put = gen8_ring_put_irq; + ring->dispatch_execbuffer = + gen8_ring_dispatch_execbuffer; + } else { + ring->irq_enable_mask = GT_BSD_USER_INTERRUPT; + ring->irq_get = gen6_ring_get_irq; + ring->irq_put = gen6_ring_put_irq; + ring->dispatch_execbuffer = + gen6_ring_dispatch_execbuffer; + } ring->sync_to = gen6_ring_sync; ring->semaphore_register[RCS] = MI_SEMAPHORE_SYNC_VR; ring->semaphore_register[VCS] = MI_SEMAPHORE_SYNC_INVALID; @@ -1946,10 +2089,18 @@ int intel_init_blt_ring_buffer(struct drm_device *dev) ring->add_request = gen6_add_request; ring->get_seqno = gen6_ring_get_seqno; ring->set_seqno = ring_set_seqno; - ring->irq_enable_mask = GT_BLT_USER_INTERRUPT; - ring->irq_get = gen6_ring_get_irq; - ring->irq_put = gen6_ring_put_irq; - ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer; + if (INTEL_INFO(dev)->gen >= 8) { + ring->irq_enable_mask = + GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT; + ring->irq_get = gen8_ring_get_irq; + ring->irq_put = gen8_ring_put_irq; + ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; + } else { + ring->irq_enable_mask = GT_BLT_USER_INTERRUPT; + ring->irq_get = gen6_ring_get_irq; + ring->irq_put = gen6_ring_put_irq; + ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer; + } ring->sync_to = gen6_ring_sync; ring->semaphore_register[RCS] = MI_SEMAPHORE_SYNC_BR; ring->semaphore_register[VCS] = MI_SEMAPHORE_SYNC_BV; @@ -1978,10 +2129,19 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev) ring->add_request = gen6_add_request; ring->get_seqno = gen6_ring_get_seqno; ring->set_seqno = ring_set_seqno; - ring->irq_enable_mask = PM_VEBOX_USER_INTERRUPT; - ring->irq_get = hsw_vebox_get_irq; - ring->irq_put = hsw_vebox_put_irq; - ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer; + + if (INTEL_INFO(dev)->gen >= 8) { + ring->irq_enable_mask = + GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT; + ring->irq_get = gen8_ring_get_irq; + ring->irq_put = gen8_ring_put_irq; + ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; + } else { + ring->irq_enable_mask = PM_VEBOX_USER_INTERRUPT; + ring->irq_get = hsw_vebox_get_irq; + ring->irq_put = hsw_vebox_put_irq; + ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer; + } ring->sync_to = gen6_ring_sync; ring->semaphore_register[RCS] = MI_SEMAPHORE_SYNC_VER; ring->semaphore_register[VCS] = MI_SEMAPHORE_SYNC_VEV; diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index 07b13dc..b9fabf8 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -260,14 +260,14 @@ ivb_update_plane(struct drm_plane *plane, struct drm_crtc *crtc, if (obj->tiling_mode != I915_TILING_NONE) sprctl |= SPRITE_TILED; - if (IS_HASWELL(dev)) + if (IS_HASWELL(dev) || IS_BROADWELL(dev)) sprctl &= ~SPRITE_TRICKLE_FEED_DISABLE; else sprctl |= SPRITE_TRICKLE_FEED_DISABLE; sprctl |= SPRITE_ENABLE; - if (IS_HASWELL(dev)) + if (IS_HASWELL(dev) || IS_BROADWELL(dev)) sprctl |= SPRITE_PIPE_CSC_ENABLE; intel_update_sprite_watermarks(plane, crtc, src_w, pixel_size, true, @@ -306,7 +306,7 @@ ivb_update_plane(struct drm_plane *plane, struct drm_crtc *crtc, /* HSW consolidates SPRTILEOFF and SPRLINOFF into a single SPROFFSET * register */ - if (IS_HASWELL(dev)) + if (IS_HASWELL(dev) || IS_BROADWELL(dev)) I915_WRITE(SPROFFSET(pipe), (y << 16) | x); else if (obj->tiling_mode != I915_TILING_NONE) I915_WRITE(SPRTILEOFF(pipe), (y << 16) | x); @@ -1092,6 +1092,7 @@ intel_plane_init(struct drm_device *dev, enum pipe pipe, int plane) break; case 7: + case 8: if (IS_IVYBRIDGE(dev)) { intel_plane->can_scale = true; intel_plane->max_downscale = 2; diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index f6fae35..f9883ce 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -93,7 +93,7 @@ static void __gen6_gt_force_wake_mt_get(struct drm_i915_private *dev_priv) { u32 forcewake_ack; - if (IS_HASWELL(dev_priv->dev)) + if (IS_HASWELL(dev_priv->dev) || IS_GEN8(dev_priv->dev)) forcewake_ack = FORCEWAKE_ACK_HSW; else forcewake_ack = FORCEWAKE_MT_ACK; @@ -112,7 +112,8 @@ static void __gen6_gt_force_wake_mt_get(struct drm_i915_private *dev_priv) DRM_ERROR("Timed out waiting for forcewake to ack request.\n"); /* WaRsForcewakeWaitTC0:ivb,hsw */ - __gen6_gt_wait_for_thread_c0(dev_priv); + if (INTEL_INFO(dev_priv->dev)->gen < 8) + __gen6_gt_wait_for_thread_c0(dev_priv); } static void gen6_gt_check_fifodbg(struct drm_i915_private *dev_priv) @@ -459,6 +460,46 @@ hsw_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, bool trace) spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); \ } +static const u32 gen8_shadowed_regs[] = { + FORCEWAKE_MT, + GEN6_RPNSWREQ, + GEN6_RC_VIDEO_FREQ, + RING_TAIL(RENDER_RING_BASE), + RING_TAIL(GEN6_BSD_RING_BASE), + RING_TAIL(VEBOX_RING_BASE), + RING_TAIL(BLT_RING_BASE), + /* TODO: Other registers are not yet used */ +}; + +static bool is_gen8_shadowed(struct drm_i915_private *dev_priv, u32 reg) +{ + int i; + for (i = 0; i < ARRAY_SIZE(gen8_shadowed_regs); i++) + if (reg == gen8_shadowed_regs[i]) + return true; + + return false; +} + +#define __gen8_write(x) \ +static void \ +gen8_write##x(struct drm_i915_private *dev_priv, off_t reg, u##x val, bool trace) { \ + bool __needs_put = !is_gen8_shadowed(dev_priv, reg); \ + REG_WRITE_HEADER; \ + if (__needs_put) { \ + dev_priv->uncore.funcs.force_wake_get(dev_priv); \ + } \ + __raw_i915_write##x(dev_priv, reg, val); \ + if (__needs_put) { \ + dev_priv->uncore.funcs.force_wake_put(dev_priv); \ + } \ + spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags); \ +} + +__gen8_write(8) +__gen8_write(16) +__gen8_write(32) +__gen8_write(64) __hsw_write(8) __hsw_write(16) __hsw_write(32) @@ -476,6 +517,7 @@ __gen4_write(16) __gen4_write(32) __gen4_write(64) +#undef __gen8_write #undef __hsw_write #undef __gen6_write #undef __gen5_write @@ -492,7 +534,7 @@ void intel_uncore_init(struct drm_device *dev) if (IS_VALLEYVIEW(dev)) { dev_priv->uncore.funcs.force_wake_get = vlv_force_wake_get; dev_priv->uncore.funcs.force_wake_put = vlv_force_wake_put; - } else if (IS_HASWELL(dev)) { + } else if (IS_HASWELL(dev) || IS_GEN8(dev)) { dev_priv->uncore.funcs.force_wake_get = __gen6_gt_force_wake_mt_get; dev_priv->uncore.funcs.force_wake_put = __gen6_gt_force_wake_mt_put; } else if (IS_IVYBRIDGE(dev)) { @@ -534,6 +576,16 @@ void intel_uncore_init(struct drm_device *dev) } switch (INTEL_INFO(dev)->gen) { + default: + dev_priv->uncore.funcs.mmio_writeb = gen8_write8; + dev_priv->uncore.funcs.mmio_writew = gen8_write16; + dev_priv->uncore.funcs.mmio_writel = gen8_write32; + dev_priv->uncore.funcs.mmio_writeq = gen8_write64; + dev_priv->uncore.funcs.mmio_readb = gen6_read8; + dev_priv->uncore.funcs.mmio_readw = gen6_read16; + dev_priv->uncore.funcs.mmio_readl = gen6_read32; + dev_priv->uncore.funcs.mmio_readq = gen6_read64; + break; case 7: case 6: if (IS_HASWELL(dev)) { diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h index 3abfa6e..97d5497 100644 --- a/include/drm/i915_drm.h +++ b/include/drm/i915_drm.h @@ -49,6 +49,10 @@ extern bool i915_gpu_turbo_disable(void); #define SNB_GMCH_GGMS_MASK 0x3 #define SNB_GMCH_GMS_SHIFT 3 /* Graphics Mode Select */ #define SNB_GMCH_GMS_MASK 0x1f +#define BDW_GMCH_GGMS_SHIFT 6 +#define BDW_GMCH_GGMS_MASK 0x3 +#define BDW_GMCH_GMS_SHIFT 8 +#define BDW_GMCH_GMS_MASK 0xff #define I830_GMCH_CTRL 0x52 diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h index 8a10f5c..940ece4 100644 --- a/include/drm/i915_pciids.h +++ b/include/drm/i915_pciids.h @@ -208,4 +208,29 @@ #define INTEL_VLV_D_IDS(info) \ INTEL_VGA_DEVICE(0x0155, info) +#define _INTEL_BDW_M(gt, id, info) \ + INTEL_VGA_DEVICE((((gt) - 1) << 4) | (id), info) +#define _INTEL_BDW_D(gt, id, info) \ + INTEL_VGA_DEVICE((((gt) - 1) << 4) | (id), info) + +#define _INTEL_BDW_M_IDS(gt, info) \ + _INTEL_BDW_M(gt, 0x1602, info), /* ULT */ \ + _INTEL_BDW_M(gt, 0x1606, info), /* ULT */ \ + _INTEL_BDW_M(gt, 0x160B, info), /* Iris */ \ + _INTEL_BDW_M(gt, 0x160E, info) /* ULX */ + +#define _INTEL_BDW_D_IDS(gt, info) \ + _INTEL_BDW_D(gt, 0x160A, info), /* Server */ \ + _INTEL_BDW_D(gt, 0x160D, info) /* Workstation */ + +#define INTEL_BDW_M_IDS(info) \ + _INTEL_BDW_M_IDS(1, info), \ + _INTEL_BDW_M_IDS(2, info), \ + _INTEL_BDW_M_IDS(3, info) + +#define INTEL_BDW_D_IDS(info) \ + _INTEL_BDW_D_IDS(1, info), \ + _INTEL_BDW_D_IDS(2, info), \ + _INTEL_BDW_D_IDS(3, info) + #endif /* _I915_PCIIDS_H */ |