diff options
53 files changed, 4259 insertions, 3309 deletions
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index de7501e..8b8852b 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -317,16 +317,11 @@ static phys_addr_t __init i85x_stolen_base(int num, int slot, int func, static phys_addr_t __init i865_stolen_base(int num, int slot, int func, size_t stolen_size) { - u16 toud; + u16 toud = 0; - /* - * FIXME is the graphics stolen memory region - * always at TOUD? Ie. is it always the last - * one to be allocated by the BIOS? - */ toud = read_pci_config_16(0, 0, 0, I865_TOUD); - return (phys_addr_t)toud << 16; + return (phys_addr_t)(toud << 16) + i845_tseg_size(); } static phys_addr_t __init gen3_stolen_base(int num, int slot, int func, diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index 4431129..0f7d28a 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -845,6 +845,8 @@ void intel_gtt_insert_page(dma_addr_t addr, unsigned int flags) { intel_private.driver->write_entry(addr, pg, flags); + if (intel_private.driver->chipset_flush) + intel_private.driver->chipset_flush(); } EXPORT_SYMBOL(intel_gtt_insert_page); diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 193ff2d..4054c94 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -47,7 +47,7 @@ obj-$(CONFIG_DRM_RADEON)+= radeon/ obj-$(CONFIG_DRM_AMDGPU)+= amd/amdgpu/ obj-$(CONFIG_DRM_MGA) += mga/ obj-$(CONFIG_DRM_I810) += i810/ -obj-$(CONFIG_DRM_I915) += i915/ +obj-$(CONFIG_DRM_I915) += i915/ obj-$(CONFIG_DRM_MGAG200) += mgag200/ obj-$(CONFIG_DRM_VC4) += vc4/ obj-$(CONFIG_DRM_CIRRUS_QEMU) += cirrus/ diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index dda724f..a7da246 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -3,12 +3,16 @@ # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. subdir-ccflags-$(CONFIG_DRM_I915_WERROR) := -Werror +subdir-ccflags-y += \ + $(call as-instr,movntdqa (%eax)$(comma)%xmm0,-DCONFIG_AS_MOVNTDQA) # Please keep these build lists sorted! # core driver code i915-y := i915_drv.o \ i915_irq.o \ + i915_memcpy.o \ + i915_mm.o \ i915_params.o \ i915_pci.o \ i915_suspend.o \ @@ -110,6 +114,6 @@ i915-y += intel_gvt.o include $(src)/gvt/Makefile endif -obj-$(CONFIG_DRM_I915) += i915.o +obj-$(CONFIG_DRM_I915) += i915.o CFLAGS_i915_trace_points.o := -I$(src) diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 1db829c..3c72b3b 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -86,24 +86,25 @@ * general bitmasking mechanism. */ -#define STD_MI_OPCODE_MASK 0xFF800000 -#define STD_3D_OPCODE_MASK 0xFFFF0000 -#define STD_2D_OPCODE_MASK 0xFFC00000 -#define STD_MFX_OPCODE_MASK 0xFFFF0000 +#define STD_MI_OPCODE_SHIFT (32 - 9) +#define STD_3D_OPCODE_SHIFT (32 - 16) +#define STD_2D_OPCODE_SHIFT (32 - 10) +#define STD_MFX_OPCODE_SHIFT (32 - 16) +#define MIN_OPCODE_SHIFT 16 #define CMD(op, opm, f, lm, fl, ...) \ { \ .flags = (fl) | ((f) ? CMD_DESC_FIXED : 0), \ - .cmd = { (op), (opm) }, \ + .cmd = { (op), ~0u << (opm) }, \ .length = { (lm) }, \ __VA_ARGS__ \ } /* Convenience macros to compress the tables */ -#define SMI STD_MI_OPCODE_MASK -#define S3D STD_3D_OPCODE_MASK -#define S2D STD_2D_OPCODE_MASK -#define SMFX STD_MFX_OPCODE_MASK +#define SMI STD_MI_OPCODE_SHIFT +#define S3D STD_3D_OPCODE_SHIFT +#define S2D STD_2D_OPCODE_SHIFT +#define SMFX STD_MFX_OPCODE_SHIFT #define F true #define S CMD_DESC_SKIP #define R CMD_DESC_REJECT @@ -350,6 +351,9 @@ static const struct drm_i915_cmd_descriptor hsw_blt_cmds[] = { CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ), }; +static const struct drm_i915_cmd_descriptor noop_desc = + CMD(MI_NOOP, SMI, F, 1, S); + #undef CMD #undef SMI #undef S3D @@ -458,6 +462,7 @@ static const struct drm_i915_reg_descriptor gen7_render_regs[] = { REG32(GEN7_GPGPU_DISPATCHDIMX), REG32(GEN7_GPGPU_DISPATCHDIMY), REG32(GEN7_GPGPU_DISPATCHDIMZ), + REG64_IDX(RING_TIMESTAMP, BSD_RING_BASE), REG64_IDX(GEN7_SO_NUM_PRIMS_WRITTEN, 0), REG64_IDX(GEN7_SO_NUM_PRIMS_WRITTEN, 1), REG64_IDX(GEN7_SO_NUM_PRIMS_WRITTEN, 2), @@ -473,6 +478,7 @@ static const struct drm_i915_reg_descriptor gen7_render_regs[] = { REG32(GEN7_L3SQCREG1), REG32(GEN7_L3CNTLREG2), REG32(GEN7_L3CNTLREG3), + REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE), }; static const struct drm_i915_reg_descriptor hsw_render_regs[] = { @@ -502,7 +508,10 @@ static const struct drm_i915_reg_descriptor hsw_render_regs[] = { }; static const struct drm_i915_reg_descriptor gen7_blt_regs[] = { + REG64_IDX(RING_TIMESTAMP, RENDER_RING_BASE), + REG64_IDX(RING_TIMESTAMP, BSD_RING_BASE), REG32(BCS_SWCTRL), + REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE), }; static const struct drm_i915_reg_descriptor ivb_master_regs[] = { @@ -691,12 +700,26 @@ struct cmd_node { * non-opcode bits being set. But if we don't include those bits, some 3D * commands may hash to the same bucket due to not including opcode bits that * make the command unique. For now, we will risk hashing to the same bucket. - * - * If we attempt to generate a perfect hash, we should be able to look at bits - * 31:29 of a command from a batch buffer and use the full mask for that - * client. The existing INSTR_CLIENT_MASK/SHIFT defines can be used for this. */ -#define CMD_HASH_MASK STD_MI_OPCODE_MASK +static inline u32 cmd_header_key(u32 x) +{ + u32 shift; + + switch (x >> INSTR_CLIENT_SHIFT) { + default: + case INSTR_MI_CLIENT: + shift = STD_MI_OPCODE_SHIFT; + break; + case INSTR_RC_CLIENT: + shift = STD_3D_OPCODE_SHIFT; + break; + case INSTR_BC_CLIENT: + shift = STD_2D_OPCODE_SHIFT; + break; + } + + return x >> shift; +} static int init_hash_table(struct intel_engine_cs *engine, const struct drm_i915_cmd_table *cmd_tables, @@ -720,7 +743,7 @@ static int init_hash_table(struct intel_engine_cs *engine, desc_node->desc = desc; hash_add(engine->cmd_hash, &desc_node->node, - desc->cmd.value & CMD_HASH_MASK); + cmd_header_key(desc->cmd.value)); } } @@ -746,17 +769,15 @@ static void fini_hash_table(struct intel_engine_cs *engine) * Optionally initializes fields related to batch buffer command parsing in the * struct intel_engine_cs based on whether the platform requires software * command parsing. - * - * Return: non-zero if initialization fails */ -int intel_engine_init_cmd_parser(struct intel_engine_cs *engine) +void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) { const struct drm_i915_cmd_table *cmd_tables; int cmd_table_count; int ret; if (!IS_GEN7(engine->i915)) - return 0; + return; switch (engine->id) { case RCS: @@ -811,24 +832,27 @@ int intel_engine_init_cmd_parser(struct intel_engine_cs *engine) break; default: MISSING_CASE(engine->id); - BUG(); + return; } - BUG_ON(!validate_cmds_sorted(engine, cmd_tables, cmd_table_count)); - BUG_ON(!validate_regs_sorted(engine)); - - WARN_ON(!hash_empty(engine->cmd_hash)); + if (!validate_cmds_sorted(engine, cmd_tables, cmd_table_count)) { + DRM_ERROR("%s: command descriptions are not sorted\n", + engine->name); + return; + } + if (!validate_regs_sorted(engine)) { + DRM_ERROR("%s: registers are not sorted\n", engine->name); + return; + } ret = init_hash_table(engine, cmd_tables, cmd_table_count); if (ret) { - DRM_ERROR("CMD: cmd_parser_init failed!\n"); + DRM_ERROR("%s: initialised failed!\n", engine->name); fini_hash_table(engine); - return ret; + return; } engine->needs_cmd_parser = true; - - return 0; } /** @@ -853,12 +877,9 @@ find_cmd_in_table(struct intel_engine_cs *engine, struct cmd_node *desc_node; hash_for_each_possible(engine->cmd_hash, desc_node, node, - cmd_header & CMD_HASH_MASK) { + cmd_header_key(cmd_header)) { const struct drm_i915_cmd_descriptor *desc = desc_node->desc; - u32 masked_cmd = desc->cmd.mask & cmd_header; - u32 masked_value = desc->cmd.value & desc->cmd.mask; - - if (masked_cmd == masked_value) + if (((cmd_header ^ desc->cmd.value) & desc->cmd.mask) == 0) return desc; } @@ -876,11 +897,14 @@ find_cmd_in_table(struct intel_engine_cs *engine, static const struct drm_i915_cmd_descriptor* find_cmd(struct intel_engine_cs *engine, u32 cmd_header, + const struct drm_i915_cmd_descriptor *desc, struct drm_i915_cmd_descriptor *default_desc) { - const struct drm_i915_cmd_descriptor *desc; u32 mask; + if (((cmd_header ^ desc->cmd.value) & desc->cmd.mask) == 0) + return desc; + desc = find_cmd_in_table(engine, cmd_header); if (desc) return desc; @@ -889,140 +913,127 @@ find_cmd(struct intel_engine_cs *engine, if (!mask) return NULL; - BUG_ON(!default_desc); - default_desc->flags = CMD_DESC_SKIP; + default_desc->cmd.value = cmd_header; + default_desc->cmd.mask = ~0u << MIN_OPCODE_SHIFT; default_desc->length.mask = mask; - + default_desc->flags = CMD_DESC_SKIP; return default_desc; } static const struct drm_i915_reg_descriptor * -find_reg(const struct drm_i915_reg_descriptor *table, - int count, u32 addr) +__find_reg(const struct drm_i915_reg_descriptor *table, int count, u32 addr) { - int i; - - for (i = 0; i < count; i++) { - if (i915_mmio_reg_offset(table[i].addr) == addr) - return &table[i]; + int start = 0, end = count; + while (start < end) { + int mid = start + (end - start) / 2; + int ret = addr - i915_mmio_reg_offset(table[mid].addr); + if (ret < 0) + end = mid; + else if (ret > 0) + start = mid + 1; + else + return &table[mid]; } - return NULL; } static const struct drm_i915_reg_descriptor * -find_reg_in_tables(const struct drm_i915_reg_table *tables, - int count, bool is_master, u32 addr) +find_reg(const struct intel_engine_cs *engine, bool is_master, u32 addr) { - int i; - const struct drm_i915_reg_table *table; - const struct drm_i915_reg_descriptor *reg; + const struct drm_i915_reg_table *table = engine->reg_tables; + int count = engine->reg_table_count; - for (i = 0; i < count; i++) { - table = &tables[i]; + do { if (!table->master || is_master) { - reg = find_reg(table->regs, table->num_regs, - addr); + const struct drm_i915_reg_descriptor *reg; + + reg = __find_reg(table->regs, table->num_regs, addr); if (reg != NULL) return reg; } - } + } while (table++, --count); return NULL; } -static u32 *vmap_batch(struct drm_i915_gem_object *obj, - unsigned start, unsigned len) -{ - int i; - void *addr = NULL; - struct sg_page_iter sg_iter; - int first_page = start >> PAGE_SHIFT; - int last_page = (len + start + 4095) >> PAGE_SHIFT; - int npages = last_page - first_page; - struct page **pages; - - pages = drm_malloc_ab(npages, sizeof(*pages)); - if (pages == NULL) { - DRM_DEBUG_DRIVER("Failed to get space for pages\n"); - goto finish; - } - - i = 0; - for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, first_page) { - pages[i++] = sg_page_iter_page(&sg_iter); - if (i == npages) - break; - } - - addr = vmap(pages, i, 0, PAGE_KERNEL); - if (addr == NULL) { - DRM_DEBUG_DRIVER("Failed to vmap pages\n"); - goto finish; - } - -finish: - if (pages) - drm_free_large(pages); - return (u32*)addr; -} - -/* Returns a vmap'd pointer to dest_obj, which the caller must unmap */ -static u32 *copy_batch(struct drm_i915_gem_object *dest_obj, +/* Returns a vmap'd pointer to dst_obj, which the caller must unmap */ +static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, struct drm_i915_gem_object *src_obj, u32 batch_start_offset, - u32 batch_len) + u32 batch_len, + bool *needs_clflush_after) { - int needs_clflush = 0; - void *src_base, *src; - void *dst = NULL; + unsigned int src_needs_clflush; + unsigned int dst_needs_clflush; + void *dst, *src; int ret; - if (batch_len > dest_obj->base.size || - batch_len + batch_start_offset > src_obj->base.size) - return ERR_PTR(-E2BIG); - - if (WARN_ON(dest_obj->pages_pin_count == 0)) - return ERR_PTR(-ENODEV); - - ret = i915_gem_obj_prepare_shmem_read(src_obj, &needs_clflush); - if (ret) { - DRM_DEBUG_DRIVER("CMD: failed to prepare shadow batch\n"); + ret = i915_gem_obj_prepare_shmem_read(src_obj, &src_needs_clflush); + if (ret) return ERR_PTR(ret); - } - src_base = vmap_batch(src_obj, batch_start_offset, batch_len); - if (!src_base) { - DRM_DEBUG_DRIVER("CMD: Failed to vmap batch\n"); - ret = -ENOMEM; + ret = i915_gem_obj_prepare_shmem_write(dst_obj, &dst_needs_clflush); + if (ret) { + dst = ERR_PTR(ret); goto unpin_src; } - ret = i915_gem_object_set_to_cpu_domain(dest_obj, true); - if (ret) { - DRM_DEBUG_DRIVER("CMD: Failed to set shadow batch to CPU\n"); - goto unmap_src; + dst = i915_gem_object_pin_map(dst_obj, I915_MAP_WB); + if (IS_ERR(dst)) + goto unpin_dst; + + src = ERR_PTR(-ENODEV); + if (src_needs_clflush && + i915_memcpy_from_wc((void *)(uintptr_t)batch_start_offset, 0, 0)) { + src = i915_gem_object_pin_map(src_obj, I915_MAP_WC); + if (!IS_ERR(src)) { + i915_memcpy_from_wc(dst, + src + batch_start_offset, + ALIGN(batch_len, 16)); + i915_gem_object_unpin_map(src_obj); + } } - - dst = vmap_batch(dest_obj, 0, batch_len); - if (!dst) { - DRM_DEBUG_DRIVER("CMD: Failed to vmap shadow batch\n"); - ret = -ENOMEM; - goto unmap_src; + if (IS_ERR(src)) { + void *ptr; + int offset, n; + + offset = offset_in_page(batch_start_offset); + + /* We can avoid clflushing partial cachelines before the write + * if we only every write full cache-lines. Since we know that + * both the source and destination are in multiples of + * PAGE_SIZE, we can simply round up to the next cacheline. + * We don't care about copying too much here as we only + * validate up to the end of the batch. + */ + if (dst_needs_clflush & CLFLUSH_BEFORE) + batch_len = roundup(batch_len, + boot_cpu_data.x86_clflush_size); + + ptr = dst; + for (n = batch_start_offset >> PAGE_SHIFT; batch_len; n++) { + int len = min_t(int, batch_len, PAGE_SIZE - offset); + + src = kmap_atomic(i915_gem_object_get_page(src_obj, n)); + if (src_needs_clflush) + drm_clflush_virt_range(src + offset, len); + memcpy(ptr, src + offset, len); + kunmap_atomic(src); + + ptr += len; + batch_len -= len; + offset = 0; + } } - src = src_base + offset_in_page(batch_start_offset); - if (needs_clflush) - drm_clflush_virt_range(src, batch_len); - - memcpy(dst, src, batch_len); + /* dst_obj is returned with vmap pinned */ + *needs_clflush_after = dst_needs_clflush & CLFLUSH_AFTER; -unmap_src: - vunmap(src_base); +unpin_dst: + i915_gem_obj_finish_shmem_access(dst_obj); unpin_src: - i915_gem_object_unpin_pages(src_obj); - - return ret ? ERR_PTR(ret) : dst; + i915_gem_obj_finish_shmem_access(src_obj); + return dst; } /** @@ -1052,6 +1063,9 @@ static bool check_cmd(const struct intel_engine_cs *engine, const bool is_master, bool *oacontrol_set) { + if (desc->flags & CMD_DESC_SKIP) + return true; + if (desc->flags & CMD_DESC_REJECT) { DRM_DEBUG_DRIVER("CMD: Rejected command: 0x%08X\n", *cmd); return false; @@ -1076,10 +1090,7 @@ static bool check_cmd(const struct intel_engine_cs *engine, offset += step) { const u32 reg_addr = cmd[offset] & desc->reg.mask; const struct drm_i915_reg_descriptor *reg = - find_reg_in_tables(engine->reg_tables, - engine->reg_table_count, - is_master, - reg_addr); + find_reg(engine, is_master, reg_addr); if (!reg) { DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (exec_id=%d)\n", @@ -1200,16 +1211,19 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, u32 batch_len, bool is_master) { - u32 *cmd, *batch_base, *batch_end; - struct drm_i915_cmd_descriptor default_desc = { 0 }; + u32 *cmd, *batch_end; + struct drm_i915_cmd_descriptor default_desc = noop_desc; + const struct drm_i915_cmd_descriptor *desc = &default_desc; bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */ + bool needs_clflush_after = false; int ret = 0; - batch_base = copy_batch(shadow_batch_obj, batch_obj, - batch_start_offset, batch_len); - if (IS_ERR(batch_base)) { + cmd = copy_batch(shadow_batch_obj, batch_obj, + batch_start_offset, batch_len, + &needs_clflush_after); + if (IS_ERR(cmd)) { DRM_DEBUG_DRIVER("CMD: Failed to copy batch\n"); - return PTR_ERR(batch_base); + return PTR_ERR(cmd); } /* @@ -1217,17 +1231,14 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, * large or larger and copy_batch() will write MI_NOPs to the extra * space. Parsing should be faster in some cases this way. */ - batch_end = batch_base + (batch_len / sizeof(*batch_end)); - - cmd = batch_base; + batch_end = cmd + (batch_len / sizeof(*batch_end)); while (cmd < batch_end) { - const struct drm_i915_cmd_descriptor *desc; u32 length; if (*cmd == MI_BATCH_BUFFER_END) break; - desc = find_cmd(engine, *cmd, &default_desc); + desc = find_cmd(engine, *cmd, desc, &default_desc); if (!desc) { DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n", *cmd); @@ -1278,7 +1289,9 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, ret = -EINVAL; } - vunmap(batch_base); + if (ret == 0 && needs_clflush_after) + drm_clflush_virt_range(shadow_batch_obj->mapping, batch_len); + i915_gem_object_unpin_map(shadow_batch_obj); return ret; } diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 64e41cf..a95d7bc 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -40,12 +40,6 @@ #include <drm/i915_drm.h> #include "i915_drv.h" -enum { - ACTIVE_LIST, - INACTIVE_LIST, - PINNED_LIST, -}; - /* As the drm_debugfs_init() routines are called before dev->dev_private is * allocated we need to hook into the minor for release. */ static int @@ -111,7 +105,7 @@ static char get_tiling_flag(struct drm_i915_gem_object *obj) static char get_global_flag(struct drm_i915_gem_object *obj) { - return i915_gem_obj_to_ggtt(obj) ? 'g' : ' '; + return i915_gem_object_to_ggtt(obj, NULL) ? 'g' : ' '; } static char get_pin_mapped_flag(struct drm_i915_gem_object *obj) @@ -158,11 +152,9 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) seq_printf(m, "%x ", i915_gem_active_get_seqno(&obj->last_read[id], &obj->base.dev->struct_mutex)); - seq_printf(m, "] %x %x%s%s%s", + seq_printf(m, "] %x %s%s%s", i915_gem_active_get_seqno(&obj->last_write, &obj->base.dev->struct_mutex), - i915_gem_active_get_seqno(&obj->last_fence, - &obj->base.dev->struct_mutex), i915_cache_level_str(to_i915(obj->base.dev), obj->cache_level), obj->dirty ? " dirty" : "", obj->madv == I915_MADV_DONTNEED ? " purgeable" : ""); @@ -175,8 +167,6 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) seq_printf(m, " (pinned x %d)", pin_count); if (obj->pin_display) seq_printf(m, " (display)"); - if (obj->fence_reg != I915_FENCE_REG_NONE) - seq_printf(m, " (fence: %d)", obj->fence_reg); list_for_each_entry(vma, &obj->vma_list, obj_link) { if (!drm_mm_node_allocated(&vma->node)) continue; @@ -186,6 +176,10 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) vma->node.start, vma->node.size); if (i915_vma_is_ggtt(vma)) seq_printf(m, ", type: %u", vma->ggtt_view.type); + if (vma->fence) + seq_printf(m, " , fence: %d%s", + vma->fence->id, + i915_gem_active_isset(&vma->last_fence) ? "*" : ""); seq_puts(m, ")"); } if (obj->stolen) @@ -210,53 +204,6 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) seq_printf(m, " (frontbuffer: 0x%03x)", frontbuffer_bits); } -static int i915_gem_object_list_info(struct seq_file *m, void *data) -{ - struct drm_info_node *node = m->private; - uintptr_t list = (uintptr_t) node->info_ent->data; - struct list_head *head; - struct drm_device *dev = node->minor->dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; - struct i915_vma *vma; - u64 total_obj_size, total_gtt_size; - int count, ret; - - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; - - /* FIXME: the user of this interface might want more than just GGTT */ - switch (list) { - case ACTIVE_LIST: - seq_puts(m, "Active:\n"); - head = &ggtt->base.active_list; - break; - case INACTIVE_LIST: - seq_puts(m, "Inactive:\n"); - head = &ggtt->base.inactive_list; - break; - default: - mutex_unlock(&dev->struct_mutex); - return -EINVAL; - } - - total_obj_size = total_gtt_size = count = 0; - list_for_each_entry(vma, head, vm_link) { - seq_printf(m, " "); - describe_obj(m, vma->obj); - seq_printf(m, "\n"); - total_obj_size += vma->obj->base.size; - total_gtt_size += vma->node.size; - count++; - } - mutex_unlock(&dev->struct_mutex); - - seq_printf(m, "Total %d objects, %llu bytes, %llu GTT size\n", - count, total_obj_size, total_gtt_size); - return 0; -} - static int obj_rank_by_stolen(void *priv, struct list_head *A, struct list_head *B) { @@ -322,17 +269,6 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data) return 0; } -#define count_objects(list, member) do { \ - list_for_each_entry(obj, list, member) { \ - size += i915_gem_obj_total_ggtt_size(obj); \ - ++count; \ - if (obj->map_and_fenceable) { \ - mappable_size += i915_gem_obj_ggtt_size(obj); \ - ++mappable_count; \ - } \ - } \ -} while (0) - struct file_stats { struct drm_i915_file_private *file_priv; unsigned long count; @@ -418,9 +354,9 @@ static int per_file_ctx_stats(int id, void *ptr, void *data) for (n = 0; n < ARRAY_SIZE(ctx->engine); n++) { if (ctx->engine[n].state) - per_file_stats(0, ctx->engine[n].state, data); + per_file_stats(0, ctx->engine[n].state->obj, data); if (ctx->engine[n].ring) - per_file_stats(0, ctx->engine[n].ring->obj, data); + per_file_stats(0, ctx->engine[n].ring->vma->obj, data); } return 0; @@ -447,30 +383,16 @@ static void print_context_stats(struct seq_file *m, print_file_stats(m, "[k]contexts", stats); } -#define count_vmas(list, member) do { \ - list_for_each_entry(vma, list, member) { \ - size += i915_gem_obj_total_ggtt_size(vma->obj); \ - ++count; \ - if (vma->obj->map_and_fenceable) { \ - mappable_size += i915_gem_obj_ggtt_size(vma->obj); \ - ++mappable_count; \ - } \ - } \ -} while (0) - static int i915_gem_object_info(struct seq_file *m, void* data) { struct drm_info_node *node = m->private; struct drm_device *dev = node->minor->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct i915_ggtt *ggtt = &dev_priv->ggtt; - u32 count, mappable_count, purgeable_count; - u64 size, mappable_size, purgeable_size; - unsigned long pin_mapped_count = 0, pin_mapped_purgeable_count = 0; - u64 pin_mapped_size = 0, pin_mapped_purgeable_size = 0; + u32 count, mapped_count, purgeable_count, dpy_count; + u64 size, mapped_size, purgeable_size, dpy_size; struct drm_i915_gem_object *obj; struct drm_file *file; - struct i915_vma *vma; int ret; ret = mutex_lock_interruptible(&dev->struct_mutex); @@ -481,70 +403,53 @@ static int i915_gem_object_info(struct seq_file *m, void* data) dev_priv->mm.object_count, dev_priv->mm.object_memory); - size = count = mappable_size = mappable_count = 0; - count_objects(&dev_priv->mm.bound_list, global_list); - seq_printf(m, "%u [%u] objects, %llu [%llu] bytes in gtt\n", - count, mappable_count, size, mappable_size); - - size = count = mappable_size = mappable_count = 0; - count_vmas(&ggtt->base.active_list, vm_link); - seq_printf(m, " %u [%u] active objects, %llu [%llu] bytes\n", - count, mappable_count, size, mappable_size); + size = count = 0; + mapped_size = mapped_count = 0; + purgeable_size = purgeable_count = 0; + list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) { + size += obj->base.size; + ++count; - size = count = mappable_size = mappable_count = 0; - count_vmas(&ggtt->base.inactive_list, vm_link); - seq_printf(m, " %u [%u] inactive objects, %llu [%llu] bytes\n", - count, mappable_count, size, mappable_size); + if (obj->madv == I915_MADV_DONTNEED) { + purgeable_size += obj->base.size; + ++purgeable_count; + } - size = count = purgeable_size = purgeable_count = 0; - list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list) { - size += obj->base.size, ++count; - if (obj->madv == I915_MADV_DONTNEED) - purgeable_size += obj->base.size, ++purgeable_count; if (obj->mapping) { - pin_mapped_count++; - pin_mapped_size += obj->base.size; - if (obj->pages_pin_count == 0) { - pin_mapped_purgeable_count++; - pin_mapped_purgeable_size += obj->base.size; - } + mapped_count++; + mapped_size += obj->base.size; } } seq_printf(m, "%u unbound objects, %llu bytes\n", count, size); - size = count = mappable_size = mappable_count = 0; + size = count = dpy_size = dpy_count = 0; list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { - if (obj->fault_mappable) { - size += i915_gem_obj_ggtt_size(obj); - ++count; - } + size += obj->base.size; + ++count; + if (obj->pin_display) { - mappable_size += i915_gem_obj_ggtt_size(obj); - ++mappable_count; + dpy_size += obj->base.size; + ++dpy_count; } + if (obj->madv == I915_MADV_DONTNEED) { purgeable_size += obj->base.size; ++purgeable_count; } + if (obj->mapping) { - pin_mapped_count++; - pin_mapped_size += obj->base.size; - if (obj->pages_pin_count == 0) { - pin_mapped_purgeable_count++; - pin_mapped_purgeable_size += obj->base.size; - } + mapped_count++; + mapped_size += obj->base.size; } } + seq_printf(m, "%u bound objects, %llu bytes\n", + count, size); seq_printf(m, "%u purgeable objects, %llu bytes\n", purgeable_count, purgeable_size); - seq_printf(m, "%u pinned mappable objects, %llu bytes\n", - mappable_count, mappable_size); - seq_printf(m, "%u fault mappable objects, %llu bytes\n", - count, size); - seq_printf(m, - "%lu [%lu] pin mapped objects, %llu [%llu] bytes [purgeable]\n", - pin_mapped_count, pin_mapped_purgeable_count, - pin_mapped_size, pin_mapped_purgeable_size); + seq_printf(m, "%u mapped objects, %llu bytes\n", + mapped_count, mapped_size); + seq_printf(m, "%u display objects (pinned), %llu bytes\n", + dpy_count, dpy_size); seq_printf(m, "%llu [%llu] gtt total\n", ggtt->base.total, ggtt->mappable_end - ggtt->base.start); @@ -557,6 +462,8 @@ static int i915_gem_object_info(struct seq_file *m, void* data) print_context_stats(m, dev_priv); list_for_each_entry_reverse(file, &dev->filelist, lhead) { struct file_stats stats; + struct drm_i915_file_private *file_priv = file->driver_priv; + struct drm_i915_gem_request *request; struct task_struct *task; memset(&stats, 0, sizeof(stats)); @@ -570,10 +477,17 @@ static int i915_gem_object_info(struct seq_file *m, void* data) * still alive (e.g. get_pid(current) => fork() => exit()). * Therefore, we need to protect this ->comm access using RCU. */ + mutex_lock(&dev->struct_mutex); + request = list_first_entry_or_null(&file_priv->mm.request_list, + struct drm_i915_gem_request, + client_list); rcu_read_lock(); - task = pid_task(file->pid, PIDTYPE_PID); + task = pid_task(request && request->ctx->pid ? + request->ctx->pid : file->pid, + PIDTYPE_PID); print_file_stats(m, task ? task->comm : "<unknown>", stats); rcu_read_unlock(); + mutex_unlock(&dev->struct_mutex); } mutex_unlock(&dev->filelist_mutex); @@ -584,8 +498,8 @@ static int i915_gem_gtt_info(struct seq_file *m, void *data) { struct drm_info_node *node = m->private; struct drm_device *dev = node->minor->dev; - uintptr_t list = (uintptr_t) node->info_ent->data; struct drm_i915_private *dev_priv = to_i915(dev); + bool show_pin_display_only = !!data; struct drm_i915_gem_object *obj; u64 total_obj_size, total_gtt_size; int count, ret; @@ -596,7 +510,7 @@ static int i915_gem_gtt_info(struct seq_file *m, void *data) total_obj_size = total_gtt_size = count = 0; list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { - if (list == PINNED_LIST && !i915_gem_obj_is_pinned(obj)) + if (show_pin_display_only && !obj->pin_display) continue; seq_puts(m, " "); @@ -755,12 +669,11 @@ static int i915_gem_request_info(struct seq_file *m, void *data) seq_printf(m, "%s requests: %d\n", engine->name, count); list_for_each_entry(req, &engine->request_list, link) { + struct pid *pid = req->ctx->pid; struct task_struct *task; rcu_read_lock(); - task = NULL; - if (req->pid) - task = pid_task(req->pid, PIDTYPE_PID); + task = pid ? pid_task(pid, PIDTYPE_PID) : NULL; seq_printf(m, " %x @ %d: %s [%d]\n", req->fence.seqno, (int) (jiffies - req->emitted_jiffies), @@ -787,8 +700,6 @@ static void i915_ring_seqno_info(struct seq_file *m, seq_printf(m, "Current sequence (%s): %x\n", engine->name, intel_engine_get_seqno(engine)); - seq_printf(m, "Current user interrupts (%s): %lx\n", - engine->name, READ_ONCE(engine->breadcrumbs.irq_wakeups)); spin_lock(&b->lock); for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { @@ -1027,14 +938,14 @@ static int i915_gem_fence_regs_info(struct seq_file *m, void *data) seq_printf(m, "Total fences = %d\n", dev_priv->num_fence_regs); for (i = 0; i < dev_priv->num_fence_regs; i++) { - struct drm_i915_gem_object *obj = dev_priv->fence_regs[i].obj; + struct i915_vma *vma = dev_priv->fence_regs[i].vma; seq_printf(m, "Fence %d, pin count = %d, object = ", i, dev_priv->fence_regs[i].pin_count); - if (obj == NULL) + if (!vma) seq_puts(m, "unused"); else - describe_obj(m, obj); + describe_obj(m, vma->obj); seq_putc(m, '\n'); } @@ -1434,11 +1345,10 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) engine->hangcheck.seqno, seqno[id], engine->last_submitted_seqno); - seq_printf(m, "\twaiters? %d\n", - intel_engine_has_waiter(engine)); - seq_printf(m, "\tuser interrupts = %lx [current %lx]\n", - engine->hangcheck.user_interrupts, - READ_ONCE(engine->breadcrumbs.irq_wakeups)); + seq_printf(m, "\twaiters? %s, fake irq active? %s\n", + yesno(intel_engine_has_waiter(engine)), + yesno(test_bit(engine->id, + &dev_priv->gpu_error.missed_irq_rings))); seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n", (long long)engine->hangcheck.acthd, (long long)acthd[id]); @@ -2052,18 +1962,17 @@ static int i915_context_status(struct seq_file *m, void *unused) list_for_each_entry(ctx, &dev_priv->context_list, link) { seq_printf(m, "HW context %u ", ctx->hw_id); - if (IS_ERR(ctx->file_priv)) { - seq_puts(m, "(deleted) "); - } else if (ctx->file_priv) { - struct pid *pid = ctx->file_priv->file->pid; + if (ctx->pid) { struct task_struct *task; - task = get_pid_task(pid, PIDTYPE_PID); + task = get_pid_task(ctx->pid, PIDTYPE_PID); if (task) { seq_printf(m, "(%s [%d]) ", task->comm, task->pid); put_task_struct(task); } + } else if (IS_ERR(ctx->file_priv)) { + seq_puts(m, "(deleted) "); } else { seq_puts(m, "(kernel) "); } @@ -2077,7 +1986,7 @@ static int i915_context_status(struct seq_file *m, void *unused) seq_printf(m, "%s: ", engine->name); seq_putc(m, ce->initialised ? 'I' : 'i'); if (ce->state) - describe_obj(m, ce->state); + describe_obj(m, ce->state->obj); if (ce->ring) describe_ctx_ring(m, ce->ring); seq_putc(m, '\n'); @@ -2095,36 +2004,34 @@ static void i915_dump_lrc_obj(struct seq_file *m, struct i915_gem_context *ctx, struct intel_engine_cs *engine) { - struct drm_i915_gem_object *ctx_obj = ctx->engine[engine->id].state; + struct i915_vma *vma = ctx->engine[engine->id].state; struct page *page; - uint32_t *reg_state; int j; - unsigned long ggtt_offset = 0; seq_printf(m, "CONTEXT: %s %u\n", engine->name, ctx->hw_id); - if (ctx_obj == NULL) { - seq_puts(m, "\tNot allocated\n"); + if (!vma) { + seq_puts(m, "\tFake context\n"); return; } - if (!i915_gem_obj_ggtt_bound(ctx_obj)) - seq_puts(m, "\tNot bound in GGTT\n"); - else - ggtt_offset = i915_gem_obj_ggtt_offset(ctx_obj); + if (vma->flags & I915_VMA_GLOBAL_BIND) + seq_printf(m, "\tBound in GGTT at 0x%08x\n", + i915_ggtt_offset(vma)); - if (i915_gem_object_get_pages(ctx_obj)) { - seq_puts(m, "\tFailed to get pages for context object\n"); + if (i915_gem_object_get_pages(vma->obj)) { + seq_puts(m, "\tFailed to get pages for context object\n\n"); return; } - page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN); - if (!WARN_ON(page == NULL)) { - reg_state = kmap_atomic(page); + page = i915_gem_object_get_page(vma->obj, LRC_STATE_PN); + if (page) { + u32 *reg_state = kmap_atomic(page); for (j = 0; j < 0x600 / sizeof(u32) / 4; j += 4) { - seq_printf(m, "\t[0x%08lx] 0x%08x 0x%08x 0x%08x 0x%08x\n", - ggtt_offset + 4096 + (j * 4), + seq_printf(m, + "\t[0x%04x] 0x%08x 0x%08x 0x%08x 0x%08x\n", + j * 4, reg_state[j], reg_state[j + 1], reg_state[j + 2], reg_state[j + 3]); } @@ -2444,6 +2351,20 @@ static int count_irq_waiters(struct drm_i915_private *i915) return count; } +static const char *rps_power_to_str(unsigned int power) +{ + static const char * const strings[] = { + [LOW_POWER] = "low power", + [BETWEEN] = "mixed", + [HIGH_POWER] = "high power", + }; + + if (power >= ARRAY_SIZE(strings) || !strings[power]) + return "unknown"; + + return strings[power]; +} + static int i915_rps_boost_info(struct seq_file *m, void *data) { struct drm_info_node *node = m->private; @@ -2455,12 +2376,17 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) seq_printf(m, "GPU busy? %s [%x]\n", yesno(dev_priv->gt.awake), dev_priv->gt.active_engines); seq_printf(m, "CPU waiting? %d\n", count_irq_waiters(dev_priv)); - seq_printf(m, "Frequency requested %d; min hard:%d, soft:%d; max soft:%d, hard:%d\n", - intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq), + seq_printf(m, "Frequency requested %d\n", + intel_gpu_freq(dev_priv, dev_priv->rps.cur_freq)); + seq_printf(m, " min hard:%d, soft:%d; max soft:%d, hard:%d\n", intel_gpu_freq(dev_priv, dev_priv->rps.min_freq), intel_gpu_freq(dev_priv, dev_priv->rps.min_freq_softlimit), intel_gpu_freq(dev_priv, dev_priv->rps.max_freq_softlimit), intel_gpu_freq(dev_priv, dev_priv->rps.max_freq)); + seq_printf(m, " idle:%d, efficient:%d, boost:%d\n", + intel_gpu_freq(dev_priv, dev_priv->rps.idle_freq), + intel_gpu_freq(dev_priv, dev_priv->rps.efficient_freq), + intel_gpu_freq(dev_priv, dev_priv->rps.boost_freq)); mutex_lock(&dev->filelist_mutex); spin_lock(&dev_priv->rps.client_lock); @@ -2481,6 +2407,31 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) spin_unlock(&dev_priv->rps.client_lock); mutex_unlock(&dev->filelist_mutex); + if (INTEL_GEN(dev_priv) >= 6 && + dev_priv->rps.enabled && + dev_priv->gt.active_engines) { + u32 rpup, rpupei; + u32 rpdown, rpdownei; + + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + rpup = I915_READ_FW(GEN6_RP_CUR_UP) & GEN6_RP_EI_MASK; + rpupei = I915_READ_FW(GEN6_RP_CUR_UP_EI) & GEN6_RP_EI_MASK; + rpdown = I915_READ_FW(GEN6_RP_CUR_DOWN) & GEN6_RP_EI_MASK; + rpdownei = I915_READ_FW(GEN6_RP_CUR_DOWN_EI) & GEN6_RP_EI_MASK; + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + + seq_printf(m, "\nRPS Autotuning (current \"%s\" window):\n", + rps_power_to_str(dev_priv->rps.power)); + seq_printf(m, " Avg. up: %d%% [above threshold? %d%%]\n", + 100 * rpup / rpupei, + dev_priv->rps.up_threshold); + seq_printf(m, " Avg. down: %d%% [below threshold? %d%%]\n", + 100 * rpdown / rpdownei, + dev_priv->rps.down_threshold); + } else { + seq_puts(m, "\nRPS Autotuning inactive\n"); + } + return 0; } @@ -2547,6 +2498,7 @@ static void i915_guc_client_info(struct seq_file *m, struct i915_guc_client *client) { struct intel_engine_cs *engine; + enum intel_engine_id id; uint64_t tot = 0; seq_printf(m, "\tPriority %d, GuC ctx index: %u, PD offset 0x%x\n", @@ -2557,15 +2509,14 @@ static void i915_guc_client_info(struct seq_file *m, client->wq_size, client->wq_offset, client->wq_tail); seq_printf(m, "\tWork queue full: %u\n", client->no_wq_space); - seq_printf(m, "\tFailed to queue: %u\n", client->q_fail); seq_printf(m, "\tFailed doorbell: %u\n", client->b_fail); seq_printf(m, "\tLast submission result: %d\n", client->retcode); - for_each_engine(engine, dev_priv) { + for_each_engine_id(engine, dev_priv, id) { + u64 submissions = client->submissions[id]; + tot += submissions; seq_printf(m, "\tSubmissions: %llu %s\n", - client->submissions[engine->id], - engine->name); - tot += client->submissions[engine->id]; + submissions, engine->name); } seq_printf(m, "\tTotal: %llu\n", tot); } @@ -2578,6 +2529,7 @@ static int i915_guc_info(struct seq_file *m, void *data) struct intel_guc guc; struct i915_guc_client client = {}; struct intel_engine_cs *engine; + enum intel_engine_id id; u64 total = 0; if (!HAS_GUC_SCHED(dev_priv)) @@ -2604,11 +2556,11 @@ static int i915_guc_info(struct seq_file *m, void *data) seq_printf(m, "GuC last action error code: %d\n", guc.action_err); seq_printf(m, "\nGuC submissions:\n"); - for_each_engine(engine, dev_priv) { + for_each_engine_id(engine, dev_priv, id) { + u64 submissions = guc.submissions[id]; + total += submissions; seq_printf(m, "\t%-24s: %10llu, last seqno 0x%08x\n", - engine->name, guc.submissions[engine->id], - guc.last_seqno[engine->id]); - total += guc.submissions[engine->id]; + engine->name, submissions, guc.last_seqno[id]); } seq_printf(m, "\t%s: %llu\n", "Total", total); @@ -2625,15 +2577,15 @@ static int i915_guc_log_dump(struct seq_file *m, void *data) struct drm_info_node *node = m->private; struct drm_device *dev = node->minor->dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_i915_gem_object *log_obj = dev_priv->guc.log_obj; - u32 *log; + struct drm_i915_gem_object *obj; int i = 0, pg; - if (!log_obj) + if (!dev_priv->guc.log_vma) return 0; - for (pg = 0; pg < log_obj->base.size / PAGE_SIZE; pg++) { - log = kmap_atomic(i915_gem_object_get_page(log_obj, pg)); + obj = dev_priv->guc.log_vma->obj; + for (pg = 0; pg < obj->base.size / PAGE_SIZE; pg++) { + u32 *log = kmap_atomic(i915_gem_object_get_page(obj, pg)); for (i = 0; i < PAGE_SIZE / sizeof(u32); i += 4) seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n", @@ -3237,7 +3189,7 @@ static int i915_semaphore_status(struct seq_file *m, void *unused) struct drm_device *dev = node->minor->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct intel_engine_cs *engine; - int num_rings = hweight32(INTEL_INFO(dev)->ring_mask); + int num_rings = INTEL_INFO(dev)->num_rings; enum intel_engine_id id; int j, ret; @@ -3255,7 +3207,7 @@ static int i915_semaphore_status(struct seq_file *m, void *unused) struct page *page; uint64_t *seqno; - page = i915_gem_object_get_page(dev_priv->semaphore_obj, 0); + page = i915_gem_object_get_page(dev_priv->semaphore->obj, 0); seqno = (uint64_t *)kmap_atomic(page); for_each_engine_id(engine, dev_priv, id) { @@ -5386,9 +5338,7 @@ static const struct drm_info_list i915_debugfs_list[] = { {"i915_capabilities", i915_capabilities, 0}, {"i915_gem_objects", i915_gem_object_info, 0}, {"i915_gem_gtt", i915_gem_gtt_info, 0}, - {"i915_gem_pinned", i915_gem_gtt_info, 0, (void *) PINNED_LIST}, - {"i915_gem_active", i915_gem_object_list_info, 0, (void *) ACTIVE_LIST}, - {"i915_gem_inactive", i915_gem_object_list_info, 0, (void *) INACTIVE_LIST}, + {"i915_gem_pin_display", i915_gem_gtt_info, 0, (void *)1}, {"i915_gem_stolen", i915_gem_stolen_list_info }, {"i915_gem_pageflip", i915_gem_pageflip_info, 0}, {"i915_gem_request", i915_gem_request_info, 0}, diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 57eb380..13ae340 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -827,6 +827,8 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, mutex_init(&dev_priv->wm.wm_mutex); mutex_init(&dev_priv->pps_mutex); + i915_memcpy_init_early(dev_priv); + ret = i915_workqueues_init(dev_priv); if (ret < 0) return ret; @@ -1560,6 +1562,7 @@ static int i915_drm_resume(struct drm_device *dev) i915_gem_resume(dev); i915_restore_state(dev); + intel_pps_unlock_regs_wa(dev_priv); intel_opregion_setup(dev_priv); intel_init_pch_refclk(dev); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c36d176..e6069057 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -70,7 +70,7 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20160808" +#define DRIVER_DATE "20160822" #undef WARN_ON /* Many gcc seem to no see through this and fall over :( */ @@ -455,15 +455,21 @@ struct intel_opregion { struct intel_overlay; struct intel_overlay_error_state; -#define I915_FENCE_REG_NONE -1 -#define I915_MAX_NUM_FENCES 32 -/* 32 fences + sign bit for FENCE_REG_NONE */ -#define I915_MAX_NUM_FENCE_BITS 6 - struct drm_i915_fence_reg { - struct list_head lru_list; - struct drm_i915_gem_object *obj; + struct list_head link; + struct drm_i915_private *i915; + struct i915_vma *vma; int pin_count; + int id; + /** + * Whether the tiling parameters for the currently + * associated fence register have changed. Note that + * for the purposes of tracking tiling changes we also + * treat the unfenced register, the register slot that + * the object occupies whilst it executes a fenced + * command (such as BLT on gen2/3), as a "fence". + */ + bool dirty; }; struct sdvo_device_mapping { @@ -475,130 +481,6 @@ struct sdvo_device_mapping { u8 ddc_pin; }; -struct intel_display_error_state; - -struct drm_i915_error_state { - struct kref ref; - struct timeval time; - - char error_msg[128]; - bool simulated; - int iommu; - u32 reset_count; - u32 suspend_count; - - /* Generic register state */ - u32 eir; - u32 pgtbl_er; - u32 ier; - u32 gtier[4]; - u32 ccid; - u32 derrmr; - u32 forcewake; - u32 error; /* gen6+ */ - u32 err_int; /* gen7 */ - u32 fault_data0; /* gen8, gen9 */ - u32 fault_data1; /* gen8, gen9 */ - u32 done_reg; - u32 gac_eco; - u32 gam_ecochk; - u32 gab_ctl; - u32 gfx_mode; - u32 extra_instdone[I915_NUM_INSTDONE_REG]; - u64 fence[I915_MAX_NUM_FENCES]; - struct intel_overlay_error_state *overlay; - struct intel_display_error_state *display; - struct drm_i915_error_object *semaphore_obj; - - struct drm_i915_error_engine { - int engine_id; - /* Software tracked state */ - bool waiting; - int num_waiters; - int hangcheck_score; - enum intel_engine_hangcheck_action hangcheck_action; - int num_requests; - - /* our own tracking of ring head and tail */ - u32 cpu_ring_head; - u32 cpu_ring_tail; - - u32 last_seqno; - u32 semaphore_seqno[I915_NUM_ENGINES - 1]; - - /* Register state */ - u32 start; - u32 tail; - u32 head; - u32 ctl; - u32 hws; - u32 ipeir; - u32 ipehr; - u32 instdone; - u32 bbstate; - u32 instpm; - u32 instps; - u32 seqno; - u64 bbaddr; - u64 acthd; - u32 fault_reg; - u64 faddr; - u32 rc_psmi; /* sleep state */ - u32 semaphore_mboxes[I915_NUM_ENGINES - 1]; - - struct drm_i915_error_object { - int page_count; - u64 gtt_offset; - u32 *pages[0]; - } *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page; - - struct drm_i915_error_object *wa_ctx; - - struct drm_i915_error_request { - long jiffies; - u32 seqno; - u32 tail; - } *requests; - - struct drm_i915_error_waiter { - char comm[TASK_COMM_LEN]; - pid_t pid; - u32 seqno; - } *waiters; - - struct { - u32 gfx_mode; - union { - u64 pdp[4]; - u32 pp_dir_base; - }; - } vm_info; - - pid_t pid; - char comm[TASK_COMM_LEN]; - } engine[I915_NUM_ENGINES]; - - struct drm_i915_error_buffer { - u32 size; - u32 name; - u32 rseqno[I915_NUM_ENGINES], wseqno; - u64 gtt_offset; - u32 read_domains; - u32 write_domain; - s32 fence_reg:I915_MAX_NUM_FENCE_BITS; - s32 pinned:2; - u32 tiling:2; - u32 dirty:1; - u32 purgeable:1; - u32 userptr:1; - s32 engine:4; - u32 cache_level:3; - } **active_bo, **pinned_bo; - - u32 *active_bo_count, *pinned_bo_count; - u32 vm_count; -}; - struct intel_connector; struct intel_encoder; struct intel_crtc_state; @@ -793,6 +675,7 @@ struct intel_device_info { u8 gen; u16 gen_mask; u8 ring_mask; /* Rings supported by the HW */ + u8 num_rings; DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG, SEP_SEMICOLON); /* Register offsets for the various display pipes and transcoders */ int pipe_offsets[I915_MAX_TRANSCODERS]; @@ -822,6 +705,134 @@ struct intel_device_info { #undef DEFINE_FLAG #undef SEP_SEMICOLON +struct intel_display_error_state; + +struct drm_i915_error_state { + struct kref ref; + struct timeval time; + + char error_msg[128]; + bool simulated; + int iommu; + u32 reset_count; + u32 suspend_count; + struct intel_device_info device_info; + + /* Generic register state */ + u32 eir; + u32 pgtbl_er; + u32 ier; + u32 gtier[4]; + u32 ccid; + u32 derrmr; + u32 forcewake; + u32 error; /* gen6+ */ + u32 err_int; /* gen7 */ + u32 fault_data0; /* gen8, gen9 */ + u32 fault_data1; /* gen8, gen9 */ + u32 done_reg; + u32 gac_eco; + u32 gam_ecochk; + u32 gab_ctl; + u32 gfx_mode; + u32 extra_instdone[I915_NUM_INSTDONE_REG]; + u64 fence[I915_MAX_NUM_FENCES]; + struct intel_overlay_error_state *overlay; + struct intel_display_error_state *display; + struct drm_i915_error_object *semaphore; + + struct drm_i915_error_engine { + int engine_id; + /* Software tracked state */ + bool waiting; + int num_waiters; + int hangcheck_score; + enum intel_engine_hangcheck_action hangcheck_action; + struct i915_address_space *vm; + int num_requests; + + /* our own tracking of ring head and tail */ + u32 cpu_ring_head; + u32 cpu_ring_tail; + + u32 last_seqno; + u32 semaphore_seqno[I915_NUM_ENGINES - 1]; + + /* Register state */ + u32 start; + u32 tail; + u32 head; + u32 ctl; + u32 mode; + u32 hws; + u32 ipeir; + u32 ipehr; + u32 instdone; + u32 bbstate; + u32 instpm; + u32 instps; + u32 seqno; + u64 bbaddr; + u64 acthd; + u32 fault_reg; + u64 faddr; + u32 rc_psmi; /* sleep state */ + u32 semaphore_mboxes[I915_NUM_ENGINES - 1]; + + struct drm_i915_error_object { + int page_count; + u64 gtt_offset; + u64 gtt_size; + u32 *pages[0]; + } *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page; + + struct drm_i915_error_object *wa_ctx; + + struct drm_i915_error_request { + long jiffies; + pid_t pid; + u32 seqno; + u32 head; + u32 tail; + } *requests; + + struct drm_i915_error_waiter { + char comm[TASK_COMM_LEN]; + pid_t pid; + u32 seqno; + } *waiters; + + struct { + u32 gfx_mode; + union { + u64 pdp[4]; + u32 pp_dir_base; + }; + } vm_info; + + pid_t pid; + char comm[TASK_COMM_LEN]; + } engine[I915_NUM_ENGINES]; + + struct drm_i915_error_buffer { + u32 size; + u32 name; + u32 rseqno[I915_NUM_ENGINES], wseqno; + u64 gtt_offset; + u32 read_domains; + u32 write_domain; + s32 fence_reg:I915_MAX_NUM_FENCE_BITS; + u32 tiling:2; + u32 dirty:1; + u32 purgeable:1; + u32 userptr:1; + s32 engine:4; + u32 cache_level:3; + } *active_bo[I915_NUM_ENGINES], *pinned_bo; + u32 active_bo_count[I915_NUM_ENGINES], pinned_bo_count; + struct i915_address_space *active_vm[I915_NUM_ENGINES]; +}; + enum i915_cache_level { I915_CACHE_NONE = 0, I915_CACHE_LLC, /* also used for snoopable memory on non-LLC */ @@ -878,22 +889,23 @@ struct i915_gem_context { struct drm_i915_private *i915; struct drm_i915_file_private *file_priv; struct i915_hw_ppgtt *ppgtt; + struct pid *pid; struct i915_ctx_hang_stats hang_stats; - /* Unique identifier for this context, used by the hw for tracking */ unsigned long flags; #define CONTEXT_NO_ZEROMAP BIT(0) #define CONTEXT_NO_ERROR_CAPTURE BIT(1) - unsigned hw_id; + + /* Unique identifier for this context, used by the hw for tracking */ + unsigned int hw_id; u32 user_handle; u32 ggtt_alignment; struct intel_context { - struct drm_i915_gem_object *state; + struct i915_vma *state; struct intel_ring *ring; - struct i915_vma *lrc_vma; uint32_t *lrc_reg_state; u64 lrc_desc; int pin_count; @@ -1061,13 +1073,6 @@ struct intel_gmbus { struct i915_suspend_saved_registers { u32 saveDSPARB; - u32 saveLVDS; - u32 savePP_ON_DELAYS; - u32 savePP_OFF_DELAYS; - u32 savePP_ON; - u32 savePP_OFF; - u32 savePP_CONTROL; - u32 savePP_DIVISOR; u32 saveFBC_CONTROL; u32 saveCACHE_MODE_0; u32 saveMI_ARB_STATE; @@ -1749,12 +1754,14 @@ struct drm_i915_private { uint32_t psr_mmio_base; + uint32_t pps_mmio_base; + wait_queue_head_t gmbus_wait_queue; struct pci_dev *bridge_dev; struct i915_gem_context *kernel_context; struct intel_engine_cs engine[I915_NUM_ENGINES]; - struct drm_i915_gem_object *semaphore_obj; + struct i915_vma *semaphore; u32 next_seqno; struct drm_dma_handle *status_page_dmah; @@ -1840,6 +1847,7 @@ struct drm_i915_private { enum modeset_restore modeset_restore; struct mutex modeset_restore_lock; struct drm_atomic_state *modeset_restore_state; + struct drm_modeset_acquire_ctx reset_ctx; struct list_head vm_list; /* Global list of all address spaces */ struct i915_ggtt ggtt; /* VM representing the global address space */ @@ -2171,33 +2179,11 @@ struct drm_i915_gem_object { unsigned int dirty:1; /** - * Fence register bits (if any) for this object. Will be set - * as needed when mapped into the GTT. - * Protected by dev->struct_mutex. - */ - signed int fence_reg:I915_MAX_NUM_FENCE_BITS; - - /** * Advice: are the backing pages purgeable? */ unsigned int madv:2; /** - * Whether the tiling parameters for the currently associated fence - * register have changed. Note that for the purposes of tracking - * tiling changes we also treat the unfenced register, the register - * slot that the object occupies whilst it executes a fenced - * command (such as BLT on gen2/3), as a "fence". - */ - unsigned int fence_dirty:1; - - /** - * Is the object at the current location in the gtt mappable and - * fenceable? Used to avoid costly recalculations. - */ - unsigned int map_and_fenceable:1; - - /** * Whether the current gtt mapping needs to be mappable (and isn't just * mappable by accident). Track pin and fault separate for a more * accurate mappable working set. @@ -2213,6 +2199,7 @@ struct drm_i915_gem_object { unsigned int cache_dirty:1; atomic_t frontbuffer_bits; + unsigned int frontbuffer_ggtt_origin; /* write once */ /** Current tiling stride for the object, if it's tiled. */ unsigned int tiling_and_stride; @@ -2220,7 +2207,6 @@ struct drm_i915_gem_object { #define TILING_MASK (FENCE_MINIMUM_STRIDE-1) #define STRIDE_MASK (~TILING_MASK) - unsigned int has_wc_mmap; /** Count of VMA actually bound by this object */ unsigned int bind_count; unsigned int pin_display; @@ -2245,7 +2231,6 @@ struct drm_i915_gem_object { */ struct i915_gem_active last_read[I915_NUM_ENGINES]; struct i915_gem_active last_write; - struct i915_gem_active last_fence; /** References from framebuffers, locks out tiling changes. */ unsigned long framebuffer_references; @@ -2375,6 +2360,18 @@ i915_gem_object_get_stride(struct drm_i915_gem_object *obj) return obj->tiling_and_stride & STRIDE_MASK; } +static inline struct i915_vma *i915_vma_get(struct i915_vma *vma) +{ + i915_gem_object_get(vma->obj); + return vma; +} + +static inline void i915_vma_put(struct i915_vma *vma) +{ + lockdep_assert_held(&vma->vm->dev->struct_mutex); + i915_gem_object_put(vma->obj); +} + /* * Optimised SGL iterator for GEM objects */ @@ -3066,7 +3063,7 @@ struct drm_i915_gem_object *i915_gem_object_create_from_data( void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file); void i915_gem_free_object(struct drm_gem_object *obj); -int __must_check +struct i915_vma * __must_check i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view, u64 size, @@ -3085,9 +3082,6 @@ int i915_gem_object_put_pages(struct drm_i915_gem_object *obj); void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv); void i915_gem_release_mmap(struct drm_i915_gem_object *obj); -int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, - int *needs_clflush); - int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj); static inline int __sg_page_count(struct scatterlist *sg) @@ -3147,13 +3141,20 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj) obj->pages_pin_count--; } +enum i915_map_type { + I915_MAP_WB = 0, + I915_MAP_WC, +}; + /** * i915_gem_object_pin_map - return a contiguous mapping of the entire object * @obj - the object to map into kernel address space + * @type - the type of mapping, used to select pgprot_t * * Calls i915_gem_object_pin_pages() to prevent reaping of the object's * pages and then returns a contiguous mapping of the backing storage into - * the kernel address space. + * the kernel address space. Based on the @type of mapping, the PTE will be + * set to either WriteBack or WriteCombine (via pgprot_t). * * The caller must hold the struct_mutex, and is responsible for calling * i915_gem_object_unpin_map() when the mapping is no longer required. @@ -3161,7 +3162,8 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj) * Returns the pointer through which to access the mapped object, or an * ERR_PTR() on error. */ -void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj); +void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj, + enum i915_map_type type); /** * i915_gem_object_unpin_map - releases an earlier mapping @@ -3180,6 +3182,20 @@ static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj) i915_gem_object_unpin_pages(obj); } +int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, + unsigned int *needs_clflush); +int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, + unsigned int *needs_clflush); +#define CLFLUSH_BEFORE 0x1 +#define CLFLUSH_AFTER 0x2 +#define CLFLUSH_FLAGS (CLFLUSH_BEFORE | CLFLUSH_AFTER) + +static inline void +i915_gem_obj_finish_shmem_access(struct drm_i915_gem_object *obj) +{ + i915_gem_object_unpin_pages(obj); +} + int __must_check i915_mutex_lock_interruptible(struct drm_device *dev); int i915_gem_object_sync(struct drm_i915_gem_object *obj, struct drm_i915_gem_request *to); @@ -3262,12 +3278,11 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write); int __must_check i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write); -int __must_check +struct i915_vma * __must_check i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, u32 alignment, const struct i915_ggtt_view *view); -void i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj, - const struct i915_ggtt_view *view); +void i915_gem_object_unpin_from_display_plane(struct i915_vma *vma); int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align); int i915_gem_open(struct drm_device *dev, struct drm_file *file); @@ -3287,71 +3302,81 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev, struct dma_buf *i915_gem_prime_export(struct drm_device *dev, struct drm_gem_object *gem_obj, int flags); -u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o, - const struct i915_ggtt_view *view); -u64 i915_gem_obj_offset(struct drm_i915_gem_object *o, - struct i915_address_space *vm); -static inline u64 -i915_gem_obj_ggtt_offset(struct drm_i915_gem_object *o) -{ - return i915_gem_obj_ggtt_offset_view(o, &i915_ggtt_view_normal); -} - -bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o, - const struct i915_ggtt_view *view); -bool i915_gem_obj_bound(struct drm_i915_gem_object *o, - struct i915_address_space *vm); - struct i915_vma * i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, - struct i915_address_space *vm); -struct i915_vma * -i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj, - const struct i915_ggtt_view *view); + struct i915_address_space *vm, + const struct i915_ggtt_view *view); struct i915_vma * i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, - struct i915_address_space *vm); -struct i915_vma * -i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj, - const struct i915_ggtt_view *view); - -static inline struct i915_vma * -i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj) -{ - return i915_gem_obj_to_ggtt_view(obj, &i915_ggtt_view_normal); -} -bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj); + struct i915_address_space *vm, + const struct i915_ggtt_view *view); -/* Some GGTT VM helpers */ static inline struct i915_hw_ppgtt * i915_vm_to_ppgtt(struct i915_address_space *vm) { return container_of(vm, struct i915_hw_ppgtt, base); } -static inline bool i915_gem_obj_ggtt_bound(struct drm_i915_gem_object *obj) +static inline struct i915_vma * +i915_gem_object_to_ggtt(struct drm_i915_gem_object *obj, + const struct i915_ggtt_view *view) { - return i915_gem_obj_ggtt_bound_view(obj, &i915_ggtt_view_normal); + return i915_gem_obj_to_vma(obj, &to_i915(obj->base.dev)->ggtt.base, view); } -unsigned long -i915_gem_obj_ggtt_size(struct drm_i915_gem_object *obj); - -void i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj, - const struct i915_ggtt_view *view); -static inline void -i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj) +static inline unsigned long +i915_gem_object_ggtt_offset(struct drm_i915_gem_object *o, + const struct i915_ggtt_view *view) { - i915_gem_object_ggtt_unpin_view(obj, &i915_ggtt_view_normal); + return i915_ggtt_offset(i915_gem_object_to_ggtt(o, view)); } /* i915_gem_fence.c */ -int __must_check i915_gem_object_get_fence(struct drm_i915_gem_object *obj); -int __must_check i915_gem_object_put_fence(struct drm_i915_gem_object *obj); +int __must_check i915_vma_get_fence(struct i915_vma *vma); +int __must_check i915_vma_put_fence(struct i915_vma *vma); + +/** + * i915_vma_pin_fence - pin fencing state + * @vma: vma to pin fencing for + * + * This pins the fencing state (whether tiled or untiled) to make sure the + * vma (and its object) is ready to be used as a scanout target. Fencing + * status must be synchronize first by calling i915_vma_get_fence(): + * + * The resulting fence pin reference must be released again with + * i915_vma_unpin_fence(). + * + * Returns: + * + * True if the vma has a fence, false otherwise. + */ +static inline bool +i915_vma_pin_fence(struct i915_vma *vma) +{ + if (vma->fence) { + vma->fence->pin_count++; + return true; + } else + return false; +} -bool i915_gem_object_pin_fence(struct drm_i915_gem_object *obj); -void i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj); +/** + * i915_vma_unpin_fence - unpin fencing state + * @vma: vma to unpin fencing for + * + * This releases the fence pin reference acquired through + * i915_vma_pin_fence. It will handle both objects with and without an + * attached fence correctly, callers do not need to distinguish this. + */ +static inline void +i915_vma_unpin_fence(struct i915_vma *vma) +{ + if (vma->fence) { + GEM_BUG_ON(vma->fence->pin_count <= 0); + vma->fence->pin_count--; + } +} void i915_gem_restore_fences(struct drm_device *dev); @@ -3429,6 +3454,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle); /* belongs in i915_gem_gtt.h */ static inline void i915_gem_chipset_flush(struct drm_i915_private *dev_priv) { + wmb(); if (INTEL_GEN(dev_priv) < 6) intel_gtt_chipset_flush(); } @@ -3516,7 +3542,7 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type); /* i915_cmd_parser.c */ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv); -int intel_engine_init_cmd_parser(struct intel_engine_cs *engine); +void intel_engine_init_cmd_parser(struct intel_engine_cs *engine); void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine); bool intel_engine_needs_cmd_parser(struct intel_engine_cs *engine); int intel_engine_cmd_parser(struct intel_engine_cs *engine, @@ -3848,7 +3874,7 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req) * is woken. */ if (engine->irq_seqno_barrier && - READ_ONCE(engine->breadcrumbs.irq_seqno_bh) == current && + rcu_access_pointer(engine->breadcrumbs.irq_seqno_bh) == current && cmpxchg_relaxed(&engine->breadcrumbs.irq_posted, 1, 0)) { struct task_struct *tsk; @@ -3873,7 +3899,7 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req) * irq_posted == false but we are still running). */ rcu_read_lock(); - tsk = READ_ONCE(engine->breadcrumbs.irq_seqno_bh); + tsk = rcu_dereference(engine->breadcrumbs.irq_seqno_bh); if (tsk && tsk != current) /* Note that if the bottom-half is changed as we * are sending the wake-up, the new bottom-half will @@ -3902,4 +3928,32 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req) return false; } +void i915_memcpy_init_early(struct drm_i915_private *dev_priv); +bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len); + +/* i915_mm.c */ +int remap_io_mapping(struct vm_area_struct *vma, + unsigned long addr, unsigned long pfn, unsigned long size, + struct io_mapping *iomap); + +#define ptr_mask_bits(ptr) ({ \ + unsigned long __v = (unsigned long)(ptr); \ + (typeof(ptr))(__v & PAGE_MASK); \ +}) + +#define ptr_unpack_bits(ptr, bits) ({ \ + unsigned long __v = (unsigned long)(ptr); \ + (bits) = __v & ~PAGE_MASK; \ + (typeof(ptr))(__v & PAGE_MASK); \ +}) + +#define ptr_pack_bits(ptr, bits) \ + ((typeof(ptr))((unsigned long)(ptr) | (bits))) + +#define fetch_and_zero(ptr) ({ \ + typeof(*ptr) __T = *(ptr); \ + *(ptr) = (typeof(*ptr))0; \ + __T; \ +}) + #endif diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index f4f8eaa..04607d4 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -279,16 +279,25 @@ static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { .release = i915_gem_object_release_phys, }; -int -i915_gem_object_unbind(struct drm_i915_gem_object *obj) +int i915_gem_object_unbind(struct drm_i915_gem_object *obj) { struct i915_vma *vma; LIST_HEAD(still_in_list); int ret; - /* The vma will only be freed if it is marked as closed, and if we wait - * upon rendering to the vma, we may unbind anything in the list. + lockdep_assert_held(&obj->base.dev->struct_mutex); + + /* Closed vma are removed from the obj->vma_list - but they may + * still have an active binding on the object. To remove those we + * must wait for all rendering to complete to the object (as unbinding + * must anyway), and retire the requests. */ + ret = i915_gem_object_wait_rendering(obj, false); + if (ret) + return ret; + + i915_gem_retire_requests(to_i915(obj->base.dev)); + while ((vma = list_first_entry_or_null(&obj->vma_list, struct i915_vma, obj_link))) { @@ -600,34 +609,106 @@ __copy_from_user_swizzled(char *gpu_vaddr, int gpu_offset, * flush the object from the CPU cache. */ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, - int *needs_clflush) + unsigned int *needs_clflush) { int ret; *needs_clflush = 0; - if (WARN_ON(!i915_gem_object_has_struct_page(obj))) - return -EINVAL; + if (!i915_gem_object_has_struct_page(obj)) + return -ENODEV; ret = i915_gem_object_wait_rendering(obj, true); if (ret) return ret; - if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) { - /* If we're not in the cpu read domain, set ourself into the gtt - * read domain and manually flush cachelines (if required). This - * optimizes for the case when the gpu will dirty the data - * anyway again before the next pread happens. */ + ret = i915_gem_object_get_pages(obj); + if (ret) + return ret; + + i915_gem_object_pin_pages(obj); + + i915_gem_object_flush_gtt_write_domain(obj); + + /* If we're not in the cpu read domain, set ourself into the gtt + * read domain and manually flush cachelines (if required). This + * optimizes for the case when the gpu will dirty the data + * anyway again before the next pread happens. + */ + if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) *needs_clflush = !cpu_cache_is_coherent(obj->base.dev, obj->cache_level); + + if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) { + ret = i915_gem_object_set_to_cpu_domain(obj, false); + if (ret) + goto err_unpin; + + *needs_clflush = 0; } + /* return with the pages pinned */ + return 0; + +err_unpin: + i915_gem_object_unpin_pages(obj); + return ret; +} + +int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj, + unsigned int *needs_clflush) +{ + int ret; + + *needs_clflush = 0; + if (!i915_gem_object_has_struct_page(obj)) + return -ENODEV; + + ret = i915_gem_object_wait_rendering(obj, false); + if (ret) + return ret; + ret = i915_gem_object_get_pages(obj); if (ret) return ret; i915_gem_object_pin_pages(obj); + i915_gem_object_flush_gtt_write_domain(obj); + + /* If we're not in the cpu write domain, set ourself into the + * gtt write domain and manually flush cachelines (as required). + * This optimizes for the case when the gpu will use the data + * right away and we therefore have to clflush anyway. + */ + if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) + *needs_clflush |= cpu_write_needs_clflush(obj) << 1; + + /* Same trick applies to invalidate partially written cachelines read + * before writing. + */ + if (!(obj->base.read_domains & I915_GEM_DOMAIN_CPU)) + *needs_clflush |= !cpu_cache_is_coherent(obj->base.dev, + obj->cache_level); + + if (*needs_clflush && !static_cpu_has(X86_FEATURE_CLFLUSH)) { + ret = i915_gem_object_set_to_cpu_domain(obj, true); + if (ret) + goto err_unpin; + + *needs_clflush = 0; + } + + if ((*needs_clflush & CLFLUSH_AFTER) == 0) + obj->cache_dirty = true; + + intel_fb_obj_invalidate(obj, ORIGIN_CPU); + obj->dirty = 1; + /* return with the pages pinned */ + return 0; + +err_unpin: + i915_gem_object_unpin_pages(obj); return ret; } @@ -737,14 +818,24 @@ i915_gem_gtt_pread(struct drm_device *dev, { struct drm_i915_private *dev_priv = to_i915(dev); struct i915_ggtt *ggtt = &dev_priv->ggtt; + struct i915_vma *vma; struct drm_mm_node node; char __user *user_data; uint64_t remain; uint64_t offset; int ret; - ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); - if (ret) { + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE); + if (!IS_ERR(vma)) { + node.start = i915_ggtt_offset(vma); + node.allocated = false; + ret = i915_vma_put_fence(vma); + if (ret) { + i915_vma_unpin(vma); + vma = ERR_PTR(ret); + } + } + if (IS_ERR(vma)) { ret = insert_mappable_node(dev_priv, &node, PAGE_SIZE); if (ret) goto out; @@ -756,12 +847,6 @@ i915_gem_gtt_pread(struct drm_device *dev, } i915_gem_object_pin_pages(obj); - } else { - node.start = i915_gem_obj_ggtt_offset(obj); - node.allocated = false; - ret = i915_gem_object_put_fence(obj); - if (ret) - goto out_unpin; } ret = i915_gem_object_set_to_gtt_domain(obj, false); @@ -806,7 +891,7 @@ i915_gem_gtt_pread(struct drm_device *dev, * and write to user memory which may result into page * faults, and so we cannot perform this under struct_mutex. */ - if (slow_user_access(ggtt->mappable, page_base, + if (slow_user_access(&ggtt->mappable, page_base, page_offset, user_data, page_length, false)) { ret = -EFAULT; @@ -838,7 +923,7 @@ out_unpin: i915_gem_object_unpin_pages(obj); remove_mappable_node(&node); } else { - i915_gem_object_ggtt_unpin(obj); + i915_vma_unpin(vma); } out: return ret; @@ -859,19 +944,14 @@ i915_gem_shmem_pread(struct drm_device *dev, int needs_clflush = 0; struct sg_page_iter sg_iter; - if (!i915_gem_object_has_struct_page(obj)) - return -ENODEV; - - user_data = u64_to_user_ptr(args->data_ptr); - remain = args->size; - - obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); - ret = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush); if (ret) return ret; + obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); + user_data = u64_to_user_ptr(args->data_ptr); offset = args->offset; + remain = args->size; for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, offset >> PAGE_SHIFT) { @@ -927,7 +1007,7 @@ next_page: } out: - i915_gem_object_unpin_pages(obj); + i915_gem_obj_finish_shmem_access(obj); return ret; } @@ -1036,6 +1116,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915, { struct i915_ggtt *ggtt = &i915->ggtt; struct drm_device *dev = obj->base.dev; + struct i915_vma *vma; struct drm_mm_node node; uint64_t remain, offset; char __user *user_data; @@ -1045,9 +1126,18 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915, if (i915_gem_object_is_tiled(obj)) return -EFAULT; - ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE | PIN_NONBLOCK); - if (ret) { + if (!IS_ERR(vma)) { + node.start = i915_ggtt_offset(vma); + node.allocated = false; + ret = i915_vma_put_fence(vma); + if (ret) { + i915_vma_unpin(vma); + vma = ERR_PTR(ret); + } + } + if (IS_ERR(vma)) { ret = insert_mappable_node(i915, &node, PAGE_SIZE); if (ret) goto out; @@ -1059,19 +1149,13 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915, } i915_gem_object_pin_pages(obj); - } else { - node.start = i915_gem_obj_ggtt_offset(obj); - node.allocated = false; - ret = i915_gem_object_put_fence(obj); - if (ret) - goto out_unpin; } ret = i915_gem_object_set_to_gtt_domain(obj, true); if (ret) goto out_unpin; - intel_fb_obj_invalidate(obj, ORIGIN_GTT); + intel_fb_obj_invalidate(obj, ORIGIN_CPU); obj->dirty = true; user_data = u64_to_user_ptr(args->data_ptr); @@ -1103,11 +1187,11 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915, * If the object is non-shmem backed, we retry again with the * path that handles page fault. */ - if (fast_user_write(ggtt->mappable, page_base, + if (fast_user_write(&ggtt->mappable, page_base, page_offset, user_data, page_length)) { hit_slow_path = true; mutex_unlock(&dev->struct_mutex); - if (slow_user_access(ggtt->mappable, + if (slow_user_access(&ggtt->mappable, page_base, page_offset, user_data, page_length, true)) { @@ -1138,7 +1222,7 @@ out_flush: } } - intel_fb_obj_flush(obj, false, ORIGIN_GTT); + intel_fb_obj_flush(obj, false, ORIGIN_CPU); out_unpin: if (node.allocated) { wmb(); @@ -1148,7 +1232,7 @@ out_unpin: i915_gem_object_unpin_pages(obj); remove_mappable_node(&node); } else { - i915_gem_object_ggtt_unpin(obj); + i915_vma_unpin(vma); } out: return ret; @@ -1231,42 +1315,17 @@ i915_gem_shmem_pwrite(struct drm_device *dev, int shmem_page_offset, page_length, ret = 0; int obj_do_bit17_swizzling, page_do_bit17_swizzling; int hit_slowpath = 0; - int needs_clflush_after = 0; - int needs_clflush_before = 0; + unsigned int needs_clflush; struct sg_page_iter sg_iter; - user_data = u64_to_user_ptr(args->data_ptr); - remain = args->size; - - obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); - - ret = i915_gem_object_wait_rendering(obj, false); + ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush); if (ret) return ret; - if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) { - /* If we're not in the cpu write domain, set ourself into the gtt - * write domain and manually flush cachelines (if required). This - * optimizes for the case when the gpu will use the data - * right away and we therefore have to clflush anyway. */ - needs_clflush_after = cpu_write_needs_clflush(obj); - } - /* Same trick applies to invalidate partially written cachelines read - * before writing. */ - if ((obj->base.read_domains & I915_GEM_DOMAIN_CPU) == 0) - needs_clflush_before = - !cpu_cache_is_coherent(dev, obj->cache_level); - - ret = i915_gem_object_get_pages(obj); - if (ret) - return ret; - - intel_fb_obj_invalidate(obj, ORIGIN_CPU); - - i915_gem_object_pin_pages(obj); - + obj_do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj); + user_data = u64_to_user_ptr(args->data_ptr); offset = args->offset; - obj->dirty = 1; + remain = args->size; for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, offset >> PAGE_SHIFT) { @@ -1290,7 +1349,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev, /* If we don't overwrite a cacheline completely we need to be * careful to have up-to-date data by first clflushing. Don't * overcomplicate things and flush the entire patch. */ - partial_cacheline_write = needs_clflush_before && + partial_cacheline_write = needs_clflush & CLFLUSH_BEFORE && ((shmem_page_offset | page_length) & (boot_cpu_data.x86_clflush_size - 1)); @@ -1300,7 +1359,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev, ret = shmem_pwrite_fast(page, shmem_page_offset, page_length, user_data, page_do_bit17_swizzling, partial_cacheline_write, - needs_clflush_after); + needs_clflush & CLFLUSH_AFTER); if (ret == 0) goto next_page; @@ -1309,7 +1368,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev, ret = shmem_pwrite_slow(page, shmem_page_offset, page_length, user_data, page_do_bit17_swizzling, partial_cacheline_write, - needs_clflush_after); + needs_clflush & CLFLUSH_AFTER); mutex_lock(&dev->struct_mutex); @@ -1323,7 +1382,7 @@ next_page: } out: - i915_gem_object_unpin_pages(obj); + i915_gem_obj_finish_shmem_access(obj); if (hit_slowpath) { /* @@ -1331,17 +1390,15 @@ out: * cachelines in-line while writing and the object moved * out of the cpu write domain while we've dropped the lock. */ - if (!needs_clflush_after && + if (!(needs_clflush & CLFLUSH_AFTER) && obj->base.write_domain != I915_GEM_DOMAIN_CPU) { if (i915_gem_clflush_object(obj, obj->pin_display)) - needs_clflush_after = true; + needs_clflush |= CLFLUSH_AFTER; } } - if (needs_clflush_after) + if (needs_clflush & CLFLUSH_AFTER) i915_gem_chipset_flush(to_i915(dev)); - else - obj->cache_dirty = true; intel_fb_obj_flush(obj, false, ORIGIN_CPU); return ret; @@ -1420,10 +1477,8 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, if (ret == -EFAULT || ret == -ENOSPC) { if (obj->phys_handle) ret = i915_gem_phys_pwrite(obj, args, file); - else if (i915_gem_object_has_struct_page(obj)) - ret = i915_gem_shmem_pwrite(dev, obj, args, file); else - ret = -ENODEV; + ret = i915_gem_shmem_pwrite(dev, obj, args, file); } i915_gem_object_put(obj); @@ -1439,11 +1494,11 @@ err: return ret; } -static enum fb_op_origin +static inline enum fb_op_origin write_origin(struct drm_i915_gem_object *obj, unsigned domain) { - return domain == I915_GEM_DOMAIN_GTT && !obj->has_wc_mmap ? - ORIGIN_GTT : ORIGIN_CPU; + return (domain == I915_GEM_DOMAIN_GTT ? + obj->frontbuffer_ggtt_origin : ORIGIN_CPU); } /** @@ -1603,7 +1658,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, up_write(&mm->mmap_sem); /* This may race, but that's ok, it only gets set */ - WRITE_ONCE(obj->has_wc_mmap, true); + WRITE_ONCE(obj->frontbuffer_ggtt_origin, ORIGIN_CPU); } i915_gem_object_put_unlocked(obj); if (IS_ERR((void *)addr)) @@ -1614,9 +1669,19 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, return 0; } +static unsigned int tile_row_pages(struct drm_i915_gem_object *obj) +{ + u64 size; + + size = i915_gem_object_get_stride(obj); + size *= i915_gem_object_get_tiling(obj) == I915_TILING_Y ? 32 : 8; + + return size >> PAGE_SHIFT; +} + /** * i915_gem_fault - fault a page into the GTT - * @vma: VMA in question + * @area: CPU VMA in question * @vmf: fault info * * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped @@ -1630,20 +1695,21 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, * suffer if the GTT working set is large or there are few fence registers * left. */ -int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf) { - struct drm_i915_gem_object *obj = to_intel_bo(vma->vm_private_data); +#define MIN_CHUNK_PAGES ((1 << 20) >> PAGE_SHIFT) /* 1 MiB */ + struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data); struct drm_device *dev = obj->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); struct i915_ggtt *ggtt = &dev_priv->ggtt; - struct i915_ggtt_view view = i915_ggtt_view_normal; bool write = !!(vmf->flags & FAULT_FLAG_WRITE); + struct i915_vma *vma; pgoff_t page_offset; - unsigned long pfn; + unsigned int flags; int ret; /* We don't use vmf->pgoff since that has the fake offset */ - page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >> + page_offset = ((unsigned long)vmf->virtual_address - area->vm_start) >> PAGE_SHIFT; trace_i915_gem_object_fault(obj, page_offset, true, write); @@ -1669,79 +1735,71 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) goto err_unlock; } - /* Use a partial view if the object is bigger than the aperture. */ - if (obj->base.size >= ggtt->mappable_end && - !i915_gem_object_is_tiled(obj)) { - static const unsigned int chunk_size = 256; // 1 MiB + /* If the object is smaller than a couple of partial vma, it is + * not worth only creating a single partial vma - we may as well + * clear enough space for the full object. + */ + flags = PIN_MAPPABLE; + if (obj->base.size > 2 * MIN_CHUNK_PAGES << PAGE_SHIFT) + flags |= PIN_NONBLOCK | PIN_NONFAULT; + + /* Now pin it into the GTT as needed */ + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, flags); + if (IS_ERR(vma)) { + struct i915_ggtt_view view; + unsigned int chunk_size; + + /* Use a partial view if it is bigger than available space */ + chunk_size = MIN_CHUNK_PAGES; + if (i915_gem_object_is_tiled(obj)) + chunk_size = max(chunk_size, tile_row_pages(obj)); memset(&view, 0, sizeof(view)); view.type = I915_GGTT_VIEW_PARTIAL; view.params.partial.offset = rounddown(page_offset, chunk_size); view.params.partial.size = - min_t(unsigned int, - chunk_size, - (vma->vm_end - vma->vm_start)/PAGE_SIZE - + min_t(unsigned int, chunk_size, + (area->vm_end - area->vm_start) / PAGE_SIZE - view.params.partial.offset); - } - /* Now pin it into the GTT if needed */ - ret = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE); - if (ret) + /* If the partial covers the entire object, just create a + * normal VMA. + */ + if (chunk_size >= obj->base.size >> PAGE_SHIFT) + view.type = I915_GGTT_VIEW_NORMAL; + + /* Userspace is now writing through an untracked VMA, abandon + * all hope that the hardware is able to track future writes. + */ + obj->frontbuffer_ggtt_origin = ORIGIN_CPU; + + vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE); + } + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); goto err_unlock; + } ret = i915_gem_object_set_to_gtt_domain(obj, write); if (ret) goto err_unpin; - ret = i915_gem_object_get_fence(obj); + ret = i915_vma_get_fence(vma); if (ret) goto err_unpin; /* Finally, remap it using the new GTT offset */ - pfn = ggtt->mappable_base + - i915_gem_obj_ggtt_offset_view(obj, &view); - pfn >>= PAGE_SHIFT; - - if (unlikely(view.type == I915_GGTT_VIEW_PARTIAL)) { - /* Overriding existing pages in partial view does not cause - * us any trouble as TLBs are still valid because the fault - * is due to userspace losing part of the mapping or never - * having accessed it before (at this partials' range). - */ - unsigned long base = vma->vm_start + - (view.params.partial.offset << PAGE_SHIFT); - unsigned int i; - - for (i = 0; i < view.params.partial.size; i++) { - ret = vm_insert_pfn(vma, base + i * PAGE_SIZE, pfn + i); - if (ret) - break; - } - - obj->fault_mappable = true; - } else { - if (!obj->fault_mappable) { - unsigned long size = min_t(unsigned long, - vma->vm_end - vma->vm_start, - obj->base.size); - int i; - - for (i = 0; i < size >> PAGE_SHIFT; i++) { - ret = vm_insert_pfn(vma, - (unsigned long)vma->vm_start + i * PAGE_SIZE, - pfn + i); - if (ret) - break; - } + ret = remap_io_mapping(area, + area->vm_start + (vma->ggtt_view.params.partial.offset << PAGE_SHIFT), + (ggtt->mappable_base + vma->node.start) >> PAGE_SHIFT, + min_t(u64, vma->size, area->vm_end - area->vm_start), + &ggtt->mappable); + if (ret) + goto err_unpin; - obj->fault_mappable = true; - } else - ret = vm_insert_pfn(vma, - (unsigned long)vmf->virtual_address, - pfn + page_offset); - } + obj->fault_mappable = true; err_unpin: - i915_gem_object_ggtt_unpin_view(obj, &view); + __i915_vma_unpin(vma); err_unlock: mutex_unlock(&dev->struct_mutex); err_rpm: @@ -2077,10 +2135,14 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj) list_del(&obj->global_list); if (obj->mapping) { - if (is_vmalloc_addr(obj->mapping)) - vunmap(obj->mapping); + void *ptr; + + ptr = ptr_mask_bits(obj->mapping); + if (is_vmalloc_addr(ptr)) + vunmap(ptr); else - kunmap(kmap_to_page(obj->mapping)); + kunmap(kmap_to_page(ptr)); + obj->mapping = NULL; } @@ -2253,7 +2315,8 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj) } /* The 'mapping' part of i915_gem_object_pin_map() below */ -static void *i915_gem_object_map(const struct drm_i915_gem_object *obj) +static void *i915_gem_object_map(const struct drm_i915_gem_object *obj, + enum i915_map_type type) { unsigned long n_pages = obj->base.size >> PAGE_SHIFT; struct sg_table *sgt = obj->pages; @@ -2262,10 +2325,11 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj) struct page *stack_pages[32]; struct page **pages = stack_pages; unsigned long i = 0; + pgprot_t pgprot; void *addr; /* A single page can always be kmapped */ - if (n_pages == 1) + if (n_pages == 1 && type == I915_MAP_WB) return kmap(sg_page(sgt->sgl)); if (n_pages > ARRAY_SIZE(stack_pages)) { @@ -2281,7 +2345,15 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj) /* Check that we have the expected number of pages */ GEM_BUG_ON(i != n_pages); - addr = vmap(pages, n_pages, 0, PAGE_KERNEL); + switch (type) { + case I915_MAP_WB: + pgprot = PAGE_KERNEL; + break; + case I915_MAP_WC: + pgprot = pgprot_writecombine(PAGE_KERNEL_IO); + break; + } + addr = vmap(pages, n_pages, 0, pgprot); if (pages != stack_pages) drm_free_large(pages); @@ -2290,27 +2362,54 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj) } /* get, pin, and map the pages of the object into kernel space */ -void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj) +void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, + enum i915_map_type type) { + enum i915_map_type has_type; + bool pinned; + void *ptr; int ret; lockdep_assert_held(&obj->base.dev->struct_mutex); + GEM_BUG_ON(!i915_gem_object_has_struct_page(obj)); ret = i915_gem_object_get_pages(obj); if (ret) return ERR_PTR(ret); i915_gem_object_pin_pages(obj); + pinned = obj->pages_pin_count > 1; - if (!obj->mapping) { - obj->mapping = i915_gem_object_map(obj); - if (!obj->mapping) { - i915_gem_object_unpin_pages(obj); - return ERR_PTR(-ENOMEM); + ptr = ptr_unpack_bits(obj->mapping, has_type); + if (ptr && has_type != type) { + if (pinned) { + ret = -EBUSY; + goto err; } + + if (is_vmalloc_addr(ptr)) + vunmap(ptr); + else + kunmap(kmap_to_page(ptr)); + + ptr = obj->mapping = NULL; + } + + if (!ptr) { + ptr = i915_gem_object_map(obj, type); + if (!ptr) { + ret = -ENOMEM; + goto err; + } + + obj->mapping = ptr_pack_bits(ptr, type); } - return obj->mapping; + return ptr; + +err: + i915_gem_object_unpin_pages(obj); + return ERR_PTR(ret); } static void @@ -2423,15 +2522,11 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine) struct drm_i915_gem_request *request; struct intel_ring *ring; - request = i915_gem_active_peek(&engine->last_request, - &engine->i915->drm.struct_mutex); - /* Mark all pending requests as complete so that any concurrent * (lockless) lookup doesn't try and wait upon the request as we * reset it. */ - if (request) - intel_engine_init_seqno(engine, request->fence.seqno); + intel_engine_init_seqno(engine, engine->last_submitted_seqno); /* * Clear the execlists queue up before freeing the requests, as those @@ -2453,6 +2548,8 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine) * implicit references on things like e.g. ppgtt address spaces through * the request. */ + request = i915_gem_active_raw(&engine->last_request, + &engine->i915->drm.struct_mutex); if (request) i915_gem_request_retire_upto(request); GEM_BUG_ON(intel_engine_is_active(engine)); @@ -2526,7 +2623,6 @@ i915_gem_idle_work_handler(struct work_struct *work) container_of(work, typeof(*dev_priv), gt.idle_work.work); struct drm_device *dev = &dev_priv->drm; struct intel_engine_cs *engine; - unsigned int stuck_engines; bool rearm_hangcheck; if (!READ_ONCE(dev_priv->gt.awake)) @@ -2556,15 +2652,6 @@ i915_gem_idle_work_handler(struct work_struct *work) dev_priv->gt.awake = false; rearm_hangcheck = false; - /* As we have disabled hangcheck, we need to unstick any waiters still - * hanging around. However, as we may be racing against the interrupt - * handler or the waiters themselves, we skip enabling the fake-irq. - */ - stuck_engines = intel_kick_waiters(dev_priv); - if (unlikely(stuck_engines)) - DRM_DEBUG_DRIVER("kicked stuck waiters (%x)...missed irq?\n", - stuck_engines); - if (INTEL_GEN(dev_priv) >= 6) gen6_rps_idle(dev_priv); intel_runtime_pm_put(dev_priv); @@ -2734,27 +2821,6 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj, return 0; } -static void i915_gem_object_finish_gtt(struct drm_i915_gem_object *obj) -{ - u32 old_write_domain, old_read_domains; - - /* Force a pagefault for domain tracking on next user access */ - i915_gem_release_mmap(obj); - - if ((obj->base.read_domains & I915_GEM_DOMAIN_GTT) == 0) - return; - - old_read_domains = obj->base.read_domains; - old_write_domain = obj->base.write_domain; - - obj->base.read_domains &= ~I915_GEM_DOMAIN_GTT; - obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT; - - trace_i915_gem_object_change_domain(obj, - old_read_domains, - old_write_domain); -} - static void __i915_vma_iounmap(struct i915_vma *vma) { GEM_BUG_ON(i915_vma_is_pinned(vma)); @@ -2809,16 +2875,17 @@ int i915_vma_unbind(struct i915_vma *vma) GEM_BUG_ON(obj->bind_count == 0); GEM_BUG_ON(!obj->pages); - if (i915_vma_is_ggtt(vma) && - vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { - i915_gem_object_finish_gtt(obj); - + if (i915_vma_is_map_and_fenceable(vma)) { /* release the fence reg _after_ flushing */ - ret = i915_gem_object_put_fence(obj); + ret = i915_vma_put_fence(vma); if (ret) return ret; + /* Force a pagefault for domain tracking on next user access */ + i915_gem_release_mmap(obj); + __i915_vma_iounmap(vma); + vma->flags &= ~I915_VMA_CAN_FENCE; } if (likely(!vma->vm->closed)) { @@ -2830,15 +2897,12 @@ int i915_vma_unbind(struct i915_vma *vma) drm_mm_remove_node(&vma->node); list_move_tail(&vma->vm_link, &vma->vm->unbound_list); - if (i915_vma_is_ggtt(vma)) { - if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) { - obj->map_and_fenceable = false; - } else if (vma->ggtt_view.pages) { - sg_free_table(vma->ggtt_view.pages); - kfree(vma->ggtt_view.pages); - } - vma->ggtt_view.pages = NULL; + if (vma->pages != obj->pages) { + GEM_BUG_ON(!vma->pages); + sg_free_table(vma->pages); + kfree(vma->pages); } + vma->pages = NULL; /* Since the unbound list is global, only move to that list if * no more VMAs exist. */ @@ -2930,7 +2994,6 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) struct drm_i915_private *dev_priv = to_i915(vma->vm->dev); struct drm_i915_gem_object *obj = vma->obj; u64 start, end; - u64 min_alignment; int ret; GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); @@ -2941,17 +3004,10 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) size = i915_gem_get_ggtt_size(dev_priv, size, i915_gem_object_get_tiling(obj)); - min_alignment = - i915_gem_get_ggtt_alignment(dev_priv, size, - i915_gem_object_get_tiling(obj), - flags & PIN_MAPPABLE); - if (alignment == 0) - alignment = min_alignment; - if (alignment & (min_alignment - 1)) { - DRM_DEBUG("Invalid object alignment requested %llu, minimum %llu\n", - alignment, min_alignment); - return -EINVAL; - } + alignment = max(max(alignment, vma->display_alignment), + i915_gem_get_ggtt_alignment(dev_priv, size, + i915_gem_object_get_tiling(obj), + flags & PIN_MAPPABLE)); start = flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0; @@ -3091,51 +3147,72 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj, static void i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj) { - uint32_t old_write_domain; + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); if (obj->base.write_domain != I915_GEM_DOMAIN_GTT) return; /* No actual flushing is required for the GTT write domain. Writes - * to it immediately go to main memory as far as we know, so there's + * to it "immediately" go to main memory as far as we know, so there's * no chipset flush. It also doesn't land in render cache. * * However, we do have to enforce the order so that all writes through * the GTT land before any writes to the device, such as updates to * the GATT itself. + * + * We also have to wait a bit for the writes to land from the GTT. + * An uncached read (i.e. mmio) seems to be ideal for the round-trip + * timing. This issue has only been observed when switching quickly + * between GTT writes and CPU reads from inside the kernel on recent hw, + * and it appears to only affect discrete GTT blocks (i.e. on LLC + * system agents we cannot reproduce this behaviour). */ wmb(); + if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) + POSTING_READ(RING_ACTHD(dev_priv->engine[RCS].mmio_base)); - old_write_domain = obj->base.write_domain; - obj->base.write_domain = 0; - - intel_fb_obj_flush(obj, false, ORIGIN_GTT); + intel_fb_obj_flush(obj, false, write_origin(obj, I915_GEM_DOMAIN_GTT)); + obj->base.write_domain = 0; trace_i915_gem_object_change_domain(obj, obj->base.read_domains, - old_write_domain); + I915_GEM_DOMAIN_GTT); } /** Flushes the CPU write domain for the object if it's dirty. */ static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *obj) { - uint32_t old_write_domain; - if (obj->base.write_domain != I915_GEM_DOMAIN_CPU) return; if (i915_gem_clflush_object(obj, obj->pin_display)) i915_gem_chipset_flush(to_i915(obj->base.dev)); - old_write_domain = obj->base.write_domain; - obj->base.write_domain = 0; - intel_fb_obj_flush(obj, false, ORIGIN_CPU); + obj->base.write_domain = 0; trace_i915_gem_object_change_domain(obj, obj->base.read_domains, - old_write_domain); + I915_GEM_DOMAIN_CPU); +} + +static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) +{ + struct i915_vma *vma; + + list_for_each_entry(vma, &obj->vma_list, obj_link) { + if (!i915_vma_is_ggtt(vma)) + continue; + + if (i915_vma_is_active(vma)) + continue; + + if (!drm_mm_node_allocated(&vma->node)) + continue; + + list_move_tail(&vma->vm_link, &vma->vm->inactive_list); + } } /** @@ -3150,7 +3227,6 @@ int i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) { uint32_t old_write_domain, old_read_domains; - struct i915_vma *vma; int ret; ret = i915_gem_object_wait_rendering(obj, !write); @@ -3200,11 +3276,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) old_write_domain); /* And bump the LRU for this access */ - vma = i915_gem_obj_to_ggtt(obj); - if (vma && - drm_mm_node_allocated(&vma->node) && - !i915_vma_is_active(vma)) - list_move_tail(&vma->vm_link, &vma->vm->inactive_list); + i915_gem_object_bump_inactive_ggtt(obj); return 0; } @@ -3295,9 +3367,11 @@ restart: * dropped the fence as all snoopable access is * supposed to be linear. */ - ret = i915_gem_object_put_fence(obj); - if (ret) - return ret; + list_for_each_entry(vma, &obj->vma_list, obj_link) { + ret = i915_vma_put_fence(vma); + if (ret) + return ret; + } } else { /* We either have incoherent backing store and * so no GTT access or the architecture is fully @@ -3424,11 +3498,12 @@ rpm_put: * Can be called from an uninterruptible phase (modesetting) and allows * any flushes to be pipelined (for pageflips). */ -int +struct i915_vma * i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, u32 alignment, const struct i915_ggtt_view *view) { + struct i915_vma *vma; u32 old_read_domains, old_write_domain; int ret; @@ -3448,19 +3523,31 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, */ ret = i915_gem_object_set_cache_level(obj, HAS_WT(obj->base.dev) ? I915_CACHE_WT : I915_CACHE_NONE); - if (ret) + if (ret) { + vma = ERR_PTR(ret); goto err_unpin_display; + } /* As the user may map the buffer once pinned in the display plane * (e.g. libkms for the bootup splash), we have to ensure that we - * always use map_and_fenceable for all scanout buffers. + * always use map_and_fenceable for all scanout buffers. However, + * it may simply be too big to fit into mappable, in which case + * put it anyway and hope that userspace can cope (but always first + * try to preserve the existing ABI). */ - ret = i915_gem_object_ggtt_pin(obj, view, 0, alignment, - view->type == I915_GGTT_VIEW_NORMAL ? - PIN_MAPPABLE : 0); - if (ret) + vma = ERR_PTR(-ENOSPC); + if (view->type == I915_GGTT_VIEW_NORMAL) + vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, + PIN_MAPPABLE | PIN_NONBLOCK); + if (IS_ERR(vma)) + vma = i915_gem_object_ggtt_pin(obj, view, 0, alignment, 0); + if (IS_ERR(vma)) goto err_unpin_display; + vma->display_alignment = max_t(u64, vma->display_alignment, alignment); + + WARN_ON(obj->pin_display > i915_vma_pin_count(vma)); + i915_gem_object_flush_cpu_write_domain(obj); old_write_domain = obj->base.write_domain; @@ -3476,23 +3563,28 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, old_read_domains, old_write_domain); - return 0; + return vma; err_unpin_display: obj->pin_display--; - return ret; + return vma; } void -i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object *obj, - const struct i915_ggtt_view *view) +i915_gem_object_unpin_from_display_plane(struct i915_vma *vma) { - if (WARN_ON(obj->pin_display == 0)) + if (WARN_ON(vma->obj->pin_display == 0)) return; - i915_gem_object_ggtt_unpin_view(obj, view); + if (--vma->obj->pin_display == 0) + vma->display_alignment = 0; - obj->pin_display--; + /* Bump the LRU to try and avoid premature eviction whilst flipping */ + if (!i915_vma_is_active(vma)) + list_move_tail(&vma->vm_link, &vma->vm->inactive_list); + + i915_vma_unpin(vma); + WARN_ON(vma->obj->pin_display > i915_vma_pin_count(vma)); } /** @@ -3605,8 +3697,6 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file) static bool i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) { - struct drm_i915_gem_object *obj = vma->obj; - if (!drm_mm_node_allocated(&vma->node)) return false; @@ -3616,7 +3706,7 @@ i915_vma_misplaced(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) if (alignment && vma->node.start & (alignment - 1)) return true; - if (flags & PIN_MAPPABLE && !obj->map_and_fenceable) + if (flags & PIN_MAPPABLE && !i915_vma_is_map_and_fenceable(vma)) return true; if (flags & PIN_OFFSET_BIAS && @@ -3638,10 +3728,10 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) u32 fence_size, fence_alignment; fence_size = i915_gem_get_ggtt_size(dev_priv, - obj->base.size, + vma->size, i915_gem_object_get_tiling(obj)); fence_alignment = i915_gem_get_ggtt_alignment(dev_priv, - obj->base.size, + vma->size, i915_gem_object_get_tiling(obj), true); @@ -3651,7 +3741,10 @@ void __i915_vma_set_map_and_fenceable(struct i915_vma *vma) mappable = (vma->node.start + fence_size <= dev_priv->ggtt.mappable_end); - obj->map_and_fenceable = mappable && fenceable; + if (mappable && fenceable) + vma->flags |= I915_VMA_CAN_FENCE; + else + vma->flags &= ~I915_VMA_CAN_FENCE; } int __i915_vma_do_pin(struct i915_vma *vma, @@ -3689,53 +3782,46 @@ err: return ret; } -int +struct i915_vma * i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj, const struct i915_ggtt_view *view, u64 size, u64 alignment, u64 flags) { + struct i915_address_space *vm = &to_i915(obj->base.dev)->ggtt.base; struct i915_vma *vma; int ret; - if (!view) - view = &i915_ggtt_view_normal; - - vma = i915_gem_obj_lookup_or_create_ggtt_vma(obj, view); + vma = i915_gem_obj_lookup_or_create_vma(obj, vm, view); if (IS_ERR(vma)) - return PTR_ERR(vma); + return vma; if (i915_vma_misplaced(vma, size, alignment, flags)) { if (flags & PIN_NONBLOCK && (i915_vma_is_pinned(vma) || i915_vma_is_active(vma))) - return -ENOSPC; + return ERR_PTR(-ENOSPC); WARN(i915_vma_is_pinned(vma), "bo is already pinned in ggtt with incorrect alignment:" - " offset=%08x %08x, req.alignment=%llx, req.map_and_fenceable=%d," - " obj->map_and_fenceable=%d\n", - upper_32_bits(vma->node.start), - lower_32_bits(vma->node.start), - alignment, + " offset=%08x, req.alignment=%llx," + " req.map_and_fenceable=%d, vma->map_and_fenceable=%d\n", + i915_ggtt_offset(vma), alignment, !!(flags & PIN_MAPPABLE), - obj->map_and_fenceable); + i915_vma_is_map_and_fenceable(vma)); ret = i915_vma_unbind(vma); if (ret) - return ret; + return ERR_PTR(ret); } - return i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); -} + ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); + if (ret) + return ERR_PTR(ret); -void -i915_gem_object_ggtt_unpin_view(struct drm_i915_gem_object *obj, - const struct i915_ggtt_view *view) -{ - i915_vma_unpin(i915_gem_obj_to_ggtt_view(obj, view)); + return vma; } -static __always_inline unsigned __busy_read_flag(unsigned int id) +static __always_inline unsigned int __busy_read_flag(unsigned int id) { /* Note that we could alias engines in the execbuf API, but * that would be very unwise as it prevents userspace from @@ -3750,39 +3836,92 @@ static __always_inline unsigned __busy_read_flag(unsigned int id) static __always_inline unsigned int __busy_write_id(unsigned int id) { - return id; + /* The uABI guarantees an active writer is also amongst the read + * engines. This would be true if we accessed the activity tracking + * under the lock, but as we perform the lookup of the object and + * its activity locklessly we can not guarantee that the last_write + * being active implies that we have set the same engine flag from + * last_read - hence we always set both read and write busy for + * last_write. + */ + return id | __busy_read_flag(id); } -static __always_inline unsigned +static __always_inline unsigned int __busy_set_if_active(const struct i915_gem_active *active, unsigned int (*flag)(unsigned int id)) { - /* For more discussion about the barriers and locking concerns, - * see __i915_gem_active_get_rcu(). - */ - do { - struct drm_i915_gem_request *request; - unsigned int id; - - request = rcu_dereference(active->request); - if (!request || i915_gem_request_completed(request)) - return 0; + struct drm_i915_gem_request *request; - id = request->engine->exec_id; + request = rcu_dereference(active->request); + if (!request || i915_gem_request_completed(request)) + return 0; - /* Check that the pointer wasn't reassigned and overwritten. */ - if (request == rcu_access_pointer(active->request)) - return flag(id); - } while (1); + /* This is racy. See __i915_gem_active_get_rcu() for an in detail + * discussion of how to handle the race correctly, but for reporting + * the busy state we err on the side of potentially reporting the + * wrong engine as being busy (but we guarantee that the result + * is at least self-consistent). + * + * As we use SLAB_DESTROY_BY_RCU, the request may be reallocated + * whilst we are inspecting it, even under the RCU read lock as we are. + * This means that there is a small window for the engine and/or the + * seqno to have been overwritten. The seqno will always be in the + * future compared to the intended, and so we know that if that + * seqno is idle (on whatever engine) our request is idle and the + * return 0 above is correct. + * + * The issue is that if the engine is switched, it is just as likely + * to report that it is busy (but since the switch happened, we know + * the request should be idle). So there is a small chance that a busy + * result is actually the wrong engine. + * + * So why don't we care? + * + * For starters, the busy ioctl is a heuristic that is by definition + * racy. Even with perfect serialisation in the driver, the hardware + * state is constantly advancing - the state we report to the user + * is stale. + * + * The critical information for the busy-ioctl is whether the object + * is idle as userspace relies on that to detect whether its next + * access will stall, or if it has missed submitting commands to + * the hardware allowing the GPU to stall. We never generate a + * false-positive for idleness, thus busy-ioctl is reliable at the + * most fundamental level, and we maintain the guarantee that a + * busy object left to itself will eventually become idle (and stay + * idle!). + * + * We allow ourselves the leeway of potentially misreporting the busy + * state because that is an optimisation heuristic that is constantly + * in flux. Being quickly able to detect the busy/idle state is much + * more important than accurate logging of exactly which engines were + * busy. + * + * For accuracy in reporting the engine, we could use + * + * result = 0; + * request = __i915_gem_active_get_rcu(active); + * if (request) { + * if (!i915_gem_request_completed(request)) + * result = flag(request->engine->exec_id); + * i915_gem_request_put(request); + * } + * + * but that still remains susceptible to both hardware and userspace + * races. So we accept making the result of that race slightly worse, + * given the rarity of the race and its low impact on the result. + */ + return flag(READ_ONCE(request->engine->exec_id)); } -static inline unsigned +static __always_inline unsigned int busy_check_reader(const struct i915_gem_active *active) { return __busy_set_if_active(active, __busy_read_flag); } -static inline unsigned +static __always_inline unsigned int busy_check_writer(const struct i915_gem_active *active) { return __busy_set_if_active(active, __busy_write_id); @@ -3821,11 +3960,12 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, * retired and freed. We take a local copy of the pointer, * but before we add its engine into the busy set, the other * thread reallocates it and assigns it to a task on another - * engine with a fresh and incomplete seqno. - * - * So after we lookup the engine's id, we double check that - * the active request is the same and only then do we add it - * into the busy set. + * engine with a fresh and incomplete seqno. Guarding against + * that requires careful serialisation and reference counting, + * i.e. using __i915_gem_active_get_request_rcu(). We don't, + * instead we expect that if the result is busy, which engines + * are busy is not completely reliable - we only guarantee + * that the object was busy. */ rcu_read_lock(); @@ -3833,9 +3973,11 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data, args->busy |= busy_check_reader(&obj->last_read[idx]); /* For ABI sanity, we only care that the write engine is in - * the set of read engines. This is ensured by the ordering - * of setting last_read/last_write in i915_vma_move_to_active, - * and then in reverse in retire. + * the set of read engines. This should be ensured by the + * ordering of setting last_read/last_write in + * i915_vma_move_to_active(), and then in reverse in retire. + * However, for good measure, we always report the last_write + * request as a busy read as well as being a busy write. * * We don't care that the set of active read/write engines * may change during construction of the result, as it is @@ -3920,14 +4062,13 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, i915_gem_object_retire__read); init_request_active(&obj->last_write, i915_gem_object_retire__write); - init_request_active(&obj->last_fence, NULL); INIT_LIST_HEAD(&obj->obj_exec_link); INIT_LIST_HEAD(&obj->vma_list); INIT_LIST_HEAD(&obj->batch_pool_link); obj->ops = ops; - obj->fence_reg = I915_FENCE_REG_NONE; + obj->frontbuffer_ggtt_origin = ORIGIN_GTT; obj->madv = I915_MADV_WILLNEED; i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size); @@ -4082,32 +4223,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj) intel_runtime_pm_put(dev_priv); } -struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, - struct i915_address_space *vm) -{ - struct i915_vma *vma; - list_for_each_entry(vma, &obj->vma_list, obj_link) { - if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL && - vma->vm == vm) - return vma; - } - return NULL; -} - -struct i915_vma *i915_gem_obj_to_ggtt_view(struct drm_i915_gem_object *obj, - const struct i915_ggtt_view *view) -{ - struct i915_vma *vma; - - GEM_BUG_ON(!view); - - list_for_each_entry(vma, &obj->vma_list, obj_link) - if (i915_vma_is_ggtt(vma) && - i915_ggtt_view_equal(&vma->ggtt_view, view)) - return vma; - return NULL; -} - int i915_gem_suspend(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); @@ -4383,6 +4498,7 @@ void i915_gem_load_init_fences(struct drm_i915_private *dev_priv) { struct drm_device *dev = &dev_priv->drm; + int i; if (INTEL_INFO(dev_priv)->gen >= 7 && !IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv)) @@ -4398,6 +4514,13 @@ i915_gem_load_init_fences(struct drm_i915_private *dev_priv) I915_READ(vgtif_reg(avail_rs.fence_num)); /* Initialize fence registers to zero */ + for (i = 0; i < dev_priv->num_fence_regs; i++) { + struct drm_i915_fence_reg *fence = &dev_priv->fence_regs[i]; + + fence->i915 = dev_priv; + fence->id = i; + list_add_tail(&fence->link, &dev_priv->mm.fence_list); + } i915_gem_restore_fences(dev); i915_gem_detect_bit_6_swizzle(dev); @@ -4433,8 +4556,6 @@ i915_gem_load_init(struct drm_device *dev) INIT_LIST_HEAD(&dev_priv->mm.fence_list); for (i = 0; i < I915_NUM_ENGINES; i++) init_engine_lists(&dev_priv->engine[i]); - for (i = 0; i < I915_MAX_NUM_FENCES; i++) - INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list); INIT_DELAYED_WORK(&dev_priv->gt.retire_work, i915_gem_retire_work_handler); INIT_DELAYED_WORK(&dev_priv->gt.idle_work, @@ -4444,8 +4565,6 @@ i915_gem_load_init(struct drm_device *dev) dev_priv->relative_constants_mode = I915_EXEC_CONSTANTS_REL_GENERAL; - INIT_LIST_HEAD(&dev_priv->mm.fence_list); - init_waitqueue_head(&dev_priv->pending_flip_queue); dev_priv->mm.interruptible = true; @@ -4575,97 +4694,6 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old, } } -/* All the new VM stuff */ -u64 i915_gem_obj_offset(struct drm_i915_gem_object *o, - struct i915_address_space *vm) -{ - struct drm_i915_private *dev_priv = to_i915(o->base.dev); - struct i915_vma *vma; - - WARN_ON(vm == &dev_priv->mm.aliasing_ppgtt->base); - - list_for_each_entry(vma, &o->vma_list, obj_link) { - if (i915_vma_is_ggtt(vma) && - vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) - continue; - if (vma->vm == vm) - return vma->node.start; - } - - WARN(1, "%s vma for this object not found.\n", - i915_is_ggtt(vm) ? "global" : "ppgtt"); - return -1; -} - -u64 i915_gem_obj_ggtt_offset_view(struct drm_i915_gem_object *o, - const struct i915_ggtt_view *view) -{ - struct i915_vma *vma; - - list_for_each_entry(vma, &o->vma_list, obj_link) - if (i915_vma_is_ggtt(vma) && - i915_ggtt_view_equal(&vma->ggtt_view, view)) - return vma->node.start; - - WARN(1, "global vma for this object not found. (view=%u)\n", view->type); - return -1; -} - -bool i915_gem_obj_bound(struct drm_i915_gem_object *o, - struct i915_address_space *vm) -{ - struct i915_vma *vma; - - list_for_each_entry(vma, &o->vma_list, obj_link) { - if (i915_vma_is_ggtt(vma) && - vma->ggtt_view.type != I915_GGTT_VIEW_NORMAL) - continue; - if (vma->vm == vm && drm_mm_node_allocated(&vma->node)) - return true; - } - - return false; -} - -bool i915_gem_obj_ggtt_bound_view(struct drm_i915_gem_object *o, - const struct i915_ggtt_view *view) -{ - struct i915_vma *vma; - - list_for_each_entry(vma, &o->vma_list, obj_link) - if (i915_vma_is_ggtt(vma) && - i915_ggtt_view_equal(&vma->ggtt_view, view) && - drm_mm_node_allocated(&vma->node)) - return true; - - return false; -} - -unsigned long i915_gem_obj_ggtt_size(struct drm_i915_gem_object *o) -{ - struct i915_vma *vma; - - GEM_BUG_ON(list_empty(&o->vma_list)); - - list_for_each_entry(vma, &o->vma_list, obj_link) { - if (i915_vma_is_ggtt(vma) && - vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) - return vma->node.size; - } - - return 0; -} - -bool i915_gem_obj_is_pinned(struct drm_i915_gem_object *obj) -{ - struct i915_vma *vma; - list_for_each_entry(vma, &obj->vma_list, obj_link) - if (i915_vma_is_pinned(vma)) - return true; - - return false; -} - /* Like i915_gem_object_get_page(), but mark the returned page dirty */ struct page * i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index bb72af5..35950ee 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -155,9 +155,10 @@ void i915_gem_context_free(struct kref *ctx_ref) if (ce->ring) intel_ring_free(ce->ring); - i915_gem_object_put(ce->state); + i915_vma_put(ce->state); } + put_pid(ctx->pid); list_del(&ctx->link); ida_simple_remove(&ctx->i915->context_hw_ida, ctx->hw_id); @@ -281,13 +282,24 @@ __create_hw_context(struct drm_device *dev, ctx->ggtt_alignment = get_context_alignment(dev_priv); if (dev_priv->hw_context_size) { - struct drm_i915_gem_object *obj = - i915_gem_alloc_context_obj(dev, dev_priv->hw_context_size); + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + + obj = i915_gem_alloc_context_obj(dev, + dev_priv->hw_context_size); if (IS_ERR(obj)) { ret = PTR_ERR(obj); goto err_out; } - ctx->engine[RCS].state = obj; + + vma = i915_vma_create(obj, &dev_priv->ggtt.base, NULL); + if (IS_ERR(vma)) { + i915_gem_object_put(obj); + ret = PTR_ERR(vma); + goto err_out; + } + + ctx->engine[RCS].state = vma; } /* Default context will never have a file_priv */ @@ -300,6 +312,9 @@ __create_hw_context(struct drm_device *dev, ret = DEFAULT_CONTEXT_HANDLE; ctx->file_priv = file_priv; + if (file_priv) + ctx->pid = get_task_pid(current, PIDTYPE_PID); + ctx->user_handle = ret; /* NB: Mark all slices as needing a remap so that when the context first * loads it will restore whatever remap state already exists. If there @@ -399,7 +414,7 @@ static void i915_gem_context_unpin(struct i915_gem_context *ctx, struct intel_context *ce = &ctx->engine[engine->id]; if (ce->state) - i915_gem_object_ggtt_unpin(ce->state); + i915_vma_unpin(ce->state); i915_gem_context_put(ctx); } @@ -568,7 +583,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) const int num_rings = /* Use an extended w/a on ivb+ if signalling from other rings */ i915.semaphores ? - hweight32(INTEL_INFO(dev_priv)->ring_mask) - 1 : + INTEL_INFO(dev_priv)->num_rings - 1 : 0; int len, ret; @@ -621,8 +636,7 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) intel_ring_emit(ring, MI_NOOP); intel_ring_emit(ring, MI_SET_CONTEXT); intel_ring_emit(ring, - i915_gem_obj_ggtt_offset(req->ctx->engine[RCS].state) | - flags); + i915_ggtt_offset(req->ctx->engine[RCS].state) | flags); /* * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP * WaMiSetContext_Hang:snb,ivb,vlv @@ -651,7 +665,8 @@ mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags) MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT); intel_ring_emit_reg(ring, last_reg); - intel_ring_emit(ring, engine->scratch.gtt_offset); + intel_ring_emit(ring, + i915_ggtt_offset(engine->scratch)); intel_ring_emit(ring, MI_NOOP); } intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_ENABLE); @@ -755,6 +770,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) struct i915_gem_context *to = req->ctx; struct intel_engine_cs *engine = req->engine; struct i915_hw_ppgtt *ppgtt = to->ppgtt ?: req->i915->mm.aliasing_ppgtt; + struct i915_vma *vma = to->engine[RCS].state; struct i915_gem_context *from; u32 hw_flags; int ret, i; @@ -762,9 +778,15 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) if (skip_rcs_switch(ppgtt, engine, to)) return 0; + /* Clear this page out of any CPU caches for coherent swap-in/out. */ + if (!(vma->flags & I915_VMA_GLOBAL_BIND)) { + ret = i915_gem_object_set_to_gtt_domain(vma->obj, false); + if (ret) + return ret; + } + /* Trying to pin first makes error handling easier. */ - ret = i915_gem_object_ggtt_pin(to->engine[RCS].state, NULL, 0, - to->ggtt_alignment, 0); + ret = i915_vma_pin(vma, 0, to->ggtt_alignment, PIN_GLOBAL); if (ret) return ret; @@ -777,18 +799,6 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) */ from = engine->last_context; - /* - * Clear this page out of any CPU caches for coherent swap-in/out. Note - * that thanks to write = false in this call and us not setting any gpu - * write domains when putting a context object onto the active list - * (when switching away from it), this won't block. - * - * XXX: We need a real interface to do this instead of trickery. - */ - ret = i915_gem_object_set_to_gtt_domain(to->engine[RCS].state, false); - if (ret) - goto unpin_out; - if (needs_pd_load_pre(ppgtt, engine, to)) { /* Older GENs and non render rings still want the load first, * "PP_DCLV followed by PP_DIR_BASE register through Load @@ -797,7 +807,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) trace_switch_mm(engine, to); ret = ppgtt->switch_mm(ppgtt, req); if (ret) - goto unpin_out; + goto err; } if (!to->engine[RCS].initialised || i915_gem_context_is_default(to)) @@ -814,7 +824,7 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) if (to != from || (hw_flags & MI_FORCE_RESTORE)) { ret = mi_set_context(req, hw_flags); if (ret) - goto unpin_out; + goto err; } /* The backing object for the context is done after switching to the @@ -824,8 +834,6 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) * MI_SET_CONTEXT instead of when the next seqno has completed. */ if (from != NULL) { - struct drm_i915_gem_object *obj = from->engine[RCS].state; - /* As long as MI_SET_CONTEXT is serializing, ie. it flushes the * whole damn pipeline, we don't need to explicitly mark the * object dirty. The only exception is that the context must be @@ -833,11 +841,9 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) * able to defer doing this until we know the object would be * swapped, but there is no way to do that yet. */ - obj->base.read_domains = I915_GEM_DOMAIN_INSTRUCTION; - i915_vma_move_to_active(i915_gem_obj_to_ggtt(obj), req, 0); - - /* obj is kept alive until the next request by its active ref */ - i915_gem_object_ggtt_unpin(obj); + i915_vma_move_to_active(from->engine[RCS].state, req, 0); + /* state is kept alive until the next request */ + i915_vma_unpin(from->engine[RCS].state); i915_gem_context_put(from); } engine->last_context = i915_gem_context_get(to); @@ -882,8 +888,8 @@ static int do_rcs_switch(struct drm_i915_gem_request *req) return 0; -unpin_out: - i915_gem_object_ggtt_unpin(to->engine[RCS].state); +err: + i915_vma_unpin(vma); return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c index c60a8d5b..10265bb 100644 --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c @@ -119,7 +119,7 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf) if (ret) return ERR_PTR(ret); - addr = i915_gem_object_pin_map(obj); + addr = i915_gem_object_pin_map(obj, I915_MAP_WB); mutex_unlock(&dev->struct_mutex); return addr; diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c index f76c06e..815d5fb 100644 --- a/drivers/gpu/drm/i915/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/i915_gem_evict.c @@ -47,7 +47,7 @@ gpu_is_idle(struct drm_i915_private *dev_priv) } static bool -mark_free(struct i915_vma *vma, struct list_head *unwind) +mark_free(struct i915_vma *vma, unsigned int flags, struct list_head *unwind) { if (i915_vma_is_pinned(vma)) return false; @@ -55,6 +55,9 @@ mark_free(struct i915_vma *vma, struct list_head *unwind) if (WARN_ON(!list_empty(&vma->exec_list))) return false; + if (flags & PIN_NONFAULT && vma->obj->fault_mappable) + return false; + list_add(&vma->exec_list, unwind); return drm_mm_scan_add_block(&vma->node); } @@ -129,7 +132,7 @@ search_again: phase = phases; do { list_for_each_entry(vma, *phase, vm_link) - if (mark_free(vma, &eviction_list)) + if (mark_free(vma, flags, &eviction_list)) goto found; } while (*++phase); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index c494b79..601156c 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -39,6 +39,8 @@ #include "intel_drv.h" #include "intel_frontbuffer.h" +#define DBG_USE_CPU_RELOC 0 /* -1 force GTT relocs; 1 force CPU relocs */ + #define __EXEC_OBJECT_HAS_PIN (1<<31) #define __EXEC_OBJECT_HAS_FENCE (1<<30) #define __EXEC_OBJECT_NEEDS_MAP (1<<29) @@ -59,6 +61,7 @@ struct i915_execbuffer_params { }; struct eb_vmas { + struct drm_i915_private *i915; struct list_head vmas; int and; union { @@ -68,7 +71,8 @@ struct eb_vmas { }; static struct eb_vmas * -eb_create(struct drm_i915_gem_execbuffer2 *args) +eb_create(struct drm_i915_private *i915, + struct drm_i915_gem_execbuffer2 *args) { struct eb_vmas *eb = NULL; @@ -95,6 +99,7 @@ eb_create(struct drm_i915_gem_execbuffer2 *args) } else eb->and = -args->buffer_count; + eb->i915 = i915; INIT_LIST_HEAD(&eb->vmas); return eb; } @@ -180,8 +185,8 @@ eb_lookup_vmas(struct eb_vmas *eb, * from the (obj, vm) we don't run the risk of creating * duplicated vmas for the same vm. */ - vma = i915_gem_obj_lookup_or_create_vma(obj, vm); - if (IS_ERR(vma)) { + vma = i915_gem_obj_lookup_or_create_vma(obj, vm, NULL); + if (unlikely(IS_ERR(vma))) { DRM_DEBUG("Failed to lookup VMA\n"); ret = PTR_ERR(vma); goto err; @@ -245,7 +250,6 @@ static void i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma) { struct drm_i915_gem_exec_object2 *entry; - struct drm_i915_gem_object *obj = vma->obj; if (!drm_mm_node_allocated(&vma->node)) return; @@ -253,7 +257,7 @@ i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma) entry = vma->exec_entry; if (entry->flags & __EXEC_OBJECT_HAS_FENCE) - i915_gem_object_unpin_fence(obj); + i915_vma_unpin_fence(vma); if (entry->flags & __EXEC_OBJECT_HAS_PIN) __i915_vma_unpin(vma); @@ -271,13 +275,19 @@ static void eb_destroy(struct eb_vmas *eb) exec_list); list_del_init(&vma->exec_list); i915_gem_execbuffer_unreserve_vma(vma); - i915_gem_object_put(vma->obj); + i915_vma_put(vma); } kfree(eb); } static inline int use_cpu_reloc(struct drm_i915_gem_object *obj) { + if (!i915_gem_object_has_struct_page(obj)) + return false; + + if (DBG_USE_CPU_RELOC) + return DBG_USE_CPU_RELOC > 0; + return (HAS_LLC(obj->base.dev) || obj->base.write_domain == I915_GEM_DOMAIN_CPU || obj->cache_level != I915_CACHE_NONE); @@ -302,137 +312,243 @@ static inline uint64_t gen8_noncanonical_addr(uint64_t address) } static inline uint64_t -relocation_target(struct drm_i915_gem_relocation_entry *reloc, +relocation_target(const struct drm_i915_gem_relocation_entry *reloc, uint64_t target_offset) { return gen8_canonical_addr((int)reloc->delta + target_offset); } -static int -relocate_entry_cpu(struct drm_i915_gem_object *obj, - struct drm_i915_gem_relocation_entry *reloc, - uint64_t target_offset) +struct reloc_cache { + struct drm_i915_private *i915; + struct drm_mm_node node; + unsigned long vaddr; + unsigned int page; + bool use_64bit_reloc; +}; + +static void reloc_cache_init(struct reloc_cache *cache, + struct drm_i915_private *i915) { - struct drm_device *dev = obj->base.dev; - uint32_t page_offset = offset_in_page(reloc->offset); - uint64_t delta = relocation_target(reloc, target_offset); - char *vaddr; - int ret; + cache->page = -1; + cache->vaddr = 0; + cache->i915 = i915; + cache->use_64bit_reloc = INTEL_GEN(cache->i915) >= 8; + cache->node.allocated = false; +} - ret = i915_gem_object_set_to_cpu_domain(obj, true); - if (ret) - return ret; +static inline void *unmask_page(unsigned long p) +{ + return (void *)(uintptr_t)(p & PAGE_MASK); +} - vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, - reloc->offset >> PAGE_SHIFT)); - *(uint32_t *)(vaddr + page_offset) = lower_32_bits(delta); +static inline unsigned int unmask_flags(unsigned long p) +{ + return p & ~PAGE_MASK; +} + +#define KMAP 0x4 /* after CLFLUSH_FLAGS */ + +static void reloc_cache_fini(struct reloc_cache *cache) +{ + void *vaddr; - if (INTEL_INFO(dev)->gen >= 8) { - page_offset = offset_in_page(page_offset + sizeof(uint32_t)); + if (!cache->vaddr) + return; + + vaddr = unmask_page(cache->vaddr); + if (cache->vaddr & KMAP) { + if (cache->vaddr & CLFLUSH_AFTER) + mb(); - if (page_offset == 0) { - kunmap_atomic(vaddr); - vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, - (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT)); + kunmap_atomic(vaddr); + i915_gem_obj_finish_shmem_access((struct drm_i915_gem_object *)cache->node.mm); + } else { + wmb(); + io_mapping_unmap_atomic((void __iomem *)vaddr); + if (cache->node.allocated) { + struct i915_ggtt *ggtt = &cache->i915->ggtt; + + ggtt->base.clear_range(&ggtt->base, + cache->node.start, + cache->node.size, + true); + drm_mm_remove_node(&cache->node); + } else { + i915_vma_unpin((struct i915_vma *)cache->node.mm); } + } +} + +static void *reloc_kmap(struct drm_i915_gem_object *obj, + struct reloc_cache *cache, + int page) +{ + void *vaddr; + + if (cache->vaddr) { + kunmap_atomic(unmask_page(cache->vaddr)); + } else { + unsigned int flushes; + int ret; - *(uint32_t *)(vaddr + page_offset) = upper_32_bits(delta); + ret = i915_gem_obj_prepare_shmem_write(obj, &flushes); + if (ret) + return ERR_PTR(ret); + + BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS); + BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK); + + cache->vaddr = flushes | KMAP; + cache->node.mm = (void *)obj; + if (flushes) + mb(); } - kunmap_atomic(vaddr); + vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, page)); + cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr; + cache->page = page; - return 0; + return vaddr; } -static int -relocate_entry_gtt(struct drm_i915_gem_object *obj, - struct drm_i915_gem_relocation_entry *reloc, - uint64_t target_offset) +static void *reloc_iomap(struct drm_i915_gem_object *obj, + struct reloc_cache *cache, + int page) { - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; - uint64_t delta = relocation_target(reloc, target_offset); - uint64_t offset; - void __iomem *reloc_page; - int ret; + struct i915_ggtt *ggtt = &cache->i915->ggtt; + unsigned long offset; + void *vaddr; - ret = i915_gem_object_set_to_gtt_domain(obj, true); - if (ret) - return ret; + if (cache->node.allocated) { + wmb(); + ggtt->base.insert_page(&ggtt->base, + i915_gem_object_get_dma_address(obj, page), + cache->node.start, I915_CACHE_NONE, 0); + cache->page = page; + return unmask_page(cache->vaddr); + } - ret = i915_gem_object_put_fence(obj); - if (ret) - return ret; + if (cache->vaddr) { + io_mapping_unmap_atomic(unmask_page(cache->vaddr)); + } else { + struct i915_vma *vma; + int ret; - /* Map the page containing the relocation we're going to perform. */ - offset = i915_gem_obj_ggtt_offset(obj); - offset += reloc->offset; - reloc_page = io_mapping_map_atomic_wc(ggtt->mappable, - offset & PAGE_MASK); - iowrite32(lower_32_bits(delta), reloc_page + offset_in_page(offset)); - - if (INTEL_INFO(dev)->gen >= 8) { - offset += sizeof(uint32_t); - - if (offset_in_page(offset) == 0) { - io_mapping_unmap_atomic(reloc_page); - reloc_page = - io_mapping_map_atomic_wc(ggtt->mappable, - offset); + if (use_cpu_reloc(obj)) + return NULL; + + ret = i915_gem_object_set_to_gtt_domain(obj, true); + if (ret) + return ERR_PTR(ret); + + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, + PIN_MAPPABLE | PIN_NONBLOCK); + if (IS_ERR(vma)) { + memset(&cache->node, 0, sizeof(cache->node)); + ret = drm_mm_insert_node_in_range_generic + (&ggtt->base.mm, &cache->node, + 4096, 0, 0, + 0, ggtt->mappable_end, + DRM_MM_SEARCH_DEFAULT, + DRM_MM_CREATE_DEFAULT); + if (ret) + return ERR_PTR(ret); + } else { + ret = i915_vma_put_fence(vma); + if (ret) { + i915_vma_unpin(vma); + return ERR_PTR(ret); + } + + cache->node.start = vma->node.start; + cache->node.mm = (void *)vma; } + } - iowrite32(upper_32_bits(delta), - reloc_page + offset_in_page(offset)); + offset = cache->node.start; + if (cache->node.allocated) { + ggtt->base.insert_page(&ggtt->base, + i915_gem_object_get_dma_address(obj, page), + offset, I915_CACHE_NONE, 0); + } else { + offset += page << PAGE_SHIFT; } - io_mapping_unmap_atomic(reloc_page); + vaddr = io_mapping_map_atomic_wc(&cache->i915->ggtt.mappable, offset); + cache->page = page; + cache->vaddr = (unsigned long)vaddr; - return 0; + return vaddr; } -static void -clflush_write32(void *addr, uint32_t value) +static void *reloc_vaddr(struct drm_i915_gem_object *obj, + struct reloc_cache *cache, + int page) { - /* This is not a fast path, so KISS. */ - drm_clflush_virt_range(addr, sizeof(uint32_t)); - *(uint32_t *)addr = value; - drm_clflush_virt_range(addr, sizeof(uint32_t)); + void *vaddr; + + if (cache->page == page) { + vaddr = unmask_page(cache->vaddr); + } else { + vaddr = NULL; + if ((cache->vaddr & KMAP) == 0) + vaddr = reloc_iomap(obj, cache, page); + if (!vaddr) + vaddr = reloc_kmap(obj, cache, page); + } + + return vaddr; } -static int -relocate_entry_clflush(struct drm_i915_gem_object *obj, - struct drm_i915_gem_relocation_entry *reloc, - uint64_t target_offset) +static void clflush_write32(u32 *addr, u32 value, unsigned int flushes) { - struct drm_device *dev = obj->base.dev; - uint32_t page_offset = offset_in_page(reloc->offset); - uint64_t delta = relocation_target(reloc, target_offset); - char *vaddr; - int ret; + if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) { + if (flushes & CLFLUSH_BEFORE) { + clflushopt(addr); + mb(); + } - ret = i915_gem_object_set_to_gtt_domain(obj, true); - if (ret) - return ret; + *addr = value; - vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, - reloc->offset >> PAGE_SHIFT)); - clflush_write32(vaddr + page_offset, lower_32_bits(delta)); + /* Writes to the same cacheline are serialised by the CPU + * (including clflush). On the write path, we only require + * that it hits memory in an orderly fashion and place + * mb barriers at the start and end of the relocation phase + * to ensure ordering of clflush wrt to the system. + */ + if (flushes & CLFLUSH_AFTER) + clflushopt(addr); + } else + *addr = value; +} + +static int +relocate_entry(struct drm_i915_gem_object *obj, + const struct drm_i915_gem_relocation_entry *reloc, + struct reloc_cache *cache, + u64 target_offset) +{ + u64 offset = reloc->offset; + bool wide = cache->use_64bit_reloc; + void *vaddr; - if (INTEL_INFO(dev)->gen >= 8) { - page_offset = offset_in_page(page_offset + sizeof(uint32_t)); + target_offset = relocation_target(reloc, target_offset); +repeat: + vaddr = reloc_vaddr(obj, cache, offset >> PAGE_SHIFT); + if (IS_ERR(vaddr)) + return PTR_ERR(vaddr); - if (page_offset == 0) { - kunmap_atomic(vaddr); - vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj, - (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT)); - } + clflush_write32(vaddr + offset_in_page(offset), + lower_32_bits(target_offset), + cache->vaddr); - clflush_write32(vaddr + page_offset, upper_32_bits(delta)); + if (wide) { + offset += sizeof(u32); + target_offset >>= 32; + wide = false; + goto repeat; } - kunmap_atomic(vaddr); - return 0; } @@ -453,7 +569,8 @@ static bool object_is_idle(struct drm_i915_gem_object *obj) static int i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, struct eb_vmas *eb, - struct drm_i915_gem_relocation_entry *reloc) + struct drm_i915_gem_relocation_entry *reloc, + struct reloc_cache *cache) { struct drm_device *dev = obj->base.dev; struct drm_gem_object *target_obj; @@ -516,7 +633,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, /* Check that the relocation address is valid... */ if (unlikely(reloc->offset > - obj->base.size - (INTEL_INFO(dev)->gen >= 8 ? 8 : 4))) { + obj->base.size - (cache->use_64bit_reloc ? 8 : 4))) { DRM_DEBUG("Relocation beyond object bounds: " "obj %p target %d offset %d size %d.\n", obj, reloc->target_handle, @@ -536,23 +653,12 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, if (pagefault_disabled() && !object_is_idle(obj)) return -EFAULT; - if (use_cpu_reloc(obj)) - ret = relocate_entry_cpu(obj, reloc, target_offset); - else if (obj->map_and_fenceable) - ret = relocate_entry_gtt(obj, reloc, target_offset); - else if (static_cpu_has(X86_FEATURE_CLFLUSH)) - ret = relocate_entry_clflush(obj, reloc, target_offset); - else { - WARN_ONCE(1, "Impossible case in relocation handling\n"); - ret = -ENODEV; - } - + ret = relocate_entry(obj, reloc, cache, target_offset); if (ret) return ret; /* and update the user's relocation entry */ reloc->presumed_offset = target_offset; - return 0; } @@ -564,9 +670,11 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma, struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)]; struct drm_i915_gem_relocation_entry __user *user_relocs; struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; - int remain, ret; + struct reloc_cache cache; + int remain, ret = 0; user_relocs = u64_to_user_ptr(entry->relocs_ptr); + reloc_cache_init(&cache, eb->i915); remain = entry->relocation_count; while (remain) { @@ -576,19 +684,23 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma, count = ARRAY_SIZE(stack_reloc); remain -= count; - if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0]))) - return -EFAULT; + if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0]))) { + ret = -EFAULT; + goto out; + } do { u64 offset = r->presumed_offset; - ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r); + ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r, &cache); if (ret) - return ret; + goto out; if (r->presumed_offset != offset && - __put_user(r->presumed_offset, &user_relocs->presumed_offset)) { - return -EFAULT; + __put_user(r->presumed_offset, + &user_relocs->presumed_offset)) { + ret = -EFAULT; + goto out; } user_relocs++; @@ -596,7 +708,9 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma, } while (--count); } - return 0; +out: + reloc_cache_fini(&cache); + return ret; #undef N_RELOC } @@ -606,15 +720,18 @@ i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma, struct drm_i915_gem_relocation_entry *relocs) { const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; - int i, ret; + struct reloc_cache cache; + int i, ret = 0; + reloc_cache_init(&cache, eb->i915); for (i = 0; i < entry->relocation_count; i++) { - ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i]); + ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i], &cache); if (ret) - return ret; + break; } + reloc_cache_fini(&cache); - return 0; + return ret; } static int @@ -693,11 +810,11 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma, entry->flags |= __EXEC_OBJECT_HAS_PIN; if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) { - ret = i915_gem_object_get_fence(obj); + ret = i915_vma_get_fence(vma); if (ret) return ret; - if (i915_gem_object_pin_fence(obj)) + if (i915_vma_pin_fence(vma)) entry->flags |= __EXEC_OBJECT_HAS_FENCE; } @@ -739,7 +856,6 @@ static bool eb_vma_misplaced(struct i915_vma *vma) { struct drm_i915_gem_exec_object2 *entry = vma->exec_entry; - struct drm_i915_gem_object *obj = vma->obj; WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP && !i915_vma_is_ggtt(vma)); @@ -760,7 +876,8 @@ eb_vma_misplaced(struct i915_vma *vma) return true; /* avoid costly ping-pong once a batch bo ended up non-mappable */ - if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable) + if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && + !i915_vma_is_map_and_fenceable(vma)) return !only_mappable_for_reloc(entry->flags); if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 && @@ -900,7 +1017,7 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev, vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list); list_del_init(&vma->exec_list); i915_gem_execbuffer_unreserve_vma(vma); - i915_gem_object_put(vma->obj); + i915_vma_put(vma); } mutex_unlock(&dev->struct_mutex); @@ -1010,8 +1127,6 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, { const unsigned int other_rings = eb_other_engines(req); struct i915_vma *vma; - uint32_t flush_domains = 0; - bool flush_chipset = false; int ret; list_for_each_entry(vma, vmas, exec_list) { @@ -1024,16 +1139,11 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, } if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) - flush_chipset |= i915_gem_clflush_object(obj, false); - - flush_domains |= obj->base.write_domain; + i915_gem_clflush_object(obj, false); } - if (flush_chipset) - i915_gem_chipset_flush(req->engine->i915); - - if (flush_domains & I915_GEM_DOMAIN_GTT) - wmb(); + /* Unconditionally flush any chipset caches (for streaming writes). */ + i915_gem_chipset_flush(req->engine->i915); /* Unconditionally invalidate GPU caches and TLBs. */ return req->engine->emit_flush(req, EMIT_INVALIDATE); @@ -1194,15 +1304,8 @@ void i915_vma_move_to_active(struct i915_vma *vma, obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS; } - if (flags & EXEC_OBJECT_NEEDS_FENCE) { - i915_gem_active_set(&obj->last_fence, req); - if (flags & __EXEC_OBJECT_HAS_FENCE) { - struct drm_i915_private *dev_priv = req->i915; - - list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list, - &dev_priv->mm.fence_list); - } - } + if (flags & EXEC_OBJECT_NEEDS_FENCE) + i915_gem_active_set(&vma->last_fence, req); i915_vma_set_active(vma, idx); i915_gem_active_set(&vma->last_read[idx], req); @@ -1281,7 +1384,7 @@ i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req) return 0; } -static struct i915_vma* +static struct i915_vma * i915_gem_execbuffer_parse(struct intel_engine_cs *engine, struct drm_i915_gem_exec_object2 *shadow_exec_entry, struct drm_i915_gem_object *batch_obj, @@ -1305,31 +1408,28 @@ i915_gem_execbuffer_parse(struct intel_engine_cs *engine, batch_start_offset, batch_len, is_master); - if (ret) - goto err; - - ret = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0); - if (ret) - goto err; + if (ret) { + if (ret == -EACCES) /* unhandled chained batch */ + vma = NULL; + else + vma = ERR_PTR(ret); + goto out; + } - i915_gem_object_unpin_pages(shadow_batch_obj); + vma = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0); + if (IS_ERR(vma)) + goto out; memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry)); - vma = i915_gem_obj_to_ggtt(shadow_batch_obj); vma->exec_entry = shadow_exec_entry; vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN; i915_gem_object_get(shadow_batch_obj); list_add_tail(&vma->exec_list, &eb->vmas); - return vma; - -err: +out: i915_gem_object_unpin_pages(shadow_batch_obj); - if (ret == -EACCES) /* unhandled chained batch */ - return NULL; - else - return ERR_PTR(ret); + return vma; } static int @@ -1412,7 +1512,7 @@ execbuf_submit(struct i915_execbuffer_params *params, params->args_batch_start_offset; if (exec_len == 0) - exec_len = params->batch->size; + exec_len = params->batch->size - params->args_batch_start_offset; ret = params->engine->emit_bb_start(params->request, exec_start, exec_len, @@ -1595,7 +1695,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, memset(¶ms_master, 0x00, sizeof(params_master)); - eb = eb_create(args); + eb = eb_create(dev_priv, args); if (eb == NULL) { i915_gem_context_put(ctx); mutex_unlock(&dev->struct_mutex); @@ -1638,6 +1738,12 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, ret = -EINVAL; goto err; } + if (args->batch_start_offset > params->batch->size || + args->batch_len > params->batch->size - args->batch_start_offset) { + DRM_DEBUG("Attempting to use out-of-bounds batch\n"); + ret = -EINVAL; + goto err; + } params->args_batch_start_offset = args->batch_start_offset; if (intel_engine_needs_cmd_parser(engine) && args->batch_len) { @@ -1677,6 +1783,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, * hsw should have this fixed, but bdw mucks it up again. */ if (dispatch_flags & I915_DISPATCH_SECURE) { struct drm_i915_gem_object *obj = params->batch->obj; + struct i915_vma *vma; /* * So on first glance it looks freaky that we pin the batch here @@ -1688,11 +1795,13 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, * fitting due to fragmentation. * So this is actually safe. */ - ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); - if (ret) + vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); goto err; + } - params->batch = i915_gem_obj_to_ggtt(obj); + params->batch = vma; } /* Allocate a request for this batch buffer nice and early. */ @@ -1702,6 +1811,14 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, goto err_batch_unpin; } + /* Whilst this request exists, batch_obj will be on the + * active_list, and so will hold the active reference. Only when this + * request is retired will the the batch_obj be moved onto the + * inactive_list and lose its active reference. Hence we do not need + * to explicitly hold another reference here. + */ + params->request->batch = params->batch; + ret = i915_gem_request_add_to_client(params->request, file); if (ret) goto err_request; @@ -1720,7 +1837,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, ret = execbuf_submit(params, args, &eb->vmas); err_request: - __i915_add_request(params->request, params->batch->obj, ret == 0); + __i915_add_request(params->request, ret == 0); err_batch_unpin: /* diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c index 9e8173f..8df1fa7 100644 --- a/drivers/gpu/drm/i915/i915_gem_fence.c +++ b/drivers/gpu/drm/i915/i915_gem_fence.c @@ -55,87 +55,85 @@ * CPU ptes into GTT mmaps (not the GTT ptes themselves) as needed. */ -static void i965_write_fence_reg(struct drm_device *dev, int reg, - struct drm_i915_gem_object *obj) +#define pipelined 0 + +static void i965_write_fence_reg(struct drm_i915_fence_reg *fence, + struct i915_vma *vma) { - struct drm_i915_private *dev_priv = to_i915(dev); i915_reg_t fence_reg_lo, fence_reg_hi; int fence_pitch_shift; + u64 val; - if (INTEL_INFO(dev)->gen >= 6) { - fence_reg_lo = FENCE_REG_GEN6_LO(reg); - fence_reg_hi = FENCE_REG_GEN6_HI(reg); + if (INTEL_INFO(fence->i915)->gen >= 6) { + fence_reg_lo = FENCE_REG_GEN6_LO(fence->id); + fence_reg_hi = FENCE_REG_GEN6_HI(fence->id); fence_pitch_shift = GEN6_FENCE_PITCH_SHIFT; + } else { - fence_reg_lo = FENCE_REG_965_LO(reg); - fence_reg_hi = FENCE_REG_965_HI(reg); + fence_reg_lo = FENCE_REG_965_LO(fence->id); + fence_reg_hi = FENCE_REG_965_HI(fence->id); fence_pitch_shift = I965_FENCE_PITCH_SHIFT; } - /* To w/a incoherency with non-atomic 64-bit register updates, - * we split the 64-bit update into two 32-bit writes. In order - * for a partial fence not to be evaluated between writes, we - * precede the update with write to turn off the fence register, - * and only enable the fence as the last step. - * - * For extra levels of paranoia, we make sure each step lands - * before applying the next step. - */ - I915_WRITE(fence_reg_lo, 0); - POSTING_READ(fence_reg_lo); - - if (obj) { - u32 size = i915_gem_obj_ggtt_size(obj); - unsigned int tiling = i915_gem_object_get_tiling(obj); - unsigned int stride = i915_gem_object_get_stride(obj); - uint64_t val; - - /* Adjust fence size to match tiled area */ - if (tiling != I915_TILING_NONE) { - uint32_t row_size = stride * - (tiling == I915_TILING_Y ? 32 : 8); - size = (size / row_size) * row_size; - } - - val = (uint64_t)((i915_gem_obj_ggtt_offset(obj) + size - 4096) & - 0xfffff000) << 32; - val |= i915_gem_obj_ggtt_offset(obj) & 0xfffff000; - val |= (uint64_t)((stride / 128) - 1) << fence_pitch_shift; - if (tiling == I915_TILING_Y) - val |= 1 << I965_FENCE_TILING_Y_SHIFT; + val = 0; + if (vma) { + unsigned int tiling = i915_gem_object_get_tiling(vma->obj); + bool is_y_tiled = tiling == I915_TILING_Y; + unsigned int stride = i915_gem_object_get_stride(vma->obj); + u32 row_size = stride * (is_y_tiled ? 32 : 8); + u32 size = rounddown((u32)vma->node.size, row_size); + + val = ((vma->node.start + size - 4096) & 0xfffff000) << 32; + val |= vma->node.start & 0xfffff000; + val |= (u64)((stride / 128) - 1) << fence_pitch_shift; + if (is_y_tiled) + val |= BIT(I965_FENCE_TILING_Y_SHIFT); val |= I965_FENCE_REG_VALID; + } - I915_WRITE(fence_reg_hi, val >> 32); - POSTING_READ(fence_reg_hi); + if (!pipelined) { + struct drm_i915_private *dev_priv = fence->i915; - I915_WRITE(fence_reg_lo, val); + /* To w/a incoherency with non-atomic 64-bit register updates, + * we split the 64-bit update into two 32-bit writes. In order + * for a partial fence not to be evaluated between writes, we + * precede the update with write to turn off the fence register, + * and only enable the fence as the last step. + * + * For extra levels of paranoia, we make sure each step lands + * before applying the next step. + */ + I915_WRITE(fence_reg_lo, 0); + POSTING_READ(fence_reg_lo); + + I915_WRITE(fence_reg_hi, upper_32_bits(val)); + I915_WRITE(fence_reg_lo, lower_32_bits(val)); POSTING_READ(fence_reg_lo); - } else { - I915_WRITE(fence_reg_hi, 0); - POSTING_READ(fence_reg_hi); } } -static void i915_write_fence_reg(struct drm_device *dev, int reg, - struct drm_i915_gem_object *obj) +static void i915_write_fence_reg(struct drm_i915_fence_reg *fence, + struct i915_vma *vma) { - struct drm_i915_private *dev_priv = to_i915(dev); u32 val; - if (obj) { - u32 size = i915_gem_obj_ggtt_size(obj); - unsigned int tiling = i915_gem_object_get_tiling(obj); - unsigned int stride = i915_gem_object_get_stride(obj); + val = 0; + if (vma) { + unsigned int tiling = i915_gem_object_get_tiling(vma->obj); + bool is_y_tiled = tiling == I915_TILING_Y; + unsigned int stride = i915_gem_object_get_stride(vma->obj); int pitch_val; int tile_width; - WARN((i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) || - (size & -size) != size || - (i915_gem_obj_ggtt_offset(obj) & (size - 1)), - "object 0x%08llx [fenceable? %d] not 1M or pot-size (0x%08x) aligned\n", - i915_gem_obj_ggtt_offset(obj), obj->map_and_fenceable, size); + WARN((vma->node.start & ~I915_FENCE_START_MASK) || + !is_power_of_2(vma->node.size) || + (vma->node.start & (vma->node.size - 1)), + "object 0x%08llx [fenceable? %d] not 1M or pot-size (0x%08llx) aligned\n", + vma->node.start, + i915_vma_is_map_and_fenceable(vma), + vma->node.size); - if (tiling == I915_TILING_Y && HAS_128_BYTE_Y_TILING(dev)) + if (is_y_tiled && HAS_128_BYTE_Y_TILING(fence->i915)) tile_width = 128; else tile_width = 512; @@ -144,139 +142,141 @@ static void i915_write_fence_reg(struct drm_device *dev, int reg, pitch_val = stride / tile_width; pitch_val = ffs(pitch_val) - 1; - val = i915_gem_obj_ggtt_offset(obj); - if (tiling == I915_TILING_Y) - val |= 1 << I830_FENCE_TILING_Y_SHIFT; - val |= I915_FENCE_SIZE_BITS(size); + val = vma->node.start; + if (is_y_tiled) + val |= BIT(I830_FENCE_TILING_Y_SHIFT); + val |= I915_FENCE_SIZE_BITS(vma->node.size); val |= pitch_val << I830_FENCE_PITCH_SHIFT; val |= I830_FENCE_REG_VALID; - } else - val = 0; + } + + if (!pipelined) { + struct drm_i915_private *dev_priv = fence->i915; + i915_reg_t reg = FENCE_REG(fence->id); - I915_WRITE(FENCE_REG(reg), val); - POSTING_READ(FENCE_REG(reg)); + I915_WRITE(reg, val); + POSTING_READ(reg); + } } -static void i830_write_fence_reg(struct drm_device *dev, int reg, - struct drm_i915_gem_object *obj) +static void i830_write_fence_reg(struct drm_i915_fence_reg *fence, + struct i915_vma *vma) { - struct drm_i915_private *dev_priv = to_i915(dev); - uint32_t val; + u32 val; - if (obj) { - u32 size = i915_gem_obj_ggtt_size(obj); - unsigned int tiling = i915_gem_object_get_tiling(obj); - unsigned int stride = i915_gem_object_get_stride(obj); - uint32_t pitch_val; + val = 0; + if (vma) { + unsigned int tiling = i915_gem_object_get_tiling(vma->obj); + bool is_y_tiled = tiling == I915_TILING_Y; + unsigned int stride = i915_gem_object_get_stride(vma->obj); + u32 pitch_val; - WARN((i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) || - (size & -size) != size || - (i915_gem_obj_ggtt_offset(obj) & (size - 1)), - "object 0x%08llx not 512K or pot-size 0x%08x aligned\n", - i915_gem_obj_ggtt_offset(obj), size); + WARN((vma->node.start & ~I830_FENCE_START_MASK) || + !is_power_of_2(vma->node.size) || + (vma->node.start & (vma->node.size - 1)), + "object 0x%08llx not 512K or pot-size 0x%08llx aligned\n", + vma->node.start, vma->node.size); pitch_val = stride / 128; pitch_val = ffs(pitch_val) - 1; - val = i915_gem_obj_ggtt_offset(obj); - if (tiling == I915_TILING_Y) - val |= 1 << I830_FENCE_TILING_Y_SHIFT; - val |= I830_FENCE_SIZE_BITS(size); + val = vma->node.start; + if (is_y_tiled) + val |= BIT(I830_FENCE_TILING_Y_SHIFT); + val |= I830_FENCE_SIZE_BITS(vma->node.size); val |= pitch_val << I830_FENCE_PITCH_SHIFT; val |= I830_FENCE_REG_VALID; - } else - val = 0; + } - I915_WRITE(FENCE_REG(reg), val); - POSTING_READ(FENCE_REG(reg)); -} + if (!pipelined) { + struct drm_i915_private *dev_priv = fence->i915; + i915_reg_t reg = FENCE_REG(fence->id); -inline static bool i915_gem_object_needs_mb(struct drm_i915_gem_object *obj) -{ - return obj && obj->base.read_domains & I915_GEM_DOMAIN_GTT; + I915_WRITE(reg, val); + POSTING_READ(reg); + } } -static void i915_gem_write_fence(struct drm_device *dev, int reg, - struct drm_i915_gem_object *obj) +static void fence_write(struct drm_i915_fence_reg *fence, + struct i915_vma *vma) { - struct drm_i915_private *dev_priv = to_i915(dev); - - /* Ensure that all CPU reads are completed before installing a fence - * and all writes before removing the fence. + /* Previous access through the fence register is marshalled by + * the mb() inside the fault handlers (i915_gem_release_mmaps) + * and explicitly managed for internal users. */ - if (i915_gem_object_needs_mb(dev_priv->fence_regs[reg].obj)) - mb(); - - WARN(obj && - (!i915_gem_object_get_stride(obj) || - !i915_gem_object_get_tiling(obj)), - "bogus fence setup with stride: 0x%x, tiling mode: %i\n", - i915_gem_object_get_stride(obj), - i915_gem_object_get_tiling(obj)); - - if (IS_GEN2(dev)) - i830_write_fence_reg(dev, reg, obj); - else if (IS_GEN3(dev)) - i915_write_fence_reg(dev, reg, obj); - else if (INTEL_INFO(dev)->gen >= 4) - i965_write_fence_reg(dev, reg, obj); - - /* And similarly be paranoid that no direct access to this region - * is reordered to before the fence is installed. + + if (IS_GEN2(fence->i915)) + i830_write_fence_reg(fence, vma); + else if (IS_GEN3(fence->i915)) + i915_write_fence_reg(fence, vma); + else + i965_write_fence_reg(fence, vma); + + /* Access through the fenced region afterwards is + * ordered by the posting reads whilst writing the registers. */ - if (i915_gem_object_needs_mb(obj)) - mb(); -} -static inline int fence_number(struct drm_i915_private *dev_priv, - struct drm_i915_fence_reg *fence) -{ - return fence - dev_priv->fence_regs; + fence->dirty = false; } -static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, - struct drm_i915_fence_reg *fence, - bool enable) +static int fence_update(struct drm_i915_fence_reg *fence, + struct i915_vma *vma) { - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - int reg = fence_number(dev_priv, fence); + int ret; - i915_gem_write_fence(obj->base.dev, reg, enable ? obj : NULL); + if (vma) { + if (!i915_vma_is_map_and_fenceable(vma)) + return -EINVAL; - if (enable) { - obj->fence_reg = reg; - fence->obj = obj; - list_move_tail(&fence->lru_list, &dev_priv->mm.fence_list); - } else { - obj->fence_reg = I915_FENCE_REG_NONE; - fence->obj = NULL; - list_del_init(&fence->lru_list); + if (WARN(!i915_gem_object_get_stride(vma->obj) || + !i915_gem_object_get_tiling(vma->obj), + "bogus fence setup with stride: 0x%x, tiling mode: %i\n", + i915_gem_object_get_stride(vma->obj), + i915_gem_object_get_tiling(vma->obj))) + return -EINVAL; + + ret = i915_gem_active_retire(&vma->last_fence, + &vma->obj->base.dev->struct_mutex); + if (ret) + return ret; } - obj->fence_dirty = false; -} -static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj) -{ - if (i915_gem_object_is_tiled(obj)) - i915_gem_release_mmap(obj); + if (fence->vma) { + ret = i915_gem_active_retire(&fence->vma->last_fence, + &fence->vma->obj->base.dev->struct_mutex); + if (ret) + return ret; + } - /* As we do not have an associated fence register, we will force - * a tiling change if we ever need to acquire one. - */ - obj->fence_dirty = false; - obj->fence_reg = I915_FENCE_REG_NONE; -} + if (fence->vma && fence->vma != vma) { + /* Ensure that all userspace CPU access is completed before + * stealing the fence. + */ + i915_gem_release_mmap(fence->vma->obj); -static int -i915_gem_object_wait_fence(struct drm_i915_gem_object *obj) -{ - return i915_gem_active_retire(&obj->last_fence, - &obj->base.dev->struct_mutex); + fence->vma->fence = NULL; + fence->vma = NULL; + + list_move(&fence->link, &fence->i915->mm.fence_list); + } + + fence_write(fence, vma); + + if (vma) { + if (fence->vma != vma) { + vma->fence = fence; + fence->vma = vma; + } + + list_move_tail(&fence->link, &fence->i915->mm.fence_list); + } + + return 0; } /** - * i915_gem_object_put_fence - force-remove fence for an object - * @obj: object to map through a fence reg + * i915_vma_put_fence - force-remove fence for a VMA + * @vma: vma to map linearly (not through a fence reg) * * This function force-removes any fence from the given object, which is useful * if the kernel wants to do untiled GTT access. @@ -286,70 +286,40 @@ i915_gem_object_wait_fence(struct drm_i915_gem_object *obj) * 0 on success, negative error code on failure. */ int -i915_gem_object_put_fence(struct drm_i915_gem_object *obj) +i915_vma_put_fence(struct i915_vma *vma) { - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - struct drm_i915_fence_reg *fence; - int ret; - - ret = i915_gem_object_wait_fence(obj); - if (ret) - return ret; + struct drm_i915_fence_reg *fence = vma->fence; - if (obj->fence_reg == I915_FENCE_REG_NONE) + if (!fence) return 0; - fence = &dev_priv->fence_regs[obj->fence_reg]; - - if (WARN_ON(fence->pin_count)) + if (fence->pin_count) return -EBUSY; - i915_gem_object_fence_lost(obj); - i915_gem_object_update_fence(obj, fence, false); - - return 0; + return fence_update(fence, NULL); } -static struct drm_i915_fence_reg * -i915_find_fence_reg(struct drm_device *dev) +static struct drm_i915_fence_reg *fence_find(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_i915_fence_reg *reg, *avail; - int i; - - /* First try to find a free reg */ - avail = NULL; - for (i = 0; i < dev_priv->num_fence_regs; i++) { - reg = &dev_priv->fence_regs[i]; - if (!reg->obj) - return reg; - - if (!reg->pin_count) - avail = reg; - } - - if (avail == NULL) - goto deadlock; + struct drm_i915_fence_reg *fence; - /* None available, try to steal one or wait for a user to finish */ - list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) { - if (reg->pin_count) + list_for_each_entry(fence, &dev_priv->mm.fence_list, link) { + if (fence->pin_count) continue; - return reg; + return fence; } -deadlock: /* Wait for completion of pending flips which consume fences */ - if (intel_has_pending_fb_unpin(dev)) + if (intel_has_pending_fb_unpin(&dev_priv->drm)) return ERR_PTR(-EAGAIN); return ERR_PTR(-EDEADLK); } /** - * i915_gem_object_get_fence - set up fencing for an object - * @obj: object to map through a fence reg + * i915_vma_get_fence - set up fencing for a vma + * @vma: vma to map through a fence reg * * When mapping objects through the GTT, userspace wants to be able to write * to them without having to worry about swizzling if the object is tiled. @@ -366,103 +336,27 @@ deadlock: * 0 on success, negative error code on failure. */ int -i915_gem_object_get_fence(struct drm_i915_gem_object *obj) +i915_vma_get_fence(struct i915_vma *vma) { - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - bool enable = i915_gem_object_is_tiled(obj); - struct drm_i915_fence_reg *reg; - int ret; - - /* Have we updated the tiling parameters upon the object and so - * will need to serialise the write to the associated fence register? - */ - if (obj->fence_dirty) { - ret = i915_gem_object_wait_fence(obj); - if (ret) - return ret; - } + struct drm_i915_fence_reg *fence; + struct i915_vma *set = i915_gem_object_is_tiled(vma->obj) ? vma : NULL; /* Just update our place in the LRU if our fence is getting reused. */ - if (obj->fence_reg != I915_FENCE_REG_NONE) { - reg = &dev_priv->fence_regs[obj->fence_reg]; - if (!obj->fence_dirty) { - list_move_tail(®->lru_list, - &dev_priv->mm.fence_list); + if (vma->fence) { + fence = vma->fence; + if (!fence->dirty) { + list_move_tail(&fence->link, + &fence->i915->mm.fence_list); return 0; } - } else if (enable) { - if (WARN_ON(!obj->map_and_fenceable)) - return -EINVAL; - - reg = i915_find_fence_reg(dev); - if (IS_ERR(reg)) - return PTR_ERR(reg); - - if (reg->obj) { - struct drm_i915_gem_object *old = reg->obj; - - ret = i915_gem_object_wait_fence(old); - if (ret) - return ret; - - i915_gem_object_fence_lost(old); - } + } else if (set) { + fence = fence_find(to_i915(vma->vm->dev)); + if (IS_ERR(fence)) + return PTR_ERR(fence); } else return 0; - i915_gem_object_update_fence(obj, reg, enable); - - return 0; -} - -/** - * i915_gem_object_pin_fence - pin fencing state - * @obj: object to pin fencing for - * - * This pins the fencing state (whether tiled or untiled) to make sure the - * object is ready to be used as a scanout target. Fencing status must be - * synchronize first by calling i915_gem_object_get_fence(): - * - * The resulting fence pin reference must be released again with - * i915_gem_object_unpin_fence(). - * - * Returns: - * - * True if the object has a fence, false otherwise. - */ -bool -i915_gem_object_pin_fence(struct drm_i915_gem_object *obj) -{ - if (obj->fence_reg != I915_FENCE_REG_NONE) { - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - struct i915_vma *ggtt_vma = i915_gem_obj_to_ggtt(obj); - - WARN_ON(!ggtt_vma || - dev_priv->fence_regs[obj->fence_reg].pin_count > - i915_vma_pin_count(ggtt_vma)); - dev_priv->fence_regs[obj->fence_reg].pin_count++; - return true; - } else - return false; -} - -/** - * i915_gem_object_unpin_fence - unpin fencing state - * @obj: object to unpin fencing for - * - * This releases the fence pin reference acquired through - * i915_gem_object_pin_fence. It will handle both objects with and without an - * attached fence correctly, callers do not need to distinguish this. - */ -void -i915_gem_object_unpin_fence(struct drm_i915_gem_object *obj) -{ - if (obj->fence_reg != I915_FENCE_REG_NONE) { - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - WARN_ON(dev_priv->fence_regs[obj->fence_reg].pin_count <= 0); - dev_priv->fence_regs[obj->fence_reg].pin_count--; - } + return fence_update(fence, set); } /** @@ -479,17 +373,16 @@ void i915_gem_restore_fences(struct drm_device *dev) for (i = 0; i < dev_priv->num_fence_regs; i++) { struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i]; + struct i915_vma *vma = reg->vma; /* * Commit delayed tiling changes if we have an object still * attached to the fence, otherwise just clear the fence. */ - if (reg->obj) { - i915_gem_object_update_fence(reg->obj, reg, - i915_gem_object_get_tiling(reg->obj)); - } else { - i915_gem_write_fence(dev, i, NULL); - } + if (vma && !i915_gem_object_is_tiled(vma->obj)) + vma = NULL; + + fence_update(reg, vma); } } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 18c7c96..b90fdce 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -170,11 +170,13 @@ static int ppgtt_bind_vma(struct i915_vma *vma, { u32 pte_flags = 0; + vma->pages = vma->obj->pages; + /* Currently applicable only to VLV */ if (vma->obj->gt_ro) pte_flags |= PTE_READ_ONLY; - vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start, + vma->vm->insert_entries(vma->vm, vma->pages, vma->node.start, cache_level, pte_flags); return 0; @@ -2618,8 +2620,7 @@ static int ggtt_bind_vma(struct i915_vma *vma, if (obj->gt_ro) pte_flags |= PTE_READ_ONLY; - vma->vm->insert_entries(vma->vm, vma->ggtt_view.pages, - vma->node.start, + vma->vm->insert_entries(vma->vm, vma->pages, vma->node.start, cache_level, pte_flags); /* @@ -2651,8 +2652,7 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, if (flags & I915_VMA_GLOBAL_BIND) { vma->vm->insert_entries(vma->vm, - vma->ggtt_view.pages, - vma->node.start, + vma->pages, vma->node.start, cache_level, pte_flags); } @@ -2660,8 +2660,7 @@ static int aliasing_gtt_bind_vma(struct i915_vma *vma, struct i915_hw_ppgtt *appgtt = to_i915(vma->vm->dev)->mm.aliasing_ppgtt; appgtt->base.insert_entries(&appgtt->base, - vma->ggtt_view.pages, - vma->node.start, + vma->pages, vma->node.start, cache_level, pte_flags); } @@ -2795,7 +2794,6 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) if (dev_priv->mm.aliasing_ppgtt) { struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; - ppgtt->base.cleanup(&ppgtt->base); kfree(ppgtt); } @@ -2812,7 +2810,7 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) ggtt->base.cleanup(&ggtt->base); arch_phys_wc_del(ggtt->mtrr); - io_mapping_free(ggtt->mappable); + io_mapping_fini(&ggtt->mappable); } static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) @@ -3210,9 +3208,9 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) if (!HAS_LLC(dev_priv)) ggtt->base.mm.color_adjust = i915_gtt_color_adjust; - ggtt->mappable = - io_mapping_create_wc(ggtt->mappable_base, ggtt->mappable_end); - if (!ggtt->mappable) { + if (!io_mapping_init_wc(&dev_priv->ggtt.mappable, + dev_priv->ggtt.mappable_base, + dev_priv->ggtt.mappable_end)) { ret = -EIO; goto out_gtt_cleanup; } @@ -3323,6 +3321,7 @@ void i915_vma_destroy(struct i915_vma *vma) GEM_BUG_ON(vma->node.allocated); GEM_BUG_ON(i915_vma_is_active(vma)); GEM_BUG_ON(!i915_vma_is_closed(vma)); + GEM_BUG_ON(vma->fence); list_del(&vma->vm_link); if (!i915_vma_is_ggtt(vma)) @@ -3342,33 +3341,29 @@ void i915_vma_close(struct i915_vma *vma) } static struct i915_vma * -__i915_gem_vma_create(struct drm_i915_gem_object *obj, - struct i915_address_space *vm, - const struct i915_ggtt_view *view) +__i915_vma_create(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + const struct i915_ggtt_view *view) { struct i915_vma *vma; int i; GEM_BUG_ON(vm->closed); - if (WARN_ON(i915_is_ggtt(vm) != !!view)) - return ERR_PTR(-EINVAL); - vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL); if (vma == NULL) return ERR_PTR(-ENOMEM); - INIT_LIST_HEAD(&vma->obj_link); INIT_LIST_HEAD(&vma->exec_list); for (i = 0; i < ARRAY_SIZE(vma->last_read); i++) init_request_active(&vma->last_read[i], i915_vma_retire); + init_request_active(&vma->last_fence, NULL); list_add(&vma->vm_link, &vm->unbound_list); vma->vm = vm; vma->obj = obj; vma->size = obj->base.size; - if (i915_is_ggtt(vm)) { - vma->flags |= I915_VMA_GGTT; + if (view) { vma->ggtt_view = *view; if (view->type == I915_GGTT_VIEW_PARTIAL) { vma->size = view->params.partial.size; @@ -3378,46 +3373,79 @@ __i915_gem_vma_create(struct drm_i915_gem_object *obj, intel_rotation_info_size(&view->params.rotated); vma->size <<= PAGE_SHIFT; } + } + + if (i915_is_ggtt(vm)) { + vma->flags |= I915_VMA_GGTT; } else { i915_ppgtt_get(i915_vm_to_ppgtt(vm)); } list_add_tail(&vma->obj_link, &obj->vma_list); - return vma; } +static inline bool vma_matches(struct i915_vma *vma, + struct i915_address_space *vm, + const struct i915_ggtt_view *view) +{ + if (vma->vm != vm) + return false; + + if (!i915_vma_is_ggtt(vma)) + return true; + + if (!view) + return vma->ggtt_view.type == 0; + + if (vma->ggtt_view.type != view->type) + return false; + + return memcmp(&vma->ggtt_view.params, + &view->params, + sizeof(view->params)) == 0; +} + struct i915_vma * -i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, - struct i915_address_space *vm) +i915_vma_create(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + const struct i915_ggtt_view *view) +{ + GEM_BUG_ON(view && !i915_is_ggtt(vm)); + GEM_BUG_ON(i915_gem_obj_to_vma(obj, vm, view)); + + return __i915_vma_create(obj, vm, view); +} + +struct i915_vma * +i915_gem_obj_to_vma(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + const struct i915_ggtt_view *view) { struct i915_vma *vma; - vma = i915_gem_obj_to_vma(obj, vm); - if (!vma) - vma = __i915_gem_vma_create(obj, vm, - i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL); + list_for_each_entry_reverse(vma, &obj->vma_list, obj_link) + if (vma_matches(vma, vm, view)) + return vma; - return vma; + return NULL; } struct i915_vma * -i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj, - const struct i915_ggtt_view *view) +i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + const struct i915_ggtt_view *view) { - struct drm_device *dev = obj->base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; - struct i915_vma *vma = i915_gem_obj_to_ggtt_view(obj, view); + struct i915_vma *vma; - GEM_BUG_ON(!view); + GEM_BUG_ON(view && !i915_is_ggtt(vm)); + vma = i915_gem_obj_to_vma(obj, vm, view); if (!vma) - vma = __i915_gem_vma_create(obj, &ggtt->base, view); + vma = __i915_vma_create(obj, vm, view); GEM_BUG_ON(i915_vma_is_closed(vma)); return vma; - } static struct scatterlist * @@ -3449,18 +3477,16 @@ rotate_pages(const dma_addr_t *in, unsigned int offset, } static struct sg_table * -intel_rotate_fb_obj_pages(struct intel_rotation_info *rot_info, +intel_rotate_fb_obj_pages(const struct intel_rotation_info *rot_info, struct drm_i915_gem_object *obj) { const size_t n_pages = obj->base.size / PAGE_SIZE; - unsigned int size_pages = rot_info->plane[0].width * rot_info->plane[0].height; - unsigned int size_pages_uv; + unsigned int size = intel_rotation_info_size(rot_info); struct sgt_iter sgt_iter; dma_addr_t dma_addr; unsigned long i; dma_addr_t *page_addr_list; struct sg_table *st; - unsigned int uv_start_page; struct scatterlist *sg; int ret = -ENOMEM; @@ -3471,18 +3497,12 @@ intel_rotate_fb_obj_pages(struct intel_rotation_info *rot_info, if (!page_addr_list) return ERR_PTR(ret); - /* Account for UV plane with NV12. */ - if (rot_info->pixel_format == DRM_FORMAT_NV12) - size_pages_uv = rot_info->plane[1].width * rot_info->plane[1].height; - else - size_pages_uv = 0; - /* Allocate target SG list. */ st = kmalloc(sizeof(*st), GFP_KERNEL); if (!st) goto err_st_alloc; - ret = sg_alloc_table(st, size_pages + size_pages_uv, GFP_KERNEL); + ret = sg_alloc_table(st, size, GFP_KERNEL); if (ret) goto err_sg_alloc; @@ -3495,32 +3515,14 @@ intel_rotate_fb_obj_pages(struct intel_rotation_info *rot_info, st->nents = 0; sg = st->sgl; - /* Rotate the pages. */ - sg = rotate_pages(page_addr_list, 0, - rot_info->plane[0].width, rot_info->plane[0].height, - rot_info->plane[0].width, - st, sg); - - /* Append the UV plane if NV12. */ - if (rot_info->pixel_format == DRM_FORMAT_NV12) { - uv_start_page = size_pages; - - /* Check for tile-row un-alignment. */ - if (offset_in_page(rot_info->uv_offset)) - uv_start_page--; - - rot_info->uv_start_page = uv_start_page; - - sg = rotate_pages(page_addr_list, rot_info->uv_start_page, - rot_info->plane[1].width, rot_info->plane[1].height, - rot_info->plane[1].width, - st, sg); + for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) { + sg = rotate_pages(page_addr_list, rot_info->plane[i].offset, + rot_info->plane[i].width, rot_info->plane[i].height, + rot_info->plane[i].stride, st, sg); } - DRM_DEBUG_KMS("Created rotated page mapping for object size %zu (%ux%u tiles, %u pages (%u plane 0)).\n", - obj->base.size, rot_info->plane[0].width, - rot_info->plane[0].height, size_pages + size_pages_uv, - size_pages); + DRM_DEBUG_KMS("Created rotated page mapping for object size %zu (%ux%u tiles, %u pages)\n", + obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size); drm_free_large(page_addr_list); @@ -3531,10 +3533,9 @@ err_sg_alloc: err_st_alloc: drm_free_large(page_addr_list); - DRM_DEBUG_KMS("Failed to create rotated mapping for object size %zu! (%d) (%ux%u tiles, %u pages (%u plane 0))\n", - obj->base.size, ret, rot_info->plane[0].width, - rot_info->plane[0].height, size_pages + size_pages_uv, - size_pages); + DRM_DEBUG_KMS("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n", + obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size); + return ERR_PTR(ret); } @@ -3584,28 +3585,27 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma) { int ret = 0; - if (vma->ggtt_view.pages) + if (vma->pages) return 0; if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL) - vma->ggtt_view.pages = vma->obj->pages; + vma->pages = vma->obj->pages; else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED) - vma->ggtt_view.pages = + vma->pages = intel_rotate_fb_obj_pages(&vma->ggtt_view.params.rotated, vma->obj); else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL) - vma->ggtt_view.pages = - intel_partial_pages(&vma->ggtt_view, vma->obj); + vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj); else WARN_ONCE(1, "GGTT view %u not implemented!\n", vma->ggtt_view.type); - if (!vma->ggtt_view.pages) { + if (!vma->pages) { DRM_ERROR("Failed to get pages for GGTT view type %u!\n", vma->ggtt_view.type); ret = -EINVAL; - } else if (IS_ERR(vma->ggtt_view.pages)) { - ret = PTR_ERR(vma->ggtt_view.pages); - vma->ggtt_view.pages = NULL; + } else if (IS_ERR(vma->pages)) { + ret = PTR_ERR(vma->pages); + vma->pages = NULL; DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n", vma->ggtt_view.type, ret); } @@ -3668,8 +3668,11 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) { void __iomem *ptr; + /* Access through the GTT requires the device to be awake. */ + assert_rpm_wakelock_held(to_i915(vma->vm->dev)); + lockdep_assert_held(&vma->vm->dev->struct_mutex); - if (WARN_ON(!vma->obj->map_and_fenceable)) + if (WARN_ON(!i915_vma_is_map_and_fenceable(vma))) return IO_ERR_PTR(-ENODEV); GEM_BUG_ON(!i915_vma_is_ggtt(vma)); @@ -3677,7 +3680,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) ptr = vma->iomap; if (ptr == NULL) { - ptr = io_mapping_map_wc(i915_vm_to_ggtt(vma->vm)->mappable, + ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->mappable, vma->node.start, vma->node.size); if (ptr == NULL) @@ -3689,3 +3692,15 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) __i915_vma_pin(vma); return ptr; } + +void i915_vma_unpin_and_release(struct i915_vma **p_vma) +{ + struct i915_vma *vma; + + vma = fetch_and_zero(p_vma); + if (!vma) + return; + + i915_vma_unpin(vma); + i915_vma_put(vma); +} diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index cc56206..a9aec25 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -38,7 +38,13 @@ #include "i915_gem_request.h" +#define I915_FENCE_REG_NONE -1 +#define I915_MAX_NUM_FENCES 32 +/* 32 fences + sign bit for FENCE_REG_NONE */ +#define I915_MAX_NUM_FENCE_BITS 6 + struct drm_i915_file_private; +struct drm_i915_fence_reg; typedef uint32_t gen6_pte_t; typedef uint64_t gen8_pte_t; @@ -139,12 +145,9 @@ enum i915_ggtt_view_type { }; struct intel_rotation_info { - unsigned int uv_offset; - uint32_t pixel_format; - unsigned int uv_start_page; struct { /* tiles */ - unsigned int width, height; + unsigned int width, height, stride, offset; } plane[2]; }; @@ -158,8 +161,6 @@ struct i915_ggtt_view { } partial; struct intel_rotation_info rotated; } params; - - struct sg_table *pages; }; extern const struct i915_ggtt_view i915_ggtt_view_normal; @@ -179,8 +180,11 @@ struct i915_vma { struct drm_mm_node node; struct drm_i915_gem_object *obj; struct i915_address_space *vm; + struct drm_i915_fence_reg *fence; + struct sg_table *pages; void __iomem *iomap; u64 size; + u64 display_alignment; unsigned int flags; /** @@ -201,11 +205,13 @@ struct i915_vma { #define I915_VMA_LOCAL_BIND BIT(7) #define I915_VMA_BIND_MASK (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND | I915_VMA_PIN_OVERFLOW) -#define I915_VMA_GGTT BIT(8) -#define I915_VMA_CLOSED BIT(9) +#define I915_VMA_GGTT BIT(8) +#define I915_VMA_CAN_FENCE BIT(9) +#define I915_VMA_CLOSED BIT(10) unsigned int active; struct i915_gem_active last_read[I915_NUM_ENGINES]; + struct i915_gem_active last_fence; /** * Support different GGTT views into the same object. @@ -232,11 +238,22 @@ struct i915_vma { struct drm_i915_gem_exec_object2 *exec_entry; }; +struct i915_vma * +i915_vma_create(struct drm_i915_gem_object *obj, + struct i915_address_space *vm, + const struct i915_ggtt_view *view); +void i915_vma_unpin_and_release(struct i915_vma **p_vma); + static inline bool i915_vma_is_ggtt(const struct i915_vma *vma) { return vma->flags & I915_VMA_GGTT; } +static inline bool i915_vma_is_map_and_fenceable(const struct i915_vma *vma) +{ + return vma->flags & I915_VMA_CAN_FENCE; +} + static inline bool i915_vma_is_closed(const struct i915_vma *vma) { return vma->flags & I915_VMA_CLOSED; @@ -270,6 +287,15 @@ static inline bool i915_vma_has_active_engine(const struct i915_vma *vma, return vma->active & BIT(engine); } +static inline u32 i915_ggtt_offset(const struct i915_vma *vma) +{ + GEM_BUG_ON(!i915_vma_is_ggtt(vma)); + GEM_BUG_ON(!vma->node.allocated); + GEM_BUG_ON(upper_32_bits(vma->node.start)); + GEM_BUG_ON(upper_32_bits(vma->node.start + vma->node.size - 1)); + return lower_32_bits(vma->node.start); +} + struct i915_page_dma { struct page *page; union { @@ -413,13 +439,13 @@ struct i915_address_space { */ struct i915_ggtt { struct i915_address_space base; + struct io_mapping mappable; /* Mapping to our CPU mappable region */ size_t stolen_size; /* Total size of stolen memory */ size_t stolen_usable_size; /* Total size minus BIOS reserved */ size_t stolen_reserved_base; size_t stolen_reserved_size; u64 mappable_end; /* End offset that we can CPU map */ - struct io_mapping *mappable; /* Mapping to our CPU mappable region */ phys_addr_t mappable_base; /* PA of our GMADR */ /** "Graphics Stolen Memory" holds the global PTEs */ @@ -608,24 +634,11 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev); int __must_check i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj); void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj); -static inline bool -i915_ggtt_view_equal(const struct i915_ggtt_view *a, - const struct i915_ggtt_view *b) -{ - if (WARN_ON(!a || !b)) - return false; - - if (a->type != b->type) - return false; - if (a->type != I915_GGTT_VIEW_NORMAL) - return !memcmp(&a->params, &b->params, sizeof(a->params)); - return true; -} - /* Flags used by pin/bind&friends. */ #define PIN_NONBLOCK BIT(0) #define PIN_MAPPABLE BIT(1) #define PIN_ZONE_4G BIT(2) +#define PIN_NONFAULT BIT(3) #define PIN_MBZ BIT(5) /* I915_VMA_PIN_OVERFLOW */ #define PIN_GLOBAL BIT(6) /* I915_VMA_GLOBAL_BIND */ @@ -715,4 +728,10 @@ static inline void i915_vma_unpin_iomap(struct i915_vma *vma) i915_vma_unpin(vma); } +static inline struct page *i915_vma_first_page(struct i915_vma *vma) +{ + GEM_BUG_ON(!vma->pages); + return sg_page(vma->pages->sgl); +} + #endif diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index 57fd767..95b7e9a 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -30,8 +30,7 @@ struct render_state { const struct intel_renderstate_rodata *rodata; - struct drm_i915_gem_object *obj; - u64 ggtt_offset; + struct i915_vma *vma; u32 aux_batch_size; u32 aux_batch_offset; }; @@ -73,7 +72,7 @@ render_state_get_rodata(const struct drm_i915_gem_request *req) static int render_state_setup(struct render_state *so) { - struct drm_device *dev = so->obj->base.dev; + struct drm_device *dev = so->vma->vm->dev; const struct intel_renderstate_rodata *rodata = so->rodata; const bool has_64bit_reloc = INTEL_GEN(dev) >= 8; unsigned int i = 0, reloc_index = 0; @@ -81,18 +80,18 @@ static int render_state_setup(struct render_state *so) u32 *d; int ret; - ret = i915_gem_object_set_to_cpu_domain(so->obj, true); + ret = i915_gem_object_set_to_cpu_domain(so->vma->obj, true); if (ret) return ret; - page = i915_gem_object_get_dirty_page(so->obj, 0); + page = i915_gem_object_get_dirty_page(so->vma->obj, 0); d = kmap(page); while (i < rodata->batch_items) { u32 s = rodata->batch[i]; if (i * 4 == rodata->reloc[reloc_index]) { - u64 r = s + so->ggtt_offset; + u64 r = s + so->vma->node.start; s = lower_32_bits(r); if (has_64bit_reloc) { if (i + 1 >= rodata->batch_items || @@ -154,7 +153,7 @@ static int render_state_setup(struct render_state *so) kunmap(page); - ret = i915_gem_object_set_to_gtt_domain(so->obj, false); + ret = i915_gem_object_set_to_gtt_domain(so->vma->obj, false); if (ret) return ret; @@ -175,6 +174,7 @@ err_out: int i915_gem_render_state_init(struct drm_i915_gem_request *req) { struct render_state so; + struct drm_i915_gem_object *obj; int ret; if (WARN_ON(req->engine->id != RCS)) @@ -187,21 +187,25 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req) if (so.rodata->batch_items * 4 > 4096) return -EINVAL; - so.obj = i915_gem_object_create(&req->i915->drm, 4096); - if (IS_ERR(so.obj)) - return PTR_ERR(so.obj); + obj = i915_gem_object_create(&req->i915->drm, 4096); + if (IS_ERR(obj)) + return PTR_ERR(obj); - ret = i915_gem_object_ggtt_pin(so.obj, NULL, 0, 0, 0); - if (ret) + so.vma = i915_vma_create(obj, &req->i915->ggtt.base, NULL); + if (IS_ERR(so.vma)) { + ret = PTR_ERR(so.vma); goto err_obj; + } - so.ggtt_offset = i915_gem_obj_ggtt_offset(so.obj); + ret = i915_vma_pin(so.vma, 0, 0, PIN_GLOBAL); + if (ret) + goto err_obj; ret = render_state_setup(&so); if (ret) goto err_unpin; - ret = req->engine->emit_bb_start(req, so.ggtt_offset, + ret = req->engine->emit_bb_start(req, so.vma->node.start, so.rodata->batch_items * 4, I915_DISPATCH_SECURE); if (ret) @@ -209,7 +213,7 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req) if (so.aux_batch_size > 8) { ret = req->engine->emit_bb_start(req, - (so.ggtt_offset + + (so.vma->node.start + so.aux_batch_offset), so.aux_batch_size, I915_DISPATCH_SECURE); @@ -217,10 +221,10 @@ int i915_gem_render_state_init(struct drm_i915_gem_request *req) goto err_unpin; } - i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req, 0); + i915_vma_move_to_active(so.vma, req, 0); err_unpin: - i915_gem_object_ggtt_unpin(so.obj); + i915_vma_unpin(so.vma); err_obj: - i915_gem_object_put(so.obj); + i915_gem_object_put(obj); return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.h b/drivers/gpu/drm/i915/i915_gem_render_state.h index c44fca8..18cce3f 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.h +++ b/drivers/gpu/drm/i915/i915_gem_render_state.h @@ -24,7 +24,7 @@ #ifndef _I915_GEM_RENDER_STATE_H_ #define _I915_GEM_RENDER_STATE_H_ -#include <linux/types.h> +struct drm_i915_gem_request; int i915_gem_render_state_init(struct drm_i915_gem_request *req); diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index 6a16616..1a21532 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -137,8 +137,6 @@ int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, list_add_tail(&req->client_list, &file_priv->mm.request_list); spin_unlock(&file_priv->mm.lock); - req->pid = get_pid(task_pid(current)); - return 0; } @@ -154,9 +152,6 @@ i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) list_del(&request->client_list); request->file_priv = NULL; spin_unlock(&file_priv->mm.lock); - - put_pid(request->pid); - request->pid = NULL; } void i915_gem_retire_noop(struct i915_gem_active *active, @@ -355,7 +350,35 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, if (req && i915_gem_request_completed(req)) i915_gem_request_retire(req); - req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL); + /* Beware: Dragons be flying overhead. + * + * We use RCU to look up requests in flight. The lookups may + * race with the request being allocated from the slab freelist. + * That is the request we are writing to here, may be in the process + * of being read by __i915_gem_active_get_rcu(). As such, + * we have to be very careful when overwriting the contents. During + * the RCU lookup, we change chase the request->engine pointer, + * read the request->fence.seqno and increment the reference count. + * + * The reference count is incremented atomically. If it is zero, + * the lookup knows the request is unallocated and complete. Otherwise, + * it is either still in use, or has been reallocated and reset + * with fence_init(). This increment is safe for release as we check + * that the request we have a reference to and matches the active + * request. + * + * Before we increment the refcount, we chase the request->engine + * pointer. We must not call kmem_cache_zalloc() or else we set + * that pointer to NULL and cause a crash during the lookup. If + * we see the request is completed (based on the value of the + * old engine and seqno), the lookup is complete and reports NULL. + * If we decide the request is not completed (new engine or seqno), + * then we grab a reference and double check that it is still the + * active request - which it won't be and restart the lookup. + * + * Do not use kmem_cache_zalloc() here! + */ + req = kmem_cache_alloc(dev_priv->requests, GFP_KERNEL); if (!req) return ERR_PTR(-ENOMEM); @@ -375,6 +398,12 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, req->engine = engine; req->ctx = i915_gem_context_get(ctx); + /* No zalloc, must clear what we need by hand */ + req->previous_context = NULL; + req->file_priv = NULL; + req->batch = NULL; + req->elsp_submitted = 0; + /* * Reserve space in the ring buffer for all the commands required to * eventually emit this request. This is to guarantee that the @@ -391,6 +420,13 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, if (ret) goto err_ctx; + /* Record the position of the start of the request so that + * should we detect the updated seqno part-way through the + * GPU processing the request, we never over-estimate the + * position of the head. + */ + req->head = req->ring->tail; + return req; err_ctx: @@ -426,22 +462,14 @@ static void i915_gem_mark_busy(const struct intel_engine_cs *engine) * request is not being tracked for completion but the work itself is * going to happen on the hardware. This would be a Bad Thing(tm). */ -void __i915_add_request(struct drm_i915_gem_request *request, - struct drm_i915_gem_object *obj, - bool flush_caches) +void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches) { - struct intel_engine_cs *engine; - struct intel_ring *ring; + struct intel_engine_cs *engine = request->engine; + struct intel_ring *ring = request->ring; u32 request_start; u32 reserved_tail; int ret; - if (WARN_ON(!request)) - return; - - engine = request->engine; - ring = request->ring; - /* * To ensure that this call will not fail, space for its emissions * should already have been reserved in the ring buffer. Let the ring @@ -467,16 +495,6 @@ void __i915_add_request(struct drm_i915_gem_request *request, trace_i915_gem_request_add(request); - request->head = request_start; - - /* Whilst this request exists, batch_obj will be on the - * active_list, and so will hold the active reference. Only when this - * request is retired will the the batch_obj be moved onto the - * inactive_list and lose its active reference. Hence we do not need - * to explicitly hold another reference here. - */ - request->batch_obj = obj; - /* Seal the request and mark it as pending execution. Note that * we may inspect this state, without holding any locks, during * hangcheck. Hence we apply the barrier to ensure that we do not @@ -489,10 +507,10 @@ void __i915_add_request(struct drm_i915_gem_request *request, list_add_tail(&request->link, &engine->request_list); list_add_tail(&request->ring_link, &ring->request_list); - /* Record the position of the start of the request so that + /* Record the position of the start of the breadcrumb so that * should we detect the updated seqno part-way through the * GPU processing the request, we never over-estimate the - * position of the head. + * position of the ring's HEAD. */ request->postfix = ring->tail; diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 3496e28..6c72bd8 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -51,6 +51,13 @@ struct intel_signal_node { * emission time to be associated with the request for tracking how far ahead * of the GPU the submission is. * + * When modifying this structure be very aware that we perform a lockless + * RCU lookup of it that may race against reallocation of the struct + * from the slab freelist. We intentionally do not zero the structure on + * allocation so that the lookup can use the dangling pointers (and is + * cogniscent that those pointers may be wrong). Instead, everything that + * needs to be initialised must be done so explicitly. + * * The requests are reference counted. */ struct drm_i915_gem_request { @@ -111,7 +118,7 @@ struct drm_i915_gem_request { /** Batch buffer related to this request if any (used for * error state dump only). */ - struct drm_i915_gem_object *batch_obj; + struct i915_vma *batch; struct list_head active_list; /** Time at which this request was emitted, in jiffies. */ @@ -127,9 +134,6 @@ struct drm_i915_gem_request { /** file_priv list entry for this request */ struct list_head client_list; - /** process identifier submitting this request */ - struct pid *pid; - /** * The ELSP only accepts two elements at a time, so we queue * context/tail pairs on a given queue (ring->execlist_queue) until the @@ -218,13 +222,11 @@ static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst, *pdst = src; } -void __i915_add_request(struct drm_i915_gem_request *req, - struct drm_i915_gem_object *batch_obj, - bool flush_caches); +void __i915_add_request(struct drm_i915_gem_request *req, bool flush_caches); #define i915_add_request(req) \ - __i915_add_request(req, NULL, true) + __i915_add_request(req, true) #define i915_add_request_no_flush(req) \ - __i915_add_request(req, NULL, false) + __i915_add_request(req, false) struct intel_rps_client; #define NO_WAITBOOST ERR_PTR(-1) @@ -360,41 +362,34 @@ __i915_gem_active_peek(const struct i915_gem_active *active) } /** - * i915_gem_active_peek - report the active request being monitored + * i915_gem_active_raw - return the active request * @active - the active tracker * - * i915_gem_active_peek() returns the current request being tracked if - * still active, or NULL. It does not obtain a reference on the request - * for the caller, so the caller must hold struct_mutex. + * i915_gem_active_raw() returns the current request being tracked, or NULL. + * It does not obtain a reference on the request for the caller, so the caller + * must hold struct_mutex. */ static inline struct drm_i915_gem_request * -i915_gem_active_peek(const struct i915_gem_active *active, struct mutex *mutex) +i915_gem_active_raw(const struct i915_gem_active *active, struct mutex *mutex) { - struct drm_i915_gem_request *request; - - request = rcu_dereference_protected(active->request, - lockdep_is_held(mutex)); - if (!request || i915_gem_request_completed(request)) - return NULL; - - return request; + return rcu_dereference_protected(active->request, + lockdep_is_held(mutex)); } /** - * i915_gem_active_peek_rcu - report the active request being monitored + * i915_gem_active_peek - report the active request being monitored * @active - the active tracker * - * i915_gem_active_peek_rcu() returns the current request being tracked if + * i915_gem_active_peek() returns the current request being tracked if * still active, or NULL. It does not obtain a reference on the request - * for the caller, and inspection of the request is only valid under - * the RCU lock. + * for the caller, so the caller must hold struct_mutex. */ static inline struct drm_i915_gem_request * -i915_gem_active_peek_rcu(const struct i915_gem_active *active) +i915_gem_active_peek(const struct i915_gem_active *active, struct mutex *mutex) { struct drm_i915_gem_request *request; - request = rcu_dereference(active->request); + request = i915_gem_active_raw(active, mutex); if (!request || i915_gem_request_completed(request)) return NULL; @@ -465,6 +460,10 @@ __i915_gem_active_get_rcu(const struct i915_gem_active *active) * just report the active tracker is idle. If the new request is * incomplete, then we acquire a reference on it and check that * it remained the active request. + * + * It is then imperative that we do not zero the request on + * reallocation, so that we can chase the dangling pointers! + * See i915_gem_request_alloc(). */ do { struct drm_i915_gem_request *request; @@ -497,6 +496,9 @@ __i915_gem_active_get_rcu(const struct i915_gem_active *active) * incremented) then the following read for rcu_access_pointer() * must occur after the atomic operation and so confirm * that this request is the one currently being tracked. + * + * The corresponding write barrier is part of + * rcu_assign_pointer(). */ if (!request || request == rcu_access_pointer(active->request)) return rcu_pointer_handoff(request); @@ -635,8 +637,7 @@ i915_gem_active_retire(struct i915_gem_active *active, struct drm_i915_gem_request *request; int ret; - request = rcu_dereference_protected(active->request, - lockdep_is_held(mutex)); + request = i915_gem_active_raw(active, mutex); if (!request) return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 1327961..aa050fa 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -115,17 +115,28 @@ static unsigned long i915_stolen_to_physical(struct drm_device *dev) base = bsm & INTEL_BSM_MASK; } else if (IS_I865G(dev)) { + u32 tseg_size = 0; u16 toud = 0; + u8 tmp; + + pci_bus_read_config_byte(dev->pdev->bus, PCI_DEVFN(0, 0), + I845_ESMRAMC, &tmp); + + if (tmp & TSEG_ENABLE) { + switch (tmp & I845_TSEG_SIZE_MASK) { + case I845_TSEG_SIZE_512K: + tseg_size = KB(512); + break; + case I845_TSEG_SIZE_1M: + tseg_size = MB(1); + break; + } + } - /* - * FIXME is the graphics stolen memory region - * always at TOUD? Ie. is it always the last - * one to be allocated by the BIOS? - */ pci_bus_read_config_word(dev->pdev->bus, PCI_DEVFN(0, 0), I865_TOUD, &toud); - base = toud << 16; + base = (toud << 16) + tseg_size; } else if (IS_I85X(dev)) { u32 tseg_size = 0; u32 tom; @@ -685,7 +696,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev, if (gtt_offset == I915_GTT_OFFSET_NONE) return obj; - vma = i915_gem_obj_lookup_or_create_vma(obj, &ggtt->base); + vma = i915_gem_obj_lookup_or_create_vma(obj, &ggtt->base, NULL); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto err; @@ -705,6 +716,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev, goto err; } + vma->pages = obj->pages; vma->flags |= I915_VMA_GLOBAL_BIND; __i915_vma_set_map_and_fenceable(vma); list_move_tail(&vma->vm_link, &ggtt->base.inactive_list); diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index f4b984d..a14b1e3 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -116,37 +116,58 @@ i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode) return true; } -/* Is the current GTT allocation valid for the change in tiling? */ -static bool -i915_gem_object_fence_ok(struct drm_i915_gem_object *obj, int tiling_mode) +static bool i915_vma_fence_prepare(struct i915_vma *vma, int tiling_mode) { - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + struct drm_i915_private *dev_priv = to_i915(vma->vm->dev); u32 size; - if (tiling_mode == I915_TILING_NONE) - return true; - - if (INTEL_GEN(dev_priv) >= 4) + if (!i915_vma_is_map_and_fenceable(vma)) return true; - if (IS_GEN3(dev_priv)) { - if (i915_gem_obj_ggtt_offset(obj) & ~I915_FENCE_START_MASK) + if (INTEL_GEN(dev_priv) == 3) { + if (vma->node.start & ~I915_FENCE_START_MASK) return false; } else { - if (i915_gem_obj_ggtt_offset(obj) & ~I830_FENCE_START_MASK) + if (vma->node.start & ~I830_FENCE_START_MASK) return false; } - size = i915_gem_get_ggtt_size(dev_priv, obj->base.size, tiling_mode); - if (i915_gem_obj_ggtt_size(obj) != size) + size = i915_gem_get_ggtt_size(dev_priv, vma->size, tiling_mode); + if (vma->node.size < size) return false; - if (i915_gem_obj_ggtt_offset(obj) & (size - 1)) + if (vma->node.start & (size - 1)) return false; return true; } +/* Make the current GTT allocation valid for the change in tiling. */ +static int +i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj, int tiling_mode) +{ + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + struct i915_vma *vma; + int ret; + + if (tiling_mode == I915_TILING_NONE) + return 0; + + if (INTEL_GEN(dev_priv) >= 4) + return 0; + + list_for_each_entry(vma, &obj->vma_list, obj_link) { + if (i915_vma_fence_prepare(vma, tiling_mode)) + continue; + + ret = i915_vma_unbind(vma); + if (ret) + return ret; + } + + return 0; +} + /** * i915_gem_set_tiling - IOCTL handler to set tiling mode * @dev: DRM device @@ -168,7 +189,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, struct drm_i915_gem_set_tiling *args = data; struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_object *obj; - int ret = 0; + int err = 0; /* Make sure we don't cross-contaminate obj->tiling_and_stride */ BUILD_BUG_ON(I915_TILING_LAST & STRIDE_MASK); @@ -187,7 +208,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, mutex_lock(&dev->struct_mutex); if (obj->pin_display || obj->framebuffer_references) { - ret = -EBUSY; + err = -EBUSY; goto err; } @@ -234,11 +255,11 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, * has to also include the unfenced register the GPU uses * whilst executing a fenced command for an untiled object. */ - if (obj->map_and_fenceable && - !i915_gem_object_fence_ok(obj, args->tiling_mode)) - ret = i915_vma_unbind(i915_gem_obj_to_ggtt(obj)); - if (ret == 0) { + err = i915_gem_object_fence_prepare(obj, args->tiling_mode); + if (!err) { + struct i915_vma *vma; + if (obj->pages && obj->madv == I915_MADV_WILLNEED && dev_priv->quirks & QUIRK_PIN_SWIZZLED_PAGES) { @@ -248,11 +269,12 @@ i915_gem_set_tiling(struct drm_device *dev, void *data, i915_gem_object_pin_pages(obj); } - obj->fence_dirty = - !i915_gem_active_is_idle(&obj->last_fence, - &dev->struct_mutex) || - obj->fence_reg != I915_FENCE_REG_NONE; + list_for_each_entry(vma, &obj->vma_list, obj_link) { + if (!vma->fence) + continue; + vma->fence->dirty = true; + } obj->tiling_and_stride = args->stride | args->tiling_mode; @@ -281,7 +303,7 @@ err: intel_runtime_pm_put(dev_priv); - return ret; + return err; } /** diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 57218cc..be54825 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -542,8 +542,6 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work) } } obj->userptr.work = ERR_PTR(ret); - if (ret) - __i915_gem_userptr_set_active(obj, false); } obj->userptr.workers--; @@ -628,15 +626,14 @@ i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj) * to the vma (discard or cloning) which should prevent the more * egregious cases from causing harm. */ - if (IS_ERR(obj->userptr.work)) { - /* active flag will have been dropped already by the worker */ - ret = PTR_ERR(obj->userptr.work); - obj->userptr.work = NULL; - return ret; - } - if (obj->userptr.work) + + if (obj->userptr.work) { /* active flag should still be held for the pending work */ - return -EAGAIN; + if (IS_ERR(obj->userptr.work)) + return PTR_ERR(obj->userptr.work); + else + return -EAGAIN; + } /* Let the mmu-notifier know that we have begun and need cancellation */ ret = __i915_gem_userptr_set_active(obj, true); diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index eecb870..41ec7a1 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -42,16 +42,6 @@ static const char *engine_str(int engine) } } -static const char *pin_flag(int pinned) -{ - if (pinned > 0) - return " P"; - else if (pinned < 0) - return " p"; - else - return ""; -} - static const char *tiling_flag(int tiling) { switch (tiling) { @@ -189,7 +179,7 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m, { int i; - err_printf(m, " %s [%d]:\n", name, count); + err_printf(m, "%s [%d]:\n", name, count); while (count--) { err_printf(m, " %08x_%08x %8u %02x %02x [ ", @@ -202,7 +192,6 @@ static void print_error_buffers(struct drm_i915_error_state_buf *m, err_printf(m, "%02x ", err->rseqno[i]); err_printf(m, "] %02x", err->wseqno); - err_puts(m, pin_flag(err->pinned)); err_puts(m, tiling_flag(err->tiling)); err_puts(m, dirty_flag(err->dirty)); err_puts(m, purgeable_flag(err->purgeable)); @@ -247,14 +236,23 @@ static void error_print_engine(struct drm_i915_error_state_buf *m, err_printf(m, " HEAD: 0x%08x\n", ee->head); err_printf(m, " TAIL: 0x%08x\n", ee->tail); err_printf(m, " CTL: 0x%08x\n", ee->ctl); + err_printf(m, " MODE: 0x%08x\n", ee->mode); err_printf(m, " HWS: 0x%08x\n", ee->hws); err_printf(m, " ACTHD: 0x%08x %08x\n", (u32)(ee->acthd>>32), (u32)ee->acthd); err_printf(m, " IPEIR: 0x%08x\n", ee->ipeir); err_printf(m, " IPEHR: 0x%08x\n", ee->ipehr); err_printf(m, " INSTDONE: 0x%08x\n", ee->instdone); + if (ee->batchbuffer) { + u64 start = ee->batchbuffer->gtt_offset; + u64 end = start + ee->batchbuffer->gtt_size; + + err_printf(m, " batch: [0x%08x_%08x, 0x%08x_%08x]\n", + upper_32_bits(start), lower_32_bits(start), + upper_32_bits(end), lower_32_bits(end)); + } if (INTEL_GEN(m->i915) >= 4) { - err_printf(m, " BBADDR: 0x%08x %08x\n", + err_printf(m, " BBADDR: 0x%08x_%08x\n", (u32)(ee->bbaddr>>32), (u32)ee->bbaddr); err_printf(m, " BB_STATE: 0x%08x\n", ee->bbstate); err_printf(m, " INSTPS: 0x%08x\n", ee->instps); @@ -323,6 +321,16 @@ static void print_error_obj(struct drm_i915_error_state_buf *m, } } +static void err_print_capabilities(struct drm_i915_error_state_buf *m, + const struct intel_device_info *info) +{ +#define PRINT_FLAG(x) err_printf(m, #x ": %s\n", yesno(info->x)) +#define SEP_SEMICOLON ; + DEV_INFO_FOR_EACH_FLAG(PRINT_FLAG, SEP_SEMICOLON); +#undef PRINT_FLAG +#undef SEP_SEMICOLON +} + int i915_error_state_to_str(struct drm_i915_error_state_buf *m, const struct i915_error_state_file_priv *error_priv) { @@ -342,6 +350,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, err_printf(m, "Time: %ld s %ld us\n", error->time.tv_sec, error->time.tv_usec); err_printf(m, "Kernel: " UTS_RELEASE "\n"); + err_print_capabilities(m, &error->device_info); max_hangcheck_score = 0; for (i = 0; i < ARRAY_SIZE(error->engine); i++) { if (error->engine[i].hangcheck_score > max_hangcheck_score) @@ -414,18 +423,33 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, error_print_engine(m, &error->engine[i]); } - for (i = 0; i < error->vm_count; i++) { - err_printf(m, "vm[%d]\n", i); + for (i = 0; i < ARRAY_SIZE(error->active_vm); i++) { + char buf[128]; + int len, first = 1; + + if (!error->active_vm[i]) + break; + + len = scnprintf(buf, sizeof(buf), "Active ("); + for (j = 0; j < ARRAY_SIZE(error->engine); j++) { + if (error->engine[j].vm != error->active_vm[i]) + continue; - print_error_buffers(m, "Active", + len += scnprintf(buf + len, sizeof(buf), "%s%s", + first ? "" : ", ", + dev_priv->engine[j].name); + first = 0; + } + scnprintf(buf + len, sizeof(buf), ")"); + print_error_buffers(m, buf, error->active_bo[i], error->active_bo_count[i]); - - print_error_buffers(m, "Pinned", - error->pinned_bo[i], - error->pinned_bo_count[i]); } + print_error_buffers(m, "Pinned (global)", + error->pinned_bo, + error->pinned_bo_count); + for (i = 0; i < ARRAY_SIZE(error->engine); i++) { struct drm_i915_error_engine *ee = &error->engine[i]; @@ -455,9 +479,11 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, dev_priv->engine[i].name, ee->num_requests); for (j = 0; j < ee->num_requests; j++) { - err_printf(m, " seqno 0x%08x, emitted %ld, tail 0x%08x\n", + err_printf(m, " pid %d, seqno 0x%08x, emitted %ld, head 0x%08x, tail 0x%08x\n", + ee->requests[j].pid, ee->requests[j].seqno, ee->requests[j].jiffies, + ee->requests[j].head, ee->requests[j].tail); } } @@ -533,7 +559,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, } } - if ((obj = error->semaphore_obj)) { + if ((obj = error->semaphore)) { err_printf(m, "Semaphore page = 0x%08x\n", lower_32_bits(obj->gtt_offset)); for (elt = 0; elt < PAGE_SIZE/16; elt += 4) { @@ -624,15 +650,12 @@ static void i915_error_state_free(struct kref *error_ref) kfree(ee->waiters); } - i915_error_object_free(error->semaphore_obj); + i915_error_object_free(error->semaphore); - for (i = 0; i < error->vm_count; i++) + for (i = 0; i < ARRAY_SIZE(error->active_bo); i++) kfree(error->active_bo[i]); - - kfree(error->active_bo); - kfree(error->active_bo_count); kfree(error->pinned_bo); - kfree(error->pinned_bo_count); + kfree(error->overlay); kfree(error->display); kfree(error); @@ -640,46 +663,45 @@ static void i915_error_state_free(struct kref *error_ref) static struct drm_i915_error_object * i915_error_object_create(struct drm_i915_private *dev_priv, - struct drm_i915_gem_object *src, - struct i915_address_space *vm) + struct i915_vma *vma) { struct i915_ggtt *ggtt = &dev_priv->ggtt; + struct drm_i915_gem_object *src; struct drm_i915_error_object *dst; - struct i915_vma *vma = NULL; int num_pages; bool use_ggtt; int i = 0; u64 reloc_offset; - if (src == NULL || src->pages == NULL) + if (!vma) + return NULL; + + src = vma->obj; + if (!src->pages) return NULL; num_pages = src->base.size >> PAGE_SHIFT; dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *), GFP_ATOMIC); - if (dst == NULL) + if (!dst) return NULL; - if (i915_gem_obj_bound(src, vm)) - dst->gtt_offset = i915_gem_obj_offset(src, vm); - else - dst->gtt_offset = -1; + dst->gtt_offset = vma->node.start; + dst->gtt_size = vma->node.size; reloc_offset = dst->gtt_offset; - if (i915_is_ggtt(vm)) - vma = i915_gem_obj_to_ggtt(src); use_ggtt = (src->cache_level == I915_CACHE_NONE && - vma && (vma->flags & I915_VMA_GLOBAL_BIND) && + (vma->flags & I915_VMA_GLOBAL_BIND) && reloc_offset + num_pages * PAGE_SIZE <= ggtt->mappable_end); /* Cannot access stolen address directly, try to use the aperture */ if (src->stolen) { use_ggtt = true; - if (!(vma && vma->flags & I915_VMA_GLOBAL_BIND)) + if (!(vma->flags & I915_VMA_GLOBAL_BIND)) goto unwind; - reloc_offset = i915_gem_obj_ggtt_offset(src); + reloc_offset = vma->node.start; if (reloc_offset + num_pages * PAGE_SIZE > ggtt->mappable_end) goto unwind; } @@ -707,7 +729,7 @@ i915_error_object_create(struct drm_i915_private *dev_priv, * captures what the GPU read. */ - s = io_mapping_map_atomic_wc(ggtt->mappable, + s = io_mapping_map_atomic_wc(&ggtt->mappable, reloc_offset); memcpy_fromio(d, s, PAGE_SIZE); io_mapping_unmap_atomic(s); @@ -739,8 +761,6 @@ unwind: kfree(dst); return NULL; } -#define i915_error_ggtt_object_create(dev_priv, src) \ - i915_error_object_create((dev_priv), (src), &(dev_priv)->ggtt.base) /* The error capture is special as tries to run underneath the normal * locking rules - so we use the raw version of the i915_gem_active lookup. @@ -777,10 +797,7 @@ static void capture_bo(struct drm_i915_error_buffer *err, err->gtt_offset = vma->node.start; err->read_domains = obj->base.read_domains; err->write_domain = obj->base.write_domain; - err->fence_reg = obj->fence_reg; - err->pinned = 0; - if (i915_gem_obj_is_pinned(obj)) - err->pinned = 1; + err->fence_reg = vma->fence ? vma->fence->id : -1; err->tiling = i915_gem_object_get_tiling(obj); err->dirty = obj->dirty; err->purgeable = obj->madv != I915_MADV_WILLNEED; @@ -788,13 +805,17 @@ static void capture_bo(struct drm_i915_error_buffer *err, err->cache_level = obj->cache_level; } -static u32 capture_active_bo(struct drm_i915_error_buffer *err, - int count, struct list_head *head) +static u32 capture_error_bo(struct drm_i915_error_buffer *err, + int count, struct list_head *head, + bool pinned_only) { struct i915_vma *vma; int i = 0; list_for_each_entry(vma, head, vm_link) { + if (pinned_only && !i915_vma_is_pinned(vma)) + continue; + capture_bo(err++, vma); if (++i == count) break; @@ -803,28 +824,6 @@ static u32 capture_active_bo(struct drm_i915_error_buffer *err, return i; } -static u32 capture_pinned_bo(struct drm_i915_error_buffer *err, - int count, struct list_head *head, - struct i915_address_space *vm) -{ - struct drm_i915_gem_object *obj; - struct drm_i915_error_buffer * const first = err; - struct drm_i915_error_buffer * const last = err + count; - - list_for_each_entry(obj, head, global_list) { - struct i915_vma *vma; - - if (err == last) - break; - - list_for_each_entry(vma, &obj->vma_list, obj_link) - if (vma->vm == vm && i915_vma_is_pinned(vma)) - capture_bo(err++, vma); - } - - return err - first; -} - /* Generate a semi-unique error code. The code is not meant to have meaning, The * code's only purpose is to try to prevent false duplicated bug reports by * grossly estimating a GPU error state. @@ -884,7 +883,7 @@ static void gen8_record_semaphore_state(struct drm_i915_error_state *error, struct intel_engine_cs *to; enum intel_engine_id id; - if (!error->semaphore_obj) + if (!error->semaphore) return; for_each_engine_id(to, dev_priv, id) { @@ -897,7 +896,7 @@ static void gen8_record_semaphore_state(struct drm_i915_error_state *error, signal_offset = (GEN8_SIGNAL_OFFSET(engine, id) & (PAGE_SIZE - 1)) / 4; - tmp = error->semaphore_obj->pages[0]; + tmp = error->semaphore->pages[0]; idx = intel_engine_sync_index(engine, to); ee->semaphore_mboxes[idx] = tmp[signal_offset]; @@ -1007,6 +1006,8 @@ static void error_record_engine_registers(struct drm_i915_error_state *error, ee->head = I915_READ_HEAD(engine); ee->tail = I915_READ_TAIL(engine); ee->ctl = I915_READ_CTL(engine); + if (INTEL_GEN(dev_priv) > 2) + ee->mode = I915_READ_MODE(engine); if (I915_NEED_GFX_HWS(dev_priv)) { i915_reg_t mmio; @@ -1062,45 +1063,76 @@ static void error_record_engine_registers(struct drm_i915_error_state *error, } } - -static void i915_gem_record_active_context(struct intel_engine_cs *engine, - struct drm_i915_error_state *error, - struct drm_i915_error_engine *ee) +static void engine_record_requests(struct intel_engine_cs *engine, + struct drm_i915_gem_request *first, + struct drm_i915_error_engine *ee) { - struct drm_i915_private *dev_priv = engine->i915; - struct drm_i915_gem_object *obj; + struct drm_i915_gem_request *request; + int count; - /* Currently render ring is the only HW context user */ - if (engine->id != RCS || !error->ccid) + count = 0; + request = first; + list_for_each_entry_from(request, &engine->request_list, link) + count++; + if (!count) return; - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { - if (!i915_gem_obj_ggtt_bound(obj)) - continue; + ee->requests = kcalloc(count, sizeof(*ee->requests), GFP_ATOMIC); + if (!ee->requests) + return; + + ee->num_requests = count; - if ((error->ccid & PAGE_MASK) == i915_gem_obj_ggtt_offset(obj)) { - ee->ctx = i915_error_ggtt_object_create(dev_priv, obj); + count = 0; + request = first; + list_for_each_entry_from(request, &engine->request_list, link) { + struct drm_i915_error_request *erq; + + if (count >= ee->num_requests) { + /* + * If the ring request list was changed in + * between the point where the error request + * list was created and dimensioned and this + * point then just exit early to avoid crashes. + * + * We don't need to communicate that the + * request list changed state during error + * state capture and that the error state is + * slightly incorrect as a consequence since we + * are typically only interested in the request + * list state at the point of error state + * capture, not in any changes happening during + * the capture. + */ break; } + + erq = &ee->requests[count++]; + erq->seqno = request->fence.seqno; + erq->jiffies = request->emitted_jiffies; + erq->head = request->head; + erq->tail = request->tail; + + rcu_read_lock(); + erq->pid = request->ctx->pid ? pid_nr(request->ctx->pid) : 0; + rcu_read_unlock(); } + ee->num_requests = count; } static void i915_gem_record_rings(struct drm_i915_private *dev_priv, struct drm_i915_error_state *error) { struct i915_ggtt *ggtt = &dev_priv->ggtt; - struct drm_i915_gem_request *request; - int i, count; + int i; - if (dev_priv->semaphore_obj) { - error->semaphore_obj = - i915_error_ggtt_object_create(dev_priv, - dev_priv->semaphore_obj); - } + error->semaphore = + i915_error_object_create(dev_priv, dev_priv->semaphore); for (i = 0; i < I915_NUM_ENGINES; i++) { struct intel_engine_cs *engine = &dev_priv->engine[i]; struct drm_i915_error_engine *ee = &error->engine[i]; + struct drm_i915_gem_request *request; ee->pid = -1; ee->engine_id = -1; @@ -1115,10 +1147,10 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, request = i915_gem_find_active_request(engine); if (request) { - struct i915_address_space *vm; struct intel_ring *ring; + struct pid *pid; - vm = request->ctx->ppgtt ? + ee->vm = request->ctx->ppgtt ? &request->ctx->ppgtt->base : &ggtt->base; /* We need to copy these to an anonymous buffer @@ -1127,19 +1159,23 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, */ ee->batchbuffer = i915_error_object_create(dev_priv, - request->batch_obj, - vm); + request->batch); if (HAS_BROKEN_CS_TLB(dev_priv)) ee->wa_batchbuffer = - i915_error_ggtt_object_create(dev_priv, - engine->scratch.obj); + i915_error_object_create(dev_priv, + engine->scratch); + + ee->ctx = + i915_error_object_create(dev_priv, + request->ctx->engine[i].state); - if (request->pid) { + pid = request->ctx->pid; + if (pid) { struct task_struct *task; rcu_read_lock(); - task = pid_task(request->pid, PIDTYPE_PID); + task = pid_task(pid, PIDTYPE_PID); if (task) { strcpy(ee->comm, task->comm); ee->pid = task->pid; @@ -1154,145 +1190,102 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv, ee->cpu_ring_head = ring->head; ee->cpu_ring_tail = ring->tail; ee->ringbuffer = - i915_error_ggtt_object_create(dev_priv, - ring->obj); - } - - ee->hws_page = - i915_error_ggtt_object_create(dev_priv, - engine->status_page.obj); + i915_error_object_create(dev_priv, ring->vma); - ee->wa_ctx = i915_error_ggtt_object_create(dev_priv, - engine->wa_ctx.obj); - - i915_gem_record_active_context(engine, error, ee); - - count = 0; - list_for_each_entry(request, &engine->request_list, link) - count++; - - ee->num_requests = count; - ee->requests = - kcalloc(count, sizeof(*ee->requests), GFP_ATOMIC); - if (!ee->requests) { - ee->num_requests = 0; - continue; + engine_record_requests(engine, request, ee); } - count = 0; - list_for_each_entry(request, &engine->request_list, link) { - struct drm_i915_error_request *erq; - - if (count >= ee->num_requests) { - /* - * If the ring request list was changed in - * between the point where the error request - * list was created and dimensioned and this - * point then just exit early to avoid crashes. - * - * We don't need to communicate that the - * request list changed state during error - * state capture and that the error state is - * slightly incorrect as a consequence since we - * are typically only interested in the request - * list state at the point of error state - * capture, not in any changes happening during - * the capture. - */ - break; - } + ee->hws_page = + i915_error_object_create(dev_priv, + engine->status_page.vma); - erq = &ee->requests[count++]; - erq->seqno = request->fence.seqno; - erq->jiffies = request->emitted_jiffies; - erq->tail = request->postfix; - } + ee->wa_ctx = + i915_error_object_create(dev_priv, engine->wa_ctx.vma); } } -/* FIXME: Since pin count/bound list is global, we duplicate what we capture per - * VM. - */ static void i915_gem_capture_vm(struct drm_i915_private *dev_priv, struct drm_i915_error_state *error, struct i915_address_space *vm, - const int ndx) + int idx) { - struct drm_i915_error_buffer *active_bo = NULL, *pinned_bo = NULL; - struct drm_i915_gem_object *obj; + struct drm_i915_error_buffer *active_bo; struct i915_vma *vma; - int i; + int count; - i = 0; + count = 0; list_for_each_entry(vma, &vm->active_list, vm_link) - i++; - error->active_bo_count[ndx] = i; - - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { - list_for_each_entry(vma, &obj->vma_list, obj_link) - if (vma->vm == vm && i915_vma_is_pinned(vma)) - i++; - } - error->pinned_bo_count[ndx] = i - error->active_bo_count[ndx]; - - if (i) { - active_bo = kcalloc(i, sizeof(*active_bo), GFP_ATOMIC); - if (active_bo) - pinned_bo = active_bo + error->active_bo_count[ndx]; - } + count++; + active_bo = NULL; + if (count) + active_bo = kcalloc(count, sizeof(*active_bo), GFP_ATOMIC); if (active_bo) - error->active_bo_count[ndx] = - capture_active_bo(active_bo, - error->active_bo_count[ndx], - &vm->active_list); - - if (pinned_bo) - error->pinned_bo_count[ndx] = - capture_pinned_bo(pinned_bo, - error->pinned_bo_count[ndx], - &dev_priv->mm.bound_list, vm); - error->active_bo[ndx] = active_bo; - error->pinned_bo[ndx] = pinned_bo; + count = capture_error_bo(active_bo, count, &vm->active_list, false); + else + count = 0; + + error->active_vm[idx] = vm; + error->active_bo[idx] = active_bo; + error->active_bo_count[idx] = count; } -static void i915_gem_capture_buffers(struct drm_i915_private *dev_priv, - struct drm_i915_error_state *error) +static void i915_capture_active_buffers(struct drm_i915_private *dev_priv, + struct drm_i915_error_state *error) { - struct i915_address_space *vm; - int cnt = 0, i = 0; - - list_for_each_entry(vm, &dev_priv->vm_list, global_link) - cnt++; - - error->active_bo = kcalloc(cnt, sizeof(*error->active_bo), GFP_ATOMIC); - error->pinned_bo = kcalloc(cnt, sizeof(*error->pinned_bo), GFP_ATOMIC); - error->active_bo_count = kcalloc(cnt, sizeof(*error->active_bo_count), - GFP_ATOMIC); - error->pinned_bo_count = kcalloc(cnt, sizeof(*error->pinned_bo_count), - GFP_ATOMIC); - - if (error->active_bo == NULL || - error->pinned_bo == NULL || - error->active_bo_count == NULL || - error->pinned_bo_count == NULL) { - kfree(error->active_bo); - kfree(error->active_bo_count); - kfree(error->pinned_bo); - kfree(error->pinned_bo_count); - - error->active_bo = NULL; - error->active_bo_count = NULL; - error->pinned_bo = NULL; - error->pinned_bo_count = NULL; - } else { - list_for_each_entry(vm, &dev_priv->vm_list, global_link) - i915_gem_capture_vm(dev_priv, error, vm, i++); + int cnt = 0, i, j; + + BUILD_BUG_ON(ARRAY_SIZE(error->engine) > ARRAY_SIZE(error->active_bo)); + BUILD_BUG_ON(ARRAY_SIZE(error->active_bo) != ARRAY_SIZE(error->active_vm)); + BUILD_BUG_ON(ARRAY_SIZE(error->active_bo) != ARRAY_SIZE(error->active_bo_count)); + + /* Scan each engine looking for unique active contexts/vm */ + for (i = 0; i < ARRAY_SIZE(error->engine); i++) { + struct drm_i915_error_engine *ee = &error->engine[i]; + bool found; + + if (!ee->vm) + continue; - error->vm_count = cnt; + found = false; + for (j = 0; j < i && !found; j++) + found = error->engine[j].vm == ee->vm; + if (!found) + i915_gem_capture_vm(dev_priv, error, ee->vm, cnt++); } } +static void i915_capture_pinned_buffers(struct drm_i915_private *dev_priv, + struct drm_i915_error_state *error) +{ + struct i915_address_space *vm = &dev_priv->ggtt.base; + struct drm_i915_error_buffer *bo; + struct i915_vma *vma; + int count_inactive, count_active; + + count_inactive = 0; + list_for_each_entry(vma, &vm->active_list, vm_link) + count_inactive++; + + count_active = 0; + list_for_each_entry(vma, &vm->inactive_list, vm_link) + count_active++; + + bo = NULL; + if (count_inactive + count_active) + bo = kcalloc(count_inactive + count_active, + sizeof(*bo), GFP_ATOMIC); + if (!bo) + return; + + count_inactive = capture_error_bo(bo, count_inactive, + &vm->active_list, true); + count_active = capture_error_bo(bo + count_inactive, count_active, + &vm->inactive_list, true); + error->pinned_bo_count = count_inactive + count_active; + error->pinned_bo = bo; +} + /* Capture all registers which don't fit into another category. */ static void i915_capture_reg_state(struct drm_i915_private *dev_priv, struct drm_i915_error_state *error) @@ -1403,6 +1396,10 @@ static void i915_capture_gen_state(struct drm_i915_private *dev_priv, #endif error->reset_count = i915_reset_count(&dev_priv->gpu_error); error->suspend_count = dev_priv->suspend_count; + + memcpy(&error->device_info, + INTEL_INFO(dev_priv), + sizeof(error->device_info)); } /** @@ -1436,9 +1433,10 @@ void i915_capture_error_state(struct drm_i915_private *dev_priv, i915_capture_gen_state(dev_priv, error); i915_capture_reg_state(dev_priv, error); - i915_gem_capture_buffers(dev_priv, error); i915_gem_record_fences(dev_priv, error); i915_gem_record_rings(dev_priv, error); + i915_capture_active_buffers(dev_priv, error); + i915_capture_pinned_buffers(dev_priv, error); do_gettimeofday(&error->time); diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 03a5cef..e436941 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -183,7 +183,7 @@ static int guc_update_doorbell_id(struct intel_guc *guc, struct i915_guc_client *client, u16 new_id) { - struct sg_table *sg = guc->ctx_pool_obj->pages; + struct sg_table *sg = guc->ctx_pool_vma->pages; void *doorbell_bitmap = guc->doorbell_bitmap; struct guc_doorbell_info *doorbell; struct guc_context_desc desc; @@ -325,7 +325,6 @@ static void guc_init_proc_desc(struct intel_guc *guc, static void guc_init_ctx_desc(struct intel_guc *guc, struct i915_guc_client *client) { - struct drm_i915_gem_object *client_obj = client->client_obj; struct drm_i915_private *dev_priv = guc_to_i915(guc); struct intel_engine_cs *engine; struct i915_gem_context *ctx = client->owner; @@ -340,10 +339,10 @@ static void guc_init_ctx_desc(struct intel_guc *guc, desc.priority = client->priority; desc.db_id = client->doorbell_id; - for_each_engine(engine, dev_priv) { + for_each_engine_masked(engine, dev_priv, client->engines) { struct intel_context *ce = &ctx->engine[engine->id]; - struct guc_execlist_context *lrc = &desc.lrc[engine->guc_id]; - struct drm_i915_gem_object *obj; + uint32_t guc_engine_id = engine->guc_id; + struct guc_execlist_context *lrc = &desc.lrc[guc_engine_id]; /* TODO: We have a design issue to be solved here. Only when we * receive the first batch, we know which engine is used by the @@ -358,30 +357,29 @@ static void guc_init_ctx_desc(struct intel_guc *guc, lrc->context_desc = lower_32_bits(ce->lrc_desc); /* The state page is after PPHWSP */ - gfx_addr = i915_gem_obj_ggtt_offset(ce->state); - lrc->ring_lcra = gfx_addr + LRC_STATE_PN * PAGE_SIZE; + lrc->ring_lcra = + i915_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE; lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) | - (engine->guc_id << GUC_ELC_ENGINE_OFFSET); - - obj = ce->ring->obj; - gfx_addr = i915_gem_obj_ggtt_offset(obj); + (guc_engine_id << GUC_ELC_ENGINE_OFFSET); - lrc->ring_begin = gfx_addr; - lrc->ring_end = gfx_addr + obj->base.size - 1; - lrc->ring_next_free_location = gfx_addr; + lrc->ring_begin = i915_ggtt_offset(ce->ring->vma); + lrc->ring_end = lrc->ring_begin + ce->ring->size - 1; + lrc->ring_next_free_location = lrc->ring_begin; lrc->ring_current_tail_pointer_value = 0; - desc.engines_used |= (1 << engine->guc_id); + desc.engines_used |= (1 << guc_engine_id); } + DRM_DEBUG_DRIVER("Host engines 0x%x => GuC engines used 0x%x\n", + client->engines, desc.engines_used); WARN_ON(desc.engines_used == 0); /* * The doorbell, process descriptor, and workqueue are all parts * of the client object, which the GuC will reference via the GGTT */ - gfx_addr = i915_gem_obj_ggtt_offset(client_obj); - desc.db_trigger_phy = sg_dma_address(client_obj->pages->sgl) + + gfx_addr = i915_ggtt_offset(client->vma); + desc.db_trigger_phy = sg_dma_address(client->vma->pages->sgl) + client->doorbell_offset; desc.db_trigger_cpu = (uintptr_t)client->client_base + client->doorbell_offset; @@ -397,7 +395,7 @@ static void guc_init_ctx_desc(struct intel_guc *guc, desc.desc_private = (uintptr_t)client; /* Pool context is pinned already */ - sg = guc->ctx_pool_obj->pages; + sg = guc->ctx_pool_vma->pages; sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc), sizeof(desc) * client->ctx_index); } @@ -410,7 +408,7 @@ static void guc_fini_ctx_desc(struct intel_guc *guc, memset(&desc, 0, sizeof(desc)); - sg = guc->ctx_pool_obj->pages; + sg = guc->ctx_pool_vma->pages; sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc), sizeof(desc) * client->ctx_index); } @@ -457,6 +455,7 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc, /* wqi_len is in DWords, and does not include the one-word header */ const size_t wqi_size = sizeof(struct guc_wq_item); const u32 wqi_len = wqi_size/sizeof(u32) - 1; + struct intel_engine_cs *engine = rq->engine; struct guc_process_desc *desc; struct guc_wq_item *wqi; void *base; @@ -492,18 +491,17 @@ static void guc_add_workqueue_item(struct i915_guc_client *gc, /* WQ starts from the page after doorbell / process_desc */ wq_page = (wq_off + GUC_DB_SIZE) >> PAGE_SHIFT; wq_off &= PAGE_SIZE - 1; - base = kmap_atomic(i915_gem_object_get_page(gc->client_obj, wq_page)); + base = kmap_atomic(i915_gem_object_get_page(gc->vma->obj, wq_page)); wqi = (struct guc_wq_item *)((char *)base + wq_off); /* Now fill in the 4-word work queue item */ wqi->header = WQ_TYPE_INORDER | (wqi_len << WQ_LEN_SHIFT) | - (rq->engine->guc_id << WQ_TARGET_SHIFT) | + (engine->guc_id << WQ_TARGET_SHIFT) | WQ_NO_WCFLUSH_WAIT; /* The GuC wants only the low-order word of the context descriptor */ - wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, - rq->engine); + wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, engine); wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT; wqi->fence_id = rq->fence.seqno; @@ -611,55 +609,48 @@ static void i915_guc_submit(struct drm_i915_gem_request *rq) */ /** - * gem_allocate_guc_obj() - Allocate gem object for GuC usage - * @dev_priv: driver private data structure - * @size: size of object + * guc_allocate_vma() - Allocate a GGTT VMA for GuC usage + * @guc: the guc + * @size: size of area to allocate (both virtual space and memory) * - * This is a wrapper to create a gem obj. In order to use it inside GuC, the - * object needs to be pinned lifetime. Also we must pin it to gtt space other - * than [0, GUC_WOPCM_TOP) because this range is reserved inside GuC. + * This is a wrapper to create an object for use with the GuC. In order to + * use it inside the GuC, an object needs to be pinned lifetime, so we allocate + * both some backing storage and a range inside the Global GTT. We must pin + * it in the GGTT somewhere other than than [0, GUC_WOPCM_TOP) because that + * range is reserved inside GuC. * - * Return: A drm_i915_gem_object if successful, otherwise NULL. + * Return: A i915_vma if successful, otherwise an ERR_PTR. */ -static struct drm_i915_gem_object * -gem_allocate_guc_obj(struct drm_i915_private *dev_priv, u32 size) +static struct i915_vma *guc_allocate_vma(struct intel_guc *guc, u32 size) { + struct drm_i915_private *dev_priv = guc_to_i915(guc); struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int ret; obj = i915_gem_object_create(&dev_priv->drm, size); if (IS_ERR(obj)) - return NULL; + return ERR_CAST(obj); - if (i915_gem_object_get_pages(obj)) { - i915_gem_object_put(obj); - return NULL; - } + vma = i915_vma_create(obj, &dev_priv->ggtt.base, NULL); + if (IS_ERR(vma)) + goto err; - if (i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE, - PIN_OFFSET_BIAS | GUC_WOPCM_TOP)) { - i915_gem_object_put(obj); - return NULL; + ret = i915_vma_pin(vma, 0, PAGE_SIZE, + PIN_GLOBAL | PIN_OFFSET_BIAS | GUC_WOPCM_TOP); + if (ret) { + vma = ERR_PTR(ret); + goto err; } /* Invalidate GuC TLB to let GuC take the latest updates to GTT. */ I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE); - return obj; -} - -/** - * gem_release_guc_obj() - Release gem object allocated for GuC usage - * @obj: gem obj to be released - */ -static void gem_release_guc_obj(struct drm_i915_gem_object *obj) -{ - if (!obj) - return; - - if (i915_gem_obj_is_pinned(obj)) - i915_gem_object_ggtt_unpin(obj); + return vma; +err: i915_gem_object_put(obj); + return vma; } static void @@ -686,7 +677,7 @@ guc_client_free(struct drm_i915_private *dev_priv, kunmap(kmap_to_page(client->client_base)); } - gem_release_guc_obj(client->client_obj); + i915_vma_unpin_and_release(&client->vma); if (client->ctx_index != GUC_INVALID_CTX_ID) { guc_fini_ctx_desc(guc, client); @@ -696,29 +687,47 @@ guc_client_free(struct drm_i915_private *dev_priv, kfree(client); } +/* Check that a doorbell register is in the expected state */ +static bool guc_doorbell_check(struct intel_guc *guc, uint16_t db_id) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + i915_reg_t drbreg = GEN8_DRBREGL(db_id); + uint32_t value = I915_READ(drbreg); + bool enabled = (value & GUC_DOORBELL_ENABLED) != 0; + bool expected = test_bit(db_id, guc->doorbell_bitmap); + + if (enabled == expected) + return true; + + DRM_DEBUG_DRIVER("Doorbell %d (reg 0x%x) 0x%x, should be %s\n", + db_id, drbreg.reg, value, + expected ? "active" : "inactive"); + + return false; +} + /* - * Borrow the first client to set up & tear down every doorbell + * Borrow the first client to set up & tear down each unused doorbell * in turn, to ensure that all doorbell h/w is (re)initialised. */ static void guc_init_doorbell_hw(struct intel_guc *guc) { - struct drm_i915_private *dev_priv = guc_to_i915(guc); struct i915_guc_client *client = guc->execbuf_client; - uint16_t db_id, i; - int err; + uint16_t db_id; + int i, err; + /* Save client's original doorbell selection */ db_id = client->doorbell_id; for (i = 0; i < GUC_MAX_DOORBELLS; ++i) { - i915_reg_t drbreg = GEN8_DRBREGL(i); - u32 value = I915_READ(drbreg); + /* Skip if doorbell is OK */ + if (guc_doorbell_check(guc, i)) + continue; err = guc_update_doorbell_id(guc, client, i); - - /* Report update failure or unexpectedly active doorbell */ - if (err || (i != db_id && (value & GUC_DOORBELL_ENABLED))) - DRM_DEBUG_DRIVER("Doorbell %d (reg 0x%x) was 0x%x, err %d\n", - i, drbreg.reg, value, err); + if (err) + DRM_DEBUG_DRIVER("Doorbell %d update failed, err %d\n", + i, err); } /* Restore to original value */ @@ -727,20 +736,15 @@ static void guc_init_doorbell_hw(struct intel_guc *guc) DRM_ERROR("Failed to restore doorbell to %d, err %d\n", db_id, err); - for (i = 0; i < GUC_MAX_DOORBELLS; ++i) { - i915_reg_t drbreg = GEN8_DRBREGL(i); - u32 value = I915_READ(drbreg); - - if (i != db_id && (value & GUC_DOORBELL_ENABLED)) - DRM_DEBUG_DRIVER("Doorbell %d (reg 0x%x) finally 0x%x\n", - i, drbreg.reg, value); - - } + /* Read back & verify all doorbell registers */ + for (i = 0; i < GUC_MAX_DOORBELLS; ++i) + (void)guc_doorbell_check(guc, i); } /** * guc_client_alloc() - Allocate an i915_guc_client * @dev_priv: driver private data structure + * @engines: The set of engines to enable for this client * @priority: four levels priority _CRITICAL, _HIGH, _NORMAL and _LOW * The kernel client to replace ExecList submission is created with * NORMAL priority. Priority of a client for scheduler can be HIGH, @@ -752,22 +756,24 @@ static void guc_init_doorbell_hw(struct intel_guc *guc) */ static struct i915_guc_client * guc_client_alloc(struct drm_i915_private *dev_priv, + uint32_t engines, uint32_t priority, struct i915_gem_context *ctx) { struct i915_guc_client *client; struct intel_guc *guc = &dev_priv->guc; - struct drm_i915_gem_object *obj; + struct i915_vma *vma; uint16_t db_id; client = kzalloc(sizeof(*client), GFP_KERNEL); if (!client) return NULL; - client->doorbell_id = GUC_INVALID_DOORBELL_ID; - client->priority = priority; client->owner = ctx; client->guc = guc; + client->engines = engines; + client->priority = priority; + client->doorbell_id = GUC_INVALID_DOORBELL_ID; client->ctx_index = (uint32_t)ida_simple_get(&guc->ctx_ids, 0, GUC_MAX_GPU_CONTEXTS, GFP_KERNEL); @@ -777,13 +783,13 @@ guc_client_alloc(struct drm_i915_private *dev_priv, } /* The first page is doorbell/proc_desc. Two followed pages are wq. */ - obj = gem_allocate_guc_obj(dev_priv, GUC_DB_SIZE + GUC_WQ_SIZE); - if (!obj) + vma = guc_allocate_vma(guc, GUC_DB_SIZE + GUC_WQ_SIZE); + if (IS_ERR(vma)) goto err; /* We'll keep just the first (doorbell/proc) page permanently kmap'd. */ - client->client_obj = obj; - client->client_base = kmap(i915_gem_object_get_page(obj, 0)); + client->vma = vma; + client->client_base = kmap(i915_vma_first_page(vma)); client->wq_offset = GUC_DB_SIZE; client->wq_size = GUC_WQ_SIZE; @@ -809,8 +815,8 @@ guc_client_alloc(struct drm_i915_private *dev_priv, if (guc_init_doorbell(guc, client, db_id)) goto err; - DRM_DEBUG_DRIVER("new priority %u client %p: ctx_index %u\n", - priority, client, client->ctx_index); + DRM_DEBUG_DRIVER("new priority %u client %p for engine(s) 0x%x: ctx_index %u\n", + priority, client, client->engines, client->ctx_index); DRM_DEBUG_DRIVER("doorbell id %u, cacheline offset 0x%x\n", client->doorbell_id, client->doorbell_offset); @@ -825,8 +831,7 @@ err: static void guc_create_log(struct intel_guc *guc) { - struct drm_i915_private *dev_priv = guc_to_i915(guc); - struct drm_i915_gem_object *obj; + struct i915_vma *vma; unsigned long offset; uint32_t size, flags; @@ -842,16 +847,16 @@ static void guc_create_log(struct intel_guc *guc) GUC_LOG_ISR_PAGES + 1 + GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT; - obj = guc->log_obj; - if (!obj) { - obj = gem_allocate_guc_obj(dev_priv, size); - if (!obj) { + vma = guc->log_vma; + if (!vma) { + vma = guc_allocate_vma(guc, size); + if (IS_ERR(vma)) { /* logging will be off */ i915.guc_log_level = -1; return; } - guc->log_obj = obj; + guc->log_vma = vma; } /* each allocated unit is a page */ @@ -860,7 +865,7 @@ static void guc_create_log(struct intel_guc *guc) (GUC_LOG_ISR_PAGES << GUC_LOG_ISR_SHIFT) | (GUC_LOG_CRASH_PAGES << GUC_LOG_CRASH_SHIFT); - offset = i915_gem_obj_ggtt_offset(obj) >> PAGE_SHIFT; /* in pages */ + offset = i915_ggtt_offset(vma) >> PAGE_SHIFT; /* in pages */ guc->log_flags = (offset << GUC_LOG_BUF_ADDR_SHIFT) | flags; } @@ -889,7 +894,7 @@ static void init_guc_policies(struct guc_policies *policies) static void guc_create_ads(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); - struct drm_i915_gem_object *obj; + struct i915_vma *vma; struct guc_ads *ads; struct guc_policies *policies; struct guc_mmio_reg_state *reg_state; @@ -902,16 +907,16 @@ static void guc_create_ads(struct intel_guc *guc) sizeof(struct guc_mmio_reg_state) + GUC_S3_SAVE_SPACE_PAGES * PAGE_SIZE; - obj = guc->ads_obj; - if (!obj) { - obj = gem_allocate_guc_obj(dev_priv, PAGE_ALIGN(size)); - if (!obj) + vma = guc->ads_vma; + if (!vma) { + vma = guc_allocate_vma(guc, PAGE_ALIGN(size)); + if (IS_ERR(vma)) return; - guc->ads_obj = obj; + guc->ads_vma = vma; } - page = i915_gem_object_get_page(obj, 0); + page = i915_vma_first_page(vma); ads = kmap(page); /* @@ -922,7 +927,7 @@ static void guc_create_ads(struct intel_guc *guc) * to find it. */ engine = &dev_priv->engine[RCS]; - ads->golden_context_lrca = engine->status_page.gfx_addr; + ads->golden_context_lrca = engine->status_page.ggtt_offset; for_each_engine(engine, dev_priv) ads->eng_state_size[engine->guc_id] = intel_lr_context_size(engine); @@ -931,8 +936,8 @@ static void guc_create_ads(struct intel_guc *guc) policies = (void *)ads + sizeof(struct guc_ads); init_guc_policies(policies); - ads->scheduler_policies = i915_gem_obj_ggtt_offset(obj) + - sizeof(struct guc_ads); + ads->scheduler_policies = + i915_ggtt_offset(vma) + sizeof(struct guc_ads); /* MMIO reg state */ reg_state = (void *)policies + sizeof(struct guc_policies); @@ -960,10 +965,9 @@ static void guc_create_ads(struct intel_guc *guc) */ int i915_guc_submission_init(struct drm_i915_private *dev_priv) { - const size_t ctxsize = sizeof(struct guc_context_desc); - const size_t poolsize = GUC_MAX_GPU_CONTEXTS * ctxsize; - const size_t gemsize = round_up(poolsize, PAGE_SIZE); struct intel_guc *guc = &dev_priv->guc; + struct i915_vma *vma; + u32 size; /* Wipe bitmap & delete client in case of reinitialisation */ bitmap_clear(guc->doorbell_bitmap, 0, GUC_MAX_DOORBELLS); @@ -972,13 +976,15 @@ int i915_guc_submission_init(struct drm_i915_private *dev_priv) if (!i915.enable_guc_submission) return 0; /* not enabled */ - if (guc->ctx_pool_obj) + if (guc->ctx_pool_vma) return 0; /* already allocated */ - guc->ctx_pool_obj = gem_allocate_guc_obj(dev_priv, gemsize); - if (!guc->ctx_pool_obj) - return -ENOMEM; + size = PAGE_ALIGN(GUC_MAX_GPU_CONTEXTS*sizeof(struct guc_context_desc)); + vma = guc_allocate_vma(guc, size); + if (IS_ERR(vma)) + return PTR_ERR(vma); + guc->ctx_pool_vma = vma; ida_init(&guc->ctx_ids); guc_create_log(guc); guc_create_ads(guc); @@ -994,6 +1000,7 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv) /* client for execbuf submission */ client = guc_client_alloc(dev_priv, + INTEL_INFO(dev_priv)->ring_mask, GUC_CTX_PRIORITY_KMD_NORMAL, dev_priv->kernel_context); if (!client) { @@ -1030,16 +1037,12 @@ void i915_guc_submission_fini(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; - gem_release_guc_obj(dev_priv->guc.ads_obj); - guc->ads_obj = NULL; - - gem_release_guc_obj(dev_priv->guc.log_obj); - guc->log_obj = NULL; + i915_vma_unpin_and_release(&guc->ads_vma); + i915_vma_unpin_and_release(&guc->log_vma); - if (guc->ctx_pool_obj) + if (guc->ctx_pool_vma) ida_destroy(&guc->ctx_ids); - gem_release_guc_obj(guc->ctx_pool_obj); - guc->ctx_pool_obj = NULL; + i915_vma_unpin_and_release(&guc->ctx_pool_vma); } /** @@ -1062,7 +1065,7 @@ int intel_guc_suspend(struct drm_device *dev) /* any value greater than GUC_POWER_D0 */ data[1] = GUC_POWER_D1; /* first page is shared data with GuC */ - data[2] = i915_gem_obj_ggtt_offset(ctx->engine[RCS].state); + data[2] = i915_ggtt_offset(ctx->engine[RCS].state); return host2guc_action(guc, data, ARRAY_SIZE(data)); } @@ -1087,7 +1090,7 @@ int intel_guc_resume(struct drm_device *dev) data[0] = HOST2GUC_ACTION_EXIT_S_STATE; data[1] = GUC_POWER_D0; /* first page is shared data with GuC */ - data[2] = i915_gem_obj_ggtt_offset(ctx->engine[RCS].state); + data[2] = i915_ggtt_offset(ctx->engine[RCS].state); return host2guc_action(guc, data, ARRAY_SIZE(data)); } diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 591f452..ebb83d5 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -972,10 +972,8 @@ static void ironlake_rps_change_irq_handler(struct drm_i915_private *dev_priv) static void notify_ring(struct intel_engine_cs *engine) { smp_store_mb(engine->breadcrumbs.irq_posted, true); - if (intel_engine_wakeup(engine)) { + if (intel_engine_wakeup(engine)) trace_i915_gem_request_notify(engine); - engine->breadcrumbs.irq_wakeups++; - } } static void vlv_c0_read(struct drm_i915_private *dev_priv, @@ -3044,22 +3042,6 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd) return HANGCHECK_HUNG; } -static unsigned long kick_waiters(struct intel_engine_cs *engine) -{ - struct drm_i915_private *i915 = engine->i915; - unsigned long irq_count = READ_ONCE(engine->breadcrumbs.irq_wakeups); - - if (engine->hangcheck.user_interrupts == irq_count && - !test_and_set_bit(engine->id, &i915->gpu_error.missed_irq_rings)) { - if (!test_bit(engine->id, &i915->gpu_error.test_irq_rings)) - DRM_ERROR("Hangcheck timer elapsed... %s idle\n", - engine->name); - - intel_engine_enable_fake_irq(engine); - } - - return irq_count; -} /* * This is called when the chip hasn't reported back with completed * batchbuffers in a long time. We keep track per ring seqno progress and @@ -3097,7 +3079,6 @@ static void i915_hangcheck_elapsed(struct work_struct *work) bool busy = intel_engine_has_waiter(engine); u64 acthd; u32 seqno; - unsigned user_interrupts; semaphore_clear_deadlocks(dev_priv); @@ -3114,15 +3095,11 @@ static void i915_hangcheck_elapsed(struct work_struct *work) acthd = intel_engine_get_active_head(engine); seqno = intel_engine_get_seqno(engine); - /* Reset stuck interrupts between batch advances */ - user_interrupts = 0; - if (engine->hangcheck.seqno == seqno) { if (!intel_engine_is_active(engine)) { engine->hangcheck.action = HANGCHECK_IDLE; if (busy) { /* Safeguard against driver failure */ - user_interrupts = kick_waiters(engine); engine->hangcheck.score += BUSY; } } else { @@ -3185,7 +3162,6 @@ static void i915_hangcheck_elapsed(struct work_struct *work) engine->hangcheck.seqno = seqno; engine->hangcheck.acthd = acthd; - engine->hangcheck.user_interrupts = user_interrupts; busy_count += busy; } diff --git a/drivers/gpu/drm/i915/i915_memcpy.c b/drivers/gpu/drm/i915/i915_memcpy.c new file mode 100644 index 0000000..49a0794 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_memcpy.c @@ -0,0 +1,101 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include <linux/kernel.h> +#include <asm/fpu/api.h> + +#include "i915_drv.h" + +static DEFINE_STATIC_KEY_FALSE(has_movntdqa); + +#ifdef CONFIG_AS_MOVNTDQA +static void __memcpy_ntdqa(void *dst, const void *src, unsigned long len) +{ + kernel_fpu_begin(); + + len >>= 4; + while (len >= 4) { + asm("movntdqa (%0), %%xmm0\n" + "movntdqa 16(%0), %%xmm1\n" + "movntdqa 32(%0), %%xmm2\n" + "movntdqa 48(%0), %%xmm3\n" + "movaps %%xmm0, (%1)\n" + "movaps %%xmm1, 16(%1)\n" + "movaps %%xmm2, 32(%1)\n" + "movaps %%xmm3, 48(%1)\n" + :: "r" (src), "r" (dst) : "memory"); + src += 64; + dst += 64; + len -= 4; + } + while (len--) { + asm("movntdqa (%0), %%xmm0\n" + "movaps %%xmm0, (%1)\n" + :: "r" (src), "r" (dst) : "memory"); + src += 16; + dst += 16; + } + + kernel_fpu_end(); +} +#endif + +/** + * i915_memcpy_from_wc: perform an accelerated *aligned* read from WC + * @dst: destination pointer + * @src: source pointer + * @len: how many bytes to copy + * + * i915_memcpy_from_wc copies @len bytes from @src to @dst using + * non-temporal instructions where available. Note that all arguments + * (@src, @dst) must be aligned to 16 bytes and @len must be a multiple + * of 16. + * + * To test whether accelerated reads from WC are supported, use + * i915_memcpy_from_wc(NULL, NULL, 0); + * + * Returns true if the copy was successful, false if the preconditions + * are not met. + */ +bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len) +{ + if (unlikely(((unsigned long)dst | (unsigned long)src | len) & 15)) + return false; + +#ifdef CONFIG_AS_MOVNTDQA + if (static_branch_likely(&has_movntdqa)) { + if (likely(len)) + __memcpy_ntdqa(dst, src, len); + return true; + } +#endif + + return false; +} + +void i915_memcpy_init_early(struct drm_i915_private *dev_priv) +{ + if (static_cpu_has(X86_FEATURE_XMM4_1)) + static_branch_enable(&has_movntdqa); +} diff --git a/drivers/gpu/drm/i915/i915_mm.c b/drivers/gpu/drm/i915/i915_mm.c new file mode 100644 index 0000000..e4935dd --- /dev/null +++ b/drivers/gpu/drm/i915/i915_mm.c @@ -0,0 +1,84 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include <linux/mm.h> +#include <linux/io-mapping.h> + +#include <asm/pgtable.h> + +#include "i915_drv.h" + +struct remap_pfn { + struct mm_struct *mm; + unsigned long pfn; + pgprot_t prot; +}; + +static int remap_pfn(pte_t *pte, pgtable_t token, + unsigned long addr, void *data) +{ + struct remap_pfn *r = data; + + /* Special PTE are not associated with any struct page */ + set_pte_at(r->mm, addr, pte, pte_mkspecial(pfn_pte(r->pfn, r->prot))); + r->pfn++; + + return 0; +} + +/** + * remap_io_mapping - remap an IO mapping to userspace + * @vma: user vma to map to + * @addr: target user address to start at + * @pfn: physical address of kernel memory + * @size: size of map area + * @iomap: the source io_mapping + * + * Note: this is only safe if the mm semaphore is held when called. + */ +int remap_io_mapping(struct vm_area_struct *vma, + unsigned long addr, unsigned long pfn, unsigned long size, + struct io_mapping *iomap) +{ + struct remap_pfn r; + int err; + + GEM_BUG_ON((vma->vm_flags & + (VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP)) != + (VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP)); + + /* We rely on prevalidation of the io-mapping to skip track_pfn(). */ + r.mm = vma->vm_mm; + r.pfn = pfn; + r.prot = __pgprot((pgprot_val(iomap->prot) & _PAGE_CACHE_MASK) | + (pgprot_val(vma->vm_page_prot) & ~_PAGE_CACHE_MASK)); + + err = apply_to_page_range(r.mm, addr, size, remap_pfn, &r); + if (unlikely(err)) { + zap_vma_ptes(vma, addr, (r.pfn - pfn) << PAGE_SHIFT); + return err; + } + + return 0; +} diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index b6e404c..768ad89 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -45,6 +45,7 @@ struct i915_params i915 __read_mostly = { .fastboot = 0, .prefault_disable = 0, .load_detect_test = 0, + .force_reset_modeset_test = 0, .reset = true, .invert_brightness = 0, .disable_display = 0, @@ -161,6 +162,11 @@ MODULE_PARM_DESC(load_detect_test, "Force-enable the VGA load detect code for testing (default:false). " "For developers only."); +module_param_named_unsafe(force_reset_modeset_test, i915.force_reset_modeset_test, bool, 0600); +MODULE_PARM_DESC(force_reset_modeset_test, + "Force a modeset during gpu reset for testing (default:false). " + "For developers only."); + module_param_named_unsafe(invert_brightness, i915.invert_brightness, int, 0600); MODULE_PARM_DESC(invert_brightness, "Invert backlight brightness " diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 0ad020b..3a0dd78 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -57,6 +57,7 @@ struct i915_params { bool fastboot; bool prefault_disable; bool load_detect_test; + bool force_reset_modeset_test; bool reset; bool disable_display; bool verbose_state_checks; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index f38a5e2..d4adf28 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3660,8 +3660,17 @@ enum { #define VIDEO_DIP_ENABLE_SPD_HSW (1 << 0) /* Panel power sequencing */ -#define PP_STATUS _MMIO(0x61200) -#define PP_ON (1 << 31) +#define PPS_BASE 0x61200 +#define VLV_PPS_BASE (VLV_DISPLAY_BASE + PPS_BASE) +#define PCH_PPS_BASE 0xC7200 + +#define _MMIO_PPS(pps_idx, reg) _MMIO(dev_priv->pps_mmio_base - \ + PPS_BASE + (reg) + \ + (pps_idx) * 0x100) + +#define _PP_STATUS 0x61200 +#define PP_STATUS(pps_idx) _MMIO_PPS(pps_idx, _PP_STATUS) +#define PP_ON (1 << 31) /* * Indicates that all dependencies of the panel are on: * @@ -3669,14 +3678,14 @@ enum { * - pipe enabled * - LVDS/DVOB/DVOC on */ -#define PP_READY (1 << 30) -#define PP_SEQUENCE_NONE (0 << 28) -#define PP_SEQUENCE_POWER_UP (1 << 28) -#define PP_SEQUENCE_POWER_DOWN (2 << 28) -#define PP_SEQUENCE_MASK (3 << 28) -#define PP_SEQUENCE_SHIFT 28 -#define PP_CYCLE_DELAY_ACTIVE (1 << 27) -#define PP_SEQUENCE_STATE_MASK 0x0000000f +#define PP_READY (1 << 30) +#define PP_SEQUENCE_NONE (0 << 28) +#define PP_SEQUENCE_POWER_UP (1 << 28) +#define PP_SEQUENCE_POWER_DOWN (2 << 28) +#define PP_SEQUENCE_MASK (3 << 28) +#define PP_SEQUENCE_SHIFT 28 +#define PP_CYCLE_DELAY_ACTIVE (1 << 27) +#define PP_SEQUENCE_STATE_MASK 0x0000000f #define PP_SEQUENCE_STATE_OFF_IDLE (0x0 << 0) #define PP_SEQUENCE_STATE_OFF_S0_1 (0x1 << 0) #define PP_SEQUENCE_STATE_OFF_S0_2 (0x2 << 0) @@ -3686,11 +3695,46 @@ enum { #define PP_SEQUENCE_STATE_ON_S1_2 (0xa << 0) #define PP_SEQUENCE_STATE_ON_S1_3 (0xb << 0) #define PP_SEQUENCE_STATE_RESET (0xf << 0) -#define PP_CONTROL _MMIO(0x61204) -#define POWER_TARGET_ON (1 << 0) -#define PP_ON_DELAYS _MMIO(0x61208) -#define PP_OFF_DELAYS _MMIO(0x6120c) -#define PP_DIVISOR _MMIO(0x61210) + +#define _PP_CONTROL 0x61204 +#define PP_CONTROL(pps_idx) _MMIO_PPS(pps_idx, _PP_CONTROL) +#define PANEL_UNLOCK_REGS (0xabcd << 16) +#define PANEL_UNLOCK_MASK (0xffff << 16) +#define BXT_POWER_CYCLE_DELAY_MASK 0x1f0 +#define BXT_POWER_CYCLE_DELAY_SHIFT 4 +#define EDP_FORCE_VDD (1 << 3) +#define EDP_BLC_ENABLE (1 << 2) +#define PANEL_POWER_RESET (1 << 1) +#define PANEL_POWER_OFF (0 << 0) +#define PANEL_POWER_ON (1 << 0) + +#define _PP_ON_DELAYS 0x61208 +#define PP_ON_DELAYS(pps_idx) _MMIO_PPS(pps_idx, _PP_ON_DELAYS) +#define PANEL_PORT_SELECT_SHIFT 30 +#define PANEL_PORT_SELECT_MASK (3 << 30) +#define PANEL_PORT_SELECT_LVDS (0 << 30) +#define PANEL_PORT_SELECT_DPA (1 << 30) +#define PANEL_PORT_SELECT_DPC (2 << 30) +#define PANEL_PORT_SELECT_DPD (3 << 30) +#define PANEL_PORT_SELECT_VLV(port) ((port) << 30) +#define PANEL_POWER_UP_DELAY_MASK 0x1fff0000 +#define PANEL_POWER_UP_DELAY_SHIFT 16 +#define PANEL_LIGHT_ON_DELAY_MASK 0x1fff +#define PANEL_LIGHT_ON_DELAY_SHIFT 0 + +#define _PP_OFF_DELAYS 0x6120C +#define PP_OFF_DELAYS(pps_idx) _MMIO_PPS(pps_idx, _PP_OFF_DELAYS) +#define PANEL_POWER_DOWN_DELAY_MASK 0x1fff0000 +#define PANEL_POWER_DOWN_DELAY_SHIFT 16 +#define PANEL_LIGHT_OFF_DELAY_MASK 0x1fff +#define PANEL_LIGHT_OFF_DELAY_SHIFT 0 + +#define _PP_DIVISOR 0x61210 +#define PP_DIVISOR(pps_idx) _MMIO_PPS(pps_idx, _PP_DIVISOR) +#define PP_REFERENCE_DIVIDER_MASK 0xffffff00 +#define PP_REFERENCE_DIVIDER_SHIFT 8 +#define PANEL_POWER_CYCLE_DELAY_MASK 0x1f +#define PANEL_POWER_CYCLE_DELAY_SHIFT 0 /* Panel fitting */ #define PFIT_CONTROL _MMIO(dev_priv->info.display_mmio_offset + 0x61230) @@ -6750,77 +6794,6 @@ enum { #define PCH_LVDS _MMIO(0xe1180) #define LVDS_DETECTED (1 << 1) -/* vlv has 2 sets of panel control regs. */ -#define _PIPEA_PP_STATUS (VLV_DISPLAY_BASE + 0x61200) -#define _PIPEA_PP_CONTROL (VLV_DISPLAY_BASE + 0x61204) -#define _PIPEA_PP_ON_DELAYS (VLV_DISPLAY_BASE + 0x61208) -#define PANEL_PORT_SELECT_VLV(port) ((port) << 30) -#define _PIPEA_PP_OFF_DELAYS (VLV_DISPLAY_BASE + 0x6120c) -#define _PIPEA_PP_DIVISOR (VLV_DISPLAY_BASE + 0x61210) - -#define _PIPEB_PP_STATUS (VLV_DISPLAY_BASE + 0x61300) -#define _PIPEB_PP_CONTROL (VLV_DISPLAY_BASE + 0x61304) -#define _PIPEB_PP_ON_DELAYS (VLV_DISPLAY_BASE + 0x61308) -#define _PIPEB_PP_OFF_DELAYS (VLV_DISPLAY_BASE + 0x6130c) -#define _PIPEB_PP_DIVISOR (VLV_DISPLAY_BASE + 0x61310) - -#define VLV_PIPE_PP_STATUS(pipe) _MMIO_PIPE(pipe, _PIPEA_PP_STATUS, _PIPEB_PP_STATUS) -#define VLV_PIPE_PP_CONTROL(pipe) _MMIO_PIPE(pipe, _PIPEA_PP_CONTROL, _PIPEB_PP_CONTROL) -#define VLV_PIPE_PP_ON_DELAYS(pipe) _MMIO_PIPE(pipe, _PIPEA_PP_ON_DELAYS, _PIPEB_PP_ON_DELAYS) -#define VLV_PIPE_PP_OFF_DELAYS(pipe) _MMIO_PIPE(pipe, _PIPEA_PP_OFF_DELAYS, _PIPEB_PP_OFF_DELAYS) -#define VLV_PIPE_PP_DIVISOR(pipe) _MMIO_PIPE(pipe, _PIPEA_PP_DIVISOR, _PIPEB_PP_DIVISOR) - -#define _PCH_PP_STATUS 0xc7200 -#define _PCH_PP_CONTROL 0xc7204 -#define PANEL_UNLOCK_REGS (0xabcd << 16) -#define PANEL_UNLOCK_MASK (0xffff << 16) -#define BXT_POWER_CYCLE_DELAY_MASK (0x1f0) -#define BXT_POWER_CYCLE_DELAY_SHIFT 4 -#define EDP_FORCE_VDD (1 << 3) -#define EDP_BLC_ENABLE (1 << 2) -#define PANEL_POWER_RESET (1 << 1) -#define PANEL_POWER_OFF (0 << 0) -#define PANEL_POWER_ON (1 << 0) -#define _PCH_PP_ON_DELAYS 0xc7208 -#define PANEL_PORT_SELECT_MASK (3 << 30) -#define PANEL_PORT_SELECT_LVDS (0 << 30) -#define PANEL_PORT_SELECT_DPA (1 << 30) -#define PANEL_PORT_SELECT_DPC (2 << 30) -#define PANEL_PORT_SELECT_DPD (3 << 30) -#define PANEL_POWER_UP_DELAY_MASK (0x1fff0000) -#define PANEL_POWER_UP_DELAY_SHIFT 16 -#define PANEL_LIGHT_ON_DELAY_MASK (0x1fff) -#define PANEL_LIGHT_ON_DELAY_SHIFT 0 - -#define _PCH_PP_OFF_DELAYS 0xc720c -#define PANEL_POWER_DOWN_DELAY_MASK (0x1fff0000) -#define PANEL_POWER_DOWN_DELAY_SHIFT 16 -#define PANEL_LIGHT_OFF_DELAY_MASK (0x1fff) -#define PANEL_LIGHT_OFF_DELAY_SHIFT 0 - -#define _PCH_PP_DIVISOR 0xc7210 -#define PP_REFERENCE_DIVIDER_MASK (0xffffff00) -#define PP_REFERENCE_DIVIDER_SHIFT 8 -#define PANEL_POWER_CYCLE_DELAY_MASK (0x1f) -#define PANEL_POWER_CYCLE_DELAY_SHIFT 0 - -#define PCH_PP_STATUS _MMIO(_PCH_PP_STATUS) -#define PCH_PP_CONTROL _MMIO(_PCH_PP_CONTROL) -#define PCH_PP_ON_DELAYS _MMIO(_PCH_PP_ON_DELAYS) -#define PCH_PP_OFF_DELAYS _MMIO(_PCH_PP_OFF_DELAYS) -#define PCH_PP_DIVISOR _MMIO(_PCH_PP_DIVISOR) - -/* BXT PPS changes - 2nd set of PPS registers */ -#define _BXT_PP_STATUS2 0xc7300 -#define _BXT_PP_CONTROL2 0xc7304 -#define _BXT_PP_ON_DELAYS2 0xc7308 -#define _BXT_PP_OFF_DELAYS2 0xc730c - -#define BXT_PP_STATUS(n) _MMIO_PIPE(n, _PCH_PP_STATUS, _BXT_PP_STATUS2) -#define BXT_PP_CONTROL(n) _MMIO_PIPE(n, _PCH_PP_CONTROL, _BXT_PP_CONTROL2) -#define BXT_PP_ON_DELAYS(n) _MMIO_PIPE(n, _PCH_PP_ON_DELAYS, _BXT_PP_ON_DELAYS2) -#define BXT_PP_OFF_DELAYS(n) _MMIO_PIPE(n, _PCH_PP_OFF_DELAYS, _BXT_PP_OFF_DELAYS2) - #define _PCH_DP_B 0xe4100 #define PCH_DP_B _MMIO(_PCH_DP_B) #define _PCH_DPB_AUX_CH_CTL 0xe4110 @@ -7063,12 +7036,13 @@ enum { #define GEN6_RP_UP_THRESHOLD _MMIO(0xA02C) #define GEN6_RP_DOWN_THRESHOLD _MMIO(0xA030) #define GEN6_RP_CUR_UP_EI _MMIO(0xA050) -#define GEN6_CURICONT_MASK 0xffffff +#define GEN6_RP_EI_MASK 0xffffff +#define GEN6_CURICONT_MASK GEN6_RP_EI_MASK #define GEN6_RP_CUR_UP _MMIO(0xA054) -#define GEN6_CURBSYTAVG_MASK 0xffffff +#define GEN6_CURBSYTAVG_MASK GEN6_RP_EI_MASK #define GEN6_RP_PREV_UP _MMIO(0xA058) #define GEN6_RP_CUR_DOWN_EI _MMIO(0xA05C) -#define GEN6_CURIAVG_MASK 0xffffff +#define GEN6_CURIAVG_MASK GEN6_RP_EI_MASK #define GEN6_RP_CUR_DOWN _MMIO(0xA060) #define GEN6_RP_PREV_DOWN _MMIO(0xA064) #define GEN6_RP_UP_EI _MMIO(0xA068) diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c index 5cfe4c7..4f27277 100644 --- a/drivers/gpu/drm/i915/i915_suspend.c +++ b/drivers/gpu/drm/i915/i915_suspend.c @@ -37,25 +37,6 @@ static void i915_save_display(struct drm_device *dev) if (INTEL_INFO(dev)->gen <= 4) dev_priv->regfile.saveDSPARB = I915_READ(DSPARB); - /* LVDS state */ - if (HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev)) - dev_priv->regfile.saveLVDS = I915_READ(PCH_LVDS); - else if (INTEL_INFO(dev)->gen <= 4 && IS_MOBILE(dev) && !IS_I830(dev)) - dev_priv->regfile.saveLVDS = I915_READ(LVDS); - - /* Panel power sequencer */ - if (HAS_PCH_SPLIT(dev)) { - dev_priv->regfile.savePP_CONTROL = I915_READ(PCH_PP_CONTROL); - dev_priv->regfile.savePP_ON_DELAYS = I915_READ(PCH_PP_ON_DELAYS); - dev_priv->regfile.savePP_OFF_DELAYS = I915_READ(PCH_PP_OFF_DELAYS); - dev_priv->regfile.savePP_DIVISOR = I915_READ(PCH_PP_DIVISOR); - } else if (INTEL_INFO(dev)->gen <= 4) { - dev_priv->regfile.savePP_CONTROL = I915_READ(PP_CONTROL); - dev_priv->regfile.savePP_ON_DELAYS = I915_READ(PP_ON_DELAYS); - dev_priv->regfile.savePP_OFF_DELAYS = I915_READ(PP_OFF_DELAYS); - dev_priv->regfile.savePP_DIVISOR = I915_READ(PP_DIVISOR); - } - /* save FBC interval */ if (HAS_FBC(dev) && INTEL_INFO(dev)->gen <= 4 && !IS_G4X(dev)) dev_priv->regfile.saveFBC_CONTROL = I915_READ(FBC_CONTROL); @@ -64,33 +45,11 @@ static void i915_save_display(struct drm_device *dev) static void i915_restore_display(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); - u32 mask = 0xffffffff; /* Display arbitration */ if (INTEL_INFO(dev)->gen <= 4) I915_WRITE(DSPARB, dev_priv->regfile.saveDSPARB); - mask = ~LVDS_PORT_EN; - - /* LVDS state */ - if (HAS_PCH_IBX(dev) || HAS_PCH_CPT(dev)) - I915_WRITE(PCH_LVDS, dev_priv->regfile.saveLVDS & mask); - else if (INTEL_INFO(dev)->gen <= 4 && IS_MOBILE(dev) && !IS_I830(dev)) - I915_WRITE(LVDS, dev_priv->regfile.saveLVDS & mask); - - /* Panel power sequencer */ - if (HAS_PCH_SPLIT(dev)) { - I915_WRITE(PCH_PP_ON_DELAYS, dev_priv->regfile.savePP_ON_DELAYS); - I915_WRITE(PCH_PP_OFF_DELAYS, dev_priv->regfile.savePP_OFF_DELAYS); - I915_WRITE(PCH_PP_DIVISOR, dev_priv->regfile.savePP_DIVISOR); - I915_WRITE(PCH_PP_CONTROL, dev_priv->regfile.savePP_CONTROL); - } else if (INTEL_INFO(dev)->gen <= 4) { - I915_WRITE(PP_ON_DELAYS, dev_priv->regfile.savePP_ON_DELAYS); - I915_WRITE(PP_OFF_DELAYS, dev_priv->regfile.savePP_OFF_DELAYS); - I915_WRITE(PP_DIVISOR, dev_priv->regfile.savePP_DIVISOR); - I915_WRITE(PP_CONTROL, dev_priv->regfile.savePP_CONTROL); - } - /* only restore FBC info on the platform that supports FBC*/ intel_fbc_global_disable(dev_priv); diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 9086744..2491e4c 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -26,6 +26,40 @@ #include "i915_drv.h" +static void intel_breadcrumbs_hangcheck(unsigned long data) +{ + struct intel_engine_cs *engine = (struct intel_engine_cs *)data; + struct intel_breadcrumbs *b = &engine->breadcrumbs; + + if (!b->irq_enabled) + return; + + if (time_before(jiffies, b->timeout)) { + mod_timer(&b->hangcheck, b->timeout); + return; + } + + DRM_DEBUG("Hangcheck timer elapsed... %s idle\n", engine->name); + set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); + mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1); + + /* Ensure that even if the GPU hangs, we get woken up. + * + * However, note that if no one is waiting, we never notice + * a gpu hang. Eventually, we will have to wait for a resource + * held by the GPU and so trigger a hangcheck. In the most + * pathological case, this will be upon memory starvation! To + * prevent this, we also queue the hangcheck from the retire + * worker. + */ + i915_queue_hangcheck(engine->i915); +} + +static unsigned long wait_timeout(void) +{ + return round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES); +} + static void intel_breadcrumbs_fake_irq(unsigned long data) { struct intel_engine_cs *engine = (struct intel_engine_cs *)data; @@ -37,10 +71,8 @@ static void intel_breadcrumbs_fake_irq(unsigned long data) * every jiffie in order to kick the oldest waiter to do the * coherent seqno check. */ - rcu_read_lock(); if (intel_engine_wakeup(engine)) mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1); - rcu_read_unlock(); } static void irq_enable(struct intel_engine_cs *engine) @@ -51,13 +83,6 @@ static void irq_enable(struct intel_engine_cs *engine) */ engine->breadcrumbs.irq_posted = true; - /* Make sure the current hangcheck doesn't falsely accuse a just - * started irq handler from missing an interrupt (because the - * interrupt count still matches the stale value from when - * the irq handler was disabled, many hangchecks ago). - */ - engine->breadcrumbs.irq_wakeups++; - spin_lock_irq(&engine->i915->irq_lock); engine->irq_enable(engine); spin_unlock_irq(&engine->i915->irq_lock); @@ -98,17 +123,13 @@ static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b) } if (!b->irq_enabled || - test_bit(engine->id, &i915->gpu_error.missed_irq_rings)) + test_bit(engine->id, &i915->gpu_error.missed_irq_rings)) { mod_timer(&b->fake_irq, jiffies + 1); - - /* Ensure that even if the GPU hangs, we get woken up. - * - * However, note that if no one is waiting, we never notice - * a gpu hang. Eventually, we will have to wait for a resource - * held by the GPU and so trigger a hangcheck. In the most - * pathological case, this will be upon memory starvation! - */ - i915_queue_hangcheck(i915); + } else { + /* Ensure we never sleep indefinitely */ + GEM_BUG_ON(!time_after(b->timeout, jiffies)); + mod_timer(&b->hangcheck, b->timeout); + } } static void __intel_breadcrumbs_disable_irq(struct intel_breadcrumbs *b) @@ -211,7 +232,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, } rb_link_node(&wait->node, parent, p); rb_insert_color(&wait->node, &b->waiters); - GEM_BUG_ON(!first && !b->irq_seqno_bh); + GEM_BUG_ON(!first && !rcu_access_pointer(b->irq_seqno_bh)); if (completed) { struct rb_node *next = rb_next(completed); @@ -219,8 +240,9 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, GEM_BUG_ON(!next && !first); if (next && next != &wait->node) { GEM_BUG_ON(first); + b->timeout = wait_timeout(); b->first_wait = to_wait(next); - smp_store_mb(b->irq_seqno_bh, b->first_wait->tsk); + rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk); /* As there is a delay between reading the current * seqno, processing the completed tasks and selecting * the next waiter, we may have missed the interrupt @@ -245,8 +267,9 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, if (first) { GEM_BUG_ON(rb_first(&b->waiters) != &wait->node); + b->timeout = wait_timeout(); b->first_wait = wait; - smp_store_mb(b->irq_seqno_bh, wait->tsk); + rcu_assign_pointer(b->irq_seqno_bh, wait->tsk); /* After assigning ourselves as the new bottom-half, we must * perform a cursory check to prevent a missed interrupt. * Either we miss the interrupt whilst programming the hardware, @@ -257,7 +280,7 @@ static bool __intel_engine_add_wait(struct intel_engine_cs *engine, */ __intel_breadcrumbs_enable_irq(b); } - GEM_BUG_ON(!b->irq_seqno_bh); + GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh)); GEM_BUG_ON(!b->first_wait); GEM_BUG_ON(rb_first(&b->waiters) != &b->first_wait->node); @@ -277,11 +300,6 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine, return first; } -void intel_engine_enable_fake_irq(struct intel_engine_cs *engine) -{ - mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1); -} - static inline bool chain_wakeup(struct rb_node *rb, int priority) { return rb && to_wait(rb)->tsk->prio <= priority; @@ -317,7 +335,7 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine, const int priority = wakeup_priority(b, wait->tsk); struct rb_node *next; - GEM_BUG_ON(b->irq_seqno_bh != wait->tsk); + GEM_BUG_ON(rcu_access_pointer(b->irq_seqno_bh) != wait->tsk); /* We are the current bottom-half. Find the next candidate, * the first waiter in the queue on the remaining oldest @@ -359,14 +377,15 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine, * the interrupt, or if we have to handle an * exception rather than a seqno completion. */ + b->timeout = wait_timeout(); b->first_wait = to_wait(next); - smp_store_mb(b->irq_seqno_bh, b->first_wait->tsk); + rcu_assign_pointer(b->irq_seqno_bh, b->first_wait->tsk); if (b->first_wait->seqno != wait->seqno) __intel_breadcrumbs_enable_irq(b); - wake_up_process(b->irq_seqno_bh); + wake_up_process(b->first_wait->tsk); } else { b->first_wait = NULL; - WRITE_ONCE(b->irq_seqno_bh, NULL); + rcu_assign_pointer(b->irq_seqno_bh, NULL); __intel_breadcrumbs_disable_irq(b); } } else { @@ -380,7 +399,7 @@ out_unlock: GEM_BUG_ON(b->first_wait == wait); GEM_BUG_ON(rb_first(&b->waiters) != (b->first_wait ? &b->first_wait->node : NULL)); - GEM_BUG_ON(!b->irq_seqno_bh ^ RB_EMPTY_ROOT(&b->waiters)); + GEM_BUG_ON(!rcu_access_pointer(b->irq_seqno_bh) ^ RB_EMPTY_ROOT(&b->waiters)); spin_unlock(&b->lock); } @@ -536,6 +555,9 @@ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine) setup_timer(&b->fake_irq, intel_breadcrumbs_fake_irq, (unsigned long)engine); + setup_timer(&b->hangcheck, + intel_breadcrumbs_hangcheck, + (unsigned long)engine); /* Spawn a thread to provide a common bottom-half for all signals. * As this is an asynchronous interface we cannot steal the current @@ -560,6 +582,7 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine) if (!IS_ERR_OR_NULL(b->signaler)) kthread_stop(b->signaler); + del_timer_sync(&b->hangcheck); del_timer_sync(&b->fake_irq); } @@ -573,11 +596,9 @@ unsigned int intel_kick_waiters(struct drm_i915_private *i915) * RCU lock, i.e. as we call wake_up_process() we must be holding the * rcu_read_lock(). */ - rcu_read_lock(); for_each_engine(engine, i915) if (unlikely(intel_engine_wakeup(engine))) mask |= intel_engine_flag(engine); - rcu_read_unlock(); return mask; } diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 54bbb9c..d224f64 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1202,8 +1202,8 @@ void assert_panel_unlocked(struct drm_i915_private *dev_priv, if (HAS_PCH_SPLIT(dev)) { u32 port_sel; - pp_reg = PCH_PP_CONTROL; - port_sel = I915_READ(PCH_PP_ON_DELAYS) & PANEL_PORT_SELECT_MASK; + pp_reg = PP_CONTROL(0); + port_sel = I915_READ(PP_ON_DELAYS(0)) & PANEL_PORT_SELECT_MASK; if (port_sel == PANEL_PORT_SELECT_LVDS && I915_READ(PCH_LVDS) & LVDS_PIPEB_SELECT) @@ -1211,10 +1211,10 @@ void assert_panel_unlocked(struct drm_i915_private *dev_priv, /* XXX: else fix for eDP */ } else if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) { /* presumably write lock depends on pipe, not port select */ - pp_reg = VLV_PIPE_PP_CONTROL(pipe); + pp_reg = PP_CONTROL(pipe); panel_pipe = pipe; } else { - pp_reg = PP_CONTROL; + pp_reg = PP_CONTROL(0); if (I915_READ(LVDS) & LVDS_PIPEB_SELECT) panel_pipe = PIPE_B; } @@ -1959,12 +1959,12 @@ static void intel_enable_pipe(struct intel_crtc *crtc) * a plane. On ILK+ the pipe PLLs are integrated, so we don't * need the check. */ - if (HAS_GMCH_DISPLAY(dev_priv)) + if (HAS_GMCH_DISPLAY(dev_priv)) { if (intel_crtc_has_type(crtc->config, INTEL_OUTPUT_DSI)) assert_dsi_pll_enabled(dev_priv); else assert_pll_enabled(dev_priv, pipe); - else { + } else { if (crtc->config->has_pch_encoder) { /* if driving the PCH, we need FDI enabled */ assert_fdi_rx_pll_enabled(dev_priv, pch_transcoder); @@ -2147,33 +2147,6 @@ intel_fill_fb_ggtt_view(struct i915_ggtt_view *view, } } -static void -intel_fill_fb_info(struct drm_i915_private *dev_priv, - struct drm_framebuffer *fb) -{ - struct intel_rotation_info *info = &to_intel_framebuffer(fb)->rot_info; - unsigned int tile_size, tile_width, tile_height, cpp; - - tile_size = intel_tile_size(dev_priv); - - cpp = drm_format_plane_cpp(fb->pixel_format, 0); - intel_tile_dims(dev_priv, &tile_width, &tile_height, - fb->modifier[0], cpp); - - info->plane[0].width = DIV_ROUND_UP(fb->pitches[0], tile_width * cpp); - info->plane[0].height = DIV_ROUND_UP(fb->height, tile_height); - - if (info->pixel_format == DRM_FORMAT_NV12) { - cpp = drm_format_plane_cpp(fb->pixel_format, 1); - intel_tile_dims(dev_priv, &tile_width, &tile_height, - fb->modifier[1], cpp); - - info->uv_offset = fb->offsets[1]; - info->plane[1].width = DIV_ROUND_UP(fb->pitches[1], tile_width * cpp); - info->plane[1].height = DIV_ROUND_UP(fb->height / 2, tile_height); - } -} - static unsigned int intel_linear_alignment(const struct drm_i915_private *dev_priv) { if (INTEL_INFO(dev_priv)->gen >= 9) @@ -2206,16 +2179,15 @@ static unsigned int intel_surf_alignment(const struct drm_i915_private *dev_priv } } -int -intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, - unsigned int rotation) +struct i915_vma * +intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation) { struct drm_device *dev = fb->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct drm_i915_gem_object *obj = intel_fb_obj(fb); struct i915_ggtt_view view; + struct i915_vma *vma; u32 alignment; - int ret; WARN_ON(!mutex_is_locked(&dev->struct_mutex)); @@ -2240,75 +2212,112 @@ intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, */ intel_runtime_pm_get(dev_priv); - ret = i915_gem_object_pin_to_display_plane(obj, alignment, - &view); - if (ret) - goto err_pm; - - /* Install a fence for tiled scan-out. Pre-i965 always needs a - * fence, whereas 965+ only requires a fence if using - * framebuffer compression. For simplicity, we always install - * a fence as the cost is not that onerous. - */ - if (view.type == I915_GGTT_VIEW_NORMAL) { - ret = i915_gem_object_get_fence(obj); - if (ret == -EDEADLK) { - /* - * -EDEADLK means there are no free fences - * no pending flips. - * - * This is propagated to atomic, but it uses - * -EDEADLK to force a locking recovery, so - * change the returned error to -EBUSY. - */ - ret = -EBUSY; - goto err_unpin; - } else if (ret) - goto err_unpin; + vma = i915_gem_object_pin_to_display_plane(obj, alignment, &view); + if (IS_ERR(vma)) + goto err; - i915_gem_object_pin_fence(obj); + if (i915_vma_is_map_and_fenceable(vma)) { + /* Install a fence for tiled scan-out. Pre-i965 always needs a + * fence, whereas 965+ only requires a fence if using + * framebuffer compression. For simplicity, we always, when + * possible, install a fence as the cost is not that onerous. + * + * If we fail to fence the tiled scanout, then either the + * modeset will reject the change (which is highly unlikely as + * the affected systems, all but one, do not have unmappable + * space) or we will not be able to enable full powersaving + * techniques (also likely not to apply due to various limits + * FBC and the like impose on the size of the buffer, which + * presumably we violated anyway with this unmappable buffer). + * Anyway, it is presumably better to stumble onwards with + * something and try to run the system in a "less than optimal" + * mode that matches the user configuration. + */ + if (i915_vma_get_fence(vma) == 0) + i915_vma_pin_fence(vma); } +err: intel_runtime_pm_put(dev_priv); - return 0; - -err_unpin: - i915_gem_object_unpin_from_display_plane(obj, &view); -err_pm: - intel_runtime_pm_put(dev_priv); - return ret; + return vma; } void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation) { struct drm_i915_gem_object *obj = intel_fb_obj(fb); struct i915_ggtt_view view; + struct i915_vma *vma; WARN_ON(!mutex_is_locked(&obj->base.dev->struct_mutex)); intel_fill_fb_ggtt_view(&view, fb, rotation); + vma = i915_gem_object_to_ggtt(obj, &view); - if (view.type == I915_GGTT_VIEW_NORMAL) - i915_gem_object_unpin_fence(obj); + i915_vma_unpin_fence(vma); + i915_gem_object_unpin_from_display_plane(vma); +} - i915_gem_object_unpin_from_display_plane(obj, &view); +static int intel_fb_pitch(const struct drm_framebuffer *fb, int plane, + unsigned int rotation) +{ + if (intel_rotation_90_or_270(rotation)) + return to_intel_framebuffer(fb)->rotated[plane].pitch; + else + return fb->pitches[plane]; +} + +/* + * Convert the x/y offsets into a linear offset. + * Only valid with 0/180 degree rotation, which is fine since linear + * offset is only used with linear buffers on pre-hsw and tiled buffers + * with gen2/3, and 90/270 degree rotations isn't supported on any of them. + */ +u32 intel_fb_xy_to_linear(int x, int y, + const struct intel_plane_state *state, + int plane) +{ + const struct drm_framebuffer *fb = state->base.fb; + unsigned int cpp = drm_format_plane_cpp(fb->pixel_format, plane); + unsigned int pitch = fb->pitches[plane]; + + return y * pitch + x * cpp; +} + +/* + * Add the x/y offsets derived from fb->offsets[] to the user + * specified plane src x/y offsets. The resulting x/y offsets + * specify the start of scanout from the beginning of the gtt mapping. + */ +void intel_add_fb_offsets(int *x, int *y, + const struct intel_plane_state *state, + int plane) + +{ + const struct intel_framebuffer *intel_fb = to_intel_framebuffer(state->base.fb); + unsigned int rotation = state->base.rotation; + + if (intel_rotation_90_or_270(rotation)) { + *x += intel_fb->rotated[plane].x; + *y += intel_fb->rotated[plane].y; + } else { + *x += intel_fb->normal[plane].x; + *y += intel_fb->normal[plane].y; + } } /* - * Adjust the tile offset by moving the difference into - * the x/y offsets. - * * Input tile dimensions and pitch must already be * rotated to match x and y, and in pixel units. */ -static u32 intel_adjust_tile_offset(int *x, int *y, - unsigned int tile_width, - unsigned int tile_height, - unsigned int tile_size, - unsigned int pitch_tiles, - u32 old_offset, - u32 new_offset) -{ +static u32 _intel_adjust_tile_offset(int *x, int *y, + unsigned int tile_width, + unsigned int tile_height, + unsigned int tile_size, + unsigned int pitch_tiles, + u32 old_offset, + u32 new_offset) +{ + unsigned int pitch_pixels = pitch_tiles * tile_width; unsigned int tiles; WARN_ON(old_offset & (tile_size - 1)); @@ -2320,6 +2329,54 @@ static u32 intel_adjust_tile_offset(int *x, int *y, *y += tiles / pitch_tiles * tile_height; *x += tiles % pitch_tiles * tile_width; + /* minimize x in case it got needlessly big */ + *y += *x / pitch_pixels * tile_height; + *x %= pitch_pixels; + + return new_offset; +} + +/* + * Adjust the tile offset by moving the difference into + * the x/y offsets. + */ +static u32 intel_adjust_tile_offset(int *x, int *y, + const struct intel_plane_state *state, int plane, + u32 old_offset, u32 new_offset) +{ + const struct drm_i915_private *dev_priv = to_i915(state->base.plane->dev); + const struct drm_framebuffer *fb = state->base.fb; + unsigned int cpp = drm_format_plane_cpp(fb->pixel_format, plane); + unsigned int rotation = state->base.rotation; + unsigned int pitch = intel_fb_pitch(fb, plane, rotation); + + WARN_ON(new_offset > old_offset); + + if (fb->modifier[plane] != DRM_FORMAT_MOD_NONE) { + unsigned int tile_size, tile_width, tile_height; + unsigned int pitch_tiles; + + tile_size = intel_tile_size(dev_priv); + intel_tile_dims(dev_priv, &tile_width, &tile_height, + fb->modifier[plane], cpp); + + if (intel_rotation_90_or_270(rotation)) { + pitch_tiles = pitch / tile_height; + swap(tile_width, tile_height); + } else { + pitch_tiles = pitch / (tile_width * cpp); + } + + _intel_adjust_tile_offset(x, y, tile_width, tile_height, + tile_size, pitch_tiles, + old_offset, new_offset); + } else { + old_offset += *y * pitch + *x * cpp; + + *y = (old_offset - new_offset) / pitch; + *x = ((old_offset - new_offset) - *y * pitch) / cpp; + } + return new_offset; } @@ -2330,18 +2387,24 @@ static u32 intel_adjust_tile_offset(int *x, int *y, * In the 90/270 rotated case, x and y are assumed * to be already rotated to match the rotated GTT view, and * pitch is the tile_height aligned framebuffer height. + * + * This function is used when computing the derived information + * under intel_framebuffer, so using any of that information + * here is not allowed. Anything under drm_framebuffer can be + * used. This is why the user has to pass in the pitch since it + * is specified in the rotated orientation. */ -u32 intel_compute_tile_offset(int *x, int *y, - const struct drm_framebuffer *fb, int plane, - unsigned int pitch, - unsigned int rotation) +static u32 _intel_compute_tile_offset(const struct drm_i915_private *dev_priv, + int *x, int *y, + const struct drm_framebuffer *fb, int plane, + unsigned int pitch, + unsigned int rotation, + u32 alignment) { - const struct drm_i915_private *dev_priv = to_i915(fb->dev); uint64_t fb_modifier = fb->modifier[plane]; unsigned int cpp = drm_format_plane_cpp(fb->pixel_format, plane); - u32 offset, offset_aligned, alignment; + u32 offset, offset_aligned; - alignment = intel_surf_alignment(dev_priv, fb_modifier); if (alignment) alignment--; @@ -2369,9 +2432,9 @@ u32 intel_compute_tile_offset(int *x, int *y, offset = (tile_rows * pitch_tiles + tiles) * tile_size; offset_aligned = offset & ~alignment; - intel_adjust_tile_offset(x, y, tile_width, tile_height, - tile_size, pitch_tiles, - offset, offset_aligned); + _intel_adjust_tile_offset(x, y, tile_width, tile_height, + tile_size, pitch_tiles, + offset, offset_aligned); } else { offset = *y * pitch + *x * cpp; offset_aligned = offset & ~alignment; @@ -2383,6 +2446,177 @@ u32 intel_compute_tile_offset(int *x, int *y, return offset_aligned; } +u32 intel_compute_tile_offset(int *x, int *y, + const struct intel_plane_state *state, + int plane) +{ + const struct drm_i915_private *dev_priv = to_i915(state->base.plane->dev); + const struct drm_framebuffer *fb = state->base.fb; + unsigned int rotation = state->base.rotation; + int pitch = intel_fb_pitch(fb, plane, rotation); + u32 alignment; + + /* AUX_DIST needs only 4K alignment */ + if (fb->pixel_format == DRM_FORMAT_NV12 && plane == 1) + alignment = 4096; + else + alignment = intel_surf_alignment(dev_priv, fb->modifier[plane]); + + return _intel_compute_tile_offset(dev_priv, x, y, fb, plane, pitch, + rotation, alignment); +} + +/* Convert the fb->offset[] linear offset into x/y offsets */ +static void intel_fb_offset_to_xy(int *x, int *y, + const struct drm_framebuffer *fb, int plane) +{ + unsigned int cpp = drm_format_plane_cpp(fb->pixel_format, plane); + unsigned int pitch = fb->pitches[plane]; + u32 linear_offset = fb->offsets[plane]; + + *y = linear_offset / pitch; + *x = linear_offset % pitch / cpp; +} + +static unsigned int intel_fb_modifier_to_tiling(uint64_t fb_modifier) +{ + switch (fb_modifier) { + case I915_FORMAT_MOD_X_TILED: + return I915_TILING_X; + case I915_FORMAT_MOD_Y_TILED: + return I915_TILING_Y; + default: + return I915_TILING_NONE; + } +} + +static int +intel_fill_fb_info(struct drm_i915_private *dev_priv, + struct drm_framebuffer *fb) +{ + struct intel_framebuffer *intel_fb = to_intel_framebuffer(fb); + struct intel_rotation_info *rot_info = &intel_fb->rot_info; + u32 gtt_offset_rotated = 0; + unsigned int max_size = 0; + uint32_t format = fb->pixel_format; + int i, num_planes = drm_format_num_planes(format); + unsigned int tile_size = intel_tile_size(dev_priv); + + for (i = 0; i < num_planes; i++) { + unsigned int width, height; + unsigned int cpp, size; + u32 offset; + int x, y; + + cpp = drm_format_plane_cpp(format, i); + width = drm_format_plane_width(fb->width, format, i); + height = drm_format_plane_height(fb->height, format, i); + + intel_fb_offset_to_xy(&x, &y, fb, i); + + /* + * The fence (if used) is aligned to the start of the object + * so having the framebuffer wrap around across the edge of the + * fenced region doesn't really work. We have no API to configure + * the fence start offset within the object (nor could we probably + * on gen2/3). So it's just easier if we just require that the + * fb layout agrees with the fence layout. We already check that the + * fb stride matches the fence stride elsewhere. + */ + if (i915_gem_object_is_tiled(intel_fb->obj) && + (x + width) * cpp > fb->pitches[i]) { + DRM_DEBUG("bad fb plane %d offset: 0x%x\n", + i, fb->offsets[i]); + return -EINVAL; + } + + /* + * First pixel of the framebuffer from + * the start of the normal gtt mapping. + */ + intel_fb->normal[i].x = x; + intel_fb->normal[i].y = y; + + offset = _intel_compute_tile_offset(dev_priv, &x, &y, + fb, 0, fb->pitches[i], + DRM_ROTATE_0, tile_size); + offset /= tile_size; + + if (fb->modifier[i] != DRM_FORMAT_MOD_NONE) { + unsigned int tile_width, tile_height; + unsigned int pitch_tiles; + struct drm_rect r; + + intel_tile_dims(dev_priv, &tile_width, &tile_height, + fb->modifier[i], cpp); + + rot_info->plane[i].offset = offset; + rot_info->plane[i].stride = DIV_ROUND_UP(fb->pitches[i], tile_width * cpp); + rot_info->plane[i].width = DIV_ROUND_UP(x + width, tile_width); + rot_info->plane[i].height = DIV_ROUND_UP(y + height, tile_height); + + intel_fb->rotated[i].pitch = + rot_info->plane[i].height * tile_height; + + /* how many tiles does this plane need */ + size = rot_info->plane[i].stride * rot_info->plane[i].height; + /* + * If the plane isn't horizontally tile aligned, + * we need one more tile. + */ + if (x != 0) + size++; + + /* rotate the x/y offsets to match the GTT view */ + r.x1 = x; + r.y1 = y; + r.x2 = x + width; + r.y2 = y + height; + drm_rect_rotate(&r, + rot_info->plane[i].width * tile_width, + rot_info->plane[i].height * tile_height, + DRM_ROTATE_270); + x = r.x1; + y = r.y1; + + /* rotate the tile dimensions to match the GTT view */ + pitch_tiles = intel_fb->rotated[i].pitch / tile_height; + swap(tile_width, tile_height); + + /* + * We only keep the x/y offsets, so push all of the + * gtt offset into the x/y offsets. + */ + _intel_adjust_tile_offset(&x, &y, tile_size, + tile_width, tile_height, pitch_tiles, + gtt_offset_rotated * tile_size, 0); + + gtt_offset_rotated += rot_info->plane[i].width * rot_info->plane[i].height; + + /* + * First pixel of the framebuffer from + * the start of the rotated gtt mapping. + */ + intel_fb->rotated[i].x = x; + intel_fb->rotated[i].y = y; + } else { + size = DIV_ROUND_UP((y + height) * fb->pitches[i] + + x * cpp, tile_size); + } + + /* how many tiles in total needed in the bo */ + max_size = max(max_size, offset + size); + } + + if (max_size * tile_size > to_intel_framebuffer(fb)->obj->base.size) { + DRM_DEBUG("fb too big for bo (need %u bytes, have %zu bytes)\n", + max_size * tile_size, to_intel_framebuffer(fb)->obj->base.size); + return -EINVAL; + } + + return 0; +} + static int i9xx_format_to_fourcc(int format) { switch (format) { @@ -2552,7 +2786,7 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, continue; obj = intel_fb_obj(fb); - if (i915_gem_obj_ggtt_offset(obj) == plane_config->base) { + if (i915_gem_object_ggtt_offset(obj, NULL) == plane_config->base) { drm_framebuffer_reference(fb); goto valid_fb; } @@ -2604,6 +2838,169 @@ valid_fb: &obj->frontbuffer_bits); } +static int skl_max_plane_width(const struct drm_framebuffer *fb, int plane, + unsigned int rotation) +{ + int cpp = drm_format_plane_cpp(fb->pixel_format, plane); + + switch (fb->modifier[plane]) { + case DRM_FORMAT_MOD_NONE: + case I915_FORMAT_MOD_X_TILED: + switch (cpp) { + case 8: + return 4096; + case 4: + case 2: + case 1: + return 8192; + default: + MISSING_CASE(cpp); + break; + } + break; + case I915_FORMAT_MOD_Y_TILED: + case I915_FORMAT_MOD_Yf_TILED: + switch (cpp) { + case 8: + return 2048; + case 4: + return 4096; + case 2: + case 1: + return 8192; + default: + MISSING_CASE(cpp); + break; + } + break; + default: + MISSING_CASE(fb->modifier[plane]); + } + + return 2048; +} + +static int skl_check_main_surface(struct intel_plane_state *plane_state) +{ + const struct drm_i915_private *dev_priv = to_i915(plane_state->base.plane->dev); + const struct drm_framebuffer *fb = plane_state->base.fb; + unsigned int rotation = plane_state->base.rotation; + int x = plane_state->base.src.x1 >> 16; + int y = plane_state->base.src.y1 >> 16; + int w = drm_rect_width(&plane_state->base.src) >> 16; + int h = drm_rect_height(&plane_state->base.src) >> 16; + int max_width = skl_max_plane_width(fb, 0, rotation); + int max_height = 4096; + u32 alignment, offset, aux_offset = plane_state->aux.offset; + + if (w > max_width || h > max_height) { + DRM_DEBUG_KMS("requested Y/RGB source size %dx%d too big (limit %dx%d)\n", + w, h, max_width, max_height); + return -EINVAL; + } + + intel_add_fb_offsets(&x, &y, plane_state, 0); + offset = intel_compute_tile_offset(&x, &y, plane_state, 0); + + alignment = intel_surf_alignment(dev_priv, fb->modifier[0]); + + /* + * AUX surface offset is specified as the distance from the + * main surface offset, and it must be non-negative. Make + * sure that is what we will get. + */ + if (offset > aux_offset) + offset = intel_adjust_tile_offset(&x, &y, plane_state, 0, + offset, aux_offset & ~(alignment - 1)); + + /* + * When using an X-tiled surface, the plane blows up + * if the x offset + width exceed the stride. + * + * TODO: linear and Y-tiled seem fine, Yf untested, + */ + if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED) { + int cpp = drm_format_plane_cpp(fb->pixel_format, 0); + + while ((x + w) * cpp > fb->pitches[0]) { + if (offset == 0) { + DRM_DEBUG_KMS("Unable to find suitable display surface offset\n"); + return -EINVAL; + } + + offset = intel_adjust_tile_offset(&x, &y, plane_state, 0, + offset, offset - alignment); + } + } + + plane_state->main.offset = offset; + plane_state->main.x = x; + plane_state->main.y = y; + + return 0; +} + +static int skl_check_nv12_aux_surface(struct intel_plane_state *plane_state) +{ + const struct drm_framebuffer *fb = plane_state->base.fb; + unsigned int rotation = plane_state->base.rotation; + int max_width = skl_max_plane_width(fb, 1, rotation); + int max_height = 4096; + int x = plane_state->base.src.x1 >> 17; + int y = plane_state->base.src.y1 >> 17; + int w = drm_rect_width(&plane_state->base.src) >> 17; + int h = drm_rect_height(&plane_state->base.src) >> 17; + u32 offset; + + intel_add_fb_offsets(&x, &y, plane_state, 1); + offset = intel_compute_tile_offset(&x, &y, plane_state, 1); + + /* FIXME not quite sure how/if these apply to the chroma plane */ + if (w > max_width || h > max_height) { + DRM_DEBUG_KMS("CbCr source size %dx%d too big (limit %dx%d)\n", + w, h, max_width, max_height); + return -EINVAL; + } + + plane_state->aux.offset = offset; + plane_state->aux.x = x; + plane_state->aux.y = y; + + return 0; +} + +int skl_check_plane_surface(struct intel_plane_state *plane_state) +{ + const struct drm_framebuffer *fb = plane_state->base.fb; + unsigned int rotation = plane_state->base.rotation; + int ret; + + /* Rotate src coordinates to match rotated GTT view */ + if (intel_rotation_90_or_270(rotation)) + drm_rect_rotate(&plane_state->base.src, + fb->width, fb->height, DRM_ROTATE_270); + + /* + * Handle the AUX surface first since + * the main surface setup depends on it. + */ + if (fb->pixel_format == DRM_FORMAT_NV12) { + ret = skl_check_nv12_aux_surface(plane_state); + if (ret) + return ret; + } else { + plane_state->aux.offset = ~0xfff; + plane_state->aux.x = 0; + plane_state->aux.y = 0; + } + + ret = skl_check_main_surface(plane_state); + if (ret) + return ret; + + return 0; +} + static void i9xx_update_primary_plane(struct drm_plane *primary, const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) @@ -2618,7 +3015,6 @@ static void i9xx_update_primary_plane(struct drm_plane *primary, u32 dspcntr; i915_reg_t reg = DSPCNTR(plane); unsigned int rotation = plane_state->base.rotation; - int cpp = drm_format_plane_cpp(fb->pixel_format, 0); int x = plane_state->base.src.x1 >> 16; int y = plane_state->base.src.y1 >> 16; @@ -2671,36 +3067,31 @@ static void i9xx_update_primary_plane(struct drm_plane *primary, BUG(); } - if (INTEL_INFO(dev)->gen >= 4 && i915_gem_object_is_tiled(obj)) + if (INTEL_GEN(dev_priv) >= 4 && + fb->modifier[0] == I915_FORMAT_MOD_X_TILED) dspcntr |= DISPPLANE_TILED; if (IS_G4X(dev)) dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE; - linear_offset = y * fb->pitches[0] + x * cpp; + intel_add_fb_offsets(&x, &y, plane_state, 0); - if (INTEL_INFO(dev)->gen >= 4) { + if (INTEL_INFO(dev)->gen >= 4) intel_crtc->dspaddr_offset = - intel_compute_tile_offset(&x, &y, fb, 0, - fb->pitches[0], rotation); - linear_offset -= intel_crtc->dspaddr_offset; - } else { - intel_crtc->dspaddr_offset = linear_offset; - } + intel_compute_tile_offset(&x, &y, plane_state, 0); if (rotation == DRM_ROTATE_180) { dspcntr |= DISPPLANE_ROTATE_180; x += (crtc_state->pipe_src_w - 1); y += (crtc_state->pipe_src_h - 1); - - /* Finding the last pixel of the last line of the display - data and adding to linear_offset*/ - linear_offset += - (crtc_state->pipe_src_h - 1) * fb->pitches[0] + - (crtc_state->pipe_src_w - 1) * cpp; } + linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); + + if (INTEL_INFO(dev)->gen < 4) + intel_crtc->dspaddr_offset = linear_offset; + intel_crtc->adjusted_x = x; intel_crtc->adjusted_y = y; @@ -2709,11 +3100,12 @@ static void i9xx_update_primary_plane(struct drm_plane *primary, I915_WRITE(DSPSTRIDE(plane), fb->pitches[0]); if (INTEL_INFO(dev)->gen >= 4) { I915_WRITE(DSPSURF(plane), - i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset); + intel_fb_gtt_offset(fb, rotation) + + intel_crtc->dspaddr_offset); I915_WRITE(DSPTILEOFF(plane), (y << 16) | x); I915_WRITE(DSPLINOFF(plane), linear_offset); } else - I915_WRITE(DSPADDR(plane), i915_gem_obj_ggtt_offset(obj) + linear_offset); + I915_WRITE(DSPADDR(plane), i915_gem_object_ggtt_offset(obj, NULL) + linear_offset); POSTING_READ(reg); } @@ -2741,13 +3133,11 @@ static void ironlake_update_primary_plane(struct drm_plane *primary, struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_framebuffer *fb = plane_state->base.fb; - struct drm_i915_gem_object *obj = intel_fb_obj(fb); int plane = intel_crtc->plane; u32 linear_offset; u32 dspcntr; i915_reg_t reg = DSPCNTR(plane); unsigned int rotation = plane_state->base.rotation; - int cpp = drm_format_plane_cpp(fb->pixel_format, 0); int x = plane_state->base.src.x1 >> 16; int y = plane_state->base.src.y1 >> 16; @@ -2780,32 +3170,28 @@ static void ironlake_update_primary_plane(struct drm_plane *primary, BUG(); } - if (i915_gem_object_is_tiled(obj)) + if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED) dspcntr |= DISPPLANE_TILED; if (!IS_HASWELL(dev) && !IS_BROADWELL(dev)) dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE; - linear_offset = y * fb->pitches[0] + x * cpp; + intel_add_fb_offsets(&x, &y, plane_state, 0); + intel_crtc->dspaddr_offset = - intel_compute_tile_offset(&x, &y, fb, 0, - fb->pitches[0], rotation); - linear_offset -= intel_crtc->dspaddr_offset; + intel_compute_tile_offset(&x, &y, plane_state, 0); + if (rotation == DRM_ROTATE_180) { dspcntr |= DISPPLANE_ROTATE_180; if (!IS_HASWELL(dev) && !IS_BROADWELL(dev)) { x += (crtc_state->pipe_src_w - 1); y += (crtc_state->pipe_src_h - 1); - - /* Finding the last pixel of the last line of the display - data and adding to linear_offset*/ - linear_offset += - (crtc_state->pipe_src_h - 1) * fb->pitches[0] + - (crtc_state->pipe_src_w - 1) * cpp; } } + linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); + intel_crtc->adjusted_x = x; intel_crtc->adjusted_y = y; @@ -2813,7 +3199,8 @@ static void ironlake_update_primary_plane(struct drm_plane *primary, I915_WRITE(DSPSTRIDE(plane), fb->pitches[0]); I915_WRITE(DSPSURF(plane), - i915_gem_obj_ggtt_offset(obj) + intel_crtc->dspaddr_offset); + intel_fb_gtt_offset(fb, rotation) + + intel_crtc->dspaddr_offset); if (IS_HASWELL(dev) || IS_BROADWELL(dev)) { I915_WRITE(DSPOFFSET(plane), (y << 16) | x); } else { @@ -2835,32 +3222,21 @@ u32 intel_fb_stride_alignment(const struct drm_i915_private *dev_priv, } } -u32 intel_plane_obj_offset(struct intel_plane *intel_plane, - struct drm_i915_gem_object *obj, - unsigned int plane) +u32 intel_fb_gtt_offset(struct drm_framebuffer *fb, + unsigned int rotation) { + struct drm_i915_gem_object *obj = intel_fb_obj(fb); struct i915_ggtt_view view; struct i915_vma *vma; - u64 offset; - intel_fill_fb_ggtt_view(&view, intel_plane->base.state->fb, - intel_plane->base.state->rotation); + intel_fill_fb_ggtt_view(&view, fb, rotation); - vma = i915_gem_obj_to_ggtt_view(obj, &view); + vma = i915_gem_object_to_ggtt(obj, &view); if (WARN(!vma, "ggtt vma for display object not found! (view=%u)\n", - view.type)) + view.type)) return -1; - offset = vma->node.start; - - if (plane == 1) { - offset += vma->ggtt_view.params.rotated.uv_start_page * - PAGE_SIZE; - } - - WARN_ON(upper_32_bits(offset)); - - return lower_32_bits(offset); + return i915_ggtt_offset(vma); } static void skl_detach_scaler(struct intel_crtc *intel_crtc, int id) @@ -2890,6 +3266,28 @@ static void skl_detach_scalers(struct intel_crtc *intel_crtc) } } +u32 skl_plane_stride(const struct drm_framebuffer *fb, int plane, + unsigned int rotation) +{ + const struct drm_i915_private *dev_priv = to_i915(fb->dev); + u32 stride = intel_fb_pitch(fb, plane, rotation); + + /* + * The stride is either expressed as a multiple of 64 bytes chunks for + * linear buffers or in number of tiles for tiled buffers. + */ + if (intel_rotation_90_or_270(rotation)) { + int cpp = drm_format_plane_cpp(fb->pixel_format, plane); + + stride /= intel_tile_height(dev_priv, fb->modifier[0], cpp); + } else { + stride /= intel_fb_stride_alignment(dev_priv, fb->modifier[0], + fb->pixel_format); + } + + return stride; +} + u32 skl_plane_ctl_format(uint32_t pixel_format) { switch (pixel_format) { @@ -2979,16 +3377,14 @@ static void skylake_update_primary_plane(struct drm_plane *plane, struct drm_i915_private *dev_priv = to_i915(dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_framebuffer *fb = plane_state->base.fb; - struct drm_i915_gem_object *obj = intel_fb_obj(fb); int pipe = intel_crtc->pipe; - u32 plane_ctl, stride_div, stride; - u32 tile_height, plane_offset, plane_size; + u32 plane_ctl; unsigned int rotation = plane_state->base.rotation; - int x_offset, y_offset; - u32 surf_addr; + u32 stride = skl_plane_stride(fb, 0, rotation); + u32 surf_addr = plane_state->main.offset; int scaler_id = plane_state->scaler_id; - int src_x = plane_state->base.src.x1 >> 16; - int src_y = plane_state->base.src.y1 >> 16; + int src_x = plane_state->main.x; + int src_y = plane_state->main.y; int src_w = drm_rect_width(&plane_state->base.src) >> 16; int src_h = drm_rect_height(&plane_state->base.src) >> 16; int dst_x = plane_state->base.dst.x1; @@ -3005,36 +3401,19 @@ static void skylake_update_primary_plane(struct drm_plane *plane, plane_ctl |= PLANE_CTL_PLANE_GAMMA_DISABLE; plane_ctl |= skl_plane_ctl_rotation(rotation); - stride_div = intel_fb_stride_alignment(dev_priv, fb->modifier[0], - fb->pixel_format); - surf_addr = intel_plane_obj_offset(to_intel_plane(plane), obj, 0); - - WARN_ON(drm_rect_width(&plane_state->base.src) == 0); - - if (intel_rotation_90_or_270(rotation)) { - int cpp = drm_format_plane_cpp(fb->pixel_format, 0); - - /* stride = Surface height in tiles */ - tile_height = intel_tile_height(dev_priv, fb->modifier[0], cpp); - stride = DIV_ROUND_UP(fb->height, tile_height); - x_offset = stride * tile_height - src_y - src_h; - y_offset = src_x; - plane_size = (src_w - 1) << 16 | (src_h - 1); - } else { - stride = fb->pitches[0] / stride_div; - x_offset = src_x; - y_offset = src_y; - plane_size = (src_h - 1) << 16 | (src_w - 1); - } - plane_offset = y_offset << 16 | x_offset; + /* Sizes are 0 based */ + src_w--; + src_h--; + dst_w--; + dst_h--; - intel_crtc->adjusted_x = x_offset; - intel_crtc->adjusted_y = y_offset; + intel_crtc->adjusted_x = src_x; + intel_crtc->adjusted_y = src_y; I915_WRITE(PLANE_CTL(pipe, 0), plane_ctl); - I915_WRITE(PLANE_OFFSET(pipe, 0), plane_offset); - I915_WRITE(PLANE_SIZE(pipe, 0), plane_size); + I915_WRITE(PLANE_OFFSET(pipe, 0), (src_y << 16) | src_x); I915_WRITE(PLANE_STRIDE(pipe, 0), stride); + I915_WRITE(PLANE_SIZE(pipe, 0), (src_h << 16) | src_w); if (scaler_id >= 0) { uint32_t ps_ctrl = 0; @@ -3051,7 +3430,8 @@ static void skylake_update_primary_plane(struct drm_plane *plane, I915_WRITE(PLANE_POS(pipe, 0), (dst_y << 16) | dst_x); } - I915_WRITE(PLANE_SURF(pipe, 0), surf_addr); + I915_WRITE(PLANE_SURF(pipe, 0), + intel_fb_gtt_offset(fb, rotation) + surf_addr); POSTING_READ(PLANE_SURF(pipe, 0)); } @@ -3093,40 +3473,113 @@ static void intel_update_primary_planes(struct drm_device *dev) for_each_crtc(dev, crtc) { struct intel_plane *plane = to_intel_plane(crtc->primary); - struct intel_plane_state *plane_state; - - drm_modeset_lock_crtc(crtc, &plane->base); - plane_state = to_intel_plane_state(plane->base.state); + struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); if (plane_state->base.visible) plane->update_plane(&plane->base, to_intel_crtc_state(crtc->state), plane_state); + } +} + +static int +__intel_display_resume(struct drm_device *dev, + struct drm_atomic_state *state) +{ + struct drm_crtc_state *crtc_state; + struct drm_crtc *crtc; + int i, ret; + + intel_modeset_setup_hw_state(dev); + i915_redisable_vga(dev); + + if (!state) + return 0; - drm_modeset_unlock_crtc(crtc); + for_each_crtc_in_state(state, crtc, crtc_state, i) { + /* + * Force recalculation even if we restore + * current state. With fast modeset this may not result + * in a modeset when the state is compatible. + */ + crtc_state->mode_changed = true; } + + /* ignore any reset values/BIOS leftovers in the WM registers */ + to_intel_atomic_state(state)->skip_intermediate_wm = true; + + ret = drm_atomic_commit(state); + + WARN_ON(ret == -EDEADLK); + return ret; +} + +static bool gpu_reset_clobbers_display(struct drm_i915_private *dev_priv) +{ + return intel_has_gpu_reset(dev_priv) && + INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv); } void intel_prepare_reset(struct drm_i915_private *dev_priv) { - /* no reset support for gen2 */ - if (IS_GEN2(dev_priv)) - return; + struct drm_device *dev = &dev_priv->drm; + struct drm_modeset_acquire_ctx *ctx = &dev_priv->reset_ctx; + struct drm_atomic_state *state; + int ret; - /* reset doesn't touch the display */ - if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) + /* + * Need mode_config.mutex so that we don't + * trample ongoing ->detect() and whatnot. + */ + mutex_lock(&dev->mode_config.mutex); + drm_modeset_acquire_init(ctx, 0); + while (1) { + ret = drm_modeset_lock_all_ctx(dev, ctx); + if (ret != -EDEADLK) + break; + + drm_modeset_backoff(ctx); + } + + /* reset doesn't touch the display, but flips might get nuked anyway, */ + if (!i915.force_reset_modeset_test && + !gpu_reset_clobbers_display(dev_priv)) return; - drm_modeset_lock_all(&dev_priv->drm); /* * Disabling the crtcs gracefully seems nicer. Also the * g33 docs say we should at least disable all the planes. */ - intel_display_suspend(&dev_priv->drm); + state = drm_atomic_helper_duplicate_state(dev, ctx); + if (IS_ERR(state)) { + ret = PTR_ERR(state); + state = NULL; + DRM_ERROR("Duplicating state failed with %i\n", ret); + goto err; + } + + ret = drm_atomic_helper_disable_all(dev, ctx); + if (ret) { + DRM_ERROR("Suspending crtc's failed with %i\n", ret); + goto err; + } + + dev_priv->modeset_restore_state = state; + state->acquire_ctx = ctx; + return; + +err: + drm_atomic_state_free(state); } void intel_finish_reset(struct drm_i915_private *dev_priv) { + struct drm_device *dev = &dev_priv->drm; + struct drm_modeset_acquire_ctx *ctx = &dev_priv->reset_ctx; + struct drm_atomic_state *state = dev_priv->modeset_restore_state; + int ret; + /* * Flips in the rings will be nuked by the reset, * so complete all pending flips so that user space @@ -3134,44 +3587,51 @@ void intel_finish_reset(struct drm_i915_private *dev_priv) */ intel_complete_page_flips(dev_priv); - /* no reset support for gen2 */ - if (IS_GEN2(dev_priv)) - return; + dev_priv->modeset_restore_state = NULL; /* reset doesn't touch the display */ - if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) { + if (!gpu_reset_clobbers_display(dev_priv)) { + if (!state) { + /* + * Flips in the rings have been nuked by the reset, + * so update the base address of all primary + * planes to the the last fb to make sure we're + * showing the correct fb after a reset. + * + * FIXME: Atomic will make this obsolete since we won't schedule + * CS-based flips (which might get lost in gpu resets) any more. + */ + intel_update_primary_planes(dev); + } else { + ret = __intel_display_resume(dev, state); + if (ret) + DRM_ERROR("Restoring old state failed with %i\n", ret); + } + } else { /* - * Flips in the rings have been nuked by the reset, - * so update the base address of all primary - * planes to the the last fb to make sure we're - * showing the correct fb after a reset. - * - * FIXME: Atomic will make this obsolete since we won't schedule - * CS-based flips (which might get lost in gpu resets) any more. + * The display has been reset as well, + * so need a full re-initialization. */ - intel_update_primary_planes(&dev_priv->drm); - return; - } + intel_runtime_pm_disable_interrupts(dev_priv); + intel_runtime_pm_enable_interrupts(dev_priv); - /* - * The display has been reset as well, - * so need a full re-initialization. - */ - intel_runtime_pm_disable_interrupts(dev_priv); - intel_runtime_pm_enable_interrupts(dev_priv); + intel_modeset_init_hw(dev); - intel_modeset_init_hw(&dev_priv->drm); + spin_lock_irq(&dev_priv->irq_lock); + if (dev_priv->display.hpd_irq_setup) + dev_priv->display.hpd_irq_setup(dev_priv); + spin_unlock_irq(&dev_priv->irq_lock); - spin_lock_irq(&dev_priv->irq_lock); - if (dev_priv->display.hpd_irq_setup) - dev_priv->display.hpd_irq_setup(dev_priv); - spin_unlock_irq(&dev_priv->irq_lock); - - intel_display_resume(&dev_priv->drm); + ret = __intel_display_resume(dev, state); + if (ret) + DRM_ERROR("Restoring old state failed with %i\n", ret); - intel_hpd_init(dev_priv); + intel_hpd_init(dev_priv); + } - drm_modeset_unlock_all(&dev_priv->drm); + drm_modeset_drop_locks(ctx); + drm_modeset_acquire_fini(ctx); + mutex_unlock(&dev->mode_config.mutex); } static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc) @@ -9411,7 +9871,7 @@ static void assert_can_disable_lcpll(struct drm_i915_private *dev_priv) I915_STATE_WARN(I915_READ(SPLL_CTL) & SPLL_PLL_ENABLE, "SPLL enabled\n"); I915_STATE_WARN(I915_READ(WRPLL_CTL(0)) & WRPLL_PLL_ENABLE, "WRPLL1 enabled\n"); I915_STATE_WARN(I915_READ(WRPLL_CTL(1)) & WRPLL_PLL_ENABLE, "WRPLL2 enabled\n"); - I915_STATE_WARN(I915_READ(PCH_PP_STATUS) & PP_ON, "Panel power on\n"); + I915_STATE_WARN(I915_READ(PP_STATUS(0)) & PP_ON, "Panel power on\n"); I915_STATE_WARN(I915_READ(BLC_PWM_CPU_CTL2) & BLM_PWM_ENABLE, "CPU PWM1 enabled\n"); if (IS_HASWELL(dev)) @@ -11198,7 +11658,7 @@ static int intel_gen4_queue_flip(struct drm_device *dev, MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); intel_ring_emit(ring, fb->pitches[0]); intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset | - i915_gem_object_get_tiling(obj)); + intel_fb_modifier_to_tiling(fb->modifier[0])); /* XXX Enabling the panel-fitter across page-flip is so far * untested on non-native modes, so ignore it for now. @@ -11230,7 +11690,8 @@ static int intel_gen6_queue_flip(struct drm_device *dev, intel_ring_emit(ring, MI_DISPLAY_FLIP | MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); - intel_ring_emit(ring, fb->pitches[0] | i915_gem_object_get_tiling(obj)); + intel_ring_emit(ring, fb->pitches[0] | + intel_fb_modifier_to_tiling(fb->modifier[0])); intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); /* Contrary to the suggestions in the documentation, @@ -11325,7 +11786,8 @@ static int intel_gen7_queue_flip(struct drm_device *dev, intel_ring_emit(ring, MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT); intel_ring_emit_reg(ring, DERRMR); - intel_ring_emit(ring, req->engine->scratch.gtt_offset + 256); + intel_ring_emit(ring, + i915_ggtt_offset(req->engine->scratch) + 256); if (IS_GEN8(dev)) { intel_ring_emit(ring, 0); intel_ring_emit(ring, MI_NOOP); @@ -11333,7 +11795,8 @@ static int intel_gen7_queue_flip(struct drm_device *dev, } intel_ring_emit(ring, MI_DISPLAY_FLIP_I915 | plane_bit); - intel_ring_emit(ring, fb->pitches[0] | i915_gem_object_get_tiling(obj)); + intel_ring_emit(ring, fb->pitches[0] | + intel_fb_modifier_to_tiling(fb->modifier[0])); intel_ring_emit(ring, intel_crtc->flip_work->gtt_offset); intel_ring_emit(ring, (MI_NOOP)); @@ -11382,7 +11845,7 @@ static void skl_do_mmio_flip(struct intel_crtc *intel_crtc, struct drm_i915_private *dev_priv = to_i915(dev); struct drm_framebuffer *fb = intel_crtc->base.primary->fb; const enum pipe pipe = intel_crtc->pipe; - u32 ctl, stride, tile_height; + u32 ctl, stride = skl_plane_stride(fb, 0, rotation); ctl = I915_READ(PLANE_CTL(pipe, 0)); ctl &= ~PLANE_CTL_TILED_MASK; @@ -11403,20 +11866,6 @@ static void skl_do_mmio_flip(struct intel_crtc *intel_crtc, } /* - * The stride is either expressed as a multiple of 64 bytes chunks for - * linear buffers or in number of tiles for tiled buffers. - */ - if (intel_rotation_90_or_270(rotation)) { - /* stride = Surface height in tiles */ - tile_height = intel_tile_height(dev_priv, fb->modifier[0], 0); - stride = DIV_ROUND_UP(fb->height, tile_height); - } else { - stride = fb->pitches[0] / - intel_fb_stride_alignment(dev_priv, fb->modifier[0], - fb->pixel_format); - } - - /* * Both PLANE_CTL and PLANE_STRIDE are not updated on vblank but on * PLANE_SURF updates, the update is then guaranteed to be atomic. */ @@ -11432,15 +11881,13 @@ static void ilk_do_mmio_flip(struct intel_crtc *intel_crtc, { struct drm_device *dev = intel_crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct intel_framebuffer *intel_fb = - to_intel_framebuffer(intel_crtc->base.primary->fb); - struct drm_i915_gem_object *obj = intel_fb->obj; + struct drm_framebuffer *fb = intel_crtc->base.primary->fb; i915_reg_t reg = DSPCNTR(intel_crtc->plane); u32 dspcntr; dspcntr = I915_READ(reg); - if (i915_gem_object_is_tiled(obj)) + if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED) dspcntr |= DISPPLANE_TILED; else dspcntr &= ~DISPPLANE_TILED; @@ -11577,6 +12024,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, struct intel_engine_cs *engine; bool mmio_flip; struct drm_i915_gem_request *request; + struct i915_vma *vma; int ret; /* @@ -11668,8 +12116,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) { engine = &dev_priv->engine[BCS]; - if (i915_gem_object_get_tiling(obj) != - i915_gem_object_get_tiling(intel_fb_obj(work->old_fb))) + if (fb->modifier[0] != old_fb->modifier[0]) /* vlv: DISPLAY_FLIP fails to change tiling */ engine = NULL; } else if (IS_IVYBRIDGE(dev) || IS_HASWELL(dev)) { @@ -11685,12 +12132,13 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc, mmio_flip = use_mmio_flip(engine, obj); - ret = intel_pin_and_fence_fb_obj(fb, primary->state->rotation); - if (ret) + vma = intel_pin_and_fence_fb_obj(fb, primary->state->rotation); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); goto cleanup_pending; + } - work->gtt_offset = intel_plane_obj_offset(to_intel_plane(primary), - obj, 0); + work->gtt_offset = intel_fb_gtt_offset(fb, primary->state->rotation); work->gtt_offset += intel_crtc->dspaddr_offset; work->rotation = crtc->primary->state->rotation; @@ -14035,7 +14483,11 @@ intel_prepare_plane_fb(struct drm_plane *plane, if (ret) DRM_DEBUG_KMS("failed to attach phys object\n"); } else { - ret = intel_pin_and_fence_fb_obj(fb, new_state->rotation); + struct i915_vma *vma; + + vma = intel_pin_and_fence_fb_obj(fb, new_state->rotation); + if (IS_ERR(vma)) + ret = PTR_ERR(vma); } if (ret == 0) { @@ -14110,12 +14562,14 @@ intel_check_primary_plane(struct drm_plane *plane, struct intel_crtc_state *crtc_state, struct intel_plane_state *state) { + struct drm_i915_private *dev_priv = to_i915(plane->dev); struct drm_crtc *crtc = state->base.crtc; int min_scale = DRM_PLANE_HELPER_NO_SCALING; int max_scale = DRM_PLANE_HELPER_NO_SCALING; bool can_position = false; + int ret; - if (INTEL_INFO(plane->dev)->gen >= 9) { + if (INTEL_GEN(dev_priv) >= 9) { /* use scaler when colorkey is not required */ if (state->ckey.flags == I915_SET_COLORKEY_NONE) { min_scale = 1; @@ -14124,10 +14578,23 @@ intel_check_primary_plane(struct drm_plane *plane, can_position = true; } - return drm_plane_helper_check_state(&state->base, - &state->clip, - min_scale, max_scale, - can_position, true); + ret = drm_plane_helper_check_state(&state->base, + &state->clip, + min_scale, max_scale, + can_position, true); + if (ret) + return ret; + + if (!state->base.fb) + return 0; + + if (INTEL_GEN(dev_priv) >= 9) { + ret = skl_check_plane_surface(state); + if (ret) + return ret; + } + + return 0; } static void intel_begin_crtc_commit(struct drm_crtc *crtc, @@ -14386,7 +14853,7 @@ intel_update_cursor_plane(struct drm_plane *plane, if (!obj) addr = 0; else if (!INTEL_INFO(dev)->cursor_needs_physical) - addr = i915_gem_obj_ggtt_offset(obj); + addr = i915_gem_object_ggtt_offset(obj, NULL); else addr = obj->phys_handle->busaddr; @@ -14639,12 +15106,50 @@ static bool intel_crt_present(struct drm_device *dev) return true; } +void intel_pps_unlock_regs_wa(struct drm_i915_private *dev_priv) +{ + int pps_num; + int pps_idx; + + if (HAS_DDI(dev_priv)) + return; + /* + * This w/a is needed at least on CPT/PPT, but to be sure apply it + * everywhere where registers can be write protected. + */ + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + pps_num = 2; + else + pps_num = 1; + + for (pps_idx = 0; pps_idx < pps_num; pps_idx++) { + u32 val = I915_READ(PP_CONTROL(pps_idx)); + + val = (val & ~PANEL_UNLOCK_MASK) | PANEL_UNLOCK_REGS; + I915_WRITE(PP_CONTROL(pps_idx), val); + } +} + +static void intel_pps_init(struct drm_i915_private *dev_priv) +{ + if (HAS_PCH_SPLIT(dev_priv) || IS_BROXTON(dev_priv)) + dev_priv->pps_mmio_base = PCH_PPS_BASE; + else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + dev_priv->pps_mmio_base = VLV_PPS_BASE; + else + dev_priv->pps_mmio_base = PPS_BASE; + + intel_pps_unlock_regs_wa(dev_priv); +} + static void intel_setup_outputs(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); struct intel_encoder *encoder; bool dpd_is_edp = false; + intel_pps_init(dev_priv); + /* * intel_edp_init_connector() depends on this completing first, to * prevent the registeration of both eDP and LVDS and the incorrect @@ -14912,7 +15417,7 @@ static int intel_framebuffer_init(struct drm_device *dev, struct drm_i915_gem_object *obj) { struct drm_i915_private *dev_priv = to_i915(dev); - unsigned int aligned_height; + unsigned int tiling = i915_gem_object_get_tiling(obj); int ret; u32 pitch_limit, stride_alignment; char *format_name; @@ -14920,17 +15425,19 @@ static int intel_framebuffer_init(struct drm_device *dev, WARN_ON(!mutex_is_locked(&dev->struct_mutex)); if (mode_cmd->flags & DRM_MODE_FB_MODIFIERS) { - /* Enforce that fb modifier and tiling mode match, but only for - * X-tiled. This is needed for FBC. */ - if (!!(i915_gem_object_get_tiling(obj) == I915_TILING_X) != - !!(mode_cmd->modifier[0] == I915_FORMAT_MOD_X_TILED)) { + /* + * If there's a fence, enforce that + * the fb modifier and tiling mode match. + */ + if (tiling != I915_TILING_NONE && + tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) { DRM_DEBUG("tiling_mode doesn't match fb modifier\n"); return -EINVAL; } } else { - if (i915_gem_object_get_tiling(obj) == I915_TILING_X) + if (tiling == I915_TILING_X) { mode_cmd->modifier[0] = I915_FORMAT_MOD_X_TILED; - else if (i915_gem_object_get_tiling(obj) == I915_TILING_Y) { + } else if (tiling == I915_TILING_Y) { DRM_DEBUG("No Y tiling for legacy addfb\n"); return -EINVAL; } @@ -14954,6 +15461,16 @@ static int intel_framebuffer_init(struct drm_device *dev, return -EINVAL; } + /* + * gen2/3 display engine uses the fence if present, + * so the tiling mode must match the fb modifier exactly. + */ + if (INTEL_INFO(dev_priv)->gen < 4 && + tiling != intel_fb_modifier_to_tiling(mode_cmd->modifier[0])) { + DRM_DEBUG("tiling_mode must match fb modifier exactly on gen2/3\n"); + return -EINVAL; + } + stride_alignment = intel_fb_stride_alignment(dev_priv, mode_cmd->modifier[0], mode_cmd->pixel_format); @@ -14973,7 +15490,11 @@ static int intel_framebuffer_init(struct drm_device *dev, return -EINVAL; } - if (mode_cmd->modifier[0] == I915_FORMAT_MOD_X_TILED && + /* + * If there's a fence, enforce that + * the fb pitch and fence stride match. + */ + if (tiling != I915_TILING_NONE && mode_cmd->pitches[0] != i915_gem_object_get_stride(obj)) { DRM_DEBUG("pitch (%d) must match tiling stride (%d)\n", mode_cmd->pitches[0], @@ -15045,17 +15566,12 @@ static int intel_framebuffer_init(struct drm_device *dev, if (mode_cmd->offsets[0] != 0) return -EINVAL; - aligned_height = intel_fb_align_height(dev, mode_cmd->height, - mode_cmd->pixel_format, - mode_cmd->modifier[0]); - /* FIXME drm helper for size checks (especially planar formats)? */ - if (obj->base.size < aligned_height * mode_cmd->pitches[0]) - return -EINVAL; - drm_helper_mode_fill_fb_struct(&intel_fb->base, mode_cmd); intel_fb->obj = obj; - intel_fill_fb_info(dev_priv, &intel_fb->base); + ret = intel_fill_fb_info(dev_priv, &intel_fb->base); + if (ret) + return ret; ret = drm_framebuffer_init(dev, &intel_fb->base, &intel_fb_funcs); if (ret) { @@ -15768,6 +16284,13 @@ static bool intel_encoder_has_connectors(struct intel_encoder *encoder) return false; } +static bool has_pch_trancoder(struct drm_i915_private *dev_priv, + enum transcoder pch_transcoder) +{ + return HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv) || + (HAS_PCH_LPT_H(dev_priv) && pch_transcoder == TRANSCODER_A); +} + static void intel_sanitize_crtc(struct intel_crtc *crtc) { struct drm_device *dev = crtc->base.dev; @@ -15846,7 +16369,17 @@ static void intel_sanitize_crtc(struct intel_crtc *crtc) * worst a fifo underrun happens which also sets this to false. */ crtc->cpu_fifo_underrun_disabled = true; - crtc->pch_fifo_underrun_disabled = true; + /* + * We track the PCH trancoder underrun reporting state + * within the crtc. With crtc for pipe A housing the underrun + * reporting state for PCH transcoder A, crtc for pipe B housing + * it for PCH transcoder B, etc. LPT-H has only PCH transcoder A, + * and marking underrun reporting as disabled for the non-existing + * PCH transcoders B and C would prevent enabling the south + * error interrupt (see cpt_can_enable_serr_int()). + */ + if (has_pch_trancoder(dev_priv, (enum transcoder)crtc->pipe)) + crtc->pch_fifo_underrun_disabled = true; } } @@ -16160,9 +16693,10 @@ void intel_display_resume(struct drm_device *dev) struct drm_atomic_state *state = dev_priv->modeset_restore_state; struct drm_modeset_acquire_ctx ctx; int ret; - bool setup = false; dev_priv->modeset_restore_state = NULL; + if (state) + state->acquire_ctx = &ctx; /* * This is a cludge because with real atomic modeset mode_config.mutex @@ -16173,43 +16707,17 @@ void intel_display_resume(struct drm_device *dev) mutex_lock(&dev->mode_config.mutex); drm_modeset_acquire_init(&ctx, 0); -retry: - ret = drm_modeset_lock_all_ctx(dev, &ctx); - - if (ret == 0 && !setup) { - setup = true; - - intel_modeset_setup_hw_state(dev); - i915_redisable_vga(dev); - } - - if (ret == 0 && state) { - struct drm_crtc_state *crtc_state; - struct drm_crtc *crtc; - int i; - - state->acquire_ctx = &ctx; - - /* ignore any reset values/BIOS leftovers in the WM registers */ - to_intel_atomic_state(state)->skip_intermediate_wm = true; - - for_each_crtc_in_state(state, crtc, crtc_state, i) { - /* - * Force recalculation even if we restore - * current state. With fast modeset this may not result - * in a modeset when the state is compatible. - */ - crtc_state->mode_changed = true; - } - - ret = drm_atomic_commit(state); - } + while (1) { + ret = drm_modeset_lock_all_ctx(dev, &ctx); + if (ret != -EDEADLK) + break; - if (ret == -EDEADLK) { drm_modeset_backoff(&ctx); - goto retry; } + if (!ret) + ret = __intel_display_resume(dev, state); + drm_modeset_drop_locks(&ctx); drm_modeset_acquire_fini(&ctx); mutex_unlock(&dev->mode_config.mutex); @@ -16225,7 +16733,6 @@ void intel_modeset_gem_init(struct drm_device *dev) struct drm_i915_private *dev_priv = to_i915(dev); struct drm_crtc *c; struct drm_i915_gem_object *obj; - int ret; intel_init_gt_powersave(dev_priv); @@ -16239,15 +16746,17 @@ void intel_modeset_gem_init(struct drm_device *dev) * for this. */ for_each_crtc(dev, c) { + struct i915_vma *vma; + obj = intel_fb_obj(c->primary->fb); if (obj == NULL) continue; mutex_lock(&dev->struct_mutex); - ret = intel_pin_and_fence_fb_obj(c->primary->fb, + vma = intel_pin_and_fence_fb_obj(c->primary->fb, c->primary->state->rotation); mutex_unlock(&dev->struct_mutex); - if (ret) { + if (IS_ERR(vma)) { DRM_ERROR("failed to pin boot fb on pipe %d\n", to_intel_crtc(c)->pipe); drm_framebuffer_unreference(c->primary->fb); diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 8fe2afa..364db90 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -256,6 +256,8 @@ intel_dp_init_panel_power_sequencer(struct drm_device *dev, static void intel_dp_init_panel_power_sequencer_registers(struct drm_device *dev, struct intel_dp *intel_dp); +static void +intel_dp_pps_init(struct drm_device *dev, struct intel_dp *intel_dp); static void pps_lock(struct intel_dp *intel_dp) { @@ -463,13 +465,13 @@ typedef bool (*vlv_pipe_check)(struct drm_i915_private *dev_priv, static bool vlv_pipe_has_pp_on(struct drm_i915_private *dev_priv, enum pipe pipe) { - return I915_READ(VLV_PIPE_PP_STATUS(pipe)) & PP_ON; + return I915_READ(PP_STATUS(pipe)) & PP_ON; } static bool vlv_pipe_has_vdd_on(struct drm_i915_private *dev_priv, enum pipe pipe) { - return I915_READ(VLV_PIPE_PP_CONTROL(pipe)) & EDP_FORCE_VDD; + return I915_READ(PP_CONTROL(pipe)) & EDP_FORCE_VDD; } static bool vlv_pipe_any(struct drm_i915_private *dev_priv, @@ -486,7 +488,7 @@ vlv_initial_pps_pipe(struct drm_i915_private *dev_priv, enum pipe pipe; for (pipe = PIPE_A; pipe <= PIPE_B; pipe++) { - u32 port_sel = I915_READ(VLV_PIPE_PP_ON_DELAYS(pipe)) & + u32 port_sel = I915_READ(PP_ON_DELAYS(pipe)) & PANEL_PORT_SELECT_MASK; if (port_sel != PANEL_PORT_SELECT_VLV(port)) @@ -583,30 +585,21 @@ static void intel_pps_get_registers(struct drm_i915_private *dev_priv, struct intel_dp *intel_dp, struct pps_registers *regs) { + int pps_idx = 0; + memset(regs, 0, sizeof(*regs)); - if (IS_BROXTON(dev_priv)) { - int idx = bxt_power_sequencer_idx(intel_dp); - - regs->pp_ctrl = BXT_PP_CONTROL(idx); - regs->pp_stat = BXT_PP_STATUS(idx); - regs->pp_on = BXT_PP_ON_DELAYS(idx); - regs->pp_off = BXT_PP_OFF_DELAYS(idx); - } else if (HAS_PCH_SPLIT(dev_priv)) { - regs->pp_ctrl = PCH_PP_CONTROL; - regs->pp_stat = PCH_PP_STATUS; - regs->pp_on = PCH_PP_ON_DELAYS; - regs->pp_off = PCH_PP_OFF_DELAYS; - regs->pp_div = PCH_PP_DIVISOR; - } else { - enum pipe pipe = vlv_power_sequencer_pipe(intel_dp); + if (IS_BROXTON(dev_priv)) + pps_idx = bxt_power_sequencer_idx(intel_dp); + else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + pps_idx = vlv_power_sequencer_pipe(intel_dp); - regs->pp_ctrl = VLV_PIPE_PP_CONTROL(pipe); - regs->pp_stat = VLV_PIPE_PP_STATUS(pipe); - regs->pp_on = VLV_PIPE_PP_ON_DELAYS(pipe); - regs->pp_off = VLV_PIPE_PP_OFF_DELAYS(pipe); - regs->pp_div = VLV_PIPE_PP_DIVISOR(pipe); - } + regs->pp_ctrl = PP_CONTROL(pps_idx); + regs->pp_stat = PP_STATUS(pps_idx); + regs->pp_on = PP_ON_DELAYS(pps_idx); + regs->pp_off = PP_OFF_DELAYS(pps_idx); + if (!IS_BROXTON(dev_priv)) + regs->pp_div = PP_DIVISOR(pps_idx); } static i915_reg_t @@ -651,8 +644,8 @@ static int edp_notify_handler(struct notifier_block *this, unsigned long code, i915_reg_t pp_ctrl_reg, pp_div_reg; u32 pp_div; - pp_ctrl_reg = VLV_PIPE_PP_CONTROL(pipe); - pp_div_reg = VLV_PIPE_PP_DIVISOR(pipe); + pp_ctrl_reg = PP_CONTROL(pipe); + pp_div_reg = PP_DIVISOR(pipe); pp_div = I915_READ(pp_div_reg); pp_div &= PP_REFERENCE_DIVIDER_MASK; @@ -1836,7 +1829,8 @@ static u32 ironlake_get_pp_control(struct intel_dp *intel_dp) lockdep_assert_held(&dev_priv->pps_mutex); control = I915_READ(_pp_ctrl_reg(intel_dp)); - if (!IS_BROXTON(dev)) { + if (WARN_ON(!HAS_DDI(dev_priv) && + (control & PANEL_UNLOCK_MASK) != PANEL_UNLOCK_REGS)) { control &= ~PANEL_UNLOCK_MASK; control |= PANEL_UNLOCK_REGS; } @@ -1957,7 +1951,7 @@ static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp) DRM_DEBUG_KMS("PP_STATUS: 0x%08x PP_CONTROL: 0x%08x\n", I915_READ(pp_stat_reg), I915_READ(pp_ctrl_reg)); - if ((pp & POWER_TARGET_ON) == 0) + if ((pp & PANEL_POWER_ON) == 0) intel_dp->panel_power_off_time = ktime_get_boottime(); power_domain = intel_display_port_aux_power_domain(intel_encoder); @@ -2044,7 +2038,7 @@ static void edp_panel_on(struct intel_dp *intel_dp) POSTING_READ(pp_ctrl_reg); } - pp |= POWER_TARGET_ON; + pp |= PANEL_POWER_ON; if (!IS_GEN5(dev)) pp |= PANEL_POWER_RESET; @@ -2096,7 +2090,7 @@ static void edp_panel_off(struct intel_dp *intel_dp) pp = ironlake_get_pp_control(intel_dp); /* We need to switch off panel power _and_ force vdd, for otherwise some * panels get very unhappy and cease to work. */ - pp &= ~(POWER_TARGET_ON | PANEL_POWER_RESET | EDP_FORCE_VDD | + pp &= ~(PANEL_POWER_ON | PANEL_POWER_RESET | EDP_FORCE_VDD | EDP_BLC_ENABLE); pp_ctrl_reg = _pp_ctrl_reg(intel_dp); @@ -2729,7 +2723,7 @@ static void vlv_detach_power_sequencer(struct intel_dp *intel_dp) struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp); struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); enum pipe pipe = intel_dp->pps_pipe; - i915_reg_t pp_on_reg = VLV_PIPE_PP_ON_DELAYS(pipe); + i915_reg_t pp_on_reg = PP_ON_DELAYS(pipe); edp_panel_vdd_off_sync(intel_dp); @@ -4666,13 +4660,8 @@ void intel_dp_encoder_reset(struct drm_encoder *encoder) pps_lock(intel_dp); - /* - * Read out the current power sequencer assignment, - * in case the BIOS did something with it. - */ - if (IS_VALLEYVIEW(encoder->dev) || IS_CHERRYVIEW(encoder->dev)) - vlv_initial_power_sequencer_setup(intel_dp); - + /* Reinit the power sequencer, in case BIOS did something with it. */ + intel_dp_pps_init(encoder->dev, intel_dp); intel_edp_panel_vdd_sanitize(intel_dp); pps_unlock(intel_dp); @@ -5020,6 +5009,17 @@ intel_dp_init_panel_power_sequencer_registers(struct drm_device *dev, I915_READ(regs.pp_div)); } +static void intel_dp_pps_init(struct drm_device *dev, + struct intel_dp *intel_dp) +{ + if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) { + vlv_initial_power_sequencer_setup(intel_dp); + } else { + intel_dp_init_panel_power_sequencer(dev, intel_dp); + intel_dp_init_panel_power_sequencer_registers(dev, intel_dp); + } +} + /** * intel_dp_set_drrs_state - program registers for RR switch to take effect * @dev: DRM device @@ -5434,14 +5434,7 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp, pps_lock(intel_dp); intel_dp_init_panel_power_timestamps(intel_dp); - - if (IS_VALLEYVIEW(dev) || IS_CHERRYVIEW(dev)) { - vlv_initial_power_sequencer_setup(intel_dp); - } else { - intel_dp_init_panel_power_sequencer(dev, intel_dp); - intel_dp_init_panel_power_sequencer_registers(dev, intel_dp); - } - + intel_dp_pps_init(dev, intel_dp); intel_edp_panel_vdd_sanitize(intel_dp); pps_unlock(intel_dp); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index b37b8ef..774aab3 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -178,11 +178,22 @@ struct intel_framebuffer { struct drm_framebuffer base; struct drm_i915_gem_object *obj; struct intel_rotation_info rot_info; + + /* for each plane in the normal GTT view */ + struct { + unsigned int x, y; + } normal[2]; + /* for each plane in the rotated GTT view */ + struct { + unsigned int x, y; + unsigned int pitch; /* pixels */ + } rotated[2]; }; struct intel_fbdev { struct drm_fb_helper helper; struct intel_framebuffer *fb; + struct i915_vma *vma; async_cookie_t cookie; int preferred_bpp; }; @@ -340,6 +351,15 @@ struct intel_plane_state { struct drm_plane_state base; struct drm_rect clip; + struct { + u32 offset; + int x, y; + } main; + struct { + u32 offset; + int x, y; + } aux; + /* * scaler_id * = -1 : not using a scaler @@ -1153,12 +1173,18 @@ int vlv_get_cck_clock(struct drm_i915_private *dev_priv, const char *name, u32 reg, int ref_freq); extern const struct drm_plane_funcs intel_plane_funcs; void intel_init_display_hooks(struct drm_i915_private *dev_priv); +unsigned int intel_fb_xy_to_linear(int x, int y, + const struct intel_plane_state *state, + int plane); +void intel_add_fb_offsets(int *x, int *y, + const struct intel_plane_state *state, int plane); unsigned int intel_rotation_info_size(const struct intel_rotation_info *rot_info); bool intel_has_pending_fb_unpin(struct drm_device *dev); void intel_mark_busy(struct drm_i915_private *dev_priv); void intel_mark_idle(struct drm_i915_private *dev_priv); void intel_crtc_restore_mode(struct drm_crtc *crtc); int intel_display_suspend(struct drm_device *dev); +void intel_pps_unlock_regs_wa(struct drm_i915_private *dev_priv); void intel_encoder_destroy(struct drm_encoder *encoder); int intel_connector_init(struct intel_connector *); struct intel_connector *intel_connector_alloc(void); @@ -1214,8 +1240,8 @@ bool intel_get_load_detect_pipe(struct drm_connector *connector, void intel_release_load_detect_pipe(struct drm_connector *connector, struct intel_load_detect_pipe *old, struct drm_modeset_acquire_ctx *ctx); -int intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, - unsigned int rotation); +struct i915_vma * +intel_pin_and_fence_fb_obj(struct drm_framebuffer *fb, unsigned int rotation); void intel_unpin_fb_obj(struct drm_framebuffer *fb, unsigned int rotation); struct drm_framebuffer * __intel_framebuffer_create(struct drm_device *dev, @@ -1277,9 +1303,7 @@ void assert_pipe(struct drm_i915_private *dev_priv, enum pipe pipe, bool state); #define assert_pipe_enabled(d, p) assert_pipe(d, p, true) #define assert_pipe_disabled(d, p) assert_pipe(d, p, false) u32 intel_compute_tile_offset(int *x, int *y, - const struct drm_framebuffer *fb, int plane, - unsigned int pitch, - unsigned int rotation); + const struct intel_plane_state *state, int plane); void intel_prepare_reset(struct drm_i915_private *dev_priv); void intel_finish_reset(struct drm_i915_private *dev_priv); void hsw_enable_pc8(struct drm_i915_private *dev_priv); @@ -1322,13 +1346,14 @@ void intel_mode_from_pipe_config(struct drm_display_mode *mode, int skl_update_scaler_crtc(struct intel_crtc_state *crtc_state); int skl_max_scale(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state); -u32 intel_plane_obj_offset(struct intel_plane *intel_plane, - struct drm_i915_gem_object *obj, - unsigned int plane); +u32 intel_fb_gtt_offset(struct drm_framebuffer *fb, unsigned int rotation); u32 skl_plane_ctl_format(uint32_t pixel_format); u32 skl_plane_ctl_tiling(uint64_t fb_modifier); u32 skl_plane_ctl_rotation(unsigned int rotation); +u32 skl_plane_stride(const struct drm_framebuffer *fb, int plane, + unsigned int rotation); +int skl_check_plane_surface(struct intel_plane_state *plane_state); /* intel_csr.c */ void intel_csr_ucode_init(struct drm_i915_private *); diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index e9b301a..2e96a86 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -29,7 +29,7 @@ static const struct engine_info { const char *name; unsigned exec_id; - unsigned guc_id; + enum intel_engine_hw_id hw_id; u32 mmio_base; unsigned irq_shift; int (*init_legacy)(struct intel_engine_cs *engine); @@ -38,7 +38,7 @@ static const struct engine_info { [RCS] = { .name = "render ring", .exec_id = I915_EXEC_RENDER, - .guc_id = GUC_RENDER_ENGINE, + .hw_id = RCS_HW, .mmio_base = RENDER_RING_BASE, .irq_shift = GEN8_RCS_IRQ_SHIFT, .init_execlists = logical_render_ring_init, @@ -47,7 +47,7 @@ static const struct engine_info { [BCS] = { .name = "blitter ring", .exec_id = I915_EXEC_BLT, - .guc_id = GUC_BLITTER_ENGINE, + .hw_id = BCS_HW, .mmio_base = BLT_RING_BASE, .irq_shift = GEN8_BCS_IRQ_SHIFT, .init_execlists = logical_xcs_ring_init, @@ -56,7 +56,7 @@ static const struct engine_info { [VCS] = { .name = "bsd ring", .exec_id = I915_EXEC_BSD, - .guc_id = GUC_VIDEO_ENGINE, + .hw_id = VCS_HW, .mmio_base = GEN6_BSD_RING_BASE, .irq_shift = GEN8_VCS1_IRQ_SHIFT, .init_execlists = logical_xcs_ring_init, @@ -65,7 +65,7 @@ static const struct engine_info { [VCS2] = { .name = "bsd2 ring", .exec_id = I915_EXEC_BSD, - .guc_id = GUC_VIDEO_ENGINE2, + .hw_id = VCS2_HW, .mmio_base = GEN8_BSD2_RING_BASE, .irq_shift = GEN8_VCS2_IRQ_SHIFT, .init_execlists = logical_xcs_ring_init, @@ -74,7 +74,7 @@ static const struct engine_info { [VECS] = { .name = "video enhancement ring", .exec_id = I915_EXEC_VEBOX, - .guc_id = GUC_VIDEOENHANCE_ENGINE, + .hw_id = VECS_HW, .mmio_base = VEBOX_RING_BASE, .irq_shift = GEN8_VECS_IRQ_SHIFT, .init_execlists = logical_xcs_ring_init, @@ -93,7 +93,7 @@ intel_engine_setup(struct drm_i915_private *dev_priv, engine->i915 = dev_priv; engine->name = info->name; engine->exec_id = info->exec_id; - engine->hw_id = engine->guc_id = info->guc_id; + engine->hw_id = engine->guc_id = info->hw_id; engine->mmio_base = info->mmio_base; engine->irq_shift = info->irq_shift; @@ -109,6 +109,7 @@ intel_engine_setup(struct drm_i915_private *dev_priv, int intel_engines_init(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_device_info *device_info = mkwrite_device_info(dev_priv); unsigned int mask = 0; int (*init)(struct intel_engine_cs *engine); unsigned int i; @@ -142,11 +143,10 @@ int intel_engines_init(struct drm_device *dev) * are added to the driver by a warning and disabling the forgotten * engines. */ - if (WARN_ON(mask != INTEL_INFO(dev_priv)->ring_mask)) { - struct intel_device_info *info = - (struct intel_device_info *)&dev_priv->info; - info->ring_mask = mask; - } + if (WARN_ON(mask != INTEL_INFO(dev_priv)->ring_mask)) + device_info->ring_mask = mask; + + device_info->num_rings = hweight32(mask); return 0; @@ -161,9 +161,56 @@ cleanup: return ret; } +void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno) +{ + struct drm_i915_private *dev_priv = engine->i915; + + /* Our semaphore implementation is strictly monotonic (i.e. we proceed + * so long as the semaphore value in the register/page is greater + * than the sync value), so whenever we reset the seqno, + * so long as we reset the tracking semaphore value to 0, it will + * always be before the next request's seqno. If we don't reset + * the semaphore value, then when the seqno moves backwards all + * future waits will complete instantly (causing rendering corruption). + */ + if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) { + I915_WRITE(RING_SYNC_0(engine->mmio_base), 0); + I915_WRITE(RING_SYNC_1(engine->mmio_base), 0); + if (HAS_VEBOX(dev_priv)) + I915_WRITE(RING_SYNC_2(engine->mmio_base), 0); + } + if (dev_priv->semaphore) { + struct page *page = i915_vma_first_page(dev_priv->semaphore); + void *semaphores; + + /* Semaphores are in noncoherent memory, flush to be safe */ + semaphores = kmap(page); + memset(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0), + 0, I915_NUM_ENGINES * gen8_semaphore_seqno_size); + drm_clflush_virt_range(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0), + I915_NUM_ENGINES * gen8_semaphore_seqno_size); + kunmap(page); + } + memset(engine->semaphore.sync_seqno, 0, + sizeof(engine->semaphore.sync_seqno)); + + intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); + if (engine->irq_seqno_barrier) + engine->irq_seqno_barrier(engine); + engine->last_submitted_seqno = seqno; + + engine->hangcheck.seqno = seqno; + + /* After manually advancing the seqno, fake the interrupt in case + * there are any waiters for that seqno. + */ + intel_engine_wakeup(engine); +} + void intel_engine_init_hangcheck(struct intel_engine_cs *engine) { memset(&engine->hangcheck, 0, sizeof(engine->hangcheck)); + clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); } static void intel_engine_init_requests(struct intel_engine_cs *engine) @@ -192,6 +239,49 @@ void intel_engine_setup_common(struct intel_engine_cs *engine) intel_engine_init_requests(engine); intel_engine_init_hangcheck(engine); i915_gem_batch_pool_init(engine, &engine->batch_pool); + + intel_engine_init_cmd_parser(engine); +} + +int intel_engine_create_scratch(struct intel_engine_cs *engine, int size) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int ret; + + WARN_ON(engine->scratch); + + obj = i915_gem_object_create_stolen(&engine->i915->drm, size); + if (!obj) + obj = i915_gem_object_create(&engine->i915->drm, size); + if (IS_ERR(obj)) { + DRM_ERROR("Failed to allocate scratch page\n"); + return PTR_ERR(obj); + } + + vma = i915_vma_create(obj, &engine->i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto err_unref; + } + + ret = i915_vma_pin(vma, 0, 4096, PIN_GLOBAL | PIN_HIGH); + if (ret) + goto err_unref; + + engine->scratch = vma; + DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n", + engine->name, i915_ggtt_offset(vma)); + return 0; + +err_unref: + i915_gem_object_put(obj); + return ret; +} + +static void intel_engine_cleanup_scratch(struct intel_engine_cs *engine) +{ + i915_vma_unpin_and_release(&engine->scratch); } /** @@ -213,7 +303,7 @@ int intel_engine_init_common(struct intel_engine_cs *engine) if (ret) return ret; - return intel_engine_init_cmd_parser(engine); + return 0; } /** @@ -225,7 +315,9 @@ int intel_engine_init_common(struct intel_engine_cs *engine) */ void intel_engine_cleanup_common(struct intel_engine_cs *engine) { - intel_engine_cleanup_cmd_parser(engine); + intel_engine_cleanup_scratch(engine); + intel_engine_fini_breadcrumbs(engine); + intel_engine_cleanup_cmd_parser(engine); i915_gem_batch_pool_fini(&engine->batch_pool); } diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index e67b09a..bf8b22a 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -190,9 +190,13 @@ static void g4x_fbc_activate(struct drm_i915_private *dev_priv) dpfc_ctl |= DPFC_CTL_LIMIT_2X; else dpfc_ctl |= DPFC_CTL_LIMIT_1X; - dpfc_ctl |= DPFC_CTL_FENCE_EN | params->fb.fence_reg; - I915_WRITE(DPFC_FENCE_YOFF, params->crtc.fence_y_offset); + if (params->fb.fence_reg != I915_FENCE_REG_NONE) { + dpfc_ctl |= DPFC_CTL_FENCE_EN | params->fb.fence_reg; + I915_WRITE(DPFC_FENCE_YOFF, params->crtc.fence_y_offset); + } else { + I915_WRITE(DPFC_FENCE_YOFF, 0); + } /* enable it... */ I915_WRITE(DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN); @@ -244,21 +248,29 @@ static void ilk_fbc_activate(struct drm_i915_private *dev_priv) dpfc_ctl |= DPFC_CTL_LIMIT_1X; break; } - dpfc_ctl |= DPFC_CTL_FENCE_EN; - if (IS_GEN5(dev_priv)) - dpfc_ctl |= params->fb.fence_reg; + + if (params->fb.fence_reg != I915_FENCE_REG_NONE) { + dpfc_ctl |= DPFC_CTL_FENCE_EN; + if (IS_GEN5(dev_priv)) + dpfc_ctl |= params->fb.fence_reg; + if (IS_GEN6(dev_priv)) { + I915_WRITE(SNB_DPFC_CTL_SA, + SNB_CPU_FENCE_ENABLE | params->fb.fence_reg); + I915_WRITE(DPFC_CPU_FENCE_OFFSET, + params->crtc.fence_y_offset); + } + } else { + if (IS_GEN6(dev_priv)) { + I915_WRITE(SNB_DPFC_CTL_SA, 0); + I915_WRITE(DPFC_CPU_FENCE_OFFSET, 0); + } + } I915_WRITE(ILK_DPFC_FENCE_YOFF, params->crtc.fence_y_offset); I915_WRITE(ILK_FBC_RT_BASE, params->fb.ggtt_offset | ILK_FBC_RT_VALID); /* enable it... */ I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN); - if (IS_GEN6(dev_priv)) { - I915_WRITE(SNB_DPFC_CTL_SA, - SNB_CPU_FENCE_ENABLE | params->fb.fence_reg); - I915_WRITE(DPFC_CPU_FENCE_OFFSET, params->crtc.fence_y_offset); - } - intel_fbc_recompress(dev_priv); } @@ -305,7 +317,15 @@ static void gen7_fbc_activate(struct drm_i915_private *dev_priv) break; } - dpfc_ctl |= IVB_DPFC_CTL_FENCE_EN; + if (params->fb.fence_reg != I915_FENCE_REG_NONE) { + dpfc_ctl |= IVB_DPFC_CTL_FENCE_EN; + I915_WRITE(SNB_DPFC_CTL_SA, + SNB_CPU_FENCE_ENABLE | params->fb.fence_reg); + I915_WRITE(DPFC_CPU_FENCE_OFFSET, params->crtc.fence_y_offset); + } else { + I915_WRITE(SNB_DPFC_CTL_SA,0); + I915_WRITE(DPFC_CPU_FENCE_OFFSET, 0); + } if (dev_priv->fbc.false_color) dpfc_ctl |= FBC_CTL_FALSE_COLOR; @@ -324,10 +344,6 @@ static void gen7_fbc_activate(struct drm_i915_private *dev_priv) I915_WRITE(ILK_DPFC_CONTROL, dpfc_ctl | DPFC_CTL_EN); - I915_WRITE(SNB_DPFC_CTL_SA, - SNB_CPU_FENCE_ENABLE | params->fb.fence_reg); - I915_WRITE(DPFC_CPU_FENCE_OFFSET, params->crtc.fence_y_offset); - intel_fbc_recompress(dev_priv); } @@ -709,6 +725,14 @@ static bool intel_fbc_hw_tracking_covers_screen(struct intel_crtc *crtc) return effective_w <= max_w && effective_h <= max_h; } +/* XXX replace me when we have VMA tracking for intel_plane_state */ +static int get_fence_id(struct drm_framebuffer *fb) +{ + struct i915_vma *vma = i915_gem_object_to_ggtt(intel_fb_obj(fb), NULL); + + return vma && vma->fence ? vma->fence->id : I915_FENCE_REG_NONE; +} + static void intel_fbc_update_state_cache(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, struct intel_plane_state *plane_state) @@ -737,10 +761,10 @@ static void intel_fbc_update_state_cache(struct intel_crtc *crtc, /* FIXME: We lack the proper locking here, so only run this on the * platforms that need. */ if (IS_GEN(dev_priv, 5, 6)) - cache->fb.ilk_ggtt_offset = i915_gem_obj_ggtt_offset(obj); + cache->fb.ilk_ggtt_offset = i915_gem_object_ggtt_offset(obj, NULL); cache->fb.pixel_format = fb->pixel_format; cache->fb.stride = fb->pitches[0]; - cache->fb.fence_reg = obj->fence_reg; + cache->fb.fence_reg = get_fence_id(fb); cache->fb.tiling_mode = i915_gem_object_get_tiling(obj); } @@ -768,11 +792,17 @@ static bool intel_fbc_can_activate(struct intel_crtc *crtc) /* The use of a CPU fence is mandatory in order to detect writes * by the CPU to the scanout and trigger updates to the FBC. + * + * Note that is possible for a tiled surface to be unmappable (and + * so have no fence associated with it) due to aperture constaints + * at the time of pinning. */ if (cache->fb.tiling_mode != I915_TILING_X || cache->fb.fence_reg == I915_FENCE_REG_NONE) { - fbc->no_fbc_reason = "framebuffer not tiled or fenced"; - return false; + if (INTEL_GEN(dev_priv) < 5) { + fbc->no_fbc_reason = "framebuffer not tiled or fenced"; + return false; + } } if (INTEL_INFO(dev_priv)->gen <= 4 && !IS_G4X(dev_priv) && cache->plane.rotation != DRM_ROTATE_0) { diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 2c14dfc..4003e49 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -187,7 +187,6 @@ static int intelfb_create(struct drm_fb_helper *helper, struct fb_info *info; struct drm_framebuffer *fb; struct i915_vma *vma; - struct drm_i915_gem_object *obj; bool prealloc = false; void __iomem *vaddr; int ret; @@ -215,17 +214,17 @@ static int intelfb_create(struct drm_fb_helper *helper, sizes->fb_height = intel_fb->base.height; } - obj = intel_fb->obj; - mutex_lock(&dev->struct_mutex); /* Pin the GGTT vma for our access via info->screen_base. * This also validates that any existing fb inherited from the * BIOS is suitable for own access. */ - ret = intel_pin_and_fence_fb_obj(&ifbdev->fb->base, DRM_ROTATE_0); - if (ret) + vma = intel_pin_and_fence_fb_obj(&ifbdev->fb->base, DRM_ROTATE_0); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); goto out_unlock; + } info = drm_fb_helper_alloc_fbi(helper); if (IS_ERR(info)) { @@ -245,13 +244,11 @@ static int intelfb_create(struct drm_fb_helper *helper, info->flags = FBINFO_DEFAULT | FBINFO_CAN_FORCE_OUTPUT; info->fbops = &intelfb_ops; - vma = i915_gem_obj_to_ggtt(obj); - /* setup aperture base/size for vesafb takeover */ info->apertures->ranges[0].base = dev->mode_config.fb_base; info->apertures->ranges[0].size = ggtt->mappable_end; - info->fix.smem_start = dev->mode_config.fb_base + vma->node.start; + info->fix.smem_start = dev->mode_config.fb_base + i915_ggtt_offset(vma); info->fix.smem_len = vma->node.size; vaddr = i915_vma_pin_iomap(vma); @@ -273,14 +270,14 @@ static int intelfb_create(struct drm_fb_helper *helper, * If the object is stolen however, it will be full of whatever * garbage was left in there. */ - if (ifbdev->fb->obj->stolen && !prealloc) + if (intel_fb->obj->stolen && !prealloc) memset_io(info->screen_base, 0, info->screen_size); /* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */ - DRM_DEBUG_KMS("allocated %dx%d fb: 0x%08llx, bo %p\n", - fb->width, fb->height, - i915_gem_obj_ggtt_offset(obj), obj); + DRM_DEBUG_KMS("allocated %dx%d fb: 0x%08x\n", + fb->width, fb->height, i915_ggtt_offset(vma)); + ifbdev->vma = vma; mutex_unlock(&dev->struct_mutex); vga_switcheroo_client_fb_set(dev->pdev, info); diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 623cf26..c973262 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -63,26 +63,25 @@ struct drm_i915_gem_request; * retcode: errno from last guc_submit() */ struct i915_guc_client { - struct drm_i915_gem_object *client_obj; + struct i915_vma *vma; void *client_base; /* first page (only) of above */ struct i915_gem_context *owner; struct intel_guc *guc; + + uint32_t engines; /* bitmap of (host) engine ids */ uint32_t priority; uint32_t ctx_index; - uint32_t proc_desc_offset; + uint32_t doorbell_offset; uint32_t cookie; uint16_t doorbell_id; - uint16_t padding; /* Maintain alignment */ + uint16_t padding[3]; /* Maintain alignment */ uint32_t wq_offset; uint32_t wq_size; uint32_t wq_tail; - uint32_t unused; /* Was 'wq_head' */ - uint32_t no_wq_space; - uint32_t q_fail; /* No longer used */ uint32_t b_fail; int retcode; @@ -125,11 +124,10 @@ struct intel_guc_fw { struct intel_guc { struct intel_guc_fw guc_fw; uint32_t log_flags; - struct drm_i915_gem_object *log_obj; - - struct drm_i915_gem_object *ads_obj; + struct i915_vma *log_vma; - struct drm_i915_gem_object *ctx_pool_obj; + struct i915_vma *ads_vma; + struct i915_vma *ctx_pool_vma; struct ida ctx_ids; struct i915_guc_client *execbuf_client; diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c index 3763e30..324812d 100644 --- a/drivers/gpu/drm/i915/intel_guc_loader.c +++ b/drivers/gpu/drm/i915/intel_guc_loader.c @@ -59,13 +59,25 @@ * */ -#define I915_SKL_GUC_UCODE "i915/skl_guc_ver6_1.bin" +#define SKL_FW_MAJOR 6 +#define SKL_FW_MINOR 1 + +#define BXT_FW_MAJOR 8 +#define BXT_FW_MINOR 7 + +#define KBL_FW_MAJOR 9 +#define KBL_FW_MINOR 14 + +#define GUC_FW_PATH(platform, major, minor) \ + "i915/" __stringify(platform) "_guc_ver" __stringify(major) "_" __stringify(minor) ".bin" + +#define I915_SKL_GUC_UCODE GUC_FW_PATH(skl, SKL_FW_MAJOR, SKL_FW_MINOR) MODULE_FIRMWARE(I915_SKL_GUC_UCODE); -#define I915_BXT_GUC_UCODE "i915/bxt_guc_ver8_7.bin" +#define I915_BXT_GUC_UCODE GUC_FW_PATH(bxt, BXT_FW_MAJOR, BXT_FW_MINOR) MODULE_FIRMWARE(I915_BXT_GUC_UCODE); -#define I915_KBL_GUC_UCODE "i915/kbl_guc_ver9_14.bin" +#define I915_KBL_GUC_UCODE GUC_FW_PATH(kbl, KBL_FW_MAJOR, KBL_FW_MINOR) MODULE_FIRMWARE(I915_KBL_GUC_UCODE); /* User-friendly representation of an enum */ @@ -181,16 +193,15 @@ static void set_guc_init_params(struct drm_i915_private *dev_priv) i915.guc_log_level << GUC_LOG_VERBOSITY_SHIFT; } - if (guc->ads_obj) { - u32 ads = (u32)i915_gem_obj_ggtt_offset(guc->ads_obj) - >> PAGE_SHIFT; + if (guc->ads_vma) { + u32 ads = i915_ggtt_offset(guc->ads_vma) >> PAGE_SHIFT; params[GUC_CTL_DEBUG] |= ads << GUC_ADS_ADDR_SHIFT; params[GUC_CTL_DEBUG] |= GUC_ADS_ENABLED; } /* If GuC submission is enabled, set up additional parameters here */ if (i915.enable_guc_submission) { - u32 pgs = i915_gem_obj_ggtt_offset(dev_priv->guc.ctx_pool_obj); + u32 pgs = i915_ggtt_offset(dev_priv->guc.ctx_pool_vma); u32 ctx_in_16 = GUC_MAX_GPU_CONTEXTS / 16; pgs >>= PAGE_SHIFT; @@ -238,12 +249,12 @@ static inline bool guc_ucode_response(struct drm_i915_private *dev_priv, * Note that GuC needs the CSS header plus uKernel code to be copied by the * DMA engine in one operation, whereas the RSA signature is loaded via MMIO. */ -static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv) +static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv, + struct i915_vma *vma) { struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw; - struct drm_i915_gem_object *fw_obj = guc_fw->guc_fw_obj; unsigned long offset; - struct sg_table *sg = fw_obj->pages; + struct sg_table *sg = vma->pages; u32 status, rsa[UOS_RSA_SCRATCH_MAX_COUNT]; int i, ret = 0; @@ -260,7 +271,7 @@ static int guc_ucode_xfer_dma(struct drm_i915_private *dev_priv) I915_WRITE(DMA_COPY_SIZE, guc_fw->header_size + guc_fw->ucode_size); /* Set the source address for the new blob */ - offset = i915_gem_obj_ggtt_offset(fw_obj) + guc_fw->header_offset; + offset = i915_ggtt_offset(vma) + guc_fw->header_offset; I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset)); I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF); @@ -315,6 +326,7 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv) { struct intel_guc_fw *guc_fw = &dev_priv->guc.guc_fw; struct drm_device *dev = &dev_priv->drm; + struct i915_vma *vma; int ret; ret = i915_gem_object_set_to_gtt_domain(guc_fw->guc_fw_obj, false); @@ -323,10 +335,10 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv) return ret; } - ret = i915_gem_object_ggtt_pin(guc_fw->guc_fw_obj, NULL, 0, 0, 0); - if (ret) { - DRM_DEBUG_DRIVER("pin failed %d\n", ret); - return ret; + vma = i915_gem_object_ggtt_pin(guc_fw->guc_fw_obj, NULL, 0, 0, 0); + if (IS_ERR(vma)) { + DRM_DEBUG_DRIVER("pin failed %d\n", (int)PTR_ERR(vma)); + return PTR_ERR(vma); } /* Invalidate GuC TLB to let GuC take the latest updates to GTT. */ @@ -369,7 +381,7 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv) set_guc_init_params(dev_priv); - ret = guc_ucode_xfer_dma(dev_priv); + ret = guc_ucode_xfer_dma(dev_priv, vma); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); @@ -377,7 +389,7 @@ static int guc_ucode_xfer(struct drm_i915_private *dev_priv) * We keep the object pages for reuse during resume. But we can unpin it * now that DMA has completed, so it doesn't continue to take up space. */ - i915_gem_object_ggtt_unpin(guc_fw->guc_fw_obj); + i915_vma_unpin(vma); return ret; } @@ -697,16 +709,16 @@ void intel_guc_init(struct drm_device *dev) fw_path = NULL; } else if (IS_SKYLAKE(dev)) { fw_path = I915_SKL_GUC_UCODE; - guc_fw->guc_fw_major_wanted = 6; - guc_fw->guc_fw_minor_wanted = 1; + guc_fw->guc_fw_major_wanted = SKL_FW_MAJOR; + guc_fw->guc_fw_minor_wanted = SKL_FW_MINOR; } else if (IS_BROXTON(dev)) { fw_path = I915_BXT_GUC_UCODE; - guc_fw->guc_fw_major_wanted = 8; - guc_fw->guc_fw_minor_wanted = 7; + guc_fw->guc_fw_major_wanted = BXT_FW_MAJOR; + guc_fw->guc_fw_minor_wanted = BXT_FW_MINOR; } else if (IS_KABYLAKE(dev)) { fw_path = I915_KBL_GUC_UCODE; - guc_fw->guc_fw_major_wanted = 9; - guc_fw->guc_fw_minor_wanted = 14; + guc_fw->guc_fw_major_wanted = KBL_FW_MAJOR; + guc_fw->guc_fw_minor_wanted = KBL_FW_MINOR; } else { fw_path = ""; /* unknown device */ } diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c index 5dc2c20..334d47b 100644 --- a/drivers/gpu/drm/i915/intel_hotplug.c +++ b/drivers/gpu/drm/i915/intel_hotplug.c @@ -477,7 +477,8 @@ void intel_hpd_init(struct drm_i915_private *dev_priv) spin_unlock_irq(&dev_priv->irq_lock); } -void i915_hpd_poll_init_work(struct work_struct *work) { +static void i915_hpd_poll_init_work(struct work_struct *work) +{ struct drm_i915_private *dev_priv = container_of(work, struct drm_i915_private, hotplug.poll_init_work); diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 309c5d9..6b49df4 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -315,7 +315,7 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx, desc = ctx->desc_template; /* bits 3-4 */ desc |= engine->ctx_desc_template; /* bits 0-11 */ - desc |= ce->lrc_vma->node.start + LRC_PPHWSP_PN * PAGE_SIZE; + desc |= i915_ggtt_offset(ce->state) + LRC_PPHWSP_PN * PAGE_SIZE; /* bits 12-31 */ desc |= (u64)ctx->hw_id << GEN8_CTX_ID_SHIFT; /* bits 32-52 */ @@ -763,7 +763,6 @@ void intel_execlists_cancel_requests(struct intel_engine_cs *engine) static int intel_lr_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = ctx->i915; struct intel_context *ce = &ctx->engine[engine->id]; void *vaddr; u32 *lrc_reg_state; @@ -774,16 +773,15 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx, if (ce->pin_count++) return 0; - ret = i915_gem_object_ggtt_pin(ce->state, NULL, - 0, GEN8_LR_CONTEXT_ALIGN, - PIN_OFFSET_BIAS | GUC_WOPCM_TOP); + ret = i915_vma_pin(ce->state, 0, GEN8_LR_CONTEXT_ALIGN, + PIN_OFFSET_BIAS | GUC_WOPCM_TOP | PIN_GLOBAL); if (ret) goto err; - vaddr = i915_gem_object_pin_map(ce->state); + vaddr = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB); if (IS_ERR(vaddr)) { ret = PTR_ERR(vaddr); - goto unpin_ctx_obj; + goto unpin_vma; } lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; @@ -792,24 +790,26 @@ static int intel_lr_context_pin(struct i915_gem_context *ctx, if (ret) goto unpin_map; - ce->lrc_vma = i915_gem_obj_to_ggtt(ce->state); intel_lr_context_descriptor_update(ctx, engine); - lrc_reg_state[CTX_RING_BUFFER_START+1] = ce->ring->vma->node.start; + lrc_reg_state[CTX_RING_BUFFER_START+1] = + i915_ggtt_offset(ce->ring->vma); ce->lrc_reg_state = lrc_reg_state; - ce->state->dirty = true; + ce->state->obj->dirty = true; /* Invalidate GuC TLB. */ - if (i915.enable_guc_submission) + if (i915.enable_guc_submission) { + struct drm_i915_private *dev_priv = ctx->i915; I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE); + } i915_gem_context_get(ctx); return 0; unpin_map: - i915_gem_object_unpin_map(ce->state); -unpin_ctx_obj: - i915_gem_object_ggtt_unpin(ce->state); + i915_gem_object_unpin_map(ce->state->obj); +unpin_vma: + __i915_vma_unpin(ce->state); err: ce->pin_count = 0; return ret; @@ -828,12 +828,8 @@ void intel_lr_context_unpin(struct i915_gem_context *ctx, intel_ring_unpin(ce->ring); - i915_gem_object_unpin_map(ce->state); - i915_gem_object_ggtt_unpin(ce->state); - - ce->lrc_vma = NULL; - ce->lrc_desc = 0; - ce->lrc_reg_state = NULL; + i915_gem_object_unpin_map(ce->state->obj); + i915_vma_unpin(ce->state); i915_gem_context_put(ctx); } @@ -919,7 +915,7 @@ static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, wa_ctx_emit(batch, index, (MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT)); wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4); - wa_ctx_emit(batch, index, engine->scratch.gtt_offset + 256); + wa_ctx_emit(batch, index, i915_ggtt_offset(engine->scratch) + 256); wa_ctx_emit(batch, index, 0); wa_ctx_emit(batch, index, MI_LOAD_REGISTER_IMM(1)); @@ -937,7 +933,7 @@ static inline int gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, wa_ctx_emit(batch, index, (MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT)); wa_ctx_emit_reg(batch, index, GEN8_L3SQCREG4); - wa_ctx_emit(batch, index, engine->scratch.gtt_offset + 256); + wa_ctx_emit(batch, index, i915_ggtt_offset(engine->scratch) + 256); wa_ctx_emit(batch, index, 0); return index; @@ -998,7 +994,7 @@ static int gen8_init_indirectctx_bb(struct intel_engine_cs *engine, /* WaClearSlmSpaceAtContextSwitch:bdw,chv */ /* Actual scratch location is at 128 bytes offset */ - scratch_addr = engine->scratch.gtt_offset + 2*CACHELINE_BYTES; + scratch_addr = i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES; wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6)); wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 | @@ -1077,8 +1073,8 @@ static int gen9_init_indirectctx_bb(struct intel_engine_cs *engine, /* WaClearSlmSpaceAtContextSwitch:kbl */ /* Actual scratch location is at 128 bytes offset */ if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_A0)) { - uint32_t scratch_addr - = engine->scratch.gtt_offset + 2*CACHELINE_BYTES; + u32 scratch_addr = + i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES; wa_ctx_emit(batch, index, GFX_OP_PIPE_CONTROL(6)); wa_ctx_emit(batch, index, (PIPE_CONTROL_FLUSH_L3 | @@ -1170,45 +1166,44 @@ static int gen9_init_perctx_bb(struct intel_engine_cs *engine, static int lrc_setup_wa_ctx_obj(struct intel_engine_cs *engine, u32 size) { - int ret; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int err; - engine->wa_ctx.obj = i915_gem_object_create(&engine->i915->drm, - PAGE_ALIGN(size)); - if (IS_ERR(engine->wa_ctx.obj)) { - DRM_DEBUG_DRIVER("alloc LRC WA ctx backing obj failed.\n"); - ret = PTR_ERR(engine->wa_ctx.obj); - engine->wa_ctx.obj = NULL; - return ret; - } + obj = i915_gem_object_create(&engine->i915->drm, PAGE_ALIGN(size)); + if (IS_ERR(obj)) + return PTR_ERR(obj); - ret = i915_gem_object_ggtt_pin(engine->wa_ctx.obj, NULL, - 0, PAGE_SIZE, 0); - if (ret) { - DRM_DEBUG_DRIVER("pin LRC WA ctx backing obj failed: %d\n", - ret); - i915_gem_object_put(engine->wa_ctx.obj); - return ret; + vma = i915_vma_create(obj, &engine->i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err; } + err = i915_vma_pin(vma, 0, PAGE_SIZE, PIN_GLOBAL | PIN_HIGH); + if (err) + goto err; + + engine->wa_ctx.vma = vma; return 0; + +err: + i915_gem_object_put(obj); + return err; } static void lrc_destroy_wa_ctx_obj(struct intel_engine_cs *engine) { - if (engine->wa_ctx.obj) { - i915_gem_object_ggtt_unpin(engine->wa_ctx.obj); - i915_gem_object_put(engine->wa_ctx.obj); - engine->wa_ctx.obj = NULL; - } + i915_vma_unpin_and_release(&engine->wa_ctx.vma); } static int intel_init_workaround_bb(struct intel_engine_cs *engine) { - int ret; + struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx; uint32_t *batch; uint32_t offset; struct page *page; - struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx; + int ret; WARN_ON(engine->id != RCS); @@ -1220,7 +1215,7 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) } /* some WA perform writes to scratch page, ensure it is valid */ - if (engine->scratch.obj == NULL) { + if (!engine->scratch) { DRM_ERROR("scratch page not allocated for %s\n", engine->name); return -EINVAL; } @@ -1231,7 +1226,7 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) return ret; } - page = i915_gem_object_get_dirty_page(wa_ctx->obj, 0); + page = i915_gem_object_get_dirty_page(wa_ctx->vma->obj, 0); batch = kmap_atomic(page); offset = 0; @@ -1278,7 +1273,7 @@ static void lrc_init_hws(struct intel_engine_cs *engine) struct drm_i915_private *dev_priv = engine->i915; I915_WRITE(RING_HWS_PGA(engine->mmio_base), - (u32)engine->status_page.gfx_addr); + engine->status_page.ggtt_offset); POSTING_READ(RING_HWS_PGA(engine->mmio_base)); } @@ -1488,7 +1483,8 @@ static int gen8_emit_flush_render(struct drm_i915_gem_request *request, { struct intel_ring *ring = request->ring; struct intel_engine_cs *engine = request->engine; - u32 scratch_addr = engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; + u32 scratch_addr = + i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES; bool vf_flush_wa = false, dc_flush_wa = false; u32 flags = 0; int ret; @@ -1700,9 +1696,9 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *engine) intel_engine_cleanup_common(engine); - if (engine->status_page.obj) { - i915_gem_object_unpin_map(engine->status_page.obj); - engine->status_page.obj = NULL; + if (engine->status_page.vma) { + i915_gem_object_unpin_map(engine->status_page.vma->obj); + engine->status_page.vma = NULL; } intel_lr_context_unpin(dev_priv->kernel_context, engine); @@ -1747,19 +1743,19 @@ logical_ring_default_irqs(struct intel_engine_cs *engine) } static int -lrc_setup_hws(struct intel_engine_cs *engine, - struct drm_i915_gem_object *dctx_obj) +lrc_setup_hws(struct intel_engine_cs *engine, struct i915_vma *vma) { + const int hws_offset = LRC_PPHWSP_PN * PAGE_SIZE; void *hws; /* The HWSP is part of the default context object in LRC mode. */ - engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(dctx_obj) + - LRC_PPHWSP_PN * PAGE_SIZE; - hws = i915_gem_object_pin_map(dctx_obj); + hws = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); if (IS_ERR(hws)) return PTR_ERR(hws); - engine->status_page.page_addr = hws + LRC_PPHWSP_PN * PAGE_SIZE; - engine->status_page.obj = dctx_obj; + + engine->status_page.page_addr = hws + hws_offset; + engine->status_page.ggtt_offset = i915_ggtt_offset(vma) + hws_offset; + engine->status_page.vma = vma; return 0; } @@ -1849,11 +1845,10 @@ int logical_render_ring_init(struct intel_engine_cs *engine) else engine->init_hw = gen8_init_render_ring; engine->init_context = gen8_init_rcs_context; - engine->cleanup = intel_fini_pipe_control; engine->emit_flush = gen8_emit_flush_render; engine->emit_request = gen8_emit_request_render; - ret = intel_init_pipe_control(engine, 4096); + ret = intel_engine_create_scratch(engine, 4096); if (ret) return ret; @@ -1968,7 +1963,7 @@ populate_lr_context(struct i915_gem_context *ctx, return ret; } - vaddr = i915_gem_object_pin_map(ctx_obj); + vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB); if (IS_ERR(vaddr)) { ret = PTR_ERR(vaddr); DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret); @@ -2025,9 +2020,9 @@ populate_lr_context(struct i915_gem_context *ctx, RING_INDIRECT_CTX(engine->mmio_base), 0); ASSIGN_CTX_REG(reg_state, CTX_RCS_INDIRECT_CTX_OFFSET, RING_INDIRECT_CTX_OFFSET(engine->mmio_base), 0); - if (engine->wa_ctx.obj) { + if (engine->wa_ctx.vma) { struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx; - uint32_t ggtt_offset = i915_gem_obj_ggtt_offset(wa_ctx->obj); + u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); reg_state[CTX_RCS_INDIRECT_CTX+1] = (ggtt_offset + wa_ctx->indirect_ctx.offset * sizeof(uint32_t)) | @@ -2131,6 +2126,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, { struct drm_i915_gem_object *ctx_obj; struct intel_context *ce = &ctx->engine[engine->id]; + struct i915_vma *vma; uint32_t context_size; struct intel_ring *ring; int ret; @@ -2148,6 +2144,12 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, return PTR_ERR(ctx_obj); } + vma = i915_vma_create(ctx_obj, &ctx->i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto error_deref_obj; + } + ring = intel_engine_create_ring(engine, ctx->ring_size); if (IS_ERR(ring)) { ret = PTR_ERR(ring); @@ -2161,7 +2163,7 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, } ce->ring = ring; - ce->state = ctx_obj; + ce->state = vma; ce->initialised = engine->init_context == NULL; return 0; @@ -2170,8 +2172,6 @@ error_ring_free: intel_ring_free(ring); error_deref_obj: i915_gem_object_put(ctx_obj); - ce->ring = NULL; - ce->state = NULL; return ret; } @@ -2182,24 +2182,23 @@ void intel_lr_context_reset(struct drm_i915_private *dev_priv, for_each_engine(engine, dev_priv) { struct intel_context *ce = &ctx->engine[engine->id]; - struct drm_i915_gem_object *ctx_obj = ce->state; void *vaddr; uint32_t *reg_state; - if (!ctx_obj) + if (!ce->state) continue; - vaddr = i915_gem_object_pin_map(ctx_obj); + vaddr = i915_gem_object_pin_map(ce->state->obj, I915_MAP_WB); if (WARN_ON(IS_ERR(vaddr))) continue; reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; - ctx_obj->dirty = true; reg_state[CTX_RING_HEAD+1] = 0; reg_state[CTX_RING_TAIL+1] = 0; - i915_gem_object_unpin_map(ctx_obj); + ce->state->obj->dirty = true; + i915_gem_object_unpin_map(ce->state->obj); ce->ring->head = 0; ce->ring->tail = 0; diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index e29f3d1..52d6ed6 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -48,6 +48,20 @@ struct intel_lvds_connector { struct notifier_block lid_notifier; }; +struct intel_lvds_pps { + /* 100us units */ + int t1_t2; + int t3; + int t4; + int t5; + int tx; + + int divider; + + int port; + bool powerdown_on_reset; +}; + struct intel_lvds_encoder { struct intel_encoder base; @@ -55,6 +69,9 @@ struct intel_lvds_encoder { i915_reg_t reg; u32 a3_power; + struct intel_lvds_pps init_pps; + u32 init_lvds_val; + struct intel_lvds_connector *attached_connector; }; @@ -136,6 +153,83 @@ static void intel_lvds_get_config(struct intel_encoder *encoder, pipe_config->base.adjusted_mode.crtc_clock = pipe_config->port_clock; } +static void intel_lvds_pps_get_hw_state(struct drm_i915_private *dev_priv, + struct intel_lvds_pps *pps) +{ + u32 val; + + pps->powerdown_on_reset = I915_READ(PP_CONTROL(0)) & PANEL_POWER_RESET; + + val = I915_READ(PP_ON_DELAYS(0)); + pps->port = (val & PANEL_PORT_SELECT_MASK) >> + PANEL_PORT_SELECT_SHIFT; + pps->t1_t2 = (val & PANEL_POWER_UP_DELAY_MASK) >> + PANEL_POWER_UP_DELAY_SHIFT; + pps->t5 = (val & PANEL_LIGHT_ON_DELAY_MASK) >> + PANEL_LIGHT_ON_DELAY_SHIFT; + + val = I915_READ(PP_OFF_DELAYS(0)); + pps->t3 = (val & PANEL_POWER_DOWN_DELAY_MASK) >> + PANEL_POWER_DOWN_DELAY_SHIFT; + pps->tx = (val & PANEL_LIGHT_OFF_DELAY_MASK) >> + PANEL_LIGHT_OFF_DELAY_SHIFT; + + val = I915_READ(PP_DIVISOR(0)); + pps->divider = (val & PP_REFERENCE_DIVIDER_MASK) >> + PP_REFERENCE_DIVIDER_SHIFT; + val = (val & PANEL_POWER_CYCLE_DELAY_MASK) >> + PANEL_POWER_CYCLE_DELAY_SHIFT; + /* + * Remove the BSpec specified +1 (100ms) offset that accounts for a + * too short power-cycle delay due to the asynchronous programming of + * the register. + */ + if (val) + val--; + /* Convert from 100ms to 100us units */ + pps->t4 = val * 1000; + + if (INTEL_INFO(dev_priv)->gen <= 4 && + pps->t1_t2 == 0 && pps->t5 == 0 && pps->t3 == 0 && pps->tx == 0) { + DRM_DEBUG_KMS("Panel power timings uninitialized, " + "setting defaults\n"); + /* Set T2 to 40ms and T5 to 200ms in 100 usec units */ + pps->t1_t2 = 40 * 10; + pps->t5 = 200 * 10; + /* Set T3 to 35ms and Tx to 200ms in 100 usec units */ + pps->t3 = 35 * 10; + pps->tx = 200 * 10; + } + + DRM_DEBUG_DRIVER("LVDS PPS:t1+t2 %d t3 %d t4 %d t5 %d tx %d " + "divider %d port %d powerdown_on_reset %d\n", + pps->t1_t2, pps->t3, pps->t4, pps->t5, pps->tx, + pps->divider, pps->port, pps->powerdown_on_reset); +} + +static void intel_lvds_pps_init_hw(struct drm_i915_private *dev_priv, + struct intel_lvds_pps *pps) +{ + u32 val; + + val = I915_READ(PP_CONTROL(0)); + WARN_ON((val & PANEL_UNLOCK_MASK) != PANEL_UNLOCK_REGS); + if (pps->powerdown_on_reset) + val |= PANEL_POWER_RESET; + I915_WRITE(PP_CONTROL(0), val); + + I915_WRITE(PP_ON_DELAYS(0), (pps->port << PANEL_PORT_SELECT_SHIFT) | + (pps->t1_t2 << PANEL_POWER_UP_DELAY_SHIFT) | + (pps->t5 << PANEL_LIGHT_ON_DELAY_SHIFT)); + I915_WRITE(PP_OFF_DELAYS(0), (pps->t3 << PANEL_POWER_DOWN_DELAY_SHIFT) | + (pps->tx << PANEL_LIGHT_OFF_DELAY_SHIFT)); + + val = pps->divider << PP_REFERENCE_DIVIDER_SHIFT; + val |= (DIV_ROUND_UP(pps->t4, 1000) + 1) << + PANEL_POWER_CYCLE_DELAY_SHIFT; + I915_WRITE(PP_DIVISOR(0), val); +} + static void intel_pre_enable_lvds(struct intel_encoder *encoder) { struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base); @@ -154,7 +248,9 @@ static void intel_pre_enable_lvds(struct intel_encoder *encoder) assert_pll_disabled(dev_priv, pipe); } - temp = I915_READ(lvds_encoder->reg); + intel_lvds_pps_init_hw(dev_priv, &lvds_encoder->init_pps); + + temp = lvds_encoder->init_lvds_val; temp |= LVDS_PORT_EN | LVDS_A0A2_CLKA_POWER_UP; if (HAS_PCH_CPT(dev)) { @@ -217,21 +313,12 @@ static void intel_enable_lvds(struct intel_encoder *encoder) struct intel_connector *intel_connector = &lvds_encoder->attached_connector->base; struct drm_i915_private *dev_priv = to_i915(dev); - i915_reg_t ctl_reg, stat_reg; - - if (HAS_PCH_SPLIT(dev)) { - ctl_reg = PCH_PP_CONTROL; - stat_reg = PCH_PP_STATUS; - } else { - ctl_reg = PP_CONTROL; - stat_reg = PP_STATUS; - } I915_WRITE(lvds_encoder->reg, I915_READ(lvds_encoder->reg) | LVDS_PORT_EN); - I915_WRITE(ctl_reg, I915_READ(ctl_reg) | POWER_TARGET_ON); + I915_WRITE(PP_CONTROL(0), I915_READ(PP_CONTROL(0)) | PANEL_POWER_ON); POSTING_READ(lvds_encoder->reg); - if (intel_wait_for_register(dev_priv, stat_reg, PP_ON, PP_ON, 1000)) + if (intel_wait_for_register(dev_priv, PP_STATUS(0), PP_ON, PP_ON, 1000)) DRM_ERROR("timed out waiting for panel to power on\n"); intel_panel_enable_backlight(intel_connector); @@ -242,18 +329,9 @@ static void intel_disable_lvds(struct intel_encoder *encoder) struct drm_device *dev = encoder->base.dev; struct intel_lvds_encoder *lvds_encoder = to_lvds_encoder(&encoder->base); struct drm_i915_private *dev_priv = to_i915(dev); - i915_reg_t ctl_reg, stat_reg; - - if (HAS_PCH_SPLIT(dev)) { - ctl_reg = PCH_PP_CONTROL; - stat_reg = PCH_PP_STATUS; - } else { - ctl_reg = PP_CONTROL; - stat_reg = PP_STATUS; - } - I915_WRITE(ctl_reg, I915_READ(ctl_reg) & ~POWER_TARGET_ON); - if (intel_wait_for_register(dev_priv, stat_reg, PP_ON, 0, 1000)) + I915_WRITE(PP_CONTROL(0), I915_READ(PP_CONTROL(0)) & ~PANEL_POWER_ON); + if (intel_wait_for_register(dev_priv, PP_STATUS(0), PP_ON, 0, 1000)) DRM_ERROR("timed out waiting for panel to power off\n"); I915_WRITE(lvds_encoder->reg, I915_READ(lvds_encoder->reg) & ~LVDS_PORT_EN); @@ -900,17 +978,6 @@ void intel_lvds_init(struct drm_device *dev) int pipe; u8 pin; - /* - * Unlock registers and just leave them unlocked. Do this before - * checking quirk lists to avoid bogus WARNINGs. - */ - if (HAS_PCH_SPLIT(dev)) { - I915_WRITE(PCH_PP_CONTROL, - I915_READ(PCH_PP_CONTROL) | PANEL_UNLOCK_REGS); - } else if (INTEL_INFO(dev_priv)->gen < 5) { - I915_WRITE(PP_CONTROL, - I915_READ(PP_CONTROL) | PANEL_UNLOCK_REGS); - } if (!intel_lvds_supported(dev)) return; @@ -943,18 +1010,6 @@ void intel_lvds_init(struct drm_device *dev) DRM_DEBUG_KMS("LVDS is not present in VBT, but enabled anyway\n"); } - /* Set the Panel Power On/Off timings if uninitialized. */ - if (INTEL_INFO(dev_priv)->gen < 5 && - I915_READ(PP_ON_DELAYS) == 0 && I915_READ(PP_OFF_DELAYS) == 0) { - /* Set T2 to 40ms and T5 to 200ms */ - I915_WRITE(PP_ON_DELAYS, 0x019007d0); - - /* Set T3 to 35ms and Tx to 200ms */ - I915_WRITE(PP_OFF_DELAYS, 0x015e07d0); - - DRM_DEBUG_KMS("Panel power timings uninitialized, setting defaults\n"); - } - lvds_encoder = kzalloc(sizeof(*lvds_encoder), GFP_KERNEL); if (!lvds_encoder) return; @@ -1020,6 +1075,10 @@ void intel_lvds_init(struct drm_device *dev) dev->mode_config.scaling_mode_property, DRM_MODE_SCALE_ASPECT); intel_connector->panel.fitting_mode = DRM_MODE_SCALE_ASPECT; + + intel_lvds_pps_get_hw_state(dev_priv, &lvds_encoder->init_pps); + lvds_encoder->init_lvds_val = lvds; + /* * LVDS discovery: * 1) check for EDID on DDC diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 90f3ab4..a24bc8c 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -171,8 +171,8 @@ struct overlay_registers { struct intel_overlay { struct drm_i915_private *i915; struct intel_crtc *crtc; - struct drm_i915_gem_object *vid_bo; - struct drm_i915_gem_object *old_vid_bo; + struct i915_vma *vma; + struct i915_vma *old_vma; bool active; bool pfit_active; u32 pfit_vscale_ratio; /* shifted-point number, (1<<12) == 1.0 */ @@ -196,7 +196,7 @@ intel_overlay_map_regs(struct intel_overlay *overlay) if (OVERLAY_NEEDS_PHYSICAL(dev_priv)) regs = (struct overlay_registers __iomem *)overlay->reg_bo->phys_handle->vaddr; else - regs = io_mapping_map_wc(dev_priv->ggtt.mappable, + regs = io_mapping_map_wc(&dev_priv->ggtt.mappable, overlay->flip_addr, PAGE_SIZE); @@ -317,15 +317,17 @@ static void intel_overlay_release_old_vid_tail(struct i915_gem_active *active, { struct intel_overlay *overlay = container_of(active, typeof(*overlay), last_flip); - struct drm_i915_gem_object *obj = overlay->old_vid_bo; + struct i915_vma *vma; - i915_gem_track_fb(obj, NULL, - INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe)); + vma = fetch_and_zero(&overlay->old_vma); + if (WARN_ON(!vma)) + return; - i915_gem_object_ggtt_unpin(obj); - i915_gem_object_put(obj); + i915_gem_track_fb(vma->obj, NULL, + INTEL_FRONTBUFFER_OVERLAY(overlay->crtc->pipe)); - overlay->old_vid_bo = NULL; + i915_gem_object_unpin_from_display_plane(vma); + i915_vma_put(vma); } static void intel_overlay_off_tail(struct i915_gem_active *active, @@ -333,15 +335,15 @@ static void intel_overlay_off_tail(struct i915_gem_active *active, { struct intel_overlay *overlay = container_of(active, typeof(*overlay), last_flip); - struct drm_i915_gem_object *obj = overlay->vid_bo; + struct i915_vma *vma; /* never have the overlay hw on without showing a frame */ - if (WARN_ON(!obj)) + vma = fetch_and_zero(&overlay->vma); + if (WARN_ON(!vma)) return; - i915_gem_object_ggtt_unpin(obj); - i915_gem_object_put(obj); - overlay->vid_bo = NULL; + i915_gem_object_unpin_from_display_plane(vma); + i915_vma_put(vma); overlay->crtc->overlay = NULL; overlay->crtc = NULL; @@ -421,7 +423,7 @@ static int intel_overlay_release_old_vid(struct intel_overlay *overlay) /* Only wait if there is actually an old frame to release to * guarantee forward progress. */ - if (!overlay->old_vid_bo) + if (!overlay->old_vma) return 0; if (I915_READ(ISR) & I915_OVERLAY_PLANE_FLIP_PENDING_INTERRUPT) { @@ -744,6 +746,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, struct drm_i915_private *dev_priv = overlay->i915; u32 swidth, swidthsw, sheight, ostride; enum pipe pipe = overlay->crtc->pipe; + struct i915_vma *vma; lockdep_assert_held(&dev_priv->drm.struct_mutex); WARN_ON(!drm_modeset_is_locked(&dev_priv->drm.mode_config.connection_mutex)); @@ -752,12 +755,12 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, if (ret != 0) return ret; - ret = i915_gem_object_pin_to_display_plane(new_bo, 0, + vma = i915_gem_object_pin_to_display_plane(new_bo, 0, &i915_ggtt_view_normal); - if (ret != 0) - return ret; + if (IS_ERR(vma)) + return PTR_ERR(vma); - ret = i915_gem_object_put_fence(new_bo); + ret = i915_vma_put_fence(vma); if (ret) goto out_unpin; @@ -798,7 +801,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, swidth = params->src_w; swidthsw = calc_swidthsw(dev_priv, params->offset_Y, tmp_width); sheight = params->src_h; - iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_Y, ®s->OBUF_0Y); + iowrite32(i915_ggtt_offset(vma) + params->offset_Y, ®s->OBUF_0Y); ostride = params->stride_Y; if (params->format & I915_OVERLAY_YUV_PLANAR) { @@ -812,8 +815,10 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, params->src_w/uv_hscale); swidthsw |= max_t(u32, tmp_U, tmp_V) << 16; sheight |= (params->src_h/uv_vscale) << 16; - iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_U, ®s->OBUF_0U); - iowrite32(i915_gem_obj_ggtt_offset(new_bo) + params->offset_V, ®s->OBUF_0V); + iowrite32(i915_ggtt_offset(vma) + params->offset_U, + ®s->OBUF_0U); + iowrite32(i915_ggtt_offset(vma) + params->offset_V, + ®s->OBUF_0V); ostride |= params->stride_UV << 16; } @@ -834,18 +839,18 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, if (ret) goto out_unpin; - i915_gem_track_fb(overlay->vid_bo, new_bo, + i915_gem_track_fb(overlay->vma->obj, new_bo, INTEL_FRONTBUFFER_OVERLAY(pipe)); - overlay->old_vid_bo = overlay->vid_bo; - overlay->vid_bo = new_bo; + overlay->old_vma = overlay->vma; + overlay->vma = vma; intel_frontbuffer_flip(dev_priv, INTEL_FRONTBUFFER_OVERLAY(pipe)); return 0; out_unpin: - i915_gem_object_ggtt_unpin(new_bo); + i915_gem_object_unpin_from_display_plane(vma); return ret; } @@ -1368,6 +1373,7 @@ void intel_setup_overlay(struct drm_i915_private *dev_priv) struct intel_overlay *overlay; struct drm_i915_gem_object *reg_bo; struct overlay_registers __iomem *regs; + struct i915_vma *vma = NULL; int ret; if (!HAS_OVERLAY(dev_priv)) @@ -1401,13 +1407,14 @@ void intel_setup_overlay(struct drm_i915_private *dev_priv) } overlay->flip_addr = reg_bo->phys_handle->busaddr; } else { - ret = i915_gem_object_ggtt_pin(reg_bo, NULL, + vma = i915_gem_object_ggtt_pin(reg_bo, NULL, 0, PAGE_SIZE, PIN_MAPPABLE); - if (ret) { + if (IS_ERR(vma)) { DRM_ERROR("failed to pin overlay register bo\n"); + ret = PTR_ERR(vma); goto out_free_bo; } - overlay->flip_addr = i915_gem_obj_ggtt_offset(reg_bo); + overlay->flip_addr = i915_ggtt_offset(vma); ret = i915_gem_object_set_to_gtt_domain(reg_bo, true); if (ret) { @@ -1439,8 +1446,8 @@ void intel_setup_overlay(struct drm_i915_private *dev_priv) return; out_unpin_bo: - if (!OVERLAY_NEEDS_PHYSICAL(dev_priv)) - i915_gem_object_ggtt_unpin(reg_bo); + if (vma) + i915_vma_unpin(vma); out_free_bo: i915_gem_object_put(reg_bo); out_free: @@ -1482,7 +1489,7 @@ intel_overlay_map_regs_atomic(struct intel_overlay *overlay) regs = (struct overlay_registers __iomem *) overlay->reg_bo->phys_handle->vaddr; else - regs = io_mapping_map_atomic_wc(dev_priv->ggtt.mappable, + regs = io_mapping_map_atomic_wc(&dev_priv->ggtt.mappable, overlay->flip_addr); return regs; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 81ab119..8a6751e 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3115,8 +3115,6 @@ skl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate) total_data_rate += intel_cstate->wm.skl.plane_y_data_rate[id]; } - WARN_ON(cstate->plane_mask && total_data_rate == 0); - return total_data_rate; } @@ -3920,9 +3918,24 @@ skl_compute_ddb(struct drm_atomic_state *state) * pretend that all pipes switched active status so that we'll * ensure a full DDB recompute. */ - if (dev_priv->wm.distrust_bios_wm) + if (dev_priv->wm.distrust_bios_wm) { + ret = drm_modeset_lock(&dev->mode_config.connection_mutex, + state->acquire_ctx); + if (ret) + return ret; + intel_state->active_pipe_changes = ~0; + /* + * We usually only initialize intel_state->active_crtcs if we + * we're doing a modeset; make sure this field is always + * initialized during the sanitization process that happens + * on the first commit too. + */ + if (!intel_state->modeset) + intel_state->active_crtcs = dev_priv->active_crtcs; + } + /* * If the modeset changes which CRTC's are active, we need to * recompute the DDB allocation for *all* active pipes, even @@ -5675,8 +5688,6 @@ static void valleyview_setup_pctx(struct drm_i915_private *dev_priv) u32 pcbr; int pctx_size = 24*1024; - mutex_lock(&dev_priv->drm.struct_mutex); - pcbr = I915_READ(VLV_PCBR); if (pcbr) { /* BIOS set it up already, grab the pre-alloc'd space */ @@ -5712,7 +5723,6 @@ static void valleyview_setup_pctx(struct drm_i915_private *dev_priv) out: DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR)); dev_priv->vlv_pctx = pctx; - mutex_unlock(&dev_priv->drm.struct_mutex); } static void valleyview_cleanup_pctx(struct drm_i915_private *dev_priv) @@ -6488,6 +6498,7 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) intel_runtime_pm_get(dev_priv); } + mutex_lock(&dev_priv->drm.struct_mutex); mutex_lock(&dev_priv->rps.hw_lock); /* Initialize RPS limits (for userspace) */ @@ -6529,6 +6540,7 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) dev_priv->rps.boost_freq = dev_priv->rps.max_freq; mutex_unlock(&dev_priv->rps.hw_lock); + mutex_unlock(&dev_priv->drm.struct_mutex); intel_autoenable_gt_powersave(dev_priv); } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index e08a1e1..cc5bcd1 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -176,7 +176,7 @@ intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req) { struct intel_ring *ring = req->ring; u32 scratch_addr = - req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; + i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; int ret; ret = intel_ring_begin(req, 6); @@ -212,7 +212,7 @@ gen6_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { struct intel_ring *ring = req->ring; u32 scratch_addr = - req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; + i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; u32 flags = 0; int ret; @@ -286,7 +286,7 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { struct intel_ring *ring = req->ring; u32 scratch_addr = - req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; + i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; u32 flags = 0; int ret; @@ -370,7 +370,8 @@ gen8_emit_pipe_control(struct drm_i915_gem_request *req, static int gen8_render_ring_flush(struct drm_i915_gem_request *req, u32 mode) { - u32 scratch_addr = req->engine->scratch.gtt_offset + 2 * CACHELINE_BYTES; + u32 scratch_addr = + i915_ggtt_offset(req->engine->scratch) + 2 * CACHELINE_BYTES; u32 flags = 0; int ret; @@ -466,7 +467,7 @@ static void intel_ring_setup_status_page(struct intel_engine_cs *engine) mmio = RING_HWS_PGA(engine->mmio_base); } - I915_WRITE(mmio, (u32)engine->status_page.gfx_addr); + I915_WRITE(mmio, engine->status_page.ggtt_offset); POSTING_READ(mmio); /* @@ -497,7 +498,7 @@ static bool stop_ring(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - if (!IS_GEN2(dev_priv)) { + if (INTEL_GEN(dev_priv) > 2) { I915_WRITE_MODE(engine, _MASKED_BIT_ENABLE(STOP_RING)); if (intel_wait_for_register(dev_priv, RING_MI_MODE(engine->mmio_base), @@ -519,7 +520,7 @@ static bool stop_ring(struct intel_engine_cs *engine) I915_WRITE_HEAD(engine, 0); I915_WRITE_TAIL(engine, 0); - if (!IS_GEN2(dev_priv)) { + if (INTEL_GEN(dev_priv) > 2) { (void)I915_READ_CTL(engine); I915_WRITE_MODE(engine, _MASKED_BIT_DISABLE(STOP_RING)); } @@ -531,7 +532,6 @@ static int init_ring_common(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; struct intel_ring *ring = engine->buffer; - struct drm_i915_gem_object *obj = ring->obj; int ret = 0; intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); @@ -571,7 +571,7 @@ static int init_ring_common(struct intel_engine_cs *engine) * registers with the above sequence (the readback of the HEAD registers * also enforces ordering), otherwise the hw might lose the new ring * register values. */ - I915_WRITE_START(engine, i915_gem_obj_ggtt_offset(obj)); + I915_WRITE_START(engine, i915_ggtt_offset(ring->vma)); /* WaClearRingBufHeadRegAtInit:ctg,elk */ if (I915_READ_HEAD(engine)) @@ -586,16 +586,16 @@ static int init_ring_common(struct intel_engine_cs *engine) /* If the head is still not zero, the ring is dead */ if (wait_for((I915_READ_CTL(engine) & RING_VALID) != 0 && - I915_READ_START(engine) == i915_gem_obj_ggtt_offset(obj) && + I915_READ_START(engine) == i915_ggtt_offset(ring->vma) && (I915_READ_HEAD(engine) & HEAD_ADDR) == 0, 50)) { DRM_ERROR("%s initialization failed " - "ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08lx]\n", + "ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08x]\n", engine->name, I915_READ_CTL(engine), I915_READ_CTL(engine) & RING_VALID, I915_READ_HEAD(engine), I915_READ_TAIL(engine), I915_READ_START(engine), - (unsigned long)i915_gem_obj_ggtt_offset(obj)); + i915_ggtt_offset(ring->vma)); ret = -EIO; goto out; } @@ -613,48 +613,6 @@ out: return ret; } -void intel_fini_pipe_control(struct intel_engine_cs *engine) -{ - if (engine->scratch.obj == NULL) - return; - - i915_gem_object_ggtt_unpin(engine->scratch.obj); - i915_gem_object_put(engine->scratch.obj); - engine->scratch.obj = NULL; -} - -int intel_init_pipe_control(struct intel_engine_cs *engine, int size) -{ - struct drm_i915_gem_object *obj; - int ret; - - WARN_ON(engine->scratch.obj); - - obj = i915_gem_object_create_stolen(&engine->i915->drm, size); - if (!obj) - obj = i915_gem_object_create(&engine->i915->drm, size); - if (IS_ERR(obj)) { - DRM_ERROR("Failed to allocate scratch page\n"); - ret = PTR_ERR(obj); - goto err; - } - - ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 4096, PIN_HIGH); - if (ret) - goto err_unref; - - engine->scratch.obj = obj; - engine->scratch.gtt_offset = i915_gem_obj_ggtt_offset(obj); - DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n", - engine->name, engine->scratch.gtt_offset); - return 0; - -err_unref: - i915_gem_object_put(engine->scratch.obj); -err: - return ret; -} - static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req) { struct intel_ring *ring = req->ring; @@ -1300,13 +1258,7 @@ static void render_ring_cleanup(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - if (dev_priv->semaphore_obj) { - i915_gem_object_ggtt_unpin(dev_priv->semaphore_obj); - i915_gem_object_put(dev_priv->semaphore_obj); - dev_priv->semaphore_obj = NULL; - } - - intel_fini_pipe_control(engine); + i915_vma_unpin_and_release(&dev_priv->semaphore); } static int gen8_rcs_signal(struct drm_i915_gem_request *req) @@ -1317,7 +1269,7 @@ static int gen8_rcs_signal(struct drm_i915_gem_request *req) enum intel_engine_id id; int ret, num_rings; - num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask); + num_rings = INTEL_INFO(dev_priv)->num_rings; ret = intel_ring_begin(req, (num_rings-1) * 8); if (ret) return ret; @@ -1354,7 +1306,7 @@ static int gen8_xcs_signal(struct drm_i915_gem_request *req) enum intel_engine_id id; int ret, num_rings; - num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask); + num_rings = INTEL_INFO(dev_priv)->num_rings; ret = intel_ring_begin(req, (num_rings-1) * 6); if (ret) return ret; @@ -1385,18 +1337,21 @@ static int gen6_signal(struct drm_i915_gem_request *req) { struct intel_ring *ring = req->ring; struct drm_i915_private *dev_priv = req->i915; - struct intel_engine_cs *useless; - enum intel_engine_id id; + struct intel_engine_cs *engine; int ret, num_rings; - num_rings = hweight32(INTEL_INFO(dev_priv)->ring_mask); + num_rings = INTEL_INFO(dev_priv)->num_rings; ret = intel_ring_begin(req, round_up((num_rings-1) * 3, 2)); if (ret) return ret; - for_each_engine_id(useless, dev_priv, id) { - i915_reg_t mbox_reg = req->engine->semaphore.mbox.signal[id]; + for_each_engine(engine, dev_priv) { + i915_reg_t mbox_reg; + + if (!(BIT(engine->hw_id) & GEN6_SEMAPHORES_MASK)) + continue; + mbox_reg = req->engine->semaphore.mbox.signal[engine->hw_id]; if (i915_mmio_reg_valid(mbox_reg)) { intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); intel_ring_emit_reg(ring, mbox_reg); @@ -1543,7 +1498,7 @@ gen6_ring_sync_to(struct drm_i915_gem_request *req, u32 dw1 = MI_SEMAPHORE_MBOX | MI_SEMAPHORE_COMPARE | MI_SEMAPHORE_REGISTER; - u32 wait_mbox = signal->engine->semaphore.mbox.wait[req->engine->id]; + u32 wait_mbox = signal->engine->semaphore.mbox.wait[req->engine->hw_id]; int ret; WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID); @@ -1764,7 +1719,7 @@ i830_emit_bb_start(struct drm_i915_gem_request *req, unsigned int dispatch_flags) { struct intel_ring *ring = req->ring; - u32 cs_offset = req->engine->scratch.gtt_offset; + u32 cs_offset = i915_ggtt_offset(req->engine->scratch); int ret; ret = intel_ring_begin(req, 6); @@ -1853,79 +1808,79 @@ static void cleanup_phys_status_page(struct intel_engine_cs *engine) static void cleanup_status_page(struct intel_engine_cs *engine) { - struct drm_i915_gem_object *obj; + struct i915_vma *vma; - obj = engine->status_page.obj; - if (obj == NULL) + vma = fetch_and_zero(&engine->status_page.vma); + if (!vma) return; - kunmap(sg_page(obj->pages->sgl)); - i915_gem_object_ggtt_unpin(obj); - i915_gem_object_put(obj); - engine->status_page.obj = NULL; + i915_vma_unpin(vma); + i915_gem_object_unpin_map(vma->obj); + i915_vma_put(vma); } static int init_status_page(struct intel_engine_cs *engine) { - struct drm_i915_gem_object *obj = engine->status_page.obj; - - if (obj == NULL) { - unsigned flags; - int ret; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + unsigned int flags; + int ret; - obj = i915_gem_object_create(&engine->i915->drm, 4096); - if (IS_ERR(obj)) { - DRM_ERROR("Failed to allocate status page\n"); - return PTR_ERR(obj); - } + obj = i915_gem_object_create(&engine->i915->drm, 4096); + if (IS_ERR(obj)) { + DRM_ERROR("Failed to allocate status page\n"); + return PTR_ERR(obj); + } - ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); - if (ret) - goto err_unref; - - flags = 0; - if (!HAS_LLC(engine->i915)) - /* On g33, we cannot place HWS above 256MiB, so - * restrict its pinning to the low mappable arena. - * Though this restriction is not documented for - * gen4, gen5, or byt, they also behave similarly - * and hang if the HWS is placed at the top of the - * GTT. To generalise, it appears that all !llc - * platforms have issues with us placing the HWS - * above the mappable region (even though we never - * actualy map it). - */ - flags |= PIN_MAPPABLE; - ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 4096, flags); - if (ret) { -err_unref: - i915_gem_object_put(obj); - return ret; - } + ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); + if (ret) + goto err; - engine->status_page.obj = obj; + vma = i915_vma_create(obj, &engine->i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto err; } - engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(obj); - engine->status_page.page_addr = kmap(sg_page(obj->pages->sgl)); - memset(engine->status_page.page_addr, 0, PAGE_SIZE); + flags = PIN_GLOBAL; + if (!HAS_LLC(engine->i915)) + /* On g33, we cannot place HWS above 256MiB, so + * restrict its pinning to the low mappable arena. + * Though this restriction is not documented for + * gen4, gen5, or byt, they also behave similarly + * and hang if the HWS is placed at the top of the + * GTT. To generalise, it appears that all !llc + * platforms have issues with us placing the HWS + * above the mappable region (even though we never + * actualy map it). + */ + flags |= PIN_MAPPABLE; + ret = i915_vma_pin(vma, 0, 4096, flags); + if (ret) + goto err; - DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n", - engine->name, engine->status_page.gfx_addr); + engine->status_page.vma = vma; + engine->status_page.ggtt_offset = i915_ggtt_offset(vma); + engine->status_page.page_addr = + i915_gem_object_pin_map(obj, I915_MAP_WB); + DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n", + engine->name, i915_ggtt_offset(vma)); return 0; + +err: + i915_gem_object_put(obj); + return ret; } static int init_phys_status_page(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; - if (!dev_priv->status_page_dmah) { - dev_priv->status_page_dmah = - drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE); - if (!dev_priv->status_page_dmah) - return -ENOMEM; - } + dev_priv->status_page_dmah = + drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE); + if (!dev_priv->status_page_dmah) + return -ENOMEM; engine->status_page.page_addr = dev_priv->status_page_dmah->vaddr; memset(engine->status_page.page_addr, 0, PAGE_SIZE); @@ -1935,55 +1890,46 @@ static int init_phys_status_page(struct intel_engine_cs *engine) int intel_ring_pin(struct intel_ring *ring) { - struct drm_i915_private *dev_priv = ring->engine->i915; - struct drm_i915_gem_object *obj = ring->obj; /* Ring wraparound at offset 0 sometimes hangs. No idea why. */ - unsigned flags = PIN_OFFSET_BIAS | 4096; + unsigned int flags = PIN_GLOBAL | PIN_OFFSET_BIAS | 4096; + enum i915_map_type map; + struct i915_vma *vma = ring->vma; void *addr; int ret; - if (HAS_LLC(dev_priv) && !obj->stolen) { - ret = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE, flags); - if (ret) - return ret; + GEM_BUG_ON(ring->vaddr); - ret = i915_gem_object_set_to_cpu_domain(obj, true); - if (ret) - goto err_unpin; + map = HAS_LLC(ring->engine->i915) ? I915_MAP_WB : I915_MAP_WC; - addr = i915_gem_object_pin_map(obj); - if (IS_ERR(addr)) { - ret = PTR_ERR(addr); - goto err_unpin; - } - } else { - ret = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE, - flags | PIN_MAPPABLE); - if (ret) - return ret; + if (vma->obj->stolen) + flags |= PIN_MAPPABLE; - ret = i915_gem_object_set_to_gtt_domain(obj, true); - if (ret) - goto err_unpin; + if (!(vma->flags & I915_VMA_GLOBAL_BIND)) { + if (flags & PIN_MAPPABLE || map == I915_MAP_WC) + ret = i915_gem_object_set_to_gtt_domain(vma->obj, true); + else + ret = i915_gem_object_set_to_cpu_domain(vma->obj, true); + if (unlikely(ret)) + return ret; + } - /* Access through the GTT requires the device to be awake. */ - assert_rpm_wakelock_held(dev_priv); + ret = i915_vma_pin(vma, 0, PAGE_SIZE, flags); + if (unlikely(ret)) + return ret; - addr = (void __force *) - i915_vma_pin_iomap(i915_gem_obj_to_ggtt(obj)); - if (IS_ERR(addr)) { - ret = PTR_ERR(addr); - goto err_unpin; - } - } + if (i915_vma_is_map_and_fenceable(vma)) + addr = (void __force *)i915_vma_pin_iomap(vma); + else + addr = i915_gem_object_pin_map(vma->obj, map); + if (IS_ERR(addr)) + goto err; ring->vaddr = addr; - ring->vma = i915_gem_obj_to_ggtt(obj); return 0; -err_unpin: - i915_gem_object_ggtt_unpin(obj); - return ret; +err: + i915_vma_unpin(vma); + return PTR_ERR(addr); } void intel_ring_unpin(struct intel_ring *ring) @@ -1991,60 +1937,54 @@ void intel_ring_unpin(struct intel_ring *ring) GEM_BUG_ON(!ring->vma); GEM_BUG_ON(!ring->vaddr); - if (HAS_LLC(ring->engine->i915) && !ring->obj->stolen) - i915_gem_object_unpin_map(ring->obj); - else + if (i915_vma_is_map_and_fenceable(ring->vma)) i915_vma_unpin_iomap(ring->vma); + else + i915_gem_object_unpin_map(ring->vma->obj); ring->vaddr = NULL; - i915_gem_object_ggtt_unpin(ring->obj); - ring->vma = NULL; + i915_vma_unpin(ring->vma); } -static void intel_destroy_ringbuffer_obj(struct intel_ring *ring) -{ - i915_gem_object_put(ring->obj); - ring->obj = NULL; -} - -static int intel_alloc_ringbuffer_obj(struct drm_device *dev, - struct intel_ring *ring) +static struct i915_vma * +intel_ring_create_vma(struct drm_i915_private *dev_priv, int size) { struct drm_i915_gem_object *obj; + struct i915_vma *vma; - obj = NULL; - if (!HAS_LLC(dev)) - obj = i915_gem_object_create_stolen(dev, ring->size); - if (obj == NULL) - obj = i915_gem_object_create(dev, ring->size); + obj = i915_gem_object_create_stolen(&dev_priv->drm, size); + if (!obj) + obj = i915_gem_object_create(&dev_priv->drm, size); if (IS_ERR(obj)) - return PTR_ERR(obj); + return ERR_CAST(obj); /* mark ring buffers as read-only from GPU side by default */ obj->gt_ro = 1; - ring->obj = obj; + vma = i915_vma_create(obj, &dev_priv->ggtt.base, NULL); + if (IS_ERR(vma)) + goto err; + + return vma; - return 0; +err: + i915_gem_object_put(obj); + return vma; } struct intel_ring * intel_engine_create_ring(struct intel_engine_cs *engine, int size) { struct intel_ring *ring; - int ret; + struct i915_vma *vma; GEM_BUG_ON(!is_power_of_2(size)); ring = kzalloc(sizeof(*ring), GFP_KERNEL); - if (ring == NULL) { - DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n", - engine->name); + if (!ring) return ERR_PTR(-ENOMEM); - } ring->engine = engine; - list_add(&ring->link, &engine->buffers); INIT_LIST_HEAD(&ring->request_list); @@ -2060,22 +2000,21 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size) ring->last_retired_head = -1; intel_ring_update_space(ring); - ret = intel_alloc_ringbuffer_obj(&engine->i915->drm, ring); - if (ret) { - DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s: %d\n", - engine->name, ret); - list_del(&ring->link); + vma = intel_ring_create_vma(engine->i915, size); + if (IS_ERR(vma)) { kfree(ring); - return ERR_PTR(ret); + return ERR_CAST(vma); } + ring->vma = vma; + list_add(&ring->link, &engine->buffers); return ring; } void intel_ring_free(struct intel_ring *ring) { - intel_destroy_ringbuffer_obj(ring); + i915_vma_put(ring->vma); list_del(&ring->link); kfree(ring); } @@ -2092,8 +2031,12 @@ static int intel_ring_context_pin(struct i915_gem_context *ctx, return 0; if (ce->state) { - ret = i915_gem_object_ggtt_pin(ce->state, NULL, 0, - ctx->ggtt_alignment, 0); + ret = i915_gem_object_set_to_gtt_domain(ce->state->obj, false); + if (ret) + goto error; + + ret = i915_vma_pin(ce->state, 0, ctx->ggtt_alignment, + PIN_GLOBAL | PIN_HIGH); if (ret) goto error; } @@ -2127,7 +2070,7 @@ static void intel_ring_context_unpin(struct i915_gem_context *ctx, return; if (ce->state) - i915_gem_object_ggtt_unpin(ce->state); + i915_vma_unpin(ce->state); i915_gem_context_put(ctx); } @@ -2165,7 +2108,6 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) ret = PTR_ERR(ring); goto error; } - engine->buffer = ring; if (I915_NEED_GFX_HWS(dev_priv)) { ret = init_status_page(engine); @@ -2180,11 +2122,10 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) ret = intel_ring_pin(ring); if (ret) { - DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n", - engine->name, ret); - intel_destroy_ringbuffer_obj(ring); + intel_ring_free(ring); goto error; } + engine->buffer = ring; return 0; @@ -2203,7 +2144,8 @@ void intel_engine_cleanup(struct intel_engine_cs *engine) dev_priv = engine->i915; if (engine->buffer) { - WARN_ON(!IS_GEN2(dev_priv) && (I915_READ_MODE(engine) & MODE_IDLE) == 0); + WARN_ON(INTEL_GEN(dev_priv) > 2 && + (I915_READ_MODE(engine) & MODE_IDLE) == 0); intel_ring_unpin(engine->buffer); intel_ring_free(engine->buffer); @@ -2371,50 +2313,6 @@ int intel_ring_cacheline_align(struct drm_i915_gem_request *req) return 0; } -void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno) -{ - struct drm_i915_private *dev_priv = engine->i915; - - /* Our semaphore implementation is strictly monotonic (i.e. we proceed - * so long as the semaphore value in the register/page is greater - * than the sync value), so whenever we reset the seqno, - * so long as we reset the tracking semaphore value to 0, it will - * always be before the next request's seqno. If we don't reset - * the semaphore value, then when the seqno moves backwards all - * future waits will complete instantly (causing rendering corruption). - */ - if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) { - I915_WRITE(RING_SYNC_0(engine->mmio_base), 0); - I915_WRITE(RING_SYNC_1(engine->mmio_base), 0); - if (HAS_VEBOX(dev_priv)) - I915_WRITE(RING_SYNC_2(engine->mmio_base), 0); - } - if (dev_priv->semaphore_obj) { - struct drm_i915_gem_object *obj = dev_priv->semaphore_obj; - struct page *page = i915_gem_object_get_dirty_page(obj, 0); - void *semaphores = kmap(page); - memset(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0), - 0, I915_NUM_ENGINES * gen8_semaphore_seqno_size); - kunmap(page); - } - memset(engine->semaphore.sync_seqno, 0, - sizeof(engine->semaphore.sync_seqno)); - - intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno); - if (engine->irq_seqno_barrier) - engine->irq_seqno_barrier(engine); - engine->last_submitted_seqno = seqno; - - engine->hangcheck.seqno = seqno; - - /* After manually advancing the seqno, fake the interrupt in case - * there are any waiters for that seqno. - */ - rcu_read_lock(); - intel_engine_wakeup(engine); - rcu_read_unlock(); -} - static void gen6_bsd_submit_request(struct drm_i915_gem_request *request) { struct drm_i915_private *dev_priv = request->i915; @@ -2624,35 +2522,36 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, if (!i915.semaphores) return; - if (INTEL_GEN(dev_priv) >= 8 && !dev_priv->semaphore_obj) { + if (INTEL_GEN(dev_priv) >= 8 && !dev_priv->semaphore) { + struct i915_vma *vma; + obj = i915_gem_object_create(&dev_priv->drm, 4096); - if (IS_ERR(obj)) { - DRM_ERROR("Failed to allocate semaphore bo. Disabling semaphores\n"); - i915.semaphores = 0; - } else { - i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); - ret = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); - if (ret != 0) { - i915_gem_object_put(obj); - DRM_ERROR("Failed to pin semaphore bo. Disabling semaphores\n"); - i915.semaphores = 0; - } else { - dev_priv->semaphore_obj = obj; - } - } - } + if (IS_ERR(obj)) + goto err; - if (!i915.semaphores) - return; + vma = i915_vma_create(obj, &dev_priv->ggtt.base, NULL); + if (IS_ERR(vma)) + goto err_obj; + + ret = i915_gem_object_set_to_gtt_domain(obj, false); + if (ret) + goto err_obj; + + ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + if (ret) + goto err_obj; + + dev_priv->semaphore = vma; + } if (INTEL_GEN(dev_priv) >= 8) { - u64 offset = i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj); + u32 offset = i915_ggtt_offset(dev_priv->semaphore); engine->semaphore.sync_to = gen8_ring_sync_to; engine->semaphore.signal = gen8_xcs_signal; for (i = 0; i < I915_NUM_ENGINES; i++) { - u64 ring_offset; + u32 ring_offset; if (i != engine->id) ring_offset = offset + GEN8_SEMAPHORE_OFFSET(engine->id, i); @@ -2672,47 +2571,55 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, * initialized as INVALID. Gen8 will initialize the * sema between VCS2 and RCS later. */ - for (i = 0; i < I915_NUM_ENGINES; i++) { + for (i = 0; i < GEN6_NUM_SEMAPHORES; i++) { static const struct { u32 wait_mbox; i915_reg_t mbox_reg; - } sem_data[I915_NUM_ENGINES][I915_NUM_ENGINES] = { - [RCS] = { - [VCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_RV, .mbox_reg = GEN6_VRSYNC }, - [BCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_RB, .mbox_reg = GEN6_BRSYNC }, - [VECS] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC }, + } sem_data[GEN6_NUM_SEMAPHORES][GEN6_NUM_SEMAPHORES] = { + [RCS_HW] = { + [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RV, .mbox_reg = GEN6_VRSYNC }, + [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RB, .mbox_reg = GEN6_BRSYNC }, + [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_RVE, .mbox_reg = GEN6_VERSYNC }, }, - [VCS] = { - [RCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VR, .mbox_reg = GEN6_RVSYNC }, - [BCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VB, .mbox_reg = GEN6_BVSYNC }, - [VECS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC }, + [VCS_HW] = { + [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VR, .mbox_reg = GEN6_RVSYNC }, + [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VB, .mbox_reg = GEN6_BVSYNC }, + [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VVE, .mbox_reg = GEN6_VEVSYNC }, }, - [BCS] = { - [RCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_BR, .mbox_reg = GEN6_RBSYNC }, - [VCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_BV, .mbox_reg = GEN6_VBSYNC }, - [VECS] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC }, + [BCS_HW] = { + [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BR, .mbox_reg = GEN6_RBSYNC }, + [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BV, .mbox_reg = GEN6_VBSYNC }, + [VECS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_BVE, .mbox_reg = GEN6_VEBSYNC }, }, - [VECS] = { - [RCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC }, - [VCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC }, - [BCS] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC }, + [VECS_HW] = { + [RCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VER, .mbox_reg = GEN6_RVESYNC }, + [VCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEV, .mbox_reg = GEN6_VVESYNC }, + [BCS_HW] = { .wait_mbox = MI_SEMAPHORE_SYNC_VEB, .mbox_reg = GEN6_BVESYNC }, }, }; u32 wait_mbox; i915_reg_t mbox_reg; - if (i == engine->id || i == VCS2) { + if (i == engine->hw_id) { wait_mbox = MI_SEMAPHORE_SYNC_INVALID; mbox_reg = GEN6_NOSYNC; } else { - wait_mbox = sem_data[engine->id][i].wait_mbox; - mbox_reg = sem_data[engine->id][i].mbox_reg; + wait_mbox = sem_data[engine->hw_id][i].wait_mbox; + mbox_reg = sem_data[engine->hw_id][i].mbox_reg; } engine->semaphore.mbox.wait[i] = wait_mbox; engine->semaphore.mbox.signal[i] = mbox_reg; } } + + return; + +err_obj: + i915_gem_object_put(obj); +err: + DRM_DEBUG_DRIVER("Failed to allocate space for semaphores, disabling\n"); + i915.semaphores = 0; } static void intel_ring_init_irq(struct drm_i915_private *dev_priv, @@ -2808,11 +2715,11 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine) return ret; if (INTEL_GEN(dev_priv) >= 6) { - ret = intel_init_pipe_control(engine, 4096); + ret = intel_engine_create_scratch(engine, 4096); if (ret) return ret; } else if (HAS_BROKEN_CS_TLB(dev_priv)) { - ret = intel_init_pipe_control(engine, I830_WA_SIZE); + ret = intel_engine_create_scratch(engine, I830_WA_SIZE); if (ret) return ret; } diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 43e545e..84aea54 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -26,10 +26,10 @@ */ #define I915_RING_FREE_SPACE 64 -struct intel_hw_status_page { - u32 *page_addr; - unsigned int gfx_addr; - struct drm_i915_gem_object *obj; +struct intel_hw_status_page { + struct i915_vma *vma; + u32 *page_addr; + u32 ggtt_offset; }; #define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base)) @@ -57,10 +57,10 @@ struct intel_hw_status_page { #define GEN8_SEMAPHORE_OFFSET(__from, __to) \ (((__from) * I915_NUM_ENGINES + (__to)) * gen8_semaphore_seqno_size) #define GEN8_SIGNAL_OFFSET(__ring, to) \ - (i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \ + (dev_priv->semaphore->node.start + \ GEN8_SEMAPHORE_OFFSET((__ring)->id, (to))) #define GEN8_WAIT_OFFSET(__ring, from) \ - (i915_gem_obj_ggtt_offset(dev_priv->semaphore_obj) + \ + (dev_priv->semaphore->node.start + \ GEN8_SEMAPHORE_OFFSET(from, (__ring)->id)) enum intel_engine_hangcheck_action { @@ -75,7 +75,6 @@ enum intel_engine_hangcheck_action { struct intel_engine_hangcheck { u64 acthd; - unsigned long user_interrupts; u32 seqno; int score; enum intel_engine_hangcheck_action action; @@ -84,9 +83,8 @@ struct intel_engine_hangcheck { }; struct intel_ring { - struct drm_i915_gem_object *obj; - void *vaddr; struct i915_vma *vma; + void *vaddr; struct intel_engine_cs *engine; struct list_head link; @@ -124,12 +122,12 @@ struct drm_i915_reg_table; * an option for future use. * size: size of the batch in DWORDS */ -struct i915_ctx_workarounds { +struct i915_ctx_workarounds { struct i915_wa_ctx_bb { u32 offset; u32 size; } indirect_ctx, per_ctx; - struct drm_i915_gem_object *obj; + struct i915_vma *vma; }; struct drm_i915_gem_request; @@ -147,8 +145,14 @@ struct intel_engine_cs { #define I915_NUM_ENGINES 5 #define _VCS(n) (VCS + (n)) unsigned int exec_id; - unsigned int hw_id; - unsigned int guc_id; /* XXX same as hw_id? */ + enum intel_engine_hw_id { + RCS_HW = 0, + VCS_HW, + BCS_HW, + VECS_HW, + VCS2_HW + } hw_id; + enum intel_engine_hw_id guc_id; /* XXX same as hw_id? */ u64 fence_context; u32 mmio_base; unsigned int irq_shift; @@ -172,8 +176,7 @@ struct intel_engine_cs { * the overhead of waking that client is much preferred. */ struct intel_breadcrumbs { - struct task_struct *irq_seqno_bh; /* bh for user interrupts */ - unsigned long irq_wakeups; + struct task_struct __rcu *irq_seqno_bh; /* bh for interrupts */ bool irq_posted; spinlock_t lock; /* protects the lists of requests */ @@ -183,6 +186,9 @@ struct intel_engine_cs { struct task_struct *signaler; /* used for fence signalling */ struct drm_i915_gem_request *first_signal; struct timer_list fake_irq; /* used after a missed interrupt */ + struct timer_list hangcheck; /* detect missed interrupts */ + + unsigned long timeout; bool irq_enabled : 1; bool rpm_wakelock : 1; @@ -197,6 +203,7 @@ struct intel_engine_cs { struct intel_hw_status_page status_page; struct i915_ctx_workarounds wa_ctx; + struct i915_vma *scratch; u32 irq_keep_mask; /* always keep these interrupts */ u32 irq_enable_mask; /* bitmask to enable ring interrupt */ @@ -270,11 +277,14 @@ struct intel_engine_cs { u32 sync_seqno[I915_NUM_ENGINES-1]; union { +#define GEN6_SEMAPHORE_LAST VECS_HW +#define GEN6_NUM_SEMAPHORES (GEN6_SEMAPHORE_LAST + 1) +#define GEN6_SEMAPHORES_MASK GENMASK(GEN6_SEMAPHORE_LAST, 0) struct { /* our mbox written by others */ - u32 wait[I915_NUM_ENGINES]; + u32 wait[GEN6_NUM_SEMAPHORES]; /* mboxes this ring signals to */ - i915_reg_t signal[I915_NUM_ENGINES]; + i915_reg_t signal[GEN6_NUM_SEMAPHORES]; } mbox; u64 signal_ggtt[I915_NUM_ENGINES]; }; @@ -310,7 +320,7 @@ struct intel_engine_cs { /* An RCU guarded pointer to the last request. No reference is * held to the request, users must carefully acquire a reference to - * the request using i915_gem_active_get_request_rcu(), or hold the + * the request using i915_gem_active_get_rcu(), or hold the * struct_mutex. */ struct i915_gem_active last_request; @@ -319,11 +329,6 @@ struct intel_engine_cs { struct intel_engine_hangcheck hangcheck; - struct { - struct drm_i915_gem_object *obj; - u32 gtt_offset; - } scratch; - bool needs_cmd_parser; /* @@ -475,11 +480,9 @@ void intel_ring_update_space(struct intel_ring *ring); void intel_engine_init_seqno(struct intel_engine_cs *engine, u32 seqno); -int intel_init_pipe_control(struct intel_engine_cs *engine, int size); -void intel_fini_pipe_control(struct intel_engine_cs *engine); - void intel_engine_setup_common(struct intel_engine_cs *engine); int intel_engine_init_common(struct intel_engine_cs *engine); +int intel_engine_create_scratch(struct intel_engine_cs *engine, int size); void intel_engine_cleanup_common(struct intel_engine_cs *engine); static inline int intel_engine_idle(struct intel_engine_cs *engine, @@ -515,7 +518,7 @@ int init_workarounds_ring(struct intel_engine_cs *engine); static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine) { - return engine->status_page.gfx_addr + I915_GEM_HWS_INDEX_ADDR; + return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR; } /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */ @@ -538,29 +541,35 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine, struct intel_wait *wait); void intel_engine_enable_signaling(struct drm_i915_gem_request *request); -static inline bool intel_engine_has_waiter(struct intel_engine_cs *engine) +static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine) { - return READ_ONCE(engine->breadcrumbs.irq_seqno_bh); + return rcu_access_pointer(engine->breadcrumbs.irq_seqno_bh); } -static inline bool intel_engine_wakeup(struct intel_engine_cs *engine) +static inline bool intel_engine_wakeup(const struct intel_engine_cs *engine) { bool wakeup = false; - struct task_struct *tsk = READ_ONCE(engine->breadcrumbs.irq_seqno_bh); + /* Note that for this not to dangerously chase a dangling pointer, - * the caller is responsible for ensure that the task remain valid for - * wake_up_process() i.e. that the RCU grace period cannot expire. + * we must hold the rcu_read_lock here. * * Also note that tsk is likely to be in !TASK_RUNNING state so an * early test for tsk->state != TASK_RUNNING before wake_up_process() * is unlikely to be beneficial. */ - if (tsk) - wakeup = wake_up_process(tsk); + if (intel_engine_has_waiter(engine)) { + struct task_struct *tsk; + + rcu_read_lock(); + tsk = rcu_dereference(engine->breadcrumbs.irq_seqno_bh); + if (tsk) + wakeup = wake_up_process(tsk); + rcu_read_unlock(); + } + return wakeup; } -void intel_engine_enable_fake_irq(struct intel_engine_cs *engine); void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine); unsigned int intel_kick_waiters(struct drm_i915_private *i915); unsigned int intel_kick_signalers(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 1c603bb..a1d73c2 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -592,6 +592,8 @@ void bxt_disable_dc9(struct drm_i915_private *dev_priv) DRM_DEBUG_KMS("Disabling DC9\n"); gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); + + intel_pps_unlock_regs_wa(dev_priv); } static void assert_csr_loaded(struct drm_i915_private *dev_priv) @@ -854,7 +856,7 @@ static void bxt_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv, struct i915_power_well *power_well) { enum skl_disp_power_wells power_well_id = power_well->data; - struct i915_power_well *cmn_a_well; + struct i915_power_well *cmn_a_well = NULL; if (power_well_id == BXT_DPIO_CMN_BC) { /* @@ -867,7 +869,7 @@ static void bxt_dpio_cmn_power_well_enable(struct drm_i915_private *dev_priv, bxt_ddi_phy_init(dev_priv, bxt_power_well_to_phy(power_well)); - if (power_well_id == BXT_DPIO_CMN_BC) + if (cmn_a_well) intel_power_well_put(dev_priv, cmn_a_well); } @@ -1121,6 +1123,8 @@ static void vlv_display_power_well_init(struct drm_i915_private *dev_priv) } i915_redisable_vga_power_on(&dev_priv->drm); + + intel_pps_unlock_regs_wa(dev_priv); } static void vlv_display_power_well_deinit(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index cbdca7e4..366900d 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -203,21 +203,19 @@ skl_update_plane(struct drm_plane *drm_plane, struct drm_i915_private *dev_priv = to_i915(dev); struct intel_plane *intel_plane = to_intel_plane(drm_plane); struct drm_framebuffer *fb = plane_state->base.fb; - struct drm_i915_gem_object *obj = intel_fb_obj(fb); const int pipe = intel_plane->pipe; const int plane = intel_plane->plane + 1; - u32 plane_ctl, stride_div, stride; + u32 plane_ctl; const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; - u32 surf_addr; - u32 tile_height, plane_offset, plane_size; + u32 surf_addr = plane_state->main.offset; unsigned int rotation = plane_state->base.rotation; - int x_offset, y_offset; + u32 stride = skl_plane_stride(fb, 0, rotation); int crtc_x = plane_state->base.dst.x1; int crtc_y = plane_state->base.dst.y1; uint32_t crtc_w = drm_rect_width(&plane_state->base.dst); uint32_t crtc_h = drm_rect_height(&plane_state->base.dst); - uint32_t x = plane_state->base.src.x1 >> 16; - uint32_t y = plane_state->base.src.y1 >> 16; + uint32_t x = plane_state->main.x; + uint32_t y = plane_state->main.y; uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16; uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16; @@ -230,15 +228,6 @@ skl_update_plane(struct drm_plane *drm_plane, plane_ctl |= skl_plane_ctl_rotation(rotation); - stride_div = intel_fb_stride_alignment(dev_priv, fb->modifier[0], - fb->pixel_format); - - /* Sizes are 0 based */ - src_w--; - src_h--; - crtc_w--; - crtc_h--; - if (key->flags) { I915_WRITE(PLANE_KEYVAL(pipe, plane), key->min_value); I915_WRITE(PLANE_KEYMAX(pipe, plane), key->max_value); @@ -250,28 +239,15 @@ skl_update_plane(struct drm_plane *drm_plane, else if (key->flags & I915_SET_COLORKEY_SOURCE) plane_ctl |= PLANE_CTL_KEY_ENABLE_SOURCE; - surf_addr = intel_plane_obj_offset(intel_plane, obj, 0); - - if (intel_rotation_90_or_270(rotation)) { - int cpp = drm_format_plane_cpp(fb->pixel_format, 0); - - /* stride: Surface height in tiles */ - tile_height = intel_tile_height(dev_priv, fb->modifier[0], cpp); - stride = DIV_ROUND_UP(fb->height, tile_height); - plane_size = (src_w << 16) | src_h; - x_offset = stride * tile_height - y - (src_h + 1); - y_offset = x; - } else { - stride = fb->pitches[0] / stride_div; - plane_size = (src_h << 16) | src_w; - x_offset = x; - y_offset = y; - } - plane_offset = y_offset << 16 | x_offset; + /* Sizes are 0 based */ + src_w--; + src_h--; + crtc_w--; + crtc_h--; - I915_WRITE(PLANE_OFFSET(pipe, plane), plane_offset); + I915_WRITE(PLANE_OFFSET(pipe, plane), (y << 16) | x); I915_WRITE(PLANE_STRIDE(pipe, plane), stride); - I915_WRITE(PLANE_SIZE(pipe, plane), plane_size); + I915_WRITE(PLANE_SIZE(pipe, plane), (src_h << 16) | src_w); /* program plane scaler */ if (plane_state->scaler_id >= 0) { @@ -296,7 +272,8 @@ skl_update_plane(struct drm_plane *drm_plane, } I915_WRITE(PLANE_CTL(pipe, plane), plane_ctl); - I915_WRITE(PLANE_SURF(pipe, plane), surf_addr); + I915_WRITE(PLANE_SURF(pipe, plane), + intel_fb_gtt_offset(fb, rotation) + surf_addr); POSTING_READ(PLANE_SURF(pipe, plane)); } @@ -363,13 +340,11 @@ vlv_update_plane(struct drm_plane *dplane, struct drm_i915_private *dev_priv = to_i915(dev); struct intel_plane *intel_plane = to_intel_plane(dplane); struct drm_framebuffer *fb = plane_state->base.fb; - struct drm_i915_gem_object *obj = intel_fb_obj(fb); int pipe = intel_plane->pipe; int plane = intel_plane->plane; u32 sprctl; u32 sprsurf_offset, linear_offset; unsigned int rotation = dplane->state->rotation; - int cpp = drm_format_plane_cpp(fb->pixel_format, 0); const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; int crtc_x = plane_state->base.dst.x1; int crtc_y = plane_state->base.dst.y1; @@ -431,7 +406,7 @@ vlv_update_plane(struct drm_plane *dplane, */ sprctl |= SP_GAMMA_ENABLE; - if (i915_gem_object_is_tiled(obj)) + if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED) sprctl |= SP_TILED; /* Sizes are 0 based */ @@ -440,19 +415,18 @@ vlv_update_plane(struct drm_plane *dplane, crtc_w--; crtc_h--; - linear_offset = y * fb->pitches[0] + x * cpp; - sprsurf_offset = intel_compute_tile_offset(&x, &y, fb, 0, - fb->pitches[0], rotation); - linear_offset -= sprsurf_offset; + intel_add_fb_offsets(&x, &y, plane_state, 0); + sprsurf_offset = intel_compute_tile_offset(&x, &y, plane_state, 0); if (rotation == DRM_ROTATE_180) { sprctl |= SP_ROTATE_180; x += src_w; y += src_h; - linear_offset += src_h * fb->pitches[0] + src_w * cpp; } + linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); + if (key->flags) { I915_WRITE(SPKEYMINVAL(pipe, plane), key->min_value); I915_WRITE(SPKEYMAXVAL(pipe, plane), key->max_value); @@ -468,7 +442,7 @@ vlv_update_plane(struct drm_plane *dplane, I915_WRITE(SPSTRIDE(pipe, plane), fb->pitches[0]); I915_WRITE(SPPOS(pipe, plane), (crtc_y << 16) | crtc_x); - if (i915_gem_object_is_tiled(obj)) + if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED) I915_WRITE(SPTILEOFF(pipe, plane), (y << 16) | x); else I915_WRITE(SPLINOFF(pipe, plane), linear_offset); @@ -477,8 +451,8 @@ vlv_update_plane(struct drm_plane *dplane, I915_WRITE(SPSIZE(pipe, plane), (crtc_h << 16) | crtc_w); I915_WRITE(SPCNTR(pipe, plane), sprctl); - I915_WRITE(SPSURF(pipe, plane), i915_gem_obj_ggtt_offset(obj) + - sprsurf_offset); + I915_WRITE(SPSURF(pipe, plane), + intel_fb_gtt_offset(fb, rotation) + sprsurf_offset); POSTING_READ(SPSURF(pipe, plane)); } @@ -506,12 +480,10 @@ ivb_update_plane(struct drm_plane *plane, struct drm_i915_private *dev_priv = to_i915(dev); struct intel_plane *intel_plane = to_intel_plane(plane); struct drm_framebuffer *fb = plane_state->base.fb; - struct drm_i915_gem_object *obj = intel_fb_obj(fb); enum pipe pipe = intel_plane->pipe; u32 sprctl, sprscale = 0; u32 sprsurf_offset, linear_offset; unsigned int rotation = plane_state->base.rotation; - int cpp = drm_format_plane_cpp(fb->pixel_format, 0); const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; int crtc_x = plane_state->base.dst.x1; int crtc_y = plane_state->base.dst.y1; @@ -553,7 +525,7 @@ ivb_update_plane(struct drm_plane *plane, */ sprctl |= SPRITE_GAMMA_ENABLE; - if (i915_gem_object_is_tiled(obj)) + if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED) sprctl |= SPRITE_TILED; if (IS_HASWELL(dev) || IS_BROADWELL(dev)) @@ -573,10 +545,8 @@ ivb_update_plane(struct drm_plane *plane, if (crtc_w != src_w || crtc_h != src_h) sprscale = SPRITE_SCALE_ENABLE | (src_w << 16) | src_h; - linear_offset = y * fb->pitches[0] + x * cpp; - sprsurf_offset = intel_compute_tile_offset(&x, &y, fb, 0, - fb->pitches[0], rotation); - linear_offset -= sprsurf_offset; + intel_add_fb_offsets(&x, &y, plane_state, 0); + sprsurf_offset = intel_compute_tile_offset(&x, &y, plane_state, 0); if (rotation == DRM_ROTATE_180) { sprctl |= SPRITE_ROTATE_180; @@ -585,10 +555,11 @@ ivb_update_plane(struct drm_plane *plane, if (!IS_HASWELL(dev) && !IS_BROADWELL(dev)) { x += src_w; y += src_h; - linear_offset += src_h * fb->pitches[0] + src_w * cpp; } } + linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); + if (key->flags) { I915_WRITE(SPRKEYVAL(pipe), key->min_value); I915_WRITE(SPRKEYMAX(pipe), key->max_value); @@ -607,7 +578,7 @@ ivb_update_plane(struct drm_plane *plane, * register */ if (IS_HASWELL(dev) || IS_BROADWELL(dev)) I915_WRITE(SPROFFSET(pipe), (y << 16) | x); - else if (i915_gem_object_is_tiled(obj)) + else if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED) I915_WRITE(SPRTILEOFF(pipe), (y << 16) | x); else I915_WRITE(SPRLINOFF(pipe), linear_offset); @@ -617,7 +588,7 @@ ivb_update_plane(struct drm_plane *plane, I915_WRITE(SPRSCALE(pipe), sprscale); I915_WRITE(SPRCTL(pipe), sprctl); I915_WRITE(SPRSURF(pipe), - i915_gem_obj_ggtt_offset(obj) + sprsurf_offset); + intel_fb_gtt_offset(fb, rotation) + sprsurf_offset); POSTING_READ(SPRSURF(pipe)); } @@ -647,12 +618,10 @@ ilk_update_plane(struct drm_plane *plane, struct drm_i915_private *dev_priv = to_i915(dev); struct intel_plane *intel_plane = to_intel_plane(plane); struct drm_framebuffer *fb = plane_state->base.fb; - struct drm_i915_gem_object *obj = intel_fb_obj(fb); int pipe = intel_plane->pipe; u32 dvscntr, dvsscale; u32 dvssurf_offset, linear_offset; unsigned int rotation = plane_state->base.rotation; - int cpp = drm_format_plane_cpp(fb->pixel_format, 0); const struct drm_intel_sprite_colorkey *key = &plane_state->ckey; int crtc_x = plane_state->base.dst.x1; int crtc_y = plane_state->base.dst.y1; @@ -694,7 +663,7 @@ ilk_update_plane(struct drm_plane *plane, */ dvscntr |= DVS_GAMMA_ENABLE; - if (i915_gem_object_is_tiled(obj)) + if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED) dvscntr |= DVS_TILED; if (IS_GEN6(dev)) @@ -710,19 +679,18 @@ ilk_update_plane(struct drm_plane *plane, if (crtc_w != src_w || crtc_h != src_h) dvsscale = DVS_SCALE_ENABLE | (src_w << 16) | src_h; - linear_offset = y * fb->pitches[0] + x * cpp; - dvssurf_offset = intel_compute_tile_offset(&x, &y, fb, 0, - fb->pitches[0], rotation); - linear_offset -= dvssurf_offset; + intel_add_fb_offsets(&x, &y, plane_state, 0); + dvssurf_offset = intel_compute_tile_offset(&x, &y, plane_state, 0); if (rotation == DRM_ROTATE_180) { dvscntr |= DVS_ROTATE_180; x += src_w; y += src_h; - linear_offset += src_h * fb->pitches[0] + src_w * cpp; } + linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0); + if (key->flags) { I915_WRITE(DVSKEYVAL(pipe), key->min_value); I915_WRITE(DVSKEYMAX(pipe), key->max_value); @@ -737,7 +705,7 @@ ilk_update_plane(struct drm_plane *plane, I915_WRITE(DVSSTRIDE(pipe), fb->pitches[0]); I915_WRITE(DVSPOS(pipe), (crtc_y << 16) | crtc_x); - if (i915_gem_object_is_tiled(obj)) + if (fb->modifier[0] == I915_FORMAT_MOD_X_TILED) I915_WRITE(DVSTILEOFF(pipe), (y << 16) | x); else I915_WRITE(DVSLINOFF(pipe), linear_offset); @@ -746,7 +714,7 @@ ilk_update_plane(struct drm_plane *plane, I915_WRITE(DVSSCALE(pipe), dvsscale); I915_WRITE(DVSCNTR(pipe), dvscntr); I915_WRITE(DVSSURF(pipe), - i915_gem_obj_ggtt_offset(obj) + dvssurf_offset); + intel_fb_gtt_offset(fb, rotation) + dvssurf_offset); POSTING_READ(DVSSURF(pipe)); } @@ -785,6 +753,7 @@ intel_check_sprite_plane(struct drm_plane *plane, int hscale, vscale; int max_scale, min_scale; bool can_scale; + int ret; src->x1 = state->base.src_x; src->y1 = state->base.src_y; @@ -949,6 +918,12 @@ intel_check_sprite_plane(struct drm_plane *plane, dst->y1 = crtc_y; dst->y2 = crtc_y + crtc_h; + if (INTEL_GEN(dev) >= 9) { + ret = skl_check_plane_surface(state); + if (ret) + return ret; + } + return 0; } diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h index b1755f8..4e1b274 100644 --- a/include/drm/i915_drm.h +++ b/include/drm/i915_drm.h @@ -93,6 +93,6 @@ extern bool i915_gpu_turbo_disable(void); #define I845_TSEG_SIZE_1M (3 << 1) #define INTEL_BSM 0x5c -#define INTEL_BSM_MASK (0xFFFF << 20) +#define INTEL_BSM_MASK (-(1u << 20)) #endif /* _I915_DRM_H_ */ diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index 645ad06..58df02b 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -31,16 +31,16 @@ * See Documentation/io-mapping.txt */ -#ifdef CONFIG_HAVE_ATOMIC_IOMAP - -#include <asm/iomap.h> - struct io_mapping { resource_size_t base; unsigned long size; pgprot_t prot; + void __iomem *iomem; }; +#ifdef CONFIG_HAVE_ATOMIC_IOMAP + +#include <asm/iomap.h> /* * For small address space machines, mapping large objects * into the kernel virtual space isn't practical. Where @@ -49,34 +49,25 @@ struct io_mapping { */ static inline struct io_mapping * -io_mapping_create_wc(resource_size_t base, unsigned long size) +io_mapping_init_wc(struct io_mapping *iomap, + resource_size_t base, + unsigned long size) { - struct io_mapping *iomap; pgprot_t prot; - iomap = kmalloc(sizeof(*iomap), GFP_KERNEL); - if (!iomap) - goto out_err; - if (iomap_create_wc(base, size, &prot)) - goto out_free; + return NULL; iomap->base = base; iomap->size = size; iomap->prot = prot; return iomap; - -out_free: - kfree(iomap); -out_err: - return NULL; } static inline void -io_mapping_free(struct io_mapping *mapping) +io_mapping_fini(struct io_mapping *mapping) { iomap_free(mapping->base, mapping->size); - kfree(mapping); } /* Atomic map/unmap */ @@ -121,21 +112,46 @@ io_mapping_unmap(void __iomem *vaddr) #else #include <linux/uaccess.h> - -/* this struct isn't actually defined anywhere */ -struct io_mapping; +#include <asm/pgtable.h> /* Create the io_mapping object*/ static inline struct io_mapping * -io_mapping_create_wc(resource_size_t base, unsigned long size) +io_mapping_init_wc(struct io_mapping *iomap, + resource_size_t base, + unsigned long size) { - return (struct io_mapping __force *) ioremap_wc(base, size); + iomap->base = base; + iomap->size = size; + iomap->iomem = ioremap_wc(base, size); +#if defined(pgprot_noncached_wc) /* archs can't agree on a name ... */ + iomap->prot = pgprot_noncached_wc(PAGE_KERNEL); +#elif defined(pgprot_writecombine) + iomap->prot = pgprot_writecombine(PAGE_KERNEL); +#else + iomap->prot = pgprot_noncached(PAGE_KERNEL); +#endif + + return iomap; } static inline void -io_mapping_free(struct io_mapping *mapping) +io_mapping_fini(struct io_mapping *mapping) +{ + iounmap(mapping->iomem); +} + +/* Non-atomic map/unmap */ +static inline void __iomem * +io_mapping_map_wc(struct io_mapping *mapping, + unsigned long offset, + unsigned long size) +{ + return mapping->iomem + offset; +} + +static inline void +io_mapping_unmap(void __iomem *vaddr) { - iounmap((void __force __iomem *) mapping); } /* Atomic map/unmap */ @@ -145,30 +161,42 @@ io_mapping_map_atomic_wc(struct io_mapping *mapping, { preempt_disable(); pagefault_disable(); - return ((char __force __iomem *) mapping) + offset; + return io_mapping_map_wc(mapping, offset, PAGE_SIZE); } static inline void io_mapping_unmap_atomic(void __iomem *vaddr) { + io_mapping_unmap(vaddr); pagefault_enable(); preempt_enable(); } -/* Non-atomic map/unmap */ -static inline void __iomem * -io_mapping_map_wc(struct io_mapping *mapping, - unsigned long offset, - unsigned long size) +#endif /* HAVE_ATOMIC_IOMAP */ + +static inline struct io_mapping * +io_mapping_create_wc(resource_size_t base, + unsigned long size) { - return ((char __force __iomem *) mapping) + offset; + struct io_mapping *iomap; + + iomap = kmalloc(sizeof(*iomap), GFP_KERNEL); + if (!iomap) + return NULL; + + if (!io_mapping_init_wc(iomap, base, size)) { + kfree(iomap); + return NULL; + } + + return iomap; } static inline void -io_mapping_unmap(void __iomem *vaddr) +io_mapping_free(struct io_mapping *iomap) { + io_mapping_fini(iomap); + kfree(iomap); } -#endif /* HAVE_ATOMIC_IOMAP */ - #endif /* _LINUX_IO_MAPPING_H */ diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 452629d..5501fe8 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -855,7 +855,16 @@ struct drm_i915_gem_busy { * having flushed any pending activity), and a non-zero return that * the object is still in-flight on the GPU. (The GPU has not yet * signaled completion for all pending requests that reference the - * object.) + * object.) An object is guaranteed to become idle eventually (so + * long as no new GPU commands are executed upon it). Due to the + * asynchronous nature of the hardware, an object reported + * as busy may become idle before the ioctl is completed. + * + * Furthermore, if the object is busy, which engine is busy is only + * provided as a guide. There are race conditions which prevent the + * report of which engines are busy from being always accurate. + * However, the converse is not true. If the object is idle, the + * result of the ioctl, that all engines are idle, is accurate. * * The returned dword is split into two fields to indicate both * the engines on which the object is being read, and the @@ -878,6 +887,11 @@ struct drm_i915_gem_busy { * execution engines, e.g. multiple media engines, which are * mapped to the same identifier in the EXECBUFFER2 ioctl and * so are not separately reported for busyness. + * + * Caveat emptor: + * Only the boolean result of this query is reliable; that is whether + * the object is idle or busy. The report of which engines are busy + * should be only used as a heuristic. */ __u32 busy; }; |