diff options
Diffstat (limited to 'drivers/gpu/drm')
435 files changed, 33653 insertions, 10347 deletions
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index deeefa7..2a72d2f 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -49,16 +49,17 @@ config DRM_DEBUG_MM If in doubt, say "N". -config DRM_DEBUG_MM_SELFTEST - tristate "kselftests for DRM range manager (struct drm_mm)" +config DRM_DEBUG_SELFTEST + tristate "kselftests for DRM" depends on DRM depends on DEBUG_KERNEL select PRIME_NUMBERS select DRM_LIB_RANDOM + select DRM_KMS_HELPER default n help - This option provides a kernel module that can be used to test - the DRM range manager (drm_mm) and its API. This option is not + This option provides kernel modules that can be used to run + various selftests on parts of the DRM api. This option is not useful for distributions or general kernels, but only for kernel developers working on DRM and associated drivers. @@ -267,6 +268,8 @@ source "drivers/gpu/drm/amd/amdkfd/Kconfig" source "drivers/gpu/drm/imx/Kconfig" +source "drivers/gpu/drm/v3d/Kconfig" + source "drivers/gpu/drm/vc4/Kconfig" source "drivers/gpu/drm/etnaviv/Kconfig" @@ -289,6 +292,8 @@ source "drivers/gpu/drm/pl111/Kconfig" source "drivers/gpu/drm/tve200/Kconfig" +source "drivers/gpu/drm/xen/Kconfig" + # Keep legacy drivers last menuconfig DRM_LEGACY diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 50093ff..ef9f3da 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -43,7 +43,7 @@ drm_kms_helper-$(CONFIG_DRM_KMS_CMA_HELPER) += drm_fb_cma_helper.o drm_kms_helper-$(CONFIG_DRM_DP_AUX_CHARDEV) += drm_dp_aux_dev.o obj-$(CONFIG_DRM_KMS_HELPER) += drm_kms_helper.o -obj-$(CONFIG_DRM_DEBUG_MM_SELFTEST) += selftests/ +obj-$(CONFIG_DRM_DEBUG_SELFTEST) += selftests/ obj-$(CONFIG_DRM) += drm.o obj-$(CONFIG_DRM_MIPI_DSI) += drm_mipi_dsi.o @@ -61,6 +61,7 @@ obj-$(CONFIG_DRM_MGA) += mga/ obj-$(CONFIG_DRM_I810) += i810/ obj-$(CONFIG_DRM_I915) += i915/ obj-$(CONFIG_DRM_MGAG200) += mgag200/ +obj-$(CONFIG_DRM_V3D) += v3d/ obj-$(CONFIG_DRM_VC4) += vc4/ obj-$(CONFIG_DRM_CIRRUS_QEMU) += cirrus/ obj-$(CONFIG_DRM_SIS) += sis/ @@ -103,3 +104,4 @@ obj-$(CONFIG_DRM_MXSFB) += mxsfb/ obj-$(CONFIG_DRM_TINYDRM) += tinydrm/ obj-$(CONFIG_DRM_PL111) += pl111/ obj-$(CONFIG_DRM_TVE200) += tve200/ +obj-$(CONFIG_DRM_XEN) += xen/ diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 2fe4a0b..68e9f58 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -134,7 +134,8 @@ amdgpu-y += \ amdgpu_amdkfd.o \ amdgpu_amdkfd_fence.o \ amdgpu_amdkfd_gpuvm.o \ - amdgpu_amdkfd_gfx_v8.o + amdgpu_amdkfd_gfx_v8.o \ + amdgpu_amdkfd_gfx_v9.o # add cgs amdgpu-y += amdgpu_cgs.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 887702c..bd36ee9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -92,6 +92,10 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) case CHIP_POLARIS11: kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions(); break; + case CHIP_VEGA10: + case CHIP_RAVEN: + kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions(); + break; default: dev_dbg(adev->dev, "kfd not supported on this ASIC\n"); return; @@ -175,6 +179,28 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) &gpu_resources.doorbell_physical_address, &gpu_resources.doorbell_aperture_size, &gpu_resources.doorbell_start_offset); + if (adev->asic_type >= CHIP_VEGA10) { + /* On SOC15 the BIF is involved in routing + * doorbells using the low 12 bits of the + * address. Communicate the assignments to + * KFD. KFD uses two doorbell pages per + * process in case of 64-bit doorbells so we + * can use each doorbell assignment twice. + */ + gpu_resources.sdma_doorbell[0][0] = + AMDGPU_DOORBELL64_sDMA_ENGINE0; + gpu_resources.sdma_doorbell[0][1] = + AMDGPU_DOORBELL64_sDMA_ENGINE0 + 0x200; + gpu_resources.sdma_doorbell[1][0] = + AMDGPU_DOORBELL64_sDMA_ENGINE1; + gpu_resources.sdma_doorbell[1][1] = + AMDGPU_DOORBELL64_sDMA_ENGINE1 + 0x200; + /* Doorbells 0x0f0-0ff and 0x2f0-2ff are reserved for + * SDMA, IH and VCN. So don't use them for the CP. + */ + gpu_resources.reserved_doorbell_mask = 0x1f0; + gpu_resources.reserved_doorbell_val = 0x0f0; + } kgd2kfd->device_init(adev->kfd, &gpu_resources); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index c2c2bea..12367a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -28,6 +28,7 @@ #include <linux/types.h> #include <linux/mm.h> #include <linux/mmu_context.h> +#include <linux/workqueue.h> #include <kgd_kfd_interface.h> #include <drm/ttm/ttm_execbuf_util.h> #include "amdgpu_sync.h" @@ -59,7 +60,9 @@ struct kgd_mem { uint32_t mapping_flags; + atomic_t invalid; struct amdkfd_process_info *process_info; + struct page **user_pages; struct amdgpu_sync sync; @@ -84,6 +87,9 @@ struct amdkfd_process_info { struct list_head vm_list_head; /* List head for all KFD BOs that belong to a KFD process. */ struct list_head kfd_bo_list; + /* List of userptr BOs that are valid or invalid */ + struct list_head userptr_valid_list; + struct list_head userptr_inval_list; /* Lock to protect kfd_bo_list */ struct mutex lock; @@ -91,6 +97,11 @@ struct amdkfd_process_info { unsigned int n_vms; /* Eviction Fence */ struct amdgpu_amdkfd_fence *eviction_fence; + + /* MMU-notifier related fields */ + atomic_t evicted_bos; + struct delayed_work restore_userptr_work; + struct pid *pid; }; int amdgpu_amdkfd_init(void); @@ -104,12 +115,14 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev); void amdgpu_amdkfd_device_init(struct amdgpu_device *adev); void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev); +int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm); int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, uint32_t vmid, uint64_t gpu_addr, uint32_t *ib_cmd, uint32_t ib_len); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void); struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void); +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void); bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index ea54e53..0ff36d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -98,8 +98,6 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, unsigned int vmid); -static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, - uint32_t hpd_size, uint64_t hpd_gpu_addr); static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, @@ -183,7 +181,6 @@ static const struct kfd2kgd_calls kfd2kgd = { .free_pasid = amdgpu_pasid_free, .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, - .init_pipeline = kgd_init_pipeline, .init_interrupts = kgd_init_interrupts, .hqd_load = kgd_hqd_load, .hqd_sdma_load = kgd_hqd_sdma_load, @@ -309,13 +306,6 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, return 0; } -static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, - uint32_t hpd_size, uint64_t hpd_gpu_addr) -{ - /* amdgpu owns the per-pipe state */ - return 0; -} - static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) { struct amdgpu_device *adev = get_amdgpu_device(kgd); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 89264c9..6ef9762 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -57,8 +57,6 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, uint32_t sh_mem_bases); static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, unsigned int vmid); -static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, - uint32_t hpd_size, uint64_t hpd_gpu_addr); static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, uint32_t queue_id, uint32_t __user *wptr, @@ -141,7 +139,6 @@ static const struct kfd2kgd_calls kfd2kgd = { .free_pasid = amdgpu_pasid_free, .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, - .init_pipeline = kgd_init_pipeline, .init_interrupts = kgd_init_interrupts, .hqd_load = kgd_hqd_load, .hqd_sdma_load = kgd_hqd_sdma_load, @@ -270,13 +267,6 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, return 0; } -static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, - uint32_t hpd_size, uint64_t hpd_gpu_addr) -{ - /* amdgpu owns the per-pipe state */ - return 0; -} - static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) { struct amdgpu_device *adev = get_amdgpu_device(kgd); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c new file mode 100644 index 0000000..8f37991 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -0,0 +1,1043 @@ +/* + * Copyright 2014-2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#define pr_fmt(fmt) "kfd2kgd: " fmt + +#include <linux/module.h> +#include <linux/fdtable.h> +#include <linux/uaccess.h> +#include <linux/firmware.h> +#include <drm/drmP.h> +#include "amdgpu.h" +#include "amdgpu_amdkfd.h" +#include "amdgpu_ucode.h" +#include "soc15_hw_ip.h" +#include "gc/gc_9_0_offset.h" +#include "gc/gc_9_0_sh_mask.h" +#include "vega10_enum.h" +#include "sdma0/sdma0_4_0_offset.h" +#include "sdma0/sdma0_4_0_sh_mask.h" +#include "sdma1/sdma1_4_0_offset.h" +#include "sdma1/sdma1_4_0_sh_mask.h" +#include "athub/athub_1_0_offset.h" +#include "athub/athub_1_0_sh_mask.h" +#include "oss/osssys_4_0_offset.h" +#include "oss/osssys_4_0_sh_mask.h" +#include "soc15_common.h" +#include "v9_structs.h" +#include "soc15.h" +#include "soc15d.h" + +/* HACK: MMHUB and GC both have VM-related register with the same + * names but different offsets. Define the MMHUB register we need here + * with a prefix. A proper solution would be to move the functions + * programming these registers into gfx_v9_0.c and mmhub_v1_0.c + * respectively. + */ +#define mmMMHUB_VM_INVALIDATE_ENG16_REQ 0x06f3 +#define mmMMHUB_VM_INVALIDATE_ENG16_REQ_BASE_IDX 0 + +#define mmMMHUB_VM_INVALIDATE_ENG16_ACK 0x0705 +#define mmMMHUB_VM_INVALIDATE_ENG16_ACK_BASE_IDX 0 + +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32 0x072b +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32_BASE_IDX 0 +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32 0x072c +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32_BASE_IDX 0 + +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32 0x074b +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32_BASE_IDX 0 +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32 0x074c +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32_BASE_IDX 0 + +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32 0x076b +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32_BASE_IDX 0 +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32 0x076c +#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32_BASE_IDX 0 + +#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32 0x0727 +#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32_BASE_IDX 0 +#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32 0x0728 +#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32_BASE_IDX 0 + +#define V9_PIPE_PER_MEC (4) +#define V9_QUEUES_PER_PIPE_MEC (8) + +enum hqd_dequeue_request_type { + NO_ACTION = 0, + DRAIN_PIPE, + RESET_WAVES +}; + +/* + * Register access functions + */ + +static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, + uint32_t sh_mem_config, + uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit, + uint32_t sh_mem_bases); +static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, + unsigned int vmid); +static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id); +static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + uint32_t queue_id, uint32_t __user *wptr, + uint32_t wptr_shift, uint32_t wptr_mask, + struct mm_struct *mm); +static int kgd_hqd_dump(struct kgd_dev *kgd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs); +static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, + uint32_t __user *wptr, struct mm_struct *mm); +static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, + uint32_t engine_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs); +static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, + uint32_t pipe_id, uint32_t queue_id); +static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd); +static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, + enum kfd_preempt_type reset_type, + unsigned int utimeout, uint32_t pipe_id, + uint32_t queue_id); +static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, + unsigned int utimeout); +static int kgd_address_watch_disable(struct kgd_dev *kgd); +static int kgd_address_watch_execute(struct kgd_dev *kgd, + unsigned int watch_point_id, + uint32_t cntl_val, + uint32_t addr_hi, + uint32_t addr_lo); +static int kgd_wave_control_execute(struct kgd_dev *kgd, + uint32_t gfx_index_val, + uint32_t sq_cmd); +static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, + unsigned int watch_point_id, + unsigned int reg_offset); + +static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, + uint8_t vmid); +static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, + uint8_t vmid); +static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint32_t page_table_base); +static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); +static void set_scratch_backing_va(struct kgd_dev *kgd, + uint64_t va, uint32_t vmid); +static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); +static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); + +/* Because of REG_GET_FIELD() being used, we put this function in the + * asic specific file. + */ +static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, + struct tile_config *config) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + config->gb_addr_config = adev->gfx.config.gb_addr_config; + + config->tile_config_ptr = adev->gfx.config.tile_mode_array; + config->num_tile_configs = + ARRAY_SIZE(adev->gfx.config.tile_mode_array); + config->macro_tile_config_ptr = + adev->gfx.config.macrotile_mode_array; + config->num_macro_tile_configs = + ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); + + return 0; +} + +static const struct kfd2kgd_calls kfd2kgd = { + .init_gtt_mem_allocation = alloc_gtt_mem, + .free_gtt_mem = free_gtt_mem, + .get_local_mem_info = get_local_mem_info, + .get_gpu_clock_counter = get_gpu_clock_counter, + .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, + .alloc_pasid = amdgpu_pasid_alloc, + .free_pasid = amdgpu_pasid_free, + .program_sh_mem_settings = kgd_program_sh_mem_settings, + .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, + .init_interrupts = kgd_init_interrupts, + .hqd_load = kgd_hqd_load, + .hqd_sdma_load = kgd_hqd_sdma_load, + .hqd_dump = kgd_hqd_dump, + .hqd_sdma_dump = kgd_hqd_sdma_dump, + .hqd_is_occupied = kgd_hqd_is_occupied, + .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied, + .hqd_destroy = kgd_hqd_destroy, + .hqd_sdma_destroy = kgd_hqd_sdma_destroy, + .address_watch_disable = kgd_address_watch_disable, + .address_watch_execute = kgd_address_watch_execute, + .wave_control_execute = kgd_wave_control_execute, + .address_watch_get_offset = kgd_address_watch_get_offset, + .get_atc_vmid_pasid_mapping_pasid = + get_atc_vmid_pasid_mapping_pasid, + .get_atc_vmid_pasid_mapping_valid = + get_atc_vmid_pasid_mapping_valid, + .get_fw_version = get_fw_version, + .set_scratch_backing_va = set_scratch_backing_va, + .get_tile_config = amdgpu_amdkfd_get_tile_config, + .get_cu_info = get_cu_info, + .get_vram_usage = amdgpu_amdkfd_get_vram_usage, + .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, + .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm, + .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, + .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, + .set_vm_context_page_table_base = set_vm_context_page_table_base, + .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, + .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu, + .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, + .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu, + .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory, + .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel, + .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos, + .invalidate_tlbs = invalidate_tlbs, + .invalidate_tlbs_vmid = invalidate_tlbs_vmid, + .submit_ib = amdgpu_amdkfd_submit_ib, +}; + +struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void) +{ + return (struct kfd2kgd_calls *)&kfd2kgd; +} + +static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) +{ + return (struct amdgpu_device *)kgd; +} + +static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe, + uint32_t queue, uint32_t vmid) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + mutex_lock(&adev->srbm_mutex); + soc15_grbm_select(adev, mec, pipe, queue, vmid); +} + +static void unlock_srbm(struct kgd_dev *kgd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + soc15_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); +} + +static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t queue_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + lock_srbm(kgd, mec, pipe, queue_id, 0); +} + +static uint32_t get_queue_mask(struct amdgpu_device *adev, + uint32_t pipe_id, uint32_t queue_id) +{ + unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe + + queue_id) & 31; + + return ((uint32_t)1) << bit; +} + +static void release_queue(struct kgd_dev *kgd) +{ + unlock_srbm(kgd); +} + +static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, + uint32_t sh_mem_config, + uint32_t sh_mem_ape1_base, + uint32_t sh_mem_ape1_limit, + uint32_t sh_mem_bases) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + lock_srbm(kgd, 0, 0, 0, vmid); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases); + /* APE1 no longer exists on GFX9 */ + + unlock_srbm(kgd); +} + +static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, + unsigned int vmid) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + /* + * We have to assume that there is no outstanding mapping. + * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because + * a mapping is in progress or because a mapping finished + * and the SW cleared it. + * So the protocol is to always wait & clear. + */ + uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid | + ATC_VMID0_PASID_MAPPING__VALID_MASK; + + /* + * need to do this twice, once for gfx and once for mmhub + * for ATC add 16 to VMID for mmhub, for IH different registers. + * ATC_VMID0..15 registers are separate from ATC_VMID16..31. + */ + + WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid, + pasid_mapping); + + while (!(RREG32(SOC15_REG_OFFSET( + ATHUB, 0, + mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) & + (1U << vmid))) + cpu_relax(); + + WREG32(SOC15_REG_OFFSET(ATHUB, 0, + mmATC_VMID_PASID_MAPPING_UPDATE_STATUS), + 1U << vmid); + + /* Mapping vmid to pasid also for IH block */ + WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid, + pasid_mapping); + + WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID16_PASID_MAPPING) + vmid, + pasid_mapping); + + while (!(RREG32(SOC15_REG_OFFSET( + ATHUB, 0, + mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) & + (1U << (vmid + 16)))) + cpu_relax(); + + WREG32(SOC15_REG_OFFSET(ATHUB, 0, + mmATC_VMID_PASID_MAPPING_UPDATE_STATUS), + 1U << (vmid + 16)); + + /* Mapping vmid to pasid also for IH block */ + WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid, + pasid_mapping); + return 0; +} + +/* TODO - RING0 form of field is obsolete, seems to date back to SI + * but still works + */ + +static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t mec; + uint32_t pipe; + + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + lock_srbm(kgd, mec, pipe, 0, 0); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL), + CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK | + CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK); + + unlock_srbm(kgd); + + return 0; +} + +static uint32_t get_sdma_base_addr(struct amdgpu_device *adev, + unsigned int engine_id, + unsigned int queue_id) +{ + uint32_t base[2] = { + SOC15_REG_OFFSET(SDMA0, 0, + mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL, + SOC15_REG_OFFSET(SDMA1, 0, + mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL + }; + uint32_t retval; + + retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL - + mmSDMA0_RLC0_RB_CNTL); + + pr_debug("sdma base address: 0x%x\n", retval); + + return retval; +} + +static inline struct v9_mqd *get_mqd(void *mqd) +{ + return (struct v9_mqd *)mqd; +} + +static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) +{ + return (struct v9_sdma_mqd *)mqd; +} + +static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + uint32_t queue_id, uint32_t __user *wptr, + uint32_t wptr_shift, uint32_t wptr_mask, + struct mm_struct *mm) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v9_mqd *m; + uint32_t *mqd_hqd; + uint32_t reg, hqd_base, data; + + m = get_mqd(mqd); + + acquire_queue(kgd, pipe_id, queue_id); + + /* HIQ is set during driver init period with vmid set to 0*/ + if (m->cp_hqd_vmid == 0) { + uint32_t value, mec, pipe; + + mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1; + pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec); + + pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n", + mec, pipe, queue_id); + value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS)); + value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1, + ((mec << 5) | (pipe << 3) | queue_id | 0x80)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value); + } + + /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */ + mqd_hqd = &m->cp_mqd_base_addr_lo; + hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); + + for (reg = hqd_base; + reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) + WREG32(reg, mqd_hqd[reg - hqd_base]); + + + /* Activate doorbell logic before triggering WPTR poll. */ + data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control, + CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data); + + if (wptr) { + /* Don't read wptr with get_user because the user + * context may not be accessible (if this function + * runs in a work queue). Instead trigger a one-shot + * polling read from memory in the CP. This assumes + * that wptr is GPU-accessible in the queue's VMID via + * ATC or SVM. WPTR==RPTR before starting the poll so + * the CP starts fetching new commands from the right + * place. + * + * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit + * tricky. Assume that the queue didn't overflow. The + * number of valid bits in the 32-bit RPTR depends on + * the queue size. The remaining bits are taken from + * the saved 64-bit WPTR. If the WPTR wrapped, add the + * queue size. + */ + uint32_t queue_size = + 2 << REG_GET_FIELD(m->cp_hqd_pq_control, + CP_HQD_PQ_CONTROL, QUEUE_SIZE); + uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1); + + if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr) + guessed_wptr += queue_size; + guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1); + guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32; + + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO), + lower_32_bits(guessed_wptr)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI), + upper_32_bits(guessed_wptr)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR), + lower_32_bits((uint64_t)wptr)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI), + upper_32_bits((uint64_t)wptr)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1), + get_queue_mask(adev, pipe_id, queue_id)); + } + + /* Start the EOP fetcher */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR), + REG_SET_FIELD(m->cp_hqd_eop_rptr, + CP_HQD_EOP_RPTR, INIT_FETCHER, 1)); + + data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data); + + release_queue(kgd); + + return 0; +} + +static int kgd_hqd_dump(struct kgd_dev *kgd, + uint32_t pipe_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t i = 0, reg; +#define HQD_N_REGS 56 +#define DUMP_REG(addr) do { \ + if (WARN_ON_ONCE(i >= HQD_N_REGS)) \ + break; \ + (*dump)[i][0] = (addr) << 2; \ + (*dump)[i++][1] = RREG32(addr); \ + } while (0) + + *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + if (*dump == NULL) + return -ENOMEM; + + acquire_queue(kgd, pipe_id, queue_id); + + for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR); + reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++) + DUMP_REG(reg); + + release_queue(kgd); + + WARN_ON_ONCE(i != HQD_N_REGS); + *n_regs = i; + + return 0; +} + +static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, + uint32_t __user *wptr, struct mm_struct *mm) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v9_sdma_mqd *m; + uint32_t sdma_base_addr, sdmax_gfx_context_cntl; + unsigned long end_jiffies; + uint32_t data; + uint64_t data64; + uint64_t __user *wptr64 = (uint64_t __user *)wptr; + + m = get_sdma_mqd(mqd); + sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, + m->sdma_queue_id); + sdmax_gfx_context_cntl = m->sdma_engine_id ? + SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_CONTEXT_CNTL) : + SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_CONTEXT_CNTL); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)); + + end_jiffies = msecs_to_jiffies(2000) + jiffies; + while (true) { + data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) + break; + if (time_after(jiffies, end_jiffies)) + return -ETIME; + usleep_range(500, 1000); + } + data = RREG32(sdmax_gfx_context_cntl); + data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL, + RESUME_CTX, 0); + WREG32(sdmax_gfx_context_cntl, data); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET, + m->sdmax_rlcx_doorbell_offset); + + data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL, + ENABLE, 1); + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI, + m->sdmax_rlcx_rb_rptr_hi); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1); + if (read_user_wptr(mm, wptr64, data64)) { + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, + lower_32_bits(data64)); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, + upper_32_bits(data64)); + } else { + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR, + m->sdmax_rlcx_rb_rptr); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI, + m->sdmax_rlcx_rb_rptr_hi); + } + WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0); + + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI, + m->sdmax_rlcx_rb_base_hi); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO, + m->sdmax_rlcx_rb_rptr_addr_lo); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI, + m->sdmax_rlcx_rb_rptr_addr_hi); + + data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL, + RB_ENABLE, 1); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data); + + return 0; +} + +static int kgd_hqd_sdma_dump(struct kgd_dev *kgd, + uint32_t engine_id, uint32_t queue_id, + uint32_t (**dump)[2], uint32_t *n_regs) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id); + uint32_t i = 0, reg; +#undef HQD_N_REGS +#define HQD_N_REGS (19+6+7+10) + + *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL); + if (*dump == NULL) + return -ENOMEM; + + for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++) + DUMP_REG(sdma_base_addr + reg); + for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++) + DUMP_REG(sdma_base_addr + reg); + for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN; + reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++) + DUMP_REG(sdma_base_addr + reg); + for (reg = mmSDMA0_RLC0_MIDCMD_DATA0; + reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++) + DUMP_REG(sdma_base_addr + reg); + + WARN_ON_ONCE(i != HQD_N_REGS); + *n_regs = i; + + return 0; +} + +static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address, + uint32_t pipe_id, uint32_t queue_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t act; + bool retval = false; + uint32_t low, high; + + acquire_queue(kgd, pipe_id, queue_id); + act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)); + if (act) { + low = lower_32_bits(queue_address >> 8); + high = upper_32_bits(queue_address >> 8); + + if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) && + high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI))) + retval = true; + } + release_queue(kgd); + return retval; +} + +static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v9_sdma_mqd *m; + uint32_t sdma_base_addr; + uint32_t sdma_rlc_rb_cntl; + + m = get_sdma_mqd(mqd); + sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, + m->sdma_queue_id); + + sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + + if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK) + return true; + + return false; +} + +static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd, + enum kfd_preempt_type reset_type, + unsigned int utimeout, uint32_t pipe_id, + uint32_t queue_id) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + enum hqd_dequeue_request_type type; + unsigned long end_jiffies; + uint32_t temp; + struct v9_mqd *m = get_mqd(mqd); + + acquire_queue(kgd, pipe_id, queue_id); + + if (m->cp_hqd_vmid == 0) + WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0); + + switch (reset_type) { + case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN: + type = DRAIN_PIPE; + break; + case KFD_PREEMPT_TYPE_WAVEFRONT_RESET: + type = RESET_WAVES; + break; + default: + type = DRAIN_PIPE; + break; + } + + WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type); + + end_jiffies = (utimeout * HZ / 1000) + jiffies; + while (true) { + temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE)); + if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK)) + break; + if (time_after(jiffies, end_jiffies)) { + pr_err("cp queue preemption time out.\n"); + release_queue(kgd); + return -ETIME; + } + usleep_range(500, 1000); + } + + release_queue(kgd); + return 0; +} + +static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, + unsigned int utimeout) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct v9_sdma_mqd *m; + uint32_t sdma_base_addr; + uint32_t temp; + unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies; + + m = get_sdma_mqd(mqd); + sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id, + m->sdma_queue_id); + + temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL); + temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK; + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp); + + while (true) { + temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS); + if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK) + break; + if (time_after(jiffies, end_jiffies)) + return -ETIME; + usleep_range(500, 1000); + } + + WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0); + WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, + RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) | + SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK); + + m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR); + m->sdmax_rlcx_rb_rptr_hi = + RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI); + + return 0; +} + +static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, + uint8_t vmid) +{ + uint32_t reg; + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + + vmid); + return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK; +} + +static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, + uint8_t vmid) +{ + uint32_t reg; + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + + vmid); + return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK; +} + +static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + uint32_t req = (1 << vmid) | + (0 << VM_INVALIDATE_ENG16_REQ__FLUSH_TYPE__SHIFT) | /* legacy */ + VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PTES_MASK | + VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE0_MASK | + VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE1_MASK | + VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE2_MASK | + VM_INVALIDATE_ENG16_REQ__INVALIDATE_L1_PTES_MASK; + + mutex_lock(&adev->srbm_mutex); + + /* Use legacy mode tlb invalidation. + * + * Currently on Raven the code below is broken for anything but + * legacy mode due to a MMHUB power gating problem. A workaround + * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ + * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack + * bit. + * + * TODO 1: agree on the right set of invalidation registers for + * KFD use. Use the last one for now. Invalidate both GC and + * MMHUB. + * + * TODO 2: support range-based invalidation, requires kfg2kgd + * interface change + */ + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_LO32), + 0xffffffff); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_HI32), + 0x0000001f); + + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32), + 0xffffffff); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32), + 0x0000001f); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_REQ), req); + + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_REQ), + req); + + while (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ACK)) & + (1 << vmid))) + cpu_relax(); + + while (!(RREG32(SOC15_REG_OFFSET(MMHUB, 0, + mmMMHUB_VM_INVALIDATE_ENG16_ACK)) & + (1 << vmid))) + cpu_relax(); + + mutex_unlock(&adev->srbm_mutex); + +} + +static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) +{ + signed long r; + uint32_t seq; + struct amdgpu_ring *ring = &adev->gfx.kiq.ring; + + spin_lock(&adev->gfx.kiq.ring_lock); + amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/ + amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); + amdgpu_ring_write(ring, + PACKET3_INVALIDATE_TLBS_DST_SEL(1) | + PACKET3_INVALIDATE_TLBS_ALL_HUB(1) | + PACKET3_INVALIDATE_TLBS_PASID(pasid) | + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(0)); /* legacy */ + amdgpu_fence_emit_polling(ring, &seq); + amdgpu_ring_commit(ring); + spin_unlock(&adev->gfx.kiq.ring_lock); + + r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); + if (r < 1) { + DRM_ERROR("wait for kiq fence error: %ld.\n", r); + return -ETIME; + } + + return 0; +} + +static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + int vmid; + struct amdgpu_ring *ring = &adev->gfx.kiq.ring; + + if (ring->ready) + return invalidate_tlbs_with_kiq(adev, pasid); + + for (vmid = 0; vmid < 16; vmid++) { + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) + continue; + if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) { + if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid) + == pasid) { + write_vmid_invalidate_request(kgd, vmid); + break; + } + } + } + + return 0; +} + +static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { + pr_err("non kfd vmid %d\n", vmid); + return 0; + } + + write_vmid_invalidate_request(kgd, vmid); + return 0; +} + +static int kgd_address_watch_disable(struct kgd_dev *kgd) +{ + return 0; +} + +static int kgd_address_watch_execute(struct kgd_dev *kgd, + unsigned int watch_point_id, + uint32_t cntl_val, + uint32_t addr_hi, + uint32_t addr_lo) +{ + return 0; +} + +static int kgd_wave_control_execute(struct kgd_dev *kgd, + uint32_t gfx_index_val, + uint32_t sq_cmd) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint32_t data = 0; + + mutex_lock(&adev->grbm_idx_mutex); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd); + + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, + INSTANCE_BROADCAST_WRITES, 1); + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, + SH_BROADCAST_WRITES, 1); + data = REG_SET_FIELD(data, GRBM_GFX_INDEX, + SE_BROADCAST_WRITES, 1); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data); + mutex_unlock(&adev->grbm_idx_mutex); + + return 0; +} + +static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd, + unsigned int watch_point_id, + unsigned int reg_offset) +{ + return 0; +} + +static void set_scratch_backing_va(struct kgd_dev *kgd, + uint64_t va, uint32_t vmid) +{ + /* No longer needed on GFXv9. The scratch base address is + * passed to the shader by the CP. It's the user mode driver's + * responsibility. + */ +} + +/* FIXME: Does this need to be ASIC-specific code? */ +static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) +{ + struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + const union amdgpu_firmware_header *hdr; + + switch (type) { + case KGD_ENGINE_PFP: + hdr = (const union amdgpu_firmware_header *)adev->gfx.pfp_fw->data; + break; + + case KGD_ENGINE_ME: + hdr = (const union amdgpu_firmware_header *)adev->gfx.me_fw->data; + break; + + case KGD_ENGINE_CE: + hdr = (const union amdgpu_firmware_header *)adev->gfx.ce_fw->data; + break; + + case KGD_ENGINE_MEC1: + hdr = (const union amdgpu_firmware_header *)adev->gfx.mec_fw->data; + break; + + case KGD_ENGINE_MEC2: + hdr = (const union amdgpu_firmware_header *)adev->gfx.mec2_fw->data; + break; + + case KGD_ENGINE_RLC: + hdr = (const union amdgpu_firmware_header *)adev->gfx.rlc_fw->data; + break; + + case KGD_ENGINE_SDMA1: + hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[0].fw->data; + break; + + case KGD_ENGINE_SDMA2: + hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[1].fw->data; + break; + + default: + return 0; + } + + if (hdr == NULL) + return 0; + + /* Only 12 bit in use*/ + return hdr->common.ucode_version; +} + +static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint32_t page_table_base) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + uint64_t base = (uint64_t)page_table_base << PAGE_SHIFT | + AMDGPU_PTE_VALID; + + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { + pr_err("trying to set page table base for wrong VMID %u\n", + vmid); + return; + } + + /* TODO: take advantage of per-process address space size. For + * now, all processes share the same address space size, like + * on GFX8 and older. + */ + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0); + + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2), + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2), + upper_32_bits(adev->vm_manager.max_pfn - 1)); + + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base)); + WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base)); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2), + lower_32_bits(adev->vm_manager.max_pfn - 1)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2), + upper_32_bits(adev->vm_manager.max_pfn - 1)); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base)); + WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base)); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index c1b0cdb..72ab2b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -23,6 +23,7 @@ #define pr_fmt(fmt) "kfd2kgd: " fmt #include <linux/list.h> +#include <linux/sched/mm.h> #include <drm/drmP.h> #include "amdgpu_object.h" #include "amdgpu_vm.h" @@ -33,10 +34,20 @@ */ #define VI_BO_SIZE_ALIGN (0x8000) +/* BO flag to indicate a KFD userptr BO */ +#define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63) + +/* Userptr restore delay, just long enough to allow consecutive VM + * changes to accumulate + */ +#define AMDGPU_USERPTR_RESTORE_DELAY_MS 1 + /* Impose limit on how much memory KFD can use */ static struct { uint64_t max_system_mem_limit; + uint64_t max_userptr_mem_limit; int64_t system_mem_used; + int64_t userptr_mem_used; spinlock_t mem_limit_lock; } kfd_mem_limit; @@ -57,6 +68,7 @@ static const char * const domain_bit_to_string[] = { #define domain_string(domain) domain_bit_to_string[ffs(domain)-1] +static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work); static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) @@ -78,6 +90,7 @@ static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm, /* Set memory usage limits. Current, limits are * System (kernel) memory - 3/8th System RAM + * Userptr memory - 3/4th System RAM */ void amdgpu_amdkfd_gpuvm_init_mem_limits(void) { @@ -90,8 +103,10 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void) spin_lock_init(&kfd_mem_limit.mem_limit_lock); kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3); - pr_debug("Kernel memory limit %lluM\n", - (kfd_mem_limit.max_system_mem_limit >> 20)); + kfd_mem_limit.max_userptr_mem_limit = mem - (mem >> 2); + pr_debug("Kernel memory limit %lluM, userptr limit %lluM\n", + (kfd_mem_limit.max_system_mem_limit >> 20), + (kfd_mem_limit.max_userptr_mem_limit >> 20)); } static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev, @@ -111,6 +126,16 @@ static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev, goto err_no_mem; } kfd_mem_limit.system_mem_used += (acc_size + size); + } else if (domain == AMDGPU_GEM_DOMAIN_CPU) { + if ((kfd_mem_limit.system_mem_used + acc_size > + kfd_mem_limit.max_system_mem_limit) || + (kfd_mem_limit.userptr_mem_used + (size + acc_size) > + kfd_mem_limit.max_userptr_mem_limit)) { + ret = -ENOMEM; + goto err_no_mem; + } + kfd_mem_limit.system_mem_used += acc_size; + kfd_mem_limit.userptr_mem_used += size; } err_no_mem: spin_unlock(&kfd_mem_limit.mem_limit_lock); @@ -126,10 +151,16 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev, sizeof(struct amdgpu_bo)); spin_lock(&kfd_mem_limit.mem_limit_lock); - if (domain == AMDGPU_GEM_DOMAIN_GTT) + if (domain == AMDGPU_GEM_DOMAIN_GTT) { kfd_mem_limit.system_mem_used -= (acc_size + size); + } else if (domain == AMDGPU_GEM_DOMAIN_CPU) { + kfd_mem_limit.system_mem_used -= acc_size; + kfd_mem_limit.userptr_mem_used -= size; + } WARN_ONCE(kfd_mem_limit.system_mem_used < 0, "kfd system memory accounting unbalanced"); + WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0, + "kfd userptr memory accounting unbalanced"); spin_unlock(&kfd_mem_limit.mem_limit_lock); } @@ -138,12 +169,17 @@ void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo) { spin_lock(&kfd_mem_limit.mem_limit_lock); - if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) { + if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) { + kfd_mem_limit.system_mem_used -= bo->tbo.acc_size; + kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo); + } else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) { kfd_mem_limit.system_mem_used -= (bo->tbo.acc_size + amdgpu_bo_size(bo)); } WARN_ONCE(kfd_mem_limit.system_mem_used < 0, "kfd system memory accounting unbalanced"); + WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0, + "kfd userptr memory accounting unbalanced"); spin_unlock(&kfd_mem_limit.mem_limit_lock); } @@ -506,7 +542,8 @@ static void remove_bo_from_vm(struct amdgpu_device *adev, } static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem, - struct amdkfd_process_info *process_info) + struct amdkfd_process_info *process_info, + bool userptr) { struct ttm_validate_buffer *entry = &mem->validate_list; struct amdgpu_bo *bo = mem->bo; @@ -515,10 +552,95 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem, entry->shared = true; entry->bo = &bo->tbo; mutex_lock(&process_info->lock); - list_add_tail(&entry->head, &process_info->kfd_bo_list); + if (userptr) + list_add_tail(&entry->head, &process_info->userptr_valid_list); + else + list_add_tail(&entry->head, &process_info->kfd_bo_list); mutex_unlock(&process_info->lock); } +/* Initializes user pages. It registers the MMU notifier and validates + * the userptr BO in the GTT domain. + * + * The BO must already be on the userptr_valid_list. Otherwise an + * eviction and restore may happen that leaves the new BO unmapped + * with the user mode queues running. + * + * Takes the process_info->lock to protect against concurrent restore + * workers. + * + * Returns 0 for success, negative errno for errors. + */ +static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm, + uint64_t user_addr) +{ + struct amdkfd_process_info *process_info = mem->process_info; + struct amdgpu_bo *bo = mem->bo; + struct ttm_operation_ctx ctx = { true, false }; + int ret = 0; + + mutex_lock(&process_info->lock); + + ret = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, user_addr, 0); + if (ret) { + pr_err("%s: Failed to set userptr: %d\n", __func__, ret); + goto out; + } + + ret = amdgpu_mn_register(bo, user_addr); + if (ret) { + pr_err("%s: Failed to register MMU notifier: %d\n", + __func__, ret); + goto out; + } + + /* If no restore worker is running concurrently, user_pages + * should not be allocated + */ + WARN(mem->user_pages, "Leaking user_pages array"); + + mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages, + sizeof(struct page *), + GFP_KERNEL | __GFP_ZERO); + if (!mem->user_pages) { + pr_err("%s: Failed to allocate pages array\n", __func__); + ret = -ENOMEM; + goto unregister_out; + } + + ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages); + if (ret) { + pr_err("%s: Failed to get user pages: %d\n", __func__, ret); + goto free_out; + } + + amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages); + + ret = amdgpu_bo_reserve(bo, true); + if (ret) { + pr_err("%s: Failed to reserve BO\n", __func__); + goto release_out; + } + amdgpu_ttm_placement_from_domain(bo, mem->domain); + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (ret) + pr_err("%s: failed to validate BO\n", __func__); + amdgpu_bo_unreserve(bo); + +release_out: + if (ret) + release_pages(mem->user_pages, bo->tbo.ttm->num_pages); +free_out: + kvfree(mem->user_pages); + mem->user_pages = NULL; +unregister_out: + if (ret) + amdgpu_mn_unregister(bo); +out: + mutex_unlock(&process_info->lock); + return ret; +} + /* Reserving a BO and its page table BOs must happen atomically to * avoid deadlocks. Some operations update multiple VMs at once. Track * all the reservation info in a context structure. Optionally a sync @@ -748,7 +870,8 @@ static int update_gpuvm_pte(struct amdgpu_device *adev, } static int map_bo_to_gpuvm(struct amdgpu_device *adev, - struct kfd_bo_va_list *entry, struct amdgpu_sync *sync) + struct kfd_bo_va_list *entry, struct amdgpu_sync *sync, + bool no_update_pte) { int ret; @@ -762,6 +885,9 @@ static int map_bo_to_gpuvm(struct amdgpu_device *adev, return ret; } + if (no_update_pte) + return 0; + ret = update_gpuvm_pte(adev, entry, sync); if (ret) { pr_err("update_gpuvm_pte() failed\n"); @@ -820,6 +946,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, mutex_init(&info->lock); INIT_LIST_HEAD(&info->vm_list_head); INIT_LIST_HEAD(&info->kfd_bo_list); + INIT_LIST_HEAD(&info->userptr_valid_list); + INIT_LIST_HEAD(&info->userptr_inval_list); info->eviction_fence = amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1), @@ -830,6 +958,11 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, goto create_evict_fence_fail; } + info->pid = get_task_pid(current->group_leader, PIDTYPE_PID); + atomic_set(&info->evicted_bos, 0); + INIT_DELAYED_WORK(&info->restore_userptr_work, + amdgpu_amdkfd_restore_userptr_worker); + *process_info = info; *ef = dma_fence_get(&info->eviction_fence->base); } @@ -872,6 +1005,7 @@ reserve_pd_fail: dma_fence_put(*ef); *ef = NULL; *process_info = NULL; + put_pid(info->pid); create_evict_fence_fail: mutex_destroy(&info->lock); kfree(info); @@ -967,8 +1101,12 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, /* Release per-process resources when last compute VM is destroyed */ if (!process_info->n_vms) { WARN_ON(!list_empty(&process_info->kfd_bo_list)); + WARN_ON(!list_empty(&process_info->userptr_valid_list)); + WARN_ON(!list_empty(&process_info->userptr_inval_list)); dma_fence_put(&process_info->eviction_fence->base); + cancel_delayed_work_sync(&process_info->restore_userptr_work); + put_pid(process_info->pid); mutex_destroy(&process_info->lock); kfree(process_info); } @@ -1003,10 +1141,11 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; + uint64_t user_addr = 0; struct amdgpu_bo *bo; struct amdgpu_bo_param bp; int byte_align; - u32 alloc_domain; + u32 domain, alloc_domain; u64 alloc_flags; uint32_t mapping_flags; int ret; @@ -1015,14 +1154,21 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( * Check on which domain to allocate BO */ if (flags & ALLOC_MEM_FLAGS_VRAM) { - alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; + domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED; alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ? AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : AMDGPU_GEM_CREATE_NO_CPU_ACCESS; } else if (flags & ALLOC_MEM_FLAGS_GTT) { - alloc_domain = AMDGPU_GEM_DOMAIN_GTT; + domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT; + alloc_flags = 0; + } else if (flags & ALLOC_MEM_FLAGS_USERPTR) { + domain = AMDGPU_GEM_DOMAIN_GTT; + alloc_domain = AMDGPU_GEM_DOMAIN_CPU; alloc_flags = 0; + if (!offset || !*offset) + return -EINVAL; + user_addr = *offset; } else { return -EINVAL; } @@ -1085,18 +1231,34 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( } bo->kfd_bo = *mem; (*mem)->bo = bo; + if (user_addr) + bo->flags |= AMDGPU_AMDKFD_USERPTR_BO; (*mem)->va = va; - (*mem)->domain = alloc_domain; + (*mem)->domain = domain; (*mem)->mapped_to_gpu_memory = 0; (*mem)->process_info = avm->process_info; - add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info); + add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr); + + if (user_addr) { + ret = init_user_pages(*mem, current->mm, user_addr); + if (ret) { + mutex_lock(&avm->process_info->lock); + list_del(&(*mem)->validate_list.head); + mutex_unlock(&avm->process_info->lock); + goto allocate_init_user_pages_failed; + } + } if (offset) *offset = amdgpu_bo_mmap_offset(bo); return 0; +allocate_init_user_pages_failed: + amdgpu_bo_unref(&bo); + /* Don't unreserve system mem limit twice */ + goto err_reserve_system_mem; err_bo_create: unreserve_system_mem_limit(adev, size, alloc_domain); err_reserve_system_mem: @@ -1129,12 +1291,24 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( * be freed anyway */ + /* No more MMU notifiers */ + amdgpu_mn_unregister(mem->bo); + /* Make sure restore workers don't access the BO any more */ bo_list_entry = &mem->validate_list; mutex_lock(&process_info->lock); list_del(&bo_list_entry->head); mutex_unlock(&process_info->lock); + /* Free user pages if necessary */ + if (mem->user_pages) { + pr_debug("%s: Freeing user_pages array\n", __func__); + if (mem->user_pages[0]) + release_pages(mem->user_pages, + mem->bo->tbo.ttm->num_pages); + kvfree(mem->user_pages); + } + ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx); if (unlikely(ret)) return ret; @@ -1180,21 +1354,32 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( struct kfd_bo_va_list *bo_va_entry = NULL; struct kfd_bo_va_list *bo_va_entry_aql = NULL; unsigned long bo_size; - - /* Make sure restore is not running concurrently. - */ - mutex_lock(&mem->process_info->lock); - - mutex_lock(&mem->lock); + bool is_invalid_userptr = false; bo = mem->bo; - if (!bo) { pr_err("Invalid BO when mapping memory to GPU\n"); - ret = -EINVAL; - goto out; + return -EINVAL; } + /* Make sure restore is not running concurrently. Since we + * don't map invalid userptr BOs, we rely on the next restore + * worker to do the mapping + */ + mutex_lock(&mem->process_info->lock); + + /* Lock mmap-sem. If we find an invalid userptr BO, we can be + * sure that the MMU notifier is no longer running + * concurrently and the queues are actually stopped + */ + if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { + down_write(¤t->mm->mmap_sem); + is_invalid_userptr = atomic_read(&mem->invalid); + up_write(¤t->mm->mmap_sem); + } + + mutex_lock(&mem->lock); + domain = mem->domain; bo_size = bo->tbo.mem.size; @@ -1207,6 +1392,14 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( if (unlikely(ret)) goto out; + /* Userptr can be marked as "not invalid", but not actually be + * validated yet (still in the system domain). In that case + * the queues are still stopped and we can leave mapping for + * the next restore worker + */ + if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM) + is_invalid_userptr = true; + if (check_if_add_bo_to_vm(avm, mem)) { ret = add_bo_to_vm(adev, mem, avm, false, &bo_va_entry); @@ -1224,7 +1417,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( goto add_bo_to_vm_failed; } - if (mem->mapped_to_gpu_memory == 0) { + if (mem->mapped_to_gpu_memory == 0 && + !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { /* Validate BO only once. The eviction fence gets added to BO * the first time it is mapped. Validate will wait for all * background evictions to complete. @@ -1242,7 +1436,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( entry->va, entry->va + bo_size, entry); - ret = map_bo_to_gpuvm(adev, entry, ctx.sync); + ret = map_bo_to_gpuvm(adev, entry, ctx.sync, + is_invalid_userptr); if (ret) { pr_err("Failed to map radeon bo to gpuvm\n"); goto map_bo_to_gpuvm_failed; @@ -1425,6 +1620,337 @@ bo_reserve_failed: return ret; } +/* Evict a userptr BO by stopping the queues if necessary + * + * Runs in MMU notifier, may be in RECLAIM_FS context. This means it + * cannot do any memory allocations, and cannot take any locks that + * are held elsewhere while allocating memory. Therefore this is as + * simple as possible, using atomic counters. + * + * It doesn't do anything to the BO itself. The real work happens in + * restore, where we get updated page addresses. This function only + * ensures that GPU access to the BO is stopped. + */ +int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, + struct mm_struct *mm) +{ + struct amdkfd_process_info *process_info = mem->process_info; + int invalid, evicted_bos; + int r = 0; + + invalid = atomic_inc_return(&mem->invalid); + evicted_bos = atomic_inc_return(&process_info->evicted_bos); + if (evicted_bos == 1) { + /* First eviction, stop the queues */ + r = kgd2kfd->quiesce_mm(mm); + if (r) + pr_err("Failed to quiesce KFD\n"); + schedule_delayed_work(&process_info->restore_userptr_work, + msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS)); + } + + return r; +} + +/* Update invalid userptr BOs + * + * Moves invalidated (evicted) userptr BOs from userptr_valid_list to + * userptr_inval_list and updates user pages for all BOs that have + * been invalidated since their last update. + */ +static int update_invalid_user_pages(struct amdkfd_process_info *process_info, + struct mm_struct *mm) +{ + struct kgd_mem *mem, *tmp_mem; + struct amdgpu_bo *bo; + struct ttm_operation_ctx ctx = { false, false }; + int invalid, ret; + + /* Move all invalidated BOs to the userptr_inval_list and + * release their user pages by migration to the CPU domain + */ + list_for_each_entry_safe(mem, tmp_mem, + &process_info->userptr_valid_list, + validate_list.head) { + if (!atomic_read(&mem->invalid)) + continue; /* BO is still valid */ + + bo = mem->bo; + + if (amdgpu_bo_reserve(bo, true)) + return -EAGAIN; + amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + amdgpu_bo_unreserve(bo); + if (ret) { + pr_err("%s: Failed to invalidate userptr BO\n", + __func__); + return -EAGAIN; + } + + list_move_tail(&mem->validate_list.head, + &process_info->userptr_inval_list); + } + + if (list_empty(&process_info->userptr_inval_list)) + return 0; /* All evicted userptr BOs were freed */ + + /* Go through userptr_inval_list and update any invalid user_pages */ + list_for_each_entry(mem, &process_info->userptr_inval_list, + validate_list.head) { + invalid = atomic_read(&mem->invalid); + if (!invalid) + /* BO hasn't been invalidated since the last + * revalidation attempt. Keep its BO list. + */ + continue; + + bo = mem->bo; + + if (!mem->user_pages) { + mem->user_pages = + kvmalloc_array(bo->tbo.ttm->num_pages, + sizeof(struct page *), + GFP_KERNEL | __GFP_ZERO); + if (!mem->user_pages) { + pr_err("%s: Failed to allocate pages array\n", + __func__); + return -ENOMEM; + } + } else if (mem->user_pages[0]) { + release_pages(mem->user_pages, bo->tbo.ttm->num_pages); + } + + /* Get updated user pages */ + ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, + mem->user_pages); + if (ret) { + mem->user_pages[0] = NULL; + pr_info("%s: Failed to get user pages: %d\n", + __func__, ret); + /* Pretend it succeeded. It will fail later + * with a VM fault if the GPU tries to access + * it. Better than hanging indefinitely with + * stalled user mode queues. + */ + } + + /* Mark the BO as valid unless it was invalidated + * again concurrently + */ + if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid) + return -EAGAIN; + } + + return 0; +} + +/* Validate invalid userptr BOs + * + * Validates BOs on the userptr_inval_list, and moves them back to the + * userptr_valid_list. Also updates GPUVM page tables with new page + * addresses and waits for the page table updates to complete. + */ +static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) +{ + struct amdgpu_bo_list_entry *pd_bo_list_entries; + struct list_head resv_list, duplicates; + struct ww_acquire_ctx ticket; + struct amdgpu_sync sync; + + struct amdgpu_vm *peer_vm; + struct kgd_mem *mem, *tmp_mem; + struct amdgpu_bo *bo; + struct ttm_operation_ctx ctx = { false, false }; + int i, ret; + + pd_bo_list_entries = kcalloc(process_info->n_vms, + sizeof(struct amdgpu_bo_list_entry), + GFP_KERNEL); + if (!pd_bo_list_entries) { + pr_err("%s: Failed to allocate PD BO list entries\n", __func__); + return -ENOMEM; + } + + INIT_LIST_HEAD(&resv_list); + INIT_LIST_HEAD(&duplicates); + + /* Get all the page directory BOs that need to be reserved */ + i = 0; + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) + amdgpu_vm_get_pd_bo(peer_vm, &resv_list, + &pd_bo_list_entries[i++]); + /* Add the userptr_inval_list entries to resv_list */ + list_for_each_entry(mem, &process_info->userptr_inval_list, + validate_list.head) { + list_add_tail(&mem->resv_list.head, &resv_list); + mem->resv_list.bo = mem->validate_list.bo; + mem->resv_list.shared = mem->validate_list.shared; + } + + /* Reserve all BOs and page tables for validation */ + ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates); + WARN(!list_empty(&duplicates), "Duplicates should be empty"); + if (ret) + goto out; + + amdgpu_sync_create(&sync); + + /* Avoid triggering eviction fences when unmapping invalid + * userptr BOs (waits for all fences, doesn't use + * FENCE_OWNER_VM) + */ + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) + amdgpu_amdkfd_remove_eviction_fence(peer_vm->root.base.bo, + process_info->eviction_fence, + NULL, NULL); + + ret = process_validate_vms(process_info); + if (ret) + goto unreserve_out; + + /* Validate BOs and update GPUVM page tables */ + list_for_each_entry_safe(mem, tmp_mem, + &process_info->userptr_inval_list, + validate_list.head) { + struct kfd_bo_va_list *bo_va_entry; + + bo = mem->bo; + + /* Copy pages array and validate the BO if we got user pages */ + if (mem->user_pages[0]) { + amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, + mem->user_pages); + amdgpu_ttm_placement_from_domain(bo, mem->domain); + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (ret) { + pr_err("%s: failed to validate BO\n", __func__); + goto unreserve_out; + } + } + + /* Validate succeeded, now the BO owns the pages, free + * our copy of the pointer array. Put this BO back on + * the userptr_valid_list. If we need to revalidate + * it, we need to start from scratch. + */ + kvfree(mem->user_pages); + mem->user_pages = NULL; + list_move_tail(&mem->validate_list.head, + &process_info->userptr_valid_list); + + /* Update mapping. If the BO was not validated + * (because we couldn't get user pages), this will + * clear the page table entries, which will result in + * VM faults if the GPU tries to access the invalid + * memory. + */ + list_for_each_entry(bo_va_entry, &mem->bo_va_list, bo_list) { + if (!bo_va_entry->is_mapped) + continue; + + ret = update_gpuvm_pte((struct amdgpu_device *) + bo_va_entry->kgd_dev, + bo_va_entry, &sync); + if (ret) { + pr_err("%s: update PTE failed\n", __func__); + /* make sure this gets validated again */ + atomic_inc(&mem->invalid); + goto unreserve_out; + } + } + } + + /* Update page directories */ + ret = process_update_pds(process_info, &sync); + +unreserve_out: + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) + amdgpu_bo_fence(peer_vm->root.base.bo, + &process_info->eviction_fence->base, true); + ttm_eu_backoff_reservation(&ticket, &resv_list); + amdgpu_sync_wait(&sync, false); + amdgpu_sync_free(&sync); +out: + kfree(pd_bo_list_entries); + + return ret; +} + +/* Worker callback to restore evicted userptr BOs + * + * Tries to update and validate all userptr BOs. If successful and no + * concurrent evictions happened, the queues are restarted. Otherwise, + * reschedule for another attempt later. + */ +static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct amdkfd_process_info *process_info = + container_of(dwork, struct amdkfd_process_info, + restore_userptr_work); + struct task_struct *usertask; + struct mm_struct *mm; + int evicted_bos; + + evicted_bos = atomic_read(&process_info->evicted_bos); + if (!evicted_bos) + return; + + /* Reference task and mm in case of concurrent process termination */ + usertask = get_pid_task(process_info->pid, PIDTYPE_PID); + if (!usertask) + return; + mm = get_task_mm(usertask); + if (!mm) { + put_task_struct(usertask); + return; + } + + mutex_lock(&process_info->lock); + + if (update_invalid_user_pages(process_info, mm)) + goto unlock_out; + /* userptr_inval_list can be empty if all evicted userptr BOs + * have been freed. In that case there is nothing to validate + * and we can just restart the queues. + */ + if (!list_empty(&process_info->userptr_inval_list)) { + if (atomic_read(&process_info->evicted_bos) != evicted_bos) + goto unlock_out; /* Concurrent eviction, try again */ + + if (validate_invalid_user_pages(process_info)) + goto unlock_out; + } + /* Final check for concurrent evicton and atomic update. If + * another eviction happens after successful update, it will + * be a first eviction that calls quiesce_mm. The eviction + * reference counting inside KFD will handle this case. + */ + if (atomic_cmpxchg(&process_info->evicted_bos, evicted_bos, 0) != + evicted_bos) + goto unlock_out; + evicted_bos = 0; + if (kgd2kfd->resume_mm(mm)) { + pr_err("%s: Failed to resume KFD\n", __func__); + /* No recovery from this failure. Probably the CP is + * hanging. No point trying again. + */ + } +unlock_out: + mutex_unlock(&process_info->lock); + mmput(mm); + put_task_struct(usertask); + + /* If validation failed, reschedule another attempt */ + if (evicted_bos) + schedule_delayed_work(&process_info->restore_userptr_work, + msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS)); +} + /** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given * KFD process identified by process_info * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index e1756b6..9c1d491 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -530,7 +530,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, if (p->bo_list) { amdgpu_bo_list_get_list(p->bo_list, &p->validated); if (p->bo_list->first_userptr != p->bo_list->num_entries) - p->mn = amdgpu_mn_get(p->adev); + p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX); } INIT_LIST_HEAD(&duplicates); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index bd67f4c..83e344f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -36,12 +36,14 @@ #include <drm/drm.h> #include "amdgpu.h" +#include "amdgpu_amdkfd.h" struct amdgpu_mn { /* constant after initialisation */ struct amdgpu_device *adev; struct mm_struct *mm; struct mmu_notifier mn; + enum amdgpu_mn_type type; /* only used on destruction */ struct work_struct work; @@ -185,7 +187,7 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, } /** - * amdgpu_mn_invalidate_range_start - callback to notify about mm change + * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change * * @mn: our notifier * @mn: the mm this callback is about @@ -195,10 +197,10 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, * We block for all BOs between start and end to be idle and * unmap them by move them into system domain again. */ -static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long start, - unsigned long end) +static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end) { struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn); struct interval_tree_node *it; @@ -220,6 +222,49 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn, } /** + * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change + * + * @mn: our notifier + * @mn: the mm this callback is about + * @start: start of updated range + * @end: end of updated range + * + * We temporarily evict all BOs between start and end. This + * necessitates evicting all user-mode queues of the process. The BOs + * are restorted in amdgpu_mn_invalidate_range_end_hsa. + */ +static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, + unsigned long end) +{ + struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn); + struct interval_tree_node *it; + + /* notification is exclusive, but interval is inclusive */ + end -= 1; + + amdgpu_mn_read_lock(rmn); + + it = interval_tree_iter_first(&rmn->objects, start, end); + while (it) { + struct amdgpu_mn_node *node; + struct amdgpu_bo *bo; + + node = container_of(it, struct amdgpu_mn_node, it); + it = interval_tree_iter_next(it, start, end); + + list_for_each_entry(bo, &node->bos, mn_list) { + struct kgd_mem *mem = bo->kfd_bo; + + if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, + start, end)) + amdgpu_amdkfd_evict_userptr(mem, mm); + } + } +} + +/** * amdgpu_mn_invalidate_range_end - callback to notify about mm change * * @mn: our notifier @@ -239,23 +284,39 @@ static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn, amdgpu_mn_read_unlock(rmn); } -static const struct mmu_notifier_ops amdgpu_mn_ops = { - .release = amdgpu_mn_release, - .invalidate_range_start = amdgpu_mn_invalidate_range_start, - .invalidate_range_end = amdgpu_mn_invalidate_range_end, +static const struct mmu_notifier_ops amdgpu_mn_ops[] = { + [AMDGPU_MN_TYPE_GFX] = { + .release = amdgpu_mn_release, + .invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx, + .invalidate_range_end = amdgpu_mn_invalidate_range_end, + }, + [AMDGPU_MN_TYPE_HSA] = { + .release = amdgpu_mn_release, + .invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa, + .invalidate_range_end = amdgpu_mn_invalidate_range_end, + }, }; +/* Low bits of any reasonable mm pointer will be unused due to struct + * alignment. Use these bits to make a unique key from the mm pointer + * and notifier type. + */ +#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type)) + /** * amdgpu_mn_get - create notifier context * * @adev: amdgpu device pointer + * @type: type of MMU notifier context * * Creates a notifier context for current->mm. */ -struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) +struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, + enum amdgpu_mn_type type) { struct mm_struct *mm = current->mm; struct amdgpu_mn *rmn; + unsigned long key = AMDGPU_MN_KEY(mm, type); int r; mutex_lock(&adev->mn_lock); @@ -264,8 +325,8 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) return ERR_PTR(-EINTR); } - hash_for_each_possible(adev->mn_hash, rmn, node, (unsigned long)mm) - if (rmn->mm == mm) + hash_for_each_possible(adev->mn_hash, rmn, node, key) + if (AMDGPU_MN_KEY(rmn->mm, rmn->type) == key) goto release_locks; rmn = kzalloc(sizeof(*rmn), GFP_KERNEL); @@ -276,8 +337,9 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) rmn->adev = adev; rmn->mm = mm; - rmn->mn.ops = &amdgpu_mn_ops; init_rwsem(&rmn->lock); + rmn->type = type; + rmn->mn.ops = &amdgpu_mn_ops[type]; rmn->objects = RB_ROOT_CACHED; mutex_init(&rmn->read_lock); atomic_set(&rmn->recursion, 0); @@ -286,7 +348,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) if (r) goto free_rmn; - hash_add(adev->mn_hash, &rmn->node, (unsigned long)mm); + hash_add(adev->mn_hash, &rmn->node, AMDGPU_MN_KEY(mm, type)); release_locks: up_write(&mm->mmap_sem); @@ -315,15 +377,21 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) { unsigned long end = addr + amdgpu_bo_size(bo) - 1; struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + enum amdgpu_mn_type type = + bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX; struct amdgpu_mn *rmn; - struct amdgpu_mn_node *node = NULL; + struct amdgpu_mn_node *node = NULL, *new_node; struct list_head bos; struct interval_tree_node *it; - rmn = amdgpu_mn_get(adev); + rmn = amdgpu_mn_get(adev, type); if (IS_ERR(rmn)) return PTR_ERR(rmn); + new_node = kmalloc(sizeof(*new_node), GFP_KERNEL); + if (!new_node) + return -ENOMEM; + INIT_LIST_HEAD(&bos); down_write(&rmn->lock); @@ -337,13 +405,10 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) list_splice(&node->bos, &bos); } - if (!node) { - node = kmalloc(sizeof(struct amdgpu_mn_node), GFP_KERNEL); - if (!node) { - up_write(&rmn->lock); - return -ENOMEM; - } - } + if (!node) + node = new_node; + else + kfree(new_node); bo->mn = rmn; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h index d0095a3..eb0f432 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h @@ -29,16 +29,23 @@ */ struct amdgpu_mn; +enum amdgpu_mn_type { + AMDGPU_MN_TYPE_GFX, + AMDGPU_MN_TYPE_HSA, +}; + #if defined(CONFIG_MMU_NOTIFIER) void amdgpu_mn_lock(struct amdgpu_mn *mn); void amdgpu_mn_unlock(struct amdgpu_mn *mn); -struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev); +struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, + enum amdgpu_mn_type type); int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr); void amdgpu_mn_unregister(struct amdgpu_bo *bo); #else static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {} static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {} -static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) +static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev, + enum amdgpu_mn_type type) { return NULL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 5e9fd25..69a2b25 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -682,7 +682,7 @@ struct amdgpu_ttm_tt { struct ttm_dma_tt ttm; u64 offset; uint64_t userptr; - struct mm_struct *usermm; + struct task_struct *usertask; uint32_t userflags; spinlock_t guptasklock; struct list_head guptasks; @@ -693,14 +693,18 @@ struct amdgpu_ttm_tt { int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) { struct amdgpu_ttm_tt *gtt = (void *)ttm; + struct mm_struct *mm = gtt->usertask->mm; unsigned int flags = 0; unsigned pinned = 0; int r; + if (!mm) /* Happens during process shutdown */ + return -ESRCH; + if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY)) flags |= FOLL_WRITE; - down_read(¤t->mm->mmap_sem); + down_read(&mm->mmap_sem); if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) { /* check that we only use anonymous memory @@ -708,9 +712,9 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE; struct vm_area_struct *vma; - vma = find_vma(gtt->usermm, gtt->userptr); + vma = find_vma(mm, gtt->userptr); if (!vma || vma->vm_file || vma->vm_end < end) { - up_read(¤t->mm->mmap_sem); + up_read(&mm->mmap_sem); return -EPERM; } } @@ -726,7 +730,12 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) list_add(&guptask.list, >t->guptasks); spin_unlock(>t->guptasklock); - r = get_user_pages(userptr, num_pages, flags, p, NULL); + if (mm == current->mm) + r = get_user_pages(userptr, num_pages, flags, p, NULL); + else + r = get_user_pages_remote(gtt->usertask, + mm, userptr, num_pages, + flags, p, NULL, NULL); spin_lock(>t->guptasklock); list_del(&guptask.list); @@ -739,12 +748,12 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages) } while (pinned < ttm->num_pages); - up_read(¤t->mm->mmap_sem); + up_read(&mm->mmap_sem); return 0; release_pages: release_pages(pages, pinned); - up_read(¤t->mm->mmap_sem); + up_read(&mm->mmap_sem); return r; } @@ -999,6 +1008,9 @@ static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm) { struct amdgpu_ttm_tt *gtt = (void *)ttm; + if (gtt->usertask) + put_task_struct(gtt->usertask); + ttm_dma_tt_fini(>t->ttm); kfree(gtt); } @@ -1100,8 +1112,13 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, return -EINVAL; gtt->userptr = addr; - gtt->usermm = current->mm; gtt->userflags = flags; + + if (gtt->usertask) + put_task_struct(gtt->usertask); + gtt->usertask = current->group_leader; + get_task_struct(gtt->usertask); + spin_lock_init(>t->guptasklock); INIT_LIST_HEAD(>t->guptasks); atomic_set(>t->mmu_invalidations, 0); @@ -1117,7 +1134,10 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm) if (gtt == NULL) return NULL; - return gtt->usermm; + if (gtt->usertask == NULL) + return NULL; + + return gtt->usertask->mm; } bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index ee71c40..75592bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -308,7 +308,7 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) else return AMDGPU_FW_LOAD_PSP; default: - DRM_ERROR("Unknow firmware load type\n"); + DRM_ERROR("Unknown firmware load type\n"); } return AMDGPU_FW_LOAD_DIRECT; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 2c5e2a4..fc19118 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -4789,6 +4789,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, cu_info->number = active_cu_number; cu_info->ao_cu_mask = ao_cu_mask; + cu_info->simd_per_cu = NUM_SIMD_PER_CU; return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h index 839a144..8dc2910 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h @@ -269,6 +269,11 @@ * x=1: tmz_end */ +#define PACKET3_INVALIDATE_TLBS 0x98 +# define PACKET3_INVALIDATE_TLBS_DST_SEL(x) ((x) << 0) +# define PACKET3_INVALIDATE_TLBS_ALL_HUB(x) ((x) << 4) +# define PACKET3_INVALIDATE_TLBS_PASID(x) ((x) << 5) +# define PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(x) ((x) << 29) #define PACKET3_SET_RESOURCES 0xA0 /* 1. header * 2. CONTROL diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 8fd1b74..8fd1b74 100755..100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig index ed2f06c..3858820 100644 --- a/drivers/gpu/drm/amd/amdkfd/Kconfig +++ b/drivers/gpu/drm/amd/amdkfd/Kconfig @@ -6,5 +6,6 @@ config HSA_AMD tristate "HSA kernel driver for AMD GPU devices" depends on DRM_AMDGPU && X86_64 imply AMD_IOMMU_V2 + select MMU_NOTIFIER help Enable this if you want to use HSA features on AMD GPU devices. diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index 0d02422..ffd096f 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -30,12 +30,14 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \ kfd_pasid.o kfd_doorbell.o kfd_flat_memory.o \ kfd_process.o kfd_queue.o kfd_mqd_manager.o \ kfd_mqd_manager_cik.o kfd_mqd_manager_vi.o \ + kfd_mqd_manager_v9.o \ kfd_kernel_queue.o kfd_kernel_queue_cik.o \ - kfd_kernel_queue_vi.o kfd_packet_manager.o \ - kfd_process_queue_manager.o kfd_device_queue_manager.o \ - kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \ + kfd_kernel_queue_vi.o kfd_kernel_queue_v9.o \ + kfd_packet_manager.o kfd_process_queue_manager.o \ + kfd_device_queue_manager.o kfd_device_queue_manager_cik.o \ + kfd_device_queue_manager_vi.o kfd_device_queue_manager_v9.o \ kfd_interrupt.o kfd_events.o cik_event_interrupt.o \ - kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o + kfd_int_process_v9.o kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o ifneq ($(CONFIG_AMD_IOMMU_V2),) amdkfd-y += kfd_iommu.o diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c index 3d5ccb3..49df6c7 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c @@ -27,18 +27,28 @@ static bool cik_event_interrupt_isr(struct kfd_dev *dev, const uint32_t *ih_ring_entry) { - unsigned int pasid; const struct cik_ih_ring_entry *ihre = (const struct cik_ih_ring_entry *)ih_ring_entry; + unsigned int vmid, pasid; + + /* Only handle interrupts from KFD VMIDs */ + vmid = (ihre->ring_id & 0x0000ff00) >> 8; + if (vmid < dev->vm_info.first_vmid_kfd || + vmid > dev->vm_info.last_vmid_kfd) + return 0; + /* If there is no valid PASID, it's likely a firmware bug */ pasid = (ihre->ring_id & 0xffff0000) >> 16; + if (WARN_ONCE(pasid == 0, "FW bug: No PASID in KFD interrupt")) + return 0; - /* Do not process in ISR, just request it to be forwarded to WQ. */ - return (pasid != 0) && - (ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE || + /* Interrupt types we care about: various signals and faults. + * They will be forwarded to a work queue (see below). + */ + return ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE || ihre->source_id == CIK_INTSRC_SDMA_TRAP || ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG || - ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE); + ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE; } static void cik_event_interrupt_wq(struct kfd_dev *dev, diff --git a/drivers/gpu/drm/amd/amdkfd/cik_regs.h b/drivers/gpu/drm/amd/amdkfd/cik_regs.h index 48769d1..37ce6dd 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_regs.h +++ b/drivers/gpu/drm/amd/amdkfd/cik_regs.h @@ -33,7 +33,8 @@ #define APE1_MTYPE(x) ((x) << 7) /* valid for both DEFAULT_MTYPE and APE1_MTYPE */ -#define MTYPE_CACHED 0 +#define MTYPE_CACHED_NV 0 +#define MTYPE_CACHED 1 #define MTYPE_NONCACHED 3 #define DEFAULT_CP_HQD_PERSISTENT_STATE (0x33U << 8) diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h new file mode 100644 index 0000000..f68aef0 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -0,0 +1,560 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +static const uint32_t cwsr_trap_gfx8_hex[] = { + 0xbf820001, 0xbf820125, + 0xb8f4f802, 0x89748674, + 0xb8f5f803, 0x8675ff75, + 0x00000400, 0xbf850011, + 0xc00a1e37, 0x00000000, + 0xbf8c007f, 0x87777978, + 0xbf840002, 0xb974f802, + 0xbe801d78, 0xb8f5f803, + 0x8675ff75, 0x000001ff, + 0xbf850002, 0x80708470, + 0x82718071, 0x8671ff71, + 0x0000ffff, 0xb974f802, + 0xbe801f70, 0xb8f5f803, + 0x8675ff75, 0x00000100, + 0xbf840006, 0xbefa0080, + 0xb97a0203, 0x8671ff71, + 0x0000ffff, 0x80f08870, + 0x82f18071, 0xbefa0080, + 0xb97a0283, 0xbef60068, + 0xbef70069, 0xb8fa1c07, + 0x8e7a9c7a, 0x87717a71, + 0xb8fa03c7, 0x8e7a9b7a, + 0x87717a71, 0xb8faf807, + 0x867aff7a, 0x00007fff, + 0xb97af807, 0xbef2007e, + 0xbef3007f, 0xbefe0180, + 0xbf900004, 0x877a8474, + 0xb97af802, 0xbf8e0002, + 0xbf88fffe, 0xbef8007e, + 0x8679ff7f, 0x0000ffff, + 0x8779ff79, 0x00040000, + 0xbefa0080, 0xbefb00ff, + 0x00807fac, 0x867aff7f, + 0x08000000, 0x8f7a837a, + 0x877b7a7b, 0x867aff7f, + 0x70000000, 0x8f7a817a, + 0x877b7a7b, 0xbeef007c, + 0xbeee0080, 0xb8ee2a05, + 0x806e816e, 0x8e6e8a6e, + 0xb8fa1605, 0x807a817a, + 0x8e7a867a, 0x806e7a6e, + 0xbefa0084, 0xbefa00ff, + 0x01000000, 0xbefe007c, + 0xbefc006e, 0xc0611bfc, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611c3c, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611c7c, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611cbc, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611cfc, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611d3c, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xb8f5f803, + 0xbefe007c, 0xbefc006e, + 0xc0611d7c, 0x0000007c, + 0x806e846e, 0xbefc007e, + 0xbefe007c, 0xbefc006e, + 0xc0611dbc, 0x0000007c, + 0x806e846e, 0xbefc007e, + 0xbefe007c, 0xbefc006e, + 0xc0611dfc, 0x0000007c, + 0x806e846e, 0xbefc007e, + 0xb8eff801, 0xbefe007c, + 0xbefc006e, 0xc0611bfc, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611b3c, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0xbefe007c, + 0xbefc006e, 0xc0611b7c, + 0x0000007c, 0x806e846e, + 0xbefc007e, 0x867aff7f, + 0x04000000, 0xbef30080, + 0x8773737a, 0xb8ee2a05, + 0x806e816e, 0x8e6e8a6e, + 0xb8f51605, 0x80758175, + 0x8e758475, 0x8e7a8275, + 0xbefa00ff, 0x01000000, + 0xbef60178, 0x80786e78, + 0x82798079, 0xbefc0080, + 0xbe802b00, 0xbe822b02, + 0xbe842b04, 0xbe862b06, + 0xbe882b08, 0xbe8a2b0a, + 0xbe8c2b0c, 0xbe8e2b0e, + 0xc06b003c, 0x00000000, + 0xc06b013c, 0x00000010, + 0xc06b023c, 0x00000020, + 0xc06b033c, 0x00000030, + 0x8078c078, 0x82798079, + 0x807c907c, 0xbf0a757c, + 0xbf85ffeb, 0xbef80176, + 0xbeee0080, 0xbefe00c1, + 0xbeff00c1, 0xbefa00ff, + 0x01000000, 0xe0724000, + 0x6e1e0000, 0xe0724100, + 0x6e1e0100, 0xe0724200, + 0x6e1e0200, 0xe0724300, + 0x6e1e0300, 0xbefe00c1, + 0xbeff00c1, 0xb8f54306, + 0x8675c175, 0xbf84002c, + 0xbf8a0000, 0x867aff73, + 0x04000000, 0xbf840028, + 0x8e758675, 0x8e758275, + 0xbefa0075, 0xb8ee2a05, + 0x806e816e, 0x8e6e8a6e, + 0xb8fa1605, 0x807a817a, + 0x8e7a867a, 0x806e7a6e, + 0x806eff6e, 0x00000080, + 0xbefa00ff, 0x01000000, + 0xbefc0080, 0xd28c0002, + 0x000100c1, 0xd28d0003, + 0x000204c1, 0xd1060002, + 0x00011103, 0x7e0602ff, + 0x00000200, 0xbefc00ff, + 0x00010000, 0xbe80007b, + 0x867bff7b, 0xff7fffff, + 0x877bff7b, 0x00058000, + 0xd8ec0000, 0x00000002, + 0xbf8c007f, 0xe0765000, + 0x6e1e0002, 0x32040702, + 0xd0c9006a, 0x0000eb02, + 0xbf87fff7, 0xbefb0000, + 0xbeee00ff, 0x00000400, + 0xbefe00c1, 0xbeff00c1, + 0xb8f52a05, 0x80758175, + 0x8e758275, 0x8e7a8875, + 0xbefa00ff, 0x01000000, + 0xbefc0084, 0xbf0a757c, + 0xbf840015, 0xbf11017c, + 0x8075ff75, 0x00001000, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, + 0xe0724000, 0x6e1e0000, + 0xe0724100, 0x6e1e0100, + 0xe0724200, 0x6e1e0200, + 0xe0724300, 0x6e1e0300, + 0x807c847c, 0x806eff6e, + 0x00000400, 0xbf0a757c, + 0xbf85ffef, 0xbf9c0000, + 0xbf8200ca, 0xbef8007e, + 0x8679ff7f, 0x0000ffff, + 0x8779ff79, 0x00040000, + 0xbefa0080, 0xbefb00ff, + 0x00807fac, 0x8676ff7f, + 0x08000000, 0x8f768376, + 0x877b767b, 0x8676ff7f, + 0x70000000, 0x8f768176, + 0x877b767b, 0x8676ff7f, + 0x04000000, 0xbf84001e, + 0xbefe00c1, 0xbeff00c1, + 0xb8f34306, 0x8673c173, + 0xbf840019, 0x8e738673, + 0x8e738273, 0xbefa0073, + 0xb8f22a05, 0x80728172, + 0x8e728a72, 0xb8f61605, + 0x80768176, 0x8e768676, + 0x80727672, 0x8072ff72, + 0x00000080, 0xbefa00ff, + 0x01000000, 0xbefc0080, + 0xe0510000, 0x721e0000, + 0xe0510100, 0x721e0000, + 0x807cff7c, 0x00000200, + 0x8072ff72, 0x00000200, + 0xbf0a737c, 0xbf85fff6, + 0xbef20080, 0xbefe00c1, + 0xbeff00c1, 0xb8f32a05, + 0x80738173, 0x8e738273, + 0x8e7a8873, 0xbefa00ff, + 0x01000000, 0xbef60072, + 0x8072ff72, 0x00000400, + 0xbefc0084, 0xbf11087c, + 0x8073ff73, 0x00008000, + 0xe0524000, 0x721e0000, + 0xe0524100, 0x721e0100, + 0xe0524200, 0x721e0200, + 0xe0524300, 0x721e0300, + 0xbf8c0f70, 0x7e000300, + 0x7e020301, 0x7e040302, + 0x7e060303, 0x807c847c, + 0x8072ff72, 0x00000400, + 0xbf0a737c, 0xbf85ffee, + 0xbf9c0000, 0xe0524000, + 0x761e0000, 0xe0524100, + 0x761e0100, 0xe0524200, + 0x761e0200, 0xe0524300, + 0x761e0300, 0xb8f22a05, + 0x80728172, 0x8e728a72, + 0xb8f61605, 0x80768176, + 0x8e768676, 0x80727672, + 0x80f2c072, 0xb8f31605, + 0x80738173, 0x8e738473, + 0x8e7a8273, 0xbefa00ff, + 0x01000000, 0xbefc0073, + 0xc031003c, 0x00000072, + 0x80f2c072, 0xbf8c007f, + 0x80fc907c, 0xbe802d00, + 0xbe822d02, 0xbe842d04, + 0xbe862d06, 0xbe882d08, + 0xbe8a2d0a, 0xbe8c2d0c, + 0xbe8e2d0e, 0xbf06807c, + 0xbf84fff1, 0xb8f22a05, + 0x80728172, 0x8e728a72, + 0xb8f61605, 0x80768176, + 0x8e768676, 0x80727672, + 0xbefa0084, 0xbefa00ff, + 0x01000000, 0xc0211cfc, + 0x00000072, 0x80728472, + 0xc0211c3c, 0x00000072, + 0x80728472, 0xc0211c7c, + 0x00000072, 0x80728472, + 0xc0211bbc, 0x00000072, + 0x80728472, 0xc0211bfc, + 0x00000072, 0x80728472, + 0xc0211d3c, 0x00000072, + 0x80728472, 0xc0211d7c, + 0x00000072, 0x80728472, + 0xc0211a3c, 0x00000072, + 0x80728472, 0xc0211a7c, + 0x00000072, 0x80728472, + 0xc0211dfc, 0x00000072, + 0x80728472, 0xc0211b3c, + 0x00000072, 0x80728472, + 0xc0211b7c, 0x00000072, + 0x80728472, 0xbf8c007f, + 0xbefc0073, 0xbefe006e, + 0xbeff006f, 0x867375ff, + 0x000003ff, 0xb9734803, + 0x867375ff, 0xfffff800, + 0x8f738b73, 0xb973a2c3, + 0xb977f801, 0x8673ff71, + 0xf0000000, 0x8f739c73, + 0x8e739073, 0xbef60080, + 0x87767376, 0x8673ff71, + 0x08000000, 0x8f739b73, + 0x8e738f73, 0x87767376, + 0x8673ff74, 0x00800000, + 0x8f739773, 0xb976f807, + 0x8671ff71, 0x0000ffff, + 0x86fe7e7e, 0x86ea6a6a, + 0xb974f802, 0xbf8a0000, + 0x95807370, 0xbf810000, +}; + + +static const uint32_t cwsr_trap_gfx9_hex[] = { + 0xbf820001, 0xbf82015a, + 0xb8f8f802, 0x89788678, + 0xb8f1f803, 0x866eff71, + 0x00000400, 0xbf850034, + 0x866eff71, 0x00000800, + 0xbf850003, 0x866eff71, + 0x00000100, 0xbf840008, + 0x866eff78, 0x00002000, + 0xbf840001, 0xbf810000, + 0x8778ff78, 0x00002000, + 0x80ec886c, 0x82ed806d, + 0xb8eef807, 0x866fff6e, + 0x001f8000, 0x8e6f8b6f, + 0x8977ff77, 0xfc000000, + 0x87776f77, 0x896eff6e, + 0x001f8000, 0xb96ef807, + 0xb8f0f812, 0xb8f1f813, + 0x8ef08870, 0xc0071bb8, + 0x00000000, 0xbf8cc07f, + 0xc0071c38, 0x00000008, + 0xbf8cc07f, 0x86ee6e6e, + 0xbf840001, 0xbe801d6e, + 0xb8f1f803, 0x8671ff71, + 0x000001ff, 0xbf850002, + 0x806c846c, 0x826d806d, + 0x866dff6d, 0x0000ffff, + 0x8f6e8b77, 0x866eff6e, + 0x001f8000, 0xb96ef807, + 0x86fe7e7e, 0x86ea6a6a, + 0xb978f802, 0xbe801f6c, + 0x866dff6d, 0x0000ffff, + 0xbef00080, 0xb9700283, + 0xb8f02407, 0x8e709c70, + 0x876d706d, 0xb8f003c7, + 0x8e709b70, 0x876d706d, + 0xb8f0f807, 0x8670ff70, + 0x00007fff, 0xb970f807, + 0xbeee007e, 0xbeef007f, + 0xbefe0180, 0xbf900004, + 0x87708478, 0xb970f802, + 0xbf8e0002, 0xbf88fffe, + 0xb8f02a05, 0x80708170, + 0x8e708a70, 0xb8f11605, + 0x80718171, 0x8e718671, + 0x80707170, 0x80707e70, + 0x8271807f, 0x8671ff71, + 0x0000ffff, 0xc0471cb8, + 0x00000040, 0xbf8cc07f, + 0xc04b1d38, 0x00000048, + 0xbf8cc07f, 0xc0431e78, + 0x00000058, 0xbf8cc07f, + 0xc0471eb8, 0x0000005c, + 0xbf8cc07f, 0xbef4007e, + 0x8675ff7f, 0x0000ffff, + 0x8775ff75, 0x00040000, + 0xbef60080, 0xbef700ff, + 0x00807fac, 0x8670ff7f, + 0x08000000, 0x8f708370, + 0x87777077, 0x8670ff7f, + 0x70000000, 0x8f708170, + 0x87777077, 0xbefb007c, + 0xbefa0080, 0xb8fa2a05, + 0x807a817a, 0x8e7a8a7a, + 0xb8f01605, 0x80708170, + 0x8e708670, 0x807a707a, + 0xbef60084, 0xbef600ff, + 0x01000000, 0xbefe007c, + 0xbefc007a, 0xc0611efa, + 0x0000007c, 0xbf8cc07f, + 0x807a847a, 0xbefc007e, + 0xbefe007c, 0xbefc007a, + 0xc0611b3a, 0x0000007c, + 0xbf8cc07f, 0x807a847a, + 0xbefc007e, 0xbefe007c, + 0xbefc007a, 0xc0611b7a, + 0x0000007c, 0xbf8cc07f, + 0x807a847a, 0xbefc007e, + 0xbefe007c, 0xbefc007a, + 0xc0611bba, 0x0000007c, + 0xbf8cc07f, 0x807a847a, + 0xbefc007e, 0xbefe007c, + 0xbefc007a, 0xc0611bfa, + 0x0000007c, 0xbf8cc07f, + 0x807a847a, 0xbefc007e, + 0xbefe007c, 0xbefc007a, + 0xc0611e3a, 0x0000007c, + 0xbf8cc07f, 0x807a847a, + 0xbefc007e, 0xb8f1f803, + 0xbefe007c, 0xbefc007a, + 0xc0611c7a, 0x0000007c, + 0xbf8cc07f, 0x807a847a, + 0xbefc007e, 0xbefe007c, + 0xbefc007a, 0xc0611a3a, + 0x0000007c, 0xbf8cc07f, + 0x807a847a, 0xbefc007e, + 0xbefe007c, 0xbefc007a, + 0xc0611a7a, 0x0000007c, + 0xbf8cc07f, 0x807a847a, + 0xbefc007e, 0xb8fbf801, + 0xbefe007c, 0xbefc007a, + 0xc0611efa, 0x0000007c, + 0xbf8cc07f, 0x807a847a, + 0xbefc007e, 0x8670ff7f, + 0x04000000, 0xbeef0080, + 0x876f6f70, 0xb8fa2a05, + 0x807a817a, 0x8e7a8a7a, + 0xb8f11605, 0x80718171, + 0x8e718471, 0x8e768271, + 0xbef600ff, 0x01000000, + 0xbef20174, 0x80747a74, + 0x82758075, 0xbefc0080, + 0xbf800000, 0xbe802b00, + 0xbe822b02, 0xbe842b04, + 0xbe862b06, 0xbe882b08, + 0xbe8a2b0a, 0xbe8c2b0c, + 0xbe8e2b0e, 0xc06b003a, + 0x00000000, 0xbf8cc07f, + 0xc06b013a, 0x00000010, + 0xbf8cc07f, 0xc06b023a, + 0x00000020, 0xbf8cc07f, + 0xc06b033a, 0x00000030, + 0xbf8cc07f, 0x8074c074, + 0x82758075, 0x807c907c, + 0xbf0a717c, 0xbf85ffe7, + 0xbef40172, 0xbefa0080, + 0xbefe00c1, 0xbeff00c1, + 0xbee80080, 0xbee90080, + 0xbef600ff, 0x01000000, + 0xe0724000, 0x7a1d0000, + 0xe0724100, 0x7a1d0100, + 0xe0724200, 0x7a1d0200, + 0xe0724300, 0x7a1d0300, + 0xbefe00c1, 0xbeff00c1, + 0xb8f14306, 0x8671c171, + 0xbf84002c, 0xbf8a0000, + 0x8670ff6f, 0x04000000, + 0xbf840028, 0x8e718671, + 0x8e718271, 0xbef60071, + 0xb8fa2a05, 0x807a817a, + 0x8e7a8a7a, 0xb8f01605, + 0x80708170, 0x8e708670, + 0x807a707a, 0x807aff7a, + 0x00000080, 0xbef600ff, + 0x01000000, 0xbefc0080, + 0xd28c0002, 0x000100c1, + 0xd28d0003, 0x000204c1, + 0xd1060002, 0x00011103, + 0x7e0602ff, 0x00000200, + 0xbefc00ff, 0x00010000, + 0xbe800077, 0x8677ff77, + 0xff7fffff, 0x8777ff77, + 0x00058000, 0xd8ec0000, + 0x00000002, 0xbf8cc07f, + 0xe0765000, 0x7a1d0002, + 0x68040702, 0xd0c9006a, + 0x0000e302, 0xbf87fff7, + 0xbef70000, 0xbefa00ff, + 0x00000400, 0xbefe00c1, + 0xbeff00c1, 0xb8f12a05, + 0x80718171, 0x8e718271, + 0x8e768871, 0xbef600ff, + 0x01000000, 0xbefc0084, + 0xbf0a717c, 0xbf840015, + 0xbf11017c, 0x8071ff71, + 0x00001000, 0x7e000300, + 0x7e020301, 0x7e040302, + 0x7e060303, 0xe0724000, + 0x7a1d0000, 0xe0724100, + 0x7a1d0100, 0xe0724200, + 0x7a1d0200, 0xe0724300, + 0x7a1d0300, 0x807c847c, + 0x807aff7a, 0x00000400, + 0xbf0a717c, 0xbf85ffef, + 0xbf9c0000, 0xbf8200d9, + 0xbef4007e, 0x8675ff7f, + 0x0000ffff, 0x8775ff75, + 0x00040000, 0xbef60080, + 0xbef700ff, 0x00807fac, + 0x866eff7f, 0x08000000, + 0x8f6e836e, 0x87776e77, + 0x866eff7f, 0x70000000, + 0x8f6e816e, 0x87776e77, + 0x866eff7f, 0x04000000, + 0xbf84001e, 0xbefe00c1, + 0xbeff00c1, 0xb8ef4306, + 0x866fc16f, 0xbf840019, + 0x8e6f866f, 0x8e6f826f, + 0xbef6006f, 0xb8f82a05, + 0x80788178, 0x8e788a78, + 0xb8ee1605, 0x806e816e, + 0x8e6e866e, 0x80786e78, + 0x8078ff78, 0x00000080, + 0xbef600ff, 0x01000000, + 0xbefc0080, 0xe0510000, + 0x781d0000, 0xe0510100, + 0x781d0000, 0x807cff7c, + 0x00000200, 0x8078ff78, + 0x00000200, 0xbf0a6f7c, + 0xbf85fff6, 0xbef80080, + 0xbefe00c1, 0xbeff00c1, + 0xb8ef2a05, 0x806f816f, + 0x8e6f826f, 0x8e76886f, + 0xbef600ff, 0x01000000, + 0xbeee0078, 0x8078ff78, + 0x00000400, 0xbefc0084, + 0xbf11087c, 0x806fff6f, + 0x00008000, 0xe0524000, + 0x781d0000, 0xe0524100, + 0x781d0100, 0xe0524200, + 0x781d0200, 0xe0524300, + 0x781d0300, 0xbf8c0f70, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, + 0x807c847c, 0x8078ff78, + 0x00000400, 0xbf0a6f7c, + 0xbf85ffee, 0xbf9c0000, + 0xe0524000, 0x6e1d0000, + 0xe0524100, 0x6e1d0100, + 0xe0524200, 0x6e1d0200, + 0xe0524300, 0x6e1d0300, + 0xb8f82a05, 0x80788178, + 0x8e788a78, 0xb8ee1605, + 0x806e816e, 0x8e6e866e, + 0x80786e78, 0x80f8c078, + 0xb8ef1605, 0x806f816f, + 0x8e6f846f, 0x8e76826f, + 0xbef600ff, 0x01000000, + 0xbefc006f, 0xc031003a, + 0x00000078, 0x80f8c078, + 0xbf8cc07f, 0x80fc907c, + 0xbf800000, 0xbe802d00, + 0xbe822d02, 0xbe842d04, + 0xbe862d06, 0xbe882d08, + 0xbe8a2d0a, 0xbe8c2d0c, + 0xbe8e2d0e, 0xbf06807c, + 0xbf84fff0, 0xb8f82a05, + 0x80788178, 0x8e788a78, + 0xb8ee1605, 0x806e816e, + 0x8e6e866e, 0x80786e78, + 0xbef60084, 0xbef600ff, + 0x01000000, 0xc0211bfa, + 0x00000078, 0x80788478, + 0xc0211b3a, 0x00000078, + 0x80788478, 0xc0211b7a, + 0x00000078, 0x80788478, + 0xc0211eba, 0x00000078, + 0x80788478, 0xc0211efa, + 0x00000078, 0x80788478, + 0xc0211c3a, 0x00000078, + 0x80788478, 0xc0211c7a, + 0x00000078, 0x80788478, + 0xc0211a3a, 0x00000078, + 0x80788478, 0xc0211a7a, + 0x00000078, 0x80788478, + 0xc0211cfa, 0x00000078, + 0x80788478, 0xbf8cc07f, + 0xbefc006f, 0xbefe007a, + 0xbeff007b, 0x866f71ff, + 0x000003ff, 0xb96f4803, + 0x866f71ff, 0xfffff800, + 0x8f6f8b6f, 0xb96fa2c3, + 0xb973f801, 0xb8ee2a05, + 0x806e816e, 0x8e6e8a6e, + 0xb8ef1605, 0x806f816f, + 0x8e6f866f, 0x806e6f6e, + 0x806e746e, 0x826f8075, + 0x866fff6f, 0x0000ffff, + 0xc0071cb7, 0x00000040, + 0xc00b1d37, 0x00000048, + 0xc0031e77, 0x00000058, + 0xc0071eb7, 0x0000005c, + 0xbf8cc07f, 0x866fff6d, + 0xf0000000, 0x8f6f9c6f, + 0x8e6f906f, 0xbeee0080, + 0x876e6f6e, 0x866fff6d, + 0x08000000, 0x8f6f9b6f, + 0x8e6f8f6f, 0x876e6f6e, + 0x866fff70, 0x00800000, + 0x8f6f976f, 0xb96ef807, + 0x866dff6d, 0x0000ffff, + 0x86fe7e7e, 0x86ea6a6a, + 0xb970f802, 0xbf8a0000, + 0x95806f6c, 0xbf810000, +}; diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm index 997a383d..a2a04bb 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm @@ -20,9 +20,12 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#if 0 -HW (VI) source code for CWSR trap handler -#Version 18 + multiple trap handler +/* To compile this assembly code: + * PROJECT=vi ./sp3 cwsr_trap_handler_gfx8.asm -hex tmp.hex + */ + +/* HW (VI) source code for CWSR trap handler */ +/* Version 18 + multiple trap handler */ // this performance-optimal version was originally from Seven Xu at SRDC @@ -98,6 +101,7 @@ var SWIZZLE_EN = 0 //whether we use swi /**************************************************************************/ var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23 var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000 +var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1 var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006 var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 @@ -149,7 +153,7 @@ var s_save_spi_init_lo = exec_lo var s_save_spi_init_hi = exec_hi //tba_lo and tba_hi need to be saved/restored -var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3??h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]} +var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3'h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]} var s_save_pc_hi = ttmp1 var s_save_exec_lo = ttmp2 var s_save_exec_hi = ttmp3 @@ -319,6 +323,10 @@ end s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC end + // Set SPI_PRIO=2 to avoid starving instruction fetch in the waves we're waiting for. + s_or_b32 s_save_tmp, s_save_status, (2 << SQ_WAVE_STATUS_SPI_PRIO_SHIFT) + s_setreg_b32 hwreg(HW_REG_STATUS), s_save_tmp + L_SLEEP: s_sleep 0x2 // sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause SQ hang, since the 7,8th wave could not get arbit to exec inst, while other waves are stuck into the sleep-loop and waiting for wrexec!=0 @@ -1007,8 +1015,6 @@ end s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS - s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS - //for normal save & restore, the saved PC points to the next inst to execute, no adjustment needs to be made, otherwise: if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 8 //pc[31:0]+8 //two back-to-back s_trap are used (first for save and second for restore) @@ -1044,6 +1050,7 @@ end s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp + s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu @@ -1127,258 +1134,3 @@ end function get_hwreg_size_bytes return 128 //HWREG size 128 bytes end - - -#endif - -static const uint32_t cwsr_trap_gfx8_hex[] = { - 0xbf820001, 0xbf820123, - 0xb8f4f802, 0x89748674, - 0xb8f5f803, 0x8675ff75, - 0x00000400, 0xbf850011, - 0xc00a1e37, 0x00000000, - 0xbf8c007f, 0x87777978, - 0xbf840002, 0xb974f802, - 0xbe801d78, 0xb8f5f803, - 0x8675ff75, 0x000001ff, - 0xbf850002, 0x80708470, - 0x82718071, 0x8671ff71, - 0x0000ffff, 0xb974f802, - 0xbe801f70, 0xb8f5f803, - 0x8675ff75, 0x00000100, - 0xbf840006, 0xbefa0080, - 0xb97a0203, 0x8671ff71, - 0x0000ffff, 0x80f08870, - 0x82f18071, 0xbefa0080, - 0xb97a0283, 0xbef60068, - 0xbef70069, 0xb8fa1c07, - 0x8e7a9c7a, 0x87717a71, - 0xb8fa03c7, 0x8e7a9b7a, - 0x87717a71, 0xb8faf807, - 0x867aff7a, 0x00007fff, - 0xb97af807, 0xbef2007e, - 0xbef3007f, 0xbefe0180, - 0xbf900004, 0xbf8e0002, - 0xbf88fffe, 0xbef8007e, - 0x8679ff7f, 0x0000ffff, - 0x8779ff79, 0x00040000, - 0xbefa0080, 0xbefb00ff, - 0x00807fac, 0x867aff7f, - 0x08000000, 0x8f7a837a, - 0x877b7a7b, 0x867aff7f, - 0x70000000, 0x8f7a817a, - 0x877b7a7b, 0xbeef007c, - 0xbeee0080, 0xb8ee2a05, - 0x806e816e, 0x8e6e8a6e, - 0xb8fa1605, 0x807a817a, - 0x8e7a867a, 0x806e7a6e, - 0xbefa0084, 0xbefa00ff, - 0x01000000, 0xbefe007c, - 0xbefc006e, 0xc0611bfc, - 0x0000007c, 0x806e846e, - 0xbefc007e, 0xbefe007c, - 0xbefc006e, 0xc0611c3c, - 0x0000007c, 0x806e846e, - 0xbefc007e, 0xbefe007c, - 0xbefc006e, 0xc0611c7c, - 0x0000007c, 0x806e846e, - 0xbefc007e, 0xbefe007c, - 0xbefc006e, 0xc0611cbc, - 0x0000007c, 0x806e846e, - 0xbefc007e, 0xbefe007c, - 0xbefc006e, 0xc0611cfc, - 0x0000007c, 0x806e846e, - 0xbefc007e, 0xbefe007c, - 0xbefc006e, 0xc0611d3c, - 0x0000007c, 0x806e846e, - 0xbefc007e, 0xb8f5f803, - 0xbefe007c, 0xbefc006e, - 0xc0611d7c, 0x0000007c, - 0x806e846e, 0xbefc007e, - 0xbefe007c, 0xbefc006e, - 0xc0611dbc, 0x0000007c, - 0x806e846e, 0xbefc007e, - 0xbefe007c, 0xbefc006e, - 0xc0611dfc, 0x0000007c, - 0x806e846e, 0xbefc007e, - 0xb8eff801, 0xbefe007c, - 0xbefc006e, 0xc0611bfc, - 0x0000007c, 0x806e846e, - 0xbefc007e, 0xbefe007c, - 0xbefc006e, 0xc0611b3c, - 0x0000007c, 0x806e846e, - 0xbefc007e, 0xbefe007c, - 0xbefc006e, 0xc0611b7c, - 0x0000007c, 0x806e846e, - 0xbefc007e, 0x867aff7f, - 0x04000000, 0xbef30080, - 0x8773737a, 0xb8ee2a05, - 0x806e816e, 0x8e6e8a6e, - 0xb8f51605, 0x80758175, - 0x8e758475, 0x8e7a8275, - 0xbefa00ff, 0x01000000, - 0xbef60178, 0x80786e78, - 0x82798079, 0xbefc0080, - 0xbe802b00, 0xbe822b02, - 0xbe842b04, 0xbe862b06, - 0xbe882b08, 0xbe8a2b0a, - 0xbe8c2b0c, 0xbe8e2b0e, - 0xc06b003c, 0x00000000, - 0xc06b013c, 0x00000010, - 0xc06b023c, 0x00000020, - 0xc06b033c, 0x00000030, - 0x8078c078, 0x82798079, - 0x807c907c, 0xbf0a757c, - 0xbf85ffeb, 0xbef80176, - 0xbeee0080, 0xbefe00c1, - 0xbeff00c1, 0xbefa00ff, - 0x01000000, 0xe0724000, - 0x6e1e0000, 0xe0724100, - 0x6e1e0100, 0xe0724200, - 0x6e1e0200, 0xe0724300, - 0x6e1e0300, 0xbefe00c1, - 0xbeff00c1, 0xb8f54306, - 0x8675c175, 0xbf84002c, - 0xbf8a0000, 0x867aff73, - 0x04000000, 0xbf840028, - 0x8e758675, 0x8e758275, - 0xbefa0075, 0xb8ee2a05, - 0x806e816e, 0x8e6e8a6e, - 0xb8fa1605, 0x807a817a, - 0x8e7a867a, 0x806e7a6e, - 0x806eff6e, 0x00000080, - 0xbefa00ff, 0x01000000, - 0xbefc0080, 0xd28c0002, - 0x000100c1, 0xd28d0003, - 0x000204c1, 0xd1060002, - 0x00011103, 0x7e0602ff, - 0x00000200, 0xbefc00ff, - 0x00010000, 0xbe80007b, - 0x867bff7b, 0xff7fffff, - 0x877bff7b, 0x00058000, - 0xd8ec0000, 0x00000002, - 0xbf8c007f, 0xe0765000, - 0x6e1e0002, 0x32040702, - 0xd0c9006a, 0x0000eb02, - 0xbf87fff7, 0xbefb0000, - 0xbeee00ff, 0x00000400, - 0xbefe00c1, 0xbeff00c1, - 0xb8f52a05, 0x80758175, - 0x8e758275, 0x8e7a8875, - 0xbefa00ff, 0x01000000, - 0xbefc0084, 0xbf0a757c, - 0xbf840015, 0xbf11017c, - 0x8075ff75, 0x00001000, - 0x7e000300, 0x7e020301, - 0x7e040302, 0x7e060303, - 0xe0724000, 0x6e1e0000, - 0xe0724100, 0x6e1e0100, - 0xe0724200, 0x6e1e0200, - 0xe0724300, 0x6e1e0300, - 0x807c847c, 0x806eff6e, - 0x00000400, 0xbf0a757c, - 0xbf85ffef, 0xbf9c0000, - 0xbf8200ca, 0xbef8007e, - 0x8679ff7f, 0x0000ffff, - 0x8779ff79, 0x00040000, - 0xbefa0080, 0xbefb00ff, - 0x00807fac, 0x8676ff7f, - 0x08000000, 0x8f768376, - 0x877b767b, 0x8676ff7f, - 0x70000000, 0x8f768176, - 0x877b767b, 0x8676ff7f, - 0x04000000, 0xbf84001e, - 0xbefe00c1, 0xbeff00c1, - 0xb8f34306, 0x8673c173, - 0xbf840019, 0x8e738673, - 0x8e738273, 0xbefa0073, - 0xb8f22a05, 0x80728172, - 0x8e728a72, 0xb8f61605, - 0x80768176, 0x8e768676, - 0x80727672, 0x8072ff72, - 0x00000080, 0xbefa00ff, - 0x01000000, 0xbefc0080, - 0xe0510000, 0x721e0000, - 0xe0510100, 0x721e0000, - 0x807cff7c, 0x00000200, - 0x8072ff72, 0x00000200, - 0xbf0a737c, 0xbf85fff6, - 0xbef20080, 0xbefe00c1, - 0xbeff00c1, 0xb8f32a05, - 0x80738173, 0x8e738273, - 0x8e7a8873, 0xbefa00ff, - 0x01000000, 0xbef60072, - 0x8072ff72, 0x00000400, - 0xbefc0084, 0xbf11087c, - 0x8073ff73, 0x00008000, - 0xe0524000, 0x721e0000, - 0xe0524100, 0x721e0100, - 0xe0524200, 0x721e0200, - 0xe0524300, 0x721e0300, - 0xbf8c0f70, 0x7e000300, - 0x7e020301, 0x7e040302, - 0x7e060303, 0x807c847c, - 0x8072ff72, 0x00000400, - 0xbf0a737c, 0xbf85ffee, - 0xbf9c0000, 0xe0524000, - 0x761e0000, 0xe0524100, - 0x761e0100, 0xe0524200, - 0x761e0200, 0xe0524300, - 0x761e0300, 0xb8f22a05, - 0x80728172, 0x8e728a72, - 0xb8f61605, 0x80768176, - 0x8e768676, 0x80727672, - 0x80f2c072, 0xb8f31605, - 0x80738173, 0x8e738473, - 0x8e7a8273, 0xbefa00ff, - 0x01000000, 0xbefc0073, - 0xc031003c, 0x00000072, - 0x80f2c072, 0xbf8c007f, - 0x80fc907c, 0xbe802d00, - 0xbe822d02, 0xbe842d04, - 0xbe862d06, 0xbe882d08, - 0xbe8a2d0a, 0xbe8c2d0c, - 0xbe8e2d0e, 0xbf06807c, - 0xbf84fff1, 0xb8f22a05, - 0x80728172, 0x8e728a72, - 0xb8f61605, 0x80768176, - 0x8e768676, 0x80727672, - 0xbefa0084, 0xbefa00ff, - 0x01000000, 0xc0211cfc, - 0x00000072, 0x80728472, - 0xc0211c3c, 0x00000072, - 0x80728472, 0xc0211c7c, - 0x00000072, 0x80728472, - 0xc0211bbc, 0x00000072, - 0x80728472, 0xc0211bfc, - 0x00000072, 0x80728472, - 0xc0211d3c, 0x00000072, - 0x80728472, 0xc0211d7c, - 0x00000072, 0x80728472, - 0xc0211a3c, 0x00000072, - 0x80728472, 0xc0211a7c, - 0x00000072, 0x80728472, - 0xc0211dfc, 0x00000072, - 0x80728472, 0xc0211b3c, - 0x00000072, 0x80728472, - 0xc0211b7c, 0x00000072, - 0x80728472, 0xbf8c007f, - 0x8671ff71, 0x0000ffff, - 0xbefc0073, 0xbefe006e, - 0xbeff006f, 0x867375ff, - 0x000003ff, 0xb9734803, - 0x867375ff, 0xfffff800, - 0x8f738b73, 0xb973a2c3, - 0xb977f801, 0x8673ff71, - 0xf0000000, 0x8f739c73, - 0x8e739073, 0xbef60080, - 0x87767376, 0x8673ff71, - 0x08000000, 0x8f739b73, - 0x8e738f73, 0x87767376, - 0x8673ff74, 0x00800000, - 0x8f739773, 0xb976f807, - 0x86fe7e7e, 0x86ea6a6a, - 0xb974f802, 0xbf8a0000, - 0x95807370, 0xbf810000, -}; - diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm new file mode 100644 index 0000000..998be96 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm @@ -0,0 +1,1214 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* To compile this assembly code: + * PROJECT=greenland ./sp3 cwsr_trap_handler_gfx9.asm -hex tmp.hex + */ + +/* HW (GFX9) source code for CWSR trap handler */ +/* Version 18 + multiple trap handler */ + +// this performance-optimal version was originally from Seven Xu at SRDC + +// Revison #18 --... +/* Rev History +** #1. Branch from gc dv. //gfxip/gfx9/main/src/test/suites/block/cs/sr/cs_trap_handler.sp3#1,#50, #51, #52-53(Skip, Already Fixed by PV), #54-56(merged),#57-58(mergerd, skiped-already fixed by PV) +** #4. SR Memory Layout: +** 1. VGPR-SGPR-HWREG-{LDS} +** 2. tba_hi.bits.26 - reconfigured as the first wave in tg bits, for defer Save LDS for a threadgroup.. performance concern.. +** #5. Update: 1. Accurate g8sr_ts_save_d timestamp +** #6. Update: 1. Fix s_barrier usage; 2. VGPR s/r using swizzle buffer?(NoNeed, already matched the swizzle pattern, more investigation) +** #7. Update: 1. don't barrier if noLDS +** #8. Branch: 1. Branch to ver#0, which is very similar to gc dv version +** 2. Fix SQ issue by s_sleep 2 +** #9. Update: 1. Fix scc restore failed issue, restore wave_status at last +** 2. optimize s_buffer save by burst 16sgprs... +** #10. Update 1. Optimize restore sgpr by busrt 16 sgprs. +** #11. Update 1. Add 2 more timestamp for debug version +** #12. Update 1. Add VGPR SR using DWx4, some case improve and some case drop performance +** #13. Integ 1. Always use MUBUF for PV trap shader... +** #14. Update 1. s_buffer_store soft clause... +** #15. Update 1. PERF - sclar write with glc:0/mtype0 to allow L2 combine. perf improvement a lot. +** #16. Update 1. PRRF - UNROLL LDS_DMA got 2500cycle save in IP tree +** #17. Update 1. FUNC - LDS_DMA has issues while ATC, replace with ds_read/buffer_store for save part[TODO restore part] +** 2. PERF - Save LDS before save VGPR to cover LDS save long latency... +** #18. Update 1. FUNC - Implicitly estore STATUS.VCCZ, which is not writable by s_setreg_b32 +** 2. FUNC - Handle non-CWSR traps +*/ + +var G8SR_WDMEM_HWREG_OFFSET = 0 +var G8SR_WDMEM_SGPR_OFFSET = 128 // in bytes + +// Keep definition same as the app shader, These 2 time stamps are part of the app shader... Should before any Save and after restore. + +var G8SR_DEBUG_TIMESTAMP = 0 +var G8SR_DEBUG_TS_SAVE_D_OFFSET = 40*4 // ts_save_d timestamp offset relative to SGPR_SR_memory_offset +var s_g8sr_ts_save_s = s[34:35] // save start +var s_g8sr_ts_sq_save_msg = s[36:37] // The save shader send SAVEWAVE msg to spi +var s_g8sr_ts_spi_wrexec = s[38:39] // the SPI write the sr address to SQ +var s_g8sr_ts_save_d = s[40:41] // save end +var s_g8sr_ts_restore_s = s[42:43] // restore start +var s_g8sr_ts_restore_d = s[44:45] // restore end + +var G8SR_VGPR_SR_IN_DWX4 = 0 +var G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 = 0x00100000 // DWx4 stride is 4*4Bytes +var G8SR_RESTORE_BUF_RSRC_WORD1_STRIDE_DWx4 = G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 + + +/*************************************************************************/ +/* control on how to run the shader */ +/*************************************************************************/ +//any hack that needs to be made to run this code in EMU (either because various EMU code are not ready or no compute save & restore in EMU run) +var EMU_RUN_HACK = 0 +var EMU_RUN_HACK_RESTORE_NORMAL = 0 +var EMU_RUN_HACK_SAVE_NORMAL_EXIT = 0 +var EMU_RUN_HACK_SAVE_SINGLE_WAVE = 0 +var EMU_RUN_HACK_SAVE_FIRST_TIME = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK +var SAVE_LDS = 1 +var WG_BASE_ADDR_LO = 0x9000a000 +var WG_BASE_ADDR_HI = 0x0 +var WAVE_SPACE = 0x5000 //memory size that each wave occupies in workgroup state mem +var CTX_SAVE_CONTROL = 0x0 +var CTX_RESTORE_CONTROL = CTX_SAVE_CONTROL +var SIM_RUN_HACK = 0 //any hack that needs to be made to run this code in SIM (either because various RTL code are not ready or no compute save & restore in RTL run) +var SGPR_SAVE_USE_SQC = 1 //use SQC D$ to do the write +var USE_MTBUF_INSTEAD_OF_MUBUF = 0 //because TC EMU currently asserts on 0 of // overload DFMT field to carry 4 more bits of stride for MUBUF opcodes +var SWIZZLE_EN = 0 //whether we use swizzled buffer addressing +var ACK_SQC_STORE = 1 //workaround for suspected SQC store bug causing incorrect stores under concurrency + +/**************************************************************************/ +/* variables */ +/**************************************************************************/ +var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23 +var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000 +var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1 +var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006 +var SQ_WAVE_STATUS_HALT_MASK = 0x2000 + +var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12 +var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9 +var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8 +var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 6 +var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT = 24 +var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE = 3 //FIXME sq.blk still has 4 bits at this time while SQ programming guide has 3 bits + +var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400 +var SQ_WAVE_TRAPSTS_EXCE_MASK = 0x1FF // Exception mask +var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT = 10 +var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK = 0x100 +var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT = 8 +var SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK = 0x3FF +var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT = 0x0 +var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE = 10 +var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK = 0xFFFFF800 +var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT = 11 +var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE = 21 +var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK = 0x800 + +var SQ_WAVE_IB_STS_RCNT_SHIFT = 16 //FIXME +var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT = 15 //FIXME +var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x1F8000 +var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG = 0x00007FFF //FIXME + +var SQ_BUF_RSRC_WORD1_ATC_SHIFT = 24 +var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT = 27 + +var TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT = 26 // bits [31:26] unused by SPI debug data +var TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK = 0xFC000000 + +/* Save */ +var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000 //stride is 4 bytes +var S_SAVE_BUF_RSRC_WORD3_MISC = 0x00807FAC //SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14] when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE + +var S_SAVE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit +var S_SAVE_SPI_INIT_ATC_SHIFT = 27 +var S_SAVE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype +var S_SAVE_SPI_INIT_MTYPE_SHIFT = 28 +var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG +var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26 + +var S_SAVE_PC_HI_RCNT_SHIFT = 28 //FIXME check with Brian to ensure all fields other than PC[47:0] can be used +var S_SAVE_PC_HI_RCNT_MASK = 0xF0000000 //FIXME +var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 27 //FIXME +var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x08000000 //FIXME + +var s_save_spi_init_lo = exec_lo +var s_save_spi_init_hi = exec_hi + +var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3'h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]} +var s_save_pc_hi = ttmp1 +var s_save_exec_lo = ttmp2 +var s_save_exec_hi = ttmp3 +var s_save_tmp = ttmp4 +var s_save_trapsts = ttmp5 //not really used until the end of the SAVE routine +var s_save_xnack_mask_lo = ttmp6 +var s_save_xnack_mask_hi = ttmp7 +var s_save_buf_rsrc0 = ttmp8 +var s_save_buf_rsrc1 = ttmp9 +var s_save_buf_rsrc2 = ttmp10 +var s_save_buf_rsrc3 = ttmp11 +var s_save_status = ttmp12 +var s_save_mem_offset = ttmp14 +var s_save_alloc_size = s_save_trapsts //conflict +var s_save_m0 = ttmp15 +var s_save_ttmps_lo = s_save_tmp //no conflict +var s_save_ttmps_hi = s_save_trapsts //no conflict + +/* Restore */ +var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE +var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC + +var S_RESTORE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit +var S_RESTORE_SPI_INIT_ATC_SHIFT = 27 +var S_RESTORE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype +var S_RESTORE_SPI_INIT_MTYPE_SHIFT = 28 +var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG +var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT = 26 + +var S_RESTORE_PC_HI_RCNT_SHIFT = S_SAVE_PC_HI_RCNT_SHIFT +var S_RESTORE_PC_HI_RCNT_MASK = S_SAVE_PC_HI_RCNT_MASK +var S_RESTORE_PC_HI_FIRST_REPLAY_SHIFT = S_SAVE_PC_HI_FIRST_REPLAY_SHIFT +var S_RESTORE_PC_HI_FIRST_REPLAY_MASK = S_SAVE_PC_HI_FIRST_REPLAY_MASK + +var s_restore_spi_init_lo = exec_lo +var s_restore_spi_init_hi = exec_hi + +var s_restore_mem_offset = ttmp12 +var s_restore_alloc_size = ttmp3 +var s_restore_tmp = ttmp2 +var s_restore_mem_offset_save = s_restore_tmp //no conflict + +var s_restore_m0 = s_restore_alloc_size //no conflict + +var s_restore_mode = ttmp7 + +var s_restore_pc_lo = ttmp0 +var s_restore_pc_hi = ttmp1 +var s_restore_exec_lo = ttmp14 +var s_restore_exec_hi = ttmp15 +var s_restore_status = ttmp4 +var s_restore_trapsts = ttmp5 +var s_restore_xnack_mask_lo = xnack_mask_lo +var s_restore_xnack_mask_hi = xnack_mask_hi +var s_restore_buf_rsrc0 = ttmp8 +var s_restore_buf_rsrc1 = ttmp9 +var s_restore_buf_rsrc2 = ttmp10 +var s_restore_buf_rsrc3 = ttmp11 +var s_restore_ttmps_lo = s_restore_tmp //no conflict +var s_restore_ttmps_hi = s_restore_alloc_size //no conflict + +/**************************************************************************/ +/* trap handler entry points */ +/**************************************************************************/ +/* Shader Main*/ + +shader main + asic(GFX9) + type(CS) + + + if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) //hack to use trap_id for determining save/restore + //FIXME VCCZ un-init assertion s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC + s_and_b32 s_save_tmp, s_save_pc_hi, 0xffff0000 //change SCC + s_cmp_eq_u32 s_save_tmp, 0x007e0000 //Save: trap_id = 0x7e. Restore: trap_id = 0x7f. + s_cbranch_scc0 L_JUMP_TO_RESTORE //do not need to recover STATUS here since we are going to RESTORE + //FIXME s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //need to recover STATUS since we are going to SAVE + s_branch L_SKIP_RESTORE //NOT restore, SAVE actually + else + s_branch L_SKIP_RESTORE //NOT restore. might be a regular trap or save + end + +L_JUMP_TO_RESTORE: + s_branch L_RESTORE //restore + +L_SKIP_RESTORE: + + s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC + s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK //check whether this is for save + s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) + s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK //check whether this is for save + s_cbranch_scc1 L_SAVE //this is the operation for save + + // ********* Handle non-CWSR traps ******************* +if (!EMU_RUN_HACK) + // Illegal instruction is a non-maskable exception which blocks context save. + // Halt the wavefront and return from the trap. + s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK + s_cbranch_scc1 L_HALT_WAVE + + // If STATUS.MEM_VIOL is asserted then we cannot fetch from the TMA. + // Instead, halt the wavefront and return from the trap. + s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK + s_cbranch_scc0 L_FETCH_2ND_TRAP + +L_HALT_WAVE: + // If STATUS.HALT is set then this fault must come from SQC instruction fetch. + // We cannot prevent further faults so just terminate the wavefront. + s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK + s_cbranch_scc0 L_NOT_ALREADY_HALTED + s_endpgm +L_NOT_ALREADY_HALTED: + s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK + + // If the PC points to S_ENDPGM then context save will fail if STATUS.HALT is set. + // Rewind the PC to prevent this from occurring. The debugger compensates for this. + s_sub_u32 ttmp0, ttmp0, 0x8 + s_subb_u32 ttmp1, ttmp1, 0x0 + +L_FETCH_2ND_TRAP: + // Preserve and clear scalar XNACK state before issuing scalar reads. + // Save IB_STS.FIRST_REPLAY[15] and IB_STS.RCNT[20:16] into unused space ttmp11[31:26]. + s_getreg_b32 ttmp2, hwreg(HW_REG_IB_STS) + s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK + s_lshl_b32 ttmp3, ttmp3, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT) + s_andn2_b32 ttmp11, ttmp11, TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK + s_or_b32 ttmp11, ttmp11, ttmp3 + + s_andn2_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK + s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2 + + // Read second-level TBA/TMA from first-level TMA and jump if available. + // ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data) + // ttmp12 holds SQ_WAVE_STATUS + s_getreg_b32 ttmp4, hwreg(HW_REG_SQ_SHADER_TMA_LO) + s_getreg_b32 ttmp5, hwreg(HW_REG_SQ_SHADER_TMA_HI) + s_lshl_b64 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 + s_load_dwordx2 [ttmp2, ttmp3], [ttmp4, ttmp5], 0x0 glc:1 // second-level TBA + s_waitcnt lgkmcnt(0) + s_load_dwordx2 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 glc:1 // second-level TMA + s_waitcnt lgkmcnt(0) + s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3] + s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler not been set + s_setpc_b64 [ttmp2, ttmp3] // jump to second-level trap handler + +L_NO_NEXT_TRAP: + s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) + s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCE_MASK // Check whether it is an exception + s_cbranch_scc1 L_EXCP_CASE // Exception, jump back to the shader program directly. + s_add_u32 ttmp0, ttmp0, 4 // S_TRAP case, add 4 to ttmp0 + s_addc_u32 ttmp1, ttmp1, 0 +L_EXCP_CASE: + s_and_b32 ttmp1, ttmp1, 0xFFFF + + // Restore SQ_WAVE_IB_STS. + s_lshr_b32 ttmp2, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT) + s_and_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK + s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2 + + // Restore SQ_WAVE_STATUS. + s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 + s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 + s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status + + s_rfe_b64 [ttmp0, ttmp1] +end + // ********* End handling of non-CWSR traps ******************* + +/**************************************************************************/ +/* save routine */ +/**************************************************************************/ + +L_SAVE: + +if G8SR_DEBUG_TIMESTAMP + s_memrealtime s_g8sr_ts_save_s + s_waitcnt lgkmcnt(0) //FIXME, will cause xnack?? +end + + s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32] + + s_mov_b32 s_save_tmp, 0 //clear saveCtx bit + s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit + + s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_RCNT_SHIFT, SQ_WAVE_IB_STS_RCNT_SIZE) //save RCNT + s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_RCNT_SHIFT + s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp + s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT, SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE) //save FIRST_REPLAY + s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT + s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp + s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS) //clear RCNT and FIRST_REPLAY in IB_STS + s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG + + s_setreg_b32 hwreg(HW_REG_IB_STS), s_save_tmp + + /* inform SPI the readiness and wait for SPI's go signal */ + s_mov_b32 s_save_exec_lo, exec_lo //save EXEC and use EXEC for the go signal from SPI + s_mov_b32 s_save_exec_hi, exec_hi + s_mov_b64 exec, 0x0 //clear EXEC to get ready to receive + +if G8SR_DEBUG_TIMESTAMP + s_memrealtime s_g8sr_ts_sq_save_msg + s_waitcnt lgkmcnt(0) +end + + if (EMU_RUN_HACK) + + else + s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC + end + + // Set SPI_PRIO=2 to avoid starving instruction fetch in the waves we're waiting for. + s_or_b32 s_save_tmp, s_save_status, (2 << SQ_WAVE_STATUS_SPI_PRIO_SHIFT) + s_setreg_b32 hwreg(HW_REG_STATUS), s_save_tmp + + L_SLEEP: + s_sleep 0x2 // sleep 1 (64clk) is not enough for 8 waves per SIMD, which will cause SQ hang, since the 7,8th wave could not get arbit to exec inst, while other waves are stuck into the sleep-loop and waiting for wrexec!=0 + + if (EMU_RUN_HACK) + + else + s_cbranch_execz L_SLEEP + end + +if G8SR_DEBUG_TIMESTAMP + s_memrealtime s_g8sr_ts_spi_wrexec + s_waitcnt lgkmcnt(0) +end + + if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_SINGLE_WAVE)) + //calculate wd_addr using absolute thread id + v_readlane_b32 s_save_tmp, v9, 0 + s_lshr_b32 s_save_tmp, s_save_tmp, 6 + s_mul_i32 s_save_tmp, s_save_tmp, WAVE_SPACE + s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO + s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI + s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL + else + end + if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_SINGLE_WAVE)) + s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO + s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI + s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL + else + end + + // Save trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic + // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40 + get_vgpr_size_bytes(s_save_ttmps_lo) + get_sgpr_size_bytes(s_save_ttmps_hi) + s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_ttmps_hi + s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo + s_addc_u32 s_save_ttmps_hi, s_save_spi_init_hi, 0x0 + s_and_b32 s_save_ttmps_hi, s_save_ttmps_hi, 0xFFFF + s_store_dwordx2 [ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x40 glc:1 + ack_sqc_store_workaround() + s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x48 glc:1 + ack_sqc_store_workaround() + s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x58 glc:1 + ack_sqc_store_workaround() + s_store_dwordx2 [ttmp14, ttmp15], [s_save_ttmps_lo, s_save_ttmps_hi], 0x5C glc:1 + ack_sqc_store_workaround() + + /* setup Resource Contants */ + s_mov_b32 s_save_buf_rsrc0, s_save_spi_init_lo //base_addr_lo + s_and_b32 s_save_buf_rsrc1, s_save_spi_init_hi, 0x0000FFFF //base_addr_hi + s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE + s_mov_b32 s_save_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) although not neccessarily inited + s_mov_b32 s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC + s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_ATC_MASK + s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position + s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or ATC + s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_MTYPE_MASK + s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position + s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or MTYPE + + //FIXME right now s_save_m0/s_save_mem_offset use tma_lo/tma_hi (might need to save them before using them?) + s_mov_b32 s_save_m0, m0 //save M0 + + /* global mem offset */ + s_mov_b32 s_save_mem_offset, 0x0 //mem offset initial value = 0 + + + + + /* save HW registers */ + ////////////////////////////// + + L_SAVE_HWREG: + // HWREG SR memory offset : size(VGPR)+size(SGPR) + get_vgpr_size_bytes(s_save_mem_offset) + get_sgpr_size_bytes(s_save_tmp) + s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp + + + s_mov_b32 s_save_buf_rsrc2, 0x4 //NUM_RECORDS in bytes + if (SWIZZLE_EN) + s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + + + write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) //M0 + + if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_FIRST_TIME)) + s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4 + s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over + end + + write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset) //PC + write_hwreg_to_mem(s_save_pc_hi, s_save_buf_rsrc0, s_save_mem_offset) + write_hwreg_to_mem(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset) //EXEC + write_hwreg_to_mem(s_save_exec_hi, s_save_buf_rsrc0, s_save_mem_offset) + write_hwreg_to_mem(s_save_status, s_save_buf_rsrc0, s_save_mem_offset) //STATUS + + //s_save_trapsts conflicts with s_save_alloc_size + s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) + write_hwreg_to_mem(s_save_trapsts, s_save_buf_rsrc0, s_save_mem_offset) //TRAPSTS + + write_hwreg_to_mem(xnack_mask_lo, s_save_buf_rsrc0, s_save_mem_offset) //XNACK_MASK_LO + write_hwreg_to_mem(xnack_mask_hi, s_save_buf_rsrc0, s_save_mem_offset) //XNACK_MASK_HI + + //use s_save_tmp would introduce conflict here between s_save_tmp and s_save_buf_rsrc2 + s_getreg_b32 s_save_m0, hwreg(HW_REG_MODE) //MODE + write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) + + + + /* the first wave in the threadgroup */ + s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK // extract fisrt wave bit + s_mov_b32 s_save_exec_hi, 0x0 + s_or_b32 s_save_exec_hi, s_save_tmp, s_save_exec_hi // save first wave bit to s_save_exec_hi.bits[26] + + + /* save SGPRs */ + // Save SGPR before LDS save, then the s0 to s4 can be used during LDS save... + ////////////////////////////// + + // SGPR SR memory offset : size(VGPR) + get_vgpr_size_bytes(s_save_mem_offset) + // TODO, change RSRC word to rearrange memory layout for SGPRS + + s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE) //spgr_size + s_add_u32 s_save_alloc_size, s_save_alloc_size, 1 + s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 4 //Number of SGPRs = (sgpr_size + 1) * 16 (non-zero value) + + if (SGPR_SAVE_USE_SQC) + s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 2 //NUM_RECORDS in bytes + else + s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads) + end + + if (SWIZZLE_EN) + s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + + + // backup s_save_buf_rsrc0,1 to s_save_pc_lo/hi, since write_16sgpr_to_mem function will change the rsrc0 + //s_mov_b64 s_save_pc_lo, s_save_buf_rsrc0 + s_mov_b64 s_save_xnack_mask_lo, s_save_buf_rsrc0 + s_add_u32 s_save_buf_rsrc0, s_save_buf_rsrc0, s_save_mem_offset + s_addc_u32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0 + + s_mov_b32 m0, 0x0 //SGPR initial index value =0 + s_nop 0x0 //Manually inserted wait states + L_SAVE_SGPR_LOOP: + // SGPR is allocated in 16 SGPR granularity + s_movrels_b64 s0, s0 //s0 = s[0+m0], s1 = s[1+m0] + s_movrels_b64 s2, s2 //s2 = s[2+m0], s3 = s[3+m0] + s_movrels_b64 s4, s4 //s4 = s[4+m0], s5 = s[5+m0] + s_movrels_b64 s6, s6 //s6 = s[6+m0], s7 = s[7+m0] + s_movrels_b64 s8, s8 //s8 = s[8+m0], s9 = s[9+m0] + s_movrels_b64 s10, s10 //s10 = s[10+m0], s11 = s[11+m0] + s_movrels_b64 s12, s12 //s12 = s[12+m0], s13 = s[13+m0] + s_movrels_b64 s14, s14 //s14 = s[14+m0], s15 = s[15+m0] + + write_16sgpr_to_mem(s0, s_save_buf_rsrc0, s_save_mem_offset) //PV: the best performance should be using s_buffer_store_dwordx4 + s_add_u32 m0, m0, 16 //next sgpr index + s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_SAVE_SGPR_LOOP //SGPR save is complete? + // restore s_save_buf_rsrc0,1 + //s_mov_b64 s_save_buf_rsrc0, s_save_pc_lo + s_mov_b64 s_save_buf_rsrc0, s_save_xnack_mask_lo + + + + + /* save first 4 VGPR, then LDS save could use */ + // each wave will alloc 4 vgprs at least... + ///////////////////////////////////////////////////////////////////////////////////// + + s_mov_b32 s_save_mem_offset, 0 + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on + s_mov_b32 exec_hi, 0xFFFFFFFF + s_mov_b32 xnack_mask_lo, 0x0 + s_mov_b32 xnack_mask_hi, 0x0 + + if (SWIZZLE_EN) + s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + + + // VGPR Allocated in 4-GPR granularity + +if G8SR_VGPR_SR_IN_DWX4 + // the const stride for DWx4 is 4*4 bytes + s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0 + s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes + + buffer_store_dwordx4 v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 + + s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0 + s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE // reset const stride to 4 bytes +else + buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 + buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256 + buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2 + buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3 +end + + + + /* save LDS */ + ////////////////////////////// + + L_SAVE_LDS: + + // Change EXEC to all threads... + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on + s_mov_b32 exec_hi, 0xFFFFFFFF + + s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) //lds_size + s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //lds_size is zero? + s_cbranch_scc0 L_SAVE_LDS_DONE //no lds used? jump to L_SAVE_DONE + + s_barrier //LDS is used? wait for other waves in the same TG + s_and_b32 s_save_tmp, s_save_exec_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK //exec is still used here + s_cbranch_scc0 L_SAVE_LDS_DONE + + // first wave do LDS save; + + s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 6 //LDS size in dwords = lds_size * 64dw + s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //LDS size in bytes + s_mov_b32 s_save_buf_rsrc2, s_save_alloc_size //NUM_RECORDS in bytes + + // LDS at offset: size(VGPR)+SIZE(SGPR)+SIZE(HWREG) + // + get_vgpr_size_bytes(s_save_mem_offset) + get_sgpr_size_bytes(s_save_tmp) + s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp + s_add_u32 s_save_mem_offset, s_save_mem_offset, get_hwreg_size_bytes() + + + if (SWIZZLE_EN) + s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + + s_mov_b32 m0, 0x0 //lds_offset initial value = 0 + + +var LDS_DMA_ENABLE = 0 +var UNROLL = 0 +if UNROLL==0 && LDS_DMA_ENABLE==1 + s_mov_b32 s3, 256*2 + s_nop 0 + s_nop 0 + s_nop 0 + L_SAVE_LDS_LOOP: + //TODO: looks the 2 buffer_store/load clause for s/r will hurt performance.??? + if (SAVE_LDS) //SPI always alloc LDS space in 128DW granularity + buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 // first 64DW + buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:256 // second 64DW + end + + s_add_u32 m0, m0, s3 //every buffer_store_lds does 256 bytes + s_add_u32 s_save_mem_offset, s_save_mem_offset, s3 //mem offset increased by 256 bytes + s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_SAVE_LDS_LOOP //LDS save is complete? + +elsif LDS_DMA_ENABLE==1 && UNROLL==1 // UNROOL , has ichace miss + // store from higest LDS address to lowest + s_mov_b32 s3, 256*2 + s_sub_u32 m0, s_save_alloc_size, s3 + s_add_u32 s_save_mem_offset, s_save_mem_offset, m0 + s_lshr_b32 s_save_alloc_size, s_save_alloc_size, 9 // how many 128 trunks... + s_sub_u32 s_save_alloc_size, 128, s_save_alloc_size // store from higheset addr to lowest + s_mul_i32 s_save_alloc_size, s_save_alloc_size, 6*4 // PC offset increment, each LDS save block cost 6*4 Bytes instruction + s_add_u32 s_save_alloc_size, s_save_alloc_size, 3*4 //2is the below 2 inst...//s_addc and s_setpc + s_nop 0 + s_nop 0 + s_nop 0 //pad 3 dw to let LDS_DMA align with 64Bytes + s_getpc_b64 s[0:1] // reuse s[0:1], since s[0:1] already saved + s_add_u32 s0, s0,s_save_alloc_size + s_addc_u32 s1, s1, 0 + s_setpc_b64 s[0:1] + + + for var i =0; i< 128; i++ + // be careful to make here a 64Byte aligned address, which could improve performance... + buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:0 // first 64DW + buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:256 // second 64DW + + if i!=127 + s_sub_u32 m0, m0, s3 // use a sgpr to shrink 2DW-inst to 1DW inst to improve performance , i.e. pack more LDS_DMA inst to one Cacheline + s_sub_u32 s_save_mem_offset, s_save_mem_offset, s3 + end + end + +else // BUFFER_STORE + v_mbcnt_lo_u32_b32 v2, 0xffffffff, 0x0 + v_mbcnt_hi_u32_b32 v3, 0xffffffff, v2 // tid + v_mul_i32_i24 v2, v3, 8 // tid*8 + v_mov_b32 v3, 256*2 + s_mov_b32 m0, 0x10000 + s_mov_b32 s0, s_save_buf_rsrc3 + s_and_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, 0xFF7FFFFF // disable add_tid + s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, 0x58000 //DFMT + +L_SAVE_LDS_LOOP_VECTOR: + ds_read_b64 v[0:1], v2 //x =LDS[a], byte address + s_waitcnt lgkmcnt(0) + buffer_store_dwordx2 v[0:1], v2, s_save_buf_rsrc0, s_save_mem_offset offen:1 glc:1 slc:1 +// s_waitcnt vmcnt(0) +// v_add_u32 v2, vcc[0:1], v2, v3 + v_add_u32 v2, v2, v3 + v_cmp_lt_u32 vcc[0:1], v2, s_save_alloc_size + s_cbranch_vccnz L_SAVE_LDS_LOOP_VECTOR + + // restore rsrc3 + s_mov_b32 s_save_buf_rsrc3, s0 + +end + +L_SAVE_LDS_DONE: + + + /* save VGPRs - set the Rest VGPRs */ + ////////////////////////////////////////////////////////////////////////////////////// + L_SAVE_VGPR: + // VGPR SR memory offset: 0 + // TODO rearrange the RSRC words to use swizzle for VGPR save... + + s_mov_b32 s_save_mem_offset, (0+256*4) // for the rest VGPRs + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on + s_mov_b32 exec_hi, 0xFFFFFFFF + + s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vpgr_size + s_add_u32 s_save_alloc_size, s_save_alloc_size, 1 + s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) //FIXME for GFX, zero is possible + s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4) + if (SWIZZLE_EN) + s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + + + // VGPR Allocated in 4-GPR granularity + +if G8SR_VGPR_SR_IN_DWX4 + // the const stride for DWx4 is 4*4 bytes + s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0 + s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes + + s_mov_b32 m0, 4 // skip first 4 VGPRs + s_cmp_lt_u32 m0, s_save_alloc_size + s_cbranch_scc0 L_SAVE_VGPR_LOOP_END // no more vgprs + + s_set_gpr_idx_on m0, 0x1 // This will change M0 + s_add_u32 s_save_alloc_size, s_save_alloc_size, 0x1000 // because above inst change m0 +L_SAVE_VGPR_LOOP: + v_mov_b32 v0, v0 // v0 = v[0+m0] + v_mov_b32 v1, v1 + v_mov_b32 v2, v2 + v_mov_b32 v3, v3 + + + buffer_store_dwordx4 v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 + s_add_u32 m0, m0, 4 + s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 + s_cmp_lt_u32 m0, s_save_alloc_size + s_cbranch_scc1 L_SAVE_VGPR_LOOP //VGPR save is complete? + s_set_gpr_idx_off +L_SAVE_VGPR_LOOP_END: + + s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0 + s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE // reset const stride to 4 bytes +else + // VGPR store using dw burst + s_mov_b32 m0, 0x4 //VGPR initial index value =0 + s_cmp_lt_u32 m0, s_save_alloc_size + s_cbranch_scc0 L_SAVE_VGPR_END + + + s_set_gpr_idx_on m0, 0x1 //M0[7:0] = M0[7:0] and M0[15:12] = 0x1 + s_add_u32 s_save_alloc_size, s_save_alloc_size, 0x1000 //add 0x1000 since we compare m0 against it later + + L_SAVE_VGPR_LOOP: + v_mov_b32 v0, v0 //v0 = v[0+m0] + v_mov_b32 v1, v1 //v0 = v[0+m0] + v_mov_b32 v2, v2 //v0 = v[0+m0] + v_mov_b32 v3, v3 //v0 = v[0+m0] + + if(USE_MTBUF_INSTEAD_OF_MUBUF) + tbuffer_store_format_x v0, v0, s_save_buf_rsrc0, s_save_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 + else + buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 + buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256 + buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2 + buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3 + end + + s_add_u32 m0, m0, 4 //next vgpr index + s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 //every buffer_store_dword does 256 bytes + s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_SAVE_VGPR_LOOP //VGPR save is complete? + s_set_gpr_idx_off +end + +L_SAVE_VGPR_END: + + + + + + + /* S_PGM_END_SAVED */ //FIXME graphics ONLY + if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_NORMAL_EXIT)) + s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32] + s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4 + s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over + s_rfe_b64 s_save_pc_lo //Return to the main shader program + else + end + +// Save Done timestamp +if G8SR_DEBUG_TIMESTAMP + s_memrealtime s_g8sr_ts_save_d + // SGPR SR memory offset : size(VGPR) + get_vgpr_size_bytes(s_save_mem_offset) + s_add_u32 s_save_mem_offset, s_save_mem_offset, G8SR_DEBUG_TS_SAVE_D_OFFSET + s_waitcnt lgkmcnt(0) //FIXME, will cause xnack?? + // Need reset rsrc2?? + s_mov_b32 m0, s_save_mem_offset + s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + s_buffer_store_dwordx2 s_g8sr_ts_save_d, s_save_buf_rsrc0, m0 glc:1 +end + + + s_branch L_END_PGM + + + +/**************************************************************************/ +/* restore routine */ +/**************************************************************************/ + +L_RESTORE: + /* Setup Resource Contants */ + if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) + //calculate wd_addr using absolute thread id + v_readlane_b32 s_restore_tmp, v9, 0 + s_lshr_b32 s_restore_tmp, s_restore_tmp, 6 + s_mul_i32 s_restore_tmp, s_restore_tmp, WAVE_SPACE + s_add_i32 s_restore_spi_init_lo, s_restore_tmp, WG_BASE_ADDR_LO + s_mov_b32 s_restore_spi_init_hi, WG_BASE_ADDR_HI + s_and_b32 s_restore_spi_init_hi, s_restore_spi_init_hi, CTX_RESTORE_CONTROL + else + end + +if G8SR_DEBUG_TIMESTAMP + s_memrealtime s_g8sr_ts_restore_s + s_waitcnt lgkmcnt(0) //FIXME, will cause xnack?? + // tma_lo/hi are sgpr 110, 111, which will not used for 112 SGPR allocated case... + s_mov_b32 s_restore_pc_lo, s_g8sr_ts_restore_s[0] + s_mov_b32 s_restore_pc_hi, s_g8sr_ts_restore_s[1] //backup ts to ttmp0/1, sicne exec will be finally restored.. +end + + + + s_mov_b32 s_restore_buf_rsrc0, s_restore_spi_init_lo //base_addr_lo + s_and_b32 s_restore_buf_rsrc1, s_restore_spi_init_hi, 0x0000FFFF //base_addr_hi + s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE + s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) + s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC + s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_ATC_MASK + s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position + s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or ATC + s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_MTYPE_MASK + s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position + s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or MTYPE + + /* global mem offset */ +// s_mov_b32 s_restore_mem_offset, 0x0 //mem offset initial value = 0 + + /* the first wave in the threadgroup */ + s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK + s_cbranch_scc0 L_RESTORE_VGPR + + /* restore LDS */ + ////////////////////////////// + L_RESTORE_LDS: + + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on //be consistent with SAVE although can be moved ahead + s_mov_b32 exec_hi, 0xFFFFFFFF + + s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) //lds_size + s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //lds_size is zero? + s_cbranch_scc0 L_RESTORE_VGPR //no lds used? jump to L_RESTORE_VGPR + s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 6 //LDS size in dwords = lds_size * 64dw + s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //LDS size in bytes + s_mov_b32 s_restore_buf_rsrc2, s_restore_alloc_size //NUM_RECORDS in bytes + + // LDS at offset: size(VGPR)+SIZE(SGPR)+SIZE(HWREG) + // + get_vgpr_size_bytes(s_restore_mem_offset) + get_sgpr_size_bytes(s_restore_tmp) + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_hwreg_size_bytes() //FIXME, Check if offset overflow??? + + + if (SWIZZLE_EN) + s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + s_mov_b32 m0, 0x0 //lds_offset initial value = 0 + + L_RESTORE_LDS_LOOP: + if (SAVE_LDS) + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:256 // second 64DW + end + s_add_u32 m0, m0, 256*2 // 128 DW + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*2 //mem offset increased by 128DW + s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_RESTORE_LDS_LOOP //LDS restore is complete? + + + /* restore VGPRs */ + ////////////////////////////// + L_RESTORE_VGPR: + // VGPR SR memory offset : 0 + s_mov_b32 s_restore_mem_offset, 0x0 + s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on //be consistent with SAVE although can be moved ahead + s_mov_b32 exec_hi, 0xFFFFFFFF + + s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vpgr_size + s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1 + s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) + s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4) + if (SWIZZLE_EN) + s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + +if G8SR_VGPR_SR_IN_DWX4 + get_vgpr_size_bytes(s_restore_mem_offset) + s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 + + // the const stride for DWx4 is 4*4 bytes + s_and_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, 0x0000FFFF // reset const stride to 0 + s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, G8SR_RESTORE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes + + s_mov_b32 m0, s_restore_alloc_size + s_set_gpr_idx_on m0, 0x8 // Note.. This will change m0 + +L_RESTORE_VGPR_LOOP: + buffer_load_dwordx4 v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 + s_waitcnt vmcnt(0) + s_sub_u32 m0, m0, 4 + v_mov_b32 v0, v0 // v[0+m0] = v0 + v_mov_b32 v1, v1 + v_mov_b32 v2, v2 + v_mov_b32 v3, v3 + s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 + s_cmp_eq_u32 m0, 0x8000 + s_cbranch_scc0 L_RESTORE_VGPR_LOOP + s_set_gpr_idx_off + + s_and_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, 0x0000FFFF // reset const stride to 0 + s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE // const stride to 4*4 bytes + +else + // VGPR load using dw burst + s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v1, v0 will be the last + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 + s_mov_b32 m0, 4 //VGPR initial index value = 1 + s_set_gpr_idx_on m0, 0x8 //M0[7:0] = M0[7:0] and M0[15:12] = 0x8 + s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 0x8000 //add 0x8000 since we compare m0 against it later + + L_RESTORE_VGPR_LOOP: + if(USE_MTBUF_INSTEAD_OF_MUBUF) + tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 + else + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 + buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256 + buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*2 + buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*3 + end + s_waitcnt vmcnt(0) //ensure data ready + v_mov_b32 v0, v0 //v[0+m0] = v0 + v_mov_b32 v1, v1 + v_mov_b32 v2, v2 + v_mov_b32 v3, v3 + s_add_u32 m0, m0, 4 //next vgpr index + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 //every buffer_load_dword does 256 bytes + s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0 + s_cbranch_scc1 L_RESTORE_VGPR_LOOP //VGPR restore (except v0) is complete? + s_set_gpr_idx_off + /* VGPR restore on v0 */ + if(USE_MTBUF_INSTEAD_OF_MUBUF) + tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1 + else + buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 + buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256 + buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*2 + buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*3 + end + +end + + /* restore SGPRs */ + ////////////////////////////// + + // SGPR SR memory offset : size(VGPR) + get_vgpr_size_bytes(s_restore_mem_offset) + get_sgpr_size_bytes(s_restore_tmp) + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp + s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 16*4 // restore SGPR from S[n] to S[0], by 16 sgprs group + // TODO, change RSRC word to rearrange memory layout for SGPRS + + s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE) //spgr_size + s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1 + s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 4 //Number of SGPRs = (sgpr_size + 1) * 16 (non-zero value) + + if (SGPR_SAVE_USE_SQC) + s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 2 //NUM_RECORDS in bytes + else + s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads) + end + if (SWIZZLE_EN) + s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + + s_mov_b32 m0, s_restore_alloc_size + + L_RESTORE_SGPR_LOOP: + read_16sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset) //PV: further performance improvement can be made + s_waitcnt lgkmcnt(0) //ensure data ready + + s_sub_u32 m0, m0, 16 // Restore from S[n] to S[0] + s_nop 0 // hazard SALU M0=> S_MOVREL + + s_movreld_b64 s0, s0 //s[0+m0] = s0 + s_movreld_b64 s2, s2 + s_movreld_b64 s4, s4 + s_movreld_b64 s6, s6 + s_movreld_b64 s8, s8 + s_movreld_b64 s10, s10 + s_movreld_b64 s12, s12 + s_movreld_b64 s14, s14 + + s_cmp_eq_u32 m0, 0 //scc = (m0 < s_restore_alloc_size) ? 1 : 0 + s_cbranch_scc0 L_RESTORE_SGPR_LOOP //SGPR restore (except s0) is complete? + + /* restore HW registers */ + ////////////////////////////// + L_RESTORE_HWREG: + + +if G8SR_DEBUG_TIMESTAMP + s_mov_b32 s_g8sr_ts_restore_s[0], s_restore_pc_lo + s_mov_b32 s_g8sr_ts_restore_s[1], s_restore_pc_hi +end + + // HWREG SR memory offset : size(VGPR)+size(SGPR) + get_vgpr_size_bytes(s_restore_mem_offset) + get_sgpr_size_bytes(s_restore_tmp) + s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp + + + s_mov_b32 s_restore_buf_rsrc2, 0x4 //NUM_RECORDS in bytes + if (SWIZZLE_EN) + s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking? + else + s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes + end + + read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset) //M0 + read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //PC + read_hwreg_from_mem(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_exec_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //EXEC + read_hwreg_from_mem(s_restore_exec_hi, s_restore_buf_rsrc0, s_restore_mem_offset) + read_hwreg_from_mem(s_restore_status, s_restore_buf_rsrc0, s_restore_mem_offset) //STATUS + read_hwreg_from_mem(s_restore_trapsts, s_restore_buf_rsrc0, s_restore_mem_offset) //TRAPSTS + read_hwreg_from_mem(xnack_mask_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //XNACK_MASK_LO + read_hwreg_from_mem(xnack_mask_hi, s_restore_buf_rsrc0, s_restore_mem_offset) //XNACK_MASK_HI + read_hwreg_from_mem(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset) //MODE + + s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS + + //for normal save & restore, the saved PC points to the next inst to execute, no adjustment needs to be made, otherwise: + if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) + s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 8 //pc[31:0]+8 //two back-to-back s_trap are used (first for save and second for restore) + s_addc_u32 s_restore_pc_hi, s_restore_pc_hi, 0x0 //carry bit over + end + if ((EMU_RUN_HACK) && (EMU_RUN_HACK_RESTORE_NORMAL)) + s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 4 //pc[31:0]+4 // save is hack through s_trap but restore is normal + s_addc_u32 s_restore_pc_hi, s_restore_pc_hi, 0x0 //carry bit over + end + + s_mov_b32 m0, s_restore_m0 + s_mov_b32 exec_lo, s_restore_exec_lo + s_mov_b32 exec_hi, s_restore_exec_hi + + s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK, s_restore_trapsts + s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE), s_restore_m0 + s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK, s_restore_trapsts + s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT + s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE), s_restore_m0 + //s_setreg_b32 hwreg(HW_REG_TRAPSTS), s_restore_trapsts //don't overwrite SAVECTX bit as it may be set through external SAVECTX during restore + s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode + + // Restore trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic + // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40 + get_vgpr_size_bytes(s_restore_ttmps_lo) + get_sgpr_size_bytes(s_restore_ttmps_hi) + s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_ttmps_hi + s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0 + s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0 + s_and_b32 s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF + s_load_dwordx2 [ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x40 glc:1 + s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x48 glc:1 + s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x58 glc:1 + s_load_dwordx2 [ttmp14, ttmp15], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x5C glc:1 + s_waitcnt lgkmcnt(0) + + //reuse s_restore_m0 as a temp register + s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_RCNT_MASK + s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_RCNT_SHIFT + s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_RCNT_SHIFT + s_mov_b32 s_restore_tmp, 0x0 //IB_STS is zero + s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0 + s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_FIRST_REPLAY_MASK + s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT + s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT + s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0 + s_and_b32 s_restore_m0, s_restore_status, SQ_WAVE_STATUS_INST_ATC_MASK + s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT + s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp + + s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS + s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32 + s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32 + s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu + + s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time + +if G8SR_DEBUG_TIMESTAMP + s_memrealtime s_g8sr_ts_restore_d + s_waitcnt lgkmcnt(0) +end + +// s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution + s_rfe_restore_b64 s_restore_pc_lo, s_restore_m0 // s_restore_m0[0] is used to set STATUS.inst_atc + + +/**************************************************************************/ +/* the END */ +/**************************************************************************/ +L_END_PGM: + s_endpgm + +end + + +/**************************************************************************/ +/* the helper functions */ +/**************************************************************************/ + +//Only for save hwreg to mem +function write_hwreg_to_mem(s, s_rsrc, s_mem_offset) + s_mov_b32 exec_lo, m0 //assuming exec_lo is not needed anymore from this point on + s_mov_b32 m0, s_mem_offset + s_buffer_store_dword s, s_rsrc, m0 glc:1 + ack_sqc_store_workaround() + s_add_u32 s_mem_offset, s_mem_offset, 4 + s_mov_b32 m0, exec_lo +end + + +// HWREG are saved before SGPRs, so all HWREG could be use. +function write_16sgpr_to_mem(s, s_rsrc, s_mem_offset) + + s_buffer_store_dwordx4 s[0], s_rsrc, 0 glc:1 + ack_sqc_store_workaround() + s_buffer_store_dwordx4 s[4], s_rsrc, 16 glc:1 + ack_sqc_store_workaround() + s_buffer_store_dwordx4 s[8], s_rsrc, 32 glc:1 + ack_sqc_store_workaround() + s_buffer_store_dwordx4 s[12], s_rsrc, 48 glc:1 + ack_sqc_store_workaround() + s_add_u32 s_rsrc[0], s_rsrc[0], 4*16 + s_addc_u32 s_rsrc[1], s_rsrc[1], 0x0 // +scc +end + + +function read_hwreg_from_mem(s, s_rsrc, s_mem_offset) + s_buffer_load_dword s, s_rsrc, s_mem_offset glc:1 + s_add_u32 s_mem_offset, s_mem_offset, 4 +end + +function read_16sgpr_from_mem(s, s_rsrc, s_mem_offset) + s_buffer_load_dwordx16 s, s_rsrc, s_mem_offset glc:1 + s_sub_u32 s_mem_offset, s_mem_offset, 4*16 +end + + + +function get_lds_size_bytes(s_lds_size_byte) + // SQ LDS granularity is 64DW, while PGM_RSRC2.lds_size is in granularity 128DW + s_getreg_b32 s_lds_size_byte, hwreg(HW_REG_LDS_ALLOC, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) // lds_size + s_lshl_b32 s_lds_size_byte, s_lds_size_byte, 8 //LDS size in dwords = lds_size * 64 *4Bytes // granularity 64DW +end + +function get_vgpr_size_bytes(s_vgpr_size_byte) + s_getreg_b32 s_vgpr_size_byte, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE) //vpgr_size + s_add_u32 s_vgpr_size_byte, s_vgpr_size_byte, 1 + s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+8) //Number of VGPRs = (vgpr_size + 1) * 4 * 64 * 4 (non-zero value) //FIXME for GFX, zero is possible +end + +function get_sgpr_size_bytes(s_sgpr_size_byte) + s_getreg_b32 s_sgpr_size_byte, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE) //spgr_size + s_add_u32 s_sgpr_size_byte, s_sgpr_size_byte, 1 + s_lshl_b32 s_sgpr_size_byte, s_sgpr_size_byte, 6 //Number of SGPRs = (sgpr_size + 1) * 16 *4 (non-zero value) +end + +function get_hwreg_size_bytes + return 128 //HWREG size 128 bytes +end + +function ack_sqc_store_workaround + if ACK_SQC_STORE + s_waitcnt lgkmcnt(0) + end +end diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index cd679cf..f64c555 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -233,7 +233,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, pr_debug("Queue Size: 0x%llX, %u\n", q_properties->queue_size, args->ring_size); - pr_debug("Queue r/w Pointers: %p, %p\n", + pr_debug("Queue r/w Pointers: %px, %px\n", q_properties->read_ptr, q_properties->write_ptr); @@ -292,8 +292,16 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, /* Return gpu_id as doorbell offset for mmap usage */ - args->doorbell_offset = (KFD_MMAP_DOORBELL_MASK | args->gpu_id); + args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL; + args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id); args->doorbell_offset <<= PAGE_SHIFT; + if (KFD_IS_SOC15(dev->device_info->asic_family)) + /* On SOC15 ASICs, doorbell allocation must be + * per-device, and independent from the per-process + * queue_id. Return the doorbell offset within the + * doorbell aperture to user mode. + */ + args->doorbell_offset |= q_properties.doorbell_off; mutex_unlock(&p->mutex); @@ -749,12 +757,13 @@ static int kfd_ioctl_get_clock_counters(struct file *filep, struct timespec64 time; dev = kfd_device_by_id(args->gpu_id); - if (dev == NULL) - return -EINVAL; - - /* Reading GPU clock counter from KGD */ - args->gpu_clock_counter = - dev->kfd2kgd->get_gpu_clock_counter(dev->kgd); + if (dev) + /* Reading GPU clock counter from KGD */ + args->gpu_clock_counter = + dev->kfd2kgd->get_gpu_clock_counter(dev->kgd); + else + /* Node without GPU resource */ + args->gpu_clock_counter = 0; /* No access to rdtsc. Using raw monotonic time */ getrawmonotonic64(&time); @@ -1147,7 +1156,7 @@ err_unlock: return ret; } -bool kfd_dev_is_large_bar(struct kfd_dev *dev) +static bool kfd_dev_is_large_bar(struct kfd_dev *dev) { struct kfd_local_mem_info mem_info; @@ -1295,8 +1304,8 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, return -EINVAL; } - devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr), - GFP_KERNEL); + devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr), + GFP_KERNEL); if (!devices_arr) return -ENOMEM; @@ -1404,8 +1413,8 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, return -EINVAL; } - devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr), - GFP_KERNEL); + devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr), + GFP_KERNEL); if (!devices_arr) return -ENOMEM; @@ -1421,7 +1430,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, pdd = kfd_get_process_device_data(dev, p); if (!pdd) { - err = PTR_ERR(pdd); + err = -EINVAL; goto bind_process_to_device_failed; } @@ -1644,23 +1653,33 @@ err_i1: static int kfd_mmap(struct file *filp, struct vm_area_struct *vma) { struct kfd_process *process; + struct kfd_dev *dev = NULL; + unsigned long vm_pgoff; + unsigned int gpu_id; process = kfd_get_process(current); if (IS_ERR(process)) return PTR_ERR(process); - if ((vma->vm_pgoff & KFD_MMAP_DOORBELL_MASK) == - KFD_MMAP_DOORBELL_MASK) { - vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_DOORBELL_MASK; - return kfd_doorbell_mmap(process, vma); - } else if ((vma->vm_pgoff & KFD_MMAP_EVENTS_MASK) == - KFD_MMAP_EVENTS_MASK) { - vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_EVENTS_MASK; + vm_pgoff = vma->vm_pgoff; + vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff); + gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff); + if (gpu_id) + dev = kfd_device_by_id(gpu_id); + + switch (vm_pgoff & KFD_MMAP_TYPE_MASK) { + case KFD_MMAP_TYPE_DOORBELL: + if (!dev) + return -ENODEV; + return kfd_doorbell_mmap(dev, process, vma); + + case KFD_MMAP_TYPE_EVENTS: return kfd_event_mmap(process, vma); - } else if ((vma->vm_pgoff & KFD_MMAP_RESERVED_MEM_MASK) == - KFD_MMAP_RESERVED_MEM_MASK) { - vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_RESERVED_MEM_MASK; - return kfd_reserved_mem_mmap(process, vma); + + case KFD_MMAP_TYPE_RESERVED_MEM: + if (!dev) + return -ENODEV; + return kfd_reserved_mem_mmap(dev, process, vma); } return -EFAULT; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 4f126ef..296b3f2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -132,6 +132,9 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = { #define fiji_cache_info carrizo_cache_info #define polaris10_cache_info carrizo_cache_info #define polaris11_cache_info carrizo_cache_info +/* TODO - check & update Vega10 cache details */ +#define vega10_cache_info carrizo_cache_info +#define raven_cache_info carrizo_cache_info static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev, struct crat_subtype_computeunit *cu) @@ -603,6 +606,14 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev, pcache_info = polaris11_cache_info; num_of_cache_types = ARRAY_SIZE(polaris11_cache_info); break; + case CHIP_VEGA10: + pcache_info = vega10_cache_info; + num_of_cache_types = ARRAY_SIZE(vega10_cache_info); + break; + case CHIP_RAVEN: + pcache_info = raven_cache_info; + num_of_cache_types = ARRAY_SIZE(raven_cache_info); + break; default: return -EINVAL; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 334669996..7ee6cec 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -20,16 +20,13 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2) -#include <linux/amd-iommu.h> -#endif #include <linux/bsearch.h> #include <linux/pci.h> #include <linux/slab.h> #include "kfd_priv.h" #include "kfd_device_queue_manager.h" #include "kfd_pm4_headers_vi.h" -#include "cwsr_trap_handler_gfx8.asm" +#include "cwsr_trap_handler.h" #include "kfd_iommu.h" #define MQD_SIZE_ALIGNED 768 @@ -41,6 +38,7 @@ static const struct kfd_device_info kaveri_device_info = { .max_pasid_bits = 16, /* max num of queues for KV.TODO should be a dynamic value */ .max_no_of_hqd = 24, + .doorbell_size = 4, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, @@ -55,6 +53,7 @@ static const struct kfd_device_info carrizo_device_info = { .max_pasid_bits = 16, /* max num of queues for CZ.TODO should be a dynamic value */ .max_no_of_hqd = 24, + .doorbell_size = 4, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, @@ -70,6 +69,7 @@ static const struct kfd_device_info hawaii_device_info = { .max_pasid_bits = 16, /* max num of queues for KV.TODO should be a dynamic value */ .max_no_of_hqd = 24, + .doorbell_size = 4, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, @@ -83,6 +83,7 @@ static const struct kfd_device_info tonga_device_info = { .asic_family = CHIP_TONGA, .max_pasid_bits = 16, .max_no_of_hqd = 24, + .doorbell_size = 4, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, @@ -96,6 +97,7 @@ static const struct kfd_device_info tonga_vf_device_info = { .asic_family = CHIP_TONGA, .max_pasid_bits = 16, .max_no_of_hqd = 24, + .doorbell_size = 4, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, @@ -109,6 +111,7 @@ static const struct kfd_device_info fiji_device_info = { .asic_family = CHIP_FIJI, .max_pasid_bits = 16, .max_no_of_hqd = 24, + .doorbell_size = 4, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, @@ -122,6 +125,7 @@ static const struct kfd_device_info fiji_vf_device_info = { .asic_family = CHIP_FIJI, .max_pasid_bits = 16, .max_no_of_hqd = 24, + .doorbell_size = 4, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, @@ -136,6 +140,7 @@ static const struct kfd_device_info polaris10_device_info = { .asic_family = CHIP_POLARIS10, .max_pasid_bits = 16, .max_no_of_hqd = 24, + .doorbell_size = 4, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, @@ -149,6 +154,7 @@ static const struct kfd_device_info polaris10_vf_device_info = { .asic_family = CHIP_POLARIS10, .max_pasid_bits = 16, .max_no_of_hqd = 24, + .doorbell_size = 4, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, @@ -162,6 +168,7 @@ static const struct kfd_device_info polaris11_device_info = { .asic_family = CHIP_POLARIS11, .max_pasid_bits = 16, .max_no_of_hqd = 24, + .doorbell_size = 4, .ih_ring_entry_size = 4 * sizeof(uint32_t), .event_interrupt_class = &event_interrupt_class_cik, .num_of_watch_points = 4, @@ -171,6 +178,34 @@ static const struct kfd_device_info polaris11_device_info = { .needs_pci_atomics = true, }; +static const struct kfd_device_info vega10_device_info = { + .asic_family = CHIP_VEGA10, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .doorbell_size = 8, + .ih_ring_entry_size = 8 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_v9, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_iommu_device = false, + .needs_pci_atomics = false, +}; + +static const struct kfd_device_info vega10_vf_device_info = { + .asic_family = CHIP_VEGA10, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .doorbell_size = 8, + .ih_ring_entry_size = 8 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_v9, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_iommu_device = false, + .needs_pci_atomics = false, +}; + struct kfd_deviceid { unsigned short did; @@ -250,6 +285,15 @@ static const struct kfd_deviceid supported_devices[] = { { 0x67EB, &polaris11_device_info }, /* Polaris11 */ { 0x67EF, &polaris11_device_info }, /* Polaris11 */ { 0x67FF, &polaris11_device_info }, /* Polaris11 */ + { 0x6860, &vega10_device_info }, /* Vega10 */ + { 0x6861, &vega10_device_info }, /* Vega10 */ + { 0x6862, &vega10_device_info }, /* Vega10 */ + { 0x6863, &vega10_device_info }, /* Vega10 */ + { 0x6864, &vega10_device_info }, /* Vega10 */ + { 0x6867, &vega10_device_info }, /* Vega10 */ + { 0x6868, &vega10_device_info }, /* Vega10 */ + { 0x686C, &vega10_vf_device_info }, /* Vega10 vf*/ + { 0x687F, &vega10_device_info }, /* Vega10 */ }; static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size, @@ -279,7 +323,7 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev, const struct kfd2kgd_calls *f2g) { struct kfd_dev *kfd; - + int ret; const struct kfd_device_info *device_info = lookup_device_info(pdev->device); @@ -288,19 +332,18 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, return NULL; } - if (device_info->needs_pci_atomics) { - /* Allow BIF to recode atomics to PCIe 3.0 - * AtomicOps. 32 and 64-bit requests are possible and - * must be supported. - */ - if (pci_enable_atomic_ops_to_root(pdev, - PCI_EXP_DEVCAP2_ATOMIC_COMP32 | - PCI_EXP_DEVCAP2_ATOMIC_COMP64) < 0) { - dev_info(kfd_device, - "skipped device %x:%x, PCI rejects atomics", - pdev->vendor, pdev->device); - return NULL; - } + /* Allow BIF to recode atomics to PCIe 3.0 AtomicOps. + * 32 and 64-bit requests are possible and must be + * supported. + */ + ret = pci_enable_atomic_ops_to_root(pdev, + PCI_EXP_DEVCAP2_ATOMIC_COMP32 | + PCI_EXP_DEVCAP2_ATOMIC_COMP64); + if (device_info->needs_pci_atomics && ret < 0) { + dev_info(kfd_device, + "skipped device %x:%x, PCI rejects atomics\n", + pdev->vendor, pdev->device); + return NULL; } kfd = kzalloc(sizeof(*kfd), GFP_KERNEL); @@ -323,10 +366,16 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, static void kfd_cwsr_init(struct kfd_dev *kfd) { if (cwsr_enable && kfd->device_info->supports_cwsr) { - BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE); + if (kfd->device_info->asic_family < CHIP_VEGA10) { + BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE); + kfd->cwsr_isa = cwsr_trap_gfx8_hex; + kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex); + } else { + BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE); + kfd->cwsr_isa = cwsr_trap_gfx9_hex; + kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex); + } - kfd->cwsr_isa = cwsr_trap_gfx8_hex; - kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex); kfd->cwsr_enabled = true; } } @@ -541,6 +590,44 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) spin_unlock(&kfd->interrupt_lock); } +int kgd2kfd_quiesce_mm(struct mm_struct *mm) +{ + struct kfd_process *p; + int r; + + /* Because we are called from arbitrary context (workqueue) as opposed + * to process context, kfd_process could attempt to exit while we are + * running so the lookup function increments the process ref count. + */ + p = kfd_lookup_process_by_mm(mm); + if (!p) + return -ESRCH; + + r = kfd_process_evict_queues(p); + + kfd_unref_process(p); + return r; +} + +int kgd2kfd_resume_mm(struct mm_struct *mm) +{ + struct kfd_process *p; + int r; + + /* Because we are called from arbitrary context (workqueue) as opposed + * to process context, kfd_process could attempt to exit while we are + * running so the lookup function increments the process ref count. + */ + p = kfd_lookup_process_by_mm(mm); + if (!p) + return -ESRCH; + + r = kfd_process_restore_queues(p); + + kfd_unref_process(p); + return r; +} + /** kgd2kfd_schedule_evict_and_restore_process - Schedules work queue that will * prepare for safe eviction of KFD BOs that belong to the specified * process. @@ -652,7 +739,7 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size, if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size) return -ENOMEM; - *mem_obj = kmalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL); + *mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO); if ((*mem_obj) == NULL) return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index c18e048..668ad07 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -110,6 +110,57 @@ void program_sh_mem_settings(struct device_queue_manager *dqm, qpd->sh_mem_bases); } +static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q) +{ + struct kfd_dev *dev = qpd->dqm->dev; + + if (!KFD_IS_SOC15(dev->device_info->asic_family)) { + /* On pre-SOC15 chips we need to use the queue ID to + * preserve the user mode ABI. + */ + q->doorbell_id = q->properties.queue_id; + } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { + /* For SDMA queues on SOC15, use static doorbell + * assignments based on the engine and queue. + */ + q->doorbell_id = dev->shared_resources.sdma_doorbell + [q->properties.sdma_engine_id] + [q->properties.sdma_queue_id]; + } else { + /* For CP queues on SOC15 reserve a free doorbell ID */ + unsigned int found; + + found = find_first_zero_bit(qpd->doorbell_bitmap, + KFD_MAX_NUM_OF_QUEUES_PER_PROCESS); + if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) { + pr_debug("No doorbells available"); + return -EBUSY; + } + set_bit(found, qpd->doorbell_bitmap); + q->doorbell_id = found; + } + + q->properties.doorbell_off = + kfd_doorbell_id_to_offset(dev, q->process, + q->doorbell_id); + + return 0; +} + +static void deallocate_doorbell(struct qcm_process_device *qpd, + struct queue *q) +{ + unsigned int old; + struct kfd_dev *dev = qpd->dqm->dev; + + if (!KFD_IS_SOC15(dev->device_info->asic_family) || + q->properties.type == KFD_QUEUE_TYPE_SDMA) + return; + + old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap); + WARN_ON(!old); +} + static int allocate_vmid(struct device_queue_manager *dqm, struct qcm_process_device *qpd, struct queue *q) @@ -145,15 +196,19 @@ static int allocate_vmid(struct device_queue_manager *dqm, static int flush_texture_cache_nocpsch(struct kfd_dev *kdev, struct qcm_process_device *qpd) { - uint32_t len; + const struct packet_manager_funcs *pmf = qpd->dqm->packets.pmf; + int ret; if (!qpd->ib_kaddr) return -ENOMEM; - len = pm_create_release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr); + ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr); + if (ret) + return ret; return kdev->kfd2kgd->submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid, - qpd->ib_base, (uint32_t *)qpd->ib_kaddr, len); + qpd->ib_base, (uint32_t *)qpd->ib_kaddr, + pmf->release_mem_size / sizeof(uint32_t)); } static void deallocate_vmid(struct device_queue_manager *dqm, @@ -301,10 +356,14 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, if (retval) return retval; + retval = allocate_doorbell(qpd, q); + if (retval) + goto out_deallocate_hqd; + retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); if (retval) - goto out_deallocate_hqd; + goto out_deallocate_doorbell; pr_debug("Loading mqd to hqd on pipe %d, queue %d\n", q->pipe, q->queue); @@ -324,6 +383,8 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, out_uninit_mqd: mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); +out_deallocate_doorbell: + deallocate_doorbell(qpd, q); out_deallocate_hqd: deallocate_hqd(dqm, q); @@ -357,6 +418,8 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, } dqm->total_queue_count--; + deallocate_doorbell(qpd, q); + retval = mqd->destroy_mqd(mqd, q->mqd, KFD_PREEMPT_TYPE_WAVEFRONT_RESET, KFD_UNMAP_LATENCY_MS, @@ -861,6 +924,10 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, q->properties.sdma_queue_id = q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE; q->properties.sdma_engine_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE; + retval = allocate_doorbell(qpd, q); + if (retval) + goto out_deallocate_sdma_queue; + pr_debug("SDMA id is: %d\n", q->sdma_id); pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id); pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); @@ -869,7 +936,7 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); if (retval) - goto out_deallocate_sdma_queue; + goto out_deallocate_doorbell; retval = mqd->load_mqd(mqd, q->mqd, 0, 0, &q->properties, NULL); if (retval) @@ -879,6 +946,8 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, out_uninit_mqd: mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); +out_deallocate_doorbell: + deallocate_doorbell(qpd, q); out_deallocate_sdma_queue: deallocate_sdma_queue(dqm, q->sdma_id); @@ -1058,24 +1127,29 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, pr_warn("Can't create new usermode queue because %d queues were already created\n", dqm->total_queue_count); retval = -EPERM; - goto out; + goto out_unlock; } if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { retval = allocate_sdma_queue(dqm, &q->sdma_id); if (retval) - goto out; + goto out_unlock; q->properties.sdma_queue_id = q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE; q->properties.sdma_engine_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE; } + + retval = allocate_doorbell(qpd, q); + if (retval) + goto out_deallocate_sdma_queue; + mqd = dqm->ops.get_mqd_manager(dqm, get_mqd_type_from_queue_type(q->properties.type)); if (!mqd) { retval = -ENOMEM; - goto out; + goto out_deallocate_doorbell; } /* * Eviction state logic: we only mark active queues as evicted @@ -1093,7 +1167,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); if (retval) - goto out; + goto out_deallocate_doorbell; list_add(&q->list, &qpd->queues_list); qpd->queue_count++; @@ -1114,7 +1188,15 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, pr_debug("Total of %d queues are accountable so far\n", dqm->total_queue_count); -out: + mutex_unlock(&dqm->lock); + return retval; + +out_deallocate_doorbell: + deallocate_doorbell(qpd, q); +out_deallocate_sdma_queue: + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) + deallocate_sdma_queue(dqm, q->sdma_id); +out_unlock: mutex_unlock(&dqm->lock); return retval; } @@ -1251,6 +1333,8 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, goto failed; } + deallocate_doorbell(qpd, q); + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { dqm->sdma_queue_count--; deallocate_sdma_queue(dqm, q->sdma_id); @@ -1302,7 +1386,10 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm, void __user *alternate_aperture_base, uint64_t alternate_aperture_size) { - bool retval; + bool retval = true; + + if (!dqm->asic_ops.set_cache_memory_policy) + return retval; mutex_lock(&dqm->lock); @@ -1433,8 +1520,10 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, /* Clear all user mode queues */ list_for_each_entry(q, &qpd->queues_list, list) { - if (q->properties.type == KFD_QUEUE_TYPE_SDMA) + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { dqm->sdma_queue_count--; + deallocate_sdma_queue(dqm, q->sdma_id); + } if (q->properties.is_active) dqm->queue_count--; @@ -1569,6 +1658,11 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) case CHIP_POLARIS11: device_queue_manager_init_vi_tonga(&dqm->asic_ops); break; + + case CHIP_VEGA10: + case CHIP_RAVEN: + device_queue_manager_init_v9(&dqm->asic_ops); + break; default: WARN(1, "Unexpected ASIC family %u", dev->device_info->asic_family); @@ -1619,6 +1713,18 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data) int pipe, queue; int r = 0; + r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->kgd, + KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE, &dump, &n_regs); + if (!r) { + seq_printf(m, " HIQ on MEC %d Pipe %d Queue %d\n", + KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1, + KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm), + KFD_CIK_HIQ_QUEUE); + seq_reg_dump(m, dump, n_regs); + + kfree(dump); + } + for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { int pipe_offset = pipe * get_queues_per_pipe(dqm); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 412beff..59a6b19 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -200,6 +200,8 @@ void device_queue_manager_init_vi( struct device_queue_manager_asic_ops *asic_ops); void device_queue_manager_init_vi_tonga( struct device_queue_manager_asic_ops *asic_ops); +void device_queue_manager_init_v9( + struct device_queue_manager_asic_ops *asic_ops); void program_sh_mem_settings(struct device_queue_manager *dqm, struct qcm_process_device *qpd); unsigned int get_queues_num(struct device_queue_manager *dqm); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c new file mode 100644 index 0000000..79e5bcf --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c @@ -0,0 +1,84 @@ +/* + * Copyright 2016-2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "kfd_device_queue_manager.h" +#include "vega10_enum.h" +#include "gc/gc_9_0_offset.h" +#include "gc/gc_9_0_sh_mask.h" +#include "sdma0/sdma0_4_0_sh_mask.h" + +static int update_qpd_v9(struct device_queue_manager *dqm, + struct qcm_process_device *qpd); +static void init_sdma_vm_v9(struct device_queue_manager *dqm, struct queue *q, + struct qcm_process_device *qpd); + +void device_queue_manager_init_v9( + struct device_queue_manager_asic_ops *asic_ops) +{ + asic_ops->update_qpd = update_qpd_v9; + asic_ops->init_sdma_vm = init_sdma_vm_v9; +} + +static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd) +{ + uint32_t shared_base = pdd->lds_base >> 48; + uint32_t private_base = pdd->scratch_base >> 48; + + return (shared_base << SH_MEM_BASES__SHARED_BASE__SHIFT) | + private_base; +} + +static int update_qpd_v9(struct device_queue_manager *dqm, + struct qcm_process_device *qpd) +{ + struct kfd_process_device *pdd; + + pdd = qpd_to_pdd(qpd); + + /* check if sh_mem_config register already configured */ + if (qpd->sh_mem_config == 0) { + qpd->sh_mem_config = + SH_MEM_ALIGNMENT_MODE_UNALIGNED << + SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT; + if (vega10_noretry && + !dqm->dev->device_info->needs_iommu_device) + qpd->sh_mem_config |= + 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT; + + qpd->sh_mem_ape1_limit = 0; + qpd->sh_mem_ape1_base = 0; + } + + qpd->sh_mem_bases = compute_sh_mem_bases_64bit(pdd); + + pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases); + + return 0; +} + +static void init_sdma_vm_v9(struct device_queue_manager *dqm, struct queue *q, + struct qcm_process_device *qpd) +{ + /* Not needed on SDMAv4 any more */ + q->properties.sdma_vm_addr = 0; +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c index ebb4da14..c3744d8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c @@ -33,7 +33,6 @@ static DEFINE_IDA(doorbell_ida); static unsigned int max_doorbell_slices; -#define KFD_SIZE_OF_DOORBELL_IN_BYTES 4 /* * Each device exposes a doorbell aperture, a PCI MMIO aperture that @@ -50,9 +49,9 @@ static unsigned int max_doorbell_slices; */ /* # of doorbell bytes allocated for each process. */ -static inline size_t doorbell_process_allocation(void) +size_t kfd_doorbell_process_slice(struct kfd_dev *kfd) { - return roundup(KFD_SIZE_OF_DOORBELL_IN_BYTES * + return roundup(kfd->device_info->doorbell_size * KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, PAGE_SIZE); } @@ -72,16 +71,16 @@ int kfd_doorbell_init(struct kfd_dev *kfd) doorbell_start_offset = roundup(kfd->shared_resources.doorbell_start_offset, - doorbell_process_allocation()); + kfd_doorbell_process_slice(kfd)); doorbell_aperture_size = rounddown(kfd->shared_resources.doorbell_aperture_size, - doorbell_process_allocation()); + kfd_doorbell_process_slice(kfd)); if (doorbell_aperture_size > doorbell_start_offset) doorbell_process_limit = (doorbell_aperture_size - doorbell_start_offset) / - doorbell_process_allocation(); + kfd_doorbell_process_slice(kfd); else return -ENOSPC; @@ -95,7 +94,7 @@ int kfd_doorbell_init(struct kfd_dev *kfd) kfd->doorbell_id_offset = doorbell_start_offset / sizeof(u32); kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base, - doorbell_process_allocation()); + kfd_doorbell_process_slice(kfd)); if (!kfd->doorbell_kernel_ptr) return -ENOMEM; @@ -127,21 +126,16 @@ void kfd_doorbell_fini(struct kfd_dev *kfd) iounmap(kfd->doorbell_kernel_ptr); } -int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma) +int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process, + struct vm_area_struct *vma) { phys_addr_t address; - struct kfd_dev *dev; /* * For simplicitly we only allow mapping of the entire doorbell * allocation of a single device & process. */ - if (vma->vm_end - vma->vm_start != doorbell_process_allocation()) - return -EINVAL; - - /* Find kfd device according to gpu id */ - dev = kfd_device_by_id(vma->vm_pgoff); - if (!dev) + if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev)) return -EINVAL; /* Calculate physical address of doorbell */ @@ -158,19 +152,19 @@ int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma) " vm_flags == 0x%04lX\n" " size == 0x%04lX\n", (unsigned long long) vma->vm_start, address, vma->vm_flags, - doorbell_process_allocation()); + kfd_doorbell_process_slice(dev)); return io_remap_pfn_range(vma, vma->vm_start, address >> PAGE_SHIFT, - doorbell_process_allocation(), + kfd_doorbell_process_slice(dev), vma->vm_page_prot); } /* get kernel iomem pointer for a doorbell */ -u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, +void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, unsigned int *doorbell_off) { u32 inx; @@ -185,6 +179,8 @@ u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) return NULL; + inx *= kfd->device_info->doorbell_size / sizeof(u32); + /* * Calculating the kernel doorbell offset using the first * doorbell page. @@ -210,7 +206,7 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr) mutex_unlock(&kfd->doorbell_mutex); } -inline void write_kernel_doorbell(u32 __iomem *db, u32 value) +void write_kernel_doorbell(void __iomem *db, u32 value) { if (db) { writel(value, db); @@ -218,30 +214,37 @@ inline void write_kernel_doorbell(u32 __iomem *db, u32 value) } } -/* - * queue_ids are in the range [0,MAX_PROCESS_QUEUES) and are mapped 1:1 - * to doorbells with the process's doorbell page - */ -unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd, +void write_kernel_doorbell64(void __iomem *db, u64 value) +{ + if (db) { + WARN(((unsigned long)db & 7) != 0, + "Unaligned 64-bit doorbell"); + writeq(value, (u64 __iomem *)db); + pr_debug("writing %llu to doorbell address %p\n", value, db); + } +} + +unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd, struct kfd_process *process, - unsigned int queue_id) + unsigned int doorbell_id) { /* * doorbell_id_offset accounts for doorbells taken by KGD. - * index * doorbell_process_allocation/sizeof(u32) adjusts to - * the process's doorbells. + * index * kfd_doorbell_process_slice/sizeof(u32) adjusts to + * the process's doorbells. The offset returned is in dword + * units regardless of the ASIC-dependent doorbell size. */ return kfd->doorbell_id_offset + process->doorbell_index - * doorbell_process_allocation() / sizeof(u32) + - queue_id; + * kfd_doorbell_process_slice(kfd) / sizeof(u32) + + doorbell_id * kfd->device_info->doorbell_size / sizeof(u32); } uint64_t kfd_get_number_elems(struct kfd_dev *kfd) { uint64_t num_of_elems = (kfd->shared_resources.doorbell_aperture_size - kfd->shared_resources.doorbell_start_offset) / - doorbell_process_allocation() + 1; + kfd_doorbell_process_slice(kfd) + 1; return num_of_elems; @@ -251,7 +254,7 @@ phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev, struct kfd_process *process) { return dev->doorbell_base + - process->doorbell_index * doorbell_process_allocation(); + process->doorbell_index * kfd_doorbell_process_slice(dev); } int kfd_alloc_process_doorbells(struct kfd_process *process) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 4890a90..5562e94 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -345,7 +345,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p, case KFD_EVENT_TYPE_DEBUG: ret = create_signal_event(devkfd, p, ev); if (!ret) { - *event_page_offset = KFD_MMAP_EVENTS_MASK; + *event_page_offset = KFD_MMAP_TYPE_EVENTS; *event_page_offset <<= PAGE_SHIFT; *event_slot_index = ev->event_id; } @@ -496,7 +496,7 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id, pr_debug_ratelimited("Partial ID invalid: %u (%u valid bits)\n", partial_id, valid_id_bits); - if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT/2) { + if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT / 64) { /* With relatively few events, it's faster to * iterate over the event IDR */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index 66852de..97d5423 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -275,23 +275,35 @@ * for FLAT_* / S_LOAD operations. */ -#define MAKE_GPUVM_APP_BASE(gpu_num) \ +#define MAKE_GPUVM_APP_BASE_VI(gpu_num) \ (((uint64_t)(gpu_num) << 61) + 0x1000000000000L) #define MAKE_GPUVM_APP_LIMIT(base, size) \ (((uint64_t)(base) & 0xFFFFFF0000000000UL) + (size) - 1) -#define MAKE_SCRATCH_APP_BASE() \ +#define MAKE_SCRATCH_APP_BASE_VI() \ (((uint64_t)(0x1UL) << 61) + 0x100000000L) #define MAKE_SCRATCH_APP_LIMIT(base) \ (((uint64_t)base & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF) -#define MAKE_LDS_APP_BASE() \ +#define MAKE_LDS_APP_BASE_VI() \ (((uint64_t)(0x1UL) << 61) + 0x0) #define MAKE_LDS_APP_LIMIT(base) \ (((uint64_t)(base) & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF) +/* On GFXv9 the LDS and scratch apertures are programmed independently + * using the high 16 bits of the 64-bit virtual address. They must be + * in the hole, which will be the case as long as the high 16 bits are + * not 0. + * + * The aperture sizes are still 4GB implicitly. + * + * A GPUVM aperture is not applicable on GFXv9. + */ +#define MAKE_LDS_APP_BASE_V9() ((uint64_t)(0x1UL) << 48) +#define MAKE_SCRATCH_APP_BASE_V9() ((uint64_t)(0x2UL) << 48) + /* User mode manages most of the SVM aperture address space. The low * 16MB are reserved for kernel use (CWSR trap handler and kernel IB * for now). @@ -300,6 +312,55 @@ #define SVM_CWSR_BASE (SVM_USER_BASE - KFD_CWSR_TBA_TMA_SIZE) #define SVM_IB_BASE (SVM_CWSR_BASE - PAGE_SIZE) +static void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id) +{ + /* + * node id couldn't be 0 - the three MSB bits of + * aperture shoudn't be 0 + */ + pdd->lds_base = MAKE_LDS_APP_BASE_VI(); + pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base); + + if (!pdd->dev->device_info->needs_iommu_device) { + /* dGPUs: SVM aperture starting at 0 + * with small reserved space for kernel. + * Set them to CANONICAL addresses. + */ + pdd->gpuvm_base = SVM_USER_BASE; + pdd->gpuvm_limit = + pdd->dev->shared_resources.gpuvm_size - 1; + } else { + /* set them to non CANONICAL addresses, and no SVM is + * allocated. + */ + pdd->gpuvm_base = MAKE_GPUVM_APP_BASE_VI(id + 1); + pdd->gpuvm_limit = MAKE_GPUVM_APP_LIMIT(pdd->gpuvm_base, + pdd->dev->shared_resources.gpuvm_size); + } + + pdd->scratch_base = MAKE_SCRATCH_APP_BASE_VI(); + pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base); +} + +static void kfd_init_apertures_v9(struct kfd_process_device *pdd, uint8_t id) +{ + pdd->lds_base = MAKE_LDS_APP_BASE_V9(); + pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base); + + /* Raven needs SVM to support graphic handle, etc. Leave the small + * reserved space before SVM on Raven as well, even though we don't + * have to. + * Set gpuvm_base and gpuvm_limit to CANONICAL addresses so that they + * are used in Thunk to reserve SVM. + */ + pdd->gpuvm_base = SVM_USER_BASE; + pdd->gpuvm_limit = + pdd->dev->shared_resources.gpuvm_size - 1; + + pdd->scratch_base = MAKE_SCRATCH_APP_BASE_V9(); + pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base); +} + int kfd_init_apertures(struct kfd_process *process) { uint8_t id = 0; @@ -307,9 +368,7 @@ int kfd_init_apertures(struct kfd_process *process) struct kfd_process_device *pdd; /*Iterating over all devices*/ - while (kfd_topology_enum_kfd_devices(id, &dev) == 0 && - id < NUM_OF_SUPPORTED_GPUS) { - + while (kfd_topology_enum_kfd_devices(id, &dev) == 0) { if (!dev) { id++; /* Skip non GPU devices */ continue; @@ -318,7 +377,7 @@ int kfd_init_apertures(struct kfd_process *process) pdd = kfd_create_process_device_data(dev, process); if (!pdd) { pr_err("Failed to create process device data\n"); - return -1; + return -ENOMEM; } /* * For 64 bit process apertures will be statically reserved in @@ -330,32 +389,30 @@ int kfd_init_apertures(struct kfd_process *process) pdd->gpuvm_base = pdd->gpuvm_limit = 0; pdd->scratch_base = pdd->scratch_limit = 0; } else { - /* Same LDS and scratch apertures can be used - * on all GPUs. This allows using more dGPUs - * than placement options for apertures. - */ - pdd->lds_base = MAKE_LDS_APP_BASE(); - pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base); - - pdd->scratch_base = MAKE_SCRATCH_APP_BASE(); - pdd->scratch_limit = - MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base); + switch (dev->device_info->asic_family) { + case CHIP_KAVERI: + case CHIP_HAWAII: + case CHIP_CARRIZO: + case CHIP_TONGA: + case CHIP_FIJI: + case CHIP_POLARIS10: + case CHIP_POLARIS11: + kfd_init_apertures_vi(pdd, id); + break; + case CHIP_VEGA10: + case CHIP_RAVEN: + kfd_init_apertures_v9(pdd, id); + break; + default: + WARN(1, "Unexpected ASIC family %u", + dev->device_info->asic_family); + return -EINVAL; + } - if (dev->device_info->needs_iommu_device) { - /* APUs: GPUVM aperture in - * non-canonical address space - */ - pdd->gpuvm_base = MAKE_GPUVM_APP_BASE(id + 1); - pdd->gpuvm_limit = MAKE_GPUVM_APP_LIMIT( - pdd->gpuvm_base, - dev->shared_resources.gpuvm_size); - } else { - /* dGPUs: SVM aperture starting at 0 - * with small reserved space for kernel + if (!dev->device_info->needs_iommu_device) { + /* dGPUs: the reserved space for kernel + * before SVM */ - pdd->gpuvm_base = SVM_USER_BASE; - pdd->gpuvm_limit = - dev->shared_resources.gpuvm_size - 1; pdd->qpd.cwsr_base = SVM_CWSR_BASE; pdd->qpd.ib_base = SVM_IB_BASE; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c new file mode 100644 index 0000000..37029ba --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -0,0 +1,92 @@ +/* + * Copyright 2016-2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "kfd_priv.h" +#include "kfd_events.h" +#include "soc15_int.h" + + +static bool event_interrupt_isr_v9(struct kfd_dev *dev, + const uint32_t *ih_ring_entry) +{ + uint16_t source_id, client_id, pasid, vmid; + const uint32_t *data = ih_ring_entry; + + /* Only handle interrupts from KFD VMIDs */ + vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry); + if (vmid < dev->vm_info.first_vmid_kfd || + vmid > dev->vm_info.last_vmid_kfd) + return 0; + + /* If there is no valid PASID, it's likely a firmware bug */ + pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry); + if (WARN_ONCE(pasid == 0, "FW bug: No PASID in KFD interrupt")) + return 0; + + source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry); + client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry); + + pr_debug("client id 0x%x, source id %d, pasid 0x%x. raw data:\n", + client_id, source_id, pasid); + pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n", + data[0], data[1], data[2], data[3], + data[4], data[5], data[6], data[7]); + + /* Interrupt types we care about: various signals and faults. + * They will be forwarded to a work queue (see below). + */ + return source_id == SOC15_INTSRC_CP_END_OF_PIPE || + source_id == SOC15_INTSRC_SDMA_TRAP || + source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG || + source_id == SOC15_INTSRC_CP_BAD_OPCODE; +} + +static void event_interrupt_wq_v9(struct kfd_dev *dev, + const uint32_t *ih_ring_entry) +{ + uint16_t source_id, client_id, pasid, vmid; + uint32_t context_id; + + source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry); + client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry); + pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry); + vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry); + context_id = SOC15_CONTEXT_ID0_FROM_IH_ENTRY(ih_ring_entry); + + if (source_id == SOC15_INTSRC_CP_END_OF_PIPE) + kfd_signal_event_interrupt(pasid, context_id, 32); + else if (source_id == SOC15_INTSRC_SDMA_TRAP) + kfd_signal_event_interrupt(pasid, context_id & 0xfffffff, 28); + else if (source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG) + kfd_signal_event_interrupt(pasid, context_id & 0xffffff, 24); + else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) + kfd_signal_hw_exception_event(pasid); + else if (client_id == SOC15_IH_CLIENTID_VMC || + client_id == SOC15_IH_CLIENTID_UTCL2) { + /* TODO */ + } +} + +const struct kfd_event_interrupt_class event_interrupt_class_v9 = { + .interrupt_isr = event_interrupt_isr_v9, + .interrupt_wq = event_interrupt_wq_v9, +}; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c index 035c351..db6d933 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c @@ -139,10 +139,12 @@ static void interrupt_wq(struct work_struct *work) { struct kfd_dev *dev = container_of(work, struct kfd_dev, interrupt_work); + uint32_t ih_ring_entry[KFD_MAX_RING_ENTRY_SIZE]; - uint32_t ih_ring_entry[DIV_ROUND_UP( - dev->device_info->ih_ring_entry_size, - sizeof(uint32_t))]; + if (dev->device_info->ih_ring_entry_size > sizeof(ih_ring_entry)) { + dev_err_once(kfd_chardev(), "Ring entry too small\n"); + return; + } while (dequeue_ih_ring_entry(dev, ih_ring_entry)) dev->device_info->event_interrupt_class->interrupt_wq(dev, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index 69f4964..476951d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -99,7 +99,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, kq->rptr_kernel = kq->rptr_mem->cpu_ptr; kq->rptr_gpu_addr = kq->rptr_mem->gpu_addr; - retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->wptr_kernel), + retval = kfd_gtt_sa_allocate(dev, dev->device_info->doorbell_size, &kq->wptr_mem); if (retval != 0) @@ -208,6 +208,7 @@ static int acquire_packet_buffer(struct kernel_queue *kq, size_t available_size; size_t queue_size_dwords; uint32_t wptr, rptr; + uint64_t wptr64; unsigned int *queue_address; /* When rptr == wptr, the buffer is empty. @@ -216,7 +217,8 @@ static int acquire_packet_buffer(struct kernel_queue *kq, * the opposite. So we can only use up to queue_size_dwords - 1 dwords. */ rptr = *kq->rptr_kernel; - wptr = *kq->wptr_kernel; + wptr = kq->pending_wptr; + wptr64 = kq->pending_wptr64; queue_address = (unsigned int *)kq->pq_kernel_addr; queue_size_dwords = kq->queue->properties.queue_size / 4; @@ -232,29 +234,33 @@ static int acquire_packet_buffer(struct kernel_queue *kq, * make sure calling functions know * acquire_packet_buffer() failed */ - *buffer_ptr = NULL; - return -ENOMEM; + goto err_no_space; } if (wptr + packet_size_in_dwords >= queue_size_dwords) { /* make sure after rolling back to position 0, there is * still enough space. */ - if (packet_size_in_dwords >= rptr) { - *buffer_ptr = NULL; - return -ENOMEM; - } + if (packet_size_in_dwords >= rptr) + goto err_no_space; + /* fill nops, roll back and start at position 0 */ while (wptr > 0) { queue_address[wptr] = kq->nop_packet; wptr = (wptr + 1) % queue_size_dwords; + wptr64++; } } *buffer_ptr = &queue_address[wptr]; kq->pending_wptr = wptr + packet_size_in_dwords; + kq->pending_wptr64 = wptr64 + packet_size_in_dwords; return 0; + +err_no_space: + *buffer_ptr = NULL; + return -ENOMEM; } static void submit_packet(struct kernel_queue *kq) @@ -270,14 +276,18 @@ static void submit_packet(struct kernel_queue *kq) pr_debug("\n"); #endif - *kq->wptr_kernel = kq->pending_wptr; - write_kernel_doorbell(kq->queue->properties.doorbell_ptr, - kq->pending_wptr); + kq->ops_asic_specific.submit_packet(kq); } static void rollback_packet(struct kernel_queue *kq) { - kq->pending_wptr = *kq->queue->properties.write_ptr; + if (kq->dev->device_info->doorbell_size == 8) { + kq->pending_wptr64 = *kq->wptr64_kernel; + kq->pending_wptr = *kq->wptr_kernel % + (kq->queue->properties.queue_size / 4); + } else { + kq->pending_wptr = *kq->wptr_kernel; + } } struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, @@ -308,6 +318,11 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, case CHIP_HAWAII: kernel_queue_init_cik(&kq->ops_asic_specific); break; + + case CHIP_VEGA10: + case CHIP_RAVEN: + kernel_queue_init_v9(&kq->ops_asic_specific); + break; default: WARN(1, "Unexpected ASIC family %u", dev->device_info->asic_family); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h index 5940531..97aff20 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h @@ -72,6 +72,7 @@ struct kernel_queue { struct kfd_dev *dev; struct mqd_manager *mqd; struct queue *queue; + uint64_t pending_wptr64; uint32_t pending_wptr; unsigned int nop_packet; @@ -79,7 +80,10 @@ struct kernel_queue { uint32_t *rptr_kernel; uint64_t rptr_gpu_addr; struct kfd_mem_obj *wptr_mem; - uint32_t *wptr_kernel; + union { + uint64_t *wptr64_kernel; + uint32_t *wptr_kernel; + }; uint64_t wptr_gpu_addr; struct kfd_mem_obj *pq; uint64_t pq_gpu_addr; @@ -97,5 +101,6 @@ struct kernel_queue { void kernel_queue_init_cik(struct kernel_queue_ops *ops); void kernel_queue_init_vi(struct kernel_queue_ops *ops); +void kernel_queue_init_v9(struct kernel_queue_ops *ops); #endif /* KFD_KERNEL_QUEUE_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c index a90eb44..19e54ac 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c @@ -26,11 +26,13 @@ static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev, enum kfd_queue_type type, unsigned int queue_size); static void uninitialize_cik(struct kernel_queue *kq); +static void submit_packet_cik(struct kernel_queue *kq); void kernel_queue_init_cik(struct kernel_queue_ops *ops) { ops->initialize = initialize_cik; ops->uninitialize = uninitialize_cik; + ops->submit_packet = submit_packet_cik; } static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev, @@ -42,3 +44,10 @@ static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev, static void uninitialize_cik(struct kernel_queue *kq) { } + +static void submit_packet_cik(struct kernel_queue *kq) +{ + *kq->wptr_kernel = kq->pending_wptr; + write_kernel_doorbell(kq->queue->properties.doorbell_ptr, + kq->pending_wptr); +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c new file mode 100644 index 0000000..684a3bf --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c @@ -0,0 +1,340 @@ +/* + * Copyright 2016-2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include "kfd_kernel_queue.h" +#include "kfd_device_queue_manager.h" +#include "kfd_pm4_headers_ai.h" +#include "kfd_pm4_opcodes.h" + +static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev, + enum kfd_queue_type type, unsigned int queue_size); +static void uninitialize_v9(struct kernel_queue *kq); +static void submit_packet_v9(struct kernel_queue *kq); + +void kernel_queue_init_v9(struct kernel_queue_ops *ops) +{ + ops->initialize = initialize_v9; + ops->uninitialize = uninitialize_v9; + ops->submit_packet = submit_packet_v9; +} + +static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev, + enum kfd_queue_type type, unsigned int queue_size) +{ + int retval; + + retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem); + if (retval) + return false; + + kq->eop_gpu_addr = kq->eop_mem->gpu_addr; + kq->eop_kernel_addr = kq->eop_mem->cpu_ptr; + + memset(kq->eop_kernel_addr, 0, PAGE_SIZE); + + return true; +} + +static void uninitialize_v9(struct kernel_queue *kq) +{ + kfd_gtt_sa_free(kq->dev, kq->eop_mem); +} + +static void submit_packet_v9(struct kernel_queue *kq) +{ + *kq->wptr64_kernel = kq->pending_wptr64; + write_kernel_doorbell64(kq->queue->properties.doorbell_ptr, + kq->pending_wptr64); +} + +static int pm_map_process_v9(struct packet_manager *pm, + uint32_t *buffer, struct qcm_process_device *qpd) +{ + struct pm4_mes_map_process *packet; + uint64_t vm_page_table_base_addr = + (uint64_t)(qpd->page_table_base) << 12; + + packet = (struct pm4_mes_map_process *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_map_process)); + + packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS, + sizeof(struct pm4_mes_map_process)); + packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; + packet->bitfields2.process_quantum = 1; + packet->bitfields2.pasid = qpd->pqm->process->pasid; + packet->bitfields14.gds_size = qpd->gds_size; + packet->bitfields14.num_gws = qpd->num_gws; + packet->bitfields14.num_oac = qpd->num_oac; + packet->bitfields14.sdma_enable = 1; + packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count; + + packet->sh_mem_config = qpd->sh_mem_config; + packet->sh_mem_bases = qpd->sh_mem_bases; + packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8); + packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8); + packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8); + packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8); + + packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area); + packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area); + + packet->vm_context_page_table_base_addr_lo32 = + lower_32_bits(vm_page_table_base_addr); + packet->vm_context_page_table_base_addr_hi32 = + upper_32_bits(vm_page_table_base_addr); + + return 0; +} + +static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer, + uint64_t ib, size_t ib_size_in_dwords, bool chain) +{ + struct pm4_mes_runlist *packet; + + int concurrent_proc_cnt = 0; + struct kfd_dev *kfd = pm->dqm->dev; + + /* Determine the number of processes to map together to HW: + * it can not exceed the number of VMIDs available to the + * scheduler, and it is determined by the smaller of the number + * of processes in the runlist and kfd module parameter + * hws_max_conc_proc. + * Note: the arbitration between the number of VMIDs and + * hws_max_conc_proc has been done in + * kgd2kfd_device_init(). + */ + concurrent_proc_cnt = min(pm->dqm->processes_count, + kfd->max_proc_per_quantum); + + packet = (struct pm4_mes_runlist *)buffer; + + memset(buffer, 0, sizeof(struct pm4_mes_runlist)); + packet->header.u32All = pm_build_pm4_header(IT_RUN_LIST, + sizeof(struct pm4_mes_runlist)); + + packet->bitfields4.ib_size = ib_size_in_dwords; + packet->bitfields4.chain = chain ? 1 : 0; + packet->bitfields4.offload_polling = 0; + packet->bitfields4.valid = 1; + packet->bitfields4.process_cnt = concurrent_proc_cnt; + packet->ordinal2 = lower_32_bits(ib); + packet->ib_base_hi = upper_32_bits(ib); + + return 0; +} + +static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer, + struct queue *q, bool is_static) +{ + struct pm4_mes_map_queues *packet; + bool use_static = is_static; + + packet = (struct pm4_mes_map_queues *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_map_queues)); + + packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES, + sizeof(struct pm4_mes_map_queues)); + packet->bitfields2.alloc_format = + alloc_format__mes_map_queues__one_per_pipe_vi; + packet->bitfields2.num_queues = 1; + packet->bitfields2.queue_sel = + queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi; + + packet->bitfields2.engine_sel = + engine_sel__mes_map_queues__compute_vi; + packet->bitfields2.queue_type = + queue_type__mes_map_queues__normal_compute_vi; + + switch (q->properties.type) { + case KFD_QUEUE_TYPE_COMPUTE: + if (use_static) + packet->bitfields2.queue_type = + queue_type__mes_map_queues__normal_latency_static_queue_vi; + break; + case KFD_QUEUE_TYPE_DIQ: + packet->bitfields2.queue_type = + queue_type__mes_map_queues__debug_interface_queue_vi; + break; + case KFD_QUEUE_TYPE_SDMA: + packet->bitfields2.engine_sel = q->properties.sdma_engine_id + + engine_sel__mes_map_queues__sdma0_vi; + use_static = false; /* no static queues under SDMA */ + break; + default: + WARN(1, "queue type %d", q->properties.type); + return -EINVAL; + } + packet->bitfields3.doorbell_offset = + q->properties.doorbell_off; + + packet->mqd_addr_lo = + lower_32_bits(q->gart_mqd_addr); + + packet->mqd_addr_hi = + upper_32_bits(q->gart_mqd_addr); + + packet->wptr_addr_lo = + lower_32_bits((uint64_t)q->properties.write_ptr); + + packet->wptr_addr_hi = + upper_32_bits((uint64_t)q->properties.write_ptr); + + return 0; +} + +static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer, + enum kfd_queue_type type, + enum kfd_unmap_queues_filter filter, + uint32_t filter_param, bool reset, + unsigned int sdma_engine) +{ + struct pm4_mes_unmap_queues *packet; + + packet = (struct pm4_mes_unmap_queues *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues)); + + packet->header.u32All = pm_build_pm4_header(IT_UNMAP_QUEUES, + sizeof(struct pm4_mes_unmap_queues)); + switch (type) { + case KFD_QUEUE_TYPE_COMPUTE: + case KFD_QUEUE_TYPE_DIQ: + packet->bitfields2.engine_sel = + engine_sel__mes_unmap_queues__compute; + break; + case KFD_QUEUE_TYPE_SDMA: + packet->bitfields2.engine_sel = + engine_sel__mes_unmap_queues__sdma0 + sdma_engine; + break; + default: + WARN(1, "queue type %d", type); + return -EINVAL; + } + + if (reset) + packet->bitfields2.action = + action__mes_unmap_queues__reset_queues; + else + packet->bitfields2.action = + action__mes_unmap_queues__preempt_queues; + + switch (filter) { + case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE: + packet->bitfields2.queue_sel = + queue_sel__mes_unmap_queues__perform_request_on_specified_queues; + packet->bitfields2.num_queues = 1; + packet->bitfields3b.doorbell_offset0 = filter_param; + break; + case KFD_UNMAP_QUEUES_FILTER_BY_PASID: + packet->bitfields2.queue_sel = + queue_sel__mes_unmap_queues__perform_request_on_pasid_queues; + packet->bitfields3a.pasid = filter_param; + break; + case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES: + packet->bitfields2.queue_sel = + queue_sel__mes_unmap_queues__unmap_all_queues; + break; + case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES: + /* in this case, we do not preempt static queues */ + packet->bitfields2.queue_sel = + queue_sel__mes_unmap_queues__unmap_all_non_static_queues; + break; + default: + WARN(1, "filter %d", filter); + return -EINVAL; + } + + return 0; + +} + +static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer, + uint64_t fence_address, uint32_t fence_value) +{ + struct pm4_mes_query_status *packet; + + packet = (struct pm4_mes_query_status *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_query_status)); + + + packet->header.u32All = pm_build_pm4_header(IT_QUERY_STATUS, + sizeof(struct pm4_mes_query_status)); + + packet->bitfields2.context_id = 0; + packet->bitfields2.interrupt_sel = + interrupt_sel__mes_query_status__completion_status; + packet->bitfields2.command = + command__mes_query_status__fence_only_after_write_ack; + + packet->addr_hi = upper_32_bits((uint64_t)fence_address); + packet->addr_lo = lower_32_bits((uint64_t)fence_address); + packet->data_hi = upper_32_bits((uint64_t)fence_value); + packet->data_lo = lower_32_bits((uint64_t)fence_value); + + return 0; +} + + +static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer) +{ + struct pm4_mec_release_mem *packet; + + packet = (struct pm4_mec_release_mem *)buffer; + memset(buffer, 0, sizeof(struct pm4_mec_release_mem)); + + packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM, + sizeof(struct pm4_mec_release_mem)); + + packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; + packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe; + packet->bitfields2.tcl1_action_ena = 1; + packet->bitfields2.tc_action_ena = 1; + packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru; + + packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low; + packet->bitfields3.int_sel = + int_sel__mec_release_mem__send_interrupt_after_write_confirm; + + packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2; + packet->address_hi = upper_32_bits(gpu_addr); + + packet->data_lo = 0; + + return 0; +} + +const struct packet_manager_funcs kfd_v9_pm_funcs = { + .map_process = pm_map_process_v9, + .runlist = pm_runlist_v9, + .set_resources = pm_set_resources_vi, + .map_queues = pm_map_queues_v9, + .unmap_queues = pm_unmap_queues_v9, + .query_status = pm_query_status_v9, + .release_mem = pm_release_mem_v9, + .map_process_size = sizeof(struct pm4_mes_map_process), + .runlist_size = sizeof(struct pm4_mes_runlist), + .set_resources_size = sizeof(struct pm4_mes_set_resources), + .map_queues_size = sizeof(struct pm4_mes_map_queues), + .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues), + .query_status_size = sizeof(struct pm4_mes_query_status), + .release_mem_size = sizeof(struct pm4_mec_release_mem) +}; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c index f1d4828..bf20c6d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c @@ -22,15 +22,20 @@ */ #include "kfd_kernel_queue.h" +#include "kfd_device_queue_manager.h" +#include "kfd_pm4_headers_vi.h" +#include "kfd_pm4_opcodes.h" static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev, enum kfd_queue_type type, unsigned int queue_size); static void uninitialize_vi(struct kernel_queue *kq); +static void submit_packet_vi(struct kernel_queue *kq); void kernel_queue_init_vi(struct kernel_queue_ops *ops) { ops->initialize = initialize_vi; ops->uninitialize = uninitialize_vi; + ops->submit_packet = submit_packet_vi; } static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev, @@ -54,3 +59,317 @@ static void uninitialize_vi(struct kernel_queue *kq) { kfd_gtt_sa_free(kq->dev, kq->eop_mem); } + +static void submit_packet_vi(struct kernel_queue *kq) +{ + *kq->wptr_kernel = kq->pending_wptr; + write_kernel_doorbell(kq->queue->properties.doorbell_ptr, + kq->pending_wptr); +} + +unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size) +{ + union PM4_MES_TYPE_3_HEADER header; + + header.u32All = 0; + header.opcode = opcode; + header.count = packet_size / 4 - 2; + header.type = PM4_TYPE_3; + + return header.u32All; +} + +static int pm_map_process_vi(struct packet_manager *pm, uint32_t *buffer, + struct qcm_process_device *qpd) +{ + struct pm4_mes_map_process *packet; + + packet = (struct pm4_mes_map_process *)buffer; + + memset(buffer, 0, sizeof(struct pm4_mes_map_process)); + + packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS, + sizeof(struct pm4_mes_map_process)); + packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; + packet->bitfields2.process_quantum = 1; + packet->bitfields2.pasid = qpd->pqm->process->pasid; + packet->bitfields3.page_table_base = qpd->page_table_base; + packet->bitfields10.gds_size = qpd->gds_size; + packet->bitfields10.num_gws = qpd->num_gws; + packet->bitfields10.num_oac = qpd->num_oac; + packet->bitfields10.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count; + + packet->sh_mem_config = qpd->sh_mem_config; + packet->sh_mem_bases = qpd->sh_mem_bases; + packet->sh_mem_ape1_base = qpd->sh_mem_ape1_base; + packet->sh_mem_ape1_limit = qpd->sh_mem_ape1_limit; + + packet->sh_hidden_private_base_vmid = qpd->sh_hidden_private_base; + + packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area); + packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area); + + return 0; +} + +static int pm_runlist_vi(struct packet_manager *pm, uint32_t *buffer, + uint64_t ib, size_t ib_size_in_dwords, bool chain) +{ + struct pm4_mes_runlist *packet; + int concurrent_proc_cnt = 0; + struct kfd_dev *kfd = pm->dqm->dev; + + if (WARN_ON(!ib)) + return -EFAULT; + + /* Determine the number of processes to map together to HW: + * it can not exceed the number of VMIDs available to the + * scheduler, and it is determined by the smaller of the number + * of processes in the runlist and kfd module parameter + * hws_max_conc_proc. + * Note: the arbitration between the number of VMIDs and + * hws_max_conc_proc has been done in + * kgd2kfd_device_init(). + */ + concurrent_proc_cnt = min(pm->dqm->processes_count, + kfd->max_proc_per_quantum); + + packet = (struct pm4_mes_runlist *)buffer; + + memset(buffer, 0, sizeof(struct pm4_mes_runlist)); + packet->header.u32All = pm_build_pm4_header(IT_RUN_LIST, + sizeof(struct pm4_mes_runlist)); + + packet->bitfields4.ib_size = ib_size_in_dwords; + packet->bitfields4.chain = chain ? 1 : 0; + packet->bitfields4.offload_polling = 0; + packet->bitfields4.valid = 1; + packet->bitfields4.process_cnt = concurrent_proc_cnt; + packet->ordinal2 = lower_32_bits(ib); + packet->bitfields3.ib_base_hi = upper_32_bits(ib); + + return 0; +} + +int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer, + struct scheduling_resources *res) +{ + struct pm4_mes_set_resources *packet; + + packet = (struct pm4_mes_set_resources *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_set_resources)); + + packet->header.u32All = pm_build_pm4_header(IT_SET_RESOURCES, + sizeof(struct pm4_mes_set_resources)); + + packet->bitfields2.queue_type = + queue_type__mes_set_resources__hsa_interface_queue_hiq; + packet->bitfields2.vmid_mask = res->vmid_mask; + packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100; + packet->bitfields7.oac_mask = res->oac_mask; + packet->bitfields8.gds_heap_base = res->gds_heap_base; + packet->bitfields8.gds_heap_size = res->gds_heap_size; + + packet->gws_mask_lo = lower_32_bits(res->gws_mask); + packet->gws_mask_hi = upper_32_bits(res->gws_mask); + + packet->queue_mask_lo = lower_32_bits(res->queue_mask); + packet->queue_mask_hi = upper_32_bits(res->queue_mask); + + return 0; +} + +static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer, + struct queue *q, bool is_static) +{ + struct pm4_mes_map_queues *packet; + bool use_static = is_static; + + packet = (struct pm4_mes_map_queues *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_map_queues)); + + packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES, + sizeof(struct pm4_mes_map_queues)); + packet->bitfields2.alloc_format = + alloc_format__mes_map_queues__one_per_pipe_vi; + packet->bitfields2.num_queues = 1; + packet->bitfields2.queue_sel = + queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi; + + packet->bitfields2.engine_sel = + engine_sel__mes_map_queues__compute_vi; + packet->bitfields2.queue_type = + queue_type__mes_map_queues__normal_compute_vi; + + switch (q->properties.type) { + case KFD_QUEUE_TYPE_COMPUTE: + if (use_static) + packet->bitfields2.queue_type = + queue_type__mes_map_queues__normal_latency_static_queue_vi; + break; + case KFD_QUEUE_TYPE_DIQ: + packet->bitfields2.queue_type = + queue_type__mes_map_queues__debug_interface_queue_vi; + break; + case KFD_QUEUE_TYPE_SDMA: + packet->bitfields2.engine_sel = q->properties.sdma_engine_id + + engine_sel__mes_map_queues__sdma0_vi; + use_static = false; /* no static queues under SDMA */ + break; + default: + WARN(1, "queue type %d", q->properties.type); + return -EINVAL; + } + packet->bitfields3.doorbell_offset = + q->properties.doorbell_off; + + packet->mqd_addr_lo = + lower_32_bits(q->gart_mqd_addr); + + packet->mqd_addr_hi = + upper_32_bits(q->gart_mqd_addr); + + packet->wptr_addr_lo = + lower_32_bits((uint64_t)q->properties.write_ptr); + + packet->wptr_addr_hi = + upper_32_bits((uint64_t)q->properties.write_ptr); + + return 0; +} + +static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer, + enum kfd_queue_type type, + enum kfd_unmap_queues_filter filter, + uint32_t filter_param, bool reset, + unsigned int sdma_engine) +{ + struct pm4_mes_unmap_queues *packet; + + packet = (struct pm4_mes_unmap_queues *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues)); + + packet->header.u32All = pm_build_pm4_header(IT_UNMAP_QUEUES, + sizeof(struct pm4_mes_unmap_queues)); + switch (type) { + case KFD_QUEUE_TYPE_COMPUTE: + case KFD_QUEUE_TYPE_DIQ: + packet->bitfields2.engine_sel = + engine_sel__mes_unmap_queues__compute; + break; + case KFD_QUEUE_TYPE_SDMA: + packet->bitfields2.engine_sel = + engine_sel__mes_unmap_queues__sdma0 + sdma_engine; + break; + default: + WARN(1, "queue type %d", type); + return -EINVAL; + } + + if (reset) + packet->bitfields2.action = + action__mes_unmap_queues__reset_queues; + else + packet->bitfields2.action = + action__mes_unmap_queues__preempt_queues; + + switch (filter) { + case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE: + packet->bitfields2.queue_sel = + queue_sel__mes_unmap_queues__perform_request_on_specified_queues; + packet->bitfields2.num_queues = 1; + packet->bitfields3b.doorbell_offset0 = filter_param; + break; + case KFD_UNMAP_QUEUES_FILTER_BY_PASID: + packet->bitfields2.queue_sel = + queue_sel__mes_unmap_queues__perform_request_on_pasid_queues; + packet->bitfields3a.pasid = filter_param; + break; + case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES: + packet->bitfields2.queue_sel = + queue_sel__mes_unmap_queues__unmap_all_queues; + break; + case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES: + /* in this case, we do not preempt static queues */ + packet->bitfields2.queue_sel = + queue_sel__mes_unmap_queues__unmap_all_non_static_queues; + break; + default: + WARN(1, "filter %d", filter); + return -EINVAL; + } + + return 0; + +} + +static int pm_query_status_vi(struct packet_manager *pm, uint32_t *buffer, + uint64_t fence_address, uint32_t fence_value) +{ + struct pm4_mes_query_status *packet; + + packet = (struct pm4_mes_query_status *)buffer; + memset(buffer, 0, sizeof(struct pm4_mes_query_status)); + + packet->header.u32All = pm_build_pm4_header(IT_QUERY_STATUS, + sizeof(struct pm4_mes_query_status)); + + packet->bitfields2.context_id = 0; + packet->bitfields2.interrupt_sel = + interrupt_sel__mes_query_status__completion_status; + packet->bitfields2.command = + command__mes_query_status__fence_only_after_write_ack; + + packet->addr_hi = upper_32_bits((uint64_t)fence_address); + packet->addr_lo = lower_32_bits((uint64_t)fence_address); + packet->data_hi = upper_32_bits((uint64_t)fence_value); + packet->data_lo = lower_32_bits((uint64_t)fence_value); + + return 0; +} + +static int pm_release_mem_vi(uint64_t gpu_addr, uint32_t *buffer) +{ + struct pm4_mec_release_mem *packet; + + packet = (struct pm4_mec_release_mem *)buffer; + memset(buffer, 0, sizeof(*packet)); + + packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM, + sizeof(*packet)); + + packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; + packet->bitfields2.event_index = event_index___release_mem__end_of_pipe; + packet->bitfields2.tcl1_action_ena = 1; + packet->bitfields2.tc_action_ena = 1; + packet->bitfields2.cache_policy = cache_policy___release_mem__lru; + packet->bitfields2.atc = 0; + + packet->bitfields3.data_sel = data_sel___release_mem__send_32_bit_low; + packet->bitfields3.int_sel = + int_sel___release_mem__send_interrupt_after_write_confirm; + + packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2; + packet->address_hi = upper_32_bits(gpu_addr); + + packet->data_lo = 0; + + return 0; +} + +const struct packet_manager_funcs kfd_vi_pm_funcs = { + .map_process = pm_map_process_vi, + .runlist = pm_runlist_vi, + .set_resources = pm_set_resources_vi, + .map_queues = pm_map_queues_vi, + .unmap_queues = pm_unmap_queues_vi, + .query_status = pm_query_status_vi, + .release_mem = pm_release_mem_vi, + .map_process_size = sizeof(struct pm4_mes_map_process), + .runlist_size = sizeof(struct pm4_mes_runlist), + .set_resources_size = sizeof(struct pm4_mes_set_resources), + .map_queues_size = sizeof(struct pm4_mes_map_queues), + .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues), + .query_status_size = sizeof(struct pm4_mes_query_status), + .release_mem_size = sizeof(struct pm4_mec_release_mem) +}; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index e0c07d2..76bf2dc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -43,6 +43,8 @@ static const struct kgd2kfd_calls kgd2kfd = { .interrupt = kgd2kfd_interrupt, .suspend = kgd2kfd_suspend, .resume = kgd2kfd_resume, + .quiesce_mm = kgd2kfd_quiesce_mm, + .resume_mm = kgd2kfd_resume_mm, .schedule_evict_and_restore_process = kgd2kfd_schedule_evict_and_restore_process, }; @@ -81,6 +83,11 @@ module_param(ignore_crat, int, 0444); MODULE_PARM_DESC(ignore_crat, "Ignore CRAT table during KFD initialization (0 = use CRAT (default), 1 = ignore CRAT)"); +int vega10_noretry; +module_param_named(noretry, vega10_noretry, int, 0644); +MODULE_PARM_DESC(noretry, + "Set sh_mem_config.retry_disable on Vega10 (0 = retry enabled (default), 1 = retry disabled)"); + static int amdkfd_init_completed; int kgd2kfd_init(unsigned int interface_version, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index ee7061e..4b8eb50 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -38,6 +38,9 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, case CHIP_POLARIS10: case CHIP_POLARIS11: return mqd_manager_init_vi_tonga(type, dev); + case CHIP_VEGA10: + case CHIP_RAVEN: + return mqd_manager_init_v9(type, dev); default: WARN(1, "Unexpected ASIC family %u", dev->device_info->asic_family); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index c00c325..06eaa21 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -79,10 +79,6 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, m->cp_mqd_base_addr_lo = lower_32_bits(addr); m->cp_mqd_base_addr_hi = upper_32_bits(addr); - m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE | IB_ATC_EN; - /* Although WinKFD writes this, I suspect it should not be necessary */ - m->cp_hqd_ib_control = IB_ATC_EN | DEFAULT_MIN_IB_AVAIL_SIZE; - m->cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS | QUANTUM_DURATION(10); @@ -412,7 +408,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) return NULL; - mqd = kzalloc(sizeof(*mqd), GFP_KERNEL); + mqd = kzalloc(sizeof(*mqd), GFP_NOIO); if (!mqd) return NULL; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c new file mode 100644 index 0000000..684054f --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -0,0 +1,443 @@ +/* + * Copyright 2016-2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/printk.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include "kfd_priv.h" +#include "kfd_mqd_manager.h" +#include "v9_structs.h" +#include "gc/gc_9_0_offset.h" +#include "gc/gc_9_0_sh_mask.h" +#include "sdma0/sdma0_4_0_sh_mask.h" + +static inline struct v9_mqd *get_mqd(void *mqd) +{ + return (struct v9_mqd *)mqd; +} + +static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd) +{ + return (struct v9_sdma_mqd *)mqd; +} + +static int init_mqd(struct mqd_manager *mm, void **mqd, + struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, + struct queue_properties *q) +{ + int retval; + uint64_t addr; + struct v9_mqd *m; + struct kfd_dev *kfd = mm->dev; + + /* From V9, for CWSR, the control stack is located on the next page + * boundary after the mqd, we will use the gtt allocation function + * instead of sub-allocation function. + */ + if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) { + *mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO); + if (!*mqd_mem_obj) + return -ENOMEM; + retval = kfd->kfd2kgd->init_gtt_mem_allocation(kfd->kgd, + ALIGN(q->ctl_stack_size, PAGE_SIZE) + + ALIGN(sizeof(struct v9_mqd), PAGE_SIZE), + &((*mqd_mem_obj)->gtt_mem), + &((*mqd_mem_obj)->gpu_addr), + (void *)&((*mqd_mem_obj)->cpu_ptr)); + } else + retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct v9_mqd), + mqd_mem_obj); + if (retval != 0) + return -ENOMEM; + + m = (struct v9_mqd *) (*mqd_mem_obj)->cpu_ptr; + addr = (*mqd_mem_obj)->gpu_addr; + + memset(m, 0, sizeof(struct v9_mqd)); + + m->header = 0xC0310800; + m->compute_pipelinestat_enable = 1; + m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF; + m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF; + + m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK | + 0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT; + + m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT; + + m->cp_mqd_base_addr_lo = lower_32_bits(addr); + m->cp_mqd_base_addr_hi = upper_32_bits(addr); + + m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT | + 1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT | + 10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT; + + m->cp_hqd_pipe_priority = 1; + m->cp_hqd_queue_priority = 15; + + if (q->format == KFD_QUEUE_FORMAT_AQL) { + m->cp_hqd_aql_control = + 1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT; + } + + if (q->tba_addr) { + m->compute_pgm_rsrc2 |= + (1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT); + } + + if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) { + m->cp_hqd_persistent_state |= + (1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT); + m->cp_hqd_ctx_save_base_addr_lo = + lower_32_bits(q->ctx_save_restore_area_address); + m->cp_hqd_ctx_save_base_addr_hi = + upper_32_bits(q->ctx_save_restore_area_address); + m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size; + m->cp_hqd_cntl_stack_size = q->ctl_stack_size; + m->cp_hqd_cntl_stack_offset = q->ctl_stack_size; + m->cp_hqd_wg_state_offset = q->ctl_stack_size; + } + + *mqd = m; + if (gart_addr) + *gart_addr = addr; + retval = mm->update_mqd(mm, m, q); + + return retval; +} + +static int load_mqd(struct mqd_manager *mm, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + struct queue_properties *p, struct mm_struct *mms) +{ + /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */ + uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0); + + return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id, + (uint32_t __user *)p->write_ptr, + wptr_shift, 0, mms); +} + +static int update_mqd(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) +{ + struct v9_mqd *m; + + m = get_mqd(mqd); + + m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT; + m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1; + pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control); + + m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8); + m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8); + + m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr); + m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr); + m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr); + m->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr); + + m->cp_hqd_pq_doorbell_control = + q->doorbell_off << + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT; + pr_debug("cp_hqd_pq_doorbell_control 0x%x\n", + m->cp_hqd_pq_doorbell_control); + + m->cp_hqd_ib_control = + 3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT | + 1 << CP_HQD_IB_CONTROL__IB_EXE_DISABLE__SHIFT; + + /* + * HW does not clamp this field correctly. Maximum EOP queue size + * is constrained by per-SE EOP done signal count, which is 8-bit. + * Limit is 0xFF EOP entries (= 0x7F8 dwords). CP will not submit + * more than (EOP entry count - 1) so a queue size of 0x800 dwords + * is safe, giving a maximum field value of 0xA. + */ + m->cp_hqd_eop_control = min(0xA, + order_base_2(q->eop_ring_buffer_size / 4) - 1); + m->cp_hqd_eop_base_addr_lo = + lower_32_bits(q->eop_ring_buffer_address >> 8); + m->cp_hqd_eop_base_addr_hi = + upper_32_bits(q->eop_ring_buffer_address >> 8); + + m->cp_hqd_iq_timer = 0; + + m->cp_hqd_vmid = q->vmid; + + if (q->format == KFD_QUEUE_FORMAT_AQL) { + m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK | + 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT | + 1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT | + 1 << CP_HQD_PQ_CONTROL__WPP_CLAMP_EN__SHIFT; + m->cp_hqd_pq_doorbell_control |= 1 << + CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT; + } + if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) + m->cp_hqd_ctx_save_control = 0; + + q->is_active = (q->queue_size > 0 && + q->queue_address != 0 && + q->queue_percent > 0 && + !q->is_evicted); + + return 0; +} + + +static int destroy_mqd(struct mqd_manager *mm, void *mqd, + enum kfd_preempt_type type, + unsigned int timeout, uint32_t pipe_id, + uint32_t queue_id) +{ + return mm->dev->kfd2kgd->hqd_destroy + (mm->dev->kgd, mqd, type, timeout, + pipe_id, queue_id); +} + +static void uninit_mqd(struct mqd_manager *mm, void *mqd, + struct kfd_mem_obj *mqd_mem_obj) +{ + struct kfd_dev *kfd = mm->dev; + + if (mqd_mem_obj->gtt_mem) { + kfd->kfd2kgd->free_gtt_mem(kfd->kgd, mqd_mem_obj->gtt_mem); + kfree(mqd_mem_obj); + } else { + kfd_gtt_sa_free(mm->dev, mqd_mem_obj); + } +} + +static bool is_occupied(struct mqd_manager *mm, void *mqd, + uint64_t queue_address, uint32_t pipe_id, + uint32_t queue_id) +{ + return mm->dev->kfd2kgd->hqd_is_occupied( + mm->dev->kgd, queue_address, + pipe_id, queue_id); +} + +static int init_mqd_hiq(struct mqd_manager *mm, void **mqd, + struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, + struct queue_properties *q) +{ + struct v9_mqd *m; + int retval = init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q); + + if (retval != 0) + return retval; + + m = get_mqd(*mqd); + + m->cp_hqd_pq_control |= 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT | + 1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT; + + return retval; +} + +static int update_mqd_hiq(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) +{ + struct v9_mqd *m; + int retval = update_mqd(mm, mqd, q); + + if (retval != 0) + return retval; + + /* TODO: what's the point? update_mqd already does this. */ + m = get_mqd(mqd); + m->cp_hqd_vmid = q->vmid; + return retval; +} + +static int init_mqd_sdma(struct mqd_manager *mm, void **mqd, + struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, + struct queue_properties *q) +{ + int retval; + struct v9_sdma_mqd *m; + + + retval = kfd_gtt_sa_allocate(mm->dev, + sizeof(struct v9_sdma_mqd), + mqd_mem_obj); + + if (retval != 0) + return -ENOMEM; + + m = (struct v9_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr; + + memset(m, 0, sizeof(struct v9_sdma_mqd)); + + *mqd = m; + if (gart_addr) + *gart_addr = (*mqd_mem_obj)->gpu_addr; + + retval = mm->update_mqd(mm, m, q); + + return retval; +} + +static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd, + struct kfd_mem_obj *mqd_mem_obj) +{ + kfd_gtt_sa_free(mm->dev, mqd_mem_obj); +} + +static int load_mqd_sdma(struct mqd_manager *mm, void *mqd, + uint32_t pipe_id, uint32_t queue_id, + struct queue_properties *p, struct mm_struct *mms) +{ + return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd, + (uint32_t __user *)p->write_ptr, + mms); +} + +#define SDMA_RLC_DUMMY_DEFAULT 0xf + +static int update_mqd_sdma(struct mqd_manager *mm, void *mqd, + struct queue_properties *q) +{ + struct v9_sdma_mqd *m; + + m = get_sdma_mqd(mqd); + m->sdmax_rlcx_rb_cntl = order_base_2(q->queue_size / 4) + << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT | + q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT | + 1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT | + 6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT; + + m->sdmax_rlcx_rb_base = lower_32_bits(q->queue_address >> 8); + m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8); + m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr); + m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr); + m->sdmax_rlcx_doorbell_offset = + q->doorbell_off << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT; + + m->sdma_engine_id = q->sdma_engine_id; + m->sdma_queue_id = q->sdma_queue_id; + m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT; + + q->is_active = (q->queue_size > 0 && + q->queue_address != 0 && + q->queue_percent > 0 && + !q->is_evicted); + + return 0; +} + +/* + * * preempt type here is ignored because there is only one way + * * to preempt sdma queue + */ +static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd, + enum kfd_preempt_type type, + unsigned int timeout, uint32_t pipe_id, + uint32_t queue_id) +{ + return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout); +} + +static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd, + uint64_t queue_address, uint32_t pipe_id, + uint32_t queue_id) +{ + return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd); +} + +#if defined(CONFIG_DEBUG_FS) + +static int debugfs_show_mqd(struct seq_file *m, void *data) +{ + seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4, + data, sizeof(struct v9_mqd), false); + return 0; +} + +static int debugfs_show_mqd_sdma(struct seq_file *m, void *data) +{ + seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4, + data, sizeof(struct v9_sdma_mqd), false); + return 0; +} + +#endif + +struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, + struct kfd_dev *dev) +{ + struct mqd_manager *mqd; + + if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) + return NULL; + + mqd = kzalloc(sizeof(*mqd), GFP_NOIO); + if (!mqd) + return NULL; + + mqd->dev = dev; + + switch (type) { + case KFD_MQD_TYPE_CP: + case KFD_MQD_TYPE_COMPUTE: + mqd->init_mqd = init_mqd; + mqd->uninit_mqd = uninit_mqd; + mqd->load_mqd = load_mqd; + mqd->update_mqd = update_mqd; + mqd->destroy_mqd = destroy_mqd; + mqd->is_occupied = is_occupied; +#if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd; +#endif + break; + case KFD_MQD_TYPE_HIQ: + mqd->init_mqd = init_mqd_hiq; + mqd->uninit_mqd = uninit_mqd; + mqd->load_mqd = load_mqd; + mqd->update_mqd = update_mqd_hiq; + mqd->destroy_mqd = destroy_mqd; + mqd->is_occupied = is_occupied; +#if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd; +#endif + break; + case KFD_MQD_TYPE_SDMA: + mqd->init_mqd = init_mqd_sdma; + mqd->uninit_mqd = uninit_mqd_sdma; + mqd->load_mqd = load_mqd_sdma; + mqd->update_mqd = update_mqd_sdma; + mqd->destroy_mqd = destroy_mqd_sdma; + mqd->is_occupied = is_occupied_sdma; +#if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd_sdma; +#endif + break; + default: + kfree(mqd); + return NULL; + } + + return mqd; +} diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index 89e4242..481307b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -394,7 +394,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) return NULL; - mqd = kzalloc(sizeof(*mqd), GFP_KERNEL); + mqd = kzalloc(sizeof(*mqd), GFP_NOIO); if (!mqd) return NULL; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 7614375..c317feb4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -26,8 +26,6 @@ #include "kfd_device_queue_manager.h" #include "kfd_kernel_queue.h" #include "kfd_priv.h" -#include "kfd_pm4_headers_vi.h" -#include "kfd_pm4_opcodes.h" static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes, unsigned int buffer_size_bytes) @@ -39,18 +37,6 @@ static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes, *wptr = temp; } -static unsigned int build_pm4_header(unsigned int opcode, size_t packet_size) -{ - union PM4_MES_TYPE_3_HEADER header; - - header.u32All = 0; - header.opcode = opcode; - header.count = packet_size / 4 - 2; - header.type = PM4_TYPE_3; - - return header.u32All; -} - static void pm_calc_rlib_size(struct packet_manager *pm, unsigned int *rlib_size, bool *over_subscription) @@ -80,9 +66,9 @@ static void pm_calc_rlib_size(struct packet_manager *pm, pr_debug("Over subscribed runlist\n"); } - map_queue_size = sizeof(struct pm4_mes_map_queues); + map_queue_size = pm->pmf->map_queues_size; /* calculate run list ib allocation size */ - *rlib_size = process_count * sizeof(struct pm4_mes_map_process) + + *rlib_size = process_count * pm->pmf->map_process_size + queue_count * map_queue_size; /* @@ -90,7 +76,7 @@ static void pm_calc_rlib_size(struct packet_manager *pm, * when over subscription */ if (*over_subscription) - *rlib_size += sizeof(struct pm4_mes_runlist); + *rlib_size += pm->pmf->runlist_size; pr_debug("runlist ib size %d\n", *rlib_size); } @@ -108,12 +94,14 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm, pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription); + mutex_lock(&pm->lock); + retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size, &pm->ib_buffer_obj); if (retval) { pr_err("Failed to allocate runlist IB\n"); - return retval; + goto out; } *(void **)rl_buffer = pm->ib_buffer_obj->cpu_ptr; @@ -121,139 +109,10 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm, memset(*rl_buffer, 0, *rl_buffer_size); pm->allocated = true; - return retval; -} - -static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer, - uint64_t ib, size_t ib_size_in_dwords, bool chain) -{ - struct pm4_mes_runlist *packet; - int concurrent_proc_cnt = 0; - struct kfd_dev *kfd = pm->dqm->dev; - - if (WARN_ON(!ib)) - return -EFAULT; - - /* Determine the number of processes to map together to HW: - * it can not exceed the number of VMIDs available to the - * scheduler, and it is determined by the smaller of the number - * of processes in the runlist and kfd module parameter - * hws_max_conc_proc. - * Note: the arbitration between the number of VMIDs and - * hws_max_conc_proc has been done in - * kgd2kfd_device_init(). - */ - concurrent_proc_cnt = min(pm->dqm->processes_count, - kfd->max_proc_per_quantum); - - packet = (struct pm4_mes_runlist *)buffer; - - memset(buffer, 0, sizeof(struct pm4_mes_runlist)); - packet->header.u32All = build_pm4_header(IT_RUN_LIST, - sizeof(struct pm4_mes_runlist)); - - packet->bitfields4.ib_size = ib_size_in_dwords; - packet->bitfields4.chain = chain ? 1 : 0; - packet->bitfields4.offload_polling = 0; - packet->bitfields4.valid = 1; - packet->bitfields4.process_cnt = concurrent_proc_cnt; - packet->ordinal2 = lower_32_bits(ib); - packet->bitfields3.ib_base_hi = upper_32_bits(ib); - - return 0; -} - -static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer, - struct qcm_process_device *qpd) -{ - struct pm4_mes_map_process *packet; - - packet = (struct pm4_mes_map_process *)buffer; - memset(buffer, 0, sizeof(struct pm4_mes_map_process)); - - packet->header.u32All = build_pm4_header(IT_MAP_PROCESS, - sizeof(struct pm4_mes_map_process)); - packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0; - packet->bitfields2.process_quantum = 1; - packet->bitfields2.pasid = qpd->pqm->process->pasid; - packet->bitfields3.page_table_base = qpd->page_table_base; - packet->bitfields10.gds_size = qpd->gds_size; - packet->bitfields10.num_gws = qpd->num_gws; - packet->bitfields10.num_oac = qpd->num_oac; - packet->bitfields10.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count; - - packet->sh_mem_config = qpd->sh_mem_config; - packet->sh_mem_bases = qpd->sh_mem_bases; - packet->sh_mem_ape1_base = qpd->sh_mem_ape1_base; - packet->sh_mem_ape1_limit = qpd->sh_mem_ape1_limit; - - /* TODO: scratch support */ - packet->sh_hidden_private_base_vmid = 0; - - packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area); - packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area); - - return 0; -} - -static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer, - struct queue *q, bool is_static) -{ - struct pm4_mes_map_queues *packet; - bool use_static = is_static; - - packet = (struct pm4_mes_map_queues *)buffer; - memset(buffer, 0, sizeof(struct pm4_mes_map_queues)); - - packet->header.u32All = build_pm4_header(IT_MAP_QUEUES, - sizeof(struct pm4_mes_map_queues)); - packet->bitfields2.alloc_format = - alloc_format__mes_map_queues__one_per_pipe_vi; - packet->bitfields2.num_queues = 1; - packet->bitfields2.queue_sel = - queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi; - - packet->bitfields2.engine_sel = - engine_sel__mes_map_queues__compute_vi; - packet->bitfields2.queue_type = - queue_type__mes_map_queues__normal_compute_vi; - - switch (q->properties.type) { - case KFD_QUEUE_TYPE_COMPUTE: - if (use_static) - packet->bitfields2.queue_type = - queue_type__mes_map_queues__normal_latency_static_queue_vi; - break; - case KFD_QUEUE_TYPE_DIQ: - packet->bitfields2.queue_type = - queue_type__mes_map_queues__debug_interface_queue_vi; - break; - case KFD_QUEUE_TYPE_SDMA: - packet->bitfields2.engine_sel = q->properties.sdma_engine_id + - engine_sel__mes_map_queues__sdma0_vi; - use_static = false; /* no static queues under SDMA */ - break; - default: - WARN(1, "queue type %d", q->properties.type); - return -EINVAL; - } - packet->bitfields3.doorbell_offset = - q->properties.doorbell_off; - - packet->mqd_addr_lo = - lower_32_bits(q->gart_mqd_addr); - - packet->mqd_addr_hi = - upper_32_bits(q->gart_mqd_addr); - - packet->wptr_addr_lo = - lower_32_bits((uint64_t)q->properties.write_ptr); - - packet->wptr_addr_hi = - upper_32_bits((uint64_t)q->properties.write_ptr); - - return 0; +out: + mutex_unlock(&pm->lock); + return retval; } static int pm_create_runlist_ib(struct packet_manager *pm, @@ -293,12 +152,12 @@ static int pm_create_runlist_ib(struct packet_manager *pm, return -ENOMEM; } - retval = pm_create_map_process(pm, &rl_buffer[rl_wptr], qpd); + retval = pm->pmf->map_process(pm, &rl_buffer[rl_wptr], qpd); if (retval) return retval; proccesses_mapped++; - inc_wptr(&rl_wptr, sizeof(struct pm4_mes_map_process), + inc_wptr(&rl_wptr, pm->pmf->map_process_size, alloc_size_bytes); list_for_each_entry(kq, &qpd->priv_queue_list, list) { @@ -308,7 +167,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm, pr_debug("static_queue, mapping kernel q %d, is debug status %d\n", kq->queue->queue, qpd->is_debug); - retval = pm_create_map_queue(pm, + retval = pm->pmf->map_queues(pm, &rl_buffer[rl_wptr], kq->queue, qpd->is_debug); @@ -316,7 +175,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm, return retval; inc_wptr(&rl_wptr, - sizeof(struct pm4_mes_map_queues), + pm->pmf->map_queues_size, alloc_size_bytes); } @@ -327,7 +186,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm, pr_debug("static_queue, mapping user queue %d, is debug status %d\n", q->queue, qpd->is_debug); - retval = pm_create_map_queue(pm, + retval = pm->pmf->map_queues(pm, &rl_buffer[rl_wptr], q, qpd->is_debug); @@ -336,7 +195,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm, return retval; inc_wptr(&rl_wptr, - sizeof(struct pm4_mes_map_queues), + pm->pmf->map_queues_size, alloc_size_bytes); } } @@ -344,7 +203,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm, pr_debug("Finished map process and queues to runlist\n"); if (is_over_subscription) - retval = pm_create_runlist(pm, &rl_buffer[rl_wptr], + retval = pm->pmf->runlist(pm, &rl_buffer[rl_wptr], *rl_gpu_addr, alloc_size_bytes / sizeof(uint32_t), true); @@ -356,45 +215,29 @@ static int pm_create_runlist_ib(struct packet_manager *pm, return retval; } -/* pm_create_release_mem - Create a RELEASE_MEM packet and return the size - * of this packet - * @gpu_addr - GPU address of the packet. It's a virtual address. - * @buffer - buffer to fill up with the packet. It's a CPU kernel pointer - * Return - length of the packet - */ -uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer) -{ - struct pm4_mec_release_mem *packet; - - WARN_ON(!buffer); - - packet = (struct pm4_mec_release_mem *)buffer; - memset(buffer, 0, sizeof(*packet)); - - packet->header.u32All = build_pm4_header(IT_RELEASE_MEM, - sizeof(*packet)); - - packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT; - packet->bitfields2.event_index = event_index___release_mem__end_of_pipe; - packet->bitfields2.tcl1_action_ena = 1; - packet->bitfields2.tc_action_ena = 1; - packet->bitfields2.cache_policy = cache_policy___release_mem__lru; - packet->bitfields2.atc = 0; - - packet->bitfields3.data_sel = data_sel___release_mem__send_32_bit_low; - packet->bitfields3.int_sel = - int_sel___release_mem__send_interrupt_after_write_confirm; - - packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2; - packet->address_hi = upper_32_bits(gpu_addr); - - packet->data_lo = 0; - - return sizeof(*packet) / sizeof(unsigned int); -} - int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) { + switch (dqm->dev->device_info->asic_family) { + case CHIP_KAVERI: + case CHIP_HAWAII: + /* PM4 packet structures on CIK are the same as on VI */ + case CHIP_CARRIZO: + case CHIP_TONGA: + case CHIP_FIJI: + case CHIP_POLARIS10: + case CHIP_POLARIS11: + pm->pmf = &kfd_vi_pm_funcs; + break; + case CHIP_VEGA10: + case CHIP_RAVEN: + pm->pmf = &kfd_v9_pm_funcs; + break; + default: + WARN(1, "Unexpected ASIC family %u", + dqm->dev->device_info->asic_family); + return -EINVAL; + } + pm->dqm = dqm; mutex_init(&pm->lock); pm->priv_queue = kernel_queue_init(dqm->dev, KFD_QUEUE_TYPE_HIQ); @@ -416,38 +259,25 @@ void pm_uninit(struct packet_manager *pm) int pm_send_set_resources(struct packet_manager *pm, struct scheduling_resources *res) { - struct pm4_mes_set_resources *packet; + uint32_t *buffer, size; int retval = 0; + size = pm->pmf->set_resources_size; mutex_lock(&pm->lock); pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, - sizeof(*packet) / sizeof(uint32_t), - (unsigned int **)&packet); - if (!packet) { + size / sizeof(uint32_t), + (unsigned int **)&buffer); + if (!buffer) { pr_err("Failed to allocate buffer on kernel queue\n"); retval = -ENOMEM; goto out; } - memset(packet, 0, sizeof(struct pm4_mes_set_resources)); - packet->header.u32All = build_pm4_header(IT_SET_RESOURCES, - sizeof(struct pm4_mes_set_resources)); - - packet->bitfields2.queue_type = - queue_type__mes_set_resources__hsa_interface_queue_hiq; - packet->bitfields2.vmid_mask = res->vmid_mask; - packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100; - packet->bitfields7.oac_mask = res->oac_mask; - packet->bitfields8.gds_heap_base = res->gds_heap_base; - packet->bitfields8.gds_heap_size = res->gds_heap_size; - - packet->gws_mask_lo = lower_32_bits(res->gws_mask); - packet->gws_mask_hi = upper_32_bits(res->gws_mask); - - packet->queue_mask_lo = lower_32_bits(res->queue_mask); - packet->queue_mask_hi = upper_32_bits(res->queue_mask); - - pm->priv_queue->ops.submit_packet(pm->priv_queue); + retval = pm->pmf->set_resources(pm, buffer, res); + if (!retval) + pm->priv_queue->ops.submit_packet(pm->priv_queue); + else + pm->priv_queue->ops.rollback_packet(pm->priv_queue); out: mutex_unlock(&pm->lock); @@ -469,7 +299,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues) pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr); - packet_size_dwords = sizeof(struct pm4_mes_runlist) / sizeof(uint32_t); + packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t); mutex_lock(&pm->lock); retval = pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, @@ -477,7 +307,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues) if (retval) goto fail_acquire_packet_buffer; - retval = pm_create_runlist(pm, rl_buffer, rl_gpu_ib_addr, + retval = pm->pmf->runlist(pm, rl_buffer, rl_gpu_ib_addr, rl_ib_size / sizeof(uint32_t), false); if (retval) goto fail_create_runlist; @@ -500,37 +330,29 @@ fail_create_runlist_ib: int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, uint32_t fence_value) { - int retval; - struct pm4_mes_query_status *packet; + uint32_t *buffer, size; + int retval = 0; if (WARN_ON(!fence_address)) return -EFAULT; + size = pm->pmf->query_status_size; mutex_lock(&pm->lock); - retval = pm->priv_queue->ops.acquire_packet_buffer( - pm->priv_queue, - sizeof(struct pm4_mes_query_status) / sizeof(uint32_t), - (unsigned int **)&packet); - if (retval) - goto fail_acquire_packet_buffer; - - packet->header.u32All = build_pm4_header(IT_QUERY_STATUS, - sizeof(struct pm4_mes_query_status)); - - packet->bitfields2.context_id = 0; - packet->bitfields2.interrupt_sel = - interrupt_sel__mes_query_status__completion_status; - packet->bitfields2.command = - command__mes_query_status__fence_only_after_write_ack; - - packet->addr_hi = upper_32_bits((uint64_t)fence_address); - packet->addr_lo = lower_32_bits((uint64_t)fence_address); - packet->data_hi = upper_32_bits((uint64_t)fence_value); - packet->data_lo = lower_32_bits((uint64_t)fence_value); + pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, + size / sizeof(uint32_t), (unsigned int **)&buffer); + if (!buffer) { + pr_err("Failed to allocate buffer on kernel queue\n"); + retval = -ENOMEM; + goto out; + } - pm->priv_queue->ops.submit_packet(pm->priv_queue); + retval = pm->pmf->query_status(pm, buffer, fence_address, fence_value); + if (!retval) + pm->priv_queue->ops.submit_packet(pm->priv_queue); + else + pm->priv_queue->ops.rollback_packet(pm->priv_queue); -fail_acquire_packet_buffer: +out: mutex_unlock(&pm->lock); return retval; } @@ -540,82 +362,27 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, uint32_t filter_param, bool reset, unsigned int sdma_engine) { - int retval; - uint32_t *buffer; - struct pm4_mes_unmap_queues *packet; + uint32_t *buffer, size; + int retval = 0; + size = pm->pmf->unmap_queues_size; mutex_lock(&pm->lock); - retval = pm->priv_queue->ops.acquire_packet_buffer( - pm->priv_queue, - sizeof(struct pm4_mes_unmap_queues) / sizeof(uint32_t), - &buffer); - if (retval) - goto err_acquire_packet_buffer; - - packet = (struct pm4_mes_unmap_queues *)buffer; - memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues)); - pr_debug("static_queue: unmapping queues: filter is %d , reset is %d , type is %d\n", - filter, reset, type); - packet->header.u32All = build_pm4_header(IT_UNMAP_QUEUES, - sizeof(struct pm4_mes_unmap_queues)); - switch (type) { - case KFD_QUEUE_TYPE_COMPUTE: - case KFD_QUEUE_TYPE_DIQ: - packet->bitfields2.engine_sel = - engine_sel__mes_unmap_queues__compute; - break; - case KFD_QUEUE_TYPE_SDMA: - packet->bitfields2.engine_sel = - engine_sel__mes_unmap_queues__sdma0 + sdma_engine; - break; - default: - WARN(1, "queue type %d", type); - retval = -EINVAL; - goto err_invalid; + pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue, + size / sizeof(uint32_t), (unsigned int **)&buffer); + if (!buffer) { + pr_err("Failed to allocate buffer on kernel queue\n"); + retval = -ENOMEM; + goto out; } - if (reset) - packet->bitfields2.action = - action__mes_unmap_queues__reset_queues; + retval = pm->pmf->unmap_queues(pm, buffer, type, filter, filter_param, + reset, sdma_engine); + if (!retval) + pm->priv_queue->ops.submit_packet(pm->priv_queue); else - packet->bitfields2.action = - action__mes_unmap_queues__preempt_queues; - - switch (filter) { - case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE: - packet->bitfields2.queue_sel = - queue_sel__mes_unmap_queues__perform_request_on_specified_queues; - packet->bitfields2.num_queues = 1; - packet->bitfields3b.doorbell_offset0 = filter_param; - break; - case KFD_UNMAP_QUEUES_FILTER_BY_PASID: - packet->bitfields2.queue_sel = - queue_sel__mes_unmap_queues__perform_request_on_pasid_queues; - packet->bitfields3a.pasid = filter_param; - break; - case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES: - packet->bitfields2.queue_sel = - queue_sel__mes_unmap_queues__unmap_all_queues; - break; - case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES: - /* in this case, we do not preempt static queues */ - packet->bitfields2.queue_sel = - queue_sel__mes_unmap_queues__unmap_all_non_static_queues; - break; - default: - WARN(1, "filter %d", filter); - retval = -EINVAL; - goto err_invalid; - } + pm->priv_queue->ops.rollback_packet(pm->priv_queue); - pm->priv_queue->ops.submit_packet(pm->priv_queue); - - mutex_unlock(&pm->lock); - return 0; - -err_invalid: - pm->priv_queue->ops.rollback_packet(pm->priv_queue); -err_acquire_packet_buffer: +out: mutex_unlock(&pm->lock); return retval; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h new file mode 100644 index 0000000..f2bcf5c --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h @@ -0,0 +1,583 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef F32_MES_PM4_PACKETS_H +#define F32_MES_PM4_PACKETS_H + +#ifndef PM4_MES_HEADER_DEFINED +#define PM4_MES_HEADER_DEFINED +union PM4_MES_TYPE_3_HEADER { + struct { + uint32_t reserved1 : 8; /* < reserved */ + uint32_t opcode : 8; /* < IT opcode */ + uint32_t count : 14;/* < number of DWORDs - 1 in the + * information body. + */ + uint32_t type : 2; /* < packet identifier. + * It should be 3 for type 3 packets + */ + }; + uint32_t u32All; +}; +#endif /* PM4_MES_HEADER_DEFINED */ + +/*--------------------MES_SET_RESOURCES--------------------*/ + +#ifndef PM4_MES_SET_RESOURCES_DEFINED +#define PM4_MES_SET_RESOURCES_DEFINED +enum mes_set_resources_queue_type_enum { + queue_type__mes_set_resources__kernel_interface_queue_kiq = 0, + queue_type__mes_set_resources__hsa_interface_queue_hiq = 1, + queue_type__mes_set_resources__hsa_debug_interface_queue = 4 +}; + + +struct pm4_mes_set_resources { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + union { + struct { + uint32_t vmid_mask:16; + uint32_t unmap_latency:8; + uint32_t reserved1:5; + enum mes_set_resources_queue_type_enum queue_type:3; + } bitfields2; + uint32_t ordinal2; + }; + + uint32_t queue_mask_lo; + uint32_t queue_mask_hi; + uint32_t gws_mask_lo; + uint32_t gws_mask_hi; + + union { + struct { + uint32_t oac_mask:16; + uint32_t reserved2:16; + } bitfields7; + uint32_t ordinal7; + }; + + union { + struct { + uint32_t gds_heap_base:6; + uint32_t reserved3:5; + uint32_t gds_heap_size:6; + uint32_t reserved4:15; + } bitfields8; + uint32_t ordinal8; + }; + +}; +#endif + +/*--------------------MES_RUN_LIST--------------------*/ + +#ifndef PM4_MES_RUN_LIST_DEFINED +#define PM4_MES_RUN_LIST_DEFINED + +struct pm4_mes_runlist { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + union { + struct { + uint32_t reserved1:2; + uint32_t ib_base_lo:30; + } bitfields2; + uint32_t ordinal2; + }; + + uint32_t ib_base_hi; + + union { + struct { + uint32_t ib_size:20; + uint32_t chain:1; + uint32_t offload_polling:1; + uint32_t reserved2:1; + uint32_t valid:1; + uint32_t process_cnt:4; + uint32_t reserved3:4; + } bitfields4; + uint32_t ordinal4; + }; + +}; +#endif + +/*--------------------MES_MAP_PROCESS--------------------*/ + +#ifndef PM4_MES_MAP_PROCESS_DEFINED +#define PM4_MES_MAP_PROCESS_DEFINED + +struct pm4_mes_map_process { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + union { + struct { + uint32_t pasid:16; + uint32_t reserved1:8; + uint32_t diq_enable:1; + uint32_t process_quantum:7; + } bitfields2; + uint32_t ordinal2; + }; + + uint32_t vm_context_page_table_base_addr_lo32; + + uint32_t vm_context_page_table_base_addr_hi32; + + uint32_t sh_mem_bases; + + uint32_t sh_mem_config; + + uint32_t sq_shader_tba_lo; + + uint32_t sq_shader_tba_hi; + + uint32_t sq_shader_tma_lo; + + uint32_t sq_shader_tma_hi; + + uint32_t reserved6; + + uint32_t gds_addr_lo; + + uint32_t gds_addr_hi; + + union { + struct { + uint32_t num_gws:6; + uint32_t reserved7:1; + uint32_t sdma_enable:1; + uint32_t num_oac:4; + uint32_t reserved8:4; + uint32_t gds_size:6; + uint32_t num_queues:10; + } bitfields14; + uint32_t ordinal14; + }; + + uint32_t completion_signal_lo; + + uint32_t completion_signal_hi; + +}; + +#endif + +/*--------------------MES_MAP_PROCESS_VM--------------------*/ + +#ifndef PM4_MES_MAP_PROCESS_VM_DEFINED +#define PM4_MES_MAP_PROCESS_VM_DEFINED + +struct PM4_MES_MAP_PROCESS_VM { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + uint32_t reserved1; + + uint32_t vm_context_cntl; + + uint32_t reserved2; + + uint32_t vm_context_page_table_end_addr_lo32; + + uint32_t vm_context_page_table_end_addr_hi32; + + uint32_t vm_context_page_table_start_addr_lo32; + + uint32_t vm_context_page_table_start_addr_hi32; + + uint32_t reserved3; + + uint32_t reserved4; + + uint32_t reserved5; + + uint32_t reserved6; + + uint32_t reserved7; + + uint32_t reserved8; + + uint32_t completion_signal_lo32; + + uint32_t completion_signal_hi32; + +}; +#endif + +/*--------------------MES_MAP_QUEUES--------------------*/ + +#ifndef PM4_MES_MAP_QUEUES_VI_DEFINED +#define PM4_MES_MAP_QUEUES_VI_DEFINED +enum mes_map_queues_queue_sel_enum { + queue_sel__mes_map_queues__map_to_specified_queue_slots_vi = 0, +queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi = 1 +}; + +enum mes_map_queues_queue_type_enum { + queue_type__mes_map_queues__normal_compute_vi = 0, + queue_type__mes_map_queues__debug_interface_queue_vi = 1, + queue_type__mes_map_queues__normal_latency_static_queue_vi = 2, +queue_type__mes_map_queues__low_latency_static_queue_vi = 3 +}; + +enum mes_map_queues_alloc_format_enum { + alloc_format__mes_map_queues__one_per_pipe_vi = 0, +alloc_format__mes_map_queues__all_on_one_pipe_vi = 1 +}; + +enum mes_map_queues_engine_sel_enum { + engine_sel__mes_map_queues__compute_vi = 0, + engine_sel__mes_map_queues__sdma0_vi = 2, + engine_sel__mes_map_queues__sdma1_vi = 3 +}; + + +struct pm4_mes_map_queues { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + union { + struct { + uint32_t reserved1:4; + enum mes_map_queues_queue_sel_enum queue_sel:2; + uint32_t reserved2:15; + enum mes_map_queues_queue_type_enum queue_type:3; + enum mes_map_queues_alloc_format_enum alloc_format:2; + enum mes_map_queues_engine_sel_enum engine_sel:3; + uint32_t num_queues:3; + } bitfields2; + uint32_t ordinal2; + }; + + union { + struct { + uint32_t reserved3:1; + uint32_t check_disable:1; + uint32_t doorbell_offset:26; + uint32_t reserved4:4; + } bitfields3; + uint32_t ordinal3; + }; + + uint32_t mqd_addr_lo; + uint32_t mqd_addr_hi; + uint32_t wptr_addr_lo; + uint32_t wptr_addr_hi; +}; +#endif + +/*--------------------MES_QUERY_STATUS--------------------*/ + +#ifndef PM4_MES_QUERY_STATUS_DEFINED +#define PM4_MES_QUERY_STATUS_DEFINED +enum mes_query_status_interrupt_sel_enum { + interrupt_sel__mes_query_status__completion_status = 0, + interrupt_sel__mes_query_status__process_status = 1, + interrupt_sel__mes_query_status__queue_status = 2 +}; + +enum mes_query_status_command_enum { + command__mes_query_status__interrupt_only = 0, + command__mes_query_status__fence_only_immediate = 1, + command__mes_query_status__fence_only_after_write_ack = 2, + command__mes_query_status__fence_wait_for_write_ack_send_interrupt = 3 +}; + +enum mes_query_status_engine_sel_enum { + engine_sel__mes_query_status__compute = 0, + engine_sel__mes_query_status__sdma0_queue = 2, + engine_sel__mes_query_status__sdma1_queue = 3 +}; + +struct pm4_mes_query_status { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + union { + struct { + uint32_t context_id:28; + enum mes_query_status_interrupt_sel_enum interrupt_sel:2; + enum mes_query_status_command_enum command:2; + } bitfields2; + uint32_t ordinal2; + }; + + union { + struct { + uint32_t pasid:16; + uint32_t reserved1:16; + } bitfields3a; + struct { + uint32_t reserved2:2; + uint32_t doorbell_offset:26; + enum mes_query_status_engine_sel_enum engine_sel:3; + uint32_t reserved3:1; + } bitfields3b; + uint32_t ordinal3; + }; + + uint32_t addr_lo; + uint32_t addr_hi; + uint32_t data_lo; + uint32_t data_hi; +}; +#endif + +/*--------------------MES_UNMAP_QUEUES--------------------*/ + +#ifndef PM4_MES_UNMAP_QUEUES_DEFINED +#define PM4_MES_UNMAP_QUEUES_DEFINED +enum mes_unmap_queues_action_enum { + action__mes_unmap_queues__preempt_queues = 0, + action__mes_unmap_queues__reset_queues = 1, + action__mes_unmap_queues__disable_process_queues = 2, + action__mes_unmap_queues__reserved = 3 +}; + +enum mes_unmap_queues_queue_sel_enum { + queue_sel__mes_unmap_queues__perform_request_on_specified_queues = 0, + queue_sel__mes_unmap_queues__perform_request_on_pasid_queues = 1, + queue_sel__mes_unmap_queues__unmap_all_queues = 2, + queue_sel__mes_unmap_queues__unmap_all_non_static_queues = 3 +}; + +enum mes_unmap_queues_engine_sel_enum { + engine_sel__mes_unmap_queues__compute = 0, + engine_sel__mes_unmap_queues__sdma0 = 2, + engine_sel__mes_unmap_queues__sdmal = 3 +}; + +struct pm4_mes_unmap_queues { + union { + union PM4_MES_TYPE_3_HEADER header; /* header */ + uint32_t ordinal1; + }; + + union { + struct { + enum mes_unmap_queues_action_enum action:2; + uint32_t reserved1:2; + enum mes_unmap_queues_queue_sel_enum queue_sel:2; + uint32_t reserved2:20; + enum mes_unmap_queues_engine_sel_enum engine_sel:3; + uint32_t num_queues:3; + } bitfields2; + uint32_t ordinal2; + }; + + union { + struct { + uint32_t pasid:16; + uint32_t reserved3:16; + } bitfields3a; + struct { + uint32_t reserved4:2; + uint32_t doorbell_offset0:26; + int32_t reserved5:4; + } bitfields3b; + uint32_t ordinal3; + }; + + union { + struct { + uint32_t reserved6:2; + uint32_t doorbell_offset1:26; + uint32_t reserved7:4; + } bitfields4; + uint32_t ordinal4; + }; + + union { + struct { + uint32_t reserved8:2; + uint32_t doorbell_offset2:26; + uint32_t reserved9:4; + } bitfields5; + uint32_t ordinal5; + }; + + union { + struct { + uint32_t reserved10:2; + uint32_t doorbell_offset3:26; + uint32_t reserved11:4; + } bitfields6; + uint32_t ordinal6; + }; +}; +#endif + +#ifndef PM4_MEC_RELEASE_MEM_DEFINED +#define PM4_MEC_RELEASE_MEM_DEFINED + +enum mec_release_mem_event_index_enum { + event_index__mec_release_mem__end_of_pipe = 5, + event_index__mec_release_mem__shader_done = 6 +}; + +enum mec_release_mem_cache_policy_enum { + cache_policy__mec_release_mem__lru = 0, + cache_policy__mec_release_mem__stream = 1 +}; + +enum mec_release_mem_pq_exe_status_enum { + pq_exe_status__mec_release_mem__default = 0, + pq_exe_status__mec_release_mem__phase_update = 1 +}; + +enum mec_release_mem_dst_sel_enum { + dst_sel__mec_release_mem__memory_controller = 0, + dst_sel__mec_release_mem__tc_l2 = 1, + dst_sel__mec_release_mem__queue_write_pointer_register = 2, + dst_sel__mec_release_mem__queue_write_pointer_poll_mask_bit = 3 +}; + +enum mec_release_mem_int_sel_enum { + int_sel__mec_release_mem__none = 0, + int_sel__mec_release_mem__send_interrupt_only = 1, + int_sel__mec_release_mem__send_interrupt_after_write_confirm = 2, + int_sel__mec_release_mem__send_data_after_write_confirm = 3, + int_sel__mec_release_mem__unconditionally_send_int_ctxid = 4, + int_sel__mec_release_mem__conditionally_send_int_ctxid_based_on_32_bit_compare = 5, + int_sel__mec_release_mem__conditionally_send_int_ctxid_based_on_64_bit_compare = 6 +}; + +enum mec_release_mem_data_sel_enum { + data_sel__mec_release_mem__none = 0, + data_sel__mec_release_mem__send_32_bit_low = 1, + data_sel__mec_release_mem__send_64_bit_data = 2, + data_sel__mec_release_mem__send_gpu_clock_counter = 3, + data_sel__mec_release_mem__send_cp_perfcounter_hi_lo = 4, + data_sel__mec_release_mem__store_gds_data_to_memory = 5 +}; + +struct pm4_mec_release_mem { + union { + union PM4_MES_TYPE_3_HEADER header; /*header */ + unsigned int ordinal1; + }; + + union { + struct { + unsigned int event_type:6; + unsigned int reserved1:2; + enum mec_release_mem_event_index_enum event_index:4; + unsigned int tcl1_vol_action_ena:1; + unsigned int tc_vol_action_ena:1; + unsigned int reserved2:1; + unsigned int tc_wb_action_ena:1; + unsigned int tcl1_action_ena:1; + unsigned int tc_action_ena:1; + uint32_t reserved3:1; + uint32_t tc_nc_action_ena:1; + uint32_t tc_wc_action_ena:1; + uint32_t tc_md_action_ena:1; + uint32_t reserved4:3; + enum mec_release_mem_cache_policy_enum cache_policy:2; + uint32_t reserved5:2; + enum mec_release_mem_pq_exe_status_enum pq_exe_status:1; + uint32_t reserved6:2; + } bitfields2; + unsigned int ordinal2; + }; + + union { + struct { + uint32_t reserved7:16; + enum mec_release_mem_dst_sel_enum dst_sel:2; + uint32_t reserved8:6; + enum mec_release_mem_int_sel_enum int_sel:3; + uint32_t reserved9:2; + enum mec_release_mem_data_sel_enum data_sel:3; + } bitfields3; + unsigned int ordinal3; + }; + + union { + struct { + uint32_t reserved10:2; + unsigned int address_lo_32b:30; + } bitfields4; + struct { + uint32_t reserved11:3; + uint32_t address_lo_64b:29; + } bitfields4b; + uint32_t reserved12; + unsigned int ordinal4; + }; + + union { + uint32_t address_hi; + uint32_t reserved13; + uint32_t ordinal5; + }; + + union { + uint32_t data_lo; + uint32_t cmp_data_lo; + struct { + uint32_t dw_offset:16; + uint32_t num_dwords:16; + } bitfields6c; + uint32_t reserved14; + uint32_t ordinal6; + }; + + union { + uint32_t data_hi; + uint32_t cmp_data_hi; + uint32_t reserved15; + uint32_t reserved16; + uint32_t ordinal7; + }; + + uint32_t int_ctxid; + +}; + +#endif + +enum { + CACHE_FLUSH_AND_INV_TS_EVENT = 0x00000014 +}; +#endif + diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 96a9cc0..5e3990b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -39,11 +39,37 @@ #include "amd_shared.h" +#define KFD_MAX_RING_ENTRY_SIZE 8 + #define KFD_SYSFS_FILE_MODE 0444 -#define KFD_MMAP_DOORBELL_MASK 0x8000000000000ull -#define KFD_MMAP_EVENTS_MASK 0x4000000000000ull -#define KFD_MMAP_RESERVED_MEM_MASK 0x2000000000000ull +/* GPU ID hash width in bits */ +#define KFD_GPU_ID_HASH_WIDTH 16 + +/* Use upper bits of mmap offset to store KFD driver specific information. + * BITS[63:62] - Encode MMAP type + * BITS[61:46] - Encode gpu_id. To identify to which GPU the offset belongs to + * BITS[45:0] - MMAP offset value + * + * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these + * defines are w.r.t to PAGE_SIZE + */ +#define KFD_MMAP_TYPE_SHIFT (62 - PAGE_SHIFT) +#define KFD_MMAP_TYPE_MASK (0x3ULL << KFD_MMAP_TYPE_SHIFT) +#define KFD_MMAP_TYPE_DOORBELL (0x3ULL << KFD_MMAP_TYPE_SHIFT) +#define KFD_MMAP_TYPE_EVENTS (0x2ULL << KFD_MMAP_TYPE_SHIFT) +#define KFD_MMAP_TYPE_RESERVED_MEM (0x1ULL << KFD_MMAP_TYPE_SHIFT) + +#define KFD_MMAP_GPU_ID_SHIFT (46 - PAGE_SHIFT) +#define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \ + << KFD_MMAP_GPU_ID_SHIFT) +#define KFD_MMAP_GPU_ID(gpu_id) ((((uint64_t)gpu_id) << KFD_MMAP_GPU_ID_SHIFT)\ + & KFD_MMAP_GPU_ID_MASK) +#define KFD_MMAP_GPU_ID_GET(offset) ((offset & KFD_MMAP_GPU_ID_MASK) \ + >> KFD_MMAP_GPU_ID_SHIFT) + +#define KFD_MMAP_OFFSET_VALUE_MASK (0x3FFFFFFFFFFFULL >> PAGE_SHIFT) +#define KFD_MMAP_OFFSET_VALUE_GET(offset) (offset & KFD_MMAP_OFFSET_VALUE_MASK) /* * When working with cp scheduler we should assign the HIQ manually or via @@ -55,9 +81,6 @@ #define KFD_CIK_HIQ_PIPE 4 #define KFD_CIK_HIQ_QUEUE 0 -/* GPU ID hash width in bits */ -#define KFD_GPU_ID_HASH_WIDTH 16 - /* Macro for allocating structures */ #define kfd_alloc_struct(ptr_to_struct) \ ((typeof(ptr_to_struct)) kzalloc(sizeof(*ptr_to_struct), GFP_KERNEL)) @@ -116,6 +139,11 @@ extern int debug_largebar; */ extern int ignore_crat; +/* + * Set sh_mem_config.retry_disable on Vega10 + */ +extern int vega10_noretry; + /** * enum kfd_sched_policy * @@ -148,6 +176,8 @@ enum cache_policy { cache_policy_noncoherent }; +#define KFD_IS_SOC15(chip) ((chip) >= CHIP_VEGA10) + struct kfd_event_interrupt_class { bool (*interrupt_isr)(struct kfd_dev *dev, const uint32_t *ih_ring_entry); @@ -160,6 +190,7 @@ struct kfd_device_info { const struct kfd_event_interrupt_class *event_interrupt_class; unsigned int max_pasid_bits; unsigned int max_no_of_hqd; + unsigned int doorbell_size; size_t ih_ring_entry_size; uint8_t num_of_watch_points; uint16_t mqd_size_aligned; @@ -173,6 +204,7 @@ struct kfd_mem_obj { uint32_t range_end; uint64_t gpu_addr; uint32_t *cpu_ptr; + void *gtt_mem; }; struct kfd_vmid_info { @@ -364,7 +396,7 @@ struct queue_properties { uint32_t queue_percent; uint32_t *read_ptr; uint32_t *write_ptr; - uint32_t __iomem *doorbell_ptr; + void __iomem *doorbell_ptr; uint32_t doorbell_off; bool is_interop; bool is_evicted; @@ -427,6 +459,7 @@ struct queue { uint32_t queue; unsigned int sdma_id; + unsigned int doorbell_id; struct kfd_process *process; struct kfd_dev *device; @@ -501,6 +534,9 @@ struct qcm_process_device { /* IB memory */ uint64_t ib_base; void *ib_kaddr; + + /* doorbell resources per process per device */ + unsigned long *doorbell_bitmap; }; /* KFD Memory Eviction */ @@ -512,6 +548,8 @@ struct qcm_process_device { /* Approx. time before evicting the process again */ #define PROCESS_ACTIVE_TIME_MS 10 +int kgd2kfd_quiesce_mm(struct mm_struct *mm); +int kgd2kfd_resume_mm(struct mm_struct *mm); int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, struct dma_fence *fence); @@ -681,6 +719,8 @@ struct kfd_process *kfd_get_process(const struct task_struct *); struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid); struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm); void kfd_unref_process(struct kfd_process *p); +int kfd_process_evict_queues(struct kfd_process *p); +int kfd_process_restore_queues(struct kfd_process *p); void kfd_suspend_all_processes(void); int kfd_resume_all_processes(void); @@ -693,7 +733,7 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, struct kfd_process *p); -int kfd_reserved_mem_mmap(struct kfd_process *process, +int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process, struct vm_area_struct *vma); /* KFD process API for creating and translating handles */ @@ -721,17 +761,20 @@ unsigned int kfd_pasid_alloc(void); void kfd_pasid_free(unsigned int pasid); /* Doorbells */ +size_t kfd_doorbell_process_slice(struct kfd_dev *kfd); int kfd_doorbell_init(struct kfd_dev *kfd); void kfd_doorbell_fini(struct kfd_dev *kfd); -int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma); -u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, +int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process, + struct vm_area_struct *vma); +void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, unsigned int *doorbell_off); void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr); u32 read_kernel_doorbell(u32 __iomem *db); -void write_kernel_doorbell(u32 __iomem *db, u32 value); -unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd, +void write_kernel_doorbell(void __iomem *db, u32 value); +void write_kernel_doorbell64(void __iomem *db, u64 value); +unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd, struct kfd_process *process, - unsigned int queue_id); + unsigned int doorbell_id); phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev, struct kfd_process *process); int kfd_alloc_process_doorbells(struct kfd_process *process); @@ -788,6 +831,8 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, struct kfd_dev *dev); struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type, struct kfd_dev *dev); +struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, + struct kfd_dev *dev); struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev); void device_queue_manager_uninit(struct device_queue_manager *dqm); struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, @@ -832,8 +877,42 @@ struct packet_manager { bool allocated; struct kfd_mem_obj *ib_buffer_obj; unsigned int ib_size_bytes; + + const struct packet_manager_funcs *pmf; +}; + +struct packet_manager_funcs { + /* Support ASIC-specific packet formats for PM4 packets */ + int (*map_process)(struct packet_manager *pm, uint32_t *buffer, + struct qcm_process_device *qpd); + int (*runlist)(struct packet_manager *pm, uint32_t *buffer, + uint64_t ib, size_t ib_size_in_dwords, bool chain); + int (*set_resources)(struct packet_manager *pm, uint32_t *buffer, + struct scheduling_resources *res); + int (*map_queues)(struct packet_manager *pm, uint32_t *buffer, + struct queue *q, bool is_static); + int (*unmap_queues)(struct packet_manager *pm, uint32_t *buffer, + enum kfd_queue_type type, + enum kfd_unmap_queues_filter mode, + uint32_t filter_param, bool reset, + unsigned int sdma_engine); + int (*query_status)(struct packet_manager *pm, uint32_t *buffer, + uint64_t fence_address, uint32_t fence_value); + int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer); + + /* Packet sizes */ + int map_process_size; + int runlist_size; + int set_resources_size; + int map_queues_size; + int unmap_queues_size; + int query_status_size; + int release_mem_size; }; +extern const struct packet_manager_funcs kfd_vi_pm_funcs; +extern const struct packet_manager_funcs kfd_v9_pm_funcs; + int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm); void pm_uninit(struct packet_manager *pm); int pm_send_set_resources(struct packet_manager *pm, @@ -849,12 +928,17 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, void pm_release_ib(struct packet_manager *pm); -uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer); +/* Following PM funcs can be shared among VI and AI */ +unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size); +int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer, + struct scheduling_resources *res); uint64_t kfd_get_number_elems(struct kfd_dev *kfd); /* Events */ extern const struct kfd_event_interrupt_class event_interrupt_class_cik; +extern const struct kfd_event_interrupt_class event_interrupt_class_v9; + extern const struct kfd_device_global_init_class device_global_init_class_cik; void kfd_event_init_process(struct kfd_process *p); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 1711ad0..1d80b4f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -332,6 +332,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) free_pages((unsigned long)pdd->qpd.cwsr_kaddr, get_order(KFD_CWSR_TBA_TMA_SIZE)); + kfree(pdd->qpd.doorbell_bitmap); idr_destroy(&pdd->alloc_idr); kfree(pdd); @@ -451,7 +452,8 @@ static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep) if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base) continue; - offset = (dev->id | KFD_MMAP_RESERVED_MEM_MASK) << PAGE_SHIFT; + offset = (KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id)) + << PAGE_SHIFT; qpd->tba_addr = (int64_t)vm_mmap(filep, 0, KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC, MAP_SHARED, offset); @@ -585,6 +587,31 @@ err_alloc_process: return ERR_PTR(err); } +static int init_doorbell_bitmap(struct qcm_process_device *qpd, + struct kfd_dev *dev) +{ + unsigned int i; + + if (!KFD_IS_SOC15(dev->device_info->asic_family)) + return 0; + + qpd->doorbell_bitmap = + kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, + BITS_PER_BYTE), GFP_KERNEL); + if (!qpd->doorbell_bitmap) + return -ENOMEM; + + /* Mask out any reserved doorbells */ + for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS; i++) + if ((dev->shared_resources.reserved_doorbell_mask & i) == + dev->shared_resources.reserved_doorbell_val) { + set_bit(i, qpd->doorbell_bitmap); + pr_debug("reserved doorbell 0x%03x\n", i); + } + + return 0; +} + struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, struct kfd_process *p) { @@ -606,6 +633,12 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, if (!pdd) return NULL; + if (init_doorbell_bitmap(&pdd->qpd, dev)) { + pr_err("Failed to init doorbell for process\n"); + kfree(pdd); + return NULL; + } + pdd->dev = dev; INIT_LIST_HEAD(&pdd->qpd.queues_list); INIT_LIST_HEAD(&pdd->qpd.priv_queue_list); @@ -808,7 +841,7 @@ struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm) * Eviction is reference-counted per process-device. This means multiple * evictions from different sources can be nested safely. */ -static int process_evict_queues(struct kfd_process *p) +int kfd_process_evict_queues(struct kfd_process *p) { struct kfd_process_device *pdd; int r = 0; @@ -844,7 +877,7 @@ fail: } /* process_restore_queues - Restore all user queues of a process */ -static int process_restore_queues(struct kfd_process *p) +int kfd_process_restore_queues(struct kfd_process *p) { struct kfd_process_device *pdd; int r, ret = 0; @@ -886,7 +919,7 @@ static void evict_process_worker(struct work_struct *work) flush_delayed_work(&p->restore_work); pr_debug("Started evicting pasid %d\n", p->pasid); - ret = process_evict_queues(p); + ret = kfd_process_evict_queues(p); if (!ret) { dma_fence_signal(p->ef); dma_fence_put(p->ef); @@ -946,7 +979,7 @@ static void restore_process_worker(struct work_struct *work) return; } - ret = process_restore_queues(p); + ret = kfd_process_restore_queues(p); if (!ret) pr_debug("Finished restoring pasid %d\n", p->pasid); else @@ -963,7 +996,7 @@ void kfd_suspend_all_processes(void) cancel_delayed_work_sync(&p->eviction_work); cancel_delayed_work_sync(&p->restore_work); - if (process_evict_queues(p)) + if (kfd_process_evict_queues(p)) pr_err("Failed to suspend process %d\n", p->pasid); dma_fence_signal(p->ef); dma_fence_put(p->ef); @@ -989,15 +1022,12 @@ int kfd_resume_all_processes(void) return ret; } -int kfd_reserved_mem_mmap(struct kfd_process *process, +int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process, struct vm_area_struct *vma) { - struct kfd_dev *dev = kfd_device_by_id(vma->vm_pgoff); struct kfd_process_device *pdd; struct qcm_process_device *qpd; - if (!dev) - return -EINVAL; if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) { pr_err("Incorrect CWSR mapping size.\n"); return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 7817e32..d65ce04 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -119,9 +119,6 @@ static int create_cp_queue(struct process_queue_manager *pqm, /* Doorbell initialized in user space*/ q_properties->doorbell_ptr = NULL; - q_properties->doorbell_off = - kfd_queue_id_to_doorbell(dev, pqm->process, qid); - /* let DQM handle it*/ q_properties->vmid = 0; q_properties->queue_id = qid; @@ -244,10 +241,20 @@ int pqm_create_queue(struct process_queue_manager *pqm, } if (retval != 0) { - pr_err("DQM create queue failed\n"); + pr_err("Pasid %d DQM create queue %d failed. ret %d\n", + pqm->process->pasid, type, retval); goto err_create_queue; } + if (q) + /* Return the doorbell offset within the doorbell page + * to the caller so it can be passed up to user mode + * (in bytes). + */ + properties->doorbell_off = + (q->properties.doorbell_off * sizeof(uint32_t)) & + (kfd_doorbell_process_slice(dev) - 1); + pr_debug("PQM After DQM create queue\n"); list_add(&pqn->process_queue_list, &pqm->queues); @@ -313,8 +320,11 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) dqm = pqn->q->device->dqm; retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q); if (retval) { - pr_debug("Destroy queue failed, returned %d\n", retval); - goto err_destroy_queue; + pr_err("Pasid %d destroy queue %d failed, ret %d\n", + pqm->process->pasid, + pqn->q->properties.queue_id, retval); + if (retval != -ETIME) + goto err_destroy_queue; } uninit_queue(pqn->q); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index a5315d4..6dcd621 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -36,8 +36,8 @@ void print_queue_properties(struct queue_properties *q) pr_debug("Queue Address: 0x%llX\n", q->queue_address); pr_debug("Queue Id: %u\n", q->queue_id); pr_debug("Queue Process Vmid: %u\n", q->vmid); - pr_debug("Queue Read Pointer: 0x%p\n", q->read_ptr); - pr_debug("Queue Write Pointer: 0x%p\n", q->write_ptr); + pr_debug("Queue Read Pointer: 0x%px\n", q->read_ptr); + pr_debug("Queue Write Pointer: 0x%px\n", q->write_ptr); pr_debug("Queue Doorbell Pointer: 0x%p\n", q->doorbell_ptr); pr_debug("Queue Doorbell Offset: %u\n", q->doorbell_off); } @@ -53,8 +53,8 @@ void print_queue(struct queue *q) pr_debug("Queue Address: 0x%llX\n", q->properties.queue_address); pr_debug("Queue Id: %u\n", q->properties.queue_id); pr_debug("Queue Process Vmid: %u\n", q->properties.vmid); - pr_debug("Queue Read Pointer: 0x%p\n", q->properties.read_ptr); - pr_debug("Queue Write Pointer: 0x%p\n", q->properties.write_ptr); + pr_debug("Queue Read Pointer: 0x%px\n", q->properties.read_ptr); + pr_debug("Queue Write Pointer: 0x%px\n", q->properties.write_ptr); pr_debug("Queue Doorbell Pointer: 0x%p\n", q->properties.doorbell_ptr); pr_debug("Queue Doorbell Offset: %u\n", q->properties.doorbell_off); pr_debug("Queue MQD Address: 0x%p\n", q->mqd); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index ac28abc..bc95d4df 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1239,6 +1239,12 @@ int kfd_topology_add_device(struct kfd_dev *gpu) HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); break; + case CHIP_VEGA10: + case CHIP_RAVEN: + dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 << + HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & + HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK); + break; default: WARN(1, "Unexpected ASIC family %u", dev->gpu->device_info->asic_family); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index eb54cfc..7d9c3f9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -45,6 +45,7 @@ #define HSA_CAP_DOORBELL_TYPE_PRE_1_0 0x0 #define HSA_CAP_DOORBELL_TYPE_1_0 0x1 +#define HSA_CAP_DOORBELL_TYPE_2_0 0x2 #define HSA_CAP_AQL_QUEUE_DOUBLE_MAP 0x00004000 struct kfd_node_properties { diff --git a/drivers/gpu/drm/amd/amdkfd/soc15_int.h b/drivers/gpu/drm/amd/amdkfd/soc15_int.h new file mode 100644 index 0000000..0bc0b25 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/soc15_int.h @@ -0,0 +1,47 @@ +/* + * Copyright 2016-2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef HSA_SOC15_INT_H_INCLUDED +#define HSA_SOC15_INT_H_INCLUDED + +#include "soc15_ih_clientid.h" + +#define SOC15_INTSRC_CP_END_OF_PIPE 181 +#define SOC15_INTSRC_CP_BAD_OPCODE 183 +#define SOC15_INTSRC_SQ_INTERRUPT_MSG 239 +#define SOC15_INTSRC_VMC_FAULT 0 +#define SOC15_INTSRC_SDMA_TRAP 224 + + +#define SOC15_CLIENT_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) & 0xff) +#define SOC15_SOURCE_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 8 & 0xff) +#define SOC15_RING_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 16 & 0xff) +#define SOC15_VMID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 24 & 0xf) +#define SOC15_VMID_TYPE_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 31 & 0x1) +#define SOC15_PASID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[3]) & 0xffff) +#define SOC15_CONTEXT_ID0_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[4])) +#define SOC15_CONTEXT_ID1_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[5])) +#define SOC15_CONTEXT_ID2_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[6])) +#define SOC15_CONTEXT_ID3_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[7])) + +#endif + diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index e593577..644b2187 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -589,7 +589,7 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context) ******************************************************************************/ struct dc *dc_create(const struct dc_init_data *init_params) - { +{ struct dc *dc = kzalloc(sizeof(*dc), GFP_KERNEL); unsigned int full_pipe_count; diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 237289a..5733fbe 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -100,6 +100,21 @@ struct kgd2kfd_shared_resources { /* Bit n == 1 means Queue n is available for KFD */ DECLARE_BITMAP(queue_bitmap, KGD_MAX_QUEUES); + /* Doorbell assignments (SOC15 and later chips only). Only + * specific doorbells are routed to each SDMA engine. Others + * are routed to IH and VCN. They are not usable by the CP. + * + * Any doorbell number D that satisfies the following condition + * is reserved: (D & reserved_doorbell_mask) == reserved_doorbell_val + * + * KFD currently uses 1024 (= 0x3ff) doorbells per process. If + * doorbells 0x0f0-0x0f7 and 0x2f-0x2f7 are reserved, that means + * mask would be set to 0x1f8 and val set to 0x0f0. + */ + unsigned int sdma_doorbell[2][2]; + unsigned int reserved_doorbell_mask; + unsigned int reserved_doorbell_val; + /* Base address of doorbell aperture. */ phys_addr_t doorbell_physical_address; @@ -173,8 +188,6 @@ struct tile_config { * @set_pasid_vmid_mapping: Exposes pasid/vmid pair to the H/W for no cp * scheduling mode. Only used for no cp scheduling mode. * - * @init_pipeline: Initialized the compute pipelines. - * * @hqd_load: Loads the mqd structure to a H/W hqd slot. used only for no cp * sceduling mode. * @@ -274,9 +287,6 @@ struct kfd2kgd_calls { int (*set_pasid_vmid_mapping)(struct kgd_dev *kgd, unsigned int pasid, unsigned int vmid); - int (*init_pipeline)(struct kgd_dev *kgd, uint32_t pipe_id, - uint32_t hpd_size, uint64_t hpd_gpu_addr); - int (*init_interrupts)(struct kgd_dev *kgd, uint32_t pipe_id); int (*hqd_load)(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, @@ -382,6 +392,10 @@ struct kfd2kgd_calls { * * @resume: Notifies amdkfd about a resume action done to a kgd device * + * @quiesce_mm: Quiesce all user queue access to specified MM address space + * + * @resume_mm: Resume user queue access to specified MM address space + * * @schedule_evict_and_restore_process: Schedules work queue that will prepare * for safe eviction of KFD BOs that belong to the specified process. * @@ -399,6 +413,8 @@ struct kgd2kfd_calls { void (*interrupt)(struct kfd_dev *kfd, const void *ih_ring_entry); void (*suspend)(struct kfd_dev *kfd); int (*resume)(struct kfd_dev *kfd); + int (*quiesce_mm)(struct mm_struct *mm); + int (*resume_mm)(struct mm_struct *mm); int (*schedule_evict_and_restore_process)(struct mm_struct *mm, struct dma_fence *fence); }; diff --git a/drivers/gpu/drm/amd/include/v9_structs.h b/drivers/gpu/drm/amd/include/v9_structs.h index 2fb25ab..ceaf493 100644 --- a/drivers/gpu/drm/amd/include/v9_structs.h +++ b/drivers/gpu/drm/amd/include/v9_structs.h @@ -29,10 +29,10 @@ struct v9_sdma_mqd { uint32_t sdmax_rlcx_rb_base; uint32_t sdmax_rlcx_rb_base_hi; uint32_t sdmax_rlcx_rb_rptr; + uint32_t sdmax_rlcx_rb_rptr_hi; uint32_t sdmax_rlcx_rb_wptr; + uint32_t sdmax_rlcx_rb_wptr_hi; uint32_t sdmax_rlcx_rb_wptr_poll_cntl; - uint32_t sdmax_rlcx_rb_wptr_poll_addr_hi; - uint32_t sdmax_rlcx_rb_wptr_poll_addr_lo; uint32_t sdmax_rlcx_rb_rptr_addr_hi; uint32_t sdmax_rlcx_rb_rptr_addr_lo; uint32_t sdmax_rlcx_ib_cntl; @@ -44,29 +44,29 @@ struct v9_sdma_mqd { uint32_t sdmax_rlcx_skip_cntl; uint32_t sdmax_rlcx_context_status; uint32_t sdmax_rlcx_doorbell; - uint32_t sdmax_rlcx_virtual_addr; - uint32_t sdmax_rlcx_ape1_cntl; + uint32_t sdmax_rlcx_status; uint32_t sdmax_rlcx_doorbell_log; - uint32_t reserved_22; - uint32_t reserved_23; - uint32_t reserved_24; - uint32_t reserved_25; - uint32_t reserved_26; - uint32_t reserved_27; - uint32_t reserved_28; - uint32_t reserved_29; - uint32_t reserved_30; - uint32_t reserved_31; - uint32_t reserved_32; - uint32_t reserved_33; - uint32_t reserved_34; - uint32_t reserved_35; - uint32_t reserved_36; - uint32_t reserved_37; - uint32_t reserved_38; - uint32_t reserved_39; - uint32_t reserved_40; - uint32_t reserved_41; + uint32_t sdmax_rlcx_watermark; + uint32_t sdmax_rlcx_doorbell_offset; + uint32_t sdmax_rlcx_csa_addr_lo; + uint32_t sdmax_rlcx_csa_addr_hi; + uint32_t sdmax_rlcx_ib_sub_remain; + uint32_t sdmax_rlcx_preempt; + uint32_t sdmax_rlcx_dummy_reg; + uint32_t sdmax_rlcx_rb_wptr_poll_addr_hi; + uint32_t sdmax_rlcx_rb_wptr_poll_addr_lo; + uint32_t sdmax_rlcx_rb_aql_cntl; + uint32_t sdmax_rlcx_minor_ptr_update; + uint32_t sdmax_rlcx_midcmd_data0; + uint32_t sdmax_rlcx_midcmd_data1; + uint32_t sdmax_rlcx_midcmd_data2; + uint32_t sdmax_rlcx_midcmd_data3; + uint32_t sdmax_rlcx_midcmd_data4; + uint32_t sdmax_rlcx_midcmd_data5; + uint32_t sdmax_rlcx_midcmd_data6; + uint32_t sdmax_rlcx_midcmd_data7; + uint32_t sdmax_rlcx_midcmd_data8; + uint32_t sdmax_rlcx_midcmd_cntl; uint32_t reserved_42; uint32_t reserved_43; uint32_t reserved_44; diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index 831b733..036dff8 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -799,7 +799,7 @@ static int ast_get_modes(struct drm_connector *connector) return 0; } -static int ast_mode_valid(struct drm_connector *connector, +static enum drm_mode_status ast_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct ast_private *ast = connector->dev->dev_private; diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h index ab32d5b..60c937f 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.h @@ -299,7 +299,6 @@ struct atmel_hlcdc_layer { struct atmel_hlcdc_plane { struct drm_plane base; struct atmel_hlcdc_layer layer; - struct atmel_hlcdc_plane_properties *properties; }; static inline struct atmel_hlcdc_plane * @@ -346,18 +345,6 @@ struct atmel_hlcdc_dc_desc { }; /** - * Atmel HLCDC Plane properties. - * - * This structure stores plane property definitions. - * - * @alpha: alpha blending (or transparency) property - * @rotation: rotation property - */ -struct atmel_hlcdc_plane_properties { - struct drm_property *alpha; -}; - -/** * Atmel HLCDC Display Controller. * * @desc: HLCDC Display Controller description diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c index e18800e..73c875d 100644 --- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c +++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c @@ -31,7 +31,6 @@ * @src_y: y buffer position * @src_w: buffer width * @src_h: buffer height - * @alpha: alpha blending of the plane * @disc_x: x discard position * @disc_y: y discard position * @disc_w: discard width @@ -54,8 +53,6 @@ struct atmel_hlcdc_plane_state { uint32_t src_w; uint32_t src_h; - u8 alpha; - int disc_x; int disc_y; int disc_w; @@ -385,7 +382,7 @@ atmel_hlcdc_plane_update_general_settings(struct atmel_hlcdc_plane *plane, cfg |= ATMEL_HLCDC_LAYER_LAEN; else cfg |= ATMEL_HLCDC_LAYER_GAEN | - ATMEL_HLCDC_LAYER_GA(state->alpha); + ATMEL_HLCDC_LAYER_GA(state->base.alpha >> 8); } if (state->disc_h && state->disc_w) @@ -553,7 +550,7 @@ atmel_hlcdc_plane_prepare_disc_area(struct drm_crtc_state *c_state) if (!ovl_s->fb || ovl_s->fb->format->has_alpha || - ovl_state->alpha != 255) + ovl_s->alpha != DRM_BLEND_ALPHA_OPAQUE) continue; /* TODO: implement a smarter hidden area detection */ @@ -829,51 +826,18 @@ static void atmel_hlcdc_plane_destroy(struct drm_plane *p) drm_plane_cleanup(p); } -static int atmel_hlcdc_plane_atomic_set_property(struct drm_plane *p, - struct drm_plane_state *s, - struct drm_property *property, - uint64_t val) -{ - struct atmel_hlcdc_plane *plane = drm_plane_to_atmel_hlcdc_plane(p); - struct atmel_hlcdc_plane_properties *props = plane->properties; - struct atmel_hlcdc_plane_state *state = - drm_plane_state_to_atmel_hlcdc_plane_state(s); - - if (property == props->alpha) - state->alpha = val; - else - return -EINVAL; - - return 0; -} - -static int atmel_hlcdc_plane_atomic_get_property(struct drm_plane *p, - const struct drm_plane_state *s, - struct drm_property *property, - uint64_t *val) -{ - struct atmel_hlcdc_plane *plane = drm_plane_to_atmel_hlcdc_plane(p); - struct atmel_hlcdc_plane_properties *props = plane->properties; - const struct atmel_hlcdc_plane_state *state = - container_of(s, const struct atmel_hlcdc_plane_state, base); - - if (property == props->alpha) - *val = state->alpha; - else - return -EINVAL; - - return 0; -} - -static int atmel_hlcdc_plane_init_properties(struct atmel_hlcdc_plane *plane, - struct atmel_hlcdc_plane_properties *props) +static int atmel_hlcdc_plane_init_properties(struct atmel_hlcdc_plane *plane) { const struct atmel_hlcdc_layer_desc *desc = plane->layer.desc; if (desc->type == ATMEL_HLCDC_OVERLAY_LAYER || - desc->type == ATMEL_HLCDC_CURSOR_LAYER) - drm_object_attach_property(&plane->base.base, - props->alpha, 255); + desc->type == ATMEL_HLCDC_CURSOR_LAYER) { + int ret; + + ret = drm_plane_create_alpha_property(&plane->base); + if (ret) + return ret; + } if (desc->layout.xstride && desc->layout.pstride) { int ret; @@ -988,8 +952,8 @@ static void atmel_hlcdc_plane_reset(struct drm_plane *p) return; } - state->alpha = 255; p->state = &state->base; + p->state->alpha = DRM_BLEND_ALPHA_OPAQUE; p->state->plane = p; } } @@ -1042,13 +1006,10 @@ static const struct drm_plane_funcs layer_plane_funcs = { .reset = atmel_hlcdc_plane_reset, .atomic_duplicate_state = atmel_hlcdc_plane_atomic_duplicate_state, .atomic_destroy_state = atmel_hlcdc_plane_atomic_destroy_state, - .atomic_set_property = atmel_hlcdc_plane_atomic_set_property, - .atomic_get_property = atmel_hlcdc_plane_atomic_get_property, }; static int atmel_hlcdc_plane_create(struct drm_device *dev, - const struct atmel_hlcdc_layer_desc *desc, - struct atmel_hlcdc_plane_properties *props) + const struct atmel_hlcdc_layer_desc *desc) { struct atmel_hlcdc_dc *dc = dev->dev_private; struct atmel_hlcdc_plane *plane; @@ -1060,7 +1021,6 @@ static int atmel_hlcdc_plane_create(struct drm_device *dev, return -ENOMEM; atmel_hlcdc_layer_init(&plane->layer, desc, dc->hlcdc->regmap); - plane->properties = props; if (desc->type == ATMEL_HLCDC_BASE_LAYER) type = DRM_PLANE_TYPE_PRIMARY; @@ -1081,7 +1041,7 @@ static int atmel_hlcdc_plane_create(struct drm_device *dev, &atmel_hlcdc_layer_plane_helper_funcs); /* Set default property values*/ - ret = atmel_hlcdc_plane_init_properties(plane, props); + ret = atmel_hlcdc_plane_init_properties(plane); if (ret) return ret; @@ -1090,34 +1050,13 @@ static int atmel_hlcdc_plane_create(struct drm_device *dev, return 0; } -static struct atmel_hlcdc_plane_properties * -atmel_hlcdc_plane_create_properties(struct drm_device *dev) -{ - struct atmel_hlcdc_plane_properties *props; - - props = devm_kzalloc(dev->dev, sizeof(*props), GFP_KERNEL); - if (!props) - return ERR_PTR(-ENOMEM); - - props->alpha = drm_property_create_range(dev, 0, "alpha", 0, 255); - if (!props->alpha) - return ERR_PTR(-ENOMEM); - - return props; -} - int atmel_hlcdc_create_planes(struct drm_device *dev) { struct atmel_hlcdc_dc *dc = dev->dev_private; - struct atmel_hlcdc_plane_properties *props; const struct atmel_hlcdc_layer_desc *descs = dc->desc->layers; int nlayers = dc->desc->nlayers; int i, ret; - props = atmel_hlcdc_plane_create_properties(dev); - if (IS_ERR(props)) - return PTR_ERR(props); - dc->dscrpool = dmam_pool_create("atmel-hlcdc-dscr", dev->dev, sizeof(struct atmel_hlcdc_dma_channel_dscr), sizeof(u64), 0); @@ -1130,7 +1069,7 @@ int atmel_hlcdc_create_planes(struct drm_device *dev) descs[i].type != ATMEL_HLCDC_CURSOR_LAYER) continue; - ret = atmel_hlcdc_plane_create(dev, &descs[i], props); + ret = atmel_hlcdc_plane_create(dev, &descs[i]); if (ret) return ret; } diff --git a/drivers/gpu/drm/bochs/bochs_kms.c b/drivers/gpu/drm/bochs/bochs_kms.c index a24a18f..233980a 100644 --- a/drivers/gpu/drm/bochs/bochs_kms.c +++ b/drivers/gpu/drm/bochs/bochs_kms.c @@ -188,7 +188,7 @@ static int bochs_connector_get_modes(struct drm_connector *connector) return count; } -static int bochs_connector_mode_valid(struct drm_connector *connector, +static enum drm_mode_status bochs_connector_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct bochs_device *bochs = diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig index 3aa65bd..1d75d3a 100644 --- a/drivers/gpu/drm/bridge/Kconfig +++ b/drivers/gpu/drm/bridge/Kconfig @@ -25,6 +25,16 @@ config DRM_ANALOGIX_ANX78XX the HDMI output of an application processor to MyDP or DisplayPort. +config DRM_CDNS_DSI + tristate "Cadence DPI/DSI bridge" + select DRM_KMS_HELPER + select DRM_MIPI_DSI + select DRM_PANEL_BRIDGE + depends on OF + help + Support Cadence DPI to DSI bridge. This is an internal + bridge and is meant to be directly embedded in a SoC. + config DRM_DUMB_VGA_DAC tristate "Dumb VGA DAC Bridge support" depends on OF @@ -93,6 +103,12 @@ config DRM_SII9234 It is an I2C driver, that detects connection of MHL bridge and starts encapsulation of HDMI signal. +config DRM_THINE_THC63LVD1024 + tristate "Thine THC63LVD1024 LVDS decoder bridge" + depends on OF + ---help--- + Thine THC63LVD1024 LVDS/parallel converter driver. + config DRM_TOSHIBA_TC358767 tristate "Toshiba TC358767 eDP bridge" depends on OF diff --git a/drivers/gpu/drm/bridge/Makefile b/drivers/gpu/drm/bridge/Makefile index 373eb28..35f88d4 100644 --- a/drivers/gpu/drm/bridge/Makefile +++ b/drivers/gpu/drm/bridge/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_DRM_ANALOGIX_ANX78XX) += analogix-anx78xx.o +obj-$(CONFIG_DRM_CDNS_DSI) += cdns-dsi.o obj-$(CONFIG_DRM_DUMB_VGA_DAC) += dumb-vga-dac.o obj-$(CONFIG_DRM_LVDS_ENCODER) += lvds-encoder.o obj-$(CONFIG_DRM_MEGACHIPS_STDPXXXX_GE_B850V3_FW) += megachips-stdpxxxx-ge-b850v3-fw.o @@ -8,6 +9,7 @@ obj-$(CONFIG_DRM_PARADE_PS8622) += parade-ps8622.o obj-$(CONFIG_DRM_SIL_SII8620) += sil-sii8620.o obj-$(CONFIG_DRM_SII902X) += sii902x.o obj-$(CONFIG_DRM_SII9234) += sii9234.o +obj-$(CONFIG_DRM_THINE_THC63LVD1024) += thc63lvd1024.o obj-$(CONFIG_DRM_TOSHIBA_TC358767) += tc358767.o obj-$(CONFIG_DRM_ANALOGIX_DP) += analogix/ obj-$(CONFIG_DRM_I2C_ADV7511) += adv7511/ diff --git a/drivers/gpu/drm/bridge/adv7511/Kconfig b/drivers/gpu/drm/bridge/adv7511/Kconfig index 592b9d2..944e440 100644 --- a/drivers/gpu/drm/bridge/adv7511/Kconfig +++ b/drivers/gpu/drm/bridge/adv7511/Kconfig @@ -1,5 +1,5 @@ config DRM_I2C_ADV7511 - tristate "AV7511 encoder" + tristate "ADV7511 encoder" depends on OF select DRM_KMS_HELPER select REGMAP_I2C diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511.h b/drivers/gpu/drm/bridge/adv7511/adv7511.h index d034b2c..73d8ccb 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511.h +++ b/drivers/gpu/drm/bridge/adv7511/adv7511.h @@ -93,6 +93,11 @@ #define ADV7511_REG_CHIP_ID_HIGH 0xf5 #define ADV7511_REG_CHIP_ID_LOW 0xf6 +/* Hardware defined default addresses for I2C register maps */ +#define ADV7511_CEC_I2C_ADDR_DEFAULT 0x3c +#define ADV7511_EDID_I2C_ADDR_DEFAULT 0x3f +#define ADV7511_PACKET_I2C_ADDR_DEFAULT 0x38 + #define ADV7511_CSC_ENABLE BIT(7) #define ADV7511_CSC_UPDATE_MODE BIT(5) @@ -321,6 +326,7 @@ enum adv7511_type { struct adv7511 { struct i2c_client *i2c_main; struct i2c_client *i2c_edid; + struct i2c_client *i2c_packet; struct i2c_client *i2c_cec; struct regmap *regmap; diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c index efa29db..73021b3 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c @@ -586,7 +586,7 @@ static int adv7511_get_modes(struct adv7511 *adv7511, /* Reading the EDID only works if the device is powered */ if (!adv7511->powered) { unsigned int edid_i2c_addr = - (adv7511->i2c_main->addr << 1) + 4; + (adv7511->i2c_edid->addr << 1); __adv7511_power_on(adv7511); @@ -654,7 +654,7 @@ adv7511_detect(struct adv7511 *adv7511, struct drm_connector *connector) return status; } -static int adv7511_mode_valid(struct adv7511 *adv7511, +static enum drm_mode_status adv7511_mode_valid(struct adv7511 *adv7511, struct drm_display_mode *mode) { if (mode->clock > 165000) @@ -969,10 +969,10 @@ static int adv7511_init_cec_regmap(struct adv7511 *adv) { int ret; - adv->i2c_cec = i2c_new_dummy(adv->i2c_main->adapter, - adv->i2c_main->addr - 1); + adv->i2c_cec = i2c_new_secondary_device(adv->i2c_main, "cec", + ADV7511_CEC_I2C_ADDR_DEFAULT); if (!adv->i2c_cec) - return -ENOMEM; + return -EINVAL; i2c_set_clientdata(adv->i2c_cec, adv); adv->regmap_cec = devm_regmap_init_i2c(adv->i2c_cec, @@ -1082,8 +1082,6 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id) struct adv7511_link_config link_config; struct adv7511 *adv7511; struct device *dev = &i2c->dev; - unsigned int main_i2c_addr = i2c->addr << 1; - unsigned int edid_i2c_addr = main_i2c_addr + 4; unsigned int val; int ret; @@ -1129,7 +1127,7 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id) } if (adv7511->gpio_pd) { - mdelay(5); + usleep_range(5000, 6000); gpiod_set_value_cansleep(adv7511->gpio_pd, 0); } @@ -1153,23 +1151,34 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id) if (ret) goto uninit_regulators; - regmap_write(adv7511->regmap, ADV7511_REG_EDID_I2C_ADDR, edid_i2c_addr); - regmap_write(adv7511->regmap, ADV7511_REG_PACKET_I2C_ADDR, - main_i2c_addr - 0xa); - regmap_write(adv7511->regmap, ADV7511_REG_CEC_I2C_ADDR, - main_i2c_addr - 2); - adv7511_packet_disable(adv7511, 0xffff); - adv7511->i2c_edid = i2c_new_dummy(i2c->adapter, edid_i2c_addr >> 1); + adv7511->i2c_edid = i2c_new_secondary_device(i2c, "edid", + ADV7511_EDID_I2C_ADDR_DEFAULT); if (!adv7511->i2c_edid) { - ret = -ENOMEM; + ret = -EINVAL; goto uninit_regulators; } + regmap_write(adv7511->regmap, ADV7511_REG_EDID_I2C_ADDR, + adv7511->i2c_edid->addr << 1); + + adv7511->i2c_packet = i2c_new_secondary_device(i2c, "packet", + ADV7511_PACKET_I2C_ADDR_DEFAULT); + if (!adv7511->i2c_packet) { + ret = -EINVAL; + goto err_i2c_unregister_edid; + } + + regmap_write(adv7511->regmap, ADV7511_REG_PACKET_I2C_ADDR, + adv7511->i2c_packet->addr << 1); + ret = adv7511_init_cec_regmap(adv7511); if (ret) - goto err_i2c_unregister_edid; + goto err_i2c_unregister_packet; + + regmap_write(adv7511->regmap, ADV7511_REG_CEC_I2C_ADDR, + adv7511->i2c_cec->addr << 1); INIT_WORK(&adv7511->hpd_work, adv7511_hpd_work); @@ -1207,6 +1216,8 @@ err_unregister_cec: i2c_unregister_device(adv7511->i2c_cec); if (adv7511->cec_clk) clk_disable_unprepare(adv7511->cec_clk); +err_i2c_unregister_packet: + i2c_unregister_device(adv7511->i2c_packet); err_i2c_unregister_edid: i2c_unregister_device(adv7511->i2c_edid); uninit_regulators: @@ -1233,6 +1244,7 @@ static int adv7511_remove(struct i2c_client *i2c) cec_unregister_adapter(adv7511->cec_adap); + i2c_unregister_device(adv7511->i2c_packet); i2c_unregister_device(adv7511->i2c_edid); return 0; diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c index 5c52307..2bcbfad 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c +++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c @@ -43,8 +43,10 @@ struct bridge_init { struct device_node *node; }; -static void analogix_dp_init_dp(struct analogix_dp_device *dp) +static int analogix_dp_init_dp(struct analogix_dp_device *dp) { + int ret; + analogix_dp_reset(dp); analogix_dp_swreset(dp); @@ -56,10 +58,13 @@ static void analogix_dp_init_dp(struct analogix_dp_device *dp) analogix_dp_enable_sw_function(dp); analogix_dp_config_interrupt(dp); - analogix_dp_init_analog_func(dp); + ret = analogix_dp_init_analog_func(dp); + if (ret) + return ret; analogix_dp_init_hpd(dp); analogix_dp_init_aux(dp); + return 0; } static int analogix_dp_detect_hpd(struct analogix_dp_device *dp) @@ -71,7 +76,7 @@ static int analogix_dp_detect_hpd(struct analogix_dp_device *dp) return 0; timeout_loop++; - usleep_range(10, 11); + usleep_range(1000, 1100); } /* @@ -148,87 +153,146 @@ int analogix_dp_disable_psr(struct analogix_dp_device *dp) psr_vsc.DB1 = 0; ret = drm_dp_dpcd_writeb(&dp->aux, DP_SET_POWER, DP_SET_POWER_D0); - if (ret != 1) + if (ret != 1) { dev_err(dp->dev, "Failed to set DP Power0 %d\n", ret); + return ret; + } return analogix_dp_send_psr_spd(dp, &psr_vsc, false); } EXPORT_SYMBOL_GPL(analogix_dp_disable_psr); -static bool analogix_dp_detect_sink_psr(struct analogix_dp_device *dp) +static int analogix_dp_detect_sink_psr(struct analogix_dp_device *dp) { unsigned char psr_version; + int ret; + + ret = drm_dp_dpcd_readb(&dp->aux, DP_PSR_SUPPORT, &psr_version); + if (ret != 1) { + dev_err(dp->dev, "failed to get PSR version, disable it\n"); + return ret; + } - drm_dp_dpcd_readb(&dp->aux, DP_PSR_SUPPORT, &psr_version); dev_dbg(dp->dev, "Panel PSR version : %x\n", psr_version); - return (psr_version & DP_PSR_IS_SUPPORTED) ? true : false; + dp->psr_enable = (psr_version & DP_PSR_IS_SUPPORTED) ? true : false; + + return 0; } -static void analogix_dp_enable_sink_psr(struct analogix_dp_device *dp) +static int analogix_dp_enable_sink_psr(struct analogix_dp_device *dp) { unsigned char psr_en; + int ret; /* Disable psr function */ - drm_dp_dpcd_readb(&dp->aux, DP_PSR_EN_CFG, &psr_en); + ret = drm_dp_dpcd_readb(&dp->aux, DP_PSR_EN_CFG, &psr_en); + if (ret != 1) { + dev_err(dp->dev, "failed to get psr config\n"); + goto end; + } + psr_en &= ~DP_PSR_ENABLE; - drm_dp_dpcd_writeb(&dp->aux, DP_PSR_EN_CFG, psr_en); + ret = drm_dp_dpcd_writeb(&dp->aux, DP_PSR_EN_CFG, psr_en); + if (ret != 1) { + dev_err(dp->dev, "failed to disable panel psr\n"); + goto end; + } /* Main-Link transmitter remains active during PSR active states */ psr_en = DP_PSR_MAIN_LINK_ACTIVE | DP_PSR_CRC_VERIFICATION; - drm_dp_dpcd_writeb(&dp->aux, DP_PSR_EN_CFG, psr_en); + ret = drm_dp_dpcd_writeb(&dp->aux, DP_PSR_EN_CFG, psr_en); + if (ret != 1) { + dev_err(dp->dev, "failed to set panel psr\n"); + goto end; + } /* Enable psr function */ psr_en = DP_PSR_ENABLE | DP_PSR_MAIN_LINK_ACTIVE | DP_PSR_CRC_VERIFICATION; - drm_dp_dpcd_writeb(&dp->aux, DP_PSR_EN_CFG, psr_en); + ret = drm_dp_dpcd_writeb(&dp->aux, DP_PSR_EN_CFG, psr_en); + if (ret != 1) { + dev_err(dp->dev, "failed to set panel psr\n"); + goto end; + } analogix_dp_enable_psr_crc(dp); + + return 0; +end: + dev_err(dp->dev, "enable psr fail, force to disable psr\n"); + dp->psr_enable = false; + + return ret; } -static void +static int analogix_dp_enable_rx_to_enhanced_mode(struct analogix_dp_device *dp, bool enable) { u8 data; + int ret; - drm_dp_dpcd_readb(&dp->aux, DP_LANE_COUNT_SET, &data); + ret = drm_dp_dpcd_readb(&dp->aux, DP_LANE_COUNT_SET, &data); + if (ret != 1) + return ret; if (enable) - drm_dp_dpcd_writeb(&dp->aux, DP_LANE_COUNT_SET, - DP_LANE_COUNT_ENHANCED_FRAME_EN | - DPCD_LANE_COUNT_SET(data)); + ret = drm_dp_dpcd_writeb(&dp->aux, DP_LANE_COUNT_SET, + DP_LANE_COUNT_ENHANCED_FRAME_EN | + DPCD_LANE_COUNT_SET(data)); else - drm_dp_dpcd_writeb(&dp->aux, DP_LANE_COUNT_SET, - DPCD_LANE_COUNT_SET(data)); + ret = drm_dp_dpcd_writeb(&dp->aux, DP_LANE_COUNT_SET, + DPCD_LANE_COUNT_SET(data)); + + return ret < 0 ? ret : 0; } -static int analogix_dp_is_enhanced_mode_available(struct analogix_dp_device *dp) +static int analogix_dp_is_enhanced_mode_available(struct analogix_dp_device *dp, + u8 *enhanced_mode_support) { u8 data; - int retval; + int ret; - drm_dp_dpcd_readb(&dp->aux, DP_MAX_LANE_COUNT, &data); - retval = DPCD_ENHANCED_FRAME_CAP(data); + ret = drm_dp_dpcd_readb(&dp->aux, DP_MAX_LANE_COUNT, &data); + if (ret != 1) { + *enhanced_mode_support = 0; + return ret; + } - return retval; + *enhanced_mode_support = DPCD_ENHANCED_FRAME_CAP(data); + + return 0; } -static void analogix_dp_set_enhanced_mode(struct analogix_dp_device *dp) +static int analogix_dp_set_enhanced_mode(struct analogix_dp_device *dp) { u8 data; + int ret; + + ret = analogix_dp_is_enhanced_mode_available(dp, &data); + if (ret < 0) + return ret; + + ret = analogix_dp_enable_rx_to_enhanced_mode(dp, data); + if (ret < 0) + return ret; - data = analogix_dp_is_enhanced_mode_available(dp); - analogix_dp_enable_rx_to_enhanced_mode(dp, data); analogix_dp_enable_enhanced_mode(dp, data); + + return 0; } -static void analogix_dp_training_pattern_dis(struct analogix_dp_device *dp) +static int analogix_dp_training_pattern_dis(struct analogix_dp_device *dp) { + int ret; + analogix_dp_set_training_pattern(dp, DP_NONE); - drm_dp_dpcd_writeb(&dp->aux, DP_TRAINING_PATTERN_SET, - DP_TRAINING_PATTERN_DISABLE); + ret = drm_dp_dpcd_writeb(&dp->aux, DP_TRAINING_PATTERN_SET, + DP_TRAINING_PATTERN_DISABLE); + + return ret < 0 ? ret : 0; } static void @@ -276,6 +340,12 @@ static int analogix_dp_link_start(struct analogix_dp_device *dp) retval = drm_dp_dpcd_write(&dp->aux, DP_LINK_BW_SET, buf, 2); if (retval < 0) return retval; + /* set enhanced mode if available */ + retval = analogix_dp_set_enhanced_mode(dp); + if (retval < 0) { + dev_err(dp->dev, "failed to set enhance mode\n"); + return retval; + } /* Set TX pre-emphasis to minimum */ for (lane = 0; lane < lane_count; lane++) @@ -531,7 +601,7 @@ static int analogix_dp_process_equalizer_training(struct analogix_dp_device *dp) { int lane, lane_count, retval; u32 reg; - u8 link_align, link_status[2], adjust_request[2], spread; + u8 link_align, link_status[2], adjust_request[2]; usleep_range(400, 401); @@ -560,10 +630,11 @@ static int analogix_dp_process_equalizer_training(struct analogix_dp_device *dp) if (!analogix_dp_channel_eq_ok(link_status, link_align, lane_count)) { /* traing pattern Set to Normal */ - analogix_dp_training_pattern_dis(dp); + retval = analogix_dp_training_pattern_dis(dp); + if (retval < 0) + return retval; dev_info(dp->dev, "Link Training success!\n"); - analogix_dp_get_link_bandwidth(dp, ®); dp->link_train.link_rate = reg; dev_dbg(dp->dev, "final bandwidth = %.2x\n", @@ -574,22 +645,6 @@ static int analogix_dp_process_equalizer_training(struct analogix_dp_device *dp) dev_dbg(dp->dev, "final lane count = %.2x\n", dp->link_train.lane_count); - retval = drm_dp_dpcd_readb(&dp->aux, DP_MAX_DOWNSPREAD, - &spread); - if (retval != 1) { - dev_err(dp->dev, "failed to read downspread %d\n", - retval); - dp->fast_train_support = false; - } else { - dp->fast_train_support = - (spread & DP_NO_AUX_HANDSHAKE_LINK_TRAINING) ? - true : false; - } - dev_dbg(dp->dev, "fast link training %s\n", - dp->fast_train_support ? "supported" : "unsupported"); - - /* set enhanced mode if available */ - analogix_dp_set_enhanced_mode(dp); dp->link_train.lt_state = FINISHED; return 0; @@ -793,7 +848,7 @@ static int analogix_dp_fast_link_train(struct analogix_dp_device *dp) static int analogix_dp_train_link(struct analogix_dp_device *dp) { - if (dp->fast_train_support) + if (dp->fast_train_enable) return analogix_dp_fast_link_train(dp); return analogix_dp_full_link_train(dp, dp->video_info.max_lane_count, @@ -819,11 +874,10 @@ static int analogix_dp_config_video(struct analogix_dp_device *dp) if (analogix_dp_is_slave_video_stream_clock_on(dp) == 0) break; if (timeout_loop > DP_TIMEOUT_LOOP_COUNT) { - dev_err(dp->dev, "Timeout of video streamclk ok\n"); + dev_err(dp->dev, "Timeout of slave video streamclk ok\n"); return -ETIMEDOUT; } - - usleep_range(1, 2); + usleep_range(1000, 1001); } /* Set to use the register calculated M/N video */ @@ -838,6 +892,9 @@ static int analogix_dp_config_video(struct analogix_dp_device *dp) /* Configure video slave mode */ analogix_dp_enable_video_master(dp, 0); + /* Enable video */ + analogix_dp_start_video(dp); + timeout_loop = 0; for (;;) { @@ -850,8 +907,9 @@ static int analogix_dp_config_video(struct analogix_dp_device *dp) done_count = 0; } if (timeout_loop > DP_TIMEOUT_LOOP_COUNT) { - dev_err(dp->dev, "Timeout of video streamclk ok\n"); - return -ETIMEDOUT; + dev_warn(dp->dev, + "Ignoring timeout of video streamclk ok\n"); + break; } usleep_range(1000, 1001); @@ -860,24 +918,32 @@ static int analogix_dp_config_video(struct analogix_dp_device *dp) return 0; } -static void analogix_dp_enable_scramble(struct analogix_dp_device *dp, - bool enable) +static int analogix_dp_enable_scramble(struct analogix_dp_device *dp, + bool enable) { u8 data; + int ret; if (enable) { analogix_dp_enable_scrambling(dp); - drm_dp_dpcd_readb(&dp->aux, DP_TRAINING_PATTERN_SET, &data); - drm_dp_dpcd_writeb(&dp->aux, DP_TRAINING_PATTERN_SET, + ret = drm_dp_dpcd_readb(&dp->aux, DP_TRAINING_PATTERN_SET, + &data); + if (ret != 1) + return ret; + ret = drm_dp_dpcd_writeb(&dp->aux, DP_TRAINING_PATTERN_SET, (u8)(data & ~DP_LINK_SCRAMBLING_DISABLE)); } else { analogix_dp_disable_scrambling(dp); - drm_dp_dpcd_readb(&dp->aux, DP_TRAINING_PATTERN_SET, &data); - drm_dp_dpcd_writeb(&dp->aux, DP_TRAINING_PATTERN_SET, + ret = drm_dp_dpcd_readb(&dp->aux, DP_TRAINING_PATTERN_SET, + &data); + if (ret != 1) + return ret; + ret = drm_dp_dpcd_writeb(&dp->aux, DP_TRAINING_PATTERN_SET, (u8)(data | DP_LINK_SCRAMBLING_DISABLE)); } + return ret < 0 ? ret : 0; } static irqreturn_t analogix_dp_hardirq(int irq, void *arg) @@ -916,7 +982,23 @@ static irqreturn_t analogix_dp_irq_thread(int irq, void *arg) return IRQ_HANDLED; } -static void analogix_dp_commit(struct analogix_dp_device *dp) +static int analogix_dp_fast_link_train_detection(struct analogix_dp_device *dp) +{ + int ret; + u8 spread; + + ret = drm_dp_dpcd_readb(&dp->aux, DP_MAX_DOWNSPREAD, &spread); + if (ret != 1) { + dev_err(dp->dev, "failed to read downspread %d\n", ret); + return ret; + } + dp->fast_train_enable = !!(spread & DP_NO_AUX_HANDSHAKE_LINK_TRAINING); + dev_dbg(dp->dev, "fast link training %s\n", + dp->fast_train_enable ? "supported" : "unsupported"); + return 0; +} + +static int analogix_dp_commit(struct analogix_dp_device *dp) { int ret; @@ -926,34 +1008,50 @@ static void analogix_dp_commit(struct analogix_dp_device *dp) DRM_ERROR("failed to disable the panel\n"); } - ret = readx_poll_timeout(analogix_dp_train_link, dp, ret, !ret, 100, - DP_TIMEOUT_TRAINING_US * 5); + ret = analogix_dp_train_link(dp); if (ret) { dev_err(dp->dev, "unable to do link train, ret=%d\n", ret); - return; + return ret; } - analogix_dp_enable_scramble(dp, 1); - analogix_dp_enable_rx_to_enhanced_mode(dp, 1); - analogix_dp_enable_enhanced_mode(dp, 1); + ret = analogix_dp_enable_scramble(dp, 1); + if (ret < 0) { + dev_err(dp->dev, "can not enable scramble\n"); + return ret; + } analogix_dp_init_video(dp); ret = analogix_dp_config_video(dp); - if (ret) + if (ret) { dev_err(dp->dev, "unable to config video\n"); + return ret; + } /* Safe to enable the panel now */ if (dp->plat_data->panel) { - if (drm_panel_enable(dp->plat_data->panel)) + ret = drm_panel_enable(dp->plat_data->panel); + if (ret) { DRM_ERROR("failed to enable the panel\n"); + return ret; + } } - /* Enable video */ - analogix_dp_start_video(dp); + ret = analogix_dp_detect_sink_psr(dp); + if (ret) + return ret; - dp->psr_enable = analogix_dp_detect_sink_psr(dp); - if (dp->psr_enable) - analogix_dp_enable_sink_psr(dp); + if (dp->psr_enable) { + ret = analogix_dp_enable_sink_psr(dp); + if (ret) + return ret; + } + + /* Check whether panel supports fast training */ + ret = analogix_dp_fast_link_train_detection(dp); + if (ret) + dp->psr_enable = false; + + return ret; } /* @@ -1150,24 +1248,80 @@ static void analogix_dp_bridge_pre_enable(struct drm_bridge *bridge) DRM_ERROR("failed to setup the panel ret = %d\n", ret); } -static void analogix_dp_bridge_enable(struct drm_bridge *bridge) +static int analogix_dp_set_bridge(struct analogix_dp_device *dp) { - struct analogix_dp_device *dp = bridge->driver_private; - - if (dp->dpms_mode == DRM_MODE_DPMS_ON) - return; + int ret; pm_runtime_get_sync(dp->dev); - if (dp->plat_data->power_on) - dp->plat_data->power_on(dp->plat_data); + ret = clk_prepare_enable(dp->clock); + if (ret < 0) { + DRM_ERROR("Failed to prepare_enable the clock clk [%d]\n", ret); + goto out_dp_clk_pre; + } + + if (dp->plat_data->power_on_start) + dp->plat_data->power_on_start(dp->plat_data); phy_power_on(dp->phy); - analogix_dp_init_dp(dp); + + ret = analogix_dp_init_dp(dp); + if (ret) + goto out_dp_init; + + /* + * According to DP spec v1.3 chap 3.5.1.2 Link Training, + * We should first make sure the HPD signal is asserted high by device + * when we want to establish a link with it. + */ + ret = analogix_dp_detect_hpd(dp); + if (ret) { + DRM_ERROR("failed to get hpd single ret = %d\n", ret); + goto out_dp_init; + } + + ret = analogix_dp_commit(dp); + if (ret) { + DRM_ERROR("dp commit error, ret = %d\n", ret); + goto out_dp_init; + } + + if (dp->plat_data->power_on_end) + dp->plat_data->power_on_end(dp->plat_data); + enable_irq(dp->irq); - analogix_dp_commit(dp); + return 0; - dp->dpms_mode = DRM_MODE_DPMS_ON; +out_dp_init: + phy_power_off(dp->phy); + if (dp->plat_data->power_off) + dp->plat_data->power_off(dp->plat_data); + clk_disable_unprepare(dp->clock); +out_dp_clk_pre: + pm_runtime_put_sync(dp->dev); + + return ret; +} + +static void analogix_dp_bridge_enable(struct drm_bridge *bridge) +{ + struct analogix_dp_device *dp = bridge->driver_private; + int timeout_loop = 0; + + if (dp->dpms_mode == DRM_MODE_DPMS_ON) + return; + + while (timeout_loop < MAX_PLL_LOCK_LOOP) { + if (analogix_dp_set_bridge(dp) == 0) { + dp->dpms_mode = DRM_MODE_DPMS_ON; + return; + } + dev_err(dp->dev, "failed to set bridge, retry: %d\n", + timeout_loop); + timeout_loop++; + usleep_range(10, 11); + } + dev_err(dp->dev, "too many times retry set bridge, give it up\n"); } static void analogix_dp_bridge_disable(struct drm_bridge *bridge) @@ -1186,11 +1340,15 @@ static void analogix_dp_bridge_disable(struct drm_bridge *bridge) } disable_irq(dp->irq); - phy_power_off(dp->phy); if (dp->plat_data->power_off) dp->plat_data->power_off(dp->plat_data); + analogix_dp_set_analog_power_down(dp, POWER_ALL, 1); + phy_power_off(dp->phy); + + clk_disable_unprepare(dp->clock); + pm_runtime_put_sync(dp->dev); ret = analogix_dp_prepare_panel(dp, false, true); @@ -1198,6 +1356,7 @@ static void analogix_dp_bridge_disable(struct drm_bridge *bridge) DRM_ERROR("failed to setup the panel ret = %d\n", ret); dp->psr_enable = false; + dp->fast_train_enable = false; dp->dpms_mode = DRM_MODE_DPMS_OFF; } diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h index 6a96ef7..769255d 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h +++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.h @@ -19,6 +19,7 @@ #define DP_TIMEOUT_LOOP_COUNT 100 #define MAX_CR_LOOP 5 #define MAX_EQ_LOOP 5 +#define MAX_PLL_LOCK_LOOP 5 /* Training takes 22ms if AUX channel comm fails. Use this as retry interval */ #define DP_TIMEOUT_TRAINING_US 22000 @@ -173,7 +174,7 @@ struct analogix_dp_device { int hpd_gpio; bool force_hpd; bool psr_enable; - bool fast_train_support; + bool fast_train_enable; struct mutex panel_lock; bool panel_is_modeset; @@ -197,7 +198,7 @@ void analogix_dp_set_pll_power_down(struct analogix_dp_device *dp, bool enable); void analogix_dp_set_analog_power_down(struct analogix_dp_device *dp, enum analog_power_block block, bool enable); -void analogix_dp_init_analog_func(struct analogix_dp_device *dp); +int analogix_dp_init_analog_func(struct analogix_dp_device *dp); void analogix_dp_init_hpd(struct analogix_dp_device *dp); void analogix_dp_force_hpd(struct analogix_dp_device *dp); enum dp_irq_type analogix_dp_get_irq_type(struct analogix_dp_device *dp); diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c index 9df2f3e..a5f2763 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c +++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.c @@ -126,9 +126,14 @@ void analogix_dp_reset(struct analogix_dp_device *dp) analogix_dp_stop_video(dp); analogix_dp_enable_video_mute(dp, 0); - reg = MASTER_VID_FUNC_EN_N | SLAVE_VID_FUNC_EN_N | - AUD_FIFO_FUNC_EN_N | AUD_FUNC_EN_N | - HDCP_FUNC_EN_N | SW_FUNC_EN_N; + if (dp->plat_data && is_rockchip(dp->plat_data->dev_type)) + reg = RK_VID_CAP_FUNC_EN_N | RK_VID_FIFO_FUNC_EN_N | + SW_FUNC_EN_N; + else + reg = MASTER_VID_FUNC_EN_N | SLAVE_VID_FUNC_EN_N | + AUD_FIFO_FUNC_EN_N | AUD_FUNC_EN_N | + HDCP_FUNC_EN_N | SW_FUNC_EN_N; + writel(reg, dp->reg_base + ANALOGIX_DP_FUNC_EN_1); reg = SSC_FUNC_EN_N | AUX_FUNC_EN_N | @@ -230,16 +235,20 @@ enum pll_status analogix_dp_get_pll_lock_status(struct analogix_dp_device *dp) void analogix_dp_set_pll_power_down(struct analogix_dp_device *dp, bool enable) { u32 reg; + u32 mask = DP_PLL_PD; + u32 pd_addr = ANALOGIX_DP_PLL_CTL; - if (enable) { - reg = readl(dp->reg_base + ANALOGIX_DP_PLL_CTL); - reg |= DP_PLL_PD; - writel(reg, dp->reg_base + ANALOGIX_DP_PLL_CTL); - } else { - reg = readl(dp->reg_base + ANALOGIX_DP_PLL_CTL); - reg &= ~DP_PLL_PD; - writel(reg, dp->reg_base + ANALOGIX_DP_PLL_CTL); + if (dp->plat_data && is_rockchip(dp->plat_data->dev_type)) { + pd_addr = ANALOGIX_DP_PD; + mask = RK_PLL_PD; } + + reg = readl(dp->reg_base + pd_addr); + if (enable) + reg |= mask; + else + reg &= ~mask; + writel(reg, dp->reg_base + pd_addr); } void analogix_dp_set_analog_power_down(struct analogix_dp_device *dp, @@ -248,83 +257,98 @@ void analogix_dp_set_analog_power_down(struct analogix_dp_device *dp, { u32 reg; u32 phy_pd_addr = ANALOGIX_DP_PHY_PD; + u32 mask; if (dp->plat_data && is_rockchip(dp->plat_data->dev_type)) phy_pd_addr = ANALOGIX_DP_PD; switch (block) { case AUX_BLOCK: - if (enable) { - reg = readl(dp->reg_base + phy_pd_addr); - reg |= AUX_PD; - writel(reg, dp->reg_base + phy_pd_addr); - } else { - reg = readl(dp->reg_base + phy_pd_addr); - reg &= ~AUX_PD; - writel(reg, dp->reg_base + phy_pd_addr); - } + if (dp->plat_data && is_rockchip(dp->plat_data->dev_type)) + mask = RK_AUX_PD; + else + mask = AUX_PD; + + reg = readl(dp->reg_base + phy_pd_addr); + if (enable) + reg |= mask; + else + reg &= ~mask; + writel(reg, dp->reg_base + phy_pd_addr); break; case CH0_BLOCK: - if (enable) { - reg = readl(dp->reg_base + phy_pd_addr); - reg |= CH0_PD; - writel(reg, dp->reg_base + phy_pd_addr); - } else { - reg = readl(dp->reg_base + phy_pd_addr); - reg &= ~CH0_PD; - writel(reg, dp->reg_base + phy_pd_addr); - } + mask = CH0_PD; + reg = readl(dp->reg_base + phy_pd_addr); + + if (enable) + reg |= mask; + else + reg &= ~mask; + writel(reg, dp->reg_base + phy_pd_addr); break; case CH1_BLOCK: - if (enable) { - reg = readl(dp->reg_base + phy_pd_addr); - reg |= CH1_PD; - writel(reg, dp->reg_base + phy_pd_addr); - } else { - reg = readl(dp->reg_base + phy_pd_addr); - reg &= ~CH1_PD; - writel(reg, dp->reg_base + phy_pd_addr); - } + mask = CH1_PD; + reg = readl(dp->reg_base + phy_pd_addr); + + if (enable) + reg |= mask; + else + reg &= ~mask; + writel(reg, dp->reg_base + phy_pd_addr); break; case CH2_BLOCK: - if (enable) { - reg = readl(dp->reg_base + phy_pd_addr); - reg |= CH2_PD; - writel(reg, dp->reg_base + phy_pd_addr); - } else { - reg = readl(dp->reg_base + phy_pd_addr); - reg &= ~CH2_PD; - writel(reg, dp->reg_base + phy_pd_addr); - } + mask = CH2_PD; + reg = readl(dp->reg_base + phy_pd_addr); + + if (enable) + reg |= mask; + else + reg &= ~mask; + writel(reg, dp->reg_base + phy_pd_addr); break; case CH3_BLOCK: - if (enable) { - reg = readl(dp->reg_base + phy_pd_addr); - reg |= CH3_PD; - writel(reg, dp->reg_base + phy_pd_addr); - } else { - reg = readl(dp->reg_base + phy_pd_addr); - reg &= ~CH3_PD; - writel(reg, dp->reg_base + phy_pd_addr); - } + mask = CH3_PD; + reg = readl(dp->reg_base + phy_pd_addr); + + if (enable) + reg |= mask; + else + reg &= ~mask; + writel(reg, dp->reg_base + phy_pd_addr); break; case ANALOG_TOTAL: - if (enable) { - reg = readl(dp->reg_base + phy_pd_addr); - reg |= DP_PHY_PD; - writel(reg, dp->reg_base + phy_pd_addr); - } else { - reg = readl(dp->reg_base + phy_pd_addr); - reg &= ~DP_PHY_PD; - writel(reg, dp->reg_base + phy_pd_addr); - } + /* + * There is no bit named DP_PHY_PD, so We used DP_INC_BG + * to power off everything instead of DP_PHY_PD in + * Rockchip + */ + if (dp->plat_data && is_rockchip(dp->plat_data->dev_type)) + mask = DP_INC_BG; + else + mask = DP_PHY_PD; + + reg = readl(dp->reg_base + phy_pd_addr); + if (enable) + reg |= mask; + else + reg &= ~mask; + + writel(reg, dp->reg_base + phy_pd_addr); + if (dp->plat_data && is_rockchip(dp->plat_data->dev_type)) + usleep_range(10, 15); break; case POWER_ALL: if (enable) { - reg = DP_PHY_PD | AUX_PD | CH3_PD | CH2_PD | - CH1_PD | CH0_PD; + reg = DP_ALL_PD; writel(reg, dp->reg_base + phy_pd_addr); } else { + reg = DP_ALL_PD; + writel(reg, dp->reg_base + phy_pd_addr); + usleep_range(10, 15); + reg &= ~DP_INC_BG; + writel(reg, dp->reg_base + phy_pd_addr); + usleep_range(10, 15); + writel(0x00, dp->reg_base + phy_pd_addr); } break; @@ -333,7 +357,7 @@ void analogix_dp_set_analog_power_down(struct analogix_dp_device *dp, } } -void analogix_dp_init_analog_func(struct analogix_dp_device *dp) +int analogix_dp_init_analog_func(struct analogix_dp_device *dp) { u32 reg; int timeout_loop = 0; @@ -355,7 +379,7 @@ void analogix_dp_init_analog_func(struct analogix_dp_device *dp) timeout_loop++; if (DP_TIMEOUT_LOOP_COUNT < timeout_loop) { dev_err(dp->dev, "failed to get pll lock status\n"); - return; + return -ETIMEDOUT; } usleep_range(10, 20); } @@ -366,6 +390,7 @@ void analogix_dp_init_analog_func(struct analogix_dp_device *dp) reg &= ~(SERDES_FIFO_FUNC_EN_N | LS_CLK_DOMAIN_FUNC_EN_N | AUX_FUNC_EN_N); writel(reg, dp->reg_base + ANALOGIX_DP_FUNC_EN_2); + return 0; } void analogix_dp_clear_hotplug_interrupts(struct analogix_dp_device *dp) @@ -450,17 +475,22 @@ void analogix_dp_init_aux(struct analogix_dp_device *dp) reg = RPLY_RECEIV | AUX_ERR; writel(reg, dp->reg_base + ANALOGIX_DP_INT_STA); + analogix_dp_set_analog_power_down(dp, AUX_BLOCK, true); + usleep_range(10, 11); + analogix_dp_set_analog_power_down(dp, AUX_BLOCK, false); + analogix_dp_reset_aux(dp); - /* Disable AUX transaction H/W retry */ + /* AUX_BIT_PERIOD_EXPECTED_DELAY doesn't apply to Rockchip IP */ if (dp->plat_data && is_rockchip(dp->plat_data->dev_type)) - reg = AUX_BIT_PERIOD_EXPECTED_DELAY(0) | - AUX_HW_RETRY_COUNT_SEL(3) | - AUX_HW_RETRY_INTERVAL_600_MICROSECONDS; + reg = 0; else - reg = AUX_BIT_PERIOD_EXPECTED_DELAY(3) | - AUX_HW_RETRY_COUNT_SEL(0) | - AUX_HW_RETRY_INTERVAL_600_MICROSECONDS; + reg = AUX_BIT_PERIOD_EXPECTED_DELAY(3); + + /* Disable AUX transaction H/W retry */ + reg |= AUX_HW_RETRY_COUNT_SEL(0) | + AUX_HW_RETRY_INTERVAL_600_MICROSECONDS; + writel(reg, dp->reg_base + ANALOGIX_DP_AUX_HW_RETRY_CTL); /* Receive AUX Channel DEFER commands equal to DEFFER_COUNT*64 */ @@ -947,8 +977,12 @@ void analogix_dp_config_video_slave_mode(struct analogix_dp_device *dp) u32 reg; reg = readl(dp->reg_base + ANALOGIX_DP_FUNC_EN_1); - reg &= ~(MASTER_VID_FUNC_EN_N | SLAVE_VID_FUNC_EN_N); - reg |= MASTER_VID_FUNC_EN_N; + if (dp->plat_data && is_rockchip(dp->plat_data->dev_type)) { + reg &= ~(RK_VID_CAP_FUNC_EN_N | RK_VID_FIFO_FUNC_EN_N); + } else { + reg &= ~(MASTER_VID_FUNC_EN_N | SLAVE_VID_FUNC_EN_N); + reg |= MASTER_VID_FUNC_EN_N; + } writel(reg, dp->reg_base + ANALOGIX_DP_FUNC_EN_1); reg = readl(dp->reg_base + ANALOGIX_DP_VIDEO_CTL_10); @@ -1072,10 +1106,11 @@ ssize_t analogix_dp_transfer(struct analogix_dp_device *dp, struct drm_dp_aux_msg *msg) { u32 reg; + u32 status_reg; u8 *buffer = msg->buffer; - int timeout_loop = 0; unsigned int i; int num_transferred = 0; + int ret; /* Buffer size of AUX CH is 16 bytes */ if (WARN_ON(msg->size > 16)) @@ -1139,17 +1174,20 @@ ssize_t analogix_dp_transfer(struct analogix_dp_device *dp, writel(reg, dp->reg_base + ANALOGIX_DP_AUX_CH_CTL_2); - /* Is AUX CH command reply received? */ + ret = readx_poll_timeout(readl, dp->reg_base + ANALOGIX_DP_AUX_CH_CTL_2, + reg, !(reg & AUX_EN), 25, 500 * 1000); + if (ret) { + dev_err(dp->dev, "AUX CH enable timeout!\n"); + goto aux_error; + } + /* TODO: Wait for an interrupt instead of looping? */ - reg = readl(dp->reg_base + ANALOGIX_DP_INT_STA); - while (!(reg & RPLY_RECEIV)) { - timeout_loop++; - if (timeout_loop > DP_TIMEOUT_LOOP_COUNT) { - dev_err(dp->dev, "AUX CH command reply failed!\n"); - return -ETIMEDOUT; - } - reg = readl(dp->reg_base + ANALOGIX_DP_INT_STA); - usleep_range(10, 11); + /* Is AUX CH command reply received? */ + ret = readx_poll_timeout(readl, dp->reg_base + ANALOGIX_DP_INT_STA, + reg, reg & RPLY_RECEIV, 10, 20 * 1000); + if (ret) { + dev_err(dp->dev, "AUX CH cmd reply timeout!\n"); + goto aux_error; } /* Clear interrupt source for AUX CH command reply */ @@ -1157,17 +1195,13 @@ ssize_t analogix_dp_transfer(struct analogix_dp_device *dp, /* Clear interrupt source for AUX CH access error */ reg = readl(dp->reg_base + ANALOGIX_DP_INT_STA); - if (reg & AUX_ERR) { + status_reg = readl(dp->reg_base + ANALOGIX_DP_AUX_CH_STA); + if ((reg & AUX_ERR) || (status_reg & AUX_STATUS_MASK)) { writel(AUX_ERR, dp->reg_base + ANALOGIX_DP_INT_STA); - return -EREMOTEIO; - } - /* Check AUX CH error access status */ - reg = readl(dp->reg_base + ANALOGIX_DP_AUX_CH_STA); - if ((reg & AUX_STATUS_MASK)) { - dev_err(dp->dev, "AUX CH error happened: %d\n\n", - reg & AUX_STATUS_MASK); - return -EREMOTEIO; + dev_warn(dp->dev, "AUX CH error happened: %#x (%d)\n", + status_reg & AUX_STATUS_MASK, !!(reg & AUX_ERR)); + goto aux_error; } if (msg->request & DP_AUX_I2C_READ) { @@ -1193,4 +1227,10 @@ ssize_t analogix_dp_transfer(struct analogix_dp_device *dp, msg->reply = DP_AUX_NATIVE_REPLY_ACK; return num_transferred > 0 ? num_transferred : -EBUSY; + +aux_error: + /* if aux err happen, reset aux */ + analogix_dp_init_aux(dp); + + return -EREMOTEIO; } diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.h b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.h index 40200c6..0cf27c7 100644 --- a/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.h +++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_reg.h @@ -127,7 +127,9 @@ /* ANALOGIX_DP_FUNC_EN_1 */ #define MASTER_VID_FUNC_EN_N (0x1 << 7) +#define RK_VID_CAP_FUNC_EN_N (0x1 << 6) #define SLAVE_VID_FUNC_EN_N (0x1 << 5) +#define RK_VID_FIFO_FUNC_EN_N (0x1 << 5) #define AUD_FIFO_FUNC_EN_N (0x1 << 4) #define AUD_FUNC_EN_N (0x1 << 3) #define HDCP_FUNC_EN_N (0x1 << 2) @@ -342,12 +344,17 @@ #define DP_PLL_REF_BIT_1_2500V (0x7 << 0) /* ANALOGIX_DP_PHY_PD */ +#define DP_INC_BG (0x1 << 7) +#define DP_EXP_BG (0x1 << 6) #define DP_PHY_PD (0x1 << 5) +#define RK_AUX_PD (0x1 << 5) #define AUX_PD (0x1 << 4) +#define RK_PLL_PD (0x1 << 4) #define CH3_PD (0x1 << 3) #define CH2_PD (0x1 << 2) #define CH1_PD (0x1 << 1) #define CH0_PD (0x1 << 0) +#define DP_ALL_PD (0xff) /* ANALOGIX_DP_PHY_TEST */ #define MACRO_RST (0x1 << 5) diff --git a/drivers/gpu/drm/bridge/cdns-dsi.c b/drivers/gpu/drm/bridge/cdns-dsi.c new file mode 100644 index 0000000..c255fc3 --- /dev/null +++ b/drivers/gpu/drm/bridge/cdns-dsi.c @@ -0,0 +1,1623 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright: 2017 Cadence Design Systems, Inc. + * + * Author: Boris Brezillon <boris.brezillon@bootlin.com> + */ + +#include <drm/drm_atomic_helper.h> +#include <drm/drm_bridge.h> +#include <drm/drm_crtc_helper.h> +#include <drm/drm_mipi_dsi.h> +#include <drm/drm_panel.h> +#include <video/mipi_display.h> + +#include <linux/clk.h> +#include <linux/iopoll.h> +#include <linux/module.h> +#include <linux/of_address.h> +#include <linux/of_graph.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> +#include <linux/reset.h> + +#define IP_CONF 0x0 +#define SP_HS_FIFO_DEPTH(x) (((x) & GENMASK(30, 26)) >> 26) +#define SP_LP_FIFO_DEPTH(x) (((x) & GENMASK(25, 21)) >> 21) +#define VRS_FIFO_DEPTH(x) (((x) & GENMASK(20, 16)) >> 16) +#define DIRCMD_FIFO_DEPTH(x) (((x) & GENMASK(15, 13)) >> 13) +#define SDI_IFACE_32 BIT(12) +#define INTERNAL_DATAPATH_32 (0 << 10) +#define INTERNAL_DATAPATH_16 (1 << 10) +#define INTERNAL_DATAPATH_8 (3 << 10) +#define INTERNAL_DATAPATH_SIZE ((x) & GENMASK(11, 10)) +#define NUM_IFACE(x) ((((x) & GENMASK(9, 8)) >> 8) + 1) +#define MAX_LANE_NB(x) (((x) & GENMASK(7, 6)) >> 6) +#define RX_FIFO_DEPTH(x) ((x) & GENMASK(5, 0)) + +#define MCTL_MAIN_DATA_CTL 0x4 +#define TE_MIPI_POLLING_EN BIT(25) +#define TE_HW_POLLING_EN BIT(24) +#define DISP_EOT_GEN BIT(18) +#define HOST_EOT_GEN BIT(17) +#define DISP_GEN_CHECKSUM BIT(16) +#define DISP_GEN_ECC BIT(15) +#define BTA_EN BIT(14) +#define READ_EN BIT(13) +#define REG_TE_EN BIT(12) +#define IF_TE_EN(x) BIT(8 + (x)) +#define TVG_SEL BIT(6) +#define VID_EN BIT(5) +#define IF_VID_SELECT(x) ((x) << 2) +#define IF_VID_SELECT_MASK GENMASK(3, 2) +#define IF_VID_MODE BIT(1) +#define LINK_EN BIT(0) + +#define MCTL_MAIN_PHY_CTL 0x8 +#define HS_INVERT_DAT(x) BIT(19 + ((x) * 2)) +#define SWAP_PINS_DAT(x) BIT(18 + ((x) * 2)) +#define HS_INVERT_CLK BIT(17) +#define SWAP_PINS_CLK BIT(16) +#define HS_SKEWCAL_EN BIT(15) +#define WAIT_BURST_TIME(x) ((x) << 10) +#define DATA_ULPM_EN(x) BIT(6 + (x)) +#define CLK_ULPM_EN BIT(5) +#define CLK_CONTINUOUS BIT(4) +#define DATA_LANE_EN(x) BIT((x) - 1) + +#define MCTL_MAIN_EN 0xc +#define DATA_FORCE_STOP BIT(17) +#define CLK_FORCE_STOP BIT(16) +#define IF_EN(x) BIT(13 + (x)) +#define DATA_LANE_ULPM_REQ(l) BIT(9 + (l)) +#define CLK_LANE_ULPM_REQ BIT(8) +#define DATA_LANE_START(x) BIT(4 + (x)) +#define CLK_LANE_EN BIT(3) +#define PLL_START BIT(0) + +#define MCTL_DPHY_CFG0 0x10 +#define DPHY_C_RSTB BIT(20) +#define DPHY_D_RSTB(x) GENMASK(15 + (x), 16) +#define DPHY_PLL_PDN BIT(10) +#define DPHY_CMN_PDN BIT(9) +#define DPHY_C_PDN BIT(8) +#define DPHY_D_PDN(x) GENMASK(3 + (x), 4) +#define DPHY_ALL_D_PDN GENMASK(7, 4) +#define DPHY_PLL_PSO BIT(1) +#define DPHY_CMN_PSO BIT(0) + +#define MCTL_DPHY_TIMEOUT1 0x14 +#define HSTX_TIMEOUT(x) ((x) << 4) +#define HSTX_TIMEOUT_MAX GENMASK(17, 0) +#define CLK_DIV(x) (x) +#define CLK_DIV_MAX GENMASK(3, 0) + +#define MCTL_DPHY_TIMEOUT2 0x18 +#define LPRX_TIMEOUT(x) (x) + +#define MCTL_ULPOUT_TIME 0x1c +#define DATA_LANE_ULPOUT_TIME(x) ((x) << 9) +#define CLK_LANE_ULPOUT_TIME(x) (x) + +#define MCTL_3DVIDEO_CTL 0x20 +#define VID_VSYNC_3D_EN BIT(7) +#define VID_VSYNC_3D_LR BIT(5) +#define VID_VSYNC_3D_SECOND_EN BIT(4) +#define VID_VSYNC_3DFORMAT_LINE (0 << 2) +#define VID_VSYNC_3DFORMAT_FRAME (1 << 2) +#define VID_VSYNC_3DFORMAT_PIXEL (2 << 2) +#define VID_VSYNC_3DMODE_OFF 0 +#define VID_VSYNC_3DMODE_PORTRAIT 1 +#define VID_VSYNC_3DMODE_LANDSCAPE 2 + +#define MCTL_MAIN_STS 0x24 +#define MCTL_MAIN_STS_CTL 0x130 +#define MCTL_MAIN_STS_CLR 0x150 +#define MCTL_MAIN_STS_FLAG 0x170 +#define HS_SKEWCAL_DONE BIT(11) +#define IF_UNTERM_PKT_ERR(x) BIT(8 + (x)) +#define LPRX_TIMEOUT_ERR BIT(7) +#define HSTX_TIMEOUT_ERR BIT(6) +#define DATA_LANE_RDY(l) BIT(2 + (l)) +#define CLK_LANE_RDY BIT(1) +#define PLL_LOCKED BIT(0) + +#define MCTL_DPHY_ERR 0x28 +#define MCTL_DPHY_ERR_CTL1 0x148 +#define MCTL_DPHY_ERR_CLR 0x168 +#define MCTL_DPHY_ERR_FLAG 0x188 +#define ERR_CONT_LP(x, l) BIT(18 + ((x) * 4) + (l)) +#define ERR_CONTROL(l) BIT(14 + (l)) +#define ERR_SYNESC(l) BIT(10 + (l)) +#define ERR_ESC(l) BIT(6 + (l)) + +#define MCTL_DPHY_ERR_CTL2 0x14c +#define ERR_CONT_LP_EDGE(x, l) BIT(12 + ((x) * 4) + (l)) +#define ERR_CONTROL_EDGE(l) BIT(8 + (l)) +#define ERR_SYN_ESC_EDGE(l) BIT(4 + (l)) +#define ERR_ESC_EDGE(l) BIT(0 + (l)) + +#define MCTL_LANE_STS 0x2c +#define PPI_C_TX_READY_HS BIT(18) +#define DPHY_PLL_LOCK BIT(17) +#define PPI_D_RX_ULPS_ESC(x) (((x) & GENMASK(15, 12)) >> 12) +#define LANE_STATE_START 0 +#define LANE_STATE_IDLE 1 +#define LANE_STATE_WRITE 2 +#define LANE_STATE_ULPM 3 +#define LANE_STATE_READ 4 +#define DATA_LANE_STATE(l, val) \ + (((val) >> (2 + 2 * (l) + ((l) ? 1 : 0))) & GENMASK((l) ? 1 : 2, 0)) +#define CLK_LANE_STATE_HS 2 +#define CLK_LANE_STATE(val) ((val) & GENMASK(1, 0)) + +#define DSC_MODE_CTL 0x30 +#define DSC_MODE_EN BIT(0) + +#define DSC_CMD_SEND 0x34 +#define DSC_SEND_PPS BIT(0) +#define DSC_EXECUTE_QUEUE BIT(1) + +#define DSC_PPS_WRDAT 0x38 + +#define DSC_MODE_STS 0x3c +#define DSC_PPS_DONE BIT(1) +#define DSC_EXEC_DONE BIT(2) + +#define CMD_MODE_CTL 0x70 +#define IF_LP_EN(x) BIT(9 + (x)) +#define IF_VCHAN_ID(x, c) ((c) << ((x) * 2)) + +#define CMD_MODE_CTL2 0x74 +#define TE_TIMEOUT(x) ((x) << 11) +#define FILL_VALUE(x) ((x) << 3) +#define ARB_IF_WITH_HIGHEST_PRIORITY(x) ((x) << 1) +#define ARB_ROUND_ROBIN_MODE BIT(0) + +#define CMD_MODE_STS 0x78 +#define CMD_MODE_STS_CTL 0x134 +#define CMD_MODE_STS_CLR 0x154 +#define CMD_MODE_STS_FLAG 0x174 +#define ERR_IF_UNDERRUN(x) BIT(4 + (x)) +#define ERR_UNWANTED_READ BIT(3) +#define ERR_TE_MISS BIT(2) +#define ERR_NO_TE BIT(1) +#define CSM_RUNNING BIT(0) + +#define DIRECT_CMD_SEND 0x80 + +#define DIRECT_CMD_MAIN_SETTINGS 0x84 +#define TRIGGER_VAL(x) ((x) << 25) +#define CMD_LP_EN BIT(24) +#define CMD_SIZE(x) ((x) << 16) +#define CMD_VCHAN_ID(x) ((x) << 14) +#define CMD_DATATYPE(x) ((x) << 8) +#define CMD_LONG BIT(3) +#define WRITE_CMD 0 +#define READ_CMD 1 +#define TE_REQ 4 +#define TRIGGER_REQ 5 +#define BTA_REQ 6 + +#define DIRECT_CMD_STS 0x88 +#define DIRECT_CMD_STS_CTL 0x138 +#define DIRECT_CMD_STS_CLR 0x158 +#define DIRECT_CMD_STS_FLAG 0x178 +#define RCVD_ACK_VAL(val) ((val) >> 16) +#define RCVD_TRIGGER_VAL(val) (((val) & GENMASK(14, 11)) >> 11) +#define READ_COMPLETED_WITH_ERR BIT(10) +#define BTA_FINISHED BIT(9) +#define BTA_COMPLETED BIT(8) +#define TE_RCVD BIT(7) +#define TRIGGER_RCVD BIT(6) +#define ACK_WITH_ERR_RCVD BIT(5) +#define ACK_RCVD BIT(4) +#define READ_COMPLETED BIT(3) +#define TRIGGER_COMPLETED BIT(2) +#define WRITE_COMPLETED BIT(1) +#define SENDING_CMD BIT(0) + +#define DIRECT_CMD_STOP_READ 0x8c + +#define DIRECT_CMD_WRDATA 0x90 + +#define DIRECT_CMD_FIFO_RST 0x94 + +#define DIRECT_CMD_RDDATA 0xa0 + +#define DIRECT_CMD_RD_PROPS 0xa4 +#define RD_DCS BIT(18) +#define RD_VCHAN_ID(val) (((val) >> 16) & GENMASK(1, 0)) +#define RD_SIZE(val) ((val) & GENMASK(15, 0)) + +#define DIRECT_CMD_RD_STS 0xa8 +#define DIRECT_CMD_RD_STS_CTL 0x13c +#define DIRECT_CMD_RD_STS_CLR 0x15c +#define DIRECT_CMD_RD_STS_FLAG 0x17c +#define ERR_EOT_WITH_ERR BIT(8) +#define ERR_MISSING_EOT BIT(7) +#define ERR_WRONG_LENGTH BIT(6) +#define ERR_OVERSIZE BIT(5) +#define ERR_RECEIVE BIT(4) +#define ERR_UNDECODABLE BIT(3) +#define ERR_CHECKSUM BIT(2) +#define ERR_UNCORRECTABLE BIT(1) +#define ERR_FIXED BIT(0) + +#define VID_MAIN_CTL 0xb0 +#define VID_IGNORE_MISS_VSYNC BIT(31) +#define VID_FIELD_SW BIT(28) +#define VID_INTERLACED_EN BIT(27) +#define RECOVERY_MODE(x) ((x) << 25) +#define RECOVERY_MODE_NEXT_HSYNC 0 +#define RECOVERY_MODE_NEXT_STOP_POINT 2 +#define RECOVERY_MODE_NEXT_VSYNC 3 +#define REG_BLKEOL_MODE(x) ((x) << 23) +#define REG_BLKLINE_MODE(x) ((x) << 21) +#define REG_BLK_MODE_NULL_PKT 0 +#define REG_BLK_MODE_BLANKING_PKT 1 +#define REG_BLK_MODE_LP 2 +#define SYNC_PULSE_HORIZONTAL BIT(20) +#define SYNC_PULSE_ACTIVE BIT(19) +#define BURST_MODE BIT(18) +#define VID_PIXEL_MODE_MASK GENMASK(17, 14) +#define VID_PIXEL_MODE_RGB565 (0 << 14) +#define VID_PIXEL_MODE_RGB666_PACKED (1 << 14) +#define VID_PIXEL_MODE_RGB666 (2 << 14) +#define VID_PIXEL_MODE_RGB888 (3 << 14) +#define VID_PIXEL_MODE_RGB101010 (4 << 14) +#define VID_PIXEL_MODE_RGB121212 (5 << 14) +#define VID_PIXEL_MODE_YUV420 (8 << 14) +#define VID_PIXEL_MODE_YUV422_PACKED (9 << 14) +#define VID_PIXEL_MODE_YUV422 (10 << 14) +#define VID_PIXEL_MODE_YUV422_24B (11 << 14) +#define VID_PIXEL_MODE_DSC_COMP (12 << 14) +#define VID_DATATYPE(x) ((x) << 8) +#define VID_VIRTCHAN_ID(iface, x) ((x) << (4 + (iface) * 2)) +#define STOP_MODE(x) ((x) << 2) +#define START_MODE(x) (x) + +#define VID_VSIZE1 0xb4 +#define VFP_LEN(x) ((x) << 12) +#define VBP_LEN(x) ((x) << 6) +#define VSA_LEN(x) (x) + +#define VID_VSIZE2 0xb8 +#define VACT_LEN(x) (x) + +#define VID_HSIZE1 0xc0 +#define HBP_LEN(x) ((x) << 16) +#define HSA_LEN(x) (x) + +#define VID_HSIZE2 0xc4 +#define HFP_LEN(x) ((x) << 16) +#define HACT_LEN(x) (x) + +#define VID_BLKSIZE1 0xcc +#define BLK_EOL_PKT_LEN(x) ((x) << 15) +#define BLK_LINE_EVENT_PKT_LEN(x) (x) + +#define VID_BLKSIZE2 0xd0 +#define BLK_LINE_PULSE_PKT_LEN(x) (x) + +#define VID_PKT_TIME 0xd8 +#define BLK_EOL_DURATION(x) (x) + +#define VID_DPHY_TIME 0xdc +#define REG_WAKEUP_TIME(x) ((x) << 17) +#define REG_LINE_DURATION(x) (x) + +#define VID_ERR_COLOR1 0xe0 +#define COL_GREEN(x) ((x) << 12) +#define COL_RED(x) (x) + +#define VID_ERR_COLOR2 0xe4 +#define PAD_VAL(x) ((x) << 12) +#define COL_BLUE(x) (x) + +#define VID_VPOS 0xe8 +#define LINE_VAL(val) (((val) & GENMASK(14, 2)) >> 2) +#define LINE_POS(val) ((val) & GENMASK(1, 0)) + +#define VID_HPOS 0xec +#define HORIZ_VAL(val) (((val) & GENMASK(17, 3)) >> 3) +#define HORIZ_POS(val) ((val) & GENMASK(2, 0)) + +#define VID_MODE_STS 0xf0 +#define VID_MODE_STS_CTL 0x140 +#define VID_MODE_STS_CLR 0x160 +#define VID_MODE_STS_FLAG 0x180 +#define VSG_RECOVERY BIT(10) +#define ERR_VRS_WRONG_LEN BIT(9) +#define ERR_LONG_READ BIT(8) +#define ERR_LINE_WRITE BIT(7) +#define ERR_BURST_WRITE BIT(6) +#define ERR_SMALL_HEIGHT BIT(5) +#define ERR_SMALL_LEN BIT(4) +#define ERR_MISSING_VSYNC BIT(3) +#define ERR_MISSING_HSYNC BIT(2) +#define ERR_MISSING_DATA BIT(1) +#define VSG_RUNNING BIT(0) + +#define VID_VCA_SETTING1 0xf4 +#define BURST_LP BIT(16) +#define MAX_BURST_LIMIT(x) (x) + +#define VID_VCA_SETTING2 0xf8 +#define MAX_LINE_LIMIT(x) ((x) << 16) +#define EXACT_BURST_LIMIT(x) (x) + +#define TVG_CTL 0xfc +#define TVG_STRIPE_SIZE(x) ((x) << 5) +#define TVG_MODE_MASK GENMASK(4, 3) +#define TVG_MODE_SINGLE_COLOR (0 << 3) +#define TVG_MODE_VSTRIPES (2 << 3) +#define TVG_MODE_HSTRIPES (3 << 3) +#define TVG_STOPMODE_MASK GENMASK(2, 1) +#define TVG_STOPMODE_EOF (0 << 1) +#define TVG_STOPMODE_EOL (1 << 1) +#define TVG_STOPMODE_NOW (2 << 1) +#define TVG_RUN BIT(0) + +#define TVG_IMG_SIZE 0x100 +#define TVG_NBLINES(x) ((x) << 16) +#define TVG_LINE_SIZE(x) (x) + +#define TVG_COLOR1 0x104 +#define TVG_COL1_GREEN(x) ((x) << 12) +#define TVG_COL1_RED(x) (x) + +#define TVG_COLOR1_BIS 0x108 +#define TVG_COL1_BLUE(x) (x) + +#define TVG_COLOR2 0x10c +#define TVG_COL2_GREEN(x) ((x) << 12) +#define TVG_COL2_RED(x) (x) + +#define TVG_COLOR2_BIS 0x110 +#define TVG_COL2_BLUE(x) (x) + +#define TVG_STS 0x114 +#define TVG_STS_CTL 0x144 +#define TVG_STS_CLR 0x164 +#define TVG_STS_FLAG 0x184 +#define TVG_STS_RUNNING BIT(0) + +#define STS_CTL_EDGE(e) ((e) << 16) + +#define DPHY_LANES_MAP 0x198 +#define DAT_REMAP_CFG(b, l) ((l) << ((b) * 8)) + +#define DPI_IRQ_EN 0x1a0 +#define DPI_IRQ_CLR 0x1a4 +#define DPI_IRQ_STS 0x1a8 +#define PIXEL_BUF_OVERFLOW BIT(0) + +#define DPI_CFG 0x1ac +#define DPI_CFG_FIFO_DEPTH(x) ((x) >> 16) +#define DPI_CFG_FIFO_LEVEL(x) ((x) & GENMASK(15, 0)) + +#define TEST_GENERIC 0x1f0 +#define TEST_STATUS(x) ((x) >> 16) +#define TEST_CTRL(x) (x) + +#define ID_REG 0x1fc +#define REV_VENDOR_ID(x) (((x) & GENMASK(31, 20)) >> 20) +#define REV_PRODUCT_ID(x) (((x) & GENMASK(19, 12)) >> 12) +#define REV_HW(x) (((x) & GENMASK(11, 8)) >> 8) +#define REV_MAJOR(x) (((x) & GENMASK(7, 4)) >> 4) +#define REV_MINOR(x) ((x) & GENMASK(3, 0)) + +#define DSI_OUTPUT_PORT 0 +#define DSI_INPUT_PORT(inputid) (1 + (inputid)) + +#define DSI_HBP_FRAME_OVERHEAD 12 +#define DSI_HSA_FRAME_OVERHEAD 14 +#define DSI_HFP_FRAME_OVERHEAD 6 +#define DSI_HSS_VSS_VSE_FRAME_OVERHEAD 4 +#define DSI_BLANKING_FRAME_OVERHEAD 6 +#define DSI_NULL_FRAME_OVERHEAD 6 +#define DSI_EOT_PKT_SIZE 4 + +#define REG_WAKEUP_TIME_NS 800 +#define DPHY_PLL_RATE_HZ 108000000 + +/* DPHY registers */ +#define DPHY_PMA_CMN(reg) (reg) +#define DPHY_PMA_LCLK(reg) (0x100 + (reg)) +#define DPHY_PMA_LDATA(lane, reg) (0x200 + ((lane) * 0x100) + (reg)) +#define DPHY_PMA_RCLK(reg) (0x600 + (reg)) +#define DPHY_PMA_RDATA(lane, reg) (0x700 + ((lane) * 0x100) + (reg)) +#define DPHY_PCS(reg) (0xb00 + (reg)) + +#define DPHY_CMN_SSM DPHY_PMA_CMN(0x20) +#define DPHY_CMN_SSM_EN BIT(0) +#define DPHY_CMN_TX_MODE_EN BIT(9) + +#define DPHY_CMN_PWM DPHY_PMA_CMN(0x40) +#define DPHY_CMN_PWM_DIV(x) ((x) << 20) +#define DPHY_CMN_PWM_LOW(x) ((x) << 10) +#define DPHY_CMN_PWM_HIGH(x) (x) + +#define DPHY_CMN_FBDIV DPHY_PMA_CMN(0x4c) +#define DPHY_CMN_FBDIV_VAL(low, high) (((high) << 11) | ((low) << 22)) +#define DPHY_CMN_FBDIV_FROM_REG (BIT(10) | BIT(21)) + +#define DPHY_CMN_OPIPDIV DPHY_PMA_CMN(0x50) +#define DPHY_CMN_IPDIV_FROM_REG BIT(0) +#define DPHY_CMN_IPDIV(x) ((x) << 1) +#define DPHY_CMN_OPDIV_FROM_REG BIT(6) +#define DPHY_CMN_OPDIV(x) ((x) << 7) + +#define DPHY_PSM_CFG DPHY_PCS(0x4) +#define DPHY_PSM_CFG_FROM_REG BIT(0) +#define DPHY_PSM_CLK_DIV(x) ((x) << 1) + +struct cdns_dsi_output { + struct mipi_dsi_device *dev; + struct drm_panel *panel; + struct drm_bridge *bridge; +}; + +enum cdns_dsi_input_id { + CDNS_SDI_INPUT, + CDNS_DPI_INPUT, + CDNS_DSC_INPUT, +}; + +struct cdns_dphy_cfg { + u8 pll_ipdiv; + u8 pll_opdiv; + u16 pll_fbdiv; + unsigned long lane_bps; + unsigned int nlanes; +}; + +struct cdns_dsi_cfg { + unsigned int hfp; + unsigned int hsa; + unsigned int hbp; + unsigned int hact; + unsigned int htotal; +}; + +struct cdns_dphy; + +enum cdns_dphy_clk_lane_cfg { + DPHY_CLK_CFG_LEFT_DRIVES_ALL = 0, + DPHY_CLK_CFG_LEFT_DRIVES_RIGHT = 1, + DPHY_CLK_CFG_LEFT_DRIVES_LEFT = 2, + DPHY_CLK_CFG_RIGHT_DRIVES_ALL = 3, +}; + +struct cdns_dphy_ops { + int (*probe)(struct cdns_dphy *dphy); + void (*remove)(struct cdns_dphy *dphy); + void (*set_psm_div)(struct cdns_dphy *dphy, u8 div); + void (*set_clk_lane_cfg)(struct cdns_dphy *dphy, + enum cdns_dphy_clk_lane_cfg cfg); + void (*set_pll_cfg)(struct cdns_dphy *dphy, + const struct cdns_dphy_cfg *cfg); + unsigned long (*get_wakeup_time_ns)(struct cdns_dphy *dphy); +}; + +struct cdns_dphy { + struct cdns_dphy_cfg cfg; + void __iomem *regs; + struct clk *psm_clk; + struct clk *pll_ref_clk; + const struct cdns_dphy_ops *ops; +}; + +struct cdns_dsi_input { + enum cdns_dsi_input_id id; + struct drm_bridge bridge; +}; + +struct cdns_dsi { + struct mipi_dsi_host base; + void __iomem *regs; + struct cdns_dsi_input input; + struct cdns_dsi_output output; + unsigned int direct_cmd_fifo_depth; + unsigned int rx_fifo_depth; + struct completion direct_cmd_comp; + struct clk *dsi_p_clk; + struct reset_control *dsi_p_rst; + struct clk *dsi_sys_clk; + bool link_initialized; + struct cdns_dphy *dphy; +}; + +static inline struct cdns_dsi *input_to_dsi(struct cdns_dsi_input *input) +{ + return container_of(input, struct cdns_dsi, input); +} + +static inline struct cdns_dsi *to_cdns_dsi(struct mipi_dsi_host *host) +{ + return container_of(host, struct cdns_dsi, base); +} + +static inline struct cdns_dsi_input * +bridge_to_cdns_dsi_input(struct drm_bridge *bridge) +{ + return container_of(bridge, struct cdns_dsi_input, bridge); +} + +static int cdns_dsi_get_dphy_pll_cfg(struct cdns_dphy *dphy, + struct cdns_dphy_cfg *cfg, + unsigned int dpi_htotal, + unsigned int dpi_bpp, + unsigned int dpi_hz, + unsigned int dsi_htotal, + unsigned int dsi_nlanes, + unsigned int *dsi_hfp_ext) +{ + u64 dlane_bps, dlane_bps_max, fbdiv, fbdiv_max, adj_dsi_htotal; + unsigned long pll_ref_hz = clk_get_rate(dphy->pll_ref_clk); + + memset(cfg, 0, sizeof(*cfg)); + + cfg->nlanes = dsi_nlanes; + + if (pll_ref_hz < 9600000 || pll_ref_hz >= 150000000) + return -EINVAL; + else if (pll_ref_hz < 19200000) + cfg->pll_ipdiv = 1; + else if (pll_ref_hz < 38400000) + cfg->pll_ipdiv = 2; + else if (pll_ref_hz < 76800000) + cfg->pll_ipdiv = 4; + else + cfg->pll_ipdiv = 8; + + /* + * Make sure DSI htotal is aligned on a lane boundary when calculating + * the expected data rate. This is done by extending HFP in case of + * misalignment. + */ + adj_dsi_htotal = dsi_htotal; + if (dsi_htotal % dsi_nlanes) + adj_dsi_htotal += dsi_nlanes - (dsi_htotal % dsi_nlanes); + + dlane_bps = (u64)dpi_hz * adj_dsi_htotal; + + /* data rate in bytes/sec is not an integer, refuse the mode. */ + if (do_div(dlane_bps, dsi_nlanes * dpi_htotal)) + return -EINVAL; + + /* data rate was in bytes/sec, convert to bits/sec. */ + dlane_bps *= 8; + + if (dlane_bps > 2500000000UL || dlane_bps < 160000000UL) + return -EINVAL; + else if (dlane_bps >= 1250000000) + cfg->pll_opdiv = 1; + else if (dlane_bps >= 630000000) + cfg->pll_opdiv = 2; + else if (dlane_bps >= 320000000) + cfg->pll_opdiv = 4; + else if (dlane_bps >= 160000000) + cfg->pll_opdiv = 8; + + /* + * Allow a deviation of 0.2% on the per-lane data rate to try to + * recover a potential mismatch between DPI and PPI clks. + */ + dlane_bps_max = dlane_bps + DIV_ROUND_DOWN_ULL(dlane_bps, 500); + fbdiv_max = DIV_ROUND_DOWN_ULL(dlane_bps_max * 2 * + cfg->pll_opdiv * cfg->pll_ipdiv, + pll_ref_hz); + fbdiv = DIV_ROUND_UP_ULL(dlane_bps * 2 * cfg->pll_opdiv * + cfg->pll_ipdiv, + pll_ref_hz); + + /* + * Iterate over all acceptable fbdiv and try to find an adjusted DSI + * htotal length providing an exact match. + * + * Note that we could do something even trickier by relying on the fact + * that a new line is not necessarily aligned on a lane boundary, so, + * by making adj_dsi_htotal non aligned on a dsi_lanes we can improve a + * bit the precision. With this, the step would be + * + * pll_ref_hz / (2 * opdiv * ipdiv * nlanes) + * + * instead of + * + * pll_ref_hz / (2 * opdiv * ipdiv) + * + * The drawback of this approach is that we would need to make sure the + * number or lines is a multiple of the realignment periodicity which is + * a function of the number of lanes and the original misalignment. For + * example, for NLANES = 4 and HTOTAL % NLANES = 3, it takes 4 lines + * to realign on a lane: + * LINE 0: expected number of bytes, starts emitting first byte of + * LINE 1 on LANE 3 + * LINE 1: expected number of bytes, starts emitting first 2 bytes of + * LINE 2 on LANES 2 and 3 + * LINE 2: expected number of bytes, starts emitting first 3 bytes of + * of LINE 3 on LANES 1, 2 and 3 + * LINE 3: one byte less, now things are realigned on LANE 0 for LINE 4 + * + * I figured this extra complexity was not worth the benefit, but if + * someone really has unfixable mismatch, that would be something to + * investigate. + */ + for (; fbdiv <= fbdiv_max; fbdiv++) { + u32 rem; + + adj_dsi_htotal = (u64)fbdiv * pll_ref_hz * dsi_nlanes * + dpi_htotal; + + /* + * Do the division in 2 steps to avoid an overflow on the + * divider. + */ + rem = do_div(adj_dsi_htotal, dpi_hz); + if (rem) + continue; + + rem = do_div(adj_dsi_htotal, + cfg->pll_opdiv * cfg->pll_ipdiv * 2 * 8); + if (rem) + continue; + + cfg->pll_fbdiv = fbdiv; + *dsi_hfp_ext = adj_dsi_htotal - dsi_htotal; + break; + } + + /* No match, let's just reject the display mode. */ + if (!cfg->pll_fbdiv) + return -EINVAL; + + dlane_bps = DIV_ROUND_DOWN_ULL((u64)dpi_hz * adj_dsi_htotal * 8, + dsi_nlanes * dpi_htotal); + cfg->lane_bps = dlane_bps; + + return 0; +} + +static int cdns_dphy_setup_psm(struct cdns_dphy *dphy) +{ + unsigned long psm_clk_hz = clk_get_rate(dphy->psm_clk); + unsigned long psm_div; + + if (!psm_clk_hz || psm_clk_hz > 100000000) + return -EINVAL; + + psm_div = DIV_ROUND_CLOSEST(psm_clk_hz, 1000000); + if (dphy->ops->set_psm_div) + dphy->ops->set_psm_div(dphy, psm_div); + + return 0; +} + +static void cdns_dphy_set_clk_lane_cfg(struct cdns_dphy *dphy, + enum cdns_dphy_clk_lane_cfg cfg) +{ + if (dphy->ops->set_clk_lane_cfg) + dphy->ops->set_clk_lane_cfg(dphy, cfg); +} + +static void cdns_dphy_set_pll_cfg(struct cdns_dphy *dphy, + const struct cdns_dphy_cfg *cfg) +{ + if (dphy->ops->set_pll_cfg) + dphy->ops->set_pll_cfg(dphy, cfg); +} + +static unsigned long cdns_dphy_get_wakeup_time_ns(struct cdns_dphy *dphy) +{ + return dphy->ops->get_wakeup_time_ns(dphy); +} + +static unsigned int dpi_to_dsi_timing(unsigned int dpi_timing, + unsigned int dpi_bpp, + unsigned int dsi_pkt_overhead) +{ + unsigned int dsi_timing = DIV_ROUND_UP(dpi_timing * dpi_bpp, 8); + + if (dsi_timing < dsi_pkt_overhead) + dsi_timing = 0; + else + dsi_timing -= dsi_pkt_overhead; + + return dsi_timing; +} + +static int cdns_dsi_mode2cfg(struct cdns_dsi *dsi, + const struct drm_display_mode *mode, + struct cdns_dsi_cfg *dsi_cfg, + struct cdns_dphy_cfg *dphy_cfg, + bool mode_valid_check) +{ + unsigned long dsi_htotal = 0, dsi_hss_hsa_hse_hbp = 0; + struct cdns_dsi_output *output = &dsi->output; + unsigned int dsi_hfp_ext = 0, dpi_hfp, tmp; + bool sync_pulse = false; + int bpp, nlanes, ret; + + memset(dsi_cfg, 0, sizeof(*dsi_cfg)); + + if (output->dev->mode_flags & MIPI_DSI_MODE_VIDEO_SYNC_PULSE) + sync_pulse = true; + + bpp = mipi_dsi_pixel_format_to_bpp(output->dev->format); + nlanes = output->dev->lanes; + + if (mode_valid_check) + tmp = mode->htotal - + (sync_pulse ? mode->hsync_end : mode->hsync_start); + else + tmp = mode->crtc_htotal - + (sync_pulse ? + mode->crtc_hsync_end : mode->crtc_hsync_start); + + dsi_cfg->hbp = dpi_to_dsi_timing(tmp, bpp, DSI_HBP_FRAME_OVERHEAD); + dsi_htotal += dsi_cfg->hbp + DSI_HBP_FRAME_OVERHEAD; + dsi_hss_hsa_hse_hbp += dsi_cfg->hbp + DSI_HBP_FRAME_OVERHEAD; + + if (sync_pulse) { + if (mode_valid_check) + tmp = mode->hsync_end - mode->hsync_start; + else + tmp = mode->crtc_hsync_end - mode->crtc_hsync_start; + + dsi_cfg->hsa = dpi_to_dsi_timing(tmp, bpp, + DSI_HSA_FRAME_OVERHEAD); + dsi_htotal += dsi_cfg->hsa + DSI_HSA_FRAME_OVERHEAD; + dsi_hss_hsa_hse_hbp += dsi_cfg->hsa + DSI_HSA_FRAME_OVERHEAD; + } + + dsi_cfg->hact = dpi_to_dsi_timing(mode_valid_check ? + mode->hdisplay : mode->crtc_hdisplay, + bpp, 0); + dsi_htotal += dsi_cfg->hact; + + if (mode_valid_check) + dpi_hfp = mode->hsync_start - mode->hdisplay; + else + dpi_hfp = mode->crtc_hsync_start - mode->crtc_hdisplay; + + dsi_cfg->hfp = dpi_to_dsi_timing(dpi_hfp, bpp, DSI_HFP_FRAME_OVERHEAD); + dsi_htotal += dsi_cfg->hfp + DSI_HFP_FRAME_OVERHEAD; + + if (mode_valid_check) + ret = cdns_dsi_get_dphy_pll_cfg(dsi->dphy, dphy_cfg, + mode->htotal, bpp, + mode->clock * 1000, + dsi_htotal, nlanes, + &dsi_hfp_ext); + else + ret = cdns_dsi_get_dphy_pll_cfg(dsi->dphy, dphy_cfg, + mode->crtc_htotal, bpp, + mode->crtc_clock * 1000, + dsi_htotal, nlanes, + &dsi_hfp_ext); + + if (ret) + return ret; + + dsi_cfg->hfp += dsi_hfp_ext; + dsi_htotal += dsi_hfp_ext; + dsi_cfg->htotal = dsi_htotal; + + /* + * Make sure DPI(HFP) > DSI(HSS+HSA+HSE+HBP) to guarantee that the FIFO + * is empty before we start a receiving a new line on the DPI + * interface. + */ + if ((u64)dphy_cfg->lane_bps * dpi_hfp * nlanes < + (u64)dsi_hss_hsa_hse_hbp * + (mode_valid_check ? mode->clock : mode->crtc_clock) * 1000) + return -EINVAL; + + return 0; +} + +static int cdns_dsi_bridge_attach(struct drm_bridge *bridge) +{ + struct cdns_dsi_input *input = bridge_to_cdns_dsi_input(bridge); + struct cdns_dsi *dsi = input_to_dsi(input); + struct cdns_dsi_output *output = &dsi->output; + + if (!drm_core_check_feature(bridge->dev, DRIVER_ATOMIC)) { + dev_err(dsi->base.dev, + "cdns-dsi driver is only compatible with DRM devices supporting atomic updates"); + return -ENOTSUPP; + } + + return drm_bridge_attach(bridge->encoder, output->bridge, bridge); +} + +static enum drm_mode_status +cdns_dsi_bridge_mode_valid(struct drm_bridge *bridge, + const struct drm_display_mode *mode) +{ + struct cdns_dsi_input *input = bridge_to_cdns_dsi_input(bridge); + struct cdns_dsi *dsi = input_to_dsi(input); + struct cdns_dsi_output *output = &dsi->output; + struct cdns_dphy_cfg dphy_cfg; + struct cdns_dsi_cfg dsi_cfg; + int bpp, nlanes, ret; + + /* + * VFP_DSI should be less than VFP_DPI and VFP_DSI should be at + * least 1. + */ + if (mode->vtotal - mode->vsync_end < 2) + return MODE_V_ILLEGAL; + + /* VSA_DSI = VSA_DPI and must be at least 2. */ + if (mode->vsync_end - mode->vsync_start < 2) + return MODE_V_ILLEGAL; + + /* HACT must be 32-bits aligned. */ + bpp = mipi_dsi_pixel_format_to_bpp(output->dev->format); + if ((mode->hdisplay * bpp) % 32) + return MODE_H_ILLEGAL; + + nlanes = output->dev->lanes; + + ret = cdns_dsi_mode2cfg(dsi, mode, &dsi_cfg, &dphy_cfg, true); + if (ret) + return MODE_CLOCK_RANGE; + + return MODE_OK; +} + +static void cdns_dsi_bridge_disable(struct drm_bridge *bridge) +{ + struct cdns_dsi_input *input = bridge_to_cdns_dsi_input(bridge); + struct cdns_dsi *dsi = input_to_dsi(input); + u32 val; + + val = readl(dsi->regs + MCTL_MAIN_DATA_CTL); + val &= ~(IF_VID_SELECT_MASK | IF_VID_MODE | VID_EN | HOST_EOT_GEN | + DISP_EOT_GEN); + writel(val, dsi->regs + MCTL_MAIN_DATA_CTL); + + val = readl(dsi->regs + MCTL_MAIN_EN) & ~IF_EN(input->id); + writel(val, dsi->regs + MCTL_MAIN_EN); + pm_runtime_put(dsi->base.dev); +} + +static void cdns_dsi_hs_init(struct cdns_dsi *dsi, + const struct cdns_dphy_cfg *dphy_cfg) +{ + u32 status; + + /* + * Power all internal DPHY blocks down and maintain their reset line + * asserted before changing the DPHY config. + */ + writel(DPHY_CMN_PSO | DPHY_PLL_PSO | DPHY_ALL_D_PDN | DPHY_C_PDN | + DPHY_CMN_PDN | DPHY_PLL_PDN, + dsi->regs + MCTL_DPHY_CFG0); + + /* + * Configure the internal PSM clk divider so that the DPHY has a + * 1MHz clk (or something close). + */ + WARN_ON_ONCE(cdns_dphy_setup_psm(dsi->dphy)); + + /* + * Configure attach clk lanes to data lanes: the DPHY has 2 clk lanes + * and 8 data lanes, each clk lane can be attache different set of + * data lanes. The 2 groups are named 'left' and 'right', so here we + * just say that we want the 'left' clk lane to drive the 'left' data + * lanes. + */ + cdns_dphy_set_clk_lane_cfg(dsi->dphy, DPHY_CLK_CFG_LEFT_DRIVES_LEFT); + + /* + * Configure the DPHY PLL that will be used to generate the TX byte + * clk. + */ + cdns_dphy_set_pll_cfg(dsi->dphy, dphy_cfg); + + /* Start TX state machine. */ + writel(DPHY_CMN_SSM_EN | DPHY_CMN_TX_MODE_EN, + dsi->dphy->regs + DPHY_CMN_SSM); + + /* Activate the PLL and wait until it's locked. */ + writel(PLL_LOCKED, dsi->regs + MCTL_MAIN_STS_CLR); + writel(DPHY_CMN_PSO | DPHY_ALL_D_PDN | DPHY_C_PDN | DPHY_CMN_PDN, + dsi->regs + MCTL_DPHY_CFG0); + WARN_ON_ONCE(readl_poll_timeout(dsi->regs + MCTL_MAIN_STS, status, + status & PLL_LOCKED, 100, 100)); + /* De-assert data and clock reset lines. */ + writel(DPHY_CMN_PSO | DPHY_ALL_D_PDN | DPHY_C_PDN | DPHY_CMN_PDN | + DPHY_D_RSTB(dphy_cfg->nlanes) | DPHY_C_RSTB, + dsi->regs + MCTL_DPHY_CFG0); +} + +static void cdns_dsi_init_link(struct cdns_dsi *dsi) +{ + struct cdns_dsi_output *output = &dsi->output; + unsigned long sysclk_period, ulpout; + u32 val; + int i; + + if (dsi->link_initialized) + return; + + val = 0; + for (i = 1; i < output->dev->lanes; i++) + val |= DATA_LANE_EN(i); + + if (!(output->dev->mode_flags & MIPI_DSI_CLOCK_NON_CONTINUOUS)) + val |= CLK_CONTINUOUS; + + writel(val, dsi->regs + MCTL_MAIN_PHY_CTL); + + /* ULPOUT should be set to 1ms and is expressed in sysclk cycles. */ + sysclk_period = NSEC_PER_SEC / clk_get_rate(dsi->dsi_sys_clk); + ulpout = DIV_ROUND_UP(NSEC_PER_MSEC, sysclk_period); + writel(CLK_LANE_ULPOUT_TIME(ulpout) | DATA_LANE_ULPOUT_TIME(ulpout), + dsi->regs + MCTL_ULPOUT_TIME); + + writel(LINK_EN, dsi->regs + MCTL_MAIN_DATA_CTL); + + val = CLK_LANE_EN | PLL_START; + for (i = 0; i < output->dev->lanes; i++) + val |= DATA_LANE_START(i); + + writel(val, dsi->regs + MCTL_MAIN_EN); + + dsi->link_initialized = true; +} + +static void cdns_dsi_bridge_enable(struct drm_bridge *bridge) +{ + struct cdns_dsi_input *input = bridge_to_cdns_dsi_input(bridge); + struct cdns_dsi *dsi = input_to_dsi(input); + struct cdns_dsi_output *output = &dsi->output; + struct drm_display_mode *mode; + struct cdns_dphy_cfg dphy_cfg; + unsigned long tx_byte_period; + struct cdns_dsi_cfg dsi_cfg; + u32 tmp, reg_wakeup, div; + int bpp, nlanes; + + if (WARN_ON(pm_runtime_get_sync(dsi->base.dev) < 0)) + return; + + mode = &bridge->encoder->crtc->state->adjusted_mode; + bpp = mipi_dsi_pixel_format_to_bpp(output->dev->format); + nlanes = output->dev->lanes; + + WARN_ON_ONCE(cdns_dsi_mode2cfg(dsi, mode, &dsi_cfg, &dphy_cfg, false)); + + cdns_dsi_hs_init(dsi, &dphy_cfg); + cdns_dsi_init_link(dsi); + + writel(HBP_LEN(dsi_cfg.hbp) | HSA_LEN(dsi_cfg.hsa), + dsi->regs + VID_HSIZE1); + writel(HFP_LEN(dsi_cfg.hfp) | HACT_LEN(dsi_cfg.hact), + dsi->regs + VID_HSIZE2); + + writel(VBP_LEN(mode->crtc_vtotal - mode->crtc_vsync_end - 1) | + VFP_LEN(mode->crtc_vsync_start - mode->crtc_vdisplay) | + VSA_LEN(mode->crtc_vsync_end - mode->crtc_vsync_start + 1), + dsi->regs + VID_VSIZE1); + writel(mode->crtc_vdisplay, dsi->regs + VID_VSIZE2); + + tmp = dsi_cfg.htotal - + (dsi_cfg.hsa + DSI_BLANKING_FRAME_OVERHEAD + + DSI_HSA_FRAME_OVERHEAD); + writel(BLK_LINE_PULSE_PKT_LEN(tmp), dsi->regs + VID_BLKSIZE2); + if (output->dev->mode_flags & MIPI_DSI_MODE_VIDEO_SYNC_PULSE) + writel(MAX_LINE_LIMIT(tmp - DSI_NULL_FRAME_OVERHEAD), + dsi->regs + VID_VCA_SETTING2); + + tmp = dsi_cfg.htotal - + (DSI_HSS_VSS_VSE_FRAME_OVERHEAD + DSI_BLANKING_FRAME_OVERHEAD); + writel(BLK_LINE_EVENT_PKT_LEN(tmp), dsi->regs + VID_BLKSIZE1); + if (!(output->dev->mode_flags & MIPI_DSI_MODE_VIDEO_SYNC_PULSE)) + writel(MAX_LINE_LIMIT(tmp - DSI_NULL_FRAME_OVERHEAD), + dsi->regs + VID_VCA_SETTING2); + + tmp = DIV_ROUND_UP(dsi_cfg.htotal, nlanes) - + DIV_ROUND_UP(dsi_cfg.hsa, nlanes); + + if (!(output->dev->mode_flags & MIPI_DSI_MODE_EOT_PACKET)) + tmp -= DIV_ROUND_UP(DSI_EOT_PKT_SIZE, nlanes); + + tx_byte_period = DIV_ROUND_DOWN_ULL((u64)NSEC_PER_SEC * 8, + dphy_cfg.lane_bps); + reg_wakeup = cdns_dphy_get_wakeup_time_ns(dsi->dphy) / + tx_byte_period; + writel(REG_WAKEUP_TIME(reg_wakeup) | REG_LINE_DURATION(tmp), + dsi->regs + VID_DPHY_TIME); + + /* + * HSTX and LPRX timeouts are both expressed in TX byte clk cycles and + * both should be set to at least the time it takes to transmit a + * frame. + */ + tmp = NSEC_PER_SEC / drm_mode_vrefresh(mode); + tmp /= tx_byte_period; + + for (div = 0; div <= CLK_DIV_MAX; div++) { + if (tmp <= HSTX_TIMEOUT_MAX) + break; + + tmp >>= 1; + } + + if (tmp > HSTX_TIMEOUT_MAX) + tmp = HSTX_TIMEOUT_MAX; + + writel(CLK_DIV(div) | HSTX_TIMEOUT(tmp), + dsi->regs + MCTL_DPHY_TIMEOUT1); + + writel(LPRX_TIMEOUT(tmp), dsi->regs + MCTL_DPHY_TIMEOUT2); + + if (output->dev->mode_flags & MIPI_DSI_MODE_VIDEO) { + switch (output->dev->format) { + case MIPI_DSI_FMT_RGB888: + tmp = VID_PIXEL_MODE_RGB888 | + VID_DATATYPE(MIPI_DSI_PACKED_PIXEL_STREAM_24); + break; + + case MIPI_DSI_FMT_RGB666: + tmp = VID_PIXEL_MODE_RGB666 | + VID_DATATYPE(MIPI_DSI_PIXEL_STREAM_3BYTE_18); + break; + + case MIPI_DSI_FMT_RGB666_PACKED: + tmp = VID_PIXEL_MODE_RGB666_PACKED | + VID_DATATYPE(MIPI_DSI_PACKED_PIXEL_STREAM_18); + break; + + case MIPI_DSI_FMT_RGB565: + tmp = VID_PIXEL_MODE_RGB565 | + VID_DATATYPE(MIPI_DSI_PACKED_PIXEL_STREAM_16); + break; + + default: + dev_err(dsi->base.dev, "Unsupported DSI format\n"); + return; + } + + if (output->dev->mode_flags & MIPI_DSI_MODE_VIDEO_SYNC_PULSE) + tmp |= SYNC_PULSE_ACTIVE | SYNC_PULSE_HORIZONTAL; + + tmp |= REG_BLKLINE_MODE(REG_BLK_MODE_BLANKING_PKT) | + REG_BLKEOL_MODE(REG_BLK_MODE_BLANKING_PKT) | + RECOVERY_MODE(RECOVERY_MODE_NEXT_HSYNC) | + VID_IGNORE_MISS_VSYNC; + + writel(tmp, dsi->regs + VID_MAIN_CTL); + } + + tmp = readl(dsi->regs + MCTL_MAIN_DATA_CTL); + tmp &= ~(IF_VID_SELECT_MASK | HOST_EOT_GEN | IF_VID_MODE); + + if (!(output->dev->mode_flags & MIPI_DSI_MODE_EOT_PACKET)) + tmp |= HOST_EOT_GEN; + + if (output->dev->mode_flags & MIPI_DSI_MODE_VIDEO) + tmp |= IF_VID_MODE | IF_VID_SELECT(input->id) | VID_EN; + + writel(tmp, dsi->regs + MCTL_MAIN_DATA_CTL); + + tmp = readl(dsi->regs + MCTL_MAIN_EN) | IF_EN(input->id); + writel(tmp, dsi->regs + MCTL_MAIN_EN); +} + +static const struct drm_bridge_funcs cdns_dsi_bridge_funcs = { + .attach = cdns_dsi_bridge_attach, + .mode_valid = cdns_dsi_bridge_mode_valid, + .disable = cdns_dsi_bridge_disable, + .enable = cdns_dsi_bridge_enable, +}; + +static int cdns_dsi_attach(struct mipi_dsi_host *host, + struct mipi_dsi_device *dev) +{ + struct cdns_dsi *dsi = to_cdns_dsi(host); + struct cdns_dsi_output *output = &dsi->output; + struct cdns_dsi_input *input = &dsi->input; + struct drm_bridge *bridge; + struct drm_panel *panel; + struct device_node *np; + int ret; + + /* + * We currently do not support connecting several DSI devices to the + * same host. In order to support that we'd need the DRM bridge + * framework to allow dynamic reconfiguration of the bridge chain. + */ + if (output->dev) + return -EBUSY; + + /* We do not support burst mode yet. */ + if (dev->mode_flags & MIPI_DSI_MODE_VIDEO_BURST) + return -ENOTSUPP; + + /* + * The host <-> device link might be described using an OF-graph + * representation, in this case we extract the device of_node from + * this representation, otherwise we use dsidev->dev.of_node which + * should have been filled by the core. + */ + np = of_graph_get_remote_node(dsi->base.dev->of_node, DSI_OUTPUT_PORT, + dev->channel); + if (!np) + np = of_node_get(dev->dev.of_node); + + panel = of_drm_find_panel(np); + if (panel) { + bridge = drm_panel_bridge_add(panel, DRM_MODE_CONNECTOR_DSI); + } else { + bridge = of_drm_find_bridge(dev->dev.of_node); + if (!bridge) + bridge = ERR_PTR(-EINVAL); + } + + of_node_put(np); + + if (IS_ERR(bridge)) { + ret = PTR_ERR(bridge); + dev_err(host->dev, "failed to add DSI device %s (err = %d)", + dev->name, ret); + return ret; + } + + output->dev = dev; + output->bridge = bridge; + output->panel = panel; + + /* + * The DSI output has been properly configured, we can now safely + * register the input to the bridge framework so that it can take place + * in a display pipeline. + */ + drm_bridge_add(&input->bridge); + + return 0; +} + +static int cdns_dsi_detach(struct mipi_dsi_host *host, + struct mipi_dsi_device *dev) +{ + struct cdns_dsi *dsi = to_cdns_dsi(host); + struct cdns_dsi_output *output = &dsi->output; + struct cdns_dsi_input *input = &dsi->input; + + drm_bridge_remove(&input->bridge); + if (output->panel) + drm_panel_bridge_remove(output->bridge); + + return 0; +} + +static irqreturn_t cdns_dsi_interrupt(int irq, void *data) +{ + struct cdns_dsi *dsi = data; + irqreturn_t ret = IRQ_NONE; + u32 flag, ctl; + + flag = readl(dsi->regs + DIRECT_CMD_STS_FLAG); + if (flag) { + ctl = readl(dsi->regs + DIRECT_CMD_STS_CTL); + ctl &= ~flag; + writel(ctl, dsi->regs + DIRECT_CMD_STS_CTL); + complete(&dsi->direct_cmd_comp); + ret = IRQ_HANDLED; + } + + return ret; +} + +static ssize_t cdns_dsi_transfer(struct mipi_dsi_host *host, + const struct mipi_dsi_msg *msg) +{ + struct cdns_dsi *dsi = to_cdns_dsi(host); + u32 cmd, sts, val, wait = WRITE_COMPLETED, ctl = 0; + struct mipi_dsi_packet packet; + int ret, i, tx_len, rx_len; + + ret = pm_runtime_get_sync(host->dev); + if (ret < 0) + return ret; + + cdns_dsi_init_link(dsi); + + ret = mipi_dsi_create_packet(&packet, msg); + if (ret) + goto out; + + tx_len = msg->tx_buf ? msg->tx_len : 0; + rx_len = msg->rx_buf ? msg->rx_len : 0; + + /* For read operations, the maximum TX len is 2. */ + if (rx_len && tx_len > 2) { + ret = -ENOTSUPP; + goto out; + } + + /* TX len is limited by the CMD FIFO depth. */ + if (tx_len > dsi->direct_cmd_fifo_depth) { + ret = -ENOTSUPP; + goto out; + } + + /* RX len is limited by the RX FIFO depth. */ + if (rx_len > dsi->rx_fifo_depth) { + ret = -ENOTSUPP; + goto out; + } + + cmd = CMD_SIZE(tx_len) | CMD_VCHAN_ID(msg->channel) | + CMD_DATATYPE(msg->type); + + if (msg->flags & MIPI_DSI_MSG_USE_LPM) + cmd |= CMD_LP_EN; + + if (mipi_dsi_packet_format_is_long(msg->type)) + cmd |= CMD_LONG; + + if (rx_len) { + cmd |= READ_CMD; + wait = READ_COMPLETED_WITH_ERR | READ_COMPLETED; + ctl = READ_EN | BTA_EN; + } else if (msg->flags & MIPI_DSI_MSG_REQ_ACK) { + cmd |= BTA_REQ; + wait = ACK_WITH_ERR_RCVD | ACK_RCVD; + ctl = BTA_EN; + } + + writel(readl(dsi->regs + MCTL_MAIN_DATA_CTL) | ctl, + dsi->regs + MCTL_MAIN_DATA_CTL); + + writel(cmd, dsi->regs + DIRECT_CMD_MAIN_SETTINGS); + + for (i = 0; i < tx_len; i += 4) { + const u8 *buf = msg->tx_buf; + int j; + + val = 0; + for (j = 0; j < 4 && j + i < tx_len; j++) + val |= (u32)buf[i + j] << (8 * j); + + writel(val, dsi->regs + DIRECT_CMD_WRDATA); + } + + /* Clear status flags before sending the command. */ + writel(wait, dsi->regs + DIRECT_CMD_STS_CLR); + writel(wait, dsi->regs + DIRECT_CMD_STS_CTL); + reinit_completion(&dsi->direct_cmd_comp); + writel(0, dsi->regs + DIRECT_CMD_SEND); + + wait_for_completion_timeout(&dsi->direct_cmd_comp, + msecs_to_jiffies(1000)); + + sts = readl(dsi->regs + DIRECT_CMD_STS); + writel(wait, dsi->regs + DIRECT_CMD_STS_CLR); + writel(0, dsi->regs + DIRECT_CMD_STS_CTL); + + writel(readl(dsi->regs + MCTL_MAIN_DATA_CTL) & ~ctl, + dsi->regs + MCTL_MAIN_DATA_CTL); + + /* We did not receive the events we were waiting for. */ + if (!(sts & wait)) { + ret = -ETIMEDOUT; + goto out; + } + + /* 'READ' or 'WRITE with ACK' failed. */ + if (sts & (READ_COMPLETED_WITH_ERR | ACK_WITH_ERR_RCVD)) { + ret = -EIO; + goto out; + } + + for (i = 0; i < rx_len; i += 4) { + u8 *buf = msg->rx_buf; + int j; + + val = readl(dsi->regs + DIRECT_CMD_RDDATA); + for (j = 0; j < 4 && j + i < rx_len; j++) + buf[i + j] = val >> (8 * j); + } + +out: + pm_runtime_put(host->dev); + return ret; +} + +static const struct mipi_dsi_host_ops cdns_dsi_ops = { + .attach = cdns_dsi_attach, + .detach = cdns_dsi_detach, + .transfer = cdns_dsi_transfer, +}; + +static int cdns_dsi_resume(struct device *dev) +{ + struct cdns_dsi *dsi = dev_get_drvdata(dev); + + reset_control_deassert(dsi->dsi_p_rst); + clk_prepare_enable(dsi->dsi_p_clk); + clk_prepare_enable(dsi->dsi_sys_clk); + clk_prepare_enable(dsi->dphy->psm_clk); + clk_prepare_enable(dsi->dphy->pll_ref_clk); + + return 0; +} + +static int cdns_dsi_suspend(struct device *dev) +{ + struct cdns_dsi *dsi = dev_get_drvdata(dev); + + clk_disable_unprepare(dsi->dphy->pll_ref_clk); + clk_disable_unprepare(dsi->dphy->psm_clk); + clk_disable_unprepare(dsi->dsi_sys_clk); + clk_disable_unprepare(dsi->dsi_p_clk); + reset_control_assert(dsi->dsi_p_rst); + dsi->link_initialized = false; + return 0; +} + +static UNIVERSAL_DEV_PM_OPS(cdns_dsi_pm_ops, cdns_dsi_suspend, cdns_dsi_resume, + NULL); + +static unsigned long cdns_dphy_ref_get_wakeup_time_ns(struct cdns_dphy *dphy) +{ + /* Default wakeup time is 800 ns (in a simulated environment). */ + return 800; +} + +static void cdns_dphy_ref_set_pll_cfg(struct cdns_dphy *dphy, + const struct cdns_dphy_cfg *cfg) +{ + u32 fbdiv_low, fbdiv_high; + + fbdiv_low = (cfg->pll_fbdiv / 4) - 2; + fbdiv_high = cfg->pll_fbdiv - fbdiv_low - 2; + + writel(DPHY_CMN_IPDIV_FROM_REG | DPHY_CMN_OPDIV_FROM_REG | + DPHY_CMN_IPDIV(cfg->pll_ipdiv) | + DPHY_CMN_OPDIV(cfg->pll_opdiv), + dphy->regs + DPHY_CMN_OPIPDIV); + writel(DPHY_CMN_FBDIV_FROM_REG | + DPHY_CMN_FBDIV_VAL(fbdiv_low, fbdiv_high), + dphy->regs + DPHY_CMN_FBDIV); + writel(DPHY_CMN_PWM_HIGH(6) | DPHY_CMN_PWM_LOW(0x101) | + DPHY_CMN_PWM_DIV(0x8), + dphy->regs + DPHY_CMN_PWM); +} + +static void cdns_dphy_ref_set_psm_div(struct cdns_dphy *dphy, u8 div) +{ + writel(DPHY_PSM_CFG_FROM_REG | DPHY_PSM_CLK_DIV(div), + dphy->regs + DPHY_PSM_CFG); +} + +/* + * This is the reference implementation of DPHY hooks. Specific integration of + * this IP may have to re-implement some of them depending on how they decided + * to wire things in the SoC. + */ +static const struct cdns_dphy_ops ref_dphy_ops = { + .get_wakeup_time_ns = cdns_dphy_ref_get_wakeup_time_ns, + .set_pll_cfg = cdns_dphy_ref_set_pll_cfg, + .set_psm_div = cdns_dphy_ref_set_psm_div, +}; + +static const struct of_device_id cdns_dphy_of_match[] = { + { .compatible = "cdns,dphy", .data = &ref_dphy_ops }, + { /* sentinel */ }, +}; + +static struct cdns_dphy *cdns_dphy_probe(struct platform_device *pdev) +{ + const struct of_device_id *match; + struct cdns_dphy *dphy; + struct of_phandle_args args; + struct resource res; + int ret; + + ret = of_parse_phandle_with_args(pdev->dev.of_node, "phys", + "#phy-cells", 0, &args); + if (ret) + return ERR_PTR(-ENOENT); + + match = of_match_node(cdns_dphy_of_match, args.np); + if (!match || !match->data) + return ERR_PTR(-EINVAL); + + dphy = devm_kzalloc(&pdev->dev, sizeof(*dphy), GFP_KERNEL); + if (!dphy) + return ERR_PTR(-ENOMEM); + + dphy->ops = match->data; + + ret = of_address_to_resource(args.np, 0, &res); + if (ret) + return ERR_PTR(ret); + + dphy->regs = devm_ioremap_resource(&pdev->dev, &res); + if (IS_ERR(dphy->regs)) + return ERR_CAST(dphy->regs); + + dphy->psm_clk = of_clk_get_by_name(args.np, "psm"); + if (IS_ERR(dphy->psm_clk)) + return ERR_CAST(dphy->psm_clk); + + dphy->pll_ref_clk = of_clk_get_by_name(args.np, "pll_ref"); + if (IS_ERR(dphy->pll_ref_clk)) { + ret = PTR_ERR(dphy->pll_ref_clk); + goto err_put_psm_clk; + } + + if (dphy->ops->probe) { + ret = dphy->ops->probe(dphy); + if (ret) + goto err_put_pll_ref_clk; + } + + return dphy; + +err_put_pll_ref_clk: + clk_put(dphy->pll_ref_clk); + +err_put_psm_clk: + clk_put(dphy->psm_clk); + + return ERR_PTR(ret); +} + +static void cdns_dphy_remove(struct cdns_dphy *dphy) +{ + if (dphy->ops->remove) + dphy->ops->remove(dphy); + + clk_put(dphy->pll_ref_clk); + clk_put(dphy->psm_clk); +} + +static int cdns_dsi_drm_probe(struct platform_device *pdev) +{ + struct cdns_dsi *dsi; + struct cdns_dsi_input *input; + struct resource *res; + int ret, irq; + u32 val; + + dsi = devm_kzalloc(&pdev->dev, sizeof(*dsi), GFP_KERNEL); + if (!dsi) + return -ENOMEM; + + platform_set_drvdata(pdev, dsi); + + input = &dsi->input; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + dsi->regs = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(dsi->regs)) + return PTR_ERR(dsi->regs); + + dsi->dsi_p_clk = devm_clk_get(&pdev->dev, "dsi_p_clk"); + if (IS_ERR(dsi->dsi_p_clk)) + return PTR_ERR(dsi->dsi_p_clk); + + dsi->dsi_p_rst = devm_reset_control_get_optional_exclusive(&pdev->dev, + "dsi_p_rst"); + if (IS_ERR(dsi->dsi_p_rst)) + return PTR_ERR(dsi->dsi_p_rst); + + dsi->dsi_sys_clk = devm_clk_get(&pdev->dev, "dsi_sys_clk"); + if (IS_ERR(dsi->dsi_sys_clk)) + return PTR_ERR(dsi->dsi_sys_clk); + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + + dsi->dphy = cdns_dphy_probe(pdev); + if (IS_ERR(dsi->dphy)) + return PTR_ERR(dsi->dphy); + + ret = clk_prepare_enable(dsi->dsi_p_clk); + if (ret) + goto err_remove_dphy; + + val = readl(dsi->regs + ID_REG); + if (REV_VENDOR_ID(val) != 0xcad) { + dev_err(&pdev->dev, "invalid vendor id\n"); + ret = -EINVAL; + goto err_disable_pclk; + } + + val = readl(dsi->regs + IP_CONF); + dsi->direct_cmd_fifo_depth = 1 << (DIRCMD_FIFO_DEPTH(val) + 2); + dsi->rx_fifo_depth = RX_FIFO_DEPTH(val); + init_completion(&dsi->direct_cmd_comp); + + writel(0, dsi->regs + MCTL_MAIN_DATA_CTL); + writel(0, dsi->regs + MCTL_MAIN_EN); + writel(0, dsi->regs + MCTL_MAIN_PHY_CTL); + + /* + * We only support the DPI input, so force input->id to + * CDNS_DPI_INPUT. + */ + input->id = CDNS_DPI_INPUT; + input->bridge.funcs = &cdns_dsi_bridge_funcs; + input->bridge.of_node = pdev->dev.of_node; + + /* Mask all interrupts before registering the IRQ handler. */ + writel(0, dsi->regs + MCTL_MAIN_STS_CTL); + writel(0, dsi->regs + MCTL_DPHY_ERR_CTL1); + writel(0, dsi->regs + CMD_MODE_STS_CTL); + writel(0, dsi->regs + DIRECT_CMD_STS_CTL); + writel(0, dsi->regs + DIRECT_CMD_RD_STS_CTL); + writel(0, dsi->regs + VID_MODE_STS_CTL); + writel(0, dsi->regs + TVG_STS_CTL); + writel(0, dsi->regs + DPI_IRQ_EN); + ret = devm_request_irq(&pdev->dev, irq, cdns_dsi_interrupt, 0, + dev_name(&pdev->dev), dsi); + if (ret) + goto err_disable_pclk; + + pm_runtime_enable(&pdev->dev); + dsi->base.dev = &pdev->dev; + dsi->base.ops = &cdns_dsi_ops; + + ret = mipi_dsi_host_register(&dsi->base); + if (ret) + goto err_disable_runtime_pm; + + clk_disable_unprepare(dsi->dsi_p_clk); + + return 0; + +err_disable_runtime_pm: + pm_runtime_disable(&pdev->dev); + +err_disable_pclk: + clk_disable_unprepare(dsi->dsi_p_clk); + +err_remove_dphy: + cdns_dphy_remove(dsi->dphy); + + return ret; +} + +static int cdns_dsi_drm_remove(struct platform_device *pdev) +{ + struct cdns_dsi *dsi = platform_get_drvdata(pdev); + + mipi_dsi_host_unregister(&dsi->base); + pm_runtime_disable(&pdev->dev); + cdns_dphy_remove(dsi->dphy); + + return 0; +} + +static const struct of_device_id cdns_dsi_of_match[] = { + { .compatible = "cdns,dsi" }, + { }, +}; + +static struct platform_driver cdns_dsi_platform_driver = { + .probe = cdns_dsi_drm_probe, + .remove = cdns_dsi_drm_remove, + .driver = { + .name = "cdns-dsi", + .of_match_table = cdns_dsi_of_match, + .pm = &cdns_dsi_pm_ops, + }, +}; +module_platform_driver(cdns_dsi_platform_driver); + +MODULE_AUTHOR("Boris Brezillon <boris.brezillon@bootlin.com>"); +MODULE_DESCRIPTION("Cadence DSI driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:cdns-dsi"); + diff --git a/drivers/gpu/drm/bridge/dumb-vga-dac.c b/drivers/gpu/drm/bridge/dumb-vga-dac.c index 498d594..9837c8d 100644 --- a/drivers/gpu/drm/bridge/dumb-vga-dac.c +++ b/drivers/gpu/drm/bridge/dumb-vga-dac.c @@ -56,7 +56,9 @@ static int dumb_vga_get_modes(struct drm_connector *connector) } drm_mode_connector_update_edid_property(connector, edid); - return drm_add_edid_modes(connector, edid); + ret = drm_add_edid_modes(connector, edid); + kfree(edid); + return ret; fallback: /* diff --git a/drivers/gpu/drm/bridge/sil-sii8620.c b/drivers/gpu/drm/bridge/sil-sii8620.c index 86789f8..7ab3604 100644 --- a/drivers/gpu/drm/bridge/sil-sii8620.c +++ b/drivers/gpu/drm/bridge/sil-sii8620.c @@ -17,6 +17,7 @@ #include <linux/clk.h> #include <linux/delay.h> +#include <linux/extcon.h> #include <linux/gpio/consumer.h> #include <linux/i2c.h> #include <linux/interrupt.h> @@ -25,6 +26,7 @@ #include <linux/list.h> #include <linux/module.h> #include <linux/mutex.h> +#include <linux/of_graph.h> #include <linux/regulator/consumer.h> #include <linux/slab.h> @@ -81,6 +83,10 @@ struct sii8620 { struct edid *edid; unsigned int gen2_write_burst:1; enum sii8620_mt_state mt_state; + struct extcon_dev *extcon; + struct notifier_block extcon_nb; + struct work_struct extcon_wq; + int cable_state; struct list_head mt_queue; struct { int r_size; @@ -2170,6 +2176,77 @@ static void sii8620_init_rcp_input_dev(struct sii8620 *ctx) ctx->rc_dev = rc_dev; } +static void sii8620_cable_out(struct sii8620 *ctx) +{ + disable_irq(to_i2c_client(ctx->dev)->irq); + sii8620_hw_off(ctx); +} + +static void sii8620_extcon_work(struct work_struct *work) +{ + struct sii8620 *ctx = + container_of(work, struct sii8620, extcon_wq); + int state = extcon_get_state(ctx->extcon, EXTCON_DISP_MHL); + + if (state == ctx->cable_state) + return; + + ctx->cable_state = state; + + if (state > 0) + sii8620_cable_in(ctx); + else + sii8620_cable_out(ctx); +} + +static int sii8620_extcon_notifier(struct notifier_block *self, + unsigned long event, void *ptr) +{ + struct sii8620 *ctx = + container_of(self, struct sii8620, extcon_nb); + + schedule_work(&ctx->extcon_wq); + + return NOTIFY_DONE; +} + +static int sii8620_extcon_init(struct sii8620 *ctx) +{ + struct extcon_dev *edev; + struct device_node *musb, *muic; + int ret; + + /* get micro-USB connector node */ + musb = of_graph_get_remote_node(ctx->dev->of_node, 1, -1); + /* next get micro-USB Interface Controller node */ + muic = of_get_next_parent(musb); + + if (!muic) { + dev_info(ctx->dev, "no extcon found, switching to 'always on' mode\n"); + return 0; + } + + edev = extcon_find_edev_by_node(muic); + of_node_put(muic); + if (IS_ERR(edev)) { + if (PTR_ERR(edev) == -EPROBE_DEFER) + return -EPROBE_DEFER; + dev_err(ctx->dev, "Invalid or missing extcon\n"); + return PTR_ERR(edev); + } + + ctx->extcon = edev; + ctx->extcon_nb.notifier_call = sii8620_extcon_notifier; + INIT_WORK(&ctx->extcon_wq, sii8620_extcon_work); + ret = extcon_register_notifier(edev, EXTCON_DISP_MHL, &ctx->extcon_nb); + if (ret) { + dev_err(ctx->dev, "failed to register notifier for MHL\n"); + return ret; + } + + return 0; +} + static inline struct sii8620 *bridge_to_sii8620(struct drm_bridge *bridge) { return container_of(bridge, struct sii8620, bridge); @@ -2302,13 +2379,20 @@ static int sii8620_probe(struct i2c_client *client, if (ret) return ret; + ret = sii8620_extcon_init(ctx); + if (ret < 0) { + dev_err(ctx->dev, "failed to initialize EXTCON\n"); + return ret; + } + i2c_set_clientdata(client, ctx); ctx->bridge.funcs = &sii8620_bridge_funcs; ctx->bridge.of_node = dev->of_node; drm_bridge_add(&ctx->bridge); - sii8620_cable_in(ctx); + if (!ctx->extcon) + sii8620_cable_in(ctx); return 0; } @@ -2317,8 +2401,15 @@ static int sii8620_remove(struct i2c_client *client) { struct sii8620 *ctx = i2c_get_clientdata(client); - disable_irq(to_i2c_client(ctx->dev)->irq); - sii8620_hw_off(ctx); + if (ctx->extcon) { + extcon_unregister_notifier(ctx->extcon, EXTCON_DISP_MHL, + &ctx->extcon_nb); + flush_work(&ctx->extcon_wq); + if (ctx->cable_state > 0) + sii8620_cable_out(ctx); + } else { + sii8620_cable_out(ctx); + } drm_bridge_remove(&ctx->bridge); return 0; diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c index 3b7e5c5..8f9c8a6 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi-i2s-audio.c @@ -152,7 +152,6 @@ static struct platform_driver snd_dw_hdmi_driver = { .remove = snd_dw_hdmi_remove, .driver = { .name = DRIVER_NAME, - .owner = THIS_MODULE, }, }; module_platform_driver(snd_dw_hdmi_driver); diff --git a/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c index 226171a..fd79996 100644 --- a/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c +++ b/drivers/gpu/drm/bridge/synopsys/dw-mipi-dsi.c @@ -1,12 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Copyright (c) 2016, Fuzhou Rockchip Electronics Co., Ltd * Copyright (C) STMicroelectronics SA 2017 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * * Modified by Philippe Cornu <philippe.cornu@st.com> * This generic Synopsys DesignWare MIPI DSI host driver is based on the * Rockchip version from rockchip/dw-mipi-dsi.c with phy & bridge APIs. @@ -775,20 +771,20 @@ static void dw_mipi_dsi_bridge_mode_set(struct drm_bridge *bridge, clk_prepare_enable(dsi->pclk); - ret = phy_ops->get_lane_mbps(priv_data, mode, dsi->mode_flags, + ret = phy_ops->get_lane_mbps(priv_data, adjusted_mode, dsi->mode_flags, dsi->lanes, dsi->format, &dsi->lane_mbps); if (ret) DRM_DEBUG_DRIVER("Phy get_lane_mbps() failed\n"); pm_runtime_get_sync(dsi->dev); dw_mipi_dsi_init(dsi); - dw_mipi_dsi_dpi_config(dsi, mode); + dw_mipi_dsi_dpi_config(dsi, adjusted_mode); dw_mipi_dsi_packet_handler_config(dsi); dw_mipi_dsi_video_mode_config(dsi); - dw_mipi_dsi_video_packet_config(dsi, mode); + dw_mipi_dsi_video_packet_config(dsi, adjusted_mode); dw_mipi_dsi_command_mode_config(dsi); - dw_mipi_dsi_line_timer_config(dsi, mode); - dw_mipi_dsi_vertical_timing_config(dsi, mode); + dw_mipi_dsi_line_timer_config(dsi, adjusted_mode); + dw_mipi_dsi_vertical_timing_config(dsi, adjusted_mode); dw_mipi_dsi_dphy_init(dsi); dw_mipi_dsi_dphy_timing_config(dsi); @@ -802,7 +798,7 @@ static void dw_mipi_dsi_bridge_mode_set(struct drm_bridge *bridge, dw_mipi_dsi_dphy_enable(dsi); - dw_mipi_dsi_wait_for_two_frames(mode); + dw_mipi_dsi_wait_for_two_frames(adjusted_mode); /* Switch to cmd mode for panel-bridge pre_enable & panel prepare */ dw_mipi_dsi_set_mode(dsi, 0); diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index 08ab7d6a..0fd9cf2 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -1102,7 +1102,7 @@ static bool tc_bridge_mode_fixup(struct drm_bridge *bridge, return true; } -static int tc_connector_mode_valid(struct drm_connector *connector, +static enum drm_mode_status tc_connector_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { /* DPI interface clock limitation: upto 154 MHz */ diff --git a/drivers/gpu/drm/bridge/thc63lvd1024.c b/drivers/gpu/drm/bridge/thc63lvd1024.c new file mode 100644 index 0000000..c8b9edd --- /dev/null +++ b/drivers/gpu/drm/bridge/thc63lvd1024.c @@ -0,0 +1,206 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * THC63LVD1024 LVDS to parallel data DRM bridge driver. + * + * Copyright (C) 2018 Jacopo Mondi <jacopo+renesas@jmondi.org> + */ + +#include <drm/drmP.h> +#include <drm/drm_bridge.h> +#include <drm/drm_panel.h> + +#include <linux/gpio/consumer.h> +#include <linux/of_graph.h> +#include <linux/regulator/consumer.h> +#include <linux/slab.h> + +enum thc63_ports { + THC63_LVDS_IN0, + THC63_LVDS_IN1, + THC63_RGB_OUT0, + THC63_RGB_OUT1, +}; + +struct thc63_dev { + struct device *dev; + + struct regulator *vcc; + + struct gpio_desc *pdwn; + struct gpio_desc *oe; + + struct drm_bridge bridge; + struct drm_bridge *next; +}; + +static inline struct thc63_dev *to_thc63(struct drm_bridge *bridge) +{ + return container_of(bridge, struct thc63_dev, bridge); +} + +static int thc63_attach(struct drm_bridge *bridge) +{ + struct thc63_dev *thc63 = to_thc63(bridge); + + return drm_bridge_attach(bridge->encoder, thc63->next, bridge); +} + +static void thc63_enable(struct drm_bridge *bridge) +{ + struct thc63_dev *thc63 = to_thc63(bridge); + int ret; + + ret = regulator_enable(thc63->vcc); + if (ret) { + dev_err(thc63->dev, + "Failed to enable regulator \"vcc\": %d\n", ret); + return; + } + + gpiod_set_value(thc63->pdwn, 0); + gpiod_set_value(thc63->oe, 1); +} + +static void thc63_disable(struct drm_bridge *bridge) +{ + struct thc63_dev *thc63 = to_thc63(bridge); + int ret; + + gpiod_set_value(thc63->oe, 0); + gpiod_set_value(thc63->pdwn, 1); + + ret = regulator_disable(thc63->vcc); + if (ret) + dev_err(thc63->dev, + "Failed to disable regulator \"vcc\": %d\n", ret); +} + +static const struct drm_bridge_funcs thc63_bridge_func = { + .attach = thc63_attach, + .enable = thc63_enable, + .disable = thc63_disable, +}; + +static int thc63_parse_dt(struct thc63_dev *thc63) +{ + struct device_node *thc63_out; + struct device_node *remote; + + thc63_out = of_graph_get_endpoint_by_regs(thc63->dev->of_node, + THC63_RGB_OUT0, -1); + if (!thc63_out) { + dev_err(thc63->dev, "Missing endpoint in port@%u\n", + THC63_RGB_OUT0); + return -ENODEV; + } + + remote = of_graph_get_remote_port_parent(thc63_out); + of_node_put(thc63_out); + if (!remote) { + dev_err(thc63->dev, "Endpoint in port@%u unconnected\n", + THC63_RGB_OUT0); + return -ENODEV; + } + + if (!of_device_is_available(remote)) { + dev_err(thc63->dev, "port@%u remote endpoint is disabled\n", + THC63_RGB_OUT0); + of_node_put(remote); + return -ENODEV; + } + + thc63->next = of_drm_find_bridge(remote); + of_node_put(remote); + if (!thc63->next) + return -EPROBE_DEFER; + + return 0; +} + +static int thc63_gpio_init(struct thc63_dev *thc63) +{ + thc63->oe = devm_gpiod_get_optional(thc63->dev, "oe", GPIOD_OUT_LOW); + if (IS_ERR(thc63->oe)) { + dev_err(thc63->dev, "Unable to get \"oe-gpios\": %ld\n", + PTR_ERR(thc63->oe)); + return PTR_ERR(thc63->oe); + } + + thc63->pdwn = devm_gpiod_get_optional(thc63->dev, "powerdown", + GPIOD_OUT_HIGH); + if (IS_ERR(thc63->pdwn)) { + dev_err(thc63->dev, "Unable to get \"powerdown-gpios\": %ld\n", + PTR_ERR(thc63->pdwn)); + return PTR_ERR(thc63->pdwn); + } + + return 0; +} + +static int thc63_probe(struct platform_device *pdev) +{ + struct thc63_dev *thc63; + int ret; + + thc63 = devm_kzalloc(&pdev->dev, sizeof(*thc63), GFP_KERNEL); + if (!thc63) + return -ENOMEM; + + thc63->dev = &pdev->dev; + platform_set_drvdata(pdev, thc63); + + thc63->vcc = devm_regulator_get_optional(thc63->dev, "vcc"); + if (IS_ERR(thc63->vcc)) { + if (PTR_ERR(thc63->vcc) == -EPROBE_DEFER) + return -EPROBE_DEFER; + + dev_err(thc63->dev, "Unable to get \"vcc\" supply: %ld\n", + PTR_ERR(thc63->vcc)); + return PTR_ERR(thc63->vcc); + } + + ret = thc63_gpio_init(thc63); + if (ret) + return ret; + + ret = thc63_parse_dt(thc63); + if (ret) + return ret; + + thc63->bridge.driver_private = thc63; + thc63->bridge.of_node = pdev->dev.of_node; + thc63->bridge.funcs = &thc63_bridge_func; + + drm_bridge_add(&thc63->bridge); + + return 0; +} + +static int thc63_remove(struct platform_device *pdev) +{ + struct thc63_dev *thc63 = platform_get_drvdata(pdev); + + drm_bridge_remove(&thc63->bridge); + + return 0; +} + +static const struct of_device_id thc63_match[] = { + { .compatible = "thine,thc63lvd1024", }, + { }, +}; +MODULE_DEVICE_TABLE(of, thc63_match); + +static struct platform_driver thc63_driver = { + .probe = thc63_probe, + .remove = thc63_remove, + .driver = { + .name = "thc63lvd1024", + .of_match_table = thc63_match, + }, +}; +module_platform_driver(thc63_driver); + +MODULE_AUTHOR("Jacopo Mondi <jacopo@jmondi.org>"); +MODULE_DESCRIPTION("Thine THC63LVD1024 LVDS decoder DRM bridge driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index 7d25c42..26a22f5 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -783,6 +783,8 @@ static int drm_atomic_plane_set_property(struct drm_plane *plane, state->src_w = val; } else if (property == config->prop_src_h) { state->src_h = val; + } else if (property == plane->alpha_property) { + state->alpha = val; } else if (property == plane->rotation_property) { if (!is_power_of_2(val & DRM_MODE_ROTATE_MASK)) return -EINVAL; @@ -848,6 +850,8 @@ drm_atomic_plane_get_property(struct drm_plane *plane, *val = state->src_w; } else if (property == config->prop_src_h) { *val = state->src_h; + } else if (property == plane->alpha_property) { + *val = state->alpha; } else if (property == plane->rotation_property) { *val = state->rotation; } else if (property == plane->zpos_property) { @@ -1421,7 +1425,9 @@ drm_atomic_set_crtc_for_plane(struct drm_plane_state *plane_state, { struct drm_plane *plane = plane_state->plane; struct drm_crtc_state *crtc_state; - + /* Nothing to do for same crtc*/ + if (plane_state->crtc == crtc) + return 0; if (plane_state->crtc) { crtc_state = drm_atomic_get_crtc_state(plane_state->state, plane_state->crtc); @@ -1492,6 +1498,14 @@ EXPORT_SYMBOL(drm_atomic_set_fb_for_plane); * Otherwise, if &drm_plane_state.fence is not set this function we just set it * with the received implicit fence. In both cases this function consumes a * reference for @fence. + * + * This way explicit fencing can be used to overrule implicit fencing, which is + * important to make explicit fencing use-cases work: One example is using one + * buffer for 2 screens with different refresh rates. Implicit fencing will + * clamp rendering to the refresh rate of the slower screen, whereas explicit + * fence allows 2 independent render and display loops on a single buffer. If a + * driver allows obeys both implicit and explicit fences for plane updates, then + * it will break all the benefits of explicit fencing. */ void drm_atomic_set_fence_for_plane(struct drm_plane_state *plane_state, @@ -1702,11 +1716,15 @@ int drm_atomic_check_only(struct drm_atomic_state *state) } } - if (config->funcs->atomic_check) + if (config->funcs->atomic_check) { ret = config->funcs->atomic_check(state->dev, state); - if (ret) - return ret; + if (ret) { + DRM_DEBUG_ATOMIC("atomic driver check for %p failed: %d\n", + state, ret); + return ret; + } + } if (!state->allow_modeset) { for_each_new_crtc_in_state(state, crtc, crtc_state, i) { diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c index c356545..130da51 100644 --- a/drivers/gpu/drm/drm_atomic_helper.c +++ b/drivers/gpu/drm/drm_atomic_helper.c @@ -766,7 +766,7 @@ int drm_atomic_helper_check_plane_state(struct drm_plane_state *plane_state, if (crtc_state->enable) drm_mode_get_hv_timing(&crtc_state->mode, &clip.x2, &clip.y2); - plane_state->visible = drm_rect_clip_scaled(src, dst, &clip, hscale, vscale); + plane_state->visible = drm_rect_clip_scaled(src, dst, &clip); drm_rect_rotate_inv(src, fb->width << 16, fb->height << 16, rotation); @@ -875,6 +875,11 @@ EXPORT_SYMBOL(drm_atomic_helper_check_planes); * functions depend upon an updated adjusted_mode.clock to e.g. properly compute * watermarks. * + * Note that zpos normalization will add all enable planes to the state which + * might not desired for some drivers. + * For example enable/disable of a cursor plane which have fixed zpos value + * would trigger all other enabled planes to be forced to the state change. + * * RETURNS: * Zero for success or -errno */ @@ -887,6 +892,12 @@ int drm_atomic_helper_check(struct drm_device *dev, if (ret) return ret; + if (dev->mode_config.normalize_zpos) { + ret = drm_atomic_normalize_zpos(dev, state); + if (ret) + return ret; + } + ret = drm_atomic_helper_check_planes(dev, state); if (ret) return ret; @@ -1561,6 +1572,17 @@ void drm_atomic_helper_async_commit(struct drm_device *dev, for_each_new_plane_in_state(state, plane, plane_state, i) { funcs = plane->helper_private; funcs->atomic_async_update(plane, plane_state); + + /* + * ->atomic_async_update() is supposed to update the + * plane->state in-place, make sure at least common + * properties have been properly updated. + */ + WARN_ON_ONCE(plane->state->fb != plane_state->fb); + WARN_ON_ONCE(plane->state->crtc_x != plane_state->crtc_x); + WARN_ON_ONCE(plane->state->crtc_y != plane_state->crtc_y); + WARN_ON_ONCE(plane->state->src_x != plane_state->src_x); + WARN_ON_ONCE(plane->state->src_y != plane_state->src_y); } } EXPORT_SYMBOL(drm_atomic_helper_async_commit); @@ -2659,7 +2681,7 @@ int drm_atomic_helper_disable_plane(struct drm_plane *plane, goto fail; } - if (plane_state->crtc && (plane == plane->crtc->cursor)) + if (plane_state->crtc && plane_state->crtc->cursor == plane) plane_state->state->legacy_cursor_update = true; ret = __drm_atomic_helper_disable_plane(plane, plane_state); @@ -2881,31 +2903,9 @@ commit: return 0; } -/** - * drm_atomic_helper_disable_all - disable all currently active outputs - * @dev: DRM device - * @ctx: lock acquisition context - * - * Loops through all connectors, finding those that aren't turned off and then - * turns them off by setting their DPMS mode to OFF and deactivating the CRTC - * that they are connected to. - * - * This is used for example in suspend/resume to disable all currently active - * functions when suspending. If you just want to shut down everything at e.g. - * driver unload, look at drm_atomic_helper_shutdown(). - * - * Note that if callers haven't already acquired all modeset locks this might - * return -EDEADLK, which must be handled by calling drm_modeset_backoff(). - * - * Returns: - * 0 on success or a negative error code on failure. - * - * See also: - * drm_atomic_helper_suspend(), drm_atomic_helper_resume() and - * drm_atomic_helper_shutdown(). - */ -int drm_atomic_helper_disable_all(struct drm_device *dev, - struct drm_modeset_acquire_ctx *ctx) +static int __drm_atomic_helper_disable_all(struct drm_device *dev, + struct drm_modeset_acquire_ctx *ctx, + bool clean_old_fbs) { struct drm_atomic_state *state; struct drm_connector_state *conn_state; @@ -2957,8 +2957,11 @@ int drm_atomic_helper_disable_all(struct drm_device *dev, goto free; drm_atomic_set_fb_for_plane(plane_state, NULL); - plane_mask |= BIT(drm_plane_index(plane)); - plane->old_fb = plane->fb; + + if (clean_old_fbs) { + plane->old_fb = plane->fb; + plane_mask |= BIT(drm_plane_index(plane)); + } } ret = drm_atomic_commit(state); @@ -2969,6 +2972,34 @@ free: return ret; } +/** + * drm_atomic_helper_disable_all - disable all currently active outputs + * @dev: DRM device + * @ctx: lock acquisition context + * + * Loops through all connectors, finding those that aren't turned off and then + * turns them off by setting their DPMS mode to OFF and deactivating the CRTC + * that they are connected to. + * + * This is used for example in suspend/resume to disable all currently active + * functions when suspending. If you just want to shut down everything at e.g. + * driver unload, look at drm_atomic_helper_shutdown(). + * + * Note that if callers haven't already acquired all modeset locks this might + * return -EDEADLK, which must be handled by calling drm_modeset_backoff(). + * + * Returns: + * 0 on success or a negative error code on failure. + * + * See also: + * drm_atomic_helper_suspend(), drm_atomic_helper_resume() and + * drm_atomic_helper_shutdown(). + */ +int drm_atomic_helper_disable_all(struct drm_device *dev, + struct drm_modeset_acquire_ctx *ctx) +{ + return __drm_atomic_helper_disable_all(dev, ctx, false); +} EXPORT_SYMBOL(drm_atomic_helper_disable_all); /** @@ -2991,7 +3022,7 @@ void drm_atomic_helper_shutdown(struct drm_device *dev) while (1) { ret = drm_modeset_lock_all_ctx(dev, &ctx); if (!ret) - ret = drm_atomic_helper_disable_all(dev, &ctx); + ret = __drm_atomic_helper_disable_all(dev, &ctx, true); if (ret != -EDEADLK) break; @@ -3095,14 +3126,14 @@ int drm_atomic_helper_commit_duplicated_state(struct drm_atomic_state *state, struct drm_connector_state *new_conn_state; struct drm_crtc *crtc; struct drm_crtc_state *new_crtc_state; - unsigned plane_mask = 0; - struct drm_device *dev = state->dev; - int ret; state->acquire_ctx = ctx; for_each_new_plane_in_state(state, plane, new_plane_state, i) { - plane_mask |= BIT(drm_plane_index(plane)); + WARN_ON(plane->crtc != new_plane_state->crtc); + WARN_ON(plane->fb != new_plane_state->fb); + WARN_ON(plane->old_fb); + state->planes[i].old_state = plane->state; } @@ -3112,11 +3143,7 @@ int drm_atomic_helper_commit_duplicated_state(struct drm_atomic_state *state, for_each_new_connector_in_state(state, connector, new_conn_state, i) state->connectors[i].old_state = connector->state; - ret = drm_atomic_commit(state); - if (plane_mask) - drm_atomic_clean_old_fb(dev, plane_mask, ret); - - return ret; + return drm_atomic_commit(state); } EXPORT_SYMBOL(drm_atomic_helper_commit_duplicated_state); @@ -3484,6 +3511,10 @@ void drm_atomic_helper_plane_reset(struct drm_plane *plane) if (plane->state) { plane->state->plane = plane; plane->state->rotation = DRM_MODE_ROTATE_0; + + /* Reset the alpha value to fully opaque if it matters */ + if (plane->alpha_property) + plane->state->alpha = plane->alpha_property->values[1]; } } EXPORT_SYMBOL(drm_atomic_helper_plane_reset); diff --git a/drivers/gpu/drm/drm_blend.c b/drivers/gpu/drm/drm_blend.c index 5a81e1b..a16a74d 100644 --- a/drivers/gpu/drm/drm_blend.c +++ b/drivers/gpu/drm/drm_blend.c @@ -88,6 +88,13 @@ * On top of this basic transformation additional properties can be exposed by * the driver: * + * alpha: + * Alpha is setup with drm_plane_create_alpha_property(). It controls the + * plane-wide opacity, from transparent (0) to opaque (0xffff). It can be + * combined with pixel alpha. + * The pixel values in the framebuffers are expected to not be + * pre-multiplied by the global alpha associated to the plane. + * * rotation: * Rotation is set up with drm_plane_create_rotation_property(). It adds a * rotation and reflection step between the source and destination rectangles. @@ -106,6 +113,38 @@ */ /** + * drm_plane_create_alpha_property - create a new alpha property + * @plane: drm plane + * + * This function creates a generic, mutable, alpha property and enables support + * for it in the DRM core. It is attached to @plane. + * + * The alpha property will be allowed to be within the bounds of 0 + * (transparent) to 0xffff (opaque). + * + * Returns: + * 0 on success, negative error code on failure. + */ +int drm_plane_create_alpha_property(struct drm_plane *plane) +{ + struct drm_property *prop; + + prop = drm_property_create_range(plane->dev, 0, "alpha", + 0, DRM_BLEND_ALPHA_OPAQUE); + if (!prop) + return -ENOMEM; + + drm_object_attach_property(&plane->base, prop, DRM_BLEND_ALPHA_OPAQUE); + plane->alpha_property = prop; + + if (plane->state) + plane->state->alpha = DRM_BLEND_ALPHA_OPAQUE; + + return 0; +} +EXPORT_SYMBOL(drm_plane_create_alpha_property); + +/** * drm_plane_create_rotation_property - create a new rotation property * @plane: drm plane * @rotation: initial value of the rotation property diff --git a/drivers/gpu/drm/drm_color_mgmt.c b/drivers/gpu/drm/drm_color_mgmt.c index 4ff0646..b97e2de 100644 --- a/drivers/gpu/drm/drm_color_mgmt.c +++ b/drivers/gpu/drm/drm_color_mgmt.c @@ -417,8 +417,8 @@ int drm_plane_create_color_properties(struct drm_plane *plane, { struct drm_device *dev = plane->dev; struct drm_property *prop; - struct drm_prop_enum_list enum_list[max(DRM_COLOR_ENCODING_MAX, - DRM_COLOR_RANGE_MAX)]; + struct drm_prop_enum_list enum_list[max_t(int, DRM_COLOR_ENCODING_MAX, + DRM_COLOR_RANGE_MAX)]; int i, len; if (WARN_ON(supported_encodings == 0 || diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index b3cde89..9b9ba5d 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -1069,7 +1069,7 @@ int drm_mode_create_tv_properties(struct drm_device *dev, goto nomem; for (i = 0; i < num_modes; i++) - drm_property_add_enum(dev->mode_config.tv_mode_property, i, + drm_property_add_enum(dev->mode_config.tv_mode_property, i, modes[i]); dev->mode_config.tv_brightness_property = @@ -1156,7 +1156,7 @@ int drm_connector_attach_scaling_mode_property(struct drm_connector *connector, { struct drm_device *dev = connector->dev; struct drm_property *scaling_mode_property; - int i, j = 0; + int i; const unsigned valid_scaling_mode_mask = (1U << ARRAY_SIZE(drm_scaling_mode_enum_list)) - 1; @@ -1177,7 +1177,7 @@ int drm_connector_attach_scaling_mode_property(struct drm_connector *connector, if (!(BIT(i) & scaling_mode_mask)) continue; - ret = drm_property_add_enum(scaling_mode_property, j++, + ret = drm_property_add_enum(scaling_mode_property, drm_scaling_mode_enum_list[i].type, drm_scaling_mode_enum_list[i].name); @@ -1531,8 +1531,10 @@ static struct drm_encoder *drm_connector_get_encoder(struct drm_connector *conne return connector->encoder; } -static bool drm_mode_expose_to_userspace(const struct drm_display_mode *mode, - const struct drm_file *file_priv) +static bool +drm_mode_expose_to_userspace(const struct drm_display_mode *mode, + const struct list_head *export_list, + const struct drm_file *file_priv) { /* * If user-space hasn't configured the driver to expose the stereo 3D @@ -1540,6 +1542,23 @@ static bool drm_mode_expose_to_userspace(const struct drm_display_mode *mode, */ if (!file_priv->stereo_allowed && drm_mode_is_stereo(mode)) return false; + /* + * If user-space hasn't configured the driver to expose the modes + * with aspect-ratio, don't expose them. However if such a mode + * is unique, let it be exposed, but reset the aspect-ratio flags + * while preparing the list of user-modes. + */ + if (!file_priv->aspect_ratio_allowed) { + struct drm_display_mode *mode_itr; + + list_for_each_entry(mode_itr, export_list, export_head) + if (drm_mode_match(mode_itr, mode, + DRM_MODE_MATCH_TIMINGS | + DRM_MODE_MATCH_CLOCK | + DRM_MODE_MATCH_FLAGS | + DRM_MODE_MATCH_3D_FLAGS)) + return false; + } return true; } @@ -1559,6 +1578,7 @@ int drm_mode_getconnector(struct drm_device *dev, void *data, struct drm_mode_modeinfo u_mode; struct drm_mode_modeinfo __user *mode_ptr; uint32_t __user *encoder_ptr; + LIST_HEAD(export_list); if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EINVAL; @@ -1607,21 +1627,31 @@ int drm_mode_getconnector(struct drm_device *dev, void *data, /* delayed so we get modes regardless of pre-fill_modes state */ list_for_each_entry(mode, &connector->modes, head) - if (drm_mode_expose_to_userspace(mode, file_priv)) + if (drm_mode_expose_to_userspace(mode, &export_list, + file_priv)) { + list_add_tail(&mode->export_head, &export_list); mode_count++; + } /* * This ioctl is called twice, once to determine how much space is * needed, and the 2nd time to fill it. + * The modes that need to be exposed to the user are maintained in the + * 'export_list'. When the ioctl is called first time to determine the, + * space, the export_list gets filled, to find the no.of modes. In the + * 2nd time, the user modes are filled, one by one from the export_list. */ if ((out_resp->count_modes >= mode_count) && mode_count) { copied = 0; mode_ptr = (struct drm_mode_modeinfo __user *)(unsigned long)out_resp->modes_ptr; - list_for_each_entry(mode, &connector->modes, head) { - if (!drm_mode_expose_to_userspace(mode, file_priv)) - continue; - + list_for_each_entry(mode, &export_list, export_head) { drm_mode_convert_to_umode(&u_mode, mode); + /* + * Reset aspect ratio flags of user-mode, if modes with + * aspect-ratio are not supported. + */ + if (!file_priv->aspect_ratio_allowed) + u_mode.flags &= ~DRM_MODE_FLAG_PIC_AR_MASK; if (copy_to_user(mode_ptr + copied, &u_mode, sizeof(u_mode))) { ret = -EFAULT; diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 0358388..98a36e6 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -402,6 +402,7 @@ int drm_mode_getcrtc(struct drm_device *dev, { struct drm_mode_crtc *crtc_resp = data; struct drm_crtc *crtc; + struct drm_plane *plane; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EINVAL; @@ -410,34 +411,36 @@ int drm_mode_getcrtc(struct drm_device *dev, if (!crtc) return -ENOENT; + plane = crtc->primary; + crtc_resp->gamma_size = crtc->gamma_size; - drm_modeset_lock(&crtc->primary->mutex, NULL); - if (crtc->primary->state && crtc->primary->state->fb) - crtc_resp->fb_id = crtc->primary->state->fb->base.id; - else if (!crtc->primary->state && crtc->primary->fb) - crtc_resp->fb_id = crtc->primary->fb->base.id; + drm_modeset_lock(&plane->mutex, NULL); + if (plane->state && plane->state->fb) + crtc_resp->fb_id = plane->state->fb->base.id; + else if (!plane->state && plane->fb) + crtc_resp->fb_id = plane->fb->base.id; else crtc_resp->fb_id = 0; - if (crtc->primary->state) { - crtc_resp->x = crtc->primary->state->src_x >> 16; - crtc_resp->y = crtc->primary->state->src_y >> 16; + if (plane->state) { + crtc_resp->x = plane->state->src_x >> 16; + crtc_resp->y = plane->state->src_y >> 16; } - drm_modeset_unlock(&crtc->primary->mutex); + drm_modeset_unlock(&plane->mutex); drm_modeset_lock(&crtc->mutex, NULL); if (crtc->state) { if (crtc->state->enable) { drm_mode_convert_to_umode(&crtc_resp->mode, &crtc->state->mode); crtc_resp->mode_valid = 1; - } else { crtc_resp->mode_valid = 0; } } else { crtc_resp->x = crtc->x; crtc_resp->y = crtc->y; + if (crtc->enabled) { drm_mode_convert_to_umode(&crtc_resp->mode, &crtc->mode); crtc_resp->mode_valid = 1; @@ -446,6 +449,8 @@ int drm_mode_getcrtc(struct drm_device *dev, crtc_resp->mode_valid = 0; } } + if (!file_priv->aspect_ratio_allowed) + crtc_resp->mode.flags &= ~DRM_MODE_FLAG_PIC_AR_MASK; drm_modeset_unlock(&crtc->mutex); return 0; @@ -471,7 +476,7 @@ static int __drm_mode_set_config_internal(struct drm_mode_set *set, ret = crtc->funcs->set_config(set, ctx); if (ret == 0) { - crtc->primary->crtc = crtc; + crtc->primary->crtc = fb ? crtc : NULL; crtc->primary->fb = fb; } @@ -554,6 +559,7 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data, struct drm_mode_config *config = &dev->mode_config; struct drm_mode_crtc *crtc_req = data; struct drm_crtc *crtc; + struct drm_plane *plane; struct drm_connector **connector_set = NULL, *connector; struct drm_framebuffer *fb = NULL; struct drm_display_mode *mode = NULL; @@ -580,22 +586,33 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data, } DRM_DEBUG_KMS("[CRTC:%d:%s]\n", crtc->base.id, crtc->name); + plane = crtc->primary; + mutex_lock(&crtc->dev->mode_config.mutex); drm_modeset_acquire_init(&ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE); retry: ret = drm_modeset_lock_all_ctx(crtc->dev, &ctx); if (ret) goto out; + if (crtc_req->mode_valid) { /* If we have a mode we need a framebuffer. */ /* If we pass -1, set the mode with the currently bound fb */ if (crtc_req->fb_id == -1) { - if (!crtc->primary->fb) { + struct drm_framebuffer *old_fb; + + if (plane->state) + old_fb = plane->state->fb; + else + old_fb = plane->fb; + + if (!old_fb) { DRM_DEBUG_KMS("CRTC doesn't have current FB\n"); ret = -EINVAL; goto out; } - fb = crtc->primary->fb; + + fb = old_fb; /* Make refcounting symmetric with the lookup path. */ drm_framebuffer_get(fb); } else { @@ -613,6 +630,13 @@ retry: ret = -ENOMEM; goto out; } + if (!file_priv->aspect_ratio_allowed && + (crtc_req->mode.flags & DRM_MODE_FLAG_PIC_AR_MASK) != DRM_MODE_FLAG_PIC_AR_NONE) { + DRM_DEBUG_KMS("Unexpected aspect-ratio flag bits\n"); + ret = -EINVAL; + goto out; + } + ret = drm_mode_convert_umode(dev, mode, &crtc_req->mode); if (ret) { @@ -627,8 +651,8 @@ retry: * match real hardware capabilities. Skip the check in that * case. */ - if (!crtc->primary->format_default) { - ret = drm_plane_check_pixel_format(crtc->primary, + if (!plane->format_default) { + ret = drm_plane_check_pixel_format(plane, fb->format->format, fb->modifier); if (ret) { diff --git a/drivers/gpu/drm/drm_crtc_internal.h b/drivers/gpu/drm/drm_crtc_internal.h index 3c2b828..5d307b2 100644 --- a/drivers/gpu/drm/drm_crtc_internal.h +++ b/drivers/gpu/drm/drm_crtc_internal.h @@ -220,3 +220,5 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev, /* drm_edid.c */ void drm_mode_fixup_1366x768(struct drm_display_mode *mode); +void drm_reset_display_info(struct drm_connector *connector); +u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edid); diff --git a/drivers/gpu/drm/drm_dp_aux_dev.c b/drivers/gpu/drm/drm_dp_aux_dev.c index 0530442..0e4f25d 100644 --- a/drivers/gpu/drm/drm_dp_aux_dev.c +++ b/drivers/gpu/drm/drm_dp_aux_dev.c @@ -177,8 +177,9 @@ static ssize_t auxdev_read_iter(struct kiocb *iocb, struct iov_iter *to) res = pos - iocb->ki_pos; iocb->ki_pos = pos; - atomic_dec(&aux_dev->usecount); - wake_up_atomic_t(&aux_dev->usecount); + if (atomic_dec_and_test(&aux_dev->usecount)) + wake_up_var(&aux_dev->usecount); + return res; } @@ -218,8 +219,9 @@ static ssize_t auxdev_write_iter(struct kiocb *iocb, struct iov_iter *from) res = pos - iocb->ki_pos; iocb->ki_pos = pos; - atomic_dec(&aux_dev->usecount); - wake_up_atomic_t(&aux_dev->usecount); + if (atomic_dec_and_test(&aux_dev->usecount)) + wake_up_var(&aux_dev->usecount); + return res; } @@ -277,8 +279,7 @@ void drm_dp_aux_unregister_devnode(struct drm_dp_aux *aux) mutex_unlock(&aux_idr_mutex); atomic_dec(&aux_dev->usecount); - wait_on_atomic_t(&aux_dev->usecount, atomic_t_wait, - TASK_UNINTERRUPTIBLE); + wait_var_event(&aux_dev->usecount, !atomic_read(&aux_dev->usecount)); minor = aux_dev->index; if (aux_dev->dev) diff --git a/drivers/gpu/drm/drm_dp_dual_mode_helper.c b/drivers/gpu/drm/drm_dp_dual_mode_helper.c index 02a5092..e7f4fe2 100644 --- a/drivers/gpu/drm/drm_dp_dual_mode_helper.c +++ b/drivers/gpu/drm/drm_dp_dual_mode_helper.c @@ -350,19 +350,44 @@ int drm_dp_dual_mode_set_tmds_output(enum drm_dp_dual_mode_type type, { uint8_t tmds_oen = enable ? 0 : DP_DUAL_MODE_TMDS_DISABLE; ssize_t ret; + int retry; if (type < DRM_DP_DUAL_MODE_TYPE2_DVI) return 0; - ret = drm_dp_dual_mode_write(adapter, DP_DUAL_MODE_TMDS_OEN, - &tmds_oen, sizeof(tmds_oen)); - if (ret) { - DRM_DEBUG_KMS("Failed to %s TMDS output buffers\n", - enable ? "enable" : "disable"); - return ret; + /* + * LSPCON adapters in low-power state may ignore the first write, so + * read back and verify the written value a few times. + */ + for (retry = 0; retry < 3; retry++) { + uint8_t tmp; + + ret = drm_dp_dual_mode_write(adapter, DP_DUAL_MODE_TMDS_OEN, + &tmds_oen, sizeof(tmds_oen)); + if (ret) { + DRM_DEBUG_KMS("Failed to %s TMDS output buffers (%d attempts)\n", + enable ? "enable" : "disable", + retry + 1); + return ret; + } + + ret = drm_dp_dual_mode_read(adapter, DP_DUAL_MODE_TMDS_OEN, + &tmp, sizeof(tmp)); + if (ret) { + DRM_DEBUG_KMS("I2C read failed during TMDS output buffer %s (%d attempts)\n", + enable ? "enabling" : "disabling", + retry + 1); + return ret; + } + + if (tmp == tmds_oen) + return 0; } - return 0; + DRM_DEBUG_KMS("I2C write value mismatch during TMDS output buffer %s\n", + enable ? "enabling" : "disabling"); + + return -EIO; } EXPORT_SYMBOL(drm_dp_dual_mode_set_tmds_output); diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c index ffe14ec..36c7609 100644 --- a/drivers/gpu/drm/drm_dp_helper.c +++ b/drivers/gpu/drm/drm_dp_helper.c @@ -119,18 +119,32 @@ u8 drm_dp_get_adjust_request_pre_emphasis(const u8 link_status[DP_LINK_STATUS_SI EXPORT_SYMBOL(drm_dp_get_adjust_request_pre_emphasis); void drm_dp_link_train_clock_recovery_delay(const u8 dpcd[DP_RECEIVER_CAP_SIZE]) { - if (dpcd[DP_TRAINING_AUX_RD_INTERVAL] == 0) + int rd_interval = dpcd[DP_TRAINING_AUX_RD_INTERVAL] & + DP_TRAINING_AUX_RD_MASK; + + if (rd_interval > 4) + DRM_DEBUG_KMS("AUX interval %d, out of range (max 4)\n", + rd_interval); + + if (rd_interval == 0 || dpcd[DP_DPCD_REV] >= DP_DPCD_REV_14) udelay(100); else - mdelay(dpcd[DP_TRAINING_AUX_RD_INTERVAL] * 4); + mdelay(rd_interval * 4); } EXPORT_SYMBOL(drm_dp_link_train_clock_recovery_delay); void drm_dp_link_train_channel_eq_delay(const u8 dpcd[DP_RECEIVER_CAP_SIZE]) { - if (dpcd[DP_TRAINING_AUX_RD_INTERVAL] == 0) + int rd_interval = dpcd[DP_TRAINING_AUX_RD_INTERVAL] & + DP_TRAINING_AUX_RD_MASK; + + if (rd_interval > 4) + DRM_DEBUG_KMS("AUX interval %d, out of range (max 4)\n", + rd_interval); + + if (rd_interval == 0) udelay(400); else - mdelay(dpcd[DP_TRAINING_AUX_RD_INTERVAL] * 4); + mdelay(rd_interval * 4); } EXPORT_SYMBOL(drm_dp_link_train_channel_eq_delay); diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 6fac412..6588306 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -2941,12 +2941,14 @@ static void drm_dp_mst_dump_mstb(struct seq_file *m, } } +#define DP_PAYLOAD_TABLE_SIZE 64 + static bool dump_dp_payload_table(struct drm_dp_mst_topology_mgr *mgr, char *buf) { int i; - for (i = 0; i < 64; i += 16) { + for (i = 0; i < DP_PAYLOAD_TABLE_SIZE; i += 16) { if (drm_dp_dpcd_read(mgr->aux, DP_PAYLOAD_TABLE_UPDATE_STATUS + i, &buf[i], 16) != 16) @@ -3015,7 +3017,7 @@ void drm_dp_mst_dump_topology(struct seq_file *m, mutex_lock(&mgr->lock); if (mgr->mst_primary) { - u8 buf[64]; + u8 buf[DP_PAYLOAD_TABLE_SIZE]; int ret; ret = drm_dp_dpcd_read(mgr->aux, DP_DPCD_REV, buf, DP_RECEIVER_CAP_SIZE); @@ -3033,8 +3035,7 @@ void drm_dp_mst_dump_topology(struct seq_file *m, seq_printf(m, " revision: hw: %x.%x sw: %x.%x\n", buf[0x9] >> 4, buf[0x9] & 0xf, buf[0xa], buf[0xb]); if (dump_dp_payload_table(mgr, buf)) - seq_printf(m, "payload table: %*ph\n", 63, buf); - + seq_printf(m, "payload table: %*ph\n", DP_PAYLOAD_TABLE_SIZE, buf); } mutex_unlock(&mgr->lock); diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index a1b9338..f6910eb 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -32,6 +32,7 @@ #include <linux/moduleparam.h> #include <linux/mount.h> #include <linux/slab.h> +#include <linux/srcu.h> #include <drm/drm_drv.h> #include <drm/drmP.h> @@ -75,6 +76,8 @@ static bool drm_core_init_complete = false; static struct dentry *drm_debugfs_root; +DEFINE_STATIC_SRCU(drm_unplug_srcu); + /* * DRM Minors * A DRM device can provide several char-dev interfaces on the DRM-Major. Each @@ -96,8 +99,6 @@ static struct drm_minor **drm_minor_get_slot(struct drm_device *dev, return &dev->primary; case DRM_MINOR_RENDER: return &dev->render; - case DRM_MINOR_CONTROL: - return &dev->control; default: BUG(); } @@ -318,18 +319,51 @@ void drm_put_dev(struct drm_device *dev) } EXPORT_SYMBOL(drm_put_dev); -static void drm_device_set_unplugged(struct drm_device *dev) +/** + * drm_dev_enter - Enter device critical section + * @dev: DRM device + * @idx: Pointer to index that will be passed to the matching drm_dev_exit() + * + * This function marks and protects the beginning of a section that should not + * be entered after the device has been unplugged. The section end is marked + * with drm_dev_exit(). Calls to this function can be nested. + * + * Returns: + * True if it is OK to enter the section, false otherwise. + */ +bool drm_dev_enter(struct drm_device *dev, int *idx) { - smp_wmb(); - atomic_set(&dev->unplugged, 1); + *idx = srcu_read_lock(&drm_unplug_srcu); + + if (dev->unplugged) { + srcu_read_unlock(&drm_unplug_srcu, *idx); + return false; + } + + return true; } +EXPORT_SYMBOL(drm_dev_enter); + +/** + * drm_dev_exit - Exit device critical section + * @idx: index returned from drm_dev_enter() + * + * This function marks the end of a section that should not be entered after + * the device has been unplugged. + */ +void drm_dev_exit(int idx) +{ + srcu_read_unlock(&drm_unplug_srcu, idx); +} +EXPORT_SYMBOL(drm_dev_exit); /** * drm_dev_unplug - unplug a DRM device * @dev: DRM device * * This unplugs a hotpluggable DRM device, which makes it inaccessible to - * userspace operations. Entry-points can use drm_dev_is_unplugged(). This + * userspace operations. Entry-points can use drm_dev_enter() and + * drm_dev_exit() to protect device resources in a race free manner. This * essentially unregisters the device like drm_dev_unregister(), but can be * called while there are still open users of @dev. */ @@ -338,10 +372,18 @@ void drm_dev_unplug(struct drm_device *dev) drm_dev_unregister(dev); mutex_lock(&drm_global_mutex); - drm_device_set_unplugged(dev); if (dev->open_count == 0) drm_dev_put(dev); mutex_unlock(&drm_global_mutex); + + /* + * After synchronizing any critical read section is guaranteed to see + * the new value of ->unplugged, and any critical section which might + * still have seen the old value of ->unplugged is guaranteed to have + * finished. + */ + dev->unplugged = true; + synchronize_srcu(&drm_unplug_srcu); } EXPORT_SYMBOL(drm_dev_unplug); @@ -523,7 +565,6 @@ err_ctxbitmap: err_minors: drm_minor_free(dev, DRM_MINOR_PRIMARY); drm_minor_free(dev, DRM_MINOR_RENDER); - drm_minor_free(dev, DRM_MINOR_CONTROL); drm_fs_inode_free(dev->anon_inode); err_free: mutex_destroy(&dev->master_mutex); @@ -559,7 +600,6 @@ void drm_dev_fini(struct drm_device *dev) drm_minor_free(dev, DRM_MINOR_PRIMARY); drm_minor_free(dev, DRM_MINOR_RENDER); - drm_minor_free(dev, DRM_MINOR_CONTROL); mutex_destroy(&dev->master_mutex); mutex_destroy(&dev->ctxlist_mutex); @@ -752,10 +792,6 @@ int drm_dev_register(struct drm_device *dev, unsigned long flags) mutex_lock(&drm_global_mutex); - ret = drm_minor_register(dev, DRM_MINOR_CONTROL); - if (ret) - goto err_minors; - ret = drm_minor_register(dev, DRM_MINOR_RENDER); if (ret) goto err_minors; @@ -793,7 +829,6 @@ err_minors: remove_compat_control_link(dev); drm_minor_unregister(dev, DRM_MINOR_PRIMARY); drm_minor_unregister(dev, DRM_MINOR_RENDER); - drm_minor_unregister(dev, DRM_MINOR_CONTROL); out_unlock: mutex_unlock(&drm_global_mutex); return ret; @@ -838,7 +873,6 @@ void drm_dev_unregister(struct drm_device *dev) remove_compat_control_link(dev); drm_minor_unregister(dev, DRM_MINOR_PRIMARY); drm_minor_unregister(dev, DRM_MINOR_RENDER); - drm_minor_unregister(dev, DRM_MINOR_CONTROL); } EXPORT_SYMBOL(drm_dev_unregister); diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 134069f..40e1e24 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -2930,11 +2930,15 @@ cea_mode_alternate_timings(u8 vic, struct drm_display_mode *mode) static u8 drm_match_cea_mode_clock_tolerance(const struct drm_display_mode *to_match, unsigned int clock_tolerance) { + unsigned int match_flags = DRM_MODE_MATCH_TIMINGS | DRM_MODE_MATCH_FLAGS; u8 vic; if (!to_match->clock) return 0; + if (to_match->picture_aspect_ratio) + match_flags |= DRM_MODE_MATCH_ASPECT_RATIO; + for (vic = 1; vic < ARRAY_SIZE(edid_cea_modes); vic++) { struct drm_display_mode cea_mode = edid_cea_modes[vic]; unsigned int clock1, clock2; @@ -2948,7 +2952,7 @@ static u8 drm_match_cea_mode_clock_tolerance(const struct drm_display_mode *to_m continue; do { - if (drm_mode_equal_no_clocks_no_stereo(to_match, &cea_mode)) + if (drm_mode_match(to_match, &cea_mode, match_flags)) return vic; } while (cea_mode_alternate_timings(vic, &cea_mode)); } @@ -2965,11 +2969,15 @@ static u8 drm_match_cea_mode_clock_tolerance(const struct drm_display_mode *to_m */ u8 drm_match_cea_mode(const struct drm_display_mode *to_match) { + unsigned int match_flags = DRM_MODE_MATCH_TIMINGS | DRM_MODE_MATCH_FLAGS; u8 vic; if (!to_match->clock) return 0; + if (to_match->picture_aspect_ratio) + match_flags |= DRM_MODE_MATCH_ASPECT_RATIO; + for (vic = 1; vic < ARRAY_SIZE(edid_cea_modes); vic++) { struct drm_display_mode cea_mode = edid_cea_modes[vic]; unsigned int clock1, clock2; @@ -2983,7 +2991,7 @@ u8 drm_match_cea_mode(const struct drm_display_mode *to_match) continue; do { - if (drm_mode_equal_no_clocks_no_stereo(to_match, &cea_mode)) + if (drm_mode_match(to_match, &cea_mode, match_flags)) return vic; } while (cea_mode_alternate_timings(vic, &cea_mode)); } @@ -3030,6 +3038,7 @@ hdmi_mode_alternate_clock(const struct drm_display_mode *hdmi_mode) static u8 drm_match_hdmi_mode_clock_tolerance(const struct drm_display_mode *to_match, unsigned int clock_tolerance) { + unsigned int match_flags = DRM_MODE_MATCH_TIMINGS | DRM_MODE_MATCH_FLAGS; u8 vic; if (!to_match->clock) @@ -3047,7 +3056,7 @@ static u8 drm_match_hdmi_mode_clock_tolerance(const struct drm_display_mode *to_ abs(to_match->clock - clock2) > clock_tolerance) continue; - if (drm_mode_equal_no_clocks(to_match, hdmi_mode)) + if (drm_mode_match(to_match, hdmi_mode, match_flags)) return vic; } @@ -3064,6 +3073,7 @@ static u8 drm_match_hdmi_mode_clock_tolerance(const struct drm_display_mode *to_ */ static u8 drm_match_hdmi_mode(const struct drm_display_mode *to_match) { + unsigned int match_flags = DRM_MODE_MATCH_TIMINGS | DRM_MODE_MATCH_FLAGS; u8 vic; if (!to_match->clock) @@ -3079,7 +3089,7 @@ static u8 drm_match_hdmi_mode(const struct drm_display_mode *to_match) if ((KHZ2PICOS(to_match->clock) == KHZ2PICOS(clock1) || KHZ2PICOS(to_match->clock) == KHZ2PICOS(clock2)) && - drm_mode_equal_no_clocks_no_stereo(to_match, hdmi_mode)) + drm_mode_match(to_match, hdmi_mode, match_flags)) return vic; } return 0; @@ -4451,10 +4461,10 @@ drm_reset_display_info(struct drm_connector *connector) info->max_tmds_clock = 0; info->dvi_dual = false; info->has_hdmi_infoframe = false; + memset(&info->hdmi, 0, sizeof(info->hdmi)); info->non_desktop = 0; } -EXPORT_SYMBOL_GPL(drm_reset_display_info); u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edid) { @@ -4462,17 +4472,11 @@ u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edi u32 quirks = edid_get_quirks(edid); + drm_reset_display_info(connector); + info->width_mm = edid->width_cm * 10; info->height_mm = edid->height_cm * 10; - /* driver figures it out in this case */ - info->bpc = 0; - info->color_formats = 0; - info->cea_rev = 0; - info->max_tmds_clock = 0; - info->dvi_dual = false; - info->has_hdmi_infoframe = false; - info->non_desktop = !!(quirks & EDID_QUIRK_NON_DESKTOP); DRM_DEBUG_KMS("non_desktop set to %d\n", info->non_desktop); @@ -4538,7 +4542,6 @@ u32 drm_add_display_info(struct drm_connector *connector, const struct edid *edi info->color_formats |= DRM_COLOR_FORMAT_YCRCB422; return quirks; } -EXPORT_SYMBOL_GPL(drm_add_display_info); static int validate_displayid(u8 *displayid, int length, int idx) { @@ -4830,6 +4833,7 @@ drm_hdmi_avi_infoframe_from_display_mode(struct hdmi_avi_infoframe *frame, const struct drm_display_mode *mode, bool is_hdmi2_sink) { + enum hdmi_picture_aspect picture_aspect; int err; if (!frame || !mode) @@ -4872,13 +4876,23 @@ drm_hdmi_avi_infoframe_from_display_mode(struct hdmi_avi_infoframe *frame, * Populate picture aspect ratio from either * user input (if specified) or from the CEA mode list. */ - if (mode->picture_aspect_ratio == HDMI_PICTURE_ASPECT_4_3 || - mode->picture_aspect_ratio == HDMI_PICTURE_ASPECT_16_9) - frame->picture_aspect = mode->picture_aspect_ratio; - else if (frame->video_code > 0) - frame->picture_aspect = drm_get_cea_aspect_ratio( - frame->video_code); + picture_aspect = mode->picture_aspect_ratio; + if (picture_aspect == HDMI_PICTURE_ASPECT_NONE) + picture_aspect = drm_get_cea_aspect_ratio(frame->video_code); + + /* + * The infoframe can't convey anything but none, 4:3 + * and 16:9, so if the user has asked for anything else + * we can only satisfy it by specifying the right VIC. + */ + if (picture_aspect > HDMI_PICTURE_ASPECT_16_9) { + if (picture_aspect != + drm_get_cea_aspect_ratio(frame->video_code)) + return -EINVAL; + picture_aspect = HDMI_PICTURE_ASPECT_NONE; + } + frame->picture_aspect = picture_aspect; frame->active_aspect = HDMI_ACTIVE_ASPECT_PICTURE; frame->scan_mode = HDMI_SCAN_MODE_UNDERSCAN; diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 0646b10..2ee1eaa 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -2183,7 +2183,11 @@ static bool drm_target_cloned(struct drm_fb_helper *fb_helper, for (j = 0; j < i; j++) { if (!enabled[j]) continue; - if (!drm_mode_equal(modes[j], modes[i])) + if (!drm_mode_match(modes[j], modes[i], + DRM_MODE_MATCH_TIMINGS | + DRM_MODE_MATCH_CLOCK | + DRM_MODE_MATCH_FLAGS | + DRM_MODE_MATCH_3D_FLAGS)) can_clone = false; } } @@ -2203,7 +2207,11 @@ static bool drm_target_cloned(struct drm_fb_helper *fb_helper, fb_helper_conn = fb_helper->connector_info[i]; list_for_each_entry(mode, &fb_helper_conn->connector->modes, head) { - if (drm_mode_equal(mode, dmt_mode)) + if (drm_mode_match(mode, dmt_mode, + DRM_MODE_MATCH_TIMINGS | + DRM_MODE_MATCH_CLOCK | + DRM_MODE_MATCH_FLAGS | + DRM_MODE_MATCH_3D_FLAGS)) modes[i] = mode; } if (!modes[i]) diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c index ad67203..bfedcef 100644 --- a/drivers/gpu/drm/drm_framebuffer.c +++ b/drivers/gpu/drm/drm_framebuffer.c @@ -468,29 +468,30 @@ int drm_mode_getfb(struct drm_device *dev, goto out; } + if (!fb->funcs->create_handle) { + ret = -ENODEV; + goto out; + } + r->height = fb->height; r->width = fb->width; r->depth = fb->format->depth; r->bpp = fb->format->cpp[0] * 8; r->pitch = fb->pitches[0]; - if (fb->funcs->create_handle) { - if (drm_is_current_master(file_priv) || capable(CAP_SYS_ADMIN) || - drm_is_control_client(file_priv)) { - ret = fb->funcs->create_handle(fb, file_priv, - &r->handle); - } else { - /* GET_FB() is an unprivileged ioctl so we must not - * return a buffer-handle to non-master processes! For - * backwards-compatibility reasons, we cannot make - * GET_FB() privileged, so just return an invalid handle - * for non-masters. */ - r->handle = 0; - ret = 0; - } - } else { - ret = -ENODEV; + + /* GET_FB() is an unprivileged ioctl so we must not return a + * buffer-handle to non-master processes! For + * backwards-compatibility reasons, we cannot make GET_FB() privileged, + * so just return an invalid handle for non-masters. + */ + if (!drm_is_current_master(file_priv) && !capable(CAP_SYS_ADMIN)) { + r->handle = 0; + ret = 0; + goto out; } + ret = fb->funcs->create_handle(fb, file_priv, &r->handle); + out: drm_framebuffer_put(fb); diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 4975ba9..4a16d7b 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -436,9 +436,12 @@ err_unref: * @obj: object to register * @handlep: pionter to return the created handle to the caller * - * Create a handle for this object. This adds a handle reference - * to the object, which includes a regular reference count. Callers - * will likely want to dereference the object afterwards. + * Create a handle for this object. This adds a handle reference to the object, + * which includes a regular reference count. Callers will likely want to + * dereference the object afterwards. + * + * Since this publishes @obj to userspace it must be fully set up by this point, + * drivers must call this last in their buffer object creation callbacks. */ int drm_gem_handle_create(struct drm_file *file_priv, struct drm_gem_object *obj, diff --git a/drivers/gpu/drm/drm_gem_framebuffer_helper.c b/drivers/gpu/drm/drm_gem_framebuffer_helper.c index 4d682a6..acfbc06 100644 --- a/drivers/gpu/drm/drm_gem_framebuffer_helper.c +++ b/drivers/gpu/drm/drm_gem_framebuffer_helper.c @@ -22,6 +22,7 @@ #include <drm/drm_gem.h> #include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_modeset_helper.h> +#include <drm/drm_simple_kms_helper.h> /** * DOC: overview @@ -266,6 +267,24 @@ int drm_gem_fb_prepare_fb(struct drm_plane *plane, EXPORT_SYMBOL_GPL(drm_gem_fb_prepare_fb); /** + * drm_gem_fb_simple_display_pipe_prepare_fb - prepare_fb helper for + * &drm_simple_display_pipe + * @pipe: Simple display pipe + * @plane_state: Plane state + * + * This function uses drm_gem_fb_prepare_fb() to check if the plane FB has a + * &dma_buf attached, extracts the exclusive fence and attaches it to plane + * state for the atomic helper to wait on. Drivers can use this as their + * &drm_simple_display_pipe_funcs.prepare_fb callback. + */ +int drm_gem_fb_simple_display_pipe_prepare_fb(struct drm_simple_display_pipe *pipe, + struct drm_plane_state *plane_state) +{ + return drm_gem_fb_prepare_fb(&pipe->plane, plane_state); +} +EXPORT_SYMBOL(drm_gem_fb_simple_display_pipe_prepare_fb); + +/** * drm_gem_fbdev_fb_create - Create a GEM backed &drm_framebuffer for fbdev * emulation * @dev: DRM device diff --git a/drivers/gpu/drm/drm_ioc32.c b/drivers/gpu/drm/drm_ioc32.c index f8e96e6..67b1fca 100644 --- a/drivers/gpu/drm/drm_ioc32.c +++ b/drivers/gpu/drm/drm_ioc32.c @@ -105,7 +105,7 @@ static int compat_drm_version(struct file *file, unsigned int cmd, .desc = compat_ptr(v32.desc), }; err = drm_ioctl_kernel(file, drm_version, &v, - DRM_UNLOCKED|DRM_RENDER_ALLOW|DRM_CONTROL_ALLOW); + DRM_UNLOCKED|DRM_RENDER_ALLOW); if (err) return err; @@ -885,7 +885,7 @@ static int compat_drm_mode_addfb2(struct file *file, unsigned int cmd, return -EFAULT; err = drm_ioctl_kernel(file, drm_mode_addfb2, &req64, - DRM_CONTROL_ALLOW|DRM_UNLOCKED); + DRM_UNLOCKED); if (err) return err; diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c index af78291..0d4cfb2 100644 --- a/drivers/gpu/drm/drm_ioctl.c +++ b/drivers/gpu/drm/drm_ioctl.c @@ -324,6 +324,15 @@ drm_setclientcap(struct drm_device *dev, void *data, struct drm_file *file_priv) return -EINVAL; file_priv->atomic = req->value; file_priv->universal_planes = req->value; + /* + * No atomic user-space blows up on aspect ratio mode bits. + */ + file_priv->aspect_ratio_allowed = req->value; + break; + case DRM_CLIENT_CAP_ASPECT_RATIO: + if (req->value > 1) + return -EINVAL; + file_priv->aspect_ratio_allowed = req->value; break; default: return -EINVAL; @@ -510,13 +519,7 @@ int drm_ioctl_permit(u32 flags, struct drm_file *file_priv) /* MASTER is only for master or control clients */ if (unlikely((flags & DRM_MASTER) && - !drm_is_current_master(file_priv) && - !drm_is_control_client(file_priv))) - return -EACCES; - - /* Control clients must be explicitly allowed */ - if (unlikely(!(flags & DRM_CONTROL_ALLOW) && - drm_is_control_client(file_priv))) + !drm_is_current_master(file_priv))) return -EACCES; /* Render clients must be explicitly allowed */ @@ -539,7 +542,7 @@ EXPORT_SYMBOL(drm_ioctl_permit); /* Ioctl table */ static const struct drm_ioctl_desc drm_ioctls[] = { DRM_IOCTL_DEF(DRM_IOCTL_VERSION, drm_version, - DRM_UNLOCKED|DRM_RENDER_ALLOW|DRM_CONTROL_ALLOW), + DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_GET_UNIQUE, drm_getunique, DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_IOCTL_GET_MAGIC, drm_getmagic, DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_IOCTL_IRQ_BUSID, drm_irq_by_busid, DRM_MASTER|DRM_ROOT_ONLY), @@ -613,41 +616,41 @@ static const struct drm_ioctl_desc drm_ioctls[] = { DRM_IOCTL_DEF(DRM_IOCTL_GEM_FLINK, drm_gem_flink_ioctl, DRM_AUTH|DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_IOCTL_GEM_OPEN, drm_gem_open_ioctl, DRM_AUTH|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETRESOURCES, drm_mode_getresources, DRM_CONTROL_ALLOW|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETRESOURCES, drm_mode_getresources, DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_IOCTL_PRIME_HANDLE_TO_FD, drm_prime_handle_to_fd_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_PRIME_FD_TO_HANDLE, drm_prime_fd_to_handle_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPLANERESOURCES, drm_mode_getplane_res, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCRTC, drm_mode_getcrtc, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETCRTC, drm_mode_setcrtc, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPLANE, drm_mode_getplane, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETPLANE, drm_mode_setplane, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR, drm_mode_cursor_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPLANERESOURCES, drm_mode_getplane_res, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCRTC, drm_mode_getcrtc, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETCRTC, drm_mode_setcrtc, DRM_MASTER|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPLANE, drm_mode_getplane, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETPLANE, drm_mode_setplane, DRM_MASTER|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR, drm_mode_cursor_ioctl, DRM_MASTER|DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETGAMMA, drm_mode_gamma_get_ioctl, DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETGAMMA, drm_mode_gamma_set_ioctl, DRM_MASTER|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETENCODER, drm_mode_getencoder, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCONNECTOR, drm_mode_getconnector, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_ATTACHMODE, drm_noop, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_DETACHMODE, drm_noop, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPERTY, drm_mode_getproperty_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETPROPERTY, drm_mode_connector_property_set_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPBLOB, drm_mode_getblob_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETFB, drm_mode_getfb, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB, drm_mode_addfb, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB2, drm_mode_addfb2, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_RMFB, drm_mode_rmfb, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_PAGE_FLIP, drm_mode_page_flip_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_DIRTYFB, drm_mode_dirtyfb_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_DUMB, drm_mode_create_dumb_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_MAP_DUMB, drm_mode_mmap_dumb_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_DESTROY_DUMB, drm_mode_destroy_dumb_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_OBJ_GETPROPERTIES, drm_mode_obj_get_properties_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_OBJ_SETPROPERTY, drm_mode_obj_set_property_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR2, drm_mode_cursor2_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_ATOMIC, drm_mode_atomic_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATEPROPBLOB, drm_mode_createblob_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_DESTROYPROPBLOB, drm_mode_destroyblob_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETENCODER, drm_mode_getencoder, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCONNECTOR, drm_mode_getconnector, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_ATTACHMODE, drm_noop, DRM_MASTER|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_DETACHMODE, drm_noop, DRM_MASTER|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPERTY, drm_mode_getproperty_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETPROPERTY, drm_mode_connector_property_set_ioctl, DRM_MASTER|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPBLOB, drm_mode_getblob_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETFB, drm_mode_getfb, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB, drm_mode_addfb, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB2, drm_mode_addfb2, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_RMFB, drm_mode_rmfb, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_PAGE_FLIP, drm_mode_page_flip_ioctl, DRM_MASTER|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_DIRTYFB, drm_mode_dirtyfb_ioctl, DRM_MASTER|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_DUMB, drm_mode_create_dumb_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_MAP_DUMB, drm_mode_mmap_dumb_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_DESTROY_DUMB, drm_mode_destroy_dumb_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_OBJ_GETPROPERTIES, drm_mode_obj_get_properties_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_OBJ_SETPROPERTY, drm_mode_obj_set_property_ioctl, DRM_MASTER|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR2, drm_mode_cursor2_ioctl, DRM_MASTER|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_ATOMIC, drm_mode_atomic_ioctl, DRM_MASTER|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATEPROPBLOB, drm_mode_createblob_ioctl, DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_DESTROYPROPBLOB, drm_mode_destroyblob_ioctl, DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_CREATE, drm_syncobj_create_ioctl, DRM_UNLOCKED|DRM_RENDER_ALLOW), @@ -665,10 +668,10 @@ static const struct drm_ioctl_desc drm_ioctls[] = { DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, drm_crtc_get_sequence_ioctl, DRM_UNLOCKED), DRM_IOCTL_DEF(DRM_IOCTL_CRTC_QUEUE_SEQUENCE, drm_crtc_queue_sequence_ioctl, DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE, drm_mode_create_lease_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_LIST_LESSEES, drm_mode_list_lessees_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_GET_LEASE, drm_mode_get_lease_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), - DRM_IOCTL_DEF(DRM_IOCTL_MODE_REVOKE_LEASE, drm_mode_revoke_lease_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE, drm_mode_create_lease_ioctl, DRM_MASTER|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_LIST_LESSEES, drm_mode_list_lessees_ioctl, DRM_MASTER|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_GET_LEASE, drm_mode_get_lease_ioctl, DRM_MASTER|DRM_UNLOCKED), + DRM_IOCTL_DEF(DRM_IOCTL_MODE_REVOKE_LEASE, drm_mode_revoke_lease_ioctl, DRM_MASTER|DRM_UNLOCKED), }; #define DRM_CORE_IOCTL_COUNT ARRAY_SIZE( drm_ioctls ) diff --git a/drivers/gpu/drm/drm_lease.c b/drivers/gpu/drm/drm_lease.c index d345563..50c73c0 100644 --- a/drivers/gpu/drm/drm_lease.c +++ b/drivers/gpu/drm/drm_lease.c @@ -340,7 +340,7 @@ static void _drm_lease_revoke(struct drm_master *top) break; /* Over */ - master = list_entry(master->lessee_list.next, struct drm_master, lessee_list); + master = list_next_entry(master, lessee_list); } } } diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index e82b61e..c78ca0e 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -939,17 +939,68 @@ struct drm_display_mode *drm_mode_duplicate(struct drm_device *dev, } EXPORT_SYMBOL(drm_mode_duplicate); +static bool drm_mode_match_timings(const struct drm_display_mode *mode1, + const struct drm_display_mode *mode2) +{ + return mode1->hdisplay == mode2->hdisplay && + mode1->hsync_start == mode2->hsync_start && + mode1->hsync_end == mode2->hsync_end && + mode1->htotal == mode2->htotal && + mode1->hskew == mode2->hskew && + mode1->vdisplay == mode2->vdisplay && + mode1->vsync_start == mode2->vsync_start && + mode1->vsync_end == mode2->vsync_end && + mode1->vtotal == mode2->vtotal && + mode1->vscan == mode2->vscan; +} + +static bool drm_mode_match_clock(const struct drm_display_mode *mode1, + const struct drm_display_mode *mode2) +{ + /* + * do clock check convert to PICOS + * so fb modes get matched the same + */ + if (mode1->clock && mode2->clock) + return KHZ2PICOS(mode1->clock) == KHZ2PICOS(mode2->clock); + else + return mode1->clock == mode2->clock; +} + +static bool drm_mode_match_flags(const struct drm_display_mode *mode1, + const struct drm_display_mode *mode2) +{ + return (mode1->flags & ~DRM_MODE_FLAG_3D_MASK) == + (mode2->flags & ~DRM_MODE_FLAG_3D_MASK); +} + +static bool drm_mode_match_3d_flags(const struct drm_display_mode *mode1, + const struct drm_display_mode *mode2) +{ + return (mode1->flags & DRM_MODE_FLAG_3D_MASK) == + (mode2->flags & DRM_MODE_FLAG_3D_MASK); +} + +static bool drm_mode_match_aspect_ratio(const struct drm_display_mode *mode1, + const struct drm_display_mode *mode2) +{ + return mode1->picture_aspect_ratio == mode2->picture_aspect_ratio; +} + /** - * drm_mode_equal - test modes for equality + * drm_mode_match - test modes for (partial) equality * @mode1: first mode * @mode2: second mode + * @match_flags: which parts need to match (DRM_MODE_MATCH_*) * * Check to see if @mode1 and @mode2 are equivalent. * * Returns: - * True if the modes are equal, false otherwise. + * True if the modes are (partially) equal, false otherwise. */ -bool drm_mode_equal(const struct drm_display_mode *mode1, const struct drm_display_mode *mode2) +bool drm_mode_match(const struct drm_display_mode *mode1, + const struct drm_display_mode *mode2, + unsigned int match_flags) { if (!mode1 && !mode2) return true; @@ -957,15 +1008,49 @@ bool drm_mode_equal(const struct drm_display_mode *mode1, const struct drm_displ if (!mode1 || !mode2) return false; - /* do clock check convert to PICOS so fb modes get matched - * the same */ - if (mode1->clock && mode2->clock) { - if (KHZ2PICOS(mode1->clock) != KHZ2PICOS(mode2->clock)) - return false; - } else if (mode1->clock != mode2->clock) + if (match_flags & DRM_MODE_MATCH_TIMINGS && + !drm_mode_match_timings(mode1, mode2)) + return false; + + if (match_flags & DRM_MODE_MATCH_CLOCK && + !drm_mode_match_clock(mode1, mode2)) + return false; + + if (match_flags & DRM_MODE_MATCH_FLAGS && + !drm_mode_match_flags(mode1, mode2)) + return false; + + if (match_flags & DRM_MODE_MATCH_3D_FLAGS && + !drm_mode_match_3d_flags(mode1, mode2)) return false; - return drm_mode_equal_no_clocks(mode1, mode2); + if (match_flags & DRM_MODE_MATCH_ASPECT_RATIO && + !drm_mode_match_aspect_ratio(mode1, mode2)) + return false; + + return true; +} +EXPORT_SYMBOL(drm_mode_match); + +/** + * drm_mode_equal - test modes for equality + * @mode1: first mode + * @mode2: second mode + * + * Check to see if @mode1 and @mode2 are equivalent. + * + * Returns: + * True if the modes are equal, false otherwise. + */ +bool drm_mode_equal(const struct drm_display_mode *mode1, + const struct drm_display_mode *mode2) +{ + return drm_mode_match(mode1, mode2, + DRM_MODE_MATCH_TIMINGS | + DRM_MODE_MATCH_CLOCK | + DRM_MODE_MATCH_FLAGS | + DRM_MODE_MATCH_3D_FLAGS| + DRM_MODE_MATCH_ASPECT_RATIO); } EXPORT_SYMBOL(drm_mode_equal); @@ -980,13 +1065,13 @@ EXPORT_SYMBOL(drm_mode_equal); * Returns: * True if the modes are equal, false otherwise. */ -bool drm_mode_equal_no_clocks(const struct drm_display_mode *mode1, const struct drm_display_mode *mode2) +bool drm_mode_equal_no_clocks(const struct drm_display_mode *mode1, + const struct drm_display_mode *mode2) { - if ((mode1->flags & DRM_MODE_FLAG_3D_MASK) != - (mode2->flags & DRM_MODE_FLAG_3D_MASK)) - return false; - - return drm_mode_equal_no_clocks_no_stereo(mode1, mode2); + return drm_mode_match(mode1, mode2, + DRM_MODE_MATCH_TIMINGS | + DRM_MODE_MATCH_FLAGS | + DRM_MODE_MATCH_3D_FLAGS); } EXPORT_SYMBOL(drm_mode_equal_no_clocks); @@ -1004,21 +1089,9 @@ EXPORT_SYMBOL(drm_mode_equal_no_clocks); bool drm_mode_equal_no_clocks_no_stereo(const struct drm_display_mode *mode1, const struct drm_display_mode *mode2) { - if (mode1->hdisplay == mode2->hdisplay && - mode1->hsync_start == mode2->hsync_start && - mode1->hsync_end == mode2->hsync_end && - mode1->htotal == mode2->htotal && - mode1->hskew == mode2->hskew && - mode1->vdisplay == mode2->vdisplay && - mode1->vsync_start == mode2->vsync_start && - mode1->vsync_end == mode2->vsync_end && - mode1->vtotal == mode2->vtotal && - mode1->vscan == mode2->vscan && - (mode1->flags & ~DRM_MODE_FLAG_3D_MASK) == - (mode2->flags & ~DRM_MODE_FLAG_3D_MASK)) - return true; - - return false; + return drm_mode_match(mode1, mode2, + DRM_MODE_MATCH_TIMINGS | + DRM_MODE_MATCH_FLAGS); } EXPORT_SYMBOL(drm_mode_equal_no_clocks_no_stereo); @@ -1575,6 +1648,26 @@ void drm_mode_convert_to_umode(struct drm_mode_modeinfo *out, out->vrefresh = in->vrefresh; out->flags = in->flags; out->type = in->type; + + switch (in->picture_aspect_ratio) { + case HDMI_PICTURE_ASPECT_4_3: + out->flags |= DRM_MODE_FLAG_PIC_AR_4_3; + break; + case HDMI_PICTURE_ASPECT_16_9: + out->flags |= DRM_MODE_FLAG_PIC_AR_16_9; + break; + case HDMI_PICTURE_ASPECT_64_27: + out->flags |= DRM_MODE_FLAG_PIC_AR_64_27; + break; + case HDMI_PICTURE_ASPECT_256_135: + out->flags |= DRM_MODE_FLAG_PIC_AR_256_135; + break; + case HDMI_PICTURE_ASPECT_RESERVED: + default: + out->flags |= DRM_MODE_FLAG_PIC_AR_NONE; + break; + } + strncpy(out->name, in->name, DRM_DISPLAY_MODE_LEN); out->name[DRM_DISPLAY_MODE_LEN-1] = 0; } @@ -1621,6 +1714,30 @@ int drm_mode_convert_umode(struct drm_device *dev, strncpy(out->name, in->name, DRM_DISPLAY_MODE_LEN); out->name[DRM_DISPLAY_MODE_LEN-1] = 0; + /* Clearing picture aspect ratio bits from out flags, + * as the aspect-ratio information is not stored in + * flags for kernel-mode, but in picture_aspect_ratio. + */ + out->flags &= ~DRM_MODE_FLAG_PIC_AR_MASK; + + switch (in->flags & DRM_MODE_FLAG_PIC_AR_MASK) { + case DRM_MODE_FLAG_PIC_AR_4_3: + out->picture_aspect_ratio |= HDMI_PICTURE_ASPECT_4_3; + break; + case DRM_MODE_FLAG_PIC_AR_16_9: + out->picture_aspect_ratio |= HDMI_PICTURE_ASPECT_16_9; + break; + case DRM_MODE_FLAG_PIC_AR_64_27: + out->picture_aspect_ratio |= HDMI_PICTURE_ASPECT_64_27; + break; + case DRM_MODE_FLAG_PIC_AR_256_135: + out->picture_aspect_ratio |= HDMI_PICTURE_ASPECT_256_135; + break; + default: + out->picture_aspect_ratio = HDMI_PICTURE_ASPECT_NONE; + break; + } + out->status = drm_mode_validate_driver(dev, out); if (out->status != MODE_OK) return -EINVAL; diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index 902cc1a..fe9c6c7 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -60,7 +60,7 @@ static const struct drm_dmi_panel_orientation_data itworks_tw891 = { .orientation = DRM_MODE_PANEL_ORIENTATION_RIGHT_UP, }; -static const struct drm_dmi_panel_orientation_data vios_lth17 = { +static const struct drm_dmi_panel_orientation_data lcd800x1280_rightside_up = { .width = 800, .height = 1280, .orientation = DRM_MODE_PANEL_ORIENTATION_RIGHT_UP, @@ -102,12 +102,30 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_BOARD_NAME, "TW891"), }, .driver_data = (void *)&itworks_tw891, + }, { /* + * Lenovo Ideapad Miix 310 laptop, only some production batches + * have a portrait screen, the resolution checks makes the quirk + * apply only to those batches. + */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "80SG"), + DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "MIIX 310-10ICR"), + }, + .driver_data = (void *)&lcd800x1280_rightside_up, + }, { /* Lenovo Ideapad Miix 320 */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "80XF"), + DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "Lenovo MIIX 320-10ICR"), + }, + .driver_data = (void *)&lcd800x1280_rightside_up, }, { /* VIOS LTH17 */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "VIOS"), DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "LTH17"), }, - .driver_data = (void *)&vios_lth17, + .driver_data = (void *)&lcd800x1280_rightside_up, }, {} }; @@ -154,10 +172,9 @@ int drm_get_panel_orientation_quirk(int width, int height) if (!bios_date) continue; - for (i = 0; data->bios_dates[i]; i++) { - if (!strcmp(data->bios_dates[i], bios_date)) - return data->orientation; - } + i = match_string(data->bios_dates, -1, bios_date); + if (i >= 0) + return data->orientation; } return DRM_MODE_PANEL_ORIENTATION_UNKNOWN; diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c index 6d2a6e4..0350544 100644 --- a/drivers/gpu/drm/drm_plane.c +++ b/drivers/gpu/drm/drm_plane.c @@ -756,6 +756,7 @@ static int drm_mode_cursor_universal(struct drm_crtc *crtc, struct drm_modeset_acquire_ctx *ctx) { struct drm_device *dev = crtc->dev; + struct drm_plane *plane = crtc->cursor; struct drm_framebuffer *fb = NULL; struct drm_mode_fb_cmd2 fbreq = { .width = req->width, @@ -769,8 +770,8 @@ static int drm_mode_cursor_universal(struct drm_crtc *crtc, uint32_t src_w = 0, src_h = 0; int ret = 0; - BUG_ON(!crtc->cursor); - WARN_ON(crtc->cursor->crtc != crtc && crtc->cursor->crtc != NULL); + BUG_ON(!plane); + WARN_ON(plane->crtc != crtc && plane->crtc != NULL); /* * Obtain fb we'll be using (either new or existing) and take an extra @@ -784,13 +785,18 @@ static int drm_mode_cursor_universal(struct drm_crtc *crtc, DRM_DEBUG_KMS("failed to wrap cursor buffer in drm framebuffer\n"); return PTR_ERR(fb); } + fb->hot_x = req->hot_x; fb->hot_y = req->hot_y; } else { fb = NULL; } } else { - fb = crtc->cursor->fb; + if (plane->state) + fb = plane->state->fb; + else + fb = plane->fb; + if (fb) drm_framebuffer_get(fb); } @@ -810,7 +816,7 @@ static int drm_mode_cursor_universal(struct drm_crtc *crtc, src_h = fb->height << 16; } - ret = __setplane_internal(crtc->cursor, crtc, fb, + ret = __setplane_internal(plane, crtc, fb, crtc_x, crtc_y, crtc_w, crtc_h, 0, 0, src_w, src_h, ctx); @@ -931,7 +937,8 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev, { struct drm_mode_crtc_page_flip_target *page_flip = data; struct drm_crtc *crtc; - struct drm_framebuffer *fb = NULL; + struct drm_plane *plane; + struct drm_framebuffer *fb = NULL, *old_fb; struct drm_pending_vblank_event *e = NULL; u32 target_vblank = page_flip->sequence; struct drm_modeset_acquire_ctx ctx; @@ -959,6 +966,8 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev, if (!crtc) return -ENOENT; + plane = crtc->primary; + if (crtc->funcs->page_flip_target) { u32 current_vblank; int r; @@ -1003,11 +1012,16 @@ retry: ret = drm_modeset_lock(&crtc->mutex, &ctx); if (ret) goto out; - ret = drm_modeset_lock(&crtc->primary->mutex, &ctx); + ret = drm_modeset_lock(&plane->mutex, &ctx); if (ret) goto out; - if (crtc->primary->fb == NULL) { + if (plane->state) + old_fb = plane->state->fb; + else + old_fb = plane->fb; + + if (old_fb == NULL) { /* The framebuffer is currently unbound, presumably * due to a hotplug event, that userspace has not * yet discovered. @@ -1022,8 +1036,8 @@ retry: goto out; } - if (crtc->state) { - const struct drm_plane_state *state = crtc->primary->state; + if (plane->state) { + const struct drm_plane_state *state = plane->state; ret = drm_framebuffer_check_src_coords(state->src_x, state->src_y, @@ -1031,12 +1045,13 @@ retry: state->src_h, fb); } else { - ret = drm_crtc_check_viewport(crtc, crtc->x, crtc->y, &crtc->mode, fb); + ret = drm_crtc_check_viewport(crtc, crtc->x, crtc->y, + &crtc->mode, fb); } if (ret) goto out; - if (crtc->primary->fb->format != fb->format) { + if (old_fb->format != fb->format) { DRM_DEBUG_KMS("Page flip is not allowed to change frame buffer format.\n"); ret = -EINVAL; goto out; @@ -1048,10 +1063,12 @@ retry: ret = -ENOMEM; goto out; } + e->event.base.type = DRM_EVENT_FLIP_COMPLETE; e->event.base.length = sizeof(e->event); e->event.vbl.user_data = page_flip->user_data; e->event.vbl.crtc_id = crtc->base.id; + ret = drm_event_reserve_init(dev, file_priv, &e->base, &e->event.base); if (ret) { kfree(e); @@ -1060,7 +1077,7 @@ retry: } } - crtc->primary->old_fb = crtc->primary->fb; + plane->old_fb = plane->fb; if (crtc->funcs->page_flip_target) ret = crtc->funcs->page_flip_target(crtc, fb, e, page_flip->flags, @@ -1073,19 +1090,18 @@ retry: if (page_flip->flags & DRM_MODE_PAGE_FLIP_EVENT) drm_event_cancel_free(dev, &e->base); /* Keep the old fb, don't unref it. */ - crtc->primary->old_fb = NULL; + plane->old_fb = NULL; } else { - crtc->primary->fb = fb; - /* Unref only the old framebuffer. */ - fb = NULL; + plane->fb = fb; + drm_framebuffer_get(fb); } out: if (fb) drm_framebuffer_put(fb); - if (crtc->primary->old_fb) - drm_framebuffer_put(crtc->primary->old_fb); - crtc->primary->old_fb = NULL; + if (plane->old_fb) + drm_framebuffer_put(plane->old_fb); + plane->old_fb = NULL; if (ret == -EDEADLK) { ret = drm_modeset_backoff(&ctx); diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index 7856a9b..397b46b 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -331,6 +331,9 @@ EXPORT_SYMBOL(drm_gem_map_dma_buf); /** * drm_gem_unmap_dma_buf - unmap_dma_buf implementation for GEM + * @attach: attachment to unmap buffer from + * @sgt: scatterlist info of the buffer to unmap + * @dir: direction of DMA transfer * * Not implemented. The unmap is done at drm_gem_map_detach(). This can be * used as the &dma_buf_ops.unmap_dma_buf callback. @@ -406,7 +409,10 @@ void *drm_gem_dmabuf_vmap(struct dma_buf *dma_buf) struct drm_gem_object *obj = dma_buf->priv; struct drm_device *dev = obj->dev; - return dev->driver->gem_prime_vmap(obj); + if (dev->driver->gem_prime_vmap) + return dev->driver->gem_prime_vmap(obj); + else + return NULL; } EXPORT_SYMBOL(drm_gem_dmabuf_vmap); @@ -423,12 +429,15 @@ void drm_gem_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr) struct drm_gem_object *obj = dma_buf->priv; struct drm_device *dev = obj->dev; - dev->driver->gem_prime_vunmap(obj, vaddr); + if (dev->driver->gem_prime_vunmap) + dev->driver->gem_prime_vunmap(obj, vaddr); } EXPORT_SYMBOL(drm_gem_dmabuf_vunmap); /** * drm_gem_dmabuf_kmap_atomic - map_atomic implementation for GEM + * @dma_buf: buffer to be mapped + * @page_num: page number within the buffer * * Not implemented. This can be used as the &dma_buf_ops.map_atomic callback. */ @@ -441,6 +450,9 @@ EXPORT_SYMBOL(drm_gem_dmabuf_kmap_atomic); /** * drm_gem_dmabuf_kunmap_atomic - unmap_atomic implementation for GEM + * @dma_buf: buffer to be unmapped + * @page_num: page number within the buffer + * @addr: virtual address of the buffer * * Not implemented. This can be used as the &dma_buf_ops.unmap_atomic callback. */ @@ -453,6 +465,8 @@ EXPORT_SYMBOL(drm_gem_dmabuf_kunmap_atomic); /** * drm_gem_dmabuf_kmap - map implementation for GEM + * @dma_buf: buffer to be mapped + * @page_num: page number within the buffer * * Not implemented. This can be used as the &dma_buf_ops.map callback. */ @@ -464,6 +478,9 @@ EXPORT_SYMBOL(drm_gem_dmabuf_kmap); /** * drm_gem_dmabuf_kunmap - unmap implementation for GEM + * @dma_buf: buffer to be unmapped + * @page_num: page number within the buffer + * @addr: virtual address of the buffer * * Not implemented. This can be used as the &dma_buf_ops.unmap callback. */ diff --git a/drivers/gpu/drm/drm_property.c b/drivers/gpu/drm/drm_property.c index 8f4672d..1f8031e 100644 --- a/drivers/gpu/drm/drm_property.c +++ b/drivers/gpu/drm/drm_property.c @@ -169,9 +169,9 @@ struct drm_property *drm_property_create_enum(struct drm_device *dev, return NULL; for (i = 0; i < num_values; i++) { - ret = drm_property_add_enum(property, i, - props[i].type, - props[i].name); + ret = drm_property_add_enum(property, + props[i].type, + props[i].name); if (ret) { drm_property_destroy(dev, property); return NULL; @@ -209,7 +209,7 @@ struct drm_property *drm_property_create_bitmask(struct drm_device *dev, uint64_t supported_bits) { struct drm_property *property; - int i, ret, index = 0; + int i, ret; int num_values = hweight64(supported_bits); flags |= DRM_MODE_PROP_BITMASK; @@ -221,14 +221,9 @@ struct drm_property *drm_property_create_bitmask(struct drm_device *dev, if (!(supported_bits & (1ULL << props[i].type))) continue; - if (WARN_ON(index >= num_values)) { - drm_property_destroy(dev, property); - return NULL; - } - - ret = drm_property_add_enum(property, index++, - props[i].type, - props[i].name); + ret = drm_property_add_enum(property, + props[i].type, + props[i].name); if (ret) { drm_property_destroy(dev, property); return NULL; @@ -376,7 +371,6 @@ EXPORT_SYMBOL(drm_property_create_bool); /** * drm_property_add_enum - add a possible value to an enumeration property * @property: enumeration property to change - * @index: index of the new enumeration * @value: value of the new enumeration * @name: symbolic name of the new enumeration * @@ -388,10 +382,11 @@ EXPORT_SYMBOL(drm_property_create_bool); * Returns: * Zero on success, error code on failure. */ -int drm_property_add_enum(struct drm_property *property, int index, +int drm_property_add_enum(struct drm_property *property, uint64_t value, const char *name) { struct drm_property_enum *prop_enum; + int index = 0; if (WARN_ON(strlen(name) >= DRM_PROP_NAME_LEN)) return -EINVAL; @@ -411,8 +406,12 @@ int drm_property_add_enum(struct drm_property *property, int index, list_for_each_entry(prop_enum, &property->enum_list, head) { if (WARN_ON(prop_enum->value == value)) return -EINVAL; + index++; } + if (WARN_ON(index >= property->num_values)) + return -EINVAL; + prop_enum = kzalloc(sizeof(struct drm_property_enum), GFP_KERNEL); if (!prop_enum) return -ENOMEM; diff --git a/drivers/gpu/drm/drm_rect.c b/drivers/gpu/drm/drm_rect.c index 9817c14..8c05782 100644 --- a/drivers/gpu/drm/drm_rect.c +++ b/drivers/gpu/drm/drm_rect.c @@ -50,13 +50,25 @@ bool drm_rect_intersect(struct drm_rect *r1, const struct drm_rect *r2) } EXPORT_SYMBOL(drm_rect_intersect); +static u32 clip_scaled(u32 src, u32 dst, u32 clip) +{ + u64 tmp = mul_u32_u32(src, dst - clip); + + /* + * Round toward 1.0 when clipping so that we don't accidentally + * change upscaling to downscaling or vice versa. + */ + if (src < (dst << 16)) + return DIV_ROUND_UP_ULL(tmp, dst); + else + return DIV_ROUND_DOWN_ULL(tmp, dst); +} + /** * drm_rect_clip_scaled - perform a scaled clip operation * @src: source window rectangle * @dst: destination window rectangle * @clip: clip rectangle - * @hscale: horizontal scaling factor - * @vscale: vertical scaling factor * * Clip rectangle @dst by rectangle @clip. Clip rectangle @src by the * same amounts multiplied by @hscale and @vscale. @@ -66,33 +78,44 @@ EXPORT_SYMBOL(drm_rect_intersect); * %false otherwise */ bool drm_rect_clip_scaled(struct drm_rect *src, struct drm_rect *dst, - const struct drm_rect *clip, - int hscale, int vscale) + const struct drm_rect *clip) { int diff; diff = clip->x1 - dst->x1; if (diff > 0) { - int64_t tmp = src->x1 + (int64_t) diff * hscale; - src->x1 = clamp_t(int64_t, tmp, INT_MIN, INT_MAX); + u32 new_src_w = clip_scaled(drm_rect_width(src), + drm_rect_width(dst), diff); + + src->x1 = clamp_t(int64_t, src->x2 - new_src_w, INT_MIN, INT_MAX); + dst->x1 = clip->x1; } diff = clip->y1 - dst->y1; if (diff > 0) { - int64_t tmp = src->y1 + (int64_t) diff * vscale; - src->y1 = clamp_t(int64_t, tmp, INT_MIN, INT_MAX); + u32 new_src_h = clip_scaled(drm_rect_height(src), + drm_rect_height(dst), diff); + + src->y1 = clamp_t(int64_t, src->y2 - new_src_h, INT_MIN, INT_MAX); + dst->y1 = clip->y1; } diff = dst->x2 - clip->x2; if (diff > 0) { - int64_t tmp = src->x2 - (int64_t) diff * hscale; - src->x2 = clamp_t(int64_t, tmp, INT_MIN, INT_MAX); + u32 new_src_w = clip_scaled(drm_rect_width(src), + drm_rect_width(dst), diff); + + src->x2 = clamp_t(int64_t, src->x1 + new_src_w, INT_MIN, INT_MAX); + dst->x2 = clip->x2; } diff = dst->y2 - clip->y2; if (diff > 0) { - int64_t tmp = src->y2 - (int64_t) diff * vscale; - src->y2 = clamp_t(int64_t, tmp, INT_MIN, INT_MAX); + u32 new_src_h = clip_scaled(drm_rect_height(src), + drm_rect_height(dst), diff); + + src->y2 = clamp_t(int64_t, src->y1 + new_src_h, INT_MIN, INT_MAX); + dst->y2 = clip->y2; } - return drm_rect_intersect(dst, clip); + return drm_rect_visible(dst); } EXPORT_SYMBOL(drm_rect_clip_scaled); @@ -106,7 +129,10 @@ static int drm_calc_scale(int src, int dst) if (dst == 0) return 0; - scale = src / dst; + if (src > (dst << 16)) + return DIV_ROUND_UP(src, dst); + else + scale = src / dst; return scale; } @@ -121,6 +147,10 @@ static int drm_calc_scale(int src, int dst) * Calculate the horizontal scaling factor as * (@src width) / (@dst width). * + * If the scale is below 1 << 16, round down. If the scale is above + * 1 << 16, round up. This will calculate the scale with the most + * pessimistic limit calculation. + * * RETURNS: * The horizontal scaling factor, or errno of out of limits. */ @@ -152,6 +182,10 @@ EXPORT_SYMBOL(drm_rect_calc_hscale); * Calculate the vertical scaling factor as * (@src height) / (@dst height). * + * If the scale is below 1 << 16, round down. If the scale is above + * 1 << 16, round up. This will calculate the scale with the most + * pessimistic limit calculation. + * * RETURNS: * The vertical scaling factor, or errno of out of limits. */ @@ -189,6 +223,10 @@ EXPORT_SYMBOL(drm_rect_calc_vscale); * If the calculated scaling factor is above @max_vscale, * decrease the height of rectangle @src to compensate. * + * If the scale is below 1 << 16, round down. If the scale is above + * 1 << 16, round up. This will calculate the scale with the most + * pessimistic limit calculation. + * * RETURNS: * The horizontal scaling factor. */ @@ -239,6 +277,10 @@ EXPORT_SYMBOL(drm_rect_calc_hscale_relaxed); * If the calculated scaling factor is above @max_vscale, * decrease the height of rectangle @src to compensate. * + * If the scale is below 1 << 16, round down. If the scale is above + * 1 << 16, round up. This will calculate the scale with the most + * pessimistic limit calculation. + * * RETURNS: * The vertical scaling factor. */ @@ -373,8 +415,8 @@ EXPORT_SYMBOL(drm_rect_rotate); * them when doing a rotatation and its inverse. * That is, if you do :: * - * DRM_MODE_PROP_ROTATE(&r, width, height, rotation); - * DRM_MODE_ROTATE_inv(&r, width, height, rotation); + * drm_rect_rotate(&r, width, height, rotation); + * drm_rect_rotate_inv(&r, width, height, rotation); * * you will always get back the original rectangle. */ diff --git a/drivers/gpu/drm/drm_scdc_helper.c b/drivers/gpu/drm/drm_scdc_helper.c index 657ea5a..870e25f 100644 --- a/drivers/gpu/drm/drm_scdc_helper.c +++ b/drivers/gpu/drm/drm_scdc_helper.c @@ -141,7 +141,7 @@ bool drm_scdc_get_scrambling_status(struct i2c_adapter *adapter) ret = drm_scdc_readb(adapter, SCDC_SCRAMBLER_STATUS, &status); if (ret < 0) { - DRM_ERROR("Failed to read scrambling status: %d\n", ret); + DRM_DEBUG_KMS("Failed to read scrambling status: %d\n", ret); return false; } @@ -168,7 +168,7 @@ bool drm_scdc_set_scrambling(struct i2c_adapter *adapter, bool enable) ret = drm_scdc_readb(adapter, SCDC_TMDS_CONFIG, &config); if (ret < 0) { - DRM_ERROR("Failed to read TMDS config: %d\n", ret); + DRM_DEBUG_KMS("Failed to read TMDS config: %d\n", ret); return false; } @@ -179,7 +179,7 @@ bool drm_scdc_set_scrambling(struct i2c_adapter *adapter, bool enable) ret = drm_scdc_writeb(adapter, SCDC_TMDS_CONFIG, config); if (ret < 0) { - DRM_ERROR("Failed to enable scrambling: %d\n", ret); + DRM_DEBUG_KMS("Failed to enable scrambling: %d\n", ret); return false; } @@ -223,7 +223,7 @@ bool drm_scdc_set_high_tmds_clock_ratio(struct i2c_adapter *adapter, bool set) ret = drm_scdc_readb(adapter, SCDC_TMDS_CONFIG, &config); if (ret < 0) { - DRM_ERROR("Failed to read TMDS config: %d\n", ret); + DRM_DEBUG_KMS("Failed to read TMDS config: %d\n", ret); return false; } @@ -234,7 +234,7 @@ bool drm_scdc_set_high_tmds_clock_ratio(struct i2c_adapter *adapter, bool set) ret = drm_scdc_writeb(adapter, SCDC_TMDS_CONFIG, config); if (ret < 0) { - DRM_ERROR("Failed to set TMDS clock ratio: %d\n", ret); + DRM_DEBUG_KMS("Failed to set TMDS clock ratio: %d\n", ret); return false; } diff --git a/drivers/gpu/drm/drm_simple_kms_helper.c b/drivers/gpu/drm/drm_simple_kms_helper.c index 987a353..7a00455 100644 --- a/drivers/gpu/drm/drm_simple_kms_helper.c +++ b/drivers/gpu/drm/drm_simple_kms_helper.c @@ -64,13 +64,15 @@ static int drm_simple_kms_crtc_check(struct drm_crtc *crtc, static void drm_simple_kms_crtc_enable(struct drm_crtc *crtc, struct drm_crtc_state *old_state) { + struct drm_plane *plane; struct drm_simple_display_pipe *pipe; pipe = container_of(crtc, struct drm_simple_display_pipe, crtc); if (!pipe->funcs || !pipe->funcs->enable) return; - pipe->funcs->enable(pipe, crtc->state); + plane = &pipe->plane; + pipe->funcs->enable(pipe, crtc->state, plane->state); } static void drm_simple_kms_crtc_disable(struct drm_crtc *crtc, diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c index 1c5b5ce..b3c1daa 100644 --- a/drivers/gpu/drm/drm_sysfs.c +++ b/drivers/gpu/drm/drm_sysfs.c @@ -331,9 +331,7 @@ struct device *drm_sysfs_minor_alloc(struct drm_minor *minor) struct device *kdev; int r; - if (minor->type == DRM_MINOR_CONTROL) - minor_str = "controlD%d"; - else if (minor->type == DRM_MINOR_RENDER) + if (minor->type == DRM_MINOR_RENDER) minor_str = "renderD%d"; else minor_str = "card%d"; diff --git a/drivers/gpu/drm/exynos/Kconfig b/drivers/gpu/drm/exynos/Kconfig index 735ce47..208bc27 100644 --- a/drivers/gpu/drm/exynos/Kconfig +++ b/drivers/gpu/drm/exynos/Kconfig @@ -1,6 +1,6 @@ config DRM_EXYNOS tristate "DRM Support for Samsung SoC EXYNOS Series" - depends on OF && DRM && (ARCH_S3C64XX || ARCH_EXYNOS || ARCH_MULTIPLATFORM) + depends on OF && DRM && (ARCH_S3C64XX || ARCH_S5PV210 || ARCH_EXYNOS || ARCH_MULTIPLATFORM) select DRM_KMS_HELPER select VIDEOMODE_HELPERS select SND_SOC_HDMI_CODEC if SND_SOC @@ -95,21 +95,31 @@ config DRM_EXYNOS_G2D help Choose this option if you want to use Exynos G2D for DRM. +config DRM_EXYNOS_IPP + bool + config DRM_EXYNOS_FIMC bool "FIMC" - depends on BROKEN && MFD_SYSCON + select DRM_EXYNOS_IPP help Choose this option if you want to use Exynos FIMC for DRM. config DRM_EXYNOS_ROTATOR bool "Rotator" - depends on BROKEN + select DRM_EXYNOS_IPP help Choose this option if you want to use Exynos Rotator for DRM. +config DRM_EXYNOS_SCALER + bool "Scaler" + select DRM_EXYNOS_IPP + help + Choose this option if you want to use Exynos Scaler for DRM. + config DRM_EXYNOS_GSC bool "GScaler" - depends on BROKEN && ARCH_EXYNOS5 && VIDEO_SAMSUNG_EXYNOS_GSC=n + depends on VIDEO_SAMSUNG_EXYNOS_GSC=n + select DRM_EXYNOS_IPP help Choose this option if you want to use Exynos GSC for DRM. diff --git a/drivers/gpu/drm/exynos/Makefile b/drivers/gpu/drm/exynos/Makefile index a51c545..3b323f1 100644 --- a/drivers/gpu/drm/exynos/Makefile +++ b/drivers/gpu/drm/exynos/Makefile @@ -18,8 +18,10 @@ exynosdrm-$(CONFIG_DRM_EXYNOS_MIXER) += exynos_mixer.o exynosdrm-$(CONFIG_DRM_EXYNOS_HDMI) += exynos_hdmi.o exynosdrm-$(CONFIG_DRM_EXYNOS_VIDI) += exynos_drm_vidi.o exynosdrm-$(CONFIG_DRM_EXYNOS_G2D) += exynos_drm_g2d.o +exynosdrm-$(CONFIG_DRM_EXYNOS_IPP) += exynos_drm_ipp.o exynosdrm-$(CONFIG_DRM_EXYNOS_FIMC) += exynos_drm_fimc.o exynosdrm-$(CONFIG_DRM_EXYNOS_ROTATOR) += exynos_drm_rotator.o +exynosdrm-$(CONFIG_DRM_EXYNOS_SCALER) += exynos_drm_scaler.o exynosdrm-$(CONFIG_DRM_EXYNOS_GSC) += exynos_drm_gsc.o exynosdrm-$(CONFIG_DRM_EXYNOS_MIC) += exynos_drm_mic.o diff --git a/drivers/gpu/drm/exynos/exynos_dp.c b/drivers/gpu/drm/exynos/exynos_dp.c index 964831d..86330f3 100644 --- a/drivers/gpu/drm/exynos/exynos_dp.c +++ b/drivers/gpu/drm/exynos/exynos_dp.c @@ -162,7 +162,7 @@ static int exynos_dp_bind(struct device *dev, struct device *master, void *data) dp->drm_dev = drm_dev; dp->plat_data.dev_type = EXYNOS_DP; - dp->plat_data.power_on = exynos_dp_poweron; + dp->plat_data.power_on_start = exynos_dp_poweron; dp->plat_data.power_off = exynos_dp_poweroff; dp->plat_data.attach = exynos_dp_bridge_attach; dp->plat_data.get_modes = exynos_dp_get_modes; diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c index a518e9c..a81b4a5 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.c +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c @@ -27,35 +27,23 @@ #include "exynos_drm_fb.h" #include "exynos_drm_gem.h" #include "exynos_drm_plane.h" +#include "exynos_drm_ipp.h" #include "exynos_drm_vidi.h" #include "exynos_drm_g2d.h" #include "exynos_drm_iommu.h" #define DRIVER_NAME "exynos" #define DRIVER_DESC "Samsung SoC DRM" -#define DRIVER_DATE "20110530" -#define DRIVER_MAJOR 1 -#define DRIVER_MINOR 0 - -int exynos_atomic_check(struct drm_device *dev, - struct drm_atomic_state *state) -{ - int ret; - - ret = drm_atomic_helper_check_modeset(dev, state); - if (ret) - return ret; +#define DRIVER_DATE "20180330" - ret = drm_atomic_normalize_zpos(dev, state); - if (ret) - return ret; - - ret = drm_atomic_helper_check_planes(dev, state); - if (ret) - return ret; - - return ret; -} +/* + * Interface history: + * + * 1.0 - Original version + * 1.1 - Upgrade IPP driver to version 2.0 + */ +#define DRIVER_MAJOR 1 +#define DRIVER_MINOR 1 static int exynos_drm_open(struct drm_device *dev, struct drm_file *file) { @@ -108,6 +96,16 @@ static const struct drm_ioctl_desc exynos_ioctls[] = { DRM_AUTH | DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(EXYNOS_G2D_EXEC, exynos_g2d_exec_ioctl, DRM_AUTH | DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(EXYNOS_IPP_GET_RESOURCES, + exynos_drm_ipp_get_res_ioctl, + DRM_AUTH | DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(EXYNOS_IPP_GET_CAPS, exynos_drm_ipp_get_caps_ioctl, + DRM_AUTH | DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(EXYNOS_IPP_GET_LIMITS, + exynos_drm_ipp_get_limits_ioctl, + DRM_AUTH | DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(EXYNOS_IPP_COMMIT, exynos_drm_ipp_commit_ioctl, + DRM_AUTH | DRM_RENDER_ALLOW), }; static const struct file_operations exynos_drm_driver_fops = { @@ -204,6 +202,7 @@ struct exynos_drm_driver_info { #define DRM_COMPONENT_DRIVER BIT(0) /* supports component framework */ #define DRM_VIRTUAL_DEVICE BIT(1) /* create virtual platform device */ #define DRM_DMA_DEVICE BIT(2) /* can be used for dma allocations */ +#define DRM_FIMC_DEVICE BIT(3) /* devices shared with V4L2 subsystem */ #define DRV_PTR(drv, cond) (IS_ENABLED(cond) ? &drv : NULL) @@ -243,10 +242,16 @@ static struct exynos_drm_driver_info exynos_drm_drivers[] = { DRV_PTR(g2d_driver, CONFIG_DRM_EXYNOS_G2D), }, { DRV_PTR(fimc_driver, CONFIG_DRM_EXYNOS_FIMC), + DRM_COMPONENT_DRIVER | DRM_FIMC_DEVICE, }, { DRV_PTR(rotator_driver, CONFIG_DRM_EXYNOS_ROTATOR), + DRM_COMPONENT_DRIVER + }, { + DRV_PTR(scaler_driver, CONFIG_DRM_EXYNOS_SCALER), + DRM_COMPONENT_DRIVER }, { DRV_PTR(gsc_driver, CONFIG_DRM_EXYNOS_GSC), + DRM_COMPONENT_DRIVER }, { &exynos_drm_platform_driver, DRM_VIRTUAL_DEVICE @@ -274,7 +279,11 @@ static struct component_match *exynos_drm_match_add(struct device *dev) &info->driver->driver, (void *)platform_bus_type.match))) { put_device(p); - component_match_add(dev, &match, compare_dev, d); + + if (!(info->flags & DRM_FIMC_DEVICE) || + exynos_drm_check_fimc_device(d) == 0) + component_match_add(dev, &match, + compare_dev, d); p = d; } put_device(p); diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h index df2262f..0f6d079 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_drv.h +++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h @@ -273,9 +273,17 @@ static inline int exynos_dpi_bind(struct drm_device *dev, } #endif +#ifdef CONFIG_DRM_EXYNOS_FIMC +int exynos_drm_check_fimc_device(struct device *dev); +#else +static inline int exynos_drm_check_fimc_device(struct device *dev) +{ + return 0; +} +#endif + int exynos_atomic_commit(struct drm_device *dev, struct drm_atomic_state *state, bool nonblock); -int exynos_atomic_check(struct drm_device *dev, struct drm_atomic_state *state); extern struct platform_driver fimd_driver; @@ -289,6 +297,7 @@ extern struct platform_driver vidi_driver; extern struct platform_driver g2d_driver; extern struct platform_driver fimc_driver; extern struct platform_driver rotator_driver; +extern struct platform_driver scaler_driver; extern struct platform_driver gsc_driver; extern struct platform_driver mic_driver; #endif diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c index 7904ffa..eae44fd 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c @@ -270,7 +270,6 @@ struct exynos_dsi { u32 lanes; u32 mode_flags; u32 format; - struct videomode vm; int state; struct drm_property *brightness; @@ -881,30 +880,30 @@ static int exynos_dsi_init_link(struct exynos_dsi *dsi) static void exynos_dsi_set_display_mode(struct exynos_dsi *dsi) { - struct videomode *vm = &dsi->vm; + struct drm_display_mode *m = &dsi->encoder.crtc->state->adjusted_mode; unsigned int num_bits_resol = dsi->driver_data->num_bits_resol; u32 reg; if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO) { reg = DSIM_CMD_ALLOW(0xf) - | DSIM_STABLE_VFP(vm->vfront_porch) - | DSIM_MAIN_VBP(vm->vback_porch); + | DSIM_STABLE_VFP(m->vsync_start - m->vdisplay) + | DSIM_MAIN_VBP(m->vtotal - m->vsync_end); exynos_dsi_write(dsi, DSIM_MVPORCH_REG, reg); - reg = DSIM_MAIN_HFP(vm->hfront_porch) - | DSIM_MAIN_HBP(vm->hback_porch); + reg = DSIM_MAIN_HFP(m->hsync_start - m->hdisplay) + | DSIM_MAIN_HBP(m->htotal - m->hsync_end); exynos_dsi_write(dsi, DSIM_MHPORCH_REG, reg); - reg = DSIM_MAIN_VSA(vm->vsync_len) - | DSIM_MAIN_HSA(vm->hsync_len); + reg = DSIM_MAIN_VSA(m->vsync_end - m->vsync_start) + | DSIM_MAIN_HSA(m->hsync_end - m->hsync_start); exynos_dsi_write(dsi, DSIM_MSYNC_REG, reg); } - reg = DSIM_MAIN_HRESOL(vm->hactive, num_bits_resol) | - DSIM_MAIN_VRESOL(vm->vactive, num_bits_resol); + reg = DSIM_MAIN_HRESOL(m->hdisplay, num_bits_resol) | + DSIM_MAIN_VRESOL(m->vdisplay, num_bits_resol); exynos_dsi_write(dsi, DSIM_MDRESOL_REG, reg); - dev_dbg(dsi->dev, "LCD size = %dx%d\n", vm->hactive, vm->vactive); + dev_dbg(dsi->dev, "LCD size = %dx%d\n", m->hdisplay, m->vdisplay); } static void exynos_dsi_set_display_enable(struct exynos_dsi *dsi, bool enable) @@ -1485,26 +1484,7 @@ static int exynos_dsi_create_connector(struct drm_encoder *encoder) return 0; } -static void exynos_dsi_mode_set(struct drm_encoder *encoder, - struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode) -{ - struct exynos_dsi *dsi = encoder_to_dsi(encoder); - struct videomode *vm = &dsi->vm; - struct drm_display_mode *m = adjusted_mode; - - vm->hactive = m->hdisplay; - vm->vactive = m->vdisplay; - vm->vfront_porch = m->vsync_start - m->vdisplay; - vm->vback_porch = m->vtotal - m->vsync_end; - vm->vsync_len = m->vsync_end - m->vsync_start; - vm->hfront_porch = m->hsync_start - m->hdisplay; - vm->hback_porch = m->htotal - m->hsync_end; - vm->hsync_len = m->hsync_end - m->hsync_start; -} - static const struct drm_encoder_helper_funcs exynos_dsi_encoder_helper_funcs = { - .mode_set = exynos_dsi_mode_set, .enable = exynos_dsi_enable, .disable = exynos_dsi_disable, }; diff --git a/drivers/gpu/drm/exynos/exynos_drm_fb.c b/drivers/gpu/drm/exynos/exynos_drm_fb.c index 0faaf82..7fcc1a7 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fb.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fb.c @@ -18,6 +18,7 @@ #include <drm/drm_fb_helper.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> +#include <drm/drm_gem_framebuffer_helper.h> #include <uapi/drm/exynos_drm.h> #include "exynos_drm_drv.h" @@ -26,20 +27,6 @@ #include "exynos_drm_iommu.h" #include "exynos_drm_crtc.h" -#define to_exynos_fb(x) container_of(x, struct exynos_drm_fb, fb) - -/* - * exynos specific framebuffer structure. - * - * @fb: drm framebuffer obejct. - * @exynos_gem: array of exynos specific gem object containing a gem object. - */ -struct exynos_drm_fb { - struct drm_framebuffer fb; - struct exynos_drm_gem *exynos_gem[MAX_FB_BUFFER]; - dma_addr_t dma_addr[MAX_FB_BUFFER]; -}; - static int check_fb_gem_memory_type(struct drm_device *drm_dev, struct exynos_drm_gem *exynos_gem) { @@ -66,40 +53,9 @@ static int check_fb_gem_memory_type(struct drm_device *drm_dev, return 0; } -static void exynos_drm_fb_destroy(struct drm_framebuffer *fb) -{ - struct exynos_drm_fb *exynos_fb = to_exynos_fb(fb); - unsigned int i; - - drm_framebuffer_cleanup(fb); - - for (i = 0; i < ARRAY_SIZE(exynos_fb->exynos_gem); i++) { - struct drm_gem_object *obj; - - if (exynos_fb->exynos_gem[i] == NULL) - continue; - - obj = &exynos_fb->exynos_gem[i]->base; - drm_gem_object_unreference_unlocked(obj); - } - - kfree(exynos_fb); - exynos_fb = NULL; -} - -static int exynos_drm_fb_create_handle(struct drm_framebuffer *fb, - struct drm_file *file_priv, - unsigned int *handle) -{ - struct exynos_drm_fb *exynos_fb = to_exynos_fb(fb); - - return drm_gem_handle_create(file_priv, - &exynos_fb->exynos_gem[0]->base, handle); -} - static const struct drm_framebuffer_funcs exynos_drm_fb_funcs = { - .destroy = exynos_drm_fb_destroy, - .create_handle = exynos_drm_fb_create_handle, + .destroy = drm_gem_fb_destroy, + .create_handle = drm_gem_fb_create_handle, }; struct drm_framebuffer * @@ -108,12 +64,12 @@ exynos_drm_framebuffer_init(struct drm_device *dev, struct exynos_drm_gem **exynos_gem, int count) { - struct exynos_drm_fb *exynos_fb; + struct drm_framebuffer *fb; int i; int ret; - exynos_fb = kzalloc(sizeof(*exynos_fb), GFP_KERNEL); - if (!exynos_fb) + fb = kzalloc(sizeof(*fb), GFP_KERNEL); + if (!fb) return ERR_PTR(-ENOMEM); for (i = 0; i < count; i++) { @@ -121,23 +77,21 @@ exynos_drm_framebuffer_init(struct drm_device *dev, if (ret < 0) goto err; - exynos_fb->exynos_gem[i] = exynos_gem[i]; - exynos_fb->dma_addr[i] = exynos_gem[i]->dma_addr - + mode_cmd->offsets[i]; + fb->obj[i] = &exynos_gem[i]->base; } - drm_helper_mode_fill_fb_struct(dev, &exynos_fb->fb, mode_cmd); + drm_helper_mode_fill_fb_struct(dev, fb, mode_cmd); - ret = drm_framebuffer_init(dev, &exynos_fb->fb, &exynos_drm_fb_funcs); + ret = drm_framebuffer_init(dev, fb, &exynos_drm_fb_funcs); if (ret < 0) { DRM_ERROR("failed to initialize framebuffer\n"); goto err; } - return &exynos_fb->fb; + return fb; err: - kfree(exynos_fb); + kfree(fb); return ERR_PTR(ret); } @@ -191,12 +145,13 @@ err: dma_addr_t exynos_drm_fb_dma_addr(struct drm_framebuffer *fb, int index) { - struct exynos_drm_fb *exynos_fb = to_exynos_fb(fb); + struct exynos_drm_gem *exynos_gem; if (WARN_ON_ONCE(index >= MAX_FB_BUFFER)) return 0; - return exynos_fb->dma_addr[index]; + exynos_gem = to_exynos_gem(fb->obj[index]); + return exynos_gem->dma_addr + fb->offsets[index]; } static struct drm_mode_config_helper_funcs exynos_drm_mode_config_helpers = { @@ -206,7 +161,7 @@ static struct drm_mode_config_helper_funcs exynos_drm_mode_config_helpers = { static const struct drm_mode_config_funcs exynos_drm_mode_config_funcs = { .fb_create = exynos_user_fb_create, .output_poll_changed = drm_fb_helper_output_poll_changed, - .atomic_check = exynos_atomic_check, + .atomic_check = drm_atomic_helper_check, .atomic_commit = drm_atomic_helper_commit, }; @@ -227,4 +182,6 @@ void exynos_drm_mode_config_init(struct drm_device *dev) dev->mode_config.helper_private = &exynos_drm_mode_config_helpers; dev->mode_config.allow_fb_modifiers = true; + + dev->mode_config.normalize_zpos = true; } diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c index 5b18b5c..4dfbfc7 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c @@ -12,6 +12,7 @@ * */ #include <linux/kernel.h> +#include <linux/component.h> #include <linux/platform_device.h> #include <linux/mfd/syscon.h> #include <linux/regmap.h> @@ -24,8 +25,8 @@ #include <drm/exynos_drm.h> #include "regs-fimc.h" #include "exynos_drm_drv.h" +#include "exynos_drm_iommu.h" #include "exynos_drm_ipp.h" -#include "exynos_drm_fimc.h" /* * FIMC stands for Fully Interactive Mobile Camera and @@ -33,23 +34,6 @@ * input DMA reads image data from the memory. * output DMA writes image data to memory. * FIMC supports image rotation and image effect functions. - * - * M2M operation : supports crop/scale/rotation/csc so on. - * Memory ----> FIMC H/W ----> Memory. - * Writeback operation : supports cloned screen with FIMD. - * FIMD ----> FIMC H/W ----> Memory. - * Output operation : supports direct display using local path. - * Memory ----> FIMC H/W ----> FIMD. - */ - -/* - * TODO - * 1. check suspend/resume api if needed. - * 2. need to check use case platform_device_id. - * 3. check src/dst size with, height. - * 4. added check_prepare api for right register. - * 5. need to add supported list in prop_list. - * 6. check prescaler/scaler optimization. */ #define FIMC_MAX_DEVS 4 @@ -59,29 +43,19 @@ #define FIMC_BUF_STOP 1 #define FIMC_BUF_START 2 #define FIMC_WIDTH_ITU_709 1280 -#define FIMC_REFRESH_MAX 60 -#define FIMC_REFRESH_MIN 12 -#define FIMC_CROP_MAX 8192 -#define FIMC_CROP_MIN 32 -#define FIMC_SCALE_MAX 4224 -#define FIMC_SCALE_MIN 32 +#define FIMC_AUTOSUSPEND_DELAY 2000 + +static unsigned int fimc_mask = 0xc; +module_param_named(fimc_devs, fimc_mask, uint, 0644); +MODULE_PARM_DESC(fimc_devs, "Alias mask for assigning FIMC devices to Exynos DRM"); #define get_fimc_context(dev) platform_get_drvdata(to_platform_device(dev)) -#define get_ctx_from_ippdrv(ippdrv) container_of(ippdrv,\ - struct fimc_context, ippdrv); -enum fimc_wb { - FIMC_WB_NONE, - FIMC_WB_A, - FIMC_WB_B, -}; enum { FIMC_CLK_LCLK, FIMC_CLK_GATE, FIMC_CLK_WB_A, FIMC_CLK_WB_B, - FIMC_CLK_MUX, - FIMC_CLK_PARENT, FIMC_CLKS_MAX }; @@ -90,12 +64,8 @@ static const char * const fimc_clock_names[] = { [FIMC_CLK_GATE] = "fimc", [FIMC_CLK_WB_A] = "pxl_async0", [FIMC_CLK_WB_B] = "pxl_async1", - [FIMC_CLK_MUX] = "mux", - [FIMC_CLK_PARENT] = "parent", }; -#define FIMC_DEFAULT_LCLK_FREQUENCY 133000000UL - /* * A structure of scaler. * @@ -107,7 +77,7 @@ static const char * const fimc_clock_names[] = { * @vratio: vertical ratio. */ struct fimc_scaler { - bool range; + bool range; bool bypass; bool up_h; bool up_v; @@ -116,56 +86,32 @@ struct fimc_scaler { }; /* - * A structure of scaler capability. - * - * find user manual table 43-1. - * @in_hori: scaler input horizontal size. - * @bypass: scaler bypass mode. - * @dst_h_wo_rot: target horizontal size without output rotation. - * @dst_h_rot: target horizontal size with output rotation. - * @rl_w_wo_rot: real width without input rotation. - * @rl_h_rot: real height without output rotation. - */ -struct fimc_capability { - /* scaler */ - u32 in_hori; - u32 bypass; - /* output rotator */ - u32 dst_h_wo_rot; - u32 dst_h_rot; - /* input rotator */ - u32 rl_w_wo_rot; - u32 rl_h_rot; -}; - -/* * A structure of fimc context. * - * @ippdrv: prepare initialization using ippdrv. * @regs_res: register resources. * @regs: memory mapped io registers. * @lock: locking of operations. * @clocks: fimc clocks. - * @clk_frequency: LCLK clock frequency. - * @sysreg: handle to SYSREG block regmap. * @sc: scaler infomations. * @pol: porarity of writeback. * @id: fimc id. * @irq: irq number. - * @suspended: qos operations. */ struct fimc_context { - struct exynos_drm_ippdrv ippdrv; + struct exynos_drm_ipp ipp; + struct drm_device *drm_dev; + struct device *dev; + struct exynos_drm_ipp_task *task; + struct exynos_drm_ipp_formats *formats; + unsigned int num_formats; + struct resource *regs_res; void __iomem *regs; spinlock_t lock; struct clk *clocks[FIMC_CLKS_MAX]; - u32 clk_frequency; - struct regmap *sysreg; struct fimc_scaler sc; int id; int irq; - bool suspended; }; static u32 fimc_read(struct fimc_context *ctx, u32 reg) @@ -217,19 +163,10 @@ static void fimc_sw_reset(struct fimc_context *ctx) fimc_write(ctx, 0x0, EXYNOS_CIFCNTSEQ); } -static int fimc_set_camblk_fimd0_wb(struct fimc_context *ctx) -{ - return regmap_update_bits(ctx->sysreg, SYSREG_CAMERA_BLK, - SYSREG_FIMD0WB_DEST_MASK, - ctx->id << SYSREG_FIMD0WB_DEST_SHIFT); -} - -static void fimc_set_type_ctrl(struct fimc_context *ctx, enum fimc_wb wb) +static void fimc_set_type_ctrl(struct fimc_context *ctx) { u32 cfg; - DRM_DEBUG_KMS("wb[%d]\n", wb); - cfg = fimc_read(ctx, EXYNOS_CIGCTRL); cfg &= ~(EXYNOS_CIGCTRL_TESTPATTERN_MASK | EXYNOS_CIGCTRL_SELCAM_ITU_MASK | @@ -238,23 +175,10 @@ static void fimc_set_type_ctrl(struct fimc_context *ctx, enum fimc_wb wb) EXYNOS_CIGCTRL_SELWB_CAMIF_MASK | EXYNOS_CIGCTRL_SELWRITEBACK_MASK); - switch (wb) { - case FIMC_WB_A: - cfg |= (EXYNOS_CIGCTRL_SELWRITEBACK_A | - EXYNOS_CIGCTRL_SELWB_CAMIF_WRITEBACK); - break; - case FIMC_WB_B: - cfg |= (EXYNOS_CIGCTRL_SELWRITEBACK_B | - EXYNOS_CIGCTRL_SELWB_CAMIF_WRITEBACK); - break; - case FIMC_WB_NONE: - default: - cfg |= (EXYNOS_CIGCTRL_SELCAM_ITU_A | - EXYNOS_CIGCTRL_SELWRITEBACK_A | - EXYNOS_CIGCTRL_SELCAM_MIPI_A | - EXYNOS_CIGCTRL_SELCAM_FIMC_ITU); - break; - } + cfg |= (EXYNOS_CIGCTRL_SELCAM_ITU_A | + EXYNOS_CIGCTRL_SELWRITEBACK_A | + EXYNOS_CIGCTRL_SELCAM_MIPI_A | + EXYNOS_CIGCTRL_SELCAM_FIMC_ITU); fimc_write(ctx, cfg, EXYNOS_CIGCTRL); } @@ -296,7 +220,6 @@ static void fimc_clear_irq(struct fimc_context *ctx) static bool fimc_check_ovf(struct fimc_context *ctx) { - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; u32 status, flag; status = fimc_read(ctx, EXYNOS_CISTATUS); @@ -310,7 +233,7 @@ static bool fimc_check_ovf(struct fimc_context *ctx) EXYNOS_CIWDOFST_CLROVFIY | EXYNOS_CIWDOFST_CLROVFICB | EXYNOS_CIWDOFST_CLROVFICR); - dev_err(ippdrv->dev, "occurred overflow at %d, status 0x%x.\n", + dev_err(ctx->dev, "occurred overflow at %d, status 0x%x.\n", ctx->id, status); return true; } @@ -376,10 +299,8 @@ static void fimc_handle_lastend(struct fimc_context *ctx, bool enable) fimc_write(ctx, cfg, EXYNOS_CIOCTRL); } - -static int fimc_src_set_fmt_order(struct fimc_context *ctx, u32 fmt) +static void fimc_src_set_fmt_order(struct fimc_context *ctx, u32 fmt) { - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; u32 cfg; DRM_DEBUG_KMS("fmt[0x%x]\n", fmt); @@ -392,12 +313,12 @@ static int fimc_src_set_fmt_order(struct fimc_context *ctx, u32 fmt) case DRM_FORMAT_RGB565: cfg |= EXYNOS_CISCCTRL_INRGB_FMT_RGB565; fimc_write(ctx, cfg, EXYNOS_CISCCTRL); - return 0; + return; case DRM_FORMAT_RGB888: case DRM_FORMAT_XRGB8888: cfg |= EXYNOS_CISCCTRL_INRGB_FMT_RGB888; fimc_write(ctx, cfg, EXYNOS_CISCCTRL); - return 0; + return; default: /* bypass */ break; @@ -438,20 +359,13 @@ static int fimc_src_set_fmt_order(struct fimc_context *ctx, u32 fmt) cfg |= (EXYNOS_MSCTRL_ORDER2P_LSB_CBCR | EXYNOS_MSCTRL_C_INT_IN_2PLANE); break; - default: - dev_err(ippdrv->dev, "invalid source yuv order 0x%x.\n", fmt); - return -EINVAL; } fimc_write(ctx, cfg, EXYNOS_MSCTRL); - - return 0; } -static int fimc_src_set_fmt(struct device *dev, u32 fmt) +static void fimc_src_set_fmt(struct fimc_context *ctx, u32 fmt, bool tiled) { - struct fimc_context *ctx = get_fimc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; u32 cfg; DRM_DEBUG_KMS("fmt[0x%x]\n", fmt); @@ -485,9 +399,6 @@ static int fimc_src_set_fmt(struct device *dev, u32 fmt) case DRM_FORMAT_NV21: cfg |= EXYNOS_MSCTRL_INFORMAT_YCBCR420; break; - default: - dev_err(ippdrv->dev, "invalid source format 0x%x.\n", fmt); - return -EINVAL; } fimc_write(ctx, cfg, EXYNOS_MSCTRL); @@ -495,22 +406,22 @@ static int fimc_src_set_fmt(struct device *dev, u32 fmt) cfg = fimc_read(ctx, EXYNOS_CIDMAPARAM); cfg &= ~EXYNOS_CIDMAPARAM_R_MODE_MASK; - cfg |= EXYNOS_CIDMAPARAM_R_MODE_LINEAR; + if (tiled) + cfg |= EXYNOS_CIDMAPARAM_R_MODE_64X32; + else + cfg |= EXYNOS_CIDMAPARAM_R_MODE_LINEAR; fimc_write(ctx, cfg, EXYNOS_CIDMAPARAM); - return fimc_src_set_fmt_order(ctx, fmt); + fimc_src_set_fmt_order(ctx, fmt); } -static int fimc_src_set_transf(struct device *dev, - enum drm_exynos_degree degree, - enum drm_exynos_flip flip, bool *swap) +static void fimc_src_set_transf(struct fimc_context *ctx, unsigned int rotation) { - struct fimc_context *ctx = get_fimc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; + unsigned int degree = rotation & DRM_MODE_ROTATE_MASK; u32 cfg1, cfg2; - DRM_DEBUG_KMS("degree[%d]flip[0x%x]\n", degree, flip); + DRM_DEBUG_KMS("rotation[%x]\n", rotation); cfg1 = fimc_read(ctx, EXYNOS_MSCTRL); cfg1 &= ~(EXYNOS_MSCTRL_FLIP_X_MIRROR | @@ -520,61 +431,56 @@ static int fimc_src_set_transf(struct device *dev, cfg2 &= ~EXYNOS_CITRGFMT_INROT90_CLOCKWISE; switch (degree) { - case EXYNOS_DRM_DEGREE_0: - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + case DRM_MODE_ROTATE_0: + if (rotation & DRM_MODE_REFLECT_X) cfg1 |= EXYNOS_MSCTRL_FLIP_X_MIRROR; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_Y) cfg1 |= EXYNOS_MSCTRL_FLIP_Y_MIRROR; break; - case EXYNOS_DRM_DEGREE_90: + case DRM_MODE_ROTATE_90: cfg2 |= EXYNOS_CITRGFMT_INROT90_CLOCKWISE; - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + if (rotation & DRM_MODE_REFLECT_X) cfg1 |= EXYNOS_MSCTRL_FLIP_X_MIRROR; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_Y) cfg1 |= EXYNOS_MSCTRL_FLIP_Y_MIRROR; break; - case EXYNOS_DRM_DEGREE_180: + case DRM_MODE_ROTATE_180: cfg1 |= (EXYNOS_MSCTRL_FLIP_X_MIRROR | EXYNOS_MSCTRL_FLIP_Y_MIRROR); - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + if (rotation & DRM_MODE_REFLECT_X) cfg1 &= ~EXYNOS_MSCTRL_FLIP_X_MIRROR; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_Y) cfg1 &= ~EXYNOS_MSCTRL_FLIP_Y_MIRROR; break; - case EXYNOS_DRM_DEGREE_270: + case DRM_MODE_ROTATE_270: cfg1 |= (EXYNOS_MSCTRL_FLIP_X_MIRROR | EXYNOS_MSCTRL_FLIP_Y_MIRROR); cfg2 |= EXYNOS_CITRGFMT_INROT90_CLOCKWISE; - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + if (rotation & DRM_MODE_REFLECT_X) cfg1 &= ~EXYNOS_MSCTRL_FLIP_X_MIRROR; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_Y) cfg1 &= ~EXYNOS_MSCTRL_FLIP_Y_MIRROR; break; - default: - dev_err(ippdrv->dev, "invalid degree value %d.\n", degree); - return -EINVAL; } fimc_write(ctx, cfg1, EXYNOS_MSCTRL); fimc_write(ctx, cfg2, EXYNOS_CITRGFMT); - *swap = (cfg2 & EXYNOS_CITRGFMT_INROT90_CLOCKWISE) ? 1 : 0; - - return 0; } -static int fimc_set_window(struct fimc_context *ctx, - struct drm_exynos_pos *pos, struct drm_exynos_sz *sz) +static void fimc_set_window(struct fimc_context *ctx, + struct exynos_drm_ipp_buffer *buf) { u32 cfg, h1, h2, v1, v2; /* cropped image */ - h1 = pos->x; - h2 = sz->hsize - pos->w - pos->x; - v1 = pos->y; - v2 = sz->vsize - pos->h - pos->y; + h1 = buf->rect.x; + h2 = buf->buf.width - buf->rect.w - buf->rect.x; + v1 = buf->rect.y; + v2 = buf->buf.height - buf->rect.h - buf->rect.y; DRM_DEBUG_KMS("x[%d]y[%d]w[%d]h[%d]hsize[%d]vsize[%d]\n", - pos->x, pos->y, pos->w, pos->h, sz->hsize, sz->vsize); + buf->rect.x, buf->rect.y, buf->rect.w, buf->rect.h, + buf->buf.width, buf->buf.height); DRM_DEBUG_KMS("h1[%d]h2[%d]v1[%d]v2[%d]\n", h1, h2, v1, v2); /* @@ -592,42 +498,30 @@ static int fimc_set_window(struct fimc_context *ctx, cfg = (EXYNOS_CIWDOFST2_WINHOROFST2(h2) | EXYNOS_CIWDOFST2_WINVEROFST2(v2)); fimc_write(ctx, cfg, EXYNOS_CIWDOFST2); - - return 0; } -static int fimc_src_set_size(struct device *dev, int swap, - struct drm_exynos_pos *pos, struct drm_exynos_sz *sz) +static void fimc_src_set_size(struct fimc_context *ctx, + struct exynos_drm_ipp_buffer *buf) { - struct fimc_context *ctx = get_fimc_context(dev); - struct drm_exynos_pos img_pos = *pos; - struct drm_exynos_sz img_sz = *sz; u32 cfg; - DRM_DEBUG_KMS("swap[%d]hsize[%d]vsize[%d]\n", - swap, sz->hsize, sz->vsize); + DRM_DEBUG_KMS("hsize[%d]vsize[%d]\n", buf->buf.width, buf->buf.height); /* original size */ - cfg = (EXYNOS_ORGISIZE_HORIZONTAL(img_sz.hsize) | - EXYNOS_ORGISIZE_VERTICAL(img_sz.vsize)); + cfg = (EXYNOS_ORGISIZE_HORIZONTAL(buf->buf.width) | + EXYNOS_ORGISIZE_VERTICAL(buf->buf.height)); fimc_write(ctx, cfg, EXYNOS_ORGISIZE); - DRM_DEBUG_KMS("x[%d]y[%d]w[%d]h[%d]\n", pos->x, pos->y, pos->w, pos->h); - - if (swap) { - img_pos.w = pos->h; - img_pos.h = pos->w; - img_sz.hsize = sz->vsize; - img_sz.vsize = sz->hsize; - } + DRM_DEBUG_KMS("x[%d]y[%d]w[%d]h[%d]\n", buf->rect.x, buf->rect.y, + buf->rect.w, buf->rect.h); /* set input DMA image size */ cfg = fimc_read(ctx, EXYNOS_CIREAL_ISIZE); cfg &= ~(EXYNOS_CIREAL_ISIZE_HEIGHT_MASK | EXYNOS_CIREAL_ISIZE_WIDTH_MASK); - cfg |= (EXYNOS_CIREAL_ISIZE_WIDTH(img_pos.w) | - EXYNOS_CIREAL_ISIZE_HEIGHT(img_pos.h)); + cfg |= (EXYNOS_CIREAL_ISIZE_WIDTH(buf->rect.w) | + EXYNOS_CIREAL_ISIZE_HEIGHT(buf->rect.h)); fimc_write(ctx, cfg, EXYNOS_CIREAL_ISIZE); /* @@ -635,91 +529,34 @@ static int fimc_src_set_size(struct device *dev, int swap, * for now, we support only ITU601 8 bit mode */ cfg = (EXYNOS_CISRCFMT_ITU601_8BIT | - EXYNOS_CISRCFMT_SOURCEHSIZE(img_sz.hsize) | - EXYNOS_CISRCFMT_SOURCEVSIZE(img_sz.vsize)); + EXYNOS_CISRCFMT_SOURCEHSIZE(buf->buf.width) | + EXYNOS_CISRCFMT_SOURCEVSIZE(buf->buf.height)); fimc_write(ctx, cfg, EXYNOS_CISRCFMT); /* offset Y(RGB), Cb, Cr */ - cfg = (EXYNOS_CIIYOFF_HORIZONTAL(img_pos.x) | - EXYNOS_CIIYOFF_VERTICAL(img_pos.y)); + cfg = (EXYNOS_CIIYOFF_HORIZONTAL(buf->rect.x) | + EXYNOS_CIIYOFF_VERTICAL(buf->rect.y)); fimc_write(ctx, cfg, EXYNOS_CIIYOFF); - cfg = (EXYNOS_CIICBOFF_HORIZONTAL(img_pos.x) | - EXYNOS_CIICBOFF_VERTICAL(img_pos.y)); + cfg = (EXYNOS_CIICBOFF_HORIZONTAL(buf->rect.x) | + EXYNOS_CIICBOFF_VERTICAL(buf->rect.y)); fimc_write(ctx, cfg, EXYNOS_CIICBOFF); - cfg = (EXYNOS_CIICROFF_HORIZONTAL(img_pos.x) | - EXYNOS_CIICROFF_VERTICAL(img_pos.y)); + cfg = (EXYNOS_CIICROFF_HORIZONTAL(buf->rect.x) | + EXYNOS_CIICROFF_VERTICAL(buf->rect.y)); fimc_write(ctx, cfg, EXYNOS_CIICROFF); - return fimc_set_window(ctx, &img_pos, &img_sz); + fimc_set_window(ctx, buf); } -static int fimc_src_set_addr(struct device *dev, - struct drm_exynos_ipp_buf_info *buf_info, u32 buf_id, - enum drm_exynos_ipp_buf_type buf_type) +static void fimc_src_set_addr(struct fimc_context *ctx, + struct exynos_drm_ipp_buffer *buf) { - struct fimc_context *ctx = get_fimc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node; - struct drm_exynos_ipp_property *property; - struct drm_exynos_ipp_config *config; - - if (!c_node) { - DRM_ERROR("failed to get c_node.\n"); - return -EINVAL; - } - - property = &c_node->property; - - DRM_DEBUG_KMS("prop_id[%d]buf_id[%d]buf_type[%d]\n", - property->prop_id, buf_id, buf_type); - - if (buf_id > FIMC_MAX_SRC) { - dev_info(ippdrv->dev, "invalid buf_id %d.\n", buf_id); - return -ENOMEM; - } - - /* address register set */ - switch (buf_type) { - case IPP_BUF_ENQUEUE: - config = &property->config[EXYNOS_DRM_OPS_SRC]; - fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_Y], - EXYNOS_CIIYSA0); - - if (config->fmt == DRM_FORMAT_YVU420) { - fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CR], - EXYNOS_CIICBSA0); - fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CB], - EXYNOS_CIICRSA0); - } else { - fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CB], - EXYNOS_CIICBSA0); - fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CR], - EXYNOS_CIICRSA0); - } - break; - case IPP_BUF_DEQUEUE: - fimc_write(ctx, 0x0, EXYNOS_CIIYSA0); - fimc_write(ctx, 0x0, EXYNOS_CIICBSA0); - fimc_write(ctx, 0x0, EXYNOS_CIICRSA0); - break; - default: - /* bypass */ - break; - } - - return 0; + fimc_write(ctx, buf->dma_addr[0], EXYNOS_CIIYSA(0)); + fimc_write(ctx, buf->dma_addr[1], EXYNOS_CIICBSA(0)); + fimc_write(ctx, buf->dma_addr[2], EXYNOS_CIICRSA(0)); } -static struct exynos_drm_ipp_ops fimc_src_ops = { - .set_fmt = fimc_src_set_fmt, - .set_transf = fimc_src_set_transf, - .set_size = fimc_src_set_size, - .set_addr = fimc_src_set_addr, -}; - -static int fimc_dst_set_fmt_order(struct fimc_context *ctx, u32 fmt) +static void fimc_dst_set_fmt_order(struct fimc_context *ctx, u32 fmt) { - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; u32 cfg; DRM_DEBUG_KMS("fmt[0x%x]\n", fmt); @@ -732,11 +569,11 @@ static int fimc_dst_set_fmt_order(struct fimc_context *ctx, u32 fmt) case DRM_FORMAT_RGB565: cfg |= EXYNOS_CISCCTRL_OUTRGB_FMT_RGB565; fimc_write(ctx, cfg, EXYNOS_CISCCTRL); - return 0; + return; case DRM_FORMAT_RGB888: cfg |= EXYNOS_CISCCTRL_OUTRGB_FMT_RGB888; fimc_write(ctx, cfg, EXYNOS_CISCCTRL); - return 0; + return; case DRM_FORMAT_XRGB8888: cfg |= (EXYNOS_CISCCTRL_OUTRGB_FMT_RGB888 | EXYNOS_CISCCTRL_EXTRGB_EXTENSION); @@ -784,20 +621,13 @@ static int fimc_dst_set_fmt_order(struct fimc_context *ctx, u32 fmt) cfg |= EXYNOS_CIOCTRL_ORDER2P_LSB_CBCR; cfg |= EXYNOS_CIOCTRL_YCBCR_2PLANE; break; - default: - dev_err(ippdrv->dev, "invalid target yuv order 0x%x.\n", fmt); - return -EINVAL; } fimc_write(ctx, cfg, EXYNOS_CIOCTRL); - - return 0; } -static int fimc_dst_set_fmt(struct device *dev, u32 fmt) +static void fimc_dst_set_fmt(struct fimc_context *ctx, u32 fmt, bool tiled) { - struct fimc_context *ctx = get_fimc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; u32 cfg; DRM_DEBUG_KMS("fmt[0x%x]\n", fmt); @@ -837,10 +667,6 @@ static int fimc_dst_set_fmt(struct device *dev, u32 fmt) case DRM_FORMAT_NV21: cfg |= EXYNOS_CITRGFMT_OUTFORMAT_YCBCR420; break; - default: - dev_err(ippdrv->dev, "invalid target format 0x%x.\n", - fmt); - return -EINVAL; } fimc_write(ctx, cfg, EXYNOS_CITRGFMT); @@ -849,73 +675,67 @@ static int fimc_dst_set_fmt(struct device *dev, u32 fmt) cfg = fimc_read(ctx, EXYNOS_CIDMAPARAM); cfg &= ~EXYNOS_CIDMAPARAM_W_MODE_MASK; - cfg |= EXYNOS_CIDMAPARAM_W_MODE_LINEAR; + if (tiled) + cfg |= EXYNOS_CIDMAPARAM_W_MODE_64X32; + else + cfg |= EXYNOS_CIDMAPARAM_W_MODE_LINEAR; fimc_write(ctx, cfg, EXYNOS_CIDMAPARAM); - return fimc_dst_set_fmt_order(ctx, fmt); + fimc_dst_set_fmt_order(ctx, fmt); } -static int fimc_dst_set_transf(struct device *dev, - enum drm_exynos_degree degree, - enum drm_exynos_flip flip, bool *swap) +static void fimc_dst_set_transf(struct fimc_context *ctx, unsigned int rotation) { - struct fimc_context *ctx = get_fimc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; + unsigned int degree = rotation & DRM_MODE_ROTATE_MASK; u32 cfg; - DRM_DEBUG_KMS("degree[%d]flip[0x%x]\n", degree, flip); + DRM_DEBUG_KMS("rotation[0x%x]\n", rotation); cfg = fimc_read(ctx, EXYNOS_CITRGFMT); cfg &= ~EXYNOS_CITRGFMT_FLIP_MASK; cfg &= ~EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE; switch (degree) { - case EXYNOS_DRM_DEGREE_0: - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + case DRM_MODE_ROTATE_0: + if (rotation & DRM_MODE_REFLECT_X) cfg |= EXYNOS_CITRGFMT_FLIP_X_MIRROR; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_Y) cfg |= EXYNOS_CITRGFMT_FLIP_Y_MIRROR; break; - case EXYNOS_DRM_DEGREE_90: + case DRM_MODE_ROTATE_90: cfg |= EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE; - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + if (rotation & DRM_MODE_REFLECT_X) cfg |= EXYNOS_CITRGFMT_FLIP_X_MIRROR; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_Y) cfg |= EXYNOS_CITRGFMT_FLIP_Y_MIRROR; break; - case EXYNOS_DRM_DEGREE_180: + case DRM_MODE_ROTATE_180: cfg |= (EXYNOS_CITRGFMT_FLIP_X_MIRROR | EXYNOS_CITRGFMT_FLIP_Y_MIRROR); - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + if (rotation & DRM_MODE_REFLECT_X) cfg &= ~EXYNOS_CITRGFMT_FLIP_X_MIRROR; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_Y) cfg &= ~EXYNOS_CITRGFMT_FLIP_Y_MIRROR; break; - case EXYNOS_DRM_DEGREE_270: + case DRM_MODE_ROTATE_270: cfg |= (EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE | EXYNOS_CITRGFMT_FLIP_X_MIRROR | EXYNOS_CITRGFMT_FLIP_Y_MIRROR); - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + if (rotation & DRM_MODE_REFLECT_X) cfg &= ~EXYNOS_CITRGFMT_FLIP_X_MIRROR; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_Y) cfg &= ~EXYNOS_CITRGFMT_FLIP_Y_MIRROR; break; - default: - dev_err(ippdrv->dev, "invalid degree value %d.\n", degree); - return -EINVAL; } fimc_write(ctx, cfg, EXYNOS_CITRGFMT); - *swap = (cfg & EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE) ? 1 : 0; - - return 0; } static int fimc_set_prescaler(struct fimc_context *ctx, struct fimc_scaler *sc, - struct drm_exynos_pos *src, struct drm_exynos_pos *dst) + struct drm_exynos_ipp_task_rect *src, + struct drm_exynos_ipp_task_rect *dst) { - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; u32 cfg, cfg_ext, shfactor; u32 pre_dst_width, pre_dst_height; u32 hfactor, vfactor; @@ -942,13 +762,13 @@ static int fimc_set_prescaler(struct fimc_context *ctx, struct fimc_scaler *sc, /* fimc_ippdrv_check_property assures that dividers are not null */ hfactor = fls(src_w / dst_w / 2); if (hfactor > FIMC_SHFACTOR / 2) { - dev_err(ippdrv->dev, "failed to get ratio horizontal.\n"); + dev_err(ctx->dev, "failed to get ratio horizontal.\n"); return -EINVAL; } vfactor = fls(src_h / dst_h / 2); if (vfactor > FIMC_SHFACTOR / 2) { - dev_err(ippdrv->dev, "failed to get ratio vertical.\n"); + dev_err(ctx->dev, "failed to get ratio vertical.\n"); return -EINVAL; } @@ -1019,83 +839,77 @@ static void fimc_set_scaler(struct fimc_context *ctx, struct fimc_scaler *sc) fimc_write(ctx, cfg_ext, EXYNOS_CIEXTEN); } -static int fimc_dst_set_size(struct device *dev, int swap, - struct drm_exynos_pos *pos, struct drm_exynos_sz *sz) +static void fimc_dst_set_size(struct fimc_context *ctx, + struct exynos_drm_ipp_buffer *buf) { - struct fimc_context *ctx = get_fimc_context(dev); - struct drm_exynos_pos img_pos = *pos; - struct drm_exynos_sz img_sz = *sz; - u32 cfg; + u32 cfg, cfg_ext; - DRM_DEBUG_KMS("swap[%d]hsize[%d]vsize[%d]\n", - swap, sz->hsize, sz->vsize); + DRM_DEBUG_KMS("hsize[%d]vsize[%d]\n", buf->buf.width, buf->buf.height); /* original size */ - cfg = (EXYNOS_ORGOSIZE_HORIZONTAL(img_sz.hsize) | - EXYNOS_ORGOSIZE_VERTICAL(img_sz.vsize)); + cfg = (EXYNOS_ORGOSIZE_HORIZONTAL(buf->buf.width) | + EXYNOS_ORGOSIZE_VERTICAL(buf->buf.height)); fimc_write(ctx, cfg, EXYNOS_ORGOSIZE); - DRM_DEBUG_KMS("x[%d]y[%d]w[%d]h[%d]\n", pos->x, pos->y, pos->w, pos->h); + DRM_DEBUG_KMS("x[%d]y[%d]w[%d]h[%d]\n", buf->rect.x, buf->rect.y, + buf->rect.w, buf->rect.h); /* CSC ITU */ cfg = fimc_read(ctx, EXYNOS_CIGCTRL); cfg &= ~EXYNOS_CIGCTRL_CSC_MASK; - if (sz->hsize >= FIMC_WIDTH_ITU_709) + if (buf->buf.width >= FIMC_WIDTH_ITU_709) cfg |= EXYNOS_CIGCTRL_CSC_ITU709; else cfg |= EXYNOS_CIGCTRL_CSC_ITU601; fimc_write(ctx, cfg, EXYNOS_CIGCTRL); - if (swap) { - img_pos.w = pos->h; - img_pos.h = pos->w; - img_sz.hsize = sz->vsize; - img_sz.vsize = sz->hsize; - } + cfg_ext = fimc_read(ctx, EXYNOS_CITRGFMT); /* target image size */ cfg = fimc_read(ctx, EXYNOS_CITRGFMT); cfg &= ~(EXYNOS_CITRGFMT_TARGETH_MASK | EXYNOS_CITRGFMT_TARGETV_MASK); - cfg |= (EXYNOS_CITRGFMT_TARGETHSIZE(img_pos.w) | - EXYNOS_CITRGFMT_TARGETVSIZE(img_pos.h)); + if (cfg_ext & EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE) + cfg |= (EXYNOS_CITRGFMT_TARGETHSIZE(buf->rect.h) | + EXYNOS_CITRGFMT_TARGETVSIZE(buf->rect.w)); + else + cfg |= (EXYNOS_CITRGFMT_TARGETHSIZE(buf->rect.w) | + EXYNOS_CITRGFMT_TARGETVSIZE(buf->rect.h)); fimc_write(ctx, cfg, EXYNOS_CITRGFMT); /* target area */ - cfg = EXYNOS_CITAREA_TARGET_AREA(img_pos.w * img_pos.h); + cfg = EXYNOS_CITAREA_TARGET_AREA(buf->rect.w * buf->rect.h); fimc_write(ctx, cfg, EXYNOS_CITAREA); /* offset Y(RGB), Cb, Cr */ - cfg = (EXYNOS_CIOYOFF_HORIZONTAL(img_pos.x) | - EXYNOS_CIOYOFF_VERTICAL(img_pos.y)); + cfg = (EXYNOS_CIOYOFF_HORIZONTAL(buf->rect.x) | + EXYNOS_CIOYOFF_VERTICAL(buf->rect.y)); fimc_write(ctx, cfg, EXYNOS_CIOYOFF); - cfg = (EXYNOS_CIOCBOFF_HORIZONTAL(img_pos.x) | - EXYNOS_CIOCBOFF_VERTICAL(img_pos.y)); + cfg = (EXYNOS_CIOCBOFF_HORIZONTAL(buf->rect.x) | + EXYNOS_CIOCBOFF_VERTICAL(buf->rect.y)); fimc_write(ctx, cfg, EXYNOS_CIOCBOFF); - cfg = (EXYNOS_CIOCROFF_HORIZONTAL(img_pos.x) | - EXYNOS_CIOCROFF_VERTICAL(img_pos.y)); + cfg = (EXYNOS_CIOCROFF_HORIZONTAL(buf->rect.x) | + EXYNOS_CIOCROFF_VERTICAL(buf->rect.y)); fimc_write(ctx, cfg, EXYNOS_CIOCROFF); - - return 0; } static void fimc_dst_set_buf_seq(struct fimc_context *ctx, u32 buf_id, - enum drm_exynos_ipp_buf_type buf_type) + bool enqueue) { unsigned long flags; u32 buf_num; u32 cfg; - DRM_DEBUG_KMS("buf_id[%d]buf_type[%d]\n", buf_id, buf_type); + DRM_DEBUG_KMS("buf_id[%d]enqueu[%d]\n", buf_id, enqueue); spin_lock_irqsave(&ctx->lock, flags); cfg = fimc_read(ctx, EXYNOS_CIFCNTSEQ); - if (buf_type == IPP_BUF_ENQUEUE) + if (enqueue) cfg |= (1 << buf_id); else cfg &= ~(1 << buf_id); @@ -1104,88 +918,29 @@ static void fimc_dst_set_buf_seq(struct fimc_context *ctx, u32 buf_id, buf_num = hweight32(cfg); - if (buf_type == IPP_BUF_ENQUEUE && buf_num >= FIMC_BUF_START) + if (enqueue && buf_num >= FIMC_BUF_START) fimc_mask_irq(ctx, true); - else if (buf_type == IPP_BUF_DEQUEUE && buf_num <= FIMC_BUF_STOP) + else if (!enqueue && buf_num <= FIMC_BUF_STOP) fimc_mask_irq(ctx, false); spin_unlock_irqrestore(&ctx->lock, flags); } -static int fimc_dst_set_addr(struct device *dev, - struct drm_exynos_ipp_buf_info *buf_info, u32 buf_id, - enum drm_exynos_ipp_buf_type buf_type) +static void fimc_dst_set_addr(struct fimc_context *ctx, + struct exynos_drm_ipp_buffer *buf) { - struct fimc_context *ctx = get_fimc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node; - struct drm_exynos_ipp_property *property; - struct drm_exynos_ipp_config *config; - - if (!c_node) { - DRM_ERROR("failed to get c_node.\n"); - return -EINVAL; - } - - property = &c_node->property; - - DRM_DEBUG_KMS("prop_id[%d]buf_id[%d]buf_type[%d]\n", - property->prop_id, buf_id, buf_type); + fimc_write(ctx, buf->dma_addr[0], EXYNOS_CIOYSA(0)); + fimc_write(ctx, buf->dma_addr[1], EXYNOS_CIOCBSA(0)); + fimc_write(ctx, buf->dma_addr[2], EXYNOS_CIOCRSA(0)); - if (buf_id > FIMC_MAX_DST) { - dev_info(ippdrv->dev, "invalid buf_id %d.\n", buf_id); - return -ENOMEM; - } - - /* address register set */ - switch (buf_type) { - case IPP_BUF_ENQUEUE: - config = &property->config[EXYNOS_DRM_OPS_DST]; - - fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_Y], - EXYNOS_CIOYSA(buf_id)); - - if (config->fmt == DRM_FORMAT_YVU420) { - fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CR], - EXYNOS_CIOCBSA(buf_id)); - fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CB], - EXYNOS_CIOCRSA(buf_id)); - } else { - fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CB], - EXYNOS_CIOCBSA(buf_id)); - fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CR], - EXYNOS_CIOCRSA(buf_id)); - } - break; - case IPP_BUF_DEQUEUE: - fimc_write(ctx, 0x0, EXYNOS_CIOYSA(buf_id)); - fimc_write(ctx, 0x0, EXYNOS_CIOCBSA(buf_id)); - fimc_write(ctx, 0x0, EXYNOS_CIOCRSA(buf_id)); - break; - default: - /* bypass */ - break; - } - - fimc_dst_set_buf_seq(ctx, buf_id, buf_type); - - return 0; + fimc_dst_set_buf_seq(ctx, 0, true); } -static struct exynos_drm_ipp_ops fimc_dst_ops = { - .set_fmt = fimc_dst_set_fmt, - .set_transf = fimc_dst_set_transf, - .set_size = fimc_dst_set_size, - .set_addr = fimc_dst_set_addr, -}; +static void fimc_stop(struct fimc_context *ctx); static irqreturn_t fimc_irq_handler(int irq, void *dev_id) { struct fimc_context *ctx = dev_id; - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node; - struct drm_exynos_ipp_event_work *event_work = - c_node->event_work; int buf_id; DRM_DEBUG_KMS("fimc id[%d]\n", ctx->id); @@ -1203,170 +958,19 @@ static irqreturn_t fimc_irq_handler(int irq, void *dev_id) DRM_DEBUG_KMS("buf_id[%d]\n", buf_id); - fimc_dst_set_buf_seq(ctx, buf_id, IPP_BUF_DEQUEUE); - - event_work->ippdrv = ippdrv; - event_work->buf_id[EXYNOS_DRM_OPS_DST] = buf_id; - queue_work(ippdrv->event_workq, &event_work->work); - - return IRQ_HANDLED; -} - -static int fimc_init_prop_list(struct exynos_drm_ippdrv *ippdrv) -{ - struct drm_exynos_ipp_prop_list *prop_list = &ippdrv->prop_list; - - prop_list->version = 1; - prop_list->writeback = 1; - prop_list->refresh_min = FIMC_REFRESH_MIN; - prop_list->refresh_max = FIMC_REFRESH_MAX; - prop_list->flip = (1 << EXYNOS_DRM_FLIP_NONE) | - (1 << EXYNOS_DRM_FLIP_VERTICAL) | - (1 << EXYNOS_DRM_FLIP_HORIZONTAL); - prop_list->degree = (1 << EXYNOS_DRM_DEGREE_0) | - (1 << EXYNOS_DRM_DEGREE_90) | - (1 << EXYNOS_DRM_DEGREE_180) | - (1 << EXYNOS_DRM_DEGREE_270); - prop_list->csc = 1; - prop_list->crop = 1; - prop_list->crop_max.hsize = FIMC_CROP_MAX; - prop_list->crop_max.vsize = FIMC_CROP_MAX; - prop_list->crop_min.hsize = FIMC_CROP_MIN; - prop_list->crop_min.vsize = FIMC_CROP_MIN; - prop_list->scale = 1; - prop_list->scale_max.hsize = FIMC_SCALE_MAX; - prop_list->scale_max.vsize = FIMC_SCALE_MAX; - prop_list->scale_min.hsize = FIMC_SCALE_MIN; - prop_list->scale_min.vsize = FIMC_SCALE_MIN; - - return 0; -} - -static inline bool fimc_check_drm_flip(enum drm_exynos_flip flip) -{ - switch (flip) { - case EXYNOS_DRM_FLIP_NONE: - case EXYNOS_DRM_FLIP_VERTICAL: - case EXYNOS_DRM_FLIP_HORIZONTAL: - case EXYNOS_DRM_FLIP_BOTH: - return true; - default: - DRM_DEBUG_KMS("invalid flip\n"); - return false; - } -} - -static int fimc_ippdrv_check_property(struct device *dev, - struct drm_exynos_ipp_property *property) -{ - struct fimc_context *ctx = get_fimc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - struct drm_exynos_ipp_prop_list *pp = &ippdrv->prop_list; - struct drm_exynos_ipp_config *config; - struct drm_exynos_pos *pos; - struct drm_exynos_sz *sz; - bool swap; - int i; - - for_each_ipp_ops(i) { - if ((i == EXYNOS_DRM_OPS_SRC) && - (property->cmd == IPP_CMD_WB)) - continue; - - config = &property->config[i]; - pos = &config->pos; - sz = &config->sz; - - /* check for flip */ - if (!fimc_check_drm_flip(config->flip)) { - DRM_ERROR("invalid flip.\n"); - goto err_property; - } - - /* check for degree */ - switch (config->degree) { - case EXYNOS_DRM_DEGREE_90: - case EXYNOS_DRM_DEGREE_270: - swap = true; - break; - case EXYNOS_DRM_DEGREE_0: - case EXYNOS_DRM_DEGREE_180: - swap = false; - break; - default: - DRM_ERROR("invalid degree.\n"); - goto err_property; - } - - /* check for buffer bound */ - if ((pos->x + pos->w > sz->hsize) || - (pos->y + pos->h > sz->vsize)) { - DRM_ERROR("out of buf bound.\n"); - goto err_property; - } + if (ctx->task) { + struct exynos_drm_ipp_task *task = ctx->task; - /* check for crop */ - if ((i == EXYNOS_DRM_OPS_SRC) && (pp->crop)) { - if (swap) { - if ((pos->h < pp->crop_min.hsize) || - (sz->vsize > pp->crop_max.hsize) || - (pos->w < pp->crop_min.vsize) || - (sz->hsize > pp->crop_max.vsize)) { - DRM_ERROR("out of crop size.\n"); - goto err_property; - } - } else { - if ((pos->w < pp->crop_min.hsize) || - (sz->hsize > pp->crop_max.hsize) || - (pos->h < pp->crop_min.vsize) || - (sz->vsize > pp->crop_max.vsize)) { - DRM_ERROR("out of crop size.\n"); - goto err_property; - } - } - } - - /* check for scale */ - if ((i == EXYNOS_DRM_OPS_DST) && (pp->scale)) { - if (swap) { - if ((pos->h < pp->scale_min.hsize) || - (sz->vsize > pp->scale_max.hsize) || - (pos->w < pp->scale_min.vsize) || - (sz->hsize > pp->scale_max.vsize)) { - DRM_ERROR("out of scale size.\n"); - goto err_property; - } - } else { - if ((pos->w < pp->scale_min.hsize) || - (sz->hsize > pp->scale_max.hsize) || - (pos->h < pp->scale_min.vsize) || - (sz->vsize > pp->scale_max.vsize)) { - DRM_ERROR("out of scale size.\n"); - goto err_property; - } - } - } + ctx->task = NULL; + pm_runtime_mark_last_busy(ctx->dev); + pm_runtime_put_autosuspend(ctx->dev); + exynos_drm_ipp_task_done(task, 0); } - return 0; + fimc_dst_set_buf_seq(ctx, buf_id, false); + fimc_stop(ctx); -err_property: - for_each_ipp_ops(i) { - if ((i == EXYNOS_DRM_OPS_SRC) && - (property->cmd == IPP_CMD_WB)) - continue; - - config = &property->config[i]; - pos = &config->pos; - sz = &config->sz; - - DRM_ERROR("[%s]f[%d]r[%d]pos[%d %d %d %d]sz[%d %d]\n", - i ? "dst" : "src", config->flip, config->degree, - pos->x, pos->y, pos->w, pos->h, - sz->hsize, sz->vsize); - } - - return -EINVAL; + return IRQ_HANDLED; } static void fimc_clear_addr(struct fimc_context *ctx) @@ -1386,10 +990,8 @@ static void fimc_clear_addr(struct fimc_context *ctx) } } -static int fimc_ippdrv_reset(struct device *dev) +static void fimc_reset(struct fimc_context *ctx) { - struct fimc_context *ctx = get_fimc_context(dev); - /* reset h/w block */ fimc_sw_reset(ctx); @@ -1397,82 +999,26 @@ static int fimc_ippdrv_reset(struct device *dev) memset(&ctx->sc, 0x0, sizeof(ctx->sc)); fimc_clear_addr(ctx); - - return 0; } -static int fimc_ippdrv_start(struct device *dev, enum drm_exynos_ipp_cmd cmd) +static void fimc_start(struct fimc_context *ctx) { - struct fimc_context *ctx = get_fimc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node; - struct drm_exynos_ipp_property *property; - struct drm_exynos_ipp_config *config; - struct drm_exynos_pos img_pos[EXYNOS_DRM_OPS_MAX]; - struct drm_exynos_ipp_set_wb set_wb; - int ret, i; u32 cfg0, cfg1; - DRM_DEBUG_KMS("cmd[%d]\n", cmd); - - if (!c_node) { - DRM_ERROR("failed to get c_node.\n"); - return -EINVAL; - } - - property = &c_node->property; - fimc_mask_irq(ctx, true); - for_each_ipp_ops(i) { - config = &property->config[i]; - img_pos[i] = config->pos; - } - - ret = fimc_set_prescaler(ctx, &ctx->sc, - &img_pos[EXYNOS_DRM_OPS_SRC], - &img_pos[EXYNOS_DRM_OPS_DST]); - if (ret) { - dev_err(dev, "failed to set prescaler.\n"); - return ret; - } - - /* If set ture, we can save jpeg about screen */ + /* If set true, we can save jpeg about screen */ fimc_handle_jpeg(ctx, false); fimc_set_scaler(ctx, &ctx->sc); - switch (cmd) { - case IPP_CMD_M2M: - fimc_set_type_ctrl(ctx, FIMC_WB_NONE); - fimc_handle_lastend(ctx, false); - - /* setup dma */ - cfg0 = fimc_read(ctx, EXYNOS_MSCTRL); - cfg0 &= ~EXYNOS_MSCTRL_INPUT_MASK; - cfg0 |= EXYNOS_MSCTRL_INPUT_MEMORY; - fimc_write(ctx, cfg0, EXYNOS_MSCTRL); - break; - case IPP_CMD_WB: - fimc_set_type_ctrl(ctx, FIMC_WB_A); - fimc_handle_lastend(ctx, true); - - /* setup FIMD */ - ret = fimc_set_camblk_fimd0_wb(ctx); - if (ret < 0) { - dev_err(dev, "camblk setup failed.\n"); - return ret; - } + fimc_set_type_ctrl(ctx); + fimc_handle_lastend(ctx, false); - set_wb.enable = 1; - set_wb.refresh = property->refresh_rate; - exynos_drm_ippnb_send_event(IPP_SET_WRITEBACK, (void *)&set_wb); - break; - case IPP_CMD_OUTPUT: - default: - ret = -EINVAL; - dev_err(dev, "invalid operations.\n"); - return ret; - } + /* setup dma */ + cfg0 = fimc_read(ctx, EXYNOS_MSCTRL); + cfg0 &= ~EXYNOS_MSCTRL_INPUT_MASK; + cfg0 |= EXYNOS_MSCTRL_INPUT_MEMORY; + fimc_write(ctx, cfg0, EXYNOS_MSCTRL); /* Reset status */ fimc_write(ctx, 0x0, EXYNOS_CISTATUS); @@ -1498,36 +1044,18 @@ static int fimc_ippdrv_start(struct device *dev, enum drm_exynos_ipp_cmd cmd) fimc_clear_bits(ctx, EXYNOS_CIOCTRL, EXYNOS_CIOCTRL_WEAVE_MASK); - if (cmd == IPP_CMD_M2M) - fimc_set_bits(ctx, EXYNOS_MSCTRL, EXYNOS_MSCTRL_ENVID); - - return 0; + fimc_set_bits(ctx, EXYNOS_MSCTRL, EXYNOS_MSCTRL_ENVID); } -static void fimc_ippdrv_stop(struct device *dev, enum drm_exynos_ipp_cmd cmd) +static void fimc_stop(struct fimc_context *ctx) { - struct fimc_context *ctx = get_fimc_context(dev); - struct drm_exynos_ipp_set_wb set_wb = {0, 0}; u32 cfg; - DRM_DEBUG_KMS("cmd[%d]\n", cmd); - - switch (cmd) { - case IPP_CMD_M2M: - /* Source clear */ - cfg = fimc_read(ctx, EXYNOS_MSCTRL); - cfg &= ~EXYNOS_MSCTRL_INPUT_MASK; - cfg &= ~EXYNOS_MSCTRL_ENVID; - fimc_write(ctx, cfg, EXYNOS_MSCTRL); - break; - case IPP_CMD_WB: - exynos_drm_ippnb_send_event(IPP_SET_WRITEBACK, (void *)&set_wb); - break; - case IPP_CMD_OUTPUT: - default: - dev_err(dev, "invalid operations.\n"); - break; - } + /* Source clear */ + cfg = fimc_read(ctx, EXYNOS_MSCTRL); + cfg &= ~EXYNOS_MSCTRL_INPUT_MASK; + cfg &= ~EXYNOS_MSCTRL_ENVID; + fimc_write(ctx, cfg, EXYNOS_MSCTRL); fimc_mask_irq(ctx, false); @@ -1545,6 +1073,87 @@ static void fimc_ippdrv_stop(struct device *dev, enum drm_exynos_ipp_cmd cmd) fimc_set_bits(ctx, EXYNOS_CIGCTRL, EXYNOS_CIGCTRL_IRQ_END_DISABLE); } +static int fimc_commit(struct exynos_drm_ipp *ipp, + struct exynos_drm_ipp_task *task) +{ + struct fimc_context *ctx = + container_of(ipp, struct fimc_context, ipp); + + pm_runtime_get_sync(ctx->dev); + ctx->task = task; + + fimc_src_set_fmt(ctx, task->src.buf.fourcc, task->src.buf.modifier); + fimc_src_set_size(ctx, &task->src); + fimc_src_set_transf(ctx, DRM_MODE_ROTATE_0); + fimc_src_set_addr(ctx, &task->src); + fimc_dst_set_fmt(ctx, task->dst.buf.fourcc, task->dst.buf.modifier); + fimc_dst_set_transf(ctx, task->transform.rotation); + fimc_dst_set_size(ctx, &task->dst); + fimc_dst_set_addr(ctx, &task->dst); + fimc_set_prescaler(ctx, &ctx->sc, &task->src.rect, &task->dst.rect); + fimc_start(ctx); + + return 0; +} + +static void fimc_abort(struct exynos_drm_ipp *ipp, + struct exynos_drm_ipp_task *task) +{ + struct fimc_context *ctx = + container_of(ipp, struct fimc_context, ipp); + + fimc_reset(ctx); + + if (ctx->task) { + struct exynos_drm_ipp_task *task = ctx->task; + + ctx->task = NULL; + pm_runtime_mark_last_busy(ctx->dev); + pm_runtime_put_autosuspend(ctx->dev); + exynos_drm_ipp_task_done(task, -EIO); + } +} + +static struct exynos_drm_ipp_funcs ipp_funcs = { + .commit = fimc_commit, + .abort = fimc_abort, +}; + +static int fimc_bind(struct device *dev, struct device *master, void *data) +{ + struct fimc_context *ctx = dev_get_drvdata(dev); + struct drm_device *drm_dev = data; + struct exynos_drm_ipp *ipp = &ctx->ipp; + + ctx->drm_dev = drm_dev; + drm_iommu_attach_device(drm_dev, dev); + + exynos_drm_ipp_register(drm_dev, ipp, &ipp_funcs, + DRM_EXYNOS_IPP_CAP_CROP | DRM_EXYNOS_IPP_CAP_ROTATE | + DRM_EXYNOS_IPP_CAP_SCALE | DRM_EXYNOS_IPP_CAP_CONVERT, + ctx->formats, ctx->num_formats, "fimc"); + + dev_info(dev, "The exynos fimc has been probed successfully\n"); + + return 0; +} + +static void fimc_unbind(struct device *dev, struct device *master, + void *data) +{ + struct fimc_context *ctx = dev_get_drvdata(dev); + struct drm_device *drm_dev = data; + struct exynos_drm_ipp *ipp = &ctx->ipp; + + exynos_drm_ipp_unregister(drm_dev, ipp); + drm_iommu_detach_device(drm_dev, dev); +} + +static const struct component_ops fimc_component_ops = { + .bind = fimc_bind, + .unbind = fimc_unbind, +}; + static void fimc_put_clocks(struct fimc_context *ctx) { int i; @@ -1559,7 +1168,7 @@ static void fimc_put_clocks(struct fimc_context *ctx) static int fimc_setup_clocks(struct fimc_context *ctx) { - struct device *fimc_dev = ctx->ippdrv.dev; + struct device *fimc_dev = ctx->dev; struct device *dev; int ret, i; @@ -1574,8 +1183,6 @@ static int fimc_setup_clocks(struct fimc_context *ctx) ctx->clocks[i] = clk_get(dev, fimc_clock_names[i]); if (IS_ERR(ctx->clocks[i])) { - if (i >= FIMC_CLK_MUX) - break; ret = PTR_ERR(ctx->clocks[i]); dev_err(fimc_dev, "failed to get clock: %s\n", fimc_clock_names[i]); @@ -1583,20 +1190,6 @@ static int fimc_setup_clocks(struct fimc_context *ctx) } } - /* Optional FIMC LCLK parent clock setting */ - if (!IS_ERR(ctx->clocks[FIMC_CLK_PARENT])) { - ret = clk_set_parent(ctx->clocks[FIMC_CLK_MUX], - ctx->clocks[FIMC_CLK_PARENT]); - if (ret < 0) { - dev_err(fimc_dev, "failed to set parent.\n"); - goto e_clk_free; - } - } - - ret = clk_set_rate(ctx->clocks[FIMC_CLK_LCLK], ctx->clk_frequency); - if (ret < 0) - goto e_clk_free; - ret = clk_prepare_enable(ctx->clocks[FIMC_CLK_LCLK]); if (!ret) return ret; @@ -1605,57 +1198,118 @@ e_clk_free: return ret; } -static int fimc_parse_dt(struct fimc_context *ctx) +int exynos_drm_check_fimc_device(struct device *dev) { - struct device_node *node = ctx->ippdrv.dev->of_node; + unsigned int id = of_alias_get_id(dev->of_node, "fimc"); - /* Handle only devices that support the LCD Writeback data path */ - if (!of_property_read_bool(node, "samsung,lcd-wb")) - return -ENODEV; + if (id >= 0 && (BIT(id) & fimc_mask)) + return 0; + return -ENODEV; +} - if (of_property_read_u32(node, "clock-frequency", - &ctx->clk_frequency)) - ctx->clk_frequency = FIMC_DEFAULT_LCLK_FREQUENCY; +static const unsigned int fimc_formats[] = { + DRM_FORMAT_XRGB8888, DRM_FORMAT_RGB565, + DRM_FORMAT_NV12, DRM_FORMAT_NV16, DRM_FORMAT_NV21, DRM_FORMAT_NV61, + DRM_FORMAT_UYVY, DRM_FORMAT_VYUY, DRM_FORMAT_YUYV, DRM_FORMAT_YVYU, + DRM_FORMAT_YUV420, DRM_FORMAT_YVU420, DRM_FORMAT_YUV422, + DRM_FORMAT_YUV444, +}; - ctx->id = of_alias_get_id(node, "fimc"); +static const unsigned int fimc_tiled_formats[] = { + DRM_FORMAT_NV12, DRM_FORMAT_NV21, +}; - if (ctx->id < 0) { - dev_err(ctx->ippdrv.dev, "failed to get node alias id.\n"); - return -EINVAL; - } +static const struct drm_exynos_ipp_limit fimc_4210_limits_v1[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 16, 8192, 8 }, .v = { 16, 8192, 2 }) }, + { IPP_SIZE_LIMIT(AREA, .h = { 16, 4224, 2 }, .v = { 16, 0, 2 }) }, + { IPP_SIZE_LIMIT(ROTATED, .h = { 128, 1920 }, .v = { 128, 0 }) }, + { IPP_SCALE_LIMIT(.h = { (1 << 16) / 64, (1 << 16) * 64 }, + .v = { (1 << 16) / 64, (1 << 16) * 64 }) }, +}; - return 0; -} +static const struct drm_exynos_ipp_limit fimc_4210_limits_v2[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 16, 8192, 8 }, .v = { 16, 8192, 2 }) }, + { IPP_SIZE_LIMIT(AREA, .h = { 16, 1920, 2 }, .v = { 16, 0, 2 }) }, + { IPP_SIZE_LIMIT(ROTATED, .h = { 128, 1366 }, .v = { 128, 0 }) }, + { IPP_SCALE_LIMIT(.h = { (1 << 16) / 64, (1 << 16) * 64 }, + .v = { (1 << 16) / 64, (1 << 16) * 64 }) }, +}; + +static const struct drm_exynos_ipp_limit fimc_4210_limits_tiled_v1[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 128, 1920, 128 }, .v = { 32, 1920, 32 }) }, + { IPP_SIZE_LIMIT(AREA, .h = { 128, 1920, 2 }, .v = { 128, 0, 2 }) }, + { IPP_SCALE_LIMIT(.h = { (1 << 16) / 64, (1 << 16) * 64 }, + .v = { (1 << 16) / 64, (1 << 16) * 64 }) }, +}; + +static const struct drm_exynos_ipp_limit fimc_4210_limits_tiled_v2[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 128, 1920, 128 }, .v = { 32, 1920, 32 }) }, + { IPP_SIZE_LIMIT(AREA, .h = { 128, 1366, 2 }, .v = { 128, 0, 2 }) }, + { IPP_SCALE_LIMIT(.h = { (1 << 16) / 64, (1 << 16) * 64 }, + .v = { (1 << 16) / 64, (1 << 16) * 64 }) }, +}; static int fimc_probe(struct platform_device *pdev) { + const struct drm_exynos_ipp_limit *limits; + struct exynos_drm_ipp_formats *formats; struct device *dev = &pdev->dev; struct fimc_context *ctx; struct resource *res; - struct exynos_drm_ippdrv *ippdrv; int ret; + int i, j, num_limits, num_formats; - if (!dev->of_node) { - dev_err(dev, "device tree node not found.\n"); + if (exynos_drm_check_fimc_device(dev) != 0) return -ENODEV; - } ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; - ctx->ippdrv.dev = dev; + ctx->dev = dev; + ctx->id = of_alias_get_id(dev->of_node, "fimc"); - ret = fimc_parse_dt(ctx); - if (ret < 0) - return ret; + /* construct formats/limits array */ + num_formats = ARRAY_SIZE(fimc_formats) + ARRAY_SIZE(fimc_tiled_formats); + formats = devm_kzalloc(dev, sizeof(*formats) * num_formats, GFP_KERNEL); + if (!formats) + return -ENOMEM; + + /* linear formats */ + if (ctx->id < 3) { + limits = fimc_4210_limits_v1; + num_limits = ARRAY_SIZE(fimc_4210_limits_v1); + } else { + limits = fimc_4210_limits_v2; + num_limits = ARRAY_SIZE(fimc_4210_limits_v2); + } + for (i = 0; i < ARRAY_SIZE(fimc_formats); i++) { + formats[i].fourcc = fimc_formats[i]; + formats[i].type = DRM_EXYNOS_IPP_FORMAT_SOURCE | + DRM_EXYNOS_IPP_FORMAT_DESTINATION; + formats[i].limits = limits; + formats[i].num_limits = num_limits; + } - ctx->sysreg = syscon_regmap_lookup_by_phandle(dev->of_node, - "samsung,sysreg"); - if (IS_ERR(ctx->sysreg)) { - dev_err(dev, "syscon regmap lookup failed.\n"); - return PTR_ERR(ctx->sysreg); + /* tiled formats */ + if (ctx->id < 3) { + limits = fimc_4210_limits_tiled_v1; + num_limits = ARRAY_SIZE(fimc_4210_limits_tiled_v1); + } else { + limits = fimc_4210_limits_tiled_v2; + num_limits = ARRAY_SIZE(fimc_4210_limits_tiled_v2); } + for (j = i, i = 0; i < ARRAY_SIZE(fimc_tiled_formats); j++, i++) { + formats[j].fourcc = fimc_tiled_formats[i]; + formats[j].modifier = DRM_FORMAT_MOD_SAMSUNG_64_32_TILE; + formats[j].type = DRM_EXYNOS_IPP_FORMAT_SOURCE | + DRM_EXYNOS_IPP_FORMAT_DESTINATION; + formats[j].limits = limits; + formats[j].num_limits = num_limits; + } + + ctx->formats = formats; + ctx->num_formats = num_formats; /* resource memory */ ctx->regs_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -1670,9 +1324,8 @@ static int fimc_probe(struct platform_device *pdev) return -ENOENT; } - ctx->irq = res->start; - ret = devm_request_threaded_irq(dev, ctx->irq, NULL, fimc_irq_handler, - IRQF_ONESHOT, "drm_fimc", ctx); + ret = devm_request_irq(dev, res->start, fimc_irq_handler, + 0, dev_name(dev), ctx); if (ret < 0) { dev_err(dev, "failed to request irq.\n"); return ret; @@ -1682,39 +1335,24 @@ static int fimc_probe(struct platform_device *pdev) if (ret < 0) return ret; - ippdrv = &ctx->ippdrv; - ippdrv->ops[EXYNOS_DRM_OPS_SRC] = &fimc_src_ops; - ippdrv->ops[EXYNOS_DRM_OPS_DST] = &fimc_dst_ops; - ippdrv->check_property = fimc_ippdrv_check_property; - ippdrv->reset = fimc_ippdrv_reset; - ippdrv->start = fimc_ippdrv_start; - ippdrv->stop = fimc_ippdrv_stop; - ret = fimc_init_prop_list(ippdrv); - if (ret < 0) { - dev_err(dev, "failed to init property list.\n"); - goto err_put_clk; - } - - DRM_DEBUG_KMS("id[%d]ippdrv[%pK]\n", ctx->id, ippdrv); - spin_lock_init(&ctx->lock); platform_set_drvdata(pdev, ctx); + pm_runtime_use_autosuspend(dev); + pm_runtime_set_autosuspend_delay(dev, FIMC_AUTOSUSPEND_DELAY); pm_runtime_enable(dev); - ret = exynos_drm_ippdrv_register(ippdrv); - if (ret < 0) { - dev_err(dev, "failed to register drm fimc device.\n"); + ret = component_add(dev, &fimc_component_ops); + if (ret) goto err_pm_dis; - } dev_info(dev, "drm fimc registered successfully.\n"); return 0; err_pm_dis: + pm_runtime_dont_use_autosuspend(dev); pm_runtime_disable(dev); -err_put_clk: fimc_put_clocks(ctx); return ret; @@ -1724,42 +1362,24 @@ static int fimc_remove(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct fimc_context *ctx = get_fimc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - exynos_drm_ippdrv_unregister(ippdrv); + component_del(dev, &fimc_component_ops); + pm_runtime_dont_use_autosuspend(dev); + pm_runtime_disable(dev); fimc_put_clocks(ctx); - pm_runtime_set_suspended(dev); - pm_runtime_disable(dev); return 0; } #ifdef CONFIG_PM -static int fimc_clk_ctrl(struct fimc_context *ctx, bool enable) -{ - DRM_DEBUG_KMS("enable[%d]\n", enable); - - if (enable) { - clk_prepare_enable(ctx->clocks[FIMC_CLK_GATE]); - clk_prepare_enable(ctx->clocks[FIMC_CLK_WB_A]); - ctx->suspended = false; - } else { - clk_disable_unprepare(ctx->clocks[FIMC_CLK_GATE]); - clk_disable_unprepare(ctx->clocks[FIMC_CLK_WB_A]); - ctx->suspended = true; - } - - return 0; -} - static int fimc_runtime_suspend(struct device *dev) { struct fimc_context *ctx = get_fimc_context(dev); DRM_DEBUG_KMS("id[%d]\n", ctx->id); - - return fimc_clk_ctrl(ctx, false); + clk_disable_unprepare(ctx->clocks[FIMC_CLK_GATE]); + return 0; } static int fimc_runtime_resume(struct device *dev) @@ -1767,8 +1387,7 @@ static int fimc_runtime_resume(struct device *dev) struct fimc_context *ctx = get_fimc_context(dev); DRM_DEBUG_KMS("id[%d]\n", ctx->id); - - return fimc_clk_ctrl(ctx, true); + return clk_prepare_enable(ctx->clocks[FIMC_CLK_GATE]); } #endif @@ -1795,4 +1414,3 @@ struct platform_driver fimc_driver = { .pm = &fimc_pm_ops, }, }; - diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.h b/drivers/gpu/drm/exynos/exynos_drm_fimc.h deleted file mode 100644 index 127a424..0000000 --- a/drivers/gpu/drm/exynos/exynos_drm_fimc.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * Copyright (c) 2012 Samsung Electronics Co., Ltd. - * - * Authors: - * Eunchul Kim <chulspro.kim@samsung.com> - * Jinyoung Jeon <jy0.jeon@samsung.com> - * Sangmin Lee <lsmin.lee@samsung.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#ifndef _EXYNOS_DRM_FIMC_H_ -#define _EXYNOS_DRM_FIMC_H_ - -/* - * TODO - * FIMD output interface notifier callback. - */ - -#endif /* _EXYNOS_DRM_FIMC_H_ */ diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c index d42ae2b..01b1570 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c +++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c @@ -121,6 +121,12 @@ static struct fimd_driver_data s3c64xx_fimd_driver_data = { .has_limited_fmt = 1, }; +static struct fimd_driver_data s5pv210_fimd_driver_data = { + .timing_base = 0x0, + .has_shadowcon = 1, + .has_clksel = 1, +}; + static struct fimd_driver_data exynos3_fimd_driver_data = { .timing_base = 0x20000, .lcdblk_offset = 0x210, @@ -193,6 +199,8 @@ struct fimd_context { static const struct of_device_id fimd_driver_dt_match[] = { { .compatible = "samsung,s3c6400-fimd", .data = &s3c64xx_fimd_driver_data }, + { .compatible = "samsung,s5pv210-fimd", + .data = &s5pv210_fimd_driver_data }, { .compatible = "samsung,exynos3250-fimd", .data = &exynos3_fimd_driver_data }, { .compatible = "samsung,exynos4210-fimd", diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c index 11cc01b..6e1494f 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c @@ -431,37 +431,24 @@ int exynos_drm_gem_dumb_create(struct drm_file *file_priv, return 0; } -int exynos_drm_gem_fault(struct vm_fault *vmf) +vm_fault_t exynos_drm_gem_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct drm_gem_object *obj = vma->vm_private_data; struct exynos_drm_gem *exynos_gem = to_exynos_gem(obj); unsigned long pfn; pgoff_t page_offset; - int ret; page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT; if (page_offset >= (exynos_gem->size >> PAGE_SHIFT)) { DRM_ERROR("invalid page offset\n"); - ret = -EINVAL; - goto out; + return VM_FAULT_SIGBUS; } pfn = page_to_pfn(exynos_gem->pages[page_offset]); - ret = vm_insert_mixed(vma, vmf->address, __pfn_to_pfn_t(pfn, PFN_DEV)); - -out: - switch (ret) { - case 0: - case -ERESTARTSYS: - case -EINTR: - return VM_FAULT_NOPAGE; - case -ENOMEM: - return VM_FAULT_OOM; - default: - return VM_FAULT_SIGBUS; - } + return vmf_insert_mixed(vma, vmf->address, + __pfn_to_pfn_t(pfn, PFN_DEV)); } static int exynos_drm_gem_mmap_obj(struct drm_gem_object *obj, diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.h b/drivers/gpu/drm/exynos/exynos_drm_gem.h index 5a4c7de..9057d7f 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.h +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.h @@ -13,6 +13,7 @@ #define _EXYNOS_DRM_GEM_H_ #include <drm/drm_gem.h> +#include <linux/mm_types.h> #define to_exynos_gem(x) container_of(x, struct exynos_drm_gem, base) @@ -111,7 +112,7 @@ int exynos_drm_gem_dumb_create(struct drm_file *file_priv, struct drm_mode_create_dumb *args); /* page fault handler and mmap fault address(virtual) to physical memory. */ -int exynos_drm_gem_fault(struct vm_fault *vmf); +vm_fault_t exynos_drm_gem_fault(struct vm_fault *vmf); /* set vm_flags and we can change the vm attribute to other one at here. */ int exynos_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma); diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c index 0506b2b..e99dd1e 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c @@ -12,18 +12,20 @@ * */ #include <linux/kernel.h> +#include <linux/component.h> #include <linux/platform_device.h> #include <linux/clk.h> #include <linux/pm_runtime.h> #include <linux/mfd/syscon.h> +#include <linux/of_device.h> #include <linux/regmap.h> #include <drm/drmP.h> #include <drm/exynos_drm.h> #include "regs-gsc.h" #include "exynos_drm_drv.h" +#include "exynos_drm_iommu.h" #include "exynos_drm_ipp.h" -#include "exynos_drm_gsc.h" /* * GSC stands for General SCaler and @@ -31,26 +33,10 @@ * input DMA reads image data from the memory. * output DMA writes image data to memory. * GSC supports image rotation and image effect functions. - * - * M2M operation : supports crop/scale/rotation/csc so on. - * Memory ----> GSC H/W ----> Memory. - * Writeback operation : supports cloned screen with FIMD. - * FIMD ----> GSC H/W ----> Memory. - * Output operation : supports direct display using local path. - * Memory ----> GSC H/W ----> FIMD, Mixer. */ -/* - * TODO - * 1. check suspend/resume api if needed. - * 2. need to check use case platform_device_id. - * 3. check src/dst size with, height. - * 4. added check_prepare api for right register. - * 5. need to add supported list in prop_list. - * 6. check prescaler/scaler optimization. - */ -#define GSC_MAX_DEVS 4 +#define GSC_MAX_CLOCKS 8 #define GSC_MAX_SRC 4 #define GSC_MAX_DST 16 #define GSC_RESET_TIMEOUT 50 @@ -65,8 +51,6 @@ #define GSC_SC_DOWN_RATIO_4_8 131072 #define GSC_SC_DOWN_RATIO_3_8 174762 #define GSC_SC_DOWN_RATIO_2_8 262144 -#define GSC_REFRESH_MIN 12 -#define GSC_REFRESH_MAX 60 #define GSC_CROP_MAX 8192 #define GSC_CROP_MIN 32 #define GSC_SCALE_MAX 4224 @@ -77,10 +61,9 @@ #define GSC_COEF_H_8T 8 #define GSC_COEF_V_4T 4 #define GSC_COEF_DEPTH 3 +#define GSC_AUTOSUSPEND_DELAY 2000 #define get_gsc_context(dev) platform_get_drvdata(to_platform_device(dev)) -#define get_ctx_from_ippdrv(ippdrv) container_of(ippdrv,\ - struct gsc_context, ippdrv); #define gsc_read(offset) readl(ctx->regs + (offset)) #define gsc_write(cfg, offset) writel(cfg, ctx->regs + (offset)) @@ -104,50 +87,47 @@ struct gsc_scaler { }; /* - * A structure of scaler capability. - * - * find user manual 49.2 features. - * @tile_w: tile mode or rotation width. - * @tile_h: tile mode or rotation height. - * @w: other cases width. - * @h: other cases height. - */ -struct gsc_capability { - /* tile or rotation */ - u32 tile_w; - u32 tile_h; - /* other cases */ - u32 w; - u32 h; -}; - -/* * A structure of gsc context. * - * @ippdrv: prepare initialization using ippdrv. * @regs_res: register resources. * @regs: memory mapped io registers. - * @sysreg: handle to SYSREG block regmap. - * @lock: locking of operations. * @gsc_clk: gsc gate clock. * @sc: scaler infomations. * @id: gsc id. * @irq: irq number. * @rotation: supports rotation of src. - * @suspended: qos operations. */ struct gsc_context { - struct exynos_drm_ippdrv ippdrv; + struct exynos_drm_ipp ipp; + struct drm_device *drm_dev; + struct device *dev; + struct exynos_drm_ipp_task *task; + struct exynos_drm_ipp_formats *formats; + unsigned int num_formats; + struct resource *regs_res; void __iomem *regs; - struct regmap *sysreg; - struct mutex lock; - struct clk *gsc_clk; + const char **clk_names; + struct clk *clocks[GSC_MAX_CLOCKS]; + int num_clocks; struct gsc_scaler sc; int id; int irq; bool rotation; - bool suspended; +}; + +/** + * struct gsc_driverdata - per device type driver data for init time. + * + * @limits: picture size limits array + * @clk_names: names of clocks needed by this variant + * @num_clocks: the number of clocks needed by this variant + */ +struct gsc_driverdata { + const struct drm_exynos_ipp_limit *limits; + int num_limits; + const char *clk_names[GSC_MAX_CLOCKS]; + int num_clocks; }; /* 8-tap Filter Coefficient */ @@ -438,25 +418,6 @@ static int gsc_sw_reset(struct gsc_context *ctx) return 0; } -static void gsc_set_gscblk_fimd_wb(struct gsc_context *ctx, bool enable) -{ - unsigned int gscblk_cfg; - - if (!ctx->sysreg) - return; - - regmap_read(ctx->sysreg, SYSREG_GSCBLK_CFG1, &gscblk_cfg); - - if (enable) - gscblk_cfg |= GSC_BLK_DISP1WB_DEST(ctx->id) | - GSC_BLK_GSCL_WB_IN_SRC_SEL(ctx->id) | - GSC_BLK_SW_RESET_WB_DEST(ctx->id); - else - gscblk_cfg |= GSC_BLK_PXLASYNC_LO_MASK_WB(ctx->id); - - regmap_write(ctx->sysreg, SYSREG_GSCBLK_CFG1, gscblk_cfg); -} - static void gsc_handle_irq(struct gsc_context *ctx, bool enable, bool overflow, bool done) { @@ -487,10 +448,8 @@ static void gsc_handle_irq(struct gsc_context *ctx, bool enable, } -static int gsc_src_set_fmt(struct device *dev, u32 fmt) +static void gsc_src_set_fmt(struct gsc_context *ctx, u32 fmt) { - struct gsc_context *ctx = get_gsc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; u32 cfg; DRM_DEBUG_KMS("fmt[0x%x]\n", fmt); @@ -506,6 +465,7 @@ static int gsc_src_set_fmt(struct device *dev, u32 fmt) cfg |= GSC_IN_RGB565; break; case DRM_FORMAT_XRGB8888: + case DRM_FORMAT_ARGB8888: cfg |= GSC_IN_XRGB8888; break; case DRM_FORMAT_BGRX8888: @@ -548,115 +508,84 @@ static int gsc_src_set_fmt(struct device *dev, u32 fmt) cfg |= (GSC_IN_CHROMA_ORDER_CBCR | GSC_IN_YUV420_2P); break; - default: - dev_err(ippdrv->dev, "invalid target yuv order 0x%x.\n", fmt); - return -EINVAL; } gsc_write(cfg, GSC_IN_CON); - - return 0; } -static int gsc_src_set_transf(struct device *dev, - enum drm_exynos_degree degree, - enum drm_exynos_flip flip, bool *swap) +static void gsc_src_set_transf(struct gsc_context *ctx, unsigned int rotation) { - struct gsc_context *ctx = get_gsc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; + unsigned int degree = rotation & DRM_MODE_ROTATE_MASK; u32 cfg; - DRM_DEBUG_KMS("degree[%d]flip[0x%x]\n", degree, flip); - cfg = gsc_read(GSC_IN_CON); cfg &= ~GSC_IN_ROT_MASK; switch (degree) { - case EXYNOS_DRM_DEGREE_0: - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + case DRM_MODE_ROTATE_0: + if (rotation & DRM_MODE_REFLECT_Y) cfg |= GSC_IN_ROT_XFLIP; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_X) cfg |= GSC_IN_ROT_YFLIP; break; - case EXYNOS_DRM_DEGREE_90: - if (flip & EXYNOS_DRM_FLIP_VERTICAL) - cfg |= GSC_IN_ROT_90_XFLIP; - else if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) - cfg |= GSC_IN_ROT_90_YFLIP; - else - cfg |= GSC_IN_ROT_90; + case DRM_MODE_ROTATE_90: + cfg |= GSC_IN_ROT_90; + if (rotation & DRM_MODE_REFLECT_Y) + cfg |= GSC_IN_ROT_XFLIP; + if (rotation & DRM_MODE_REFLECT_X) + cfg |= GSC_IN_ROT_YFLIP; break; - case EXYNOS_DRM_DEGREE_180: + case DRM_MODE_ROTATE_180: cfg |= GSC_IN_ROT_180; - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + if (rotation & DRM_MODE_REFLECT_Y) cfg &= ~GSC_IN_ROT_XFLIP; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_X) cfg &= ~GSC_IN_ROT_YFLIP; break; - case EXYNOS_DRM_DEGREE_270: + case DRM_MODE_ROTATE_270: cfg |= GSC_IN_ROT_270; - if (flip & EXYNOS_DRM_FLIP_VERTICAL) + if (rotation & DRM_MODE_REFLECT_Y) cfg &= ~GSC_IN_ROT_XFLIP; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) + if (rotation & DRM_MODE_REFLECT_X) cfg &= ~GSC_IN_ROT_YFLIP; break; - default: - dev_err(ippdrv->dev, "invalid degree value %d.\n", degree); - return -EINVAL; } gsc_write(cfg, GSC_IN_CON); ctx->rotation = (cfg & GSC_IN_ROT_90) ? 1 : 0; - *swap = ctx->rotation; - - return 0; } -static int gsc_src_set_size(struct device *dev, int swap, - struct drm_exynos_pos *pos, struct drm_exynos_sz *sz) +static void gsc_src_set_size(struct gsc_context *ctx, + struct exynos_drm_ipp_buffer *buf) { - struct gsc_context *ctx = get_gsc_context(dev); - struct drm_exynos_pos img_pos = *pos; struct gsc_scaler *sc = &ctx->sc; u32 cfg; - DRM_DEBUG_KMS("swap[%d]x[%d]y[%d]w[%d]h[%d]\n", - swap, pos->x, pos->y, pos->w, pos->h); - - if (swap) { - img_pos.w = pos->h; - img_pos.h = pos->w; - } - /* pixel offset */ - cfg = (GSC_SRCIMG_OFFSET_X(img_pos.x) | - GSC_SRCIMG_OFFSET_Y(img_pos.y)); + cfg = (GSC_SRCIMG_OFFSET_X(buf->rect.x) | + GSC_SRCIMG_OFFSET_Y(buf->rect.y)); gsc_write(cfg, GSC_SRCIMG_OFFSET); /* cropped size */ - cfg = (GSC_CROPPED_WIDTH(img_pos.w) | - GSC_CROPPED_HEIGHT(img_pos.h)); + cfg = (GSC_CROPPED_WIDTH(buf->rect.w) | + GSC_CROPPED_HEIGHT(buf->rect.h)); gsc_write(cfg, GSC_CROPPED_SIZE); - DRM_DEBUG_KMS("hsize[%d]vsize[%d]\n", sz->hsize, sz->vsize); - /* original size */ cfg = gsc_read(GSC_SRCIMG_SIZE); cfg &= ~(GSC_SRCIMG_HEIGHT_MASK | GSC_SRCIMG_WIDTH_MASK); - cfg |= (GSC_SRCIMG_WIDTH(sz->hsize) | - GSC_SRCIMG_HEIGHT(sz->vsize)); + cfg |= (GSC_SRCIMG_WIDTH(buf->buf.width) | + GSC_SRCIMG_HEIGHT(buf->buf.height)); gsc_write(cfg, GSC_SRCIMG_SIZE); cfg = gsc_read(GSC_IN_CON); cfg &= ~GSC_IN_RGB_TYPE_MASK; - DRM_DEBUG_KMS("width[%d]range[%d]\n", pos->w, sc->range); - - if (pos->w >= GSC_WIDTH_ITU_709) + if (buf->rect.w >= GSC_WIDTH_ITU_709) if (sc->range) cfg |= GSC_IN_RGB_HD_WIDE; else @@ -668,103 +597,39 @@ static int gsc_src_set_size(struct device *dev, int swap, cfg |= GSC_IN_RGB_SD_NARROW; gsc_write(cfg, GSC_IN_CON); - - return 0; } -static int gsc_src_set_buf_seq(struct gsc_context *ctx, u32 buf_id, - enum drm_exynos_ipp_buf_type buf_type) +static void gsc_src_set_buf_seq(struct gsc_context *ctx, u32 buf_id, + bool enqueue) { - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - bool masked; + bool masked = !enqueue; u32 cfg; u32 mask = 0x00000001 << buf_id; - DRM_DEBUG_KMS("buf_id[%d]buf_type[%d]\n", buf_id, buf_type); - /* mask register set */ cfg = gsc_read(GSC_IN_BASE_ADDR_Y_MASK); - switch (buf_type) { - case IPP_BUF_ENQUEUE: - masked = false; - break; - case IPP_BUF_DEQUEUE: - masked = true; - break; - default: - dev_err(ippdrv->dev, "invalid buf ctrl parameter.\n"); - return -EINVAL; - } - /* sequence id */ cfg &= ~mask; cfg |= masked << buf_id; gsc_write(cfg, GSC_IN_BASE_ADDR_Y_MASK); gsc_write(cfg, GSC_IN_BASE_ADDR_CB_MASK); gsc_write(cfg, GSC_IN_BASE_ADDR_CR_MASK); - - return 0; } -static int gsc_src_set_addr(struct device *dev, - struct drm_exynos_ipp_buf_info *buf_info, u32 buf_id, - enum drm_exynos_ipp_buf_type buf_type) +static void gsc_src_set_addr(struct gsc_context *ctx, u32 buf_id, + struct exynos_drm_ipp_buffer *buf) { - struct gsc_context *ctx = get_gsc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node; - struct drm_exynos_ipp_property *property; - - if (!c_node) { - DRM_ERROR("failed to get c_node.\n"); - return -EFAULT; - } - - property = &c_node->property; - - DRM_DEBUG_KMS("prop_id[%d]buf_id[%d]buf_type[%d]\n", - property->prop_id, buf_id, buf_type); - - if (buf_id > GSC_MAX_SRC) { - dev_info(ippdrv->dev, "invalid buf_id %d.\n", buf_id); - return -EINVAL; - } - /* address register set */ - switch (buf_type) { - case IPP_BUF_ENQUEUE: - gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_Y], - GSC_IN_BASE_ADDR_Y(buf_id)); - gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_CB], - GSC_IN_BASE_ADDR_CB(buf_id)); - gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_CR], - GSC_IN_BASE_ADDR_CR(buf_id)); - break; - case IPP_BUF_DEQUEUE: - gsc_write(0x0, GSC_IN_BASE_ADDR_Y(buf_id)); - gsc_write(0x0, GSC_IN_BASE_ADDR_CB(buf_id)); - gsc_write(0x0, GSC_IN_BASE_ADDR_CR(buf_id)); - break; - default: - /* bypass */ - break; - } + gsc_write(buf->dma_addr[0], GSC_IN_BASE_ADDR_Y(buf_id)); + gsc_write(buf->dma_addr[1], GSC_IN_BASE_ADDR_CB(buf_id)); + gsc_write(buf->dma_addr[2], GSC_IN_BASE_ADDR_CR(buf_id)); - return gsc_src_set_buf_seq(ctx, buf_id, buf_type); + gsc_src_set_buf_seq(ctx, buf_id, true); } -static struct exynos_drm_ipp_ops gsc_src_ops = { - .set_fmt = gsc_src_set_fmt, - .set_transf = gsc_src_set_transf, - .set_size = gsc_src_set_size, - .set_addr = gsc_src_set_addr, -}; - -static int gsc_dst_set_fmt(struct device *dev, u32 fmt) +static void gsc_dst_set_fmt(struct gsc_context *ctx, u32 fmt) { - struct gsc_context *ctx = get_gsc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; u32 cfg; DRM_DEBUG_KMS("fmt[0x%x]\n", fmt); @@ -779,8 +644,9 @@ static int gsc_dst_set_fmt(struct device *dev, u32 fmt) case DRM_FORMAT_RGB565: cfg |= GSC_OUT_RGB565; break; + case DRM_FORMAT_ARGB8888: case DRM_FORMAT_XRGB8888: - cfg |= GSC_OUT_XRGB8888; + cfg |= (GSC_OUT_XRGB8888 | GSC_OUT_GLOBAL_ALPHA(0xff)); break; case DRM_FORMAT_BGRX8888: cfg |= (GSC_OUT_XRGB8888 | GSC_OUT_RB_SWAP); @@ -819,69 +685,9 @@ static int gsc_dst_set_fmt(struct device *dev, u32 fmt) cfg |= (GSC_OUT_CHROMA_ORDER_CBCR | GSC_OUT_YUV420_2P); break; - default: - dev_err(ippdrv->dev, "invalid target yuv order 0x%x.\n", fmt); - return -EINVAL; } gsc_write(cfg, GSC_OUT_CON); - - return 0; -} - -static int gsc_dst_set_transf(struct device *dev, - enum drm_exynos_degree degree, - enum drm_exynos_flip flip, bool *swap) -{ - struct gsc_context *ctx = get_gsc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - u32 cfg; - - DRM_DEBUG_KMS("degree[%d]flip[0x%x]\n", degree, flip); - - cfg = gsc_read(GSC_IN_CON); - cfg &= ~GSC_IN_ROT_MASK; - - switch (degree) { - case EXYNOS_DRM_DEGREE_0: - if (flip & EXYNOS_DRM_FLIP_VERTICAL) - cfg |= GSC_IN_ROT_XFLIP; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) - cfg |= GSC_IN_ROT_YFLIP; - break; - case EXYNOS_DRM_DEGREE_90: - if (flip & EXYNOS_DRM_FLIP_VERTICAL) - cfg |= GSC_IN_ROT_90_XFLIP; - else if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) - cfg |= GSC_IN_ROT_90_YFLIP; - else - cfg |= GSC_IN_ROT_90; - break; - case EXYNOS_DRM_DEGREE_180: - cfg |= GSC_IN_ROT_180; - if (flip & EXYNOS_DRM_FLIP_VERTICAL) - cfg &= ~GSC_IN_ROT_XFLIP; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) - cfg &= ~GSC_IN_ROT_YFLIP; - break; - case EXYNOS_DRM_DEGREE_270: - cfg |= GSC_IN_ROT_270; - if (flip & EXYNOS_DRM_FLIP_VERTICAL) - cfg &= ~GSC_IN_ROT_XFLIP; - if (flip & EXYNOS_DRM_FLIP_HORIZONTAL) - cfg &= ~GSC_IN_ROT_YFLIP; - break; - default: - dev_err(ippdrv->dev, "invalid degree value %d.\n", degree); - return -EINVAL; - } - - gsc_write(cfg, GSC_IN_CON); - - ctx->rotation = (cfg & GSC_IN_ROT_90) ? 1 : 0; - *swap = ctx->rotation; - - return 0; } static int gsc_get_ratio_shift(u32 src, u32 dst, u32 *ratio) @@ -919,9 +725,9 @@ static void gsc_get_prescaler_shfactor(u32 hratio, u32 vratio, u32 *shfactor) } static int gsc_set_prescaler(struct gsc_context *ctx, struct gsc_scaler *sc, - struct drm_exynos_pos *src, struct drm_exynos_pos *dst) + struct drm_exynos_ipp_task_rect *src, + struct drm_exynos_ipp_task_rect *dst) { - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; u32 cfg; u32 src_w, src_h, dst_w, dst_h; int ret = 0; @@ -939,13 +745,13 @@ static int gsc_set_prescaler(struct gsc_context *ctx, struct gsc_scaler *sc, ret = gsc_get_ratio_shift(src_w, dst_w, &sc->pre_hratio); if (ret) { - dev_err(ippdrv->dev, "failed to get ratio horizontal.\n"); + dev_err(ctx->dev, "failed to get ratio horizontal.\n"); return ret; } ret = gsc_get_ratio_shift(src_h, dst_h, &sc->pre_vratio); if (ret) { - dev_err(ippdrv->dev, "failed to get ratio vertical.\n"); + dev_err(ctx->dev, "failed to get ratio vertical.\n"); return ret; } @@ -1039,47 +845,37 @@ static void gsc_set_scaler(struct gsc_context *ctx, struct gsc_scaler *sc) gsc_write(cfg, GSC_MAIN_V_RATIO); } -static int gsc_dst_set_size(struct device *dev, int swap, - struct drm_exynos_pos *pos, struct drm_exynos_sz *sz) +static void gsc_dst_set_size(struct gsc_context *ctx, + struct exynos_drm_ipp_buffer *buf) { - struct gsc_context *ctx = get_gsc_context(dev); - struct drm_exynos_pos img_pos = *pos; struct gsc_scaler *sc = &ctx->sc; u32 cfg; - DRM_DEBUG_KMS("swap[%d]x[%d]y[%d]w[%d]h[%d]\n", - swap, pos->x, pos->y, pos->w, pos->h); - - if (swap) { - img_pos.w = pos->h; - img_pos.h = pos->w; - } - /* pixel offset */ - cfg = (GSC_DSTIMG_OFFSET_X(pos->x) | - GSC_DSTIMG_OFFSET_Y(pos->y)); + cfg = (GSC_DSTIMG_OFFSET_X(buf->rect.x) | + GSC_DSTIMG_OFFSET_Y(buf->rect.y)); gsc_write(cfg, GSC_DSTIMG_OFFSET); /* scaled size */ - cfg = (GSC_SCALED_WIDTH(img_pos.w) | GSC_SCALED_HEIGHT(img_pos.h)); + if (ctx->rotation) + cfg = (GSC_SCALED_WIDTH(buf->rect.h) | + GSC_SCALED_HEIGHT(buf->rect.w)); + else + cfg = (GSC_SCALED_WIDTH(buf->rect.w) | + GSC_SCALED_HEIGHT(buf->rect.h)); gsc_write(cfg, GSC_SCALED_SIZE); - DRM_DEBUG_KMS("hsize[%d]vsize[%d]\n", sz->hsize, sz->vsize); - /* original size */ cfg = gsc_read(GSC_DSTIMG_SIZE); - cfg &= ~(GSC_DSTIMG_HEIGHT_MASK | - GSC_DSTIMG_WIDTH_MASK); - cfg |= (GSC_DSTIMG_WIDTH(sz->hsize) | - GSC_DSTIMG_HEIGHT(sz->vsize)); + cfg &= ~(GSC_DSTIMG_HEIGHT_MASK | GSC_DSTIMG_WIDTH_MASK); + cfg |= GSC_DSTIMG_WIDTH(buf->buf.width) | + GSC_DSTIMG_HEIGHT(buf->buf.height); gsc_write(cfg, GSC_DSTIMG_SIZE); cfg = gsc_read(GSC_OUT_CON); cfg &= ~GSC_OUT_RGB_TYPE_MASK; - DRM_DEBUG_KMS("width[%d]range[%d]\n", pos->w, sc->range); - - if (pos->w >= GSC_WIDTH_ITU_709) + if (buf->rect.w >= GSC_WIDTH_ITU_709) if (sc->range) cfg |= GSC_OUT_RGB_HD_WIDE; else @@ -1091,8 +887,6 @@ static int gsc_dst_set_size(struct device *dev, int swap, cfg |= GSC_OUT_RGB_SD_NARROW; gsc_write(cfg, GSC_OUT_CON); - - return 0; } static int gsc_dst_get_buf_seq(struct gsc_context *ctx) @@ -1111,35 +905,16 @@ static int gsc_dst_get_buf_seq(struct gsc_context *ctx) return buf_num; } -static int gsc_dst_set_buf_seq(struct gsc_context *ctx, u32 buf_id, - enum drm_exynos_ipp_buf_type buf_type) +static void gsc_dst_set_buf_seq(struct gsc_context *ctx, u32 buf_id, + bool enqueue) { - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - bool masked; + bool masked = !enqueue; u32 cfg; u32 mask = 0x00000001 << buf_id; - int ret = 0; - - DRM_DEBUG_KMS("buf_id[%d]buf_type[%d]\n", buf_id, buf_type); - - mutex_lock(&ctx->lock); /* mask register set */ cfg = gsc_read(GSC_OUT_BASE_ADDR_Y_MASK); - switch (buf_type) { - case IPP_BUF_ENQUEUE: - masked = false; - break; - case IPP_BUF_DEQUEUE: - masked = true; - break; - default: - dev_err(ippdrv->dev, "invalid buf ctrl parameter.\n"); - ret = -EINVAL; - goto err_unlock; - } - /* sequence id */ cfg &= ~mask; cfg |= masked << buf_id; @@ -1148,94 +923,29 @@ static int gsc_dst_set_buf_seq(struct gsc_context *ctx, u32 buf_id, gsc_write(cfg, GSC_OUT_BASE_ADDR_CR_MASK); /* interrupt enable */ - if (buf_type == IPP_BUF_ENQUEUE && - gsc_dst_get_buf_seq(ctx) >= GSC_BUF_START) + if (enqueue && gsc_dst_get_buf_seq(ctx) >= GSC_BUF_START) gsc_handle_irq(ctx, true, false, true); /* interrupt disable */ - if (buf_type == IPP_BUF_DEQUEUE && - gsc_dst_get_buf_seq(ctx) <= GSC_BUF_STOP) + if (!enqueue && gsc_dst_get_buf_seq(ctx) <= GSC_BUF_STOP) gsc_handle_irq(ctx, false, false, true); - -err_unlock: - mutex_unlock(&ctx->lock); - return ret; } -static int gsc_dst_set_addr(struct device *dev, - struct drm_exynos_ipp_buf_info *buf_info, u32 buf_id, - enum drm_exynos_ipp_buf_type buf_type) +static void gsc_dst_set_addr(struct gsc_context *ctx, + u32 buf_id, struct exynos_drm_ipp_buffer *buf) { - struct gsc_context *ctx = get_gsc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node; - struct drm_exynos_ipp_property *property; - - if (!c_node) { - DRM_ERROR("failed to get c_node.\n"); - return -EFAULT; - } - - property = &c_node->property; - - DRM_DEBUG_KMS("prop_id[%d]buf_id[%d]buf_type[%d]\n", - property->prop_id, buf_id, buf_type); - - if (buf_id > GSC_MAX_DST) { - dev_info(ippdrv->dev, "invalid buf_id %d.\n", buf_id); - return -EINVAL; - } - /* address register set */ - switch (buf_type) { - case IPP_BUF_ENQUEUE: - gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_Y], - GSC_OUT_BASE_ADDR_Y(buf_id)); - gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_CB], - GSC_OUT_BASE_ADDR_CB(buf_id)); - gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_CR], - GSC_OUT_BASE_ADDR_CR(buf_id)); - break; - case IPP_BUF_DEQUEUE: - gsc_write(0x0, GSC_OUT_BASE_ADDR_Y(buf_id)); - gsc_write(0x0, GSC_OUT_BASE_ADDR_CB(buf_id)); - gsc_write(0x0, GSC_OUT_BASE_ADDR_CR(buf_id)); - break; - default: - /* bypass */ - break; - } + gsc_write(buf->dma_addr[0], GSC_OUT_BASE_ADDR_Y(buf_id)); + gsc_write(buf->dma_addr[1], GSC_OUT_BASE_ADDR_CB(buf_id)); + gsc_write(buf->dma_addr[2], GSC_OUT_BASE_ADDR_CR(buf_id)); - return gsc_dst_set_buf_seq(ctx, buf_id, buf_type); -} - -static struct exynos_drm_ipp_ops gsc_dst_ops = { - .set_fmt = gsc_dst_set_fmt, - .set_transf = gsc_dst_set_transf, - .set_size = gsc_dst_set_size, - .set_addr = gsc_dst_set_addr, -}; - -static int gsc_clk_ctrl(struct gsc_context *ctx, bool enable) -{ - DRM_DEBUG_KMS("enable[%d]\n", enable); - - if (enable) { - clk_prepare_enable(ctx->gsc_clk); - ctx->suspended = false; - } else { - clk_disable_unprepare(ctx->gsc_clk); - ctx->suspended = true; - } - - return 0; + gsc_dst_set_buf_seq(ctx, buf_id, true); } static int gsc_get_src_buf_index(struct gsc_context *ctx) { u32 cfg, curr_index, i; u32 buf_id = GSC_MAX_SRC; - int ret; DRM_DEBUG_KMS("gsc id[%d]\n", ctx->id); @@ -1249,19 +959,15 @@ static int gsc_get_src_buf_index(struct gsc_context *ctx) } } + DRM_DEBUG_KMS("cfg[0x%x]curr_index[%d]buf_id[%d]\n", cfg, + curr_index, buf_id); + if (buf_id == GSC_MAX_SRC) { DRM_ERROR("failed to get in buffer index.\n"); return -EINVAL; } - ret = gsc_src_set_buf_seq(ctx, buf_id, IPP_BUF_DEQUEUE); - if (ret < 0) { - DRM_ERROR("failed to dequeue.\n"); - return ret; - } - - DRM_DEBUG_KMS("cfg[0x%x]curr_index[%d]buf_id[%d]\n", cfg, - curr_index, buf_id); + gsc_src_set_buf_seq(ctx, buf_id, false); return buf_id; } @@ -1270,7 +976,6 @@ static int gsc_get_dst_buf_index(struct gsc_context *ctx) { u32 cfg, curr_index, i; u32 buf_id = GSC_MAX_DST; - int ret; DRM_DEBUG_KMS("gsc id[%d]\n", ctx->id); @@ -1289,11 +994,7 @@ static int gsc_get_dst_buf_index(struct gsc_context *ctx) return -EINVAL; } - ret = gsc_dst_set_buf_seq(ctx, buf_id, IPP_BUF_DEQUEUE); - if (ret < 0) { - DRM_ERROR("failed to dequeue.\n"); - return ret; - } + gsc_dst_set_buf_seq(ctx, buf_id, false); DRM_DEBUG_KMS("cfg[0x%x]curr_index[%d]buf_id[%d]\n", cfg, curr_index, buf_id); @@ -1304,215 +1005,55 @@ static int gsc_get_dst_buf_index(struct gsc_context *ctx) static irqreturn_t gsc_irq_handler(int irq, void *dev_id) { struct gsc_context *ctx = dev_id; - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node; - struct drm_exynos_ipp_event_work *event_work = - c_node->event_work; u32 status; - int buf_id[EXYNOS_DRM_OPS_MAX]; + int err = 0; DRM_DEBUG_KMS("gsc id[%d]\n", ctx->id); status = gsc_read(GSC_IRQ); if (status & GSC_IRQ_STATUS_OR_IRQ) { - dev_err(ippdrv->dev, "occurred overflow at %d, status 0x%x.\n", + dev_err(ctx->dev, "occurred overflow at %d, status 0x%x.\n", ctx->id, status); - return IRQ_NONE; + err = -EINVAL; } if (status & GSC_IRQ_STATUS_OR_FRM_DONE) { - dev_dbg(ippdrv->dev, "occurred frame done at %d, status 0x%x.\n", - ctx->id, status); - - buf_id[EXYNOS_DRM_OPS_SRC] = gsc_get_src_buf_index(ctx); - if (buf_id[EXYNOS_DRM_OPS_SRC] < 0) - return IRQ_HANDLED; - - buf_id[EXYNOS_DRM_OPS_DST] = gsc_get_dst_buf_index(ctx); - if (buf_id[EXYNOS_DRM_OPS_DST] < 0) - return IRQ_HANDLED; - - DRM_DEBUG_KMS("buf_id_src[%d]buf_id_dst[%d]\n", - buf_id[EXYNOS_DRM_OPS_SRC], buf_id[EXYNOS_DRM_OPS_DST]); - - event_work->ippdrv = ippdrv; - event_work->buf_id[EXYNOS_DRM_OPS_SRC] = - buf_id[EXYNOS_DRM_OPS_SRC]; - event_work->buf_id[EXYNOS_DRM_OPS_DST] = - buf_id[EXYNOS_DRM_OPS_DST]; - queue_work(ippdrv->event_workq, &event_work->work); - } - - return IRQ_HANDLED; -} - -static int gsc_init_prop_list(struct exynos_drm_ippdrv *ippdrv) -{ - struct drm_exynos_ipp_prop_list *prop_list = &ippdrv->prop_list; - - prop_list->version = 1; - prop_list->writeback = 1; - prop_list->refresh_min = GSC_REFRESH_MIN; - prop_list->refresh_max = GSC_REFRESH_MAX; - prop_list->flip = (1 << EXYNOS_DRM_FLIP_VERTICAL) | - (1 << EXYNOS_DRM_FLIP_HORIZONTAL); - prop_list->degree = (1 << EXYNOS_DRM_DEGREE_0) | - (1 << EXYNOS_DRM_DEGREE_90) | - (1 << EXYNOS_DRM_DEGREE_180) | - (1 << EXYNOS_DRM_DEGREE_270); - prop_list->csc = 1; - prop_list->crop = 1; - prop_list->crop_max.hsize = GSC_CROP_MAX; - prop_list->crop_max.vsize = GSC_CROP_MAX; - prop_list->crop_min.hsize = GSC_CROP_MIN; - prop_list->crop_min.vsize = GSC_CROP_MIN; - prop_list->scale = 1; - prop_list->scale_max.hsize = GSC_SCALE_MAX; - prop_list->scale_max.vsize = GSC_SCALE_MAX; - prop_list->scale_min.hsize = GSC_SCALE_MIN; - prop_list->scale_min.vsize = GSC_SCALE_MIN; - - return 0; -} - -static inline bool gsc_check_drm_flip(enum drm_exynos_flip flip) -{ - switch (flip) { - case EXYNOS_DRM_FLIP_NONE: - case EXYNOS_DRM_FLIP_VERTICAL: - case EXYNOS_DRM_FLIP_HORIZONTAL: - case EXYNOS_DRM_FLIP_BOTH: - return true; - default: - DRM_DEBUG_KMS("invalid flip\n"); - return false; - } -} - -static int gsc_ippdrv_check_property(struct device *dev, - struct drm_exynos_ipp_property *property) -{ - struct gsc_context *ctx = get_gsc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - struct drm_exynos_ipp_prop_list *pp = &ippdrv->prop_list; - struct drm_exynos_ipp_config *config; - struct drm_exynos_pos *pos; - struct drm_exynos_sz *sz; - bool swap; - int i; - - for_each_ipp_ops(i) { - if ((i == EXYNOS_DRM_OPS_SRC) && - (property->cmd == IPP_CMD_WB)) - continue; + int src_buf_id, dst_buf_id; - config = &property->config[i]; - pos = &config->pos; - sz = &config->sz; - - /* check for flip */ - if (!gsc_check_drm_flip(config->flip)) { - DRM_ERROR("invalid flip.\n"); - goto err_property; - } - - /* check for degree */ - switch (config->degree) { - case EXYNOS_DRM_DEGREE_90: - case EXYNOS_DRM_DEGREE_270: - swap = true; - break; - case EXYNOS_DRM_DEGREE_0: - case EXYNOS_DRM_DEGREE_180: - swap = false; - break; - default: - DRM_ERROR("invalid degree.\n"); - goto err_property; - } + dev_dbg(ctx->dev, "occurred frame done at %d, status 0x%x.\n", + ctx->id, status); - /* check for buffer bound */ - if ((pos->x + pos->w > sz->hsize) || - (pos->y + pos->h > sz->vsize)) { - DRM_ERROR("out of buf bound.\n"); - goto err_property; - } + src_buf_id = gsc_get_src_buf_index(ctx); + dst_buf_id = gsc_get_dst_buf_index(ctx); - /* check for crop */ - if ((i == EXYNOS_DRM_OPS_SRC) && (pp->crop)) { - if (swap) { - if ((pos->h < pp->crop_min.hsize) || - (sz->vsize > pp->crop_max.hsize) || - (pos->w < pp->crop_min.vsize) || - (sz->hsize > pp->crop_max.vsize)) { - DRM_ERROR("out of crop size.\n"); - goto err_property; - } - } else { - if ((pos->w < pp->crop_min.hsize) || - (sz->hsize > pp->crop_max.hsize) || - (pos->h < pp->crop_min.vsize) || - (sz->vsize > pp->crop_max.vsize)) { - DRM_ERROR("out of crop size.\n"); - goto err_property; - } - } - } + DRM_DEBUG_KMS("buf_id_src[%d]buf_id_dst[%d]\n", src_buf_id, + dst_buf_id); - /* check for scale */ - if ((i == EXYNOS_DRM_OPS_DST) && (pp->scale)) { - if (swap) { - if ((pos->h < pp->scale_min.hsize) || - (sz->vsize > pp->scale_max.hsize) || - (pos->w < pp->scale_min.vsize) || - (sz->hsize > pp->scale_max.vsize)) { - DRM_ERROR("out of scale size.\n"); - goto err_property; - } - } else { - if ((pos->w < pp->scale_min.hsize) || - (sz->hsize > pp->scale_max.hsize) || - (pos->h < pp->scale_min.vsize) || - (sz->vsize > pp->scale_max.vsize)) { - DRM_ERROR("out of scale size.\n"); - goto err_property; - } - } - } + if (src_buf_id < 0 || dst_buf_id < 0) + err = -EINVAL; } - return 0; - -err_property: - for_each_ipp_ops(i) { - if ((i == EXYNOS_DRM_OPS_SRC) && - (property->cmd == IPP_CMD_WB)) - continue; + if (ctx->task) { + struct exynos_drm_ipp_task *task = ctx->task; - config = &property->config[i]; - pos = &config->pos; - sz = &config->sz; - - DRM_ERROR("[%s]f[%d]r[%d]pos[%d %d %d %d]sz[%d %d]\n", - i ? "dst" : "src", config->flip, config->degree, - pos->x, pos->y, pos->w, pos->h, - sz->hsize, sz->vsize); + ctx->task = NULL; + pm_runtime_mark_last_busy(ctx->dev); + pm_runtime_put_autosuspend(ctx->dev); + exynos_drm_ipp_task_done(task, err); } - return -EINVAL; + return IRQ_HANDLED; } - -static int gsc_ippdrv_reset(struct device *dev) +static int gsc_reset(struct gsc_context *ctx) { - struct gsc_context *ctx = get_gsc_context(dev); struct gsc_scaler *sc = &ctx->sc; int ret; /* reset h/w block */ ret = gsc_sw_reset(ctx); if (ret < 0) { - dev_err(dev, "failed to reset hardware.\n"); + dev_err(ctx->dev, "failed to reset hardware.\n"); return ret; } @@ -1523,166 +1064,172 @@ static int gsc_ippdrv_reset(struct device *dev) return 0; } -static int gsc_ippdrv_start(struct device *dev, enum drm_exynos_ipp_cmd cmd) +static void gsc_start(struct gsc_context *ctx) { - struct gsc_context *ctx = get_gsc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node; - struct drm_exynos_ipp_property *property; - struct drm_exynos_ipp_config *config; - struct drm_exynos_pos img_pos[EXYNOS_DRM_OPS_MAX]; - struct drm_exynos_ipp_set_wb set_wb; u32 cfg; - int ret, i; - - DRM_DEBUG_KMS("cmd[%d]\n", cmd); - - if (!c_node) { - DRM_ERROR("failed to get c_node.\n"); - return -EINVAL; - } - - property = &c_node->property; gsc_handle_irq(ctx, true, false, true); - for_each_ipp_ops(i) { - config = &property->config[i]; - img_pos[i] = config->pos; - } + /* enable one shot */ + cfg = gsc_read(GSC_ENABLE); + cfg &= ~(GSC_ENABLE_ON_CLEAR_MASK | + GSC_ENABLE_CLK_GATE_MODE_MASK); + cfg |= GSC_ENABLE_ON_CLEAR_ONESHOT; + gsc_write(cfg, GSC_ENABLE); - switch (cmd) { - case IPP_CMD_M2M: - /* enable one shot */ - cfg = gsc_read(GSC_ENABLE); - cfg &= ~(GSC_ENABLE_ON_CLEAR_MASK | - GSC_ENABLE_CLK_GATE_MODE_MASK); - cfg |= GSC_ENABLE_ON_CLEAR_ONESHOT; - gsc_write(cfg, GSC_ENABLE); - - /* src dma memory */ - cfg = gsc_read(GSC_IN_CON); - cfg &= ~(GSC_IN_PATH_MASK | GSC_IN_LOCAL_SEL_MASK); - cfg |= GSC_IN_PATH_MEMORY; - gsc_write(cfg, GSC_IN_CON); - - /* dst dma memory */ - cfg = gsc_read(GSC_OUT_CON); - cfg |= GSC_OUT_PATH_MEMORY; - gsc_write(cfg, GSC_OUT_CON); - break; - case IPP_CMD_WB: - set_wb.enable = 1; - set_wb.refresh = property->refresh_rate; - gsc_set_gscblk_fimd_wb(ctx, set_wb.enable); - exynos_drm_ippnb_send_event(IPP_SET_WRITEBACK, (void *)&set_wb); - - /* src local path */ - cfg = gsc_read(GSC_IN_CON); - cfg &= ~(GSC_IN_PATH_MASK | GSC_IN_LOCAL_SEL_MASK); - cfg |= (GSC_IN_PATH_LOCAL | GSC_IN_LOCAL_FIMD_WB); - gsc_write(cfg, GSC_IN_CON); - - /* dst dma memory */ - cfg = gsc_read(GSC_OUT_CON); - cfg |= GSC_OUT_PATH_MEMORY; - gsc_write(cfg, GSC_OUT_CON); - break; - case IPP_CMD_OUTPUT: - /* src dma memory */ - cfg = gsc_read(GSC_IN_CON); - cfg &= ~(GSC_IN_PATH_MASK | GSC_IN_LOCAL_SEL_MASK); - cfg |= GSC_IN_PATH_MEMORY; - gsc_write(cfg, GSC_IN_CON); - - /* dst local path */ - cfg = gsc_read(GSC_OUT_CON); - cfg |= GSC_OUT_PATH_MEMORY; - gsc_write(cfg, GSC_OUT_CON); - break; - default: - ret = -EINVAL; - dev_err(dev, "invalid operations.\n"); - return ret; - } + /* src dma memory */ + cfg = gsc_read(GSC_IN_CON); + cfg &= ~(GSC_IN_PATH_MASK | GSC_IN_LOCAL_SEL_MASK); + cfg |= GSC_IN_PATH_MEMORY; + gsc_write(cfg, GSC_IN_CON); - ret = gsc_set_prescaler(ctx, &ctx->sc, - &img_pos[EXYNOS_DRM_OPS_SRC], - &img_pos[EXYNOS_DRM_OPS_DST]); - if (ret) { - dev_err(dev, "failed to set prescaler.\n"); - return ret; - } + /* dst dma memory */ + cfg = gsc_read(GSC_OUT_CON); + cfg |= GSC_OUT_PATH_MEMORY; + gsc_write(cfg, GSC_OUT_CON); gsc_set_scaler(ctx, &ctx->sc); cfg = gsc_read(GSC_ENABLE); cfg |= GSC_ENABLE_ON; gsc_write(cfg, GSC_ENABLE); +} + +static int gsc_commit(struct exynos_drm_ipp *ipp, + struct exynos_drm_ipp_task *task) +{ + struct gsc_context *ctx = container_of(ipp, struct gsc_context, ipp); + int ret; + + pm_runtime_get_sync(ctx->dev); + ctx->task = task; + + ret = gsc_reset(ctx); + if (ret) { + pm_runtime_put_autosuspend(ctx->dev); + ctx->task = NULL; + return ret; + } + + gsc_src_set_fmt(ctx, task->src.buf.fourcc); + gsc_src_set_transf(ctx, task->transform.rotation); + gsc_src_set_size(ctx, &task->src); + gsc_src_set_addr(ctx, 0, &task->src); + gsc_dst_set_fmt(ctx, task->dst.buf.fourcc); + gsc_dst_set_size(ctx, &task->dst); + gsc_dst_set_addr(ctx, 0, &task->dst); + gsc_set_prescaler(ctx, &ctx->sc, &task->src.rect, &task->dst.rect); + gsc_start(ctx); return 0; } -static void gsc_ippdrv_stop(struct device *dev, enum drm_exynos_ipp_cmd cmd) +static void gsc_abort(struct exynos_drm_ipp *ipp, + struct exynos_drm_ipp_task *task) { - struct gsc_context *ctx = get_gsc_context(dev); - struct drm_exynos_ipp_set_wb set_wb = {0, 0}; - u32 cfg; + struct gsc_context *ctx = + container_of(ipp, struct gsc_context, ipp); - DRM_DEBUG_KMS("cmd[%d]\n", cmd); + gsc_reset(ctx); + if (ctx->task) { + struct exynos_drm_ipp_task *task = ctx->task; - switch (cmd) { - case IPP_CMD_M2M: - /* bypass */ - break; - case IPP_CMD_WB: - gsc_set_gscblk_fimd_wb(ctx, set_wb.enable); - exynos_drm_ippnb_send_event(IPP_SET_WRITEBACK, (void *)&set_wb); - break; - case IPP_CMD_OUTPUT: - default: - dev_err(dev, "invalid operations.\n"); - break; + ctx->task = NULL; + pm_runtime_mark_last_busy(ctx->dev); + pm_runtime_put_autosuspend(ctx->dev); + exynos_drm_ipp_task_done(task, -EIO); } +} - gsc_handle_irq(ctx, false, false, true); +static struct exynos_drm_ipp_funcs ipp_funcs = { + .commit = gsc_commit, + .abort = gsc_abort, +}; - /* reset sequence */ - gsc_write(0xff, GSC_OUT_BASE_ADDR_Y_MASK); - gsc_write(0xff, GSC_OUT_BASE_ADDR_CB_MASK); - gsc_write(0xff, GSC_OUT_BASE_ADDR_CR_MASK); +static int gsc_bind(struct device *dev, struct device *master, void *data) +{ + struct gsc_context *ctx = dev_get_drvdata(dev); + struct drm_device *drm_dev = data; + struct exynos_drm_ipp *ipp = &ctx->ipp; - cfg = gsc_read(GSC_ENABLE); - cfg &= ~GSC_ENABLE_ON; - gsc_write(cfg, GSC_ENABLE); + ctx->drm_dev = drm_dev; + drm_iommu_attach_device(drm_dev, dev); + + exynos_drm_ipp_register(drm_dev, ipp, &ipp_funcs, + DRM_EXYNOS_IPP_CAP_CROP | DRM_EXYNOS_IPP_CAP_ROTATE | + DRM_EXYNOS_IPP_CAP_SCALE | DRM_EXYNOS_IPP_CAP_CONVERT, + ctx->formats, ctx->num_formats, "gsc"); + + dev_info(dev, "The exynos gscaler has been probed successfully\n"); + + return 0; +} + +static void gsc_unbind(struct device *dev, struct device *master, + void *data) +{ + struct gsc_context *ctx = dev_get_drvdata(dev); + struct drm_device *drm_dev = data; + struct exynos_drm_ipp *ipp = &ctx->ipp; + + exynos_drm_ipp_unregister(drm_dev, ipp); + drm_iommu_detach_device(drm_dev, dev); } +static const struct component_ops gsc_component_ops = { + .bind = gsc_bind, + .unbind = gsc_unbind, +}; + +static const unsigned int gsc_formats[] = { + DRM_FORMAT_ARGB8888, + DRM_FORMAT_XRGB8888, DRM_FORMAT_RGB565, DRM_FORMAT_BGRX8888, + DRM_FORMAT_NV12, DRM_FORMAT_NV16, DRM_FORMAT_NV21, DRM_FORMAT_NV61, + DRM_FORMAT_UYVY, DRM_FORMAT_VYUY, DRM_FORMAT_YUYV, DRM_FORMAT_YVYU, + DRM_FORMAT_YUV420, DRM_FORMAT_YVU420, DRM_FORMAT_YUV422, +}; + static int gsc_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; + struct gsc_driverdata *driver_data; + struct exynos_drm_ipp_formats *formats; struct gsc_context *ctx; struct resource *res; - struct exynos_drm_ippdrv *ippdrv; - int ret; + int ret, i; ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL); if (!ctx) return -ENOMEM; - if (dev->of_node) { - ctx->sysreg = syscon_regmap_lookup_by_phandle(dev->of_node, - "samsung,sysreg"); - if (IS_ERR(ctx->sysreg)) { - dev_warn(dev, "failed to get system register.\n"); - ctx->sysreg = NULL; - } + formats = devm_kzalloc(dev, sizeof(*formats) * + (ARRAY_SIZE(gsc_formats)), GFP_KERNEL); + if (!formats) + return -ENOMEM; + + driver_data = (struct gsc_driverdata *)of_device_get_match_data(dev); + ctx->dev = dev; + ctx->num_clocks = driver_data->num_clocks; + ctx->clk_names = driver_data->clk_names; + + for (i = 0; i < ARRAY_SIZE(gsc_formats); i++) { + formats[i].fourcc = gsc_formats[i]; + formats[i].type = DRM_EXYNOS_IPP_FORMAT_SOURCE | + DRM_EXYNOS_IPP_FORMAT_DESTINATION; + formats[i].limits = driver_data->limits; + formats[i].num_limits = driver_data->num_limits; } + ctx->formats = formats; + ctx->num_formats = ARRAY_SIZE(gsc_formats); /* clock control */ - ctx->gsc_clk = devm_clk_get(dev, "gscl"); - if (IS_ERR(ctx->gsc_clk)) { - dev_err(dev, "failed to get gsc clock.\n"); - return PTR_ERR(ctx->gsc_clk); + for (i = 0; i < ctx->num_clocks; i++) { + ctx->clocks[i] = devm_clk_get(dev, ctx->clk_names[i]); + if (IS_ERR(ctx->clocks[i])) { + dev_err(dev, "failed to get clock: %s\n", + ctx->clk_names[i]); + return PTR_ERR(ctx->clocks[i]); + } } /* resource memory */ @@ -1699,8 +1246,8 @@ static int gsc_probe(struct platform_device *pdev) } ctx->irq = res->start; - ret = devm_request_threaded_irq(dev, ctx->irq, NULL, gsc_irq_handler, - IRQF_ONESHOT, "drm_gsc", ctx); + ret = devm_request_irq(dev, ctx->irq, gsc_irq_handler, 0, + dev_name(dev), ctx); if (ret < 0) { dev_err(dev, "failed to request irq.\n"); return ret; @@ -1709,38 +1256,22 @@ static int gsc_probe(struct platform_device *pdev) /* context initailization */ ctx->id = pdev->id; - ippdrv = &ctx->ippdrv; - ippdrv->dev = dev; - ippdrv->ops[EXYNOS_DRM_OPS_SRC] = &gsc_src_ops; - ippdrv->ops[EXYNOS_DRM_OPS_DST] = &gsc_dst_ops; - ippdrv->check_property = gsc_ippdrv_check_property; - ippdrv->reset = gsc_ippdrv_reset; - ippdrv->start = gsc_ippdrv_start; - ippdrv->stop = gsc_ippdrv_stop; - ret = gsc_init_prop_list(ippdrv); - if (ret < 0) { - dev_err(dev, "failed to init property list.\n"); - return ret; - } - - DRM_DEBUG_KMS("id[%d]ippdrv[%pK]\n", ctx->id, ippdrv); - - mutex_init(&ctx->lock); platform_set_drvdata(pdev, ctx); + pm_runtime_use_autosuspend(dev); + pm_runtime_set_autosuspend_delay(dev, GSC_AUTOSUSPEND_DELAY); pm_runtime_enable(dev); - ret = exynos_drm_ippdrv_register(ippdrv); - if (ret < 0) { - dev_err(dev, "failed to register drm gsc device.\n"); - goto err_ippdrv_register; - } + ret = component_add(dev, &gsc_component_ops); + if (ret) + goto err_pm_dis; dev_info(dev, "drm gsc registered successfully.\n"); return 0; -err_ippdrv_register: +err_pm_dis: + pm_runtime_dont_use_autosuspend(dev); pm_runtime_disable(dev); return ret; } @@ -1748,13 +1279,8 @@ err_ippdrv_register: static int gsc_remove(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct gsc_context *ctx = get_gsc_context(dev); - struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv; - exynos_drm_ippdrv_unregister(ippdrv); - mutex_destroy(&ctx->lock); - - pm_runtime_set_suspended(dev); + pm_runtime_dont_use_autosuspend(dev); pm_runtime_disable(dev); return 0; @@ -1763,19 +1289,32 @@ static int gsc_remove(struct platform_device *pdev) static int __maybe_unused gsc_runtime_suspend(struct device *dev) { struct gsc_context *ctx = get_gsc_context(dev); + int i; DRM_DEBUG_KMS("id[%d]\n", ctx->id); - return gsc_clk_ctrl(ctx, false); + for (i = ctx->num_clocks - 1; i >= 0; i--) + clk_disable_unprepare(ctx->clocks[i]); + + return 0; } static int __maybe_unused gsc_runtime_resume(struct device *dev) { struct gsc_context *ctx = get_gsc_context(dev); + int i, ret; DRM_DEBUG_KMS("id[%d]\n", ctx->id); - return gsc_clk_ctrl(ctx, true); + for (i = 0; i < ctx->num_clocks; i++) { + ret = clk_prepare_enable(ctx->clocks[i]); + if (ret) { + while (--i > 0) + clk_disable_unprepare(ctx->clocks[i]); + return ret; + } + } + return 0; } static const struct dev_pm_ops gsc_pm_ops = { @@ -1784,9 +1323,66 @@ static const struct dev_pm_ops gsc_pm_ops = { SET_RUNTIME_PM_OPS(gsc_runtime_suspend, gsc_runtime_resume, NULL) }; +static const struct drm_exynos_ipp_limit gsc_5250_limits[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 32, 4800, 8 }, .v = { 16, 3344, 8 }) }, + { IPP_SIZE_LIMIT(AREA, .h = { 16, 4800, 2 }, .v = { 8, 3344, 2 }) }, + { IPP_SIZE_LIMIT(ROTATED, .h = { 32, 2048 }, .v = { 16, 2048 }) }, + { IPP_SCALE_LIMIT(.h = { (1 << 16) / 16, (1 << 16) * 8 }, + .v = { (1 << 16) / 16, (1 << 16) * 8 }) }, +}; + +static const struct drm_exynos_ipp_limit gsc_5420_limits[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 32, 4800, 8 }, .v = { 16, 3344, 8 }) }, + { IPP_SIZE_LIMIT(AREA, .h = { 16, 4800, 2 }, .v = { 8, 3344, 2 }) }, + { IPP_SIZE_LIMIT(ROTATED, .h = { 16, 2016 }, .v = { 8, 2016 }) }, + { IPP_SCALE_LIMIT(.h = { (1 << 16) / 16, (1 << 16) * 8 }, + .v = { (1 << 16) / 16, (1 << 16) * 8 }) }, +}; + +static const struct drm_exynos_ipp_limit gsc_5433_limits[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 32, 8191, 2 }, .v = { 16, 8191, 2 }) }, + { IPP_SIZE_LIMIT(AREA, .h = { 16, 4800, 1 }, .v = { 8, 3344, 1 }) }, + { IPP_SIZE_LIMIT(ROTATED, .h = { 32, 2047 }, .v = { 8, 8191 }) }, + { IPP_SCALE_LIMIT(.h = { (1 << 16) / 16, (1 << 16) * 8 }, + .v = { (1 << 16) / 16, (1 << 16) * 8 }) }, +}; + +static struct gsc_driverdata gsc_exynos5250_drvdata = { + .clk_names = {"gscl"}, + .num_clocks = 1, + .limits = gsc_5250_limits, + .num_limits = ARRAY_SIZE(gsc_5250_limits), +}; + +static struct gsc_driverdata gsc_exynos5420_drvdata = { + .clk_names = {"gscl"}, + .num_clocks = 1, + .limits = gsc_5420_limits, + .num_limits = ARRAY_SIZE(gsc_5420_limits), +}; + +static struct gsc_driverdata gsc_exynos5433_drvdata = { + .clk_names = {"pclk", "aclk", "aclk_xiu", "aclk_gsclbend"}, + .num_clocks = 4, + .limits = gsc_5433_limits, + .num_limits = ARRAY_SIZE(gsc_5433_limits), +}; + static const struct of_device_id exynos_drm_gsc_of_match[] = { - { .compatible = "samsung,exynos5-gsc" }, - { }, + { + .compatible = "samsung,exynos5-gsc", + .data = &gsc_exynos5250_drvdata, + }, { + .compatible = "samsung,exynos5250-gsc", + .data = &gsc_exynos5250_drvdata, + }, { + .compatible = "samsung,exynos5420-gsc", + .data = &gsc_exynos5420_drvdata, + }, { + .compatible = "samsung,exynos5433-gsc", + .data = &gsc_exynos5433_drvdata, + }, { + }, }; MODULE_DEVICE_TABLE(of, exynos_drm_gsc_of_match); @@ -1800,4 +1396,3 @@ struct platform_driver gsc_driver = { .of_match_table = of_match_ptr(exynos_drm_gsc_of_match), }, }; - diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.h b/drivers/gpu/drm/exynos/exynos_drm_gsc.h deleted file mode 100644 index 29ec1c5..0000000 --- a/drivers/gpu/drm/exynos/exynos_drm_gsc.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (c) 2012 Samsung Electronics Co., Ltd. - * - * Authors: - * Eunchul Kim <chulspro.kim@samsung.com> - * Jinyoung Jeon <jy0.jeon@samsung.com> - * Sangmin Lee <lsmin.lee@samsung.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#ifndef _EXYNOS_DRM_GSC_H_ -#define _EXYNOS_DRM_GSC_H_ - -/* - * TODO - * FIMD output interface notifier callback. - * Mixer output interface notifier callback. - */ - -#endif /* _EXYNOS_DRM_GSC_H_ */ diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.c b/drivers/gpu/drm/exynos/exynos_drm_ipp.c new file mode 100644 index 0000000..26374e5 --- /dev/null +++ b/drivers/gpu/drm/exynos/exynos_drm_ipp.c @@ -0,0 +1,916 @@ +/* + * Copyright (C) 2017 Samsung Electronics Co.Ltd + * Authors: + * Marek Szyprowski <m.szyprowski@samsung.com> + * + * Exynos DRM Image Post Processing (IPP) related functions + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + */ + + +#include <drm/drmP.h> +#include <drm/drm_mode.h> +#include <uapi/drm/exynos_drm.h> + +#include "exynos_drm_drv.h" +#include "exynos_drm_gem.h" +#include "exynos_drm_ipp.h" + +static int num_ipp; +static LIST_HEAD(ipp_list); + +/** + * exynos_drm_ipp_register - Register a new picture processor hardware module + * @dev: DRM device + * @ipp: ipp module to init + * @funcs: callbacks for the new ipp object + * @caps: bitmask of ipp capabilities (%DRM_EXYNOS_IPP_CAP_*) + * @formats: array of supported formats + * @num_formats: size of the supported formats array + * @name: name (for debugging purposes) + * + * Initializes a ipp module. + * + * Returns: + * Zero on success, error code on failure. + */ +int exynos_drm_ipp_register(struct drm_device *dev, struct exynos_drm_ipp *ipp, + const struct exynos_drm_ipp_funcs *funcs, unsigned int caps, + const struct exynos_drm_ipp_formats *formats, + unsigned int num_formats, const char *name) +{ + WARN_ON(!ipp); + WARN_ON(!funcs); + WARN_ON(!formats); + WARN_ON(!num_formats); + + spin_lock_init(&ipp->lock); + INIT_LIST_HEAD(&ipp->todo_list); + init_waitqueue_head(&ipp->done_wq); + ipp->dev = dev; + ipp->funcs = funcs; + ipp->capabilities = caps; + ipp->name = name; + ipp->formats = formats; + ipp->num_formats = num_formats; + + /* ipp_list modification is serialized by component framework */ + list_add_tail(&ipp->head, &ipp_list); + ipp->id = num_ipp++; + + DRM_DEBUG_DRIVER("Registered ipp %d\n", ipp->id); + + return 0; +} + +/** + * exynos_drm_ipp_unregister - Unregister the picture processor module + * @dev: DRM device + * @ipp: ipp module + */ +void exynos_drm_ipp_unregister(struct drm_device *dev, + struct exynos_drm_ipp *ipp) +{ + WARN_ON(ipp->task); + WARN_ON(!list_empty(&ipp->todo_list)); + list_del(&ipp->head); +} + +/** + * exynos_drm_ipp_ioctl_get_res_ioctl - enumerate all ipp modules + * @dev: DRM device + * @data: ioctl data + * @file_priv: DRM file info + * + * Construct a list of ipp ids. + * + * Called by the user via ioctl. + * + * Returns: + * Zero on success, negative errno on failure. + */ +int exynos_drm_ipp_get_res_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_exynos_ioctl_ipp_get_res *resp = data; + struct exynos_drm_ipp *ipp; + uint32_t __user *ipp_ptr = (uint32_t __user *) + (unsigned long)resp->ipp_id_ptr; + unsigned int count = num_ipp, copied = 0; + + /* + * This ioctl is called twice, once to determine how much space is + * needed, and the 2nd time to fill it. + */ + if (count && resp->count_ipps >= count) { + list_for_each_entry(ipp, &ipp_list, head) { + if (put_user(ipp->id, ipp_ptr + copied)) + return -EFAULT; + copied++; + } + } + resp->count_ipps = count; + + return 0; +} + +static inline struct exynos_drm_ipp *__ipp_get(uint32_t id) +{ + struct exynos_drm_ipp *ipp; + + list_for_each_entry(ipp, &ipp_list, head) + if (ipp->id == id) + return ipp; + return NULL; +} + +/** + * exynos_drm_ipp_ioctl_get_caps - get ipp module capabilities and formats + * @dev: DRM device + * @data: ioctl data + * @file_priv: DRM file info + * + * Construct a structure describing ipp module capabilities. + * + * Called by the user via ioctl. + * + * Returns: + * Zero on success, negative errno on failure. + */ +int exynos_drm_ipp_get_caps_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_exynos_ioctl_ipp_get_caps *resp = data; + void __user *ptr = (void __user *)(unsigned long)resp->formats_ptr; + struct exynos_drm_ipp *ipp; + int i; + + ipp = __ipp_get(resp->ipp_id); + if (!ipp) + return -ENOENT; + + resp->ipp_id = ipp->id; + resp->capabilities = ipp->capabilities; + + /* + * This ioctl is called twice, once to determine how much space is + * needed, and the 2nd time to fill it. + */ + if (resp->formats_count >= ipp->num_formats) { + for (i = 0; i < ipp->num_formats; i++) { + struct drm_exynos_ipp_format tmp = { + .fourcc = ipp->formats[i].fourcc, + .type = ipp->formats[i].type, + .modifier = ipp->formats[i].modifier, + }; + + if (copy_to_user(ptr, &tmp, sizeof(tmp))) + return -EFAULT; + ptr += sizeof(tmp); + } + } + resp->formats_count = ipp->num_formats; + + return 0; +} + +static inline const struct exynos_drm_ipp_formats *__ipp_format_get( + struct exynos_drm_ipp *ipp, uint32_t fourcc, + uint64_t mod, unsigned int type) +{ + int i; + + for (i = 0; i < ipp->num_formats; i++) { + if ((ipp->formats[i].type & type) && + ipp->formats[i].fourcc == fourcc && + ipp->formats[i].modifier == mod) + return &ipp->formats[i]; + } + return NULL; +} + +/** + * exynos_drm_ipp_get_limits_ioctl - get ipp module limits + * @dev: DRM device + * @data: ioctl data + * @file_priv: DRM file info + * + * Construct a structure describing ipp module limitations for provided + * picture format. + * + * Called by the user via ioctl. + * + * Returns: + * Zero on success, negative errno on failure. + */ +int exynos_drm_ipp_get_limits_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_exynos_ioctl_ipp_get_limits *resp = data; + void __user *ptr = (void __user *)(unsigned long)resp->limits_ptr; + const struct exynos_drm_ipp_formats *format; + struct exynos_drm_ipp *ipp; + + if (resp->type != DRM_EXYNOS_IPP_FORMAT_SOURCE && + resp->type != DRM_EXYNOS_IPP_FORMAT_DESTINATION) + return -EINVAL; + + ipp = __ipp_get(resp->ipp_id); + if (!ipp) + return -ENOENT; + + format = __ipp_format_get(ipp, resp->fourcc, resp->modifier, + resp->type); + if (!format) + return -EINVAL; + + /* + * This ioctl is called twice, once to determine how much space is + * needed, and the 2nd time to fill it. + */ + if (format->num_limits && resp->limits_count >= format->num_limits) + if (copy_to_user((void __user *)ptr, format->limits, + sizeof(*format->limits) * format->num_limits)) + return -EFAULT; + resp->limits_count = format->num_limits; + + return 0; +} + +struct drm_pending_exynos_ipp_event { + struct drm_pending_event base; + struct drm_exynos_ipp_event event; +}; + +static inline struct exynos_drm_ipp_task * + exynos_drm_ipp_task_alloc(struct exynos_drm_ipp *ipp) +{ + struct exynos_drm_ipp_task *task; + + task = kzalloc(sizeof(*task), GFP_KERNEL); + if (!task) + return NULL; + + task->dev = ipp->dev; + task->ipp = ipp; + + /* some defaults */ + task->src.rect.w = task->dst.rect.w = UINT_MAX; + task->src.rect.h = task->dst.rect.h = UINT_MAX; + task->transform.rotation = DRM_MODE_ROTATE_0; + + DRM_DEBUG_DRIVER("Allocated task %pK\n", task); + + return task; +} + +static const struct exynos_drm_param_map { + unsigned int id; + unsigned int size; + unsigned int offset; +} exynos_drm_ipp_params_maps[] = { + { + DRM_EXYNOS_IPP_TASK_BUFFER | DRM_EXYNOS_IPP_TASK_TYPE_SOURCE, + sizeof(struct drm_exynos_ipp_task_buffer), + offsetof(struct exynos_drm_ipp_task, src.buf), + }, { + DRM_EXYNOS_IPP_TASK_BUFFER | + DRM_EXYNOS_IPP_TASK_TYPE_DESTINATION, + sizeof(struct drm_exynos_ipp_task_buffer), + offsetof(struct exynos_drm_ipp_task, dst.buf), + }, { + DRM_EXYNOS_IPP_TASK_RECTANGLE | DRM_EXYNOS_IPP_TASK_TYPE_SOURCE, + sizeof(struct drm_exynos_ipp_task_rect), + offsetof(struct exynos_drm_ipp_task, src.rect), + }, { + DRM_EXYNOS_IPP_TASK_RECTANGLE | + DRM_EXYNOS_IPP_TASK_TYPE_DESTINATION, + sizeof(struct drm_exynos_ipp_task_rect), + offsetof(struct exynos_drm_ipp_task, dst.rect), + }, { + DRM_EXYNOS_IPP_TASK_TRANSFORM, + sizeof(struct drm_exynos_ipp_task_transform), + offsetof(struct exynos_drm_ipp_task, transform), + }, { + DRM_EXYNOS_IPP_TASK_ALPHA, + sizeof(struct drm_exynos_ipp_task_alpha), + offsetof(struct exynos_drm_ipp_task, alpha), + }, +}; + +static int exynos_drm_ipp_task_set(struct exynos_drm_ipp_task *task, + struct drm_exynos_ioctl_ipp_commit *arg) +{ + const struct exynos_drm_param_map *map = exynos_drm_ipp_params_maps; + void __user *params = (void __user *)(unsigned long)arg->params_ptr; + unsigned int size = arg->params_size; + uint32_t id; + int i; + + while (size) { + if (get_user(id, (uint32_t __user *)params)) + return -EFAULT; + + for (i = 0; i < ARRAY_SIZE(exynos_drm_ipp_params_maps); i++) + if (map[i].id == id) + break; + if (i == ARRAY_SIZE(exynos_drm_ipp_params_maps) || + map[i].size > size) + return -EINVAL; + + if (copy_from_user((void *)task + map[i].offset, params, + map[i].size)) + return -EFAULT; + + params += map[i].size; + size -= map[i].size; + } + + DRM_DEBUG_DRIVER("Got task %pK configuration from userspace\n", task); + return 0; +} + +static int exynos_drm_ipp_task_setup_buffer(struct exynos_drm_ipp_buffer *buf, + struct drm_file *filp) +{ + int ret = 0; + int i; + + /* basic checks */ + if (buf->buf.width == 0 || buf->buf.height == 0) + return -EINVAL; + buf->format = drm_format_info(buf->buf.fourcc); + for (i = 0; i < buf->format->num_planes; i++) { + unsigned int width = (i == 0) ? buf->buf.width : + DIV_ROUND_UP(buf->buf.width, buf->format->hsub); + + if (buf->buf.pitch[i] == 0) + buf->buf.pitch[i] = width * buf->format->cpp[i]; + if (buf->buf.pitch[i] < width * buf->format->cpp[i]) + return -EINVAL; + if (!buf->buf.gem_id[i]) + return -ENOENT; + } + + /* pitch for additional planes must match */ + if (buf->format->num_planes > 2 && + buf->buf.pitch[1] != buf->buf.pitch[2]) + return -EINVAL; + + /* get GEM buffers and check their size */ + for (i = 0; i < buf->format->num_planes; i++) { + unsigned int height = (i == 0) ? buf->buf.height : + DIV_ROUND_UP(buf->buf.height, buf->format->vsub); + unsigned long size = height * buf->buf.pitch[i]; + struct drm_gem_object *obj = drm_gem_object_lookup(filp, + buf->buf.gem_id[i]); + if (!obj) { + ret = -ENOENT; + goto gem_free; + } + buf->exynos_gem[i] = to_exynos_gem(obj); + + if (size + buf->buf.offset[i] > buf->exynos_gem[i]->size) { + i++; + ret = -EINVAL; + goto gem_free; + } + buf->dma_addr[i] = buf->exynos_gem[i]->dma_addr + + buf->buf.offset[i]; + } + + return 0; +gem_free: + while (i--) { + drm_gem_object_put_unlocked(&buf->exynos_gem[i]->base); + buf->exynos_gem[i] = NULL; + } + return ret; +} + +static void exynos_drm_ipp_task_release_buf(struct exynos_drm_ipp_buffer *buf) +{ + int i; + + if (!buf->exynos_gem[0]) + return; + for (i = 0; i < buf->format->num_planes; i++) + drm_gem_object_put_unlocked(&buf->exynos_gem[i]->base); +} + +static void exynos_drm_ipp_task_free(struct exynos_drm_ipp *ipp, + struct exynos_drm_ipp_task *task) +{ + DRM_DEBUG_DRIVER("Freeing task %pK\n", task); + + exynos_drm_ipp_task_release_buf(&task->src); + exynos_drm_ipp_task_release_buf(&task->dst); + if (task->event) + drm_event_cancel_free(ipp->dev, &task->event->base); + kfree(task); +} + +struct drm_ipp_limit { + struct drm_exynos_ipp_limit_val h; + struct drm_exynos_ipp_limit_val v; +}; + +enum drm_ipp_size_id { + IPP_LIMIT_BUFFER, IPP_LIMIT_AREA, IPP_LIMIT_ROTATED, IPP_LIMIT_MAX +}; + +static const enum drm_ipp_size_id limit_id_fallback[IPP_LIMIT_MAX][4] = { + [IPP_LIMIT_BUFFER] = { DRM_EXYNOS_IPP_LIMIT_SIZE_BUFFER }, + [IPP_LIMIT_AREA] = { DRM_EXYNOS_IPP_LIMIT_SIZE_AREA, + DRM_EXYNOS_IPP_LIMIT_SIZE_BUFFER }, + [IPP_LIMIT_ROTATED] = { DRM_EXYNOS_IPP_LIMIT_SIZE_ROTATED, + DRM_EXYNOS_IPP_LIMIT_SIZE_AREA, + DRM_EXYNOS_IPP_LIMIT_SIZE_BUFFER }, +}; + +static inline void __limit_set_val(unsigned int *ptr, unsigned int val) +{ + if (!*ptr) + *ptr = val; +} + +static void __get_size_limit(const struct drm_exynos_ipp_limit *limits, + unsigned int num_limits, enum drm_ipp_size_id id, + struct drm_ipp_limit *res) +{ + const struct drm_exynos_ipp_limit *l = limits; + int i = 0; + + memset(res, 0, sizeof(*res)); + for (i = 0; limit_id_fallback[id][i]; i++) + for (l = limits; l - limits < num_limits; l++) { + if (((l->type & DRM_EXYNOS_IPP_LIMIT_TYPE_MASK) != + DRM_EXYNOS_IPP_LIMIT_TYPE_SIZE) || + ((l->type & DRM_EXYNOS_IPP_LIMIT_SIZE_MASK) != + limit_id_fallback[id][i])) + continue; + __limit_set_val(&res->h.min, l->h.min); + __limit_set_val(&res->h.max, l->h.max); + __limit_set_val(&res->h.align, l->h.align); + __limit_set_val(&res->v.min, l->v.min); + __limit_set_val(&res->v.max, l->v.max); + __limit_set_val(&res->v.align, l->v.align); + } +} + +static inline bool __align_check(unsigned int val, unsigned int align) +{ + if (align && (val & (align - 1))) { + DRM_DEBUG_DRIVER("Value %d exceeds HW limits (align %d)\n", + val, align); + return false; + } + return true; +} + +static inline bool __size_limit_check(unsigned int val, + struct drm_exynos_ipp_limit_val *l) +{ + if ((l->min && val < l->min) || (l->max && val > l->max)) { + DRM_DEBUG_DRIVER("Value %d exceeds HW limits (min %d, max %d)\n", + val, l->min, l->max); + return false; + } + return __align_check(val, l->align); +} + +static int exynos_drm_ipp_check_size_limits(struct exynos_drm_ipp_buffer *buf, + const struct drm_exynos_ipp_limit *limits, unsigned int num_limits, + bool rotate, bool swap) +{ + enum drm_ipp_size_id id = rotate ? IPP_LIMIT_ROTATED : IPP_LIMIT_AREA; + struct drm_ipp_limit l; + struct drm_exynos_ipp_limit_val *lh = &l.h, *lv = &l.v; + + if (!limits) + return 0; + + __get_size_limit(limits, num_limits, IPP_LIMIT_BUFFER, &l); + if (!__size_limit_check(buf->buf.width, &l.h) || + !__size_limit_check(buf->buf.height, &l.v)) + return -EINVAL; + + if (swap) { + lv = &l.h; + lh = &l.v; + } + __get_size_limit(limits, num_limits, id, &l); + if (!__size_limit_check(buf->rect.w, lh) || + !__align_check(buf->rect.x, lh->align) || + !__size_limit_check(buf->rect.h, lv) || + !__align_check(buf->rect.y, lv->align)) + return -EINVAL; + + return 0; +} + +static inline bool __scale_limit_check(unsigned int src, unsigned int dst, + unsigned int min, unsigned int max) +{ + if ((max && (dst << 16) > src * max) || + (min && (dst << 16) < src * min)) { + DRM_DEBUG_DRIVER("Scale from %d to %d exceeds HW limits (ratio min %d.%05d, max %d.%05d)\n", + src, dst, + min >> 16, 100000 * (min & 0xffff) / (1 << 16), + max >> 16, 100000 * (max & 0xffff) / (1 << 16)); + return false; + } + return true; +} + +static int exynos_drm_ipp_check_scale_limits( + struct drm_exynos_ipp_task_rect *src, + struct drm_exynos_ipp_task_rect *dst, + const struct drm_exynos_ipp_limit *limits, + unsigned int num_limits, bool swap) +{ + const struct drm_exynos_ipp_limit_val *lh, *lv; + int dw, dh; + + for (; num_limits; limits++, num_limits--) + if ((limits->type & DRM_EXYNOS_IPP_LIMIT_TYPE_MASK) == + DRM_EXYNOS_IPP_LIMIT_TYPE_SCALE) + break; + if (!num_limits) + return 0; + + lh = (!swap) ? &limits->h : &limits->v; + lv = (!swap) ? &limits->v : &limits->h; + dw = (!swap) ? dst->w : dst->h; + dh = (!swap) ? dst->h : dst->w; + + if (!__scale_limit_check(src->w, dw, lh->min, lh->max) || + !__scale_limit_check(src->h, dh, lv->min, lv->max)) + return -EINVAL; + + return 0; +} + +static int exynos_drm_ipp_task_check(struct exynos_drm_ipp_task *task) +{ + struct exynos_drm_ipp *ipp = task->ipp; + const struct exynos_drm_ipp_formats *src_fmt, *dst_fmt; + struct exynos_drm_ipp_buffer *src = &task->src, *dst = &task->dst; + unsigned int rotation = task->transform.rotation; + int ret = 0; + bool swap = drm_rotation_90_or_270(rotation); + bool rotate = (rotation != DRM_MODE_ROTATE_0); + bool scale = false; + + DRM_DEBUG_DRIVER("Checking task %pK\n", task); + + if (src->rect.w == UINT_MAX) + src->rect.w = src->buf.width; + if (src->rect.h == UINT_MAX) + src->rect.h = src->buf.height; + if (dst->rect.w == UINT_MAX) + dst->rect.w = dst->buf.width; + if (dst->rect.h == UINT_MAX) + dst->rect.h = dst->buf.height; + + if (src->rect.x + src->rect.w > (src->buf.width) || + src->rect.y + src->rect.h > (src->buf.height) || + dst->rect.x + dst->rect.w > (dst->buf.width) || + dst->rect.y + dst->rect.h > (dst->buf.height)) { + DRM_DEBUG_DRIVER("Task %pK: defined area is outside provided buffers\n", + task); + return -EINVAL; + } + + if ((!swap && (src->rect.w != dst->rect.w || + src->rect.h != dst->rect.h)) || + (swap && (src->rect.w != dst->rect.h || + src->rect.h != dst->rect.w))) + scale = true; + + if ((!(ipp->capabilities & DRM_EXYNOS_IPP_CAP_CROP) && + (src->rect.x || src->rect.y || dst->rect.x || dst->rect.y)) || + (!(ipp->capabilities & DRM_EXYNOS_IPP_CAP_ROTATE) && rotate) || + (!(ipp->capabilities & DRM_EXYNOS_IPP_CAP_SCALE) && scale) || + (!(ipp->capabilities & DRM_EXYNOS_IPP_CAP_CONVERT) && + src->buf.fourcc != dst->buf.fourcc)) { + DRM_DEBUG_DRIVER("Task %pK: hw capabilities exceeded\n", task); + return -EINVAL; + } + + src_fmt = __ipp_format_get(ipp, src->buf.fourcc, src->buf.modifier, + DRM_EXYNOS_IPP_FORMAT_SOURCE); + if (!src_fmt) { + DRM_DEBUG_DRIVER("Task %pK: src format not supported\n", task); + return -EINVAL; + } + ret = exynos_drm_ipp_check_size_limits(src, src_fmt->limits, + src_fmt->num_limits, + rotate, false); + if (ret) + return ret; + ret = exynos_drm_ipp_check_scale_limits(&src->rect, &dst->rect, + src_fmt->limits, + src_fmt->num_limits, swap); + if (ret) + return ret; + + dst_fmt = __ipp_format_get(ipp, dst->buf.fourcc, dst->buf.modifier, + DRM_EXYNOS_IPP_FORMAT_DESTINATION); + if (!dst_fmt) { + DRM_DEBUG_DRIVER("Task %pK: dst format not supported\n", task); + return -EINVAL; + } + ret = exynos_drm_ipp_check_size_limits(dst, dst_fmt->limits, + dst_fmt->num_limits, + false, swap); + if (ret) + return ret; + ret = exynos_drm_ipp_check_scale_limits(&src->rect, &dst->rect, + dst_fmt->limits, + dst_fmt->num_limits, swap); + if (ret) + return ret; + + DRM_DEBUG_DRIVER("Task %pK: all checks done.\n", task); + + return ret; +} + +static int exynos_drm_ipp_task_setup_buffers(struct exynos_drm_ipp_task *task, + struct drm_file *filp) +{ + struct exynos_drm_ipp_buffer *src = &task->src, *dst = &task->dst; + int ret = 0; + + DRM_DEBUG_DRIVER("Setting buffer for task %pK\n", task); + + ret = exynos_drm_ipp_task_setup_buffer(src, filp); + if (ret) { + DRM_DEBUG_DRIVER("Task %pK: src buffer setup failed\n", task); + return ret; + } + ret = exynos_drm_ipp_task_setup_buffer(dst, filp); + if (ret) { + DRM_DEBUG_DRIVER("Task %pK: dst buffer setup failed\n", task); + return ret; + } + + DRM_DEBUG_DRIVER("Task %pK: buffers prepared.\n", task); + + return ret; +} + + +static int exynos_drm_ipp_event_create(struct exynos_drm_ipp_task *task, + struct drm_file *file_priv, uint64_t user_data) +{ + struct drm_pending_exynos_ipp_event *e = NULL; + int ret; + + e = kzalloc(sizeof(*e), GFP_KERNEL); + if (!e) + return -ENOMEM; + + e->event.base.type = DRM_EXYNOS_IPP_EVENT; + e->event.base.length = sizeof(e->event); + e->event.user_data = user_data; + + ret = drm_event_reserve_init(task->dev, file_priv, &e->base, + &e->event.base); + if (ret) + goto free; + + task->event = e; + return 0; +free: + kfree(e); + return ret; +} + +static void exynos_drm_ipp_event_send(struct exynos_drm_ipp_task *task) +{ + struct timespec64 now; + + ktime_get_ts64(&now); + task->event->event.tv_sec = now.tv_sec; + task->event->event.tv_usec = now.tv_nsec / NSEC_PER_USEC; + task->event->event.sequence = atomic_inc_return(&task->ipp->sequence); + + drm_send_event(task->dev, &task->event->base); +} + +static int exynos_drm_ipp_task_cleanup(struct exynos_drm_ipp_task *task) +{ + int ret = task->ret; + + if (ret == 0 && task->event) { + exynos_drm_ipp_event_send(task); + /* ensure event won't be canceled on task free */ + task->event = NULL; + } + + exynos_drm_ipp_task_free(task->ipp, task); + return ret; +} + +static void exynos_drm_ipp_cleanup_work(struct work_struct *work) +{ + struct exynos_drm_ipp_task *task = container_of(work, + struct exynos_drm_ipp_task, cleanup_work); + + exynos_drm_ipp_task_cleanup(task); +} + +static void exynos_drm_ipp_next_task(struct exynos_drm_ipp *ipp); + +/** + * exynos_drm_ipp_task_done - finish given task and set return code + * @task: ipp task to finish + * @ret: error code or 0 if operation has been performed successfully + */ +void exynos_drm_ipp_task_done(struct exynos_drm_ipp_task *task, int ret) +{ + struct exynos_drm_ipp *ipp = task->ipp; + unsigned long flags; + + DRM_DEBUG_DRIVER("ipp: %d, task %pK done: %d\n", ipp->id, task, ret); + + spin_lock_irqsave(&ipp->lock, flags); + if (ipp->task == task) + ipp->task = NULL; + task->flags |= DRM_EXYNOS_IPP_TASK_DONE; + task->ret = ret; + spin_unlock_irqrestore(&ipp->lock, flags); + + exynos_drm_ipp_next_task(ipp); + wake_up(&ipp->done_wq); + + if (task->flags & DRM_EXYNOS_IPP_TASK_ASYNC) { + INIT_WORK(&task->cleanup_work, exynos_drm_ipp_cleanup_work); + schedule_work(&task->cleanup_work); + } +} + +static void exynos_drm_ipp_next_task(struct exynos_drm_ipp *ipp) +{ + struct exynos_drm_ipp_task *task; + unsigned long flags; + int ret; + + DRM_DEBUG_DRIVER("ipp: %d, try to run new task\n", ipp->id); + + spin_lock_irqsave(&ipp->lock, flags); + + if (ipp->task || list_empty(&ipp->todo_list)) { + spin_unlock_irqrestore(&ipp->lock, flags); + return; + } + + task = list_first_entry(&ipp->todo_list, struct exynos_drm_ipp_task, + head); + list_del_init(&task->head); + ipp->task = task; + + spin_unlock_irqrestore(&ipp->lock, flags); + + DRM_DEBUG_DRIVER("ipp: %d, selected task %pK to run\n", ipp->id, task); + + ret = ipp->funcs->commit(ipp, task); + if (ret) + exynos_drm_ipp_task_done(task, ret); +} + +static void exynos_drm_ipp_schedule_task(struct exynos_drm_ipp *ipp, + struct exynos_drm_ipp_task *task) +{ + unsigned long flags; + + spin_lock_irqsave(&ipp->lock, flags); + list_add(&task->head, &ipp->todo_list); + spin_unlock_irqrestore(&ipp->lock, flags); + + exynos_drm_ipp_next_task(ipp); +} + +static void exynos_drm_ipp_task_abort(struct exynos_drm_ipp *ipp, + struct exynos_drm_ipp_task *task) +{ + unsigned long flags; + + spin_lock_irqsave(&ipp->lock, flags); + if (task->flags & DRM_EXYNOS_IPP_TASK_DONE) { + /* already completed task */ + exynos_drm_ipp_task_cleanup(task); + } else if (ipp->task != task) { + /* task has not been scheduled for execution yet */ + list_del_init(&task->head); + exynos_drm_ipp_task_cleanup(task); + } else { + /* + * currently processed task, call abort() and perform + * cleanup with async worker + */ + task->flags |= DRM_EXYNOS_IPP_TASK_ASYNC; + spin_unlock_irqrestore(&ipp->lock, flags); + if (ipp->funcs->abort) + ipp->funcs->abort(ipp, task); + return; + } + spin_unlock_irqrestore(&ipp->lock, flags); +} + +/** + * exynos_drm_ipp_commit_ioctl - perform image processing operation + * @dev: DRM device + * @data: ioctl data + * @file_priv: DRM file info + * + * Construct a ipp task from the set of properties provided from the user + * and try to schedule it to framebuffer processor hardware. + * + * Called by the user via ioctl. + * + * Returns: + * Zero on success, negative errno on failure. + */ +int exynos_drm_ipp_commit_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_exynos_ioctl_ipp_commit *arg = data; + struct exynos_drm_ipp *ipp; + struct exynos_drm_ipp_task *task; + int ret = 0; + + if ((arg->flags & ~DRM_EXYNOS_IPP_FLAGS) || arg->reserved) + return -EINVAL; + + /* can't test and expect an event at the same time */ + if ((arg->flags & DRM_EXYNOS_IPP_FLAG_TEST_ONLY) && + (arg->flags & DRM_EXYNOS_IPP_FLAG_EVENT)) + return -EINVAL; + + ipp = __ipp_get(arg->ipp_id); + if (!ipp) + return -ENOENT; + + task = exynos_drm_ipp_task_alloc(ipp); + if (!task) + return -ENOMEM; + + ret = exynos_drm_ipp_task_set(task, arg); + if (ret) + goto free; + + ret = exynos_drm_ipp_task_check(task); + if (ret) + goto free; + + ret = exynos_drm_ipp_task_setup_buffers(task, file_priv); + if (ret || arg->flags & DRM_EXYNOS_IPP_FLAG_TEST_ONLY) + goto free; + + if (arg->flags & DRM_EXYNOS_IPP_FLAG_EVENT) { + ret = exynos_drm_ipp_event_create(task, file_priv, + arg->user_data); + if (ret) + goto free; + } + + /* + * Queue task for processing on the hardware. task object will be + * then freed after exynos_drm_ipp_task_done() + */ + if (arg->flags & DRM_EXYNOS_IPP_FLAG_NONBLOCK) { + DRM_DEBUG_DRIVER("ipp: %d, nonblocking processing task %pK\n", + ipp->id, task); + + task->flags |= DRM_EXYNOS_IPP_TASK_ASYNC; + exynos_drm_ipp_schedule_task(task->ipp, task); + ret = 0; + } else { + DRM_DEBUG_DRIVER("ipp: %d, processing task %pK\n", ipp->id, + task); + exynos_drm_ipp_schedule_task(ipp, task); + ret = wait_event_interruptible(ipp->done_wq, + task->flags & DRM_EXYNOS_IPP_TASK_DONE); + if (ret) + exynos_drm_ipp_task_abort(ipp, task); + else + ret = exynos_drm_ipp_task_cleanup(task); + } + return ret; +free: + exynos_drm_ipp_task_free(ipp, task); + + return ret; +} diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.h b/drivers/gpu/drm/exynos/exynos_drm_ipp.h new file mode 100644 index 0000000..0b27d4a --- /dev/null +++ b/drivers/gpu/drm/exynos/exynos_drm_ipp.h @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2017 Samsung Electronics Co., Ltd. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#ifndef _EXYNOS_DRM_IPP_H_ +#define _EXYNOS_DRM_IPP_H_ + +#include <drm/drmP.h> + +struct exynos_drm_ipp; +struct exynos_drm_ipp_task; + +/** + * struct exynos_drm_ipp_funcs - exynos_drm_ipp control functions + */ +struct exynos_drm_ipp_funcs { + /** + * @commit: + * + * This is the main entry point to start framebuffer processing + * in the hardware. The exynos_drm_ipp_task has been already validated. + * This function must not wait until the device finishes processing. + * When the driver finishes processing, it has to call + * exynos_exynos_drm_ipp_task_done() function. + * + * RETURNS: + * + * 0 on success or negative error codes in case of failure. + */ + int (*commit)(struct exynos_drm_ipp *ipp, + struct exynos_drm_ipp_task *task); + + /** + * @abort: + * + * Informs the driver that it has to abort the currently running + * task as soon as possible (i.e. as soon as it can stop the device + * safely), even if the task would not have been finished by then. + * After the driver performs the necessary steps, it has to call + * exynos_drm_ipp_task_done() (as if the task ended normally). + * This function does not have to (and will usually not) wait + * until the device enters a state when it can be stopped. + */ + void (*abort)(struct exynos_drm_ipp *ipp, + struct exynos_drm_ipp_task *task); +}; + +/** + * struct exynos_drm_ipp - central picture processor module structure + */ +struct exynos_drm_ipp { + struct drm_device *dev; + struct list_head head; + unsigned int id; + + const char *name; + const struct exynos_drm_ipp_funcs *funcs; + unsigned int capabilities; + const struct exynos_drm_ipp_formats *formats; + unsigned int num_formats; + atomic_t sequence; + + spinlock_t lock; + struct exynos_drm_ipp_task *task; + struct list_head todo_list; + wait_queue_head_t done_wq; +}; + +struct exynos_drm_ipp_buffer { + struct drm_exynos_ipp_task_buffer buf; + struct drm_exynos_ipp_task_rect rect; + + struct exynos_drm_gem *exynos_gem[MAX_FB_BUFFER]; + const struct drm_format_info *format; + dma_addr_t dma_addr[MAX_FB_BUFFER]; +}; + +/** + * struct exynos_drm_ipp_task - a structure describing transformation that + * has to be performed by the picture processor hardware module + */ +struct exynos_drm_ipp_task { + struct drm_device *dev; + struct exynos_drm_ipp *ipp; + struct list_head head; + + struct exynos_drm_ipp_buffer src; + struct exynos_drm_ipp_buffer dst; + + struct drm_exynos_ipp_task_transform transform; + struct drm_exynos_ipp_task_alpha alpha; + + struct work_struct cleanup_work; + unsigned int flags; + int ret; + + struct drm_pending_exynos_ipp_event *event; +}; + +#define DRM_EXYNOS_IPP_TASK_DONE (1 << 0) +#define DRM_EXYNOS_IPP_TASK_ASYNC (1 << 1) + +struct exynos_drm_ipp_formats { + uint32_t fourcc; + uint32_t type; + uint64_t modifier; + const struct drm_exynos_ipp_limit *limits; + unsigned int num_limits; +}; + +/* helper macros to set exynos_drm_ipp_formats structure and limits*/ +#define IPP_SRCDST_MFORMAT(f, m, l) \ + .fourcc = DRM_FORMAT_##f, .modifier = m, .limits = l, \ + .num_limits = ARRAY_SIZE(l), \ + .type = (DRM_EXYNOS_IPP_FORMAT_SOURCE | \ + DRM_EXYNOS_IPP_FORMAT_DESTINATION) + +#define IPP_SRCDST_FORMAT(f, l) IPP_SRCDST_MFORMAT(f, 0, l) + +#define IPP_SIZE_LIMIT(l, val...) \ + .type = (DRM_EXYNOS_IPP_LIMIT_TYPE_SIZE | \ + DRM_EXYNOS_IPP_LIMIT_SIZE_##l), val + +#define IPP_SCALE_LIMIT(val...) \ + .type = (DRM_EXYNOS_IPP_LIMIT_TYPE_SCALE), val + +int exynos_drm_ipp_register(struct drm_device *dev, struct exynos_drm_ipp *ipp, + const struct exynos_drm_ipp_funcs *funcs, unsigned int caps, + const struct exynos_drm_ipp_formats *formats, + unsigned int num_formats, const char *name); +void exynos_drm_ipp_unregister(struct drm_device *dev, + struct exynos_drm_ipp *ipp); + +void exynos_drm_ipp_task_done(struct exynos_drm_ipp_task *task, int ret); + +#ifdef CONFIG_DRM_EXYNOS_IPP +int exynos_drm_ipp_get_res_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int exynos_drm_ipp_get_caps_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int exynos_drm_ipp_get_limits_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int exynos_drm_ipp_commit_ioctl(struct drm_device *dev, + void *data, struct drm_file *file_priv); +#else +static inline int exynos_drm_ipp_get_res_ioctl(struct drm_device *dev, + void *data, struct drm_file *file_priv) +{ + struct drm_exynos_ioctl_ipp_get_res *resp = data; + + resp->count_ipps = 0; + return 0; +} +static inline int exynos_drm_ipp_get_caps_ioctl(struct drm_device *dev, + void *data, struct drm_file *file_priv) +{ + return -ENODEV; +} +static inline int exynos_drm_ipp_get_limits_ioctl(struct drm_device *dev, + void *data, struct drm_file *file_priv) +{ + return -ENODEV; +} +static inline int exynos_drm_ipp_commit_ioctl(struct drm_device *dev, + void *data, struct drm_file *file_priv) +{ + return -ENODEV; +} +#endif +#endif diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.c b/drivers/gpu/drm/exynos/exynos_drm_rotator.c index 79282a8..1a76dd3 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_rotator.c +++ b/drivers/gpu/drm/exynos/exynos_drm_rotator.c @@ -10,6 +10,7 @@ */ #include <linux/kernel.h> +#include <linux/component.h> #include <linux/err.h> #include <linux/interrupt.h> #include <linux/io.h> @@ -22,29 +23,18 @@ #include <drm/exynos_drm.h> #include "regs-rotator.h" #include "exynos_drm_drv.h" +#include "exynos_drm_iommu.h" #include "exynos_drm_ipp.h" /* * Rotator supports image crop/rotator and input/output DMA operations. * input DMA reads image data from the memory. * output DMA writes image data to memory. - * - * M2M operation : supports crop/scale/rotation/csc so on. - * Memory ----> Rotator H/W ----> Memory. */ -/* - * TODO - * 1. check suspend/resume api if needed. - * 2. need to check use case platform_device_id. - * 3. check src/dst size with, height. - * 4. need to add supported list in prop_list. - */ +#define ROTATOR_AUTOSUSPEND_DELAY 2000 -#define get_rot_context(dev) platform_get_drvdata(to_platform_device(dev)) -#define get_ctx_from_ippdrv(ippdrv) container_of(ippdrv,\ - struct rot_context, ippdrv); -#define rot_read(offset) readl(rot->regs + (offset)) +#define rot_read(offset) readl(rot->regs + (offset)) #define rot_write(cfg, offset) writel(cfg, rot->regs + (offset)) enum rot_irq_status { @@ -52,54 +42,28 @@ enum rot_irq_status { ROT_IRQ_STATUS_ILLEGAL = 9, }; -/* - * A structure of limitation. - * - * @min_w: minimum width. - * @min_h: minimum height. - * @max_w: maximum width. - * @max_h: maximum height. - * @align: align size. - */ -struct rot_limit { - u32 min_w; - u32 min_h; - u32 max_w; - u32 max_h; - u32 align; -}; - -/* - * A structure of limitation table. - * - * @ycbcr420_2p: case of YUV. - * @rgb888: case of RGB. - */ -struct rot_limit_table { - struct rot_limit ycbcr420_2p; - struct rot_limit rgb888; +struct rot_variant { + const struct exynos_drm_ipp_formats *formats; + unsigned int num_formats; }; /* * A structure of rotator context. * @ippdrv: prepare initialization using ippdrv. - * @regs_res: register resources. * @regs: memory mapped io registers. * @clock: rotator gate clock. * @limit_tbl: limitation of rotator. * @irq: irq number. - * @cur_buf_id: current operation buffer id. - * @suspended: suspended state. */ struct rot_context { - struct exynos_drm_ippdrv ippdrv; - struct resource *regs_res; + struct exynos_drm_ipp ipp; + struct drm_device *drm_dev; + struct device *dev; void __iomem *regs; struct clk *clock; - struct rot_limit_table *limit_tbl; - int irq; - int cur_buf_id[EXYNOS_DRM_OPS_MAX]; - bool suspended; + const struct exynos_drm_ipp_formats *formats; + unsigned int num_formats; + struct exynos_drm_ipp_task *task; }; static void rotator_reg_set_irq(struct rot_context *rot, bool enable) @@ -114,15 +78,6 @@ static void rotator_reg_set_irq(struct rot_context *rot, bool enable) rot_write(val, ROT_CONFIG); } -static u32 rotator_reg_get_fmt(struct rot_context *rot) -{ - u32 val = rot_read(ROT_CONTROL); - - val &= ROT_CONTROL_FMT_MASK; - - return val; -} - static enum rot_irq_status rotator_reg_get_irq_status(struct rot_context *rot) { u32 val = rot_read(ROT_STATUS); @@ -138,9 +93,6 @@ static enum rot_irq_status rotator_reg_get_irq_status(struct rot_context *rot) static irqreturn_t rotator_irq_handler(int irq, void *arg) { struct rot_context *rot = arg; - struct exynos_drm_ippdrv *ippdrv = &rot->ippdrv; - struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node; - struct drm_exynos_ipp_event_work *event_work = c_node->event_work; enum rot_irq_status irq_status; u32 val; @@ -152,56 +104,21 @@ static irqreturn_t rotator_irq_handler(int irq, void *arg) val |= ROT_STATUS_IRQ_PENDING((u32)irq_status); rot_write(val, ROT_STATUS); - if (irq_status == ROT_IRQ_STATUS_COMPLETE) { - event_work->ippdrv = ippdrv; - event_work->buf_id[EXYNOS_DRM_OPS_DST] = - rot->cur_buf_id[EXYNOS_DRM_OPS_DST]; - queue_work(ippdrv->event_workq, &event_work->work); - } else { - DRM_ERROR("the SFR is set illegally\n"); + if (rot->task) { + struct exynos_drm_ipp_task *task = rot->task; + + rot->task = NULL; + pm_runtime_mark_last_busy(rot->dev); + pm_runtime_put_autosuspend(rot->dev); + exynos_drm_ipp_task_done(task, + irq_status == ROT_IRQ_STATUS_COMPLETE ? 0 : -EINVAL); } return IRQ_HANDLED; } -static void rotator_align_size(struct rot_context *rot, u32 fmt, u32 *hsize, - u32 *vsize) +static void rotator_src_set_fmt(struct rot_context *rot, u32 fmt) { - struct rot_limit_table *limit_tbl = rot->limit_tbl; - struct rot_limit *limit; - u32 mask, val; - - /* Get size limit */ - if (fmt == ROT_CONTROL_FMT_RGB888) - limit = &limit_tbl->rgb888; - else - limit = &limit_tbl->ycbcr420_2p; - - /* Get mask for rounding to nearest aligned val */ - mask = ~((1 << limit->align) - 1); - - /* Set aligned width */ - val = ROT_ALIGN(*hsize, limit->align, mask); - if (val < limit->min_w) - *hsize = ROT_MIN(limit->min_w, mask); - else if (val > limit->max_w) - *hsize = ROT_MAX(limit->max_w, mask); - else - *hsize = val; - - /* Set aligned height */ - val = ROT_ALIGN(*vsize, limit->align, mask); - if (val < limit->min_h) - *vsize = ROT_MIN(limit->min_h, mask); - else if (val > limit->max_h) - *vsize = ROT_MAX(limit->max_h, mask); - else - *vsize = val; -} - -static int rotator_src_set_fmt(struct device *dev, u32 fmt) -{ - struct rot_context *rot = dev_get_drvdata(dev); u32 val; val = rot_read(ROT_CONTROL); @@ -214,515 +131,176 @@ static int rotator_src_set_fmt(struct device *dev, u32 fmt) case DRM_FORMAT_XRGB8888: val |= ROT_CONTROL_FMT_RGB888; break; - default: - DRM_ERROR("invalid image format\n"); - return -EINVAL; } rot_write(val, ROT_CONTROL); - - return 0; } -static inline bool rotator_check_reg_fmt(u32 fmt) +static void rotator_src_set_buf(struct rot_context *rot, + struct exynos_drm_ipp_buffer *buf) { - if ((fmt == ROT_CONTROL_FMT_YCBCR420_2P) || - (fmt == ROT_CONTROL_FMT_RGB888)) - return true; - - return false; -} - -static int rotator_src_set_size(struct device *dev, int swap, - struct drm_exynos_pos *pos, - struct drm_exynos_sz *sz) -{ - struct rot_context *rot = dev_get_drvdata(dev); - u32 fmt, hsize, vsize; u32 val; - /* Get format */ - fmt = rotator_reg_get_fmt(rot); - if (!rotator_check_reg_fmt(fmt)) { - DRM_ERROR("invalid format.\n"); - return -EINVAL; - } - - /* Align buffer size */ - hsize = sz->hsize; - vsize = sz->vsize; - rotator_align_size(rot, fmt, &hsize, &vsize); - /* Set buffer size configuration */ - val = ROT_SET_BUF_SIZE_H(vsize) | ROT_SET_BUF_SIZE_W(hsize); + val = ROT_SET_BUF_SIZE_H(buf->buf.height) | + ROT_SET_BUF_SIZE_W(buf->buf.pitch[0] / buf->format->cpp[0]); rot_write(val, ROT_SRC_BUF_SIZE); /* Set crop image position configuration */ - val = ROT_CROP_POS_Y(pos->y) | ROT_CROP_POS_X(pos->x); + val = ROT_CROP_POS_Y(buf->rect.y) | ROT_CROP_POS_X(buf->rect.x); rot_write(val, ROT_SRC_CROP_POS); - val = ROT_SRC_CROP_SIZE_H(pos->h) | ROT_SRC_CROP_SIZE_W(pos->w); + val = ROT_SRC_CROP_SIZE_H(buf->rect.h) | + ROT_SRC_CROP_SIZE_W(buf->rect.w); rot_write(val, ROT_SRC_CROP_SIZE); - return 0; + /* Set buffer DMA address */ + rot_write(buf->dma_addr[0], ROT_SRC_BUF_ADDR(0)); + rot_write(buf->dma_addr[1], ROT_SRC_BUF_ADDR(1)); } -static int rotator_src_set_addr(struct device *dev, - struct drm_exynos_ipp_buf_info *buf_info, - u32 buf_id, enum drm_exynos_ipp_buf_type buf_type) +static void rotator_dst_set_transf(struct rot_context *rot, + unsigned int rotation) { - struct rot_context *rot = dev_get_drvdata(dev); - dma_addr_t addr[EXYNOS_DRM_PLANAR_MAX]; - u32 val, fmt, hsize, vsize; - int i; - - /* Set current buf_id */ - rot->cur_buf_id[EXYNOS_DRM_OPS_SRC] = buf_id; - - switch (buf_type) { - case IPP_BUF_ENQUEUE: - /* Set address configuration */ - for_each_ipp_planar(i) - addr[i] = buf_info->base[i]; - - /* Get format */ - fmt = rotator_reg_get_fmt(rot); - if (!rotator_check_reg_fmt(fmt)) { - DRM_ERROR("invalid format.\n"); - return -EINVAL; - } - - /* Re-set cb planar for NV12 format */ - if ((fmt == ROT_CONTROL_FMT_YCBCR420_2P) && - !addr[EXYNOS_DRM_PLANAR_CB]) { - - val = rot_read(ROT_SRC_BUF_SIZE); - hsize = ROT_GET_BUF_SIZE_W(val); - vsize = ROT_GET_BUF_SIZE_H(val); - - /* Set cb planar */ - addr[EXYNOS_DRM_PLANAR_CB] = - addr[EXYNOS_DRM_PLANAR_Y] + hsize * vsize; - } - - for_each_ipp_planar(i) - rot_write(addr[i], ROT_SRC_BUF_ADDR(i)); - break; - case IPP_BUF_DEQUEUE: - for_each_ipp_planar(i) - rot_write(0x0, ROT_SRC_BUF_ADDR(i)); - break; - default: - /* Nothing to do */ - break; - } - - return 0; -} - -static int rotator_dst_set_transf(struct device *dev, - enum drm_exynos_degree degree, - enum drm_exynos_flip flip, bool *swap) -{ - struct rot_context *rot = dev_get_drvdata(dev); u32 val; /* Set transform configuration */ val = rot_read(ROT_CONTROL); val &= ~ROT_CONTROL_FLIP_MASK; - switch (flip) { - case EXYNOS_DRM_FLIP_VERTICAL: - val |= ROT_CONTROL_FLIP_VERTICAL; - break; - case EXYNOS_DRM_FLIP_HORIZONTAL: + if (rotation & DRM_MODE_REFLECT_X) val |= ROT_CONTROL_FLIP_HORIZONTAL; - break; - default: - /* Flip None */ - break; - } + if (rotation & DRM_MODE_REFLECT_Y) + val |= ROT_CONTROL_FLIP_VERTICAL; val &= ~ROT_CONTROL_ROT_MASK; - switch (degree) { - case EXYNOS_DRM_DEGREE_90: + if (rotation & DRM_MODE_ROTATE_90) val |= ROT_CONTROL_ROT_90; - break; - case EXYNOS_DRM_DEGREE_180: + else if (rotation & DRM_MODE_ROTATE_180) val |= ROT_CONTROL_ROT_180; - break; - case EXYNOS_DRM_DEGREE_270: + else if (rotation & DRM_MODE_ROTATE_270) val |= ROT_CONTROL_ROT_270; - break; - default: - /* Rotation 0 Degree */ - break; - } rot_write(val, ROT_CONTROL); - - /* Check degree for setting buffer size swap */ - if ((degree == EXYNOS_DRM_DEGREE_90) || - (degree == EXYNOS_DRM_DEGREE_270)) - *swap = true; - else - *swap = false; - - return 0; } -static int rotator_dst_set_size(struct device *dev, int swap, - struct drm_exynos_pos *pos, - struct drm_exynos_sz *sz) +static void rotator_dst_set_buf(struct rot_context *rot, + struct exynos_drm_ipp_buffer *buf) { - struct rot_context *rot = dev_get_drvdata(dev); - u32 val, fmt, hsize, vsize; - - /* Get format */ - fmt = rotator_reg_get_fmt(rot); - if (!rotator_check_reg_fmt(fmt)) { - DRM_ERROR("invalid format.\n"); - return -EINVAL; - } - - /* Align buffer size */ - hsize = sz->hsize; - vsize = sz->vsize; - rotator_align_size(rot, fmt, &hsize, &vsize); + u32 val; /* Set buffer size configuration */ - val = ROT_SET_BUF_SIZE_H(vsize) | ROT_SET_BUF_SIZE_W(hsize); + val = ROT_SET_BUF_SIZE_H(buf->buf.height) | + ROT_SET_BUF_SIZE_W(buf->buf.pitch[0] / buf->format->cpp[0]); rot_write(val, ROT_DST_BUF_SIZE); /* Set crop image position configuration */ - val = ROT_CROP_POS_Y(pos->y) | ROT_CROP_POS_X(pos->x); + val = ROT_CROP_POS_Y(buf->rect.y) | ROT_CROP_POS_X(buf->rect.x); rot_write(val, ROT_DST_CROP_POS); - return 0; + /* Set buffer DMA address */ + rot_write(buf->dma_addr[0], ROT_DST_BUF_ADDR(0)); + rot_write(buf->dma_addr[1], ROT_DST_BUF_ADDR(1)); } -static int rotator_dst_set_addr(struct device *dev, - struct drm_exynos_ipp_buf_info *buf_info, - u32 buf_id, enum drm_exynos_ipp_buf_type buf_type) +static void rotator_start(struct rot_context *rot) { - struct rot_context *rot = dev_get_drvdata(dev); - dma_addr_t addr[EXYNOS_DRM_PLANAR_MAX]; - u32 val, fmt, hsize, vsize; - int i; - - /* Set current buf_id */ - rot->cur_buf_id[EXYNOS_DRM_OPS_DST] = buf_id; - - switch (buf_type) { - case IPP_BUF_ENQUEUE: - /* Set address configuration */ - for_each_ipp_planar(i) - addr[i] = buf_info->base[i]; - - /* Get format */ - fmt = rotator_reg_get_fmt(rot); - if (!rotator_check_reg_fmt(fmt)) { - DRM_ERROR("invalid format.\n"); - return -EINVAL; - } - - /* Re-set cb planar for NV12 format */ - if ((fmt == ROT_CONTROL_FMT_YCBCR420_2P) && - !addr[EXYNOS_DRM_PLANAR_CB]) { - /* Get buf size */ - val = rot_read(ROT_DST_BUF_SIZE); - - hsize = ROT_GET_BUF_SIZE_W(val); - vsize = ROT_GET_BUF_SIZE_H(val); - - /* Set cb planar */ - addr[EXYNOS_DRM_PLANAR_CB] = - addr[EXYNOS_DRM_PLANAR_Y] + hsize * vsize; - } - - for_each_ipp_planar(i) - rot_write(addr[i], ROT_DST_BUF_ADDR(i)); - break; - case IPP_BUF_DEQUEUE: - for_each_ipp_planar(i) - rot_write(0x0, ROT_DST_BUF_ADDR(i)); - break; - default: - /* Nothing to do */ - break; - } + u32 val; - return 0; + /* Set interrupt enable */ + rotator_reg_set_irq(rot, true); + + val = rot_read(ROT_CONTROL); + val |= ROT_CONTROL_START; + rot_write(val, ROT_CONTROL); } -static struct exynos_drm_ipp_ops rot_src_ops = { - .set_fmt = rotator_src_set_fmt, - .set_size = rotator_src_set_size, - .set_addr = rotator_src_set_addr, -}; +static int rotator_commit(struct exynos_drm_ipp *ipp, + struct exynos_drm_ipp_task *task) +{ + struct rot_context *rot = + container_of(ipp, struct rot_context, ipp); -static struct exynos_drm_ipp_ops rot_dst_ops = { - .set_transf = rotator_dst_set_transf, - .set_size = rotator_dst_set_size, - .set_addr = rotator_dst_set_addr, -}; + pm_runtime_get_sync(rot->dev); + rot->task = task; -static int rotator_init_prop_list(struct exynos_drm_ippdrv *ippdrv) -{ - struct drm_exynos_ipp_prop_list *prop_list = &ippdrv->prop_list; - - prop_list->version = 1; - prop_list->flip = (1 << EXYNOS_DRM_FLIP_VERTICAL) | - (1 << EXYNOS_DRM_FLIP_HORIZONTAL); - prop_list->degree = (1 << EXYNOS_DRM_DEGREE_0) | - (1 << EXYNOS_DRM_DEGREE_90) | - (1 << EXYNOS_DRM_DEGREE_180) | - (1 << EXYNOS_DRM_DEGREE_270); - prop_list->csc = 0; - prop_list->crop = 0; - prop_list->scale = 0; + rotator_src_set_fmt(rot, task->src.buf.fourcc); + rotator_src_set_buf(rot, &task->src); + rotator_dst_set_transf(rot, task->transform.rotation); + rotator_dst_set_buf(rot, &task->dst); + rotator_start(rot); return 0; } -static inline bool rotator_check_drm_fmt(u32 fmt) -{ - switch (fmt) { - case DRM_FORMAT_XRGB8888: - case DRM_FORMAT_NV12: - return true; - default: - DRM_DEBUG_KMS("not support format\n"); - return false; - } -} - -static inline bool rotator_check_drm_flip(enum drm_exynos_flip flip) -{ - switch (flip) { - case EXYNOS_DRM_FLIP_NONE: - case EXYNOS_DRM_FLIP_VERTICAL: - case EXYNOS_DRM_FLIP_HORIZONTAL: - case EXYNOS_DRM_FLIP_BOTH: - return true; - default: - DRM_DEBUG_KMS("invalid flip\n"); - return false; - } -} +static const struct exynos_drm_ipp_funcs ipp_funcs = { + .commit = rotator_commit, +}; -static int rotator_ippdrv_check_property(struct device *dev, - struct drm_exynos_ipp_property *property) +static int rotator_bind(struct device *dev, struct device *master, void *data) { - struct drm_exynos_ipp_config *src_config = - &property->config[EXYNOS_DRM_OPS_SRC]; - struct drm_exynos_ipp_config *dst_config = - &property->config[EXYNOS_DRM_OPS_DST]; - struct drm_exynos_pos *src_pos = &src_config->pos; - struct drm_exynos_pos *dst_pos = &dst_config->pos; - struct drm_exynos_sz *src_sz = &src_config->sz; - struct drm_exynos_sz *dst_sz = &dst_config->sz; - bool swap = false; - - /* Check format configuration */ - if (src_config->fmt != dst_config->fmt) { - DRM_DEBUG_KMS("not support csc feature\n"); - return -EINVAL; - } - - if (!rotator_check_drm_fmt(dst_config->fmt)) { - DRM_DEBUG_KMS("invalid format\n"); - return -EINVAL; - } - - /* Check transform configuration */ - if (src_config->degree != EXYNOS_DRM_DEGREE_0) { - DRM_DEBUG_KMS("not support source-side rotation\n"); - return -EINVAL; - } - - switch (dst_config->degree) { - case EXYNOS_DRM_DEGREE_90: - case EXYNOS_DRM_DEGREE_270: - swap = true; - case EXYNOS_DRM_DEGREE_0: - case EXYNOS_DRM_DEGREE_180: - /* No problem */ - break; - default: - DRM_DEBUG_KMS("invalid degree\n"); - return -EINVAL; - } - - if (src_config->flip != EXYNOS_DRM_FLIP_NONE) { - DRM_DEBUG_KMS("not support source-side flip\n"); - return -EINVAL; - } + struct rot_context *rot = dev_get_drvdata(dev); + struct drm_device *drm_dev = data; + struct exynos_drm_ipp *ipp = &rot->ipp; - if (!rotator_check_drm_flip(dst_config->flip)) { - DRM_DEBUG_KMS("invalid flip\n"); - return -EINVAL; - } + rot->drm_dev = drm_dev; + drm_iommu_attach_device(drm_dev, dev); - /* Check size configuration */ - if ((src_pos->x + src_pos->w > src_sz->hsize) || - (src_pos->y + src_pos->h > src_sz->vsize)) { - DRM_DEBUG_KMS("out of source buffer bound\n"); - return -EINVAL; - } + exynos_drm_ipp_register(drm_dev, ipp, &ipp_funcs, + DRM_EXYNOS_IPP_CAP_CROP | DRM_EXYNOS_IPP_CAP_ROTATE, + rot->formats, rot->num_formats, "rotator"); - if (swap) { - if ((dst_pos->x + dst_pos->h > dst_sz->vsize) || - (dst_pos->y + dst_pos->w > dst_sz->hsize)) { - DRM_DEBUG_KMS("out of destination buffer bound\n"); - return -EINVAL; - } - - if ((src_pos->w != dst_pos->h) || (src_pos->h != dst_pos->w)) { - DRM_DEBUG_KMS("not support scale feature\n"); - return -EINVAL; - } - } else { - if ((dst_pos->x + dst_pos->w > dst_sz->hsize) || - (dst_pos->y + dst_pos->h > dst_sz->vsize)) { - DRM_DEBUG_KMS("out of destination buffer bound\n"); - return -EINVAL; - } - - if ((src_pos->w != dst_pos->w) || (src_pos->h != dst_pos->h)) { - DRM_DEBUG_KMS("not support scale feature\n"); - return -EINVAL; - } - } + dev_info(dev, "The exynos rotator has been probed successfully\n"); return 0; } -static int rotator_ippdrv_start(struct device *dev, enum drm_exynos_ipp_cmd cmd) +static void rotator_unbind(struct device *dev, struct device *master, + void *data) { struct rot_context *rot = dev_get_drvdata(dev); - u32 val; - - if (rot->suspended) { - DRM_ERROR("suspended state\n"); - return -EPERM; - } - - if (cmd != IPP_CMD_M2M) { - DRM_ERROR("not support cmd: %d\n", cmd); - return -EINVAL; - } - - /* Set interrupt enable */ - rotator_reg_set_irq(rot, true); - - val = rot_read(ROT_CONTROL); - val |= ROT_CONTROL_START; - - rot_write(val, ROT_CONTROL); + struct drm_device *drm_dev = data; + struct exynos_drm_ipp *ipp = &rot->ipp; - return 0; + exynos_drm_ipp_unregister(drm_dev, ipp); + drm_iommu_detach_device(rot->drm_dev, rot->dev); } -static struct rot_limit_table rot_limit_tbl_4210 = { - .ycbcr420_2p = { - .min_w = 32, - .min_h = 32, - .max_w = SZ_64K, - .max_h = SZ_64K, - .align = 3, - }, - .rgb888 = { - .min_w = 8, - .min_h = 8, - .max_w = SZ_16K, - .max_h = SZ_16K, - .align = 2, - }, -}; - -static struct rot_limit_table rot_limit_tbl_4x12 = { - .ycbcr420_2p = { - .min_w = 32, - .min_h = 32, - .max_w = SZ_32K, - .max_h = SZ_32K, - .align = 3, - }, - .rgb888 = { - .min_w = 8, - .min_h = 8, - .max_w = SZ_8K, - .max_h = SZ_8K, - .align = 2, - }, +static const struct component_ops rotator_component_ops = { + .bind = rotator_bind, + .unbind = rotator_unbind, }; -static struct rot_limit_table rot_limit_tbl_5250 = { - .ycbcr420_2p = { - .min_w = 32, - .min_h = 32, - .max_w = SZ_32K, - .max_h = SZ_32K, - .align = 3, - }, - .rgb888 = { - .min_w = 8, - .min_h = 8, - .max_w = SZ_8K, - .max_h = SZ_8K, - .align = 1, - }, -}; - -static const struct of_device_id exynos_rotator_match[] = { - { - .compatible = "samsung,exynos4210-rotator", - .data = &rot_limit_tbl_4210, - }, - { - .compatible = "samsung,exynos4212-rotator", - .data = &rot_limit_tbl_4x12, - }, - { - .compatible = "samsung,exynos5250-rotator", - .data = &rot_limit_tbl_5250, - }, - {}, -}; -MODULE_DEVICE_TABLE(of, exynos_rotator_match); - static int rotator_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; + struct resource *regs_res; struct rot_context *rot; - struct exynos_drm_ippdrv *ippdrv; + const struct rot_variant *variant; + int irq; int ret; - if (!dev->of_node) { - dev_err(dev, "cannot find of_node.\n"); - return -ENODEV; - } - rot = devm_kzalloc(dev, sizeof(*rot), GFP_KERNEL); if (!rot) return -ENOMEM; - rot->limit_tbl = (struct rot_limit_table *) - of_device_get_match_data(dev); - rot->regs_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - rot->regs = devm_ioremap_resource(dev, rot->regs_res); + variant = of_device_get_match_data(dev); + rot->formats = variant->formats; + rot->num_formats = variant->num_formats; + rot->dev = dev; + regs_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + rot->regs = devm_ioremap_resource(dev, regs_res); if (IS_ERR(rot->regs)) return PTR_ERR(rot->regs); - rot->irq = platform_get_irq(pdev, 0); - if (rot->irq < 0) { + irq = platform_get_irq(pdev, 0); + if (irq < 0) { dev_err(dev, "failed to get irq\n"); - return rot->irq; + return irq; } - ret = devm_request_threaded_irq(dev, rot->irq, NULL, - rotator_irq_handler, IRQF_ONESHOT, "drm_rotator", rot); + ret = devm_request_irq(dev, irq, rotator_irq_handler, 0, dev_name(dev), + rot); if (ret < 0) { dev_err(dev, "failed to request irq\n"); return ret; @@ -734,35 +312,19 @@ static int rotator_probe(struct platform_device *pdev) return PTR_ERR(rot->clock); } + pm_runtime_use_autosuspend(dev); + pm_runtime_set_autosuspend_delay(dev, ROTATOR_AUTOSUSPEND_DELAY); pm_runtime_enable(dev); - - ippdrv = &rot->ippdrv; - ippdrv->dev = dev; - ippdrv->ops[EXYNOS_DRM_OPS_SRC] = &rot_src_ops; - ippdrv->ops[EXYNOS_DRM_OPS_DST] = &rot_dst_ops; - ippdrv->check_property = rotator_ippdrv_check_property; - ippdrv->start = rotator_ippdrv_start; - ret = rotator_init_prop_list(ippdrv); - if (ret < 0) { - dev_err(dev, "failed to init property list.\n"); - goto err_ippdrv_register; - } - - DRM_DEBUG_KMS("ippdrv[%pK]\n", ippdrv); - platform_set_drvdata(pdev, rot); - ret = exynos_drm_ippdrv_register(ippdrv); - if (ret < 0) { - dev_err(dev, "failed to register drm rotator device\n"); - goto err_ippdrv_register; - } - - dev_info(dev, "The exynos rotator is probed successfully\n"); + ret = component_add(dev, &rotator_component_ops); + if (ret) + goto err_component; return 0; -err_ippdrv_register: +err_component: + pm_runtime_dont_use_autosuspend(dev); pm_runtime_disable(dev); return ret; } @@ -770,45 +332,101 @@ err_ippdrv_register: static int rotator_remove(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct rot_context *rot = dev_get_drvdata(dev); - struct exynos_drm_ippdrv *ippdrv = &rot->ippdrv; - - exynos_drm_ippdrv_unregister(ippdrv); + component_del(dev, &rotator_component_ops); + pm_runtime_dont_use_autosuspend(dev); pm_runtime_disable(dev); return 0; } #ifdef CONFIG_PM -static int rotator_clk_crtl(struct rot_context *rot, bool enable) -{ - if (enable) { - clk_prepare_enable(rot->clock); - rot->suspended = false; - } else { - clk_disable_unprepare(rot->clock); - rot->suspended = true; - } - - return 0; -} - static int rotator_runtime_suspend(struct device *dev) { struct rot_context *rot = dev_get_drvdata(dev); - return rotator_clk_crtl(rot, false); + clk_disable_unprepare(rot->clock); + return 0; } static int rotator_runtime_resume(struct device *dev) { struct rot_context *rot = dev_get_drvdata(dev); - return rotator_clk_crtl(rot, true); + return clk_prepare_enable(rot->clock); } #endif +static const struct drm_exynos_ipp_limit rotator_4210_rbg888_limits[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 8, SZ_16K }, .v = { 8, SZ_16K }) }, + { IPP_SIZE_LIMIT(AREA, .h.align = 4, .v.align = 4) }, +}; + +static const struct drm_exynos_ipp_limit rotator_4412_rbg888_limits[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 8, SZ_8K }, .v = { 8, SZ_8K }) }, + { IPP_SIZE_LIMIT(AREA, .h.align = 4, .v.align = 4) }, +}; + +static const struct drm_exynos_ipp_limit rotator_5250_rbg888_limits[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 8, SZ_8K }, .v = { 8, SZ_8K }) }, + { IPP_SIZE_LIMIT(AREA, .h.align = 2, .v.align = 2) }, +}; + +static const struct drm_exynos_ipp_limit rotator_4210_yuv_limits[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 32, SZ_64K }, .v = { 32, SZ_64K }) }, + { IPP_SIZE_LIMIT(AREA, .h.align = 8, .v.align = 8) }, +}; + +static const struct drm_exynos_ipp_limit rotator_4412_yuv_limits[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 32, SZ_32K }, .v = { 32, SZ_32K }) }, + { IPP_SIZE_LIMIT(AREA, .h.align = 8, .v.align = 8) }, +}; + +static const struct exynos_drm_ipp_formats rotator_4210_formats[] = { + { IPP_SRCDST_FORMAT(XRGB8888, rotator_4210_rbg888_limits) }, + { IPP_SRCDST_FORMAT(NV12, rotator_4210_yuv_limits) }, +}; + +static const struct exynos_drm_ipp_formats rotator_4412_formats[] = { + { IPP_SRCDST_FORMAT(XRGB8888, rotator_4412_rbg888_limits) }, + { IPP_SRCDST_FORMAT(NV12, rotator_4412_yuv_limits) }, +}; + +static const struct exynos_drm_ipp_formats rotator_5250_formats[] = { + { IPP_SRCDST_FORMAT(XRGB8888, rotator_5250_rbg888_limits) }, + { IPP_SRCDST_FORMAT(NV12, rotator_4412_yuv_limits) }, +}; + +static const struct rot_variant rotator_4210_data = { + .formats = rotator_4210_formats, + .num_formats = ARRAY_SIZE(rotator_4210_formats), +}; + +static const struct rot_variant rotator_4412_data = { + .formats = rotator_4412_formats, + .num_formats = ARRAY_SIZE(rotator_4412_formats), +}; + +static const struct rot_variant rotator_5250_data = { + .formats = rotator_5250_formats, + .num_formats = ARRAY_SIZE(rotator_5250_formats), +}; + +static const struct of_device_id exynos_rotator_match[] = { + { + .compatible = "samsung,exynos4210-rotator", + .data = &rotator_4210_data, + }, { + .compatible = "samsung,exynos4212-rotator", + .data = &rotator_4412_data, + }, { + .compatible = "samsung,exynos5250-rotator", + .data = &rotator_5250_data, + }, { + }, +}; +MODULE_DEVICE_TABLE(of, exynos_rotator_match); + static const struct dev_pm_ops rotator_pm_ops = { SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) @@ -820,7 +438,7 @@ struct platform_driver rotator_driver = { .probe = rotator_probe, .remove = rotator_remove, .driver = { - .name = "exynos-rot", + .name = "exynos-rotator", .owner = THIS_MODULE, .pm = &rotator_pm_ops, .of_match_table = exynos_rotator_match, diff --git a/drivers/gpu/drm/exynos/exynos_drm_scaler.c b/drivers/gpu/drm/exynos/exynos_drm_scaler.c new file mode 100644 index 0000000..63b05b7 --- /dev/null +++ b/drivers/gpu/drm/exynos/exynos_drm_scaler.c @@ -0,0 +1,694 @@ +/* + * Copyright (C) 2017 Samsung Electronics Co.Ltd + * Author: + * Andrzej Pietrasiewicz <andrzej.p@samsung.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundationr + */ + +#include <linux/kernel.h> +#include <linux/component.h> +#include <linux/err.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/platform_device.h> +#include <linux/clk.h> +#include <linux/of_device.h> +#include <linux/pm_runtime.h> + +#include <drm/drmP.h> +#include <drm/exynos_drm.h> +#include "regs-scaler.h" +#include "exynos_drm_fb.h" +#include "exynos_drm_drv.h" +#include "exynos_drm_iommu.h" +#include "exynos_drm_ipp.h" + +#define scaler_read(offset) readl(scaler->regs + (offset)) +#define scaler_write(cfg, offset) writel(cfg, scaler->regs + (offset)) +#define SCALER_MAX_CLK 4 +#define SCALER_AUTOSUSPEND_DELAY 2000 + +struct scaler_data { + const char *clk_name[SCALER_MAX_CLK]; + unsigned int num_clk; + const struct exynos_drm_ipp_formats *formats; + unsigned int num_formats; +}; + +struct scaler_context { + struct exynos_drm_ipp ipp; + struct drm_device *drm_dev; + struct device *dev; + void __iomem *regs; + struct clk *clock[SCALER_MAX_CLK]; + struct exynos_drm_ipp_task *task; + const struct scaler_data *scaler_data; +}; + +static u32 scaler_get_format(u32 drm_fmt) +{ + switch (drm_fmt) { + case DRM_FORMAT_NV21: + return SCALER_YUV420_2P_UV; + case DRM_FORMAT_NV12: + return SCALER_YUV420_2P_VU; + case DRM_FORMAT_YUV420: + return SCALER_YUV420_3P; + case DRM_FORMAT_YUYV: + return SCALER_YUV422_1P_YUYV; + case DRM_FORMAT_UYVY: + return SCALER_YUV422_1P_UYVY; + case DRM_FORMAT_YVYU: + return SCALER_YUV422_1P_YVYU; + case DRM_FORMAT_NV61: + return SCALER_YUV422_2P_UV; + case DRM_FORMAT_NV16: + return SCALER_YUV422_2P_VU; + case DRM_FORMAT_YUV422: + return SCALER_YUV422_3P; + case DRM_FORMAT_NV42: + return SCALER_YUV444_2P_UV; + case DRM_FORMAT_NV24: + return SCALER_YUV444_2P_VU; + case DRM_FORMAT_YUV444: + return SCALER_YUV444_3P; + case DRM_FORMAT_RGB565: + return SCALER_RGB_565; + case DRM_FORMAT_XRGB1555: + return SCALER_ARGB1555; + case DRM_FORMAT_ARGB1555: + return SCALER_ARGB1555; + case DRM_FORMAT_XRGB4444: + return SCALER_ARGB4444; + case DRM_FORMAT_ARGB4444: + return SCALER_ARGB4444; + case DRM_FORMAT_XRGB8888: + return SCALER_ARGB8888; + case DRM_FORMAT_ARGB8888: + return SCALER_ARGB8888; + case DRM_FORMAT_RGBX8888: + return SCALER_RGBA8888; + case DRM_FORMAT_RGBA8888: + return SCALER_RGBA8888; + default: + break; + } + + return 0; +} + +static inline void scaler_enable_int(struct scaler_context *scaler) +{ + u32 val; + + val = SCALER_INT_EN_TIMEOUT | + SCALER_INT_EN_ILLEGAL_BLEND | + SCALER_INT_EN_ILLEGAL_RATIO | + SCALER_INT_EN_ILLEGAL_DST_HEIGHT | + SCALER_INT_EN_ILLEGAL_DST_WIDTH | + SCALER_INT_EN_ILLEGAL_DST_V_POS | + SCALER_INT_EN_ILLEGAL_DST_H_POS | + SCALER_INT_EN_ILLEGAL_DST_C_SPAN | + SCALER_INT_EN_ILLEGAL_DST_Y_SPAN | + SCALER_INT_EN_ILLEGAL_DST_CR_BASE | + SCALER_INT_EN_ILLEGAL_DST_CB_BASE | + SCALER_INT_EN_ILLEGAL_DST_Y_BASE | + SCALER_INT_EN_ILLEGAL_DST_COLOR | + SCALER_INT_EN_ILLEGAL_SRC_HEIGHT | + SCALER_INT_EN_ILLEGAL_SRC_WIDTH | + SCALER_INT_EN_ILLEGAL_SRC_CV_POS | + SCALER_INT_EN_ILLEGAL_SRC_CH_POS | + SCALER_INT_EN_ILLEGAL_SRC_YV_POS | + SCALER_INT_EN_ILLEGAL_SRC_YH_POS | + SCALER_INT_EN_ILLEGAL_DST_SPAN | + SCALER_INT_EN_ILLEGAL_SRC_Y_SPAN | + SCALER_INT_EN_ILLEGAL_SRC_CR_BASE | + SCALER_INT_EN_ILLEGAL_SRC_CB_BASE | + SCALER_INT_EN_ILLEGAL_SRC_Y_BASE | + SCALER_INT_EN_ILLEGAL_SRC_COLOR | + SCALER_INT_EN_FRAME_END; + scaler_write(val, SCALER_INT_EN); +} + +static inline void scaler_set_src_fmt(struct scaler_context *scaler, + u32 src_fmt) +{ + u32 val; + + val = SCALER_SRC_CFG_SET_COLOR_FORMAT(src_fmt); + scaler_write(val, SCALER_SRC_CFG); +} + +static inline void scaler_set_src_base(struct scaler_context *scaler, + struct exynos_drm_ipp_buffer *src_buf) +{ + static unsigned int bases[] = { + SCALER_SRC_Y_BASE, + SCALER_SRC_CB_BASE, + SCALER_SRC_CR_BASE, + }; + int i; + + for (i = 0; i < src_buf->format->num_planes; ++i) + scaler_write(src_buf->dma_addr[i], bases[i]); +} + +static inline void scaler_set_src_span(struct scaler_context *scaler, + struct exynos_drm_ipp_buffer *src_buf) +{ + u32 val; + + val = SCALER_SRC_SPAN_SET_Y_SPAN(src_buf->buf.pitch[0] / + src_buf->format->cpp[0]); + + if (src_buf->format->num_planes > 1) + val |= SCALER_SRC_SPAN_SET_C_SPAN(src_buf->buf.pitch[1]); + + scaler_write(val, SCALER_SRC_SPAN); +} + +static inline void scaler_set_src_luma_pos(struct scaler_context *scaler, + struct drm_exynos_ipp_task_rect *src_pos) +{ + u32 val; + + val = SCALER_SRC_Y_POS_SET_YH_POS(src_pos->x << 2); + val |= SCALER_SRC_Y_POS_SET_YV_POS(src_pos->y << 2); + scaler_write(val, SCALER_SRC_Y_POS); + scaler_write(val, SCALER_SRC_C_POS); /* ATTENTION! */ +} + +static inline void scaler_set_src_wh(struct scaler_context *scaler, + struct drm_exynos_ipp_task_rect *src_pos) +{ + u32 val; + + val = SCALER_SRC_WH_SET_WIDTH(src_pos->w); + val |= SCALER_SRC_WH_SET_HEIGHT(src_pos->h); + scaler_write(val, SCALER_SRC_WH); +} + +static inline void scaler_set_dst_fmt(struct scaler_context *scaler, + u32 dst_fmt) +{ + u32 val; + + val = SCALER_DST_CFG_SET_COLOR_FORMAT(dst_fmt); + scaler_write(val, SCALER_DST_CFG); +} + +static inline void scaler_set_dst_base(struct scaler_context *scaler, + struct exynos_drm_ipp_buffer *dst_buf) +{ + static unsigned int bases[] = { + SCALER_DST_Y_BASE, + SCALER_DST_CB_BASE, + SCALER_DST_CR_BASE, + }; + int i; + + for (i = 0; i < dst_buf->format->num_planes; ++i) + scaler_write(dst_buf->dma_addr[i], bases[i]); +} + +static inline void scaler_set_dst_span(struct scaler_context *scaler, + struct exynos_drm_ipp_buffer *dst_buf) +{ + u32 val; + + val = SCALER_DST_SPAN_SET_Y_SPAN(dst_buf->buf.pitch[0] / + dst_buf->format->cpp[0]); + + if (dst_buf->format->num_planes > 1) + val |= SCALER_DST_SPAN_SET_C_SPAN(dst_buf->buf.pitch[1]); + + scaler_write(val, SCALER_DST_SPAN); +} + +static inline void scaler_set_dst_luma_pos(struct scaler_context *scaler, + struct drm_exynos_ipp_task_rect *dst_pos) +{ + u32 val; + + val = SCALER_DST_WH_SET_WIDTH(dst_pos->w); + val |= SCALER_DST_WH_SET_HEIGHT(dst_pos->h); + scaler_write(val, SCALER_DST_WH); +} + +static inline void scaler_set_dst_wh(struct scaler_context *scaler, + struct drm_exynos_ipp_task_rect *dst_pos) +{ + u32 val; + + val = SCALER_DST_POS_SET_H_POS(dst_pos->x); + val |= SCALER_DST_POS_SET_V_POS(dst_pos->y); + scaler_write(val, SCALER_DST_POS); +} + +static inline void scaler_set_hv_ratio(struct scaler_context *scaler, + unsigned int rotation, + struct drm_exynos_ipp_task_rect *src_pos, + struct drm_exynos_ipp_task_rect *dst_pos) +{ + u32 val, h_ratio, v_ratio; + + if (drm_rotation_90_or_270(rotation)) { + h_ratio = (src_pos->h << 16) / dst_pos->w; + v_ratio = (src_pos->w << 16) / dst_pos->h; + } else { + h_ratio = (src_pos->w << 16) / dst_pos->w; + v_ratio = (src_pos->h << 16) / dst_pos->h; + } + + val = SCALER_H_RATIO_SET(h_ratio); + scaler_write(val, SCALER_H_RATIO); + + val = SCALER_V_RATIO_SET(v_ratio); + scaler_write(val, SCALER_V_RATIO); +} + +static inline void scaler_set_rotation(struct scaler_context *scaler, + unsigned int rotation) +{ + u32 val = 0; + + if (rotation & DRM_MODE_ROTATE_90) + val |= SCALER_ROT_CFG_SET_ROTMODE(SCALER_ROT_MODE_90); + else if (rotation & DRM_MODE_ROTATE_180) + val |= SCALER_ROT_CFG_SET_ROTMODE(SCALER_ROT_MODE_180); + else if (rotation & DRM_MODE_ROTATE_270) + val |= SCALER_ROT_CFG_SET_ROTMODE(SCALER_ROT_MODE_270); + if (rotation & DRM_MODE_REFLECT_X) + val |= SCALER_ROT_CFG_FLIP_X_EN; + if (rotation & DRM_MODE_REFLECT_Y) + val |= SCALER_ROT_CFG_FLIP_Y_EN; + scaler_write(val, SCALER_ROT_CFG); +} + +static inline void scaler_set_csc(struct scaler_context *scaler, + const struct drm_format_info *fmt) +{ + static const u32 csc_mtx[2][3][3] = { + { /* YCbCr to RGB */ + {0x254, 0x000, 0x331}, + {0x254, 0xf38, 0xe60}, + {0x254, 0x409, 0x000}, + }, + { /* RGB to YCbCr */ + {0x084, 0x102, 0x032}, + {0xfb4, 0xf6b, 0x0e1}, + {0x0e1, 0xf44, 0xfdc}, + }, + }; + int i, j, dir; + + switch (fmt->format) { + case DRM_FORMAT_RGB565: + case DRM_FORMAT_XRGB1555: + case DRM_FORMAT_ARGB1555: + case DRM_FORMAT_XRGB4444: + case DRM_FORMAT_ARGB4444: + case DRM_FORMAT_XRGB8888: + case DRM_FORMAT_ARGB8888: + case DRM_FORMAT_RGBX8888: + case DRM_FORMAT_RGBA8888: + dir = 1; + break; + default: + dir = 0; + } + + for (i = 0; i < 3; i++) + for (j = 0; j < 3; j++) + scaler_write(csc_mtx[dir][i][j], SCALER_CSC_COEF(j, i)); +} + +static inline void scaler_set_timer(struct scaler_context *scaler, + unsigned int timer, unsigned int divider) +{ + u32 val; + + val = SCALER_TIMEOUT_CTRL_TIMER_ENABLE; + val |= SCALER_TIMEOUT_CTRL_SET_TIMER_VALUE(timer); + val |= SCALER_TIMEOUT_CTRL_SET_TIMER_DIV(divider); + scaler_write(val, SCALER_TIMEOUT_CTRL); +} + +static inline void scaler_start_hw(struct scaler_context *scaler) +{ + scaler_write(SCALER_CFG_START_CMD, SCALER_CFG); +} + +static int scaler_commit(struct exynos_drm_ipp *ipp, + struct exynos_drm_ipp_task *task) +{ + struct scaler_context *scaler = + container_of(ipp, struct scaler_context, ipp); + + u32 src_fmt = scaler_get_format(task->src.buf.fourcc); + struct drm_exynos_ipp_task_rect *src_pos = &task->src.rect; + + u32 dst_fmt = scaler_get_format(task->dst.buf.fourcc); + struct drm_exynos_ipp_task_rect *dst_pos = &task->dst.rect; + + scaler->task = task; + + pm_runtime_get_sync(scaler->dev); + + scaler_set_src_fmt(scaler, src_fmt); + scaler_set_src_base(scaler, &task->src); + scaler_set_src_span(scaler, &task->src); + scaler_set_src_luma_pos(scaler, src_pos); + scaler_set_src_wh(scaler, src_pos); + + scaler_set_dst_fmt(scaler, dst_fmt); + scaler_set_dst_base(scaler, &task->dst); + scaler_set_dst_span(scaler, &task->dst); + scaler_set_dst_luma_pos(scaler, dst_pos); + scaler_set_dst_wh(scaler, dst_pos); + + scaler_set_hv_ratio(scaler, task->transform.rotation, src_pos, dst_pos); + scaler_set_rotation(scaler, task->transform.rotation); + + scaler_set_csc(scaler, task->src.format); + + scaler_set_timer(scaler, 0xffff, 0xf); + + scaler_enable_int(scaler); + scaler_start_hw(scaler); + + return 0; +} + +static struct exynos_drm_ipp_funcs ipp_funcs = { + .commit = scaler_commit, +}; + +static inline void scaler_disable_int(struct scaler_context *scaler) +{ + scaler_write(0, SCALER_INT_EN); +} + +static inline u32 scaler_get_int_status(struct scaler_context *scaler) +{ + return scaler_read(SCALER_INT_STATUS); +} + +static inline bool scaler_task_done(u32 val) +{ + return val & SCALER_INT_STATUS_FRAME_END ? 0 : -EINVAL; +} + +static irqreturn_t scaler_irq_handler(int irq, void *arg) +{ + struct scaler_context *scaler = arg; + + u32 val = scaler_get_int_status(scaler); + + scaler_disable_int(scaler); + + if (scaler->task) { + struct exynos_drm_ipp_task *task = scaler->task; + + scaler->task = NULL; + pm_runtime_mark_last_busy(scaler->dev); + pm_runtime_put_autosuspend(scaler->dev); + exynos_drm_ipp_task_done(task, scaler_task_done(val)); + } + + return IRQ_HANDLED; +} + +static int scaler_bind(struct device *dev, struct device *master, void *data) +{ + struct scaler_context *scaler = dev_get_drvdata(dev); + struct drm_device *drm_dev = data; + struct exynos_drm_ipp *ipp = &scaler->ipp; + + scaler->drm_dev = drm_dev; + drm_iommu_attach_device(drm_dev, dev); + + exynos_drm_ipp_register(drm_dev, ipp, &ipp_funcs, + DRM_EXYNOS_IPP_CAP_CROP | DRM_EXYNOS_IPP_CAP_ROTATE | + DRM_EXYNOS_IPP_CAP_SCALE | DRM_EXYNOS_IPP_CAP_CONVERT, + scaler->scaler_data->formats, + scaler->scaler_data->num_formats, "scaler"); + + dev_info(dev, "The exynos scaler has been probed successfully\n"); + + return 0; +} + +static void scaler_unbind(struct device *dev, struct device *master, + void *data) +{ + struct scaler_context *scaler = dev_get_drvdata(dev); + struct drm_device *drm_dev = data; + struct exynos_drm_ipp *ipp = &scaler->ipp; + + exynos_drm_ipp_unregister(drm_dev, ipp); + drm_iommu_detach_device(scaler->drm_dev, scaler->dev); +} + +static const struct component_ops scaler_component_ops = { + .bind = scaler_bind, + .unbind = scaler_unbind, +}; + +static int scaler_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct resource *regs_res; + struct scaler_context *scaler; + int irq; + int ret, i; + + scaler = devm_kzalloc(dev, sizeof(*scaler), GFP_KERNEL); + if (!scaler) + return -ENOMEM; + + scaler->scaler_data = + (struct scaler_data *)of_device_get_match_data(dev); + + scaler->dev = dev; + regs_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + scaler->regs = devm_ioremap_resource(dev, regs_res); + if (IS_ERR(scaler->regs)) + return PTR_ERR(scaler->regs); + + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + dev_err(dev, "failed to get irq\n"); + return irq; + } + + ret = devm_request_threaded_irq(dev, irq, NULL, scaler_irq_handler, + IRQF_ONESHOT, "drm_scaler", scaler); + if (ret < 0) { + dev_err(dev, "failed to request irq\n"); + return ret; + } + + for (i = 0; i < scaler->scaler_data->num_clk; ++i) { + scaler->clock[i] = devm_clk_get(dev, + scaler->scaler_data->clk_name[i]); + if (IS_ERR(scaler->clock[i])) { + dev_err(dev, "failed to get clock\n"); + return PTR_ERR(scaler->clock[i]); + } + } + + pm_runtime_use_autosuspend(dev); + pm_runtime_set_autosuspend_delay(dev, SCALER_AUTOSUSPEND_DELAY); + pm_runtime_enable(dev); + platform_set_drvdata(pdev, scaler); + + ret = component_add(dev, &scaler_component_ops); + if (ret) + goto err_ippdrv_register; + + return 0; + +err_ippdrv_register: + pm_runtime_dont_use_autosuspend(dev); + pm_runtime_disable(dev); + return ret; +} + +static int scaler_remove(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + + component_del(dev, &scaler_component_ops); + pm_runtime_dont_use_autosuspend(dev); + pm_runtime_disable(dev); + + return 0; +} + +#ifdef CONFIG_PM + +static int clk_disable_unprepare_wrapper(struct clk *clk) +{ + clk_disable_unprepare(clk); + + return 0; +} + +static int scaler_clk_ctrl(struct scaler_context *scaler, bool enable) +{ + int (*clk_fun)(struct clk *clk), i; + + clk_fun = enable ? clk_prepare_enable : clk_disable_unprepare_wrapper; + + for (i = 0; i < scaler->scaler_data->num_clk; ++i) + clk_fun(scaler->clock[i]); + + return 0; +} + +static int scaler_runtime_suspend(struct device *dev) +{ + struct scaler_context *scaler = dev_get_drvdata(dev); + + return scaler_clk_ctrl(scaler, false); +} + +static int scaler_runtime_resume(struct device *dev) +{ + struct scaler_context *scaler = dev_get_drvdata(dev); + + return scaler_clk_ctrl(scaler, true); +} +#endif + +static const struct dev_pm_ops scaler_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, + pm_runtime_force_resume) + SET_RUNTIME_PM_OPS(scaler_runtime_suspend, scaler_runtime_resume, NULL) +}; + +static const struct drm_exynos_ipp_limit scaler_5420_two_pixel_hv_limits[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 16, SZ_8K }, .v = { 16, SZ_8K }) }, + { IPP_SIZE_LIMIT(AREA, .h.align = 2, .v.align = 2) }, + { IPP_SCALE_LIMIT(.h = { 65536 * 1 / 4, 65536 * 16 }, + .v = { 65536 * 1 / 4, 65536 * 16 }) }, +}; + +static const struct drm_exynos_ipp_limit scaler_5420_two_pixel_h_limits[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 16, SZ_8K }, .v = { 16, SZ_8K }) }, + { IPP_SIZE_LIMIT(AREA, .h.align = 2, .v.align = 1) }, + { IPP_SCALE_LIMIT(.h = { 65536 * 1 / 4, 65536 * 16 }, + .v = { 65536 * 1 / 4, 65536 * 16 }) }, +}; + +static const struct drm_exynos_ipp_limit scaler_5420_one_pixel_limits[] = { + { IPP_SIZE_LIMIT(BUFFER, .h = { 16, SZ_8K }, .v = { 16, SZ_8K }) }, + { IPP_SCALE_LIMIT(.h = { 65536 * 1 / 4, 65536 * 16 }, + .v = { 65536 * 1 / 4, 65536 * 16 }) }, +}; + +static const struct exynos_drm_ipp_formats exynos5420_formats[] = { + /* SCALER_YUV420_2P_UV */ + { IPP_SRCDST_FORMAT(NV21, scaler_5420_two_pixel_hv_limits) }, + + /* SCALER_YUV420_2P_VU */ + { IPP_SRCDST_FORMAT(NV12, scaler_5420_two_pixel_hv_limits) }, + + /* SCALER_YUV420_3P */ + { IPP_SRCDST_FORMAT(YUV420, scaler_5420_two_pixel_hv_limits) }, + + /* SCALER_YUV422_1P_YUYV */ + { IPP_SRCDST_FORMAT(YUYV, scaler_5420_two_pixel_h_limits) }, + + /* SCALER_YUV422_1P_UYVY */ + { IPP_SRCDST_FORMAT(UYVY, scaler_5420_two_pixel_h_limits) }, + + /* SCALER_YUV422_1P_YVYU */ + { IPP_SRCDST_FORMAT(YVYU, scaler_5420_two_pixel_h_limits) }, + + /* SCALER_YUV422_2P_UV */ + { IPP_SRCDST_FORMAT(NV61, scaler_5420_two_pixel_h_limits) }, + + /* SCALER_YUV422_2P_VU */ + { IPP_SRCDST_FORMAT(NV16, scaler_5420_two_pixel_h_limits) }, + + /* SCALER_YUV422_3P */ + { IPP_SRCDST_FORMAT(YUV422, scaler_5420_two_pixel_h_limits) }, + + /* SCALER_YUV444_2P_UV */ + { IPP_SRCDST_FORMAT(NV42, scaler_5420_one_pixel_limits) }, + + /* SCALER_YUV444_2P_VU */ + { IPP_SRCDST_FORMAT(NV24, scaler_5420_one_pixel_limits) }, + + /* SCALER_YUV444_3P */ + { IPP_SRCDST_FORMAT(YUV444, scaler_5420_one_pixel_limits) }, + + /* SCALER_RGB_565 */ + { IPP_SRCDST_FORMAT(RGB565, scaler_5420_one_pixel_limits) }, + + /* SCALER_ARGB1555 */ + { IPP_SRCDST_FORMAT(XRGB1555, scaler_5420_one_pixel_limits) }, + + /* SCALER_ARGB1555 */ + { IPP_SRCDST_FORMAT(ARGB1555, scaler_5420_one_pixel_limits) }, + + /* SCALER_ARGB4444 */ + { IPP_SRCDST_FORMAT(XRGB4444, scaler_5420_one_pixel_limits) }, + + /* SCALER_ARGB4444 */ + { IPP_SRCDST_FORMAT(ARGB4444, scaler_5420_one_pixel_limits) }, + + /* SCALER_ARGB8888 */ + { IPP_SRCDST_FORMAT(XRGB8888, scaler_5420_one_pixel_limits) }, + + /* SCALER_ARGB8888 */ + { IPP_SRCDST_FORMAT(ARGB8888, scaler_5420_one_pixel_limits) }, + + /* SCALER_RGBA8888 */ + { IPP_SRCDST_FORMAT(RGBX8888, scaler_5420_one_pixel_limits) }, + + /* SCALER_RGBA8888 */ + { IPP_SRCDST_FORMAT(RGBA8888, scaler_5420_one_pixel_limits) }, +}; + +static const struct scaler_data exynos5420_data = { + .clk_name = {"mscl"}, + .num_clk = 1, + .formats = exynos5420_formats, + .num_formats = ARRAY_SIZE(exynos5420_formats), +}; + +static const struct scaler_data exynos5433_data = { + .clk_name = {"pclk", "aclk", "aclk_xiu"}, + .num_clk = 3, + .formats = exynos5420_formats, /* intentional */ + .num_formats = ARRAY_SIZE(exynos5420_formats), +}; + +static const struct of_device_id exynos_scaler_match[] = { + { + .compatible = "samsung,exynos5420-scaler", + .data = &exynos5420_data, + }, { + .compatible = "samsung,exynos5433-scaler", + .data = &exynos5433_data, + }, { + }, +}; +MODULE_DEVICE_TABLE(of, exynos_scaler_match); + +struct platform_driver scaler_driver = { + .probe = scaler_probe, + .remove = scaler_remove, + .driver = { + .name = "exynos-scaler", + .owner = THIS_MODULE, + .pm = &scaler_pm_ops, + .of_match_table = exynos_scaler_match, + }, +}; diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c index abd84cb..09c4bc0 100644 --- a/drivers/gpu/drm/exynos/exynos_hdmi.c +++ b/drivers/gpu/drm/exynos/exynos_hdmi.c @@ -954,8 +954,6 @@ static int hdmi_create_connector(struct drm_encoder *encoder) drm_mode_connector_attach_encoder(connector, encoder); if (hdata->bridge) { - encoder->bridge = hdata->bridge; - hdata->bridge->encoder = encoder; ret = drm_bridge_attach(encoder, hdata->bridge, NULL); if (ret) DRM_ERROR("Failed to attach bridge\n"); diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c index 257299e..272c79f 100644 --- a/drivers/gpu/drm/exynos/exynos_mixer.c +++ b/drivers/gpu/drm/exynos/exynos_mixer.c @@ -473,7 +473,7 @@ static void vp_video_buffer(struct mixer_context *ctx, chroma_addr[1] = chroma_addr[0] + 0x40; } else { luma_addr[1] = luma_addr[0] + fb->pitches[0]; - chroma_addr[1] = chroma_addr[0] + fb->pitches[0]; + chroma_addr[1] = chroma_addr[0] + fb->pitches[1]; } } else { luma_addr[1] = 0; @@ -482,6 +482,7 @@ static void vp_video_buffer(struct mixer_context *ctx, spin_lock_irqsave(&ctx->reg_slock, flags); + vp_reg_write(ctx, VP_SHADOW_UPDATE, 1); /* interlace or progressive scan mode */ val = (test_bit(MXR_BIT_INTERLACE, &ctx->flags) ? ~0 : 0); vp_reg_writemask(ctx, VP_MODE, val, VP_MODE_LINE_SKIP); @@ -495,21 +496,23 @@ static void vp_video_buffer(struct mixer_context *ctx, vp_reg_write(ctx, VP_IMG_SIZE_Y, VP_IMG_HSIZE(fb->pitches[0]) | VP_IMG_VSIZE(fb->height)); /* chroma plane for NV12/NV21 is half the height of the luma plane */ - vp_reg_write(ctx, VP_IMG_SIZE_C, VP_IMG_HSIZE(fb->pitches[0]) | + vp_reg_write(ctx, VP_IMG_SIZE_C, VP_IMG_HSIZE(fb->pitches[1]) | VP_IMG_VSIZE(fb->height / 2)); vp_reg_write(ctx, VP_SRC_WIDTH, state->src.w); - vp_reg_write(ctx, VP_SRC_HEIGHT, state->src.h); vp_reg_write(ctx, VP_SRC_H_POSITION, VP_SRC_H_POSITION_VAL(state->src.x)); - vp_reg_write(ctx, VP_SRC_V_POSITION, state->src.y); - vp_reg_write(ctx, VP_DST_WIDTH, state->crtc.w); vp_reg_write(ctx, VP_DST_H_POSITION, state->crtc.x); + if (test_bit(MXR_BIT_INTERLACE, &ctx->flags)) { + vp_reg_write(ctx, VP_SRC_HEIGHT, state->src.h / 2); + vp_reg_write(ctx, VP_SRC_V_POSITION, state->src.y / 2); vp_reg_write(ctx, VP_DST_HEIGHT, state->crtc.h / 2); vp_reg_write(ctx, VP_DST_V_POSITION, state->crtc.y / 2); } else { + vp_reg_write(ctx, VP_SRC_HEIGHT, state->src.h); + vp_reg_write(ctx, VP_SRC_V_POSITION, state->src.y); vp_reg_write(ctx, VP_DST_HEIGHT, state->crtc.h); vp_reg_write(ctx, VP_DST_V_POSITION, state->crtc.y); } @@ -699,6 +702,15 @@ static irqreturn_t mixer_irq_handler(int irq, void *arg) /* interlace scan need to check shadow register */ if (test_bit(MXR_BIT_INTERLACE, &ctx->flags)) { + if (test_bit(MXR_BIT_VP_ENABLED, &ctx->flags) && + vp_reg_read(ctx, VP_SHADOW_UPDATE)) + goto out; + + base = mixer_reg_read(ctx, MXR_CFG); + shadow = mixer_reg_read(ctx, MXR_CFG_S); + if (base != shadow) + goto out; + base = mixer_reg_read(ctx, MXR_GRAPHIC_BASE(0)); shadow = mixer_reg_read(ctx, MXR_GRAPHIC_BASE_S(0)); if (base != shadow) diff --git a/drivers/gpu/drm/exynos/regs-mixer.h b/drivers/gpu/drm/exynos/regs-mixer.h index c311f57..189cfa2 100644 --- a/drivers/gpu/drm/exynos/regs-mixer.h +++ b/drivers/gpu/drm/exynos/regs-mixer.h @@ -47,6 +47,7 @@ #define MXR_MO 0x0304 #define MXR_RESOLUTION 0x0310 +#define MXR_CFG_S 0x2004 #define MXR_GRAPHIC0_BASE_S 0x2024 #define MXR_GRAPHIC1_BASE_S 0x2044 diff --git a/drivers/gpu/drm/exynos/regs-scaler.h b/drivers/gpu/drm/exynos/regs-scaler.h new file mode 100644 index 0000000..fc7ccad --- /dev/null +++ b/drivers/gpu/drm/exynos/regs-scaler.h @@ -0,0 +1,426 @@ +/* drivers/gpu/drm/exynos/regs-scaler.h + * + * Copyright (c) 2017 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * Author: Andrzej Pietrasiewicz <andrzej.p@samsung.com> + * + * Register definition file for Samsung scaler driver + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef EXYNOS_REGS_SCALER_H +#define EXYNOS_REGS_SCALER_H + +/* Register part */ + +/* Global setting */ +#define SCALER_STATUS 0x0 /* no shadow */ +#define SCALER_CFG 0x4 + +/* Interrupt */ +#define SCALER_INT_EN 0x8 /* no shadow */ +#define SCALER_INT_STATUS 0xc /* no shadow */ + +/* SRC */ +#define SCALER_SRC_CFG 0x10 +#define SCALER_SRC_Y_BASE 0x14 +#define SCALER_SRC_CB_BASE 0x18 +#define SCALER_SRC_CR_BASE 0x294 +#define SCALER_SRC_SPAN 0x1c +#define SCALER_SRC_Y_POS 0x20 +#define SCALER_SRC_WH 0x24 +#define SCALER_SRC_C_POS 0x28 + +/* DST */ +#define SCALER_DST_CFG 0x30 +#define SCALER_DST_Y_BASE 0x34 +#define SCALER_DST_CB_BASE 0x38 +#define SCALER_DST_CR_BASE 0x298 +#define SCALER_DST_SPAN 0x3c +#define SCALER_DST_WH 0x40 +#define SCALER_DST_POS 0x44 + +/* Ratio */ +#define SCALER_H_RATIO 0x50 +#define SCALER_V_RATIO 0x54 + +/* Rotation */ +#define SCALER_ROT_CFG 0x58 + +/* Coefficient */ +/* + * YHCOEF_{x}{A|B|C|D} CHCOEF_{x}{A|B|C|D} + * + * A B C D A B C D + * 0 60 64 68 6c 140 144 148 14c + * 1 70 74 78 7c 150 154 158 15c + * 2 80 84 88 8c 160 164 168 16c + * 3 90 94 98 9c 170 174 178 17c + * 4 a0 a4 a8 ac 180 184 188 18c + * 5 b0 b4 b8 bc 190 194 198 19c + * 6 c0 c4 c8 cc 1a0 1a4 1a8 1ac + * 7 d0 d4 d8 dc 1b0 1b4 1b8 1bc + * 8 e0 e4 e8 ec 1c0 1c4 1c8 1cc + * + * + * YVCOEF_{x}{A|B} CVCOEF_{x}{A|B} + * + * A B A B + * 0 f0 f4 1d0 1d4 + * 1 f8 fc 1d8 1dc + * 2 100 104 1e0 1e4 + * 3 108 10c 1e8 1ec + * 4 110 114 1f0 1f4 + * 5 118 11c 1f8 1fc + * 6 120 124 200 204 + * 7 128 12c 208 20c + * 8 130 134 210 214 + */ +#define _SCALER_HCOEF_DELTA(r, c) ((r) * 0x10 + (c) * 0x4) +#define _SCALER_VCOEF_DELTA(r, c) ((r) * 0x8 + (c) * 0x4) + +#define SCALER_YHCOEF(r, c) (0x60 + _SCALER_HCOEF_DELTA((r), (c))) +#define SCALER_YVCOEF(r, c) (0xf0 + _SCALER_VCOEF_DELTA((r), (c))) +#define SCALER_CHCOEF(r, c) (0x140 + _SCALER_HCOEF_DELTA((r), (c))) +#define SCALER_CVCOEF(r, c) (0x1d0 + _SCALER_VCOEF_DELTA((r), (c))) + + +/* Color Space Conversion */ +#define SCALER_CSC_COEF(x, y) (0x220 + (y) * 0xc + (x) * 0x4) + +/* Dithering */ +#define SCALER_DITH_CFG 0x250 + +/* Version Number */ +#define SCALER_VER 0x260 /* no shadow */ + +/* Cycle count and Timeout */ +#define SCALER_CYCLE_COUNT 0x278 /* no shadow */ +#define SCALER_TIMEOUT_CTRL 0x2c0 /* no shadow */ +#define SCALER_TIMEOUT_CNT 0x2c4 /* no shadow */ + +/* Blending */ +#define SCALER_SRC_BLEND_COLOR 0x280 +#define SCALER_SRC_BLEND_ALPHA 0x284 +#define SCALER_DST_BLEND_COLOR 0x288 +#define SCALER_DST_BLEND_ALPHA 0x28c + +/* Color Fill */ +#define SCALER_FILL_COLOR 0x290 + +/* Multiple Command Queue */ +#define SCALER_ADDR_Q_CONFIG 0x2a0 /* no shadow */ +#define SCALER_SRC_ADDR_Q_STATUS 0x2a4 /* no shadow */ +#define SCALER_SRC_ADDR_Q 0x2a8 /* no shadow */ + +/* CRC */ +#define SCALER_CRC_COLOR00_10 0x2b0 /* no shadow */ +#define SCALER_CRC_COLOR20_30 0x2b4 /* no shadow */ +#define SCALER_CRC_COLOR01_11 0x2b8 /* no shadow */ +#define SCALER_CRC_COLOR21_31 0x2bc /* no shadow */ + +/* Shadow Registers */ +#define SCALER_SHADOW_OFFSET 0x1000 + + +/* Bit definition part */ +#define SCALER_MASK(hi_b, lo_b) ((1 << ((hi_b) - (lo_b) + 1)) - 1) +#define SCALER_GET(reg, hi_b, lo_b) \ + (((reg) >> (lo_b)) & SCALER_MASK(hi_b, lo_b)) +#define SCALER_SET(val, hi_b, lo_b) \ + (((val) & SCALER_MASK(hi_b, lo_b)) << lo_b) + +/* SCALER_STATUS */ +#define SCALER_STATUS_SCALER_RUNNING (1 << 1) +#define SCALER_STATUS_SCALER_READY_CLK_DOWN (1 << 0) + +/* SCALER_CFG */ +#define SCALER_CFG_FILL_EN (1 << 24) +#define SCALER_CFG_BLEND_COLOR_DIVIDE_ALPHA_EN (1 << 17) +#define SCALER_CFG_BLEND_EN (1 << 16) +#define SCALER_CFG_CSC_Y_OFFSET_SRC_EN (1 << 10) +#define SCALER_CFG_CSC_Y_OFFSET_DST_EN (1 << 9) +#define SCALER_CFG_16_BURST_MODE (1 << 8) +#define SCALER_CFG_SOFT_RESET (1 << 1) +#define SCALER_CFG_START_CMD (1 << 0) + +/* SCALER_INT_EN */ +#define SCALER_INT_EN_TIMEOUT (1 << 31) +#define SCALER_INT_EN_ILLEGAL_BLEND (1 << 24) +#define SCALER_INT_EN_ILLEGAL_RATIO (1 << 23) +#define SCALER_INT_EN_ILLEGAL_DST_HEIGHT (1 << 22) +#define SCALER_INT_EN_ILLEGAL_DST_WIDTH (1 << 21) +#define SCALER_INT_EN_ILLEGAL_DST_V_POS (1 << 20) +#define SCALER_INT_EN_ILLEGAL_DST_H_POS (1 << 19) +#define SCALER_INT_EN_ILLEGAL_DST_C_SPAN (1 << 18) +#define SCALER_INT_EN_ILLEGAL_DST_Y_SPAN (1 << 17) +#define SCALER_INT_EN_ILLEGAL_DST_CR_BASE (1 << 16) +#define SCALER_INT_EN_ILLEGAL_DST_CB_BASE (1 << 15) +#define SCALER_INT_EN_ILLEGAL_DST_Y_BASE (1 << 14) +#define SCALER_INT_EN_ILLEGAL_DST_COLOR (1 << 13) +#define SCALER_INT_EN_ILLEGAL_SRC_HEIGHT (1 << 12) +#define SCALER_INT_EN_ILLEGAL_SRC_WIDTH (1 << 11) +#define SCALER_INT_EN_ILLEGAL_SRC_CV_POS (1 << 10) +#define SCALER_INT_EN_ILLEGAL_SRC_CH_POS (1 << 9) +#define SCALER_INT_EN_ILLEGAL_SRC_YV_POS (1 << 8) +#define SCALER_INT_EN_ILLEGAL_SRC_YH_POS (1 << 7) +#define SCALER_INT_EN_ILLEGAL_DST_SPAN (1 << 6) +#define SCALER_INT_EN_ILLEGAL_SRC_Y_SPAN (1 << 5) +#define SCALER_INT_EN_ILLEGAL_SRC_CR_BASE (1 << 4) +#define SCALER_INT_EN_ILLEGAL_SRC_CB_BASE (1 << 3) +#define SCALER_INT_EN_ILLEGAL_SRC_Y_BASE (1 << 2) +#define SCALER_INT_EN_ILLEGAL_SRC_COLOR (1 << 1) +#define SCALER_INT_EN_FRAME_END (1 << 0) + +/* SCALER_INT_STATUS */ +#define SCALER_INT_STATUS_TIMEOUT (1 << 31) +#define SCALER_INT_STATUS_ILLEGAL_BLEND (1 << 24) +#define SCALER_INT_STATUS_ILLEGAL_RATIO (1 << 23) +#define SCALER_INT_STATUS_ILLEGAL_DST_HEIGHT (1 << 22) +#define SCALER_INT_STATUS_ILLEGAL_DST_WIDTH (1 << 21) +#define SCALER_INT_STATUS_ILLEGAL_DST_V_POS (1 << 20) +#define SCALER_INT_STATUS_ILLEGAL_DST_H_POS (1 << 19) +#define SCALER_INT_STATUS_ILLEGAL_DST_C_SPAN (1 << 18) +#define SCALER_INT_STATUS_ILLEGAL_DST_Y_SPAN (1 << 17) +#define SCALER_INT_STATUS_ILLEGAL_DST_CR_BASE (1 << 16) +#define SCALER_INT_STATUS_ILLEGAL_DST_CB_BASE (1 << 15) +#define SCALER_INT_STATUS_ILLEGAL_DST_Y_BASE (1 << 14) +#define SCALER_INT_STATUS_ILLEGAL_DST_COLOR (1 << 13) +#define SCALER_INT_STATUS_ILLEGAL_SRC_HEIGHT (1 << 12) +#define SCALER_INT_STATUS_ILLEGAL_SRC_WIDTH (1 << 11) +#define SCALER_INT_STATUS_ILLEGAL_SRC_CV_POS (1 << 10) +#define SCALER_INT_STATUS_ILLEGAL_SRC_CH_POS (1 << 9) +#define SCALER_INT_STATUS_ILLEGAL_SRC_YV_POS (1 << 8) +#define SCALER_INT_STATUS_ILLEGAL_SRC_YH_POS (1 << 7) +#define SCALER_INT_STATUS_ILLEGAL_DST_SPAN (1 << 6) +#define SCALER_INT_STATUS_ILLEGAL_SRC_Y_SPAN (1 << 5) +#define SCALER_INT_STATUS_ILLEGAL_SRC_CR_BASE (1 << 4) +#define SCALER_INT_STATUS_ILLEGAL_SRC_CB_BASE (1 << 3) +#define SCALER_INT_STATUS_ILLEGAL_SRC_Y_BASE (1 << 2) +#define SCALER_INT_STATUS_ILLEGAL_SRC_COLOR (1 << 1) +#define SCALER_INT_STATUS_FRAME_END (1 << 0) + +/* SCALER_SRC_CFG */ +#define SCALER_SRC_CFG_TILE_EN (1 << 10) +#define SCALER_SRC_CFG_GET_BYTE_SWAP(r) SCALER_GET(r, 6, 5) +#define SCALER_SRC_CFG_SET_BYTE_SWAP(v) SCALER_SET(v, 6, 5) +#define SCALER_SRC_CFG_GET_COLOR_FORMAT(r) SCALER_GET(r, 4, 0) +#define SCALER_SRC_CFG_SET_COLOR_FORMAT(v) SCALER_SET(v, 4, 0) +#define SCALER_YUV420_2P_UV 0 +#define SCALER_YUV422_2P_UV 2 +#define SCALER_YUV444_2P_UV 3 +#define SCALER_RGB_565 4 +#define SCALER_ARGB1555 5 +#define SCALER_ARGB8888 6 +#define SCALER_ARGB8888_PRE 7 +#define SCALER_YUV422_1P_YVYU 9 +#define SCALER_YUV422_1P_YUYV 10 +#define SCALER_YUV422_1P_UYVY 11 +#define SCALER_ARGB4444 12 +#define SCALER_L8A8 13 +#define SCALER_RGBA8888 14 +#define SCALER_L8 15 +#define SCALER_YUV420_2P_VU 16 +#define SCALER_YUV422_2P_VU 18 +#define SCALER_YUV444_2P_VU 19 +#define SCALER_YUV420_3P 20 +#define SCALER_YUV422_3P 22 +#define SCALER_YUV444_3P 23 + +/* SCALER_SRC_SPAN */ +#define SCALER_SRC_SPAN_GET_C_SPAN(r) SCALER_GET(r, 29, 16) +#define SCALER_SRC_SPAN_SET_C_SPAN(v) SCALER_SET(v, 29, 16) +#define SCALER_SRC_SPAN_GET_Y_SPAN(r) SCALER_GET(r, 13, 0) +#define SCALER_SRC_SPAN_SET_Y_SPAN(v) SCALER_SET(v, 13, 0) + +/* SCALER_SRC_Y_POS */ +#define SCALER_SRC_Y_POS_GET_YH_POS(r) SCALER_GET(r, 31, 16) +#define SCALER_SRC_Y_POS_SET_YH_POS(v) SCALER_SET(v, 31, 16) +#define SCALER_SRC_Y_POS_GET_YV_POS(r) SCALER_GET(r, 15, 0) +#define SCALER_SRC_Y_POS_SET_YV_POS(v) SCALER_SET(v, 15, 0) + +/* SCALER_SRC_WH */ +#define SCALER_SRC_WH_GET_WIDTH(r) SCALER_GET(r, 29, 16) +#define SCALER_SRC_WH_SET_WIDTH(v) SCALER_SET(v, 29, 16) +#define SCALER_SRC_WH_GET_HEIGHT(r) SCALER_GET(r, 13, 0) +#define SCALER_SRC_WH_SET_HEIGHT(v) SCALER_SET(v, 13, 0) + +/* SCALER_SRC_C_POS */ +#define SCALER_SRC_C_POS_GET_CH_POS(r) SCALER_GET(r, 31, 16) +#define SCALER_SRC_C_POS_SET_CH_POS(v) SCALER_SET(v, 31, 16) +#define SCALER_SRC_C_POS_GET_CV_POS(r) SCALER_GET(r, 15, 0) +#define SCALER_SRC_C_POS_SET_CV_POS(v) SCALER_SET(v, 15, 0) + +/* SCALER_DST_CFG */ +#define SCALER_DST_CFG_GET_BYTE_SWAP(r) SCALER_GET(r, 6, 5) +#define SCALER_DST_CFG_SET_BYTE_SWAP(v) SCALER_SET(v, 6, 5) +#define SCALER_DST_CFG_GET_COLOR_FORMAT(r) SCALER_GET(r, 4, 0) +#define SCALER_DST_CFG_SET_COLOR_FORMAT(v) SCALER_SET(v, 4, 0) + +/* SCALER_DST_SPAN */ +#define SCALER_DST_SPAN_GET_C_SPAN(r) SCALER_GET(r, 29, 16) +#define SCALER_DST_SPAN_SET_C_SPAN(v) SCALER_SET(v, 29, 16) +#define SCALER_DST_SPAN_GET_Y_SPAN(r) SCALER_GET(r, 13, 0) +#define SCALER_DST_SPAN_SET_Y_SPAN(v) SCALER_SET(v, 13, 0) + +/* SCALER_DST_WH */ +#define SCALER_DST_WH_GET_WIDTH(r) SCALER_GET(r, 29, 16) +#define SCALER_DST_WH_SET_WIDTH(v) SCALER_SET(v, 29, 16) +#define SCALER_DST_WH_GET_HEIGHT(r) SCALER_GET(r, 13, 0) +#define SCALER_DST_WH_SET_HEIGHT(v) SCALER_SET(v, 13, 0) + +/* SCALER_DST_POS */ +#define SCALER_DST_POS_GET_H_POS(r) SCALER_GET(r, 29, 16) +#define SCALER_DST_POS_SET_H_POS(v) SCALER_SET(v, 29, 16) +#define SCALER_DST_POS_GET_V_POS(r) SCALER_GET(r, 13, 0) +#define SCALER_DST_POS_SET_V_POS(v) SCALER_SET(v, 13, 0) + +/* SCALER_H_RATIO */ +#define SCALER_H_RATIO_GET(r) SCALER_GET(r, 18, 0) +#define SCALER_H_RATIO_SET(v) SCALER_SET(v, 18, 0) + +/* SCALER_V_RATIO */ +#define SCALER_V_RATIO_GET(r) SCALER_GET(r, 18, 0) +#define SCALER_V_RATIO_SET(v) SCALER_SET(v, 18, 0) + +/* SCALER_ROT_CFG */ +#define SCALER_ROT_CFG_FLIP_X_EN (1 << 3) +#define SCALER_ROT_CFG_FLIP_Y_EN (1 << 2) +#define SCALER_ROT_CFG_GET_ROTMODE(r) SCALER_GET(r, 1, 0) +#define SCALER_ROT_CFG_SET_ROTMODE(v) SCALER_SET(v, 1, 0) +#define SCALER_ROT_MODE_90 1 +#define SCALER_ROT_MODE_180 2 +#define SCALER_ROT_MODE_270 3 + +/* SCALER_HCOEF, SCALER_VCOEF */ +#define SCALER_COEF_SHIFT(i) (16 * (1 - (i) % 2)) +#define SCALER_COEF_GET(r, i) \ + (((r) >> SCALER_COEF_SHIFT(i)) & 0x1ff) +#define SCALER_COEF_SET(v, i) \ + (((v) & 0x1ff) << SCALER_COEF_SHIFT(i)) + +/* SCALER_CSC_COEFxy */ +#define SCALER_CSC_COEF_GET(r) SCALER_GET(r, 11, 0) +#define SCALER_CSC_COEF_SET(v) SCALER_SET(v, 11, 0) + +/* SCALER_DITH_CFG */ +#define SCALER_DITH_CFG_GET_R_TYPE(r) SCALER_GET(r, 8, 6) +#define SCALER_DITH_CFG_SET_R_TYPE(v) SCALER_SET(v, 8, 6) +#define SCALER_DITH_CFG_GET_G_TYPE(r) SCALER_GET(r, 5, 3) +#define SCALER_DITH_CFG_SET_G_TYPE(v) SCALER_SET(v, 5, 3) +#define SCALER_DITH_CFG_GET_B_TYPE(r) SCALER_GET(r, 2, 0) +#define SCALER_DITH_CFG_SET_B_TYPE(v) SCALER_SET(v, 2, 0) + +/* SCALER_TIMEOUT_CTRL */ +#define SCALER_TIMEOUT_CTRL_GET_TIMER_VALUE(r) SCALER_GET(r, 31, 16) +#define SCALER_TIMEOUT_CTRL_SET_TIMER_VALUE(v) SCALER_SET(v, 31, 16) +#define SCALER_TIMEOUT_CTRL_GET_TIMER_DIV(r) SCALER_GET(r, 7, 4) +#define SCALER_TIMEOUT_CTRL_SET_TIMER_DIV(v) SCALER_SET(v, 7, 4) +#define SCALER_TIMEOUT_CTRL_TIMER_ENABLE (1 << 0) + +/* SCALER_TIMEOUT_CNT */ +#define SCALER_TIMEOUT_CTRL_GET_TIMER_COUNT(r) SCALER_GET(r, 31, 16) + +/* SCALER_SRC_BLEND_COLOR */ +#define SCALER_SRC_BLEND_COLOR_SEL_INV (1 << 31) +#define SCALER_SRC_BLEND_COLOR_GET_SEL(r) SCALER_GET(r, 30, 29) +#define SCALER_SRC_BLEND_COLOR_SET_SEL(v) SCALER_SET(v, 30, 29) +#define SCALER_SRC_BLEND_COLOR_OP_SEL_INV (1 << 28) +#define SCALER_SRC_BLEND_COLOR_GET_OP_SEL(r) SCALER_GET(r, 27, 24) +#define SCALER_SRC_BLEND_COLOR_SET_OP_SEL(v) SCALER_SET(v, 27, 24) +#define SCALER_SRC_BLEND_COLOR_GET_COLOR0(r) SCALER_GET(r, 23, 16) +#define SCALER_SRC_BLEND_COLOR_SET_COLOR0(v) SCALER_SET(v, 23, 16) +#define SCALER_SRC_BLEND_COLOR_GET_COLOR1(r) SCALER_GET(r, 15, 8) +#define SCALER_SRC_BLEND_COLOR_SET_COLOR1(v) SCALER_SET(v, 15, 8) +#define SCALER_SRC_BLEND_COLOR_GET_COLOR2(r) SCALER_GET(r, 7, 0) +#define SCALER_SRC_BLEND_COLOR_SET_COLOR2(v) SCALER_SET(v, 7, 0) + +/* SCALER_SRC_BLEND_ALPHA */ +#define SCALER_SRC_BLEND_ALPHA_SEL_INV (1 << 31) +#define SCALER_SRC_BLEND_ALPHA_GET_SEL(r) SCALER_GET(r, 30, 29) +#define SCALER_SRC_BLEND_ALPHA_SET_SEL(v) SCALER_SET(v, 30, 29) +#define SCALER_SRC_BLEND_ALPHA_OP_SEL_INV (1 << 28) +#define SCALER_SRC_BLEND_ALPHA_GET_OP_SEL(r) SCALER_GET(r, 27, 24) +#define SCALER_SRC_BLEND_ALPHA_SET_OP_SEL(v) SCALER_SET(v, 27, 24) +#define SCALER_SRC_BLEND_ALPHA_GET_ALPHA(r) SCALER_GET(r, 7, 0) +#define SCALER_SRC_BLEND_ALPHA_SET_ALPHA(v) SCALER_SET(v, 7, 0) + +/* SCALER_DST_BLEND_COLOR */ +#define SCALER_DST_BLEND_COLOR_SEL_INV (1 << 31) +#define SCALER_DST_BLEND_COLOR_GET_SEL(r) SCALER_GET(r, 30, 29) +#define SCALER_DST_BLEND_COLOR_SET_SEL(v) SCALER_SET(v, 30, 29) +#define SCALER_DST_BLEND_COLOR_OP_SEL_INV (1 << 28) +#define SCALER_DST_BLEND_COLOR_GET_OP_SEL(r) SCALER_GET(r, 27, 24) +#define SCALER_DST_BLEND_COLOR_SET_OP_SEL(v) SCALER_SET(v, 27, 24) +#define SCALER_DST_BLEND_COLOR_GET_COLOR0(r) SCALER_GET(r, 23, 16) +#define SCALER_DST_BLEND_COLOR_SET_COLOR0(v) SCALER_SET(v, 23, 16) +#define SCALER_DST_BLEND_COLOR_GET_COLOR1(r) SCALER_GET(r, 15, 8) +#define SCALER_DST_BLEND_COLOR_SET_COLOR1(v) SCALER_SET(v, 15, 8) +#define SCALER_DST_BLEND_COLOR_GET_COLOR2(r) SCALER_GET(r, 7, 0) +#define SCALER_DST_BLEND_COLOR_SET_COLOR2(v) SCALER_SET(v, 7, 0) + +/* SCALER_DST_BLEND_ALPHA */ +#define SCALER_DST_BLEND_ALPHA_SEL_INV (1 << 31) +#define SCALER_DST_BLEND_ALPHA_GET_SEL(r) SCALER_GET(r, 30, 29) +#define SCALER_DST_BLEND_ALPHA_SET_SEL(v) SCALER_SET(v, 30, 29) +#define SCALER_DST_BLEND_ALPHA_OP_SEL_INV (1 << 28) +#define SCALER_DST_BLEND_ALPHA_GET_OP_SEL(r) SCALER_GET(r, 27, 24) +#define SCALER_DST_BLEND_ALPHA_SET_OP_SEL(v) SCALER_SET(v, 27, 24) +#define SCALER_DST_BLEND_ALPHA_GET_ALPHA(r) SCALER_GET(r, 7, 0) +#define SCALER_DST_BLEND_ALPHA_SET_ALPHA(v) SCALER_SET(v, 7, 0) + +/* SCALER_FILL_COLOR */ +#define SCALER_FILL_COLOR_GET_ALPHA(r) SCALER_GET(r, 31, 24) +#define SCALER_FILL_COLOR_SET_ALPHA(v) SCALER_SET(v, 31, 24) +#define SCALER_FILL_COLOR_GET_FILL_COLOR0(r) SCALER_GET(r, 23, 16) +#define SCALER_FILL_COLOR_SET_FILL_COLOR0(v) SCALER_SET(v, 23, 16) +#define SCALER_FILL_COLOR_GET_FILL_COLOR1(r) SCALER_GET(r, 15, 8) +#define SCALER_FILL_COLOR_SET_FILL_COLOR1(v) SCALER_SET(v, 15, 8) +#define SCALER_FILL_COLOR_GET_FILL_COLOR2(r) SCALER_GET(r, 7, 0) +#define SCALER_FILL_COLOR_SET_FILL_COLOR2(v) SCALER_SET(v, 7, 0) + +/* SCALER_ADDR_Q_CONFIG */ +#define SCALER_ADDR_Q_CONFIG_RST (1 << 0) + +/* SCALER_SRC_ADDR_Q_STATUS */ +#define SCALER_SRC_ADDR_Q_STATUS_Y_FULL (1 << 23) +#define SCALER_SRC_ADDR_Q_STATUS_Y_EMPTY (1 << 22) +#define SCALER_SRC_ADDR_Q_STATUS_GET_Y_WR_IDX(r) SCALER_GET(r, 21, 16) +#define SCALER_SRC_ADDR_Q_STATUS_CB_FULL (1 << 15) +#define SCALER_SRC_ADDR_Q_STATUS_CB_EMPTY (1 << 14) +#define SCALER_SRC_ADDR_Q_STATUS_GET_CB_WR_IDX(r) SCALER_GET(r, 13, 8) +#define SCALER_SRC_ADDR_Q_STATUS_CR_FULL (1 << 7) +#define SCALER_SRC_ADDR_Q_STATUS_CR_EMPTY (1 << 6) +#define SCALER_SRC_ADDR_Q_STATUS_GET_CR_WR_IDX(r) SCALER_GET(r, 5, 0) + +/* SCALER_DST_ADDR_Q_STATUS */ +#define SCALER_DST_ADDR_Q_STATUS_Y_FULL (1 << 23) +#define SCALER_DST_ADDR_Q_STATUS_Y_EMPTY (1 << 22) +#define SCALER_DST_ADDR_Q_STATUS_GET_Y_WR_IDX(r) SCALER_GET(r, 21, 16) +#define SCALER_DST_ADDR_Q_STATUS_CB_FULL (1 << 15) +#define SCALER_DST_ADDR_Q_STATUS_CB_EMPTY (1 << 14) +#define SCALER_DST_ADDR_Q_STATUS_GET_CB_WR_IDX(r) SCALER_GET(r, 13, 8) +#define SCALER_DST_ADDR_Q_STATUS_CR_FULL (1 << 7) +#define SCALER_DST_ADDR_Q_STATUS_CR_EMPTY (1 << 6) +#define SCALER_DST_ADDR_Q_STATUS_GET_CR_WR_IDX(r) SCALER_GET(r, 5, 0) + +/* SCALER_CRC_COLOR00_10 */ +#define SCALER_CRC_COLOR00_10_GET_00(r) SCALER_GET(r, 31, 16) +#define SCALER_CRC_COLOR00_10_GET_10(r) SCALER_GET(r, 15, 0) + +/* SCALER_CRC_COLOR20_30 */ +#define SCALER_CRC_COLOR20_30_GET_20(r) SCALER_GET(r, 31, 16) +#define SCALER_CRC_COLOR20_30_GET_30(r) SCALER_GET(r, 15, 0) + +/* SCALER_CRC_COLOR01_11 */ +#define SCALER_CRC_COLOR01_11_GET_01(r) SCALER_GET(r, 31, 16) +#define SCALER_CRC_COLOR01_11_GET_11(r) SCALER_GET(r, 15, 0) + +/* SCALER_CRC_COLOR21_31 */ +#define SCALER_CRC_COLOR21_31_GET_21(r) SCALER_GET(r, 31, 16) +#define SCALER_CRC_COLOR21_31_GET_31(r) SCALER_GET(r, 15, 0) + +#endif /* EXYNOS_REGS_SCALER_H */ diff --git a/drivers/gpu/drm/gma500/cdv_device.c b/drivers/gpu/drm/gma500/cdv_device.c index 3a3bf75..34b8576 100644 --- a/drivers/gpu/drm/gma500/cdv_device.c +++ b/drivers/gpu/drm/gma500/cdv_device.c @@ -485,7 +485,7 @@ void cdv_intel_attach_force_audio_property(struct drm_connector *connector) return; for (i = 0; i < ARRAY_SIZE(force_audio_names); i++) - drm_property_add_enum(prop, i, i-1, force_audio_names[i]); + drm_property_add_enum(prop, i-1, force_audio_names[i]); dev_priv->force_audio_property = prop; } @@ -514,7 +514,7 @@ void cdv_intel_attach_broadcast_rgb_property(struct drm_connector *connector) return; for (i = 0; i < ARRAY_SIZE(broadcast_rgb_names); i++) - drm_property_add_enum(prop, i, i, broadcast_rgb_names[i]); + drm_property_add_enum(prop, i, broadcast_rgb_names[i]); dev_priv->broadcast_rgb_property = prop; } diff --git a/drivers/gpu/drm/gma500/cdv_intel_crt.c b/drivers/gpu/drm/gma500/cdv_intel_crt.c index b837e7a..cb5a14b 100644 --- a/drivers/gpu/drm/gma500/cdv_intel_crt.c +++ b/drivers/gpu/drm/gma500/cdv_intel_crt.c @@ -64,7 +64,7 @@ static void cdv_intel_crt_dpms(struct drm_encoder *encoder, int mode) REG_WRITE(reg, temp); } -static int cdv_intel_crt_mode_valid(struct drm_connector *connector, +static enum drm_mode_status cdv_intel_crt_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { if (mode->flags & DRM_MODE_FLAG_DBLSCAN) diff --git a/drivers/gpu/drm/gma500/cdv_intel_dp.c b/drivers/gpu/drm/gma500/cdv_intel_dp.c index a4bb89b..5ea785f 100644 --- a/drivers/gpu/drm/gma500/cdv_intel_dp.c +++ b/drivers/gpu/drm/gma500/cdv_intel_dp.c @@ -505,7 +505,7 @@ static void cdv_intel_edp_backlight_off (struct gma_encoder *intel_encoder) msleep(intel_dp->backlight_off_delay); } -static int +static enum drm_mode_status cdv_intel_dp_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { diff --git a/drivers/gpu/drm/gma500/cdv_intel_hdmi.c b/drivers/gpu/drm/gma500/cdv_intel_hdmi.c index 563f193..f087899 100644 --- a/drivers/gpu/drm/gma500/cdv_intel_hdmi.c +++ b/drivers/gpu/drm/gma500/cdv_intel_hdmi.c @@ -223,7 +223,7 @@ static int cdv_hdmi_get_modes(struct drm_connector *connector) return ret; } -static int cdv_hdmi_mode_valid(struct drm_connector *connector, +static enum drm_mode_status cdv_hdmi_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { if (mode->clock > 165000) diff --git a/drivers/gpu/drm/gma500/cdv_intel_lvds.c b/drivers/gpu/drm/gma500/cdv_intel_lvds.c index e64960d..de9531c 100644 --- a/drivers/gpu/drm/gma500/cdv_intel_lvds.c +++ b/drivers/gpu/drm/gma500/cdv_intel_lvds.c @@ -244,7 +244,7 @@ static void cdv_intel_lvds_restore(struct drm_connector *connector) { } -static int cdv_intel_lvds_mode_valid(struct drm_connector *connector, +static enum drm_mode_status cdv_intel_lvds_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct drm_device *dev = connector->dev; diff --git a/drivers/gpu/drm/gma500/mdfld_dsi_output.c b/drivers/gpu/drm/gma500/mdfld_dsi_output.c index acb3848..fe02092 100644 --- a/drivers/gpu/drm/gma500/mdfld_dsi_output.c +++ b/drivers/gpu/drm/gma500/mdfld_dsi_output.c @@ -346,7 +346,7 @@ static int mdfld_dsi_connector_get_modes(struct drm_connector *connector) return 0; } -static int mdfld_dsi_connector_mode_valid(struct drm_connector *connector, +static enum drm_mode_status mdfld_dsi_connector_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct mdfld_dsi_connector *dsi_connector = diff --git a/drivers/gpu/drm/gma500/oaktrail_hdmi.c b/drivers/gpu/drm/gma500/oaktrail_hdmi.c index 8b2eb32..78566a8 100644 --- a/drivers/gpu/drm/gma500/oaktrail_hdmi.c +++ b/drivers/gpu/drm/gma500/oaktrail_hdmi.c @@ -509,7 +509,7 @@ static void oaktrail_hdmi_dpms(struct drm_encoder *encoder, int mode) HDMI_WRITE(HDMI_VIDEO_REG, temp); } -static int oaktrail_hdmi_mode_valid(struct drm_connector *connector, +static enum drm_mode_status oaktrail_hdmi_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { if (mode->clock > 165000) diff --git a/drivers/gpu/drm/gma500/psb_intel_drv.h b/drivers/gpu/drm/gma500/psb_intel_drv.h index e8e4ea1..e05e539 100644 --- a/drivers/gpu/drm/gma500/psb_intel_drv.h +++ b/drivers/gpu/drm/gma500/psb_intel_drv.h @@ -255,7 +255,7 @@ extern int intelfb_remove(struct drm_device *dev, extern bool psb_intel_lvds_mode_fixup(struct drm_encoder *encoder, const struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode); -extern int psb_intel_lvds_mode_valid(struct drm_connector *connector, +extern enum drm_mode_status psb_intel_lvds_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode); extern int psb_intel_lvds_set_property(struct drm_connector *connector, struct drm_property *property, diff --git a/drivers/gpu/drm/gma500/psb_intel_lvds.c b/drivers/gpu/drm/gma500/psb_intel_lvds.c index be3eefe..8baf632 100644 --- a/drivers/gpu/drm/gma500/psb_intel_lvds.c +++ b/drivers/gpu/drm/gma500/psb_intel_lvds.c @@ -343,7 +343,7 @@ static void psb_intel_lvds_restore(struct drm_connector *connector) } } -int psb_intel_lvds_mode_valid(struct drm_connector *connector, +enum drm_mode_status psb_intel_lvds_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct drm_psb_private *dev_priv = connector->dev->dev_private; diff --git a/drivers/gpu/drm/gma500/psb_intel_sdvo.c b/drivers/gpu/drm/gma500/psb_intel_sdvo.c index 8450791..f2ee6aa 100644 --- a/drivers/gpu/drm/gma500/psb_intel_sdvo.c +++ b/drivers/gpu/drm/gma500/psb_intel_sdvo.c @@ -1157,7 +1157,7 @@ static void psb_intel_sdvo_dpms(struct drm_encoder *encoder, int mode) return; } -static int psb_intel_sdvo_mode_valid(struct drm_connector *connector, +static enum drm_mode_status psb_intel_sdvo_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct psb_intel_sdvo *psb_intel_sdvo = intel_attached_sdvo(connector); @@ -2281,7 +2281,7 @@ static bool psb_intel_sdvo_tv_create_property(struct psb_intel_sdvo *psb_intel_s for (i = 0; i < psb_intel_sdvo_connector->format_supported_num; i++) drm_property_add_enum( - psb_intel_sdvo_connector->tv_format, i, + psb_intel_sdvo_connector->tv_format, i, tv_format_names[psb_intel_sdvo_connector->tv_format_supported[i]]); psb_intel_sdvo->tv_format_index = psb_intel_sdvo_connector->tv_format_supported[0]; diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c index f4eba87..d2f4749 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c @@ -27,7 +27,7 @@ static int hibmc_connector_get_modes(struct drm_connector *connector) return drm_add_modes_noedid(connector, 800, 600); } -static int hibmc_connector_mode_valid(struct drm_connector *connector, +static enum drm_mode_status hibmc_connector_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { return MODE_OK; diff --git a/drivers/gpu/drm/i2c/tda998x_drv.c b/drivers/gpu/drm/i2c/tda998x_drv.c index 9e67a7b..421c8a7 100644 --- a/drivers/gpu/drm/i2c/tda998x_drv.c +++ b/drivers/gpu/drm/i2c/tda998x_drv.c @@ -1106,7 +1106,7 @@ static int tda998x_connector_get_modes(struct drm_connector *connector) return n; } -static int tda998x_connector_mode_valid(struct drm_connector *connector, +static enum drm_mode_status tda998x_connector_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { /* TDA19988 dotclock can go up to 165MHz */ diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index 108d21f..9de8b1c 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -25,7 +25,8 @@ config DRM_I915_DEBUG select X86_MSR # used by igt/pm_rpm select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks) select DRM_DEBUG_MM if DRM=y - select DRM_DEBUG_MM_SELFTEST + select STACKDEPOT if DRM=y # for DRM_DEBUG_MM + select DRM_DEBUG_SELFTEST select SW_SYNC # signaling validation framework (igt/syncobj*) select DRM_I915_SW_FENCE_DEBUG_OBJECTS select DRM_I915_SELFTEST @@ -89,6 +90,18 @@ config DRM_I915_SW_FENCE_CHECK_DAG If in doubt, say "N". +config DRM_I915_DEBUG_GUC + bool "Enable additional driver debugging for GuC" + depends on DRM_I915 + default n + help + Choose this option to turn on extra driver debugging that may affect + performance but will help resolve GuC related issues. + + Recommended for driver developers only. + + If in doubt, say "N". + config DRM_I915_SELFTEST bool "Enable selftests upon driver load" depends on DRM_I915 diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 4eee91a..4c6adae 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -12,12 +12,16 @@ # Note the danger in using -Wall -Wextra is that when CI updates gcc we # will most likely get a sudden build breakage... Hopefully we will fix # new warnings before CI updates! -subdir-ccflags-y := -Wall -Wextra +subdir-ccflags-y := -Wall -Wextra -Wvla subdir-ccflags-y += $(call cc-disable-warning, unused-parameter) subdir-ccflags-y += $(call cc-disable-warning, type-limits) subdir-ccflags-y += $(call cc-disable-warning, missing-field-initializers) subdir-ccflags-y += $(call cc-disable-warning, implicit-fallthrough) subdir-ccflags-y += $(call cc-disable-warning, unused-but-set-variable) +# clang warnings +subdir-ccflags-y += $(call cc-disable-warning, sign-compare) +subdir-ccflags-y += $(call cc-disable-warning, sometimes-uninitialized) +subdir-ccflags-y += $(call cc-disable-warning, initializer-overrides) subdir-ccflags-$(CONFIG_DRM_I915_WERROR) += -Werror # Fine grained warnings disable @@ -43,7 +47,8 @@ i915-y := i915_drv.o \ intel_csr.o \ intel_device_info.o \ intel_pm.o \ - intel_runtime_pm.o + intel_runtime_pm.o \ + intel_workarounds.o i915-$(CONFIG_COMPAT) += i915_ioc32.o i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o @@ -66,11 +71,11 @@ i915-y += i915_cmd_parser.o \ i915_gem_shrinker.o \ i915_gem_stolen.o \ i915_gem_tiling.o \ - i915_gem_timeline.o \ i915_gem_userptr.o \ i915_gemfs.o \ i915_query.o \ i915_request.o \ + i915_timeline.o \ i915_trace_points.o \ i915_vma.o \ intel_breadcrumbs.o \ @@ -79,7 +84,8 @@ i915-y += i915_cmd_parser.o \ intel_lrc.o \ intel_mocs.o \ intel_ringbuffer.o \ - intel_uncore.o + intel_uncore.o \ + intel_wopcm.o # general-purpose microcontroller (GuC) support i915-y += intel_uc.o \ @@ -152,7 +158,8 @@ i915-y += dvo_ch7017.o \ i915-$(CONFIG_DRM_I915_CAPTURE_ERROR) += i915_gpu_error.o i915-$(CONFIG_DRM_I915_SELFTEST) += \ selftests/i915_random.o \ - selftests/i915_selftest.o + selftests/i915_selftest.o \ + selftests/igt_flush_test.o # virtual gpu code i915-y += i915_vgpu.o @@ -171,7 +178,8 @@ i915-y += i915_perf.o \ i915_oa_glk.o \ i915_oa_cflgt2.o \ i915_oa_cflgt3.o \ - i915_oa_cnl.o + i915_oa_cnl.o \ + i915_oa_icl.o ifeq ($(CONFIG_DRM_I915_GVT),y) i915-y += intel_gvt.o diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index db6b94d..718ca08 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -813,15 +813,31 @@ static inline bool is_force_nonpriv_mmio(unsigned int offset) } static int force_nonpriv_reg_handler(struct parser_exec_state *s, - unsigned int offset, unsigned int index) + unsigned int offset, unsigned int index, char *cmd) { struct intel_gvt *gvt = s->vgpu->gvt; - unsigned int data = cmd_val(s, index + 1); + unsigned int data; + u32 ring_base; + u32 nopid; + struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv; + + if (!strcmp(cmd, "lri")) + data = cmd_val(s, index + 1); + else { + gvt_err("Unexpected forcenonpriv 0x%x write from cmd %s\n", + offset, cmd); + return -EINVAL; + } + + ring_base = dev_priv->engine[s->ring_id]->mmio_base; + nopid = i915_mmio_reg_offset(RING_NOPID(ring_base)); - if (!intel_gvt_in_force_nonpriv_whitelist(gvt, data)) { + if (!intel_gvt_in_force_nonpriv_whitelist(gvt, data) && + data != nopid) { gvt_err("Unexpected forcenonpriv 0x%x LRI write, value=0x%x\n", offset, data); - return -EPERM; + patch_value(s, cmd_ptr(s, index), nopid); + return 0; } return 0; } @@ -869,7 +885,7 @@ static int cmd_reg_handler(struct parser_exec_state *s, return -EINVAL; if (is_force_nonpriv_mmio(offset) && - force_nonpriv_reg_handler(s, offset, index)) + force_nonpriv_reg_handler(s, offset, index, cmd)) return -EPERM; if (offset == i915_mmio_reg_offset(DERRMR) || @@ -1080,6 +1096,7 @@ static int cmd_handler_mi_user_interrupt(struct parser_exec_state *s) { set_bit(cmd_interrupt_events[s->ring_id].mi_user_interrupt, s->workload->pending_events); + patch_value(s, cmd_ptr(s, 0), MI_NOOP); return 0; } @@ -1603,7 +1620,8 @@ static int batch_buffer_needs_scan(struct parser_exec_state *s) if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv) || IS_KABYLAKE(gvt->dev_priv)) { /* BDW decides privilege based on address space */ - if (cmd_val(s, 0) & (1 << 8)) + if (cmd_val(s, 0) & (1 << 8) && + !(s->vgpu->scan_nonprivbb & (1 << s->ring_id))) return 0; } return 1; @@ -1617,6 +1635,8 @@ static int find_bb_size(struct parser_exec_state *s, unsigned long *bb_size) bool bb_end = false; struct intel_vgpu *vgpu = s->vgpu; u32 cmd; + struct intel_vgpu_mm *mm = (s->buf_addr_type == GTT_BUFFER) ? + s->vgpu->gtt.ggtt_mm : s->workload->shadow_mm; *bb_size = 0; @@ -1628,18 +1648,22 @@ static int find_bb_size(struct parser_exec_state *s, unsigned long *bb_size) cmd = cmd_val(s, 0); info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id); if (info == NULL) { - gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n", - cmd, get_opcode(cmd, s->ring_id)); + gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x, addr_type=%s, ring %d, workload=%p\n", + cmd, get_opcode(cmd, s->ring_id), + (s->buf_addr_type == PPGTT_BUFFER) ? + "ppgtt" : "ggtt", s->ring_id, s->workload); return -EBADRQC; } do { - if (copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm, + if (copy_gma_to_hva(s->vgpu, mm, gma, gma + 4, &cmd) < 0) return -EFAULT; info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id); if (info == NULL) { - gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n", - cmd, get_opcode(cmd, s->ring_id)); + gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x, addr_type=%s, ring %d, workload=%p\n", + cmd, get_opcode(cmd, s->ring_id), + (s->buf_addr_type == PPGTT_BUFFER) ? + "ppgtt" : "ggtt", s->ring_id, s->workload); return -EBADRQC; } @@ -1665,6 +1689,9 @@ static int perform_bb_shadow(struct parser_exec_state *s) unsigned long gma = 0; unsigned long bb_size; int ret = 0; + struct intel_vgpu_mm *mm = (s->buf_addr_type == GTT_BUFFER) ? + s->vgpu->gtt.ggtt_mm : s->workload->shadow_mm; + unsigned long gma_start_offset = 0; /* get the start gm address of the batch buffer */ gma = get_gma_bb_from_cmd(s, 1); @@ -1679,8 +1706,24 @@ static int perform_bb_shadow(struct parser_exec_state *s) if (!bb) return -ENOMEM; + bb->ppgtt = (s->buf_addr_type == GTT_BUFFER) ? false : true; + + /* the gma_start_offset stores the batch buffer's start gma's + * offset relative to page boundary. so for non-privileged batch + * buffer, the shadowed gem object holds exactly the same page + * layout as original gem object. This is for the convience of + * replacing the whole non-privilged batch buffer page to this + * shadowed one in PPGTT at the same gma address. (this replacing + * action is not implemented yet now, but may be necessary in + * future). + * for prileged batch buffer, we just change start gma address to + * that of shadowed page. + */ + if (bb->ppgtt) + gma_start_offset = gma & ~I915_GTT_PAGE_MASK; + bb->obj = i915_gem_object_create(s->vgpu->gvt->dev_priv, - roundup(bb_size, PAGE_SIZE)); + roundup(bb_size + gma_start_offset, PAGE_SIZE)); if (IS_ERR(bb->obj)) { ret = PTR_ERR(bb->obj); goto err_free_bb; @@ -1701,9 +1744,9 @@ static int perform_bb_shadow(struct parser_exec_state *s) bb->clflush &= ~CLFLUSH_BEFORE; } - ret = copy_gma_to_hva(s->vgpu, s->vgpu->gtt.ggtt_mm, + ret = copy_gma_to_hva(s->vgpu, mm, gma, gma + bb_size, - bb->va); + bb->va + gma_start_offset); if (ret < 0) { gvt_vgpu_err("fail to copy guest ring buffer\n"); ret = -EFAULT; @@ -1729,7 +1772,7 @@ static int perform_bb_shadow(struct parser_exec_state *s) * buffer's gma in pair. After all, we don't want to pin the shadow * buffer here (too early). */ - s->ip_va = bb->va; + s->ip_va = bb->va + gma_start_offset; s->ip_gma = gma; return 0; err_unmap: @@ -2468,15 +2511,18 @@ static int cmd_parser_exec(struct parser_exec_state *s) info = get_cmd_info(s->vgpu->gvt, cmd, s->ring_id); if (info == NULL) { - gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x\n", - cmd, get_opcode(cmd, s->ring_id)); + gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x, addr_type=%s, ring %d, workload=%p\n", + cmd, get_opcode(cmd, s->ring_id), + (s->buf_addr_type == PPGTT_BUFFER) ? + "ppgtt" : "ggtt", s->ring_id, s->workload); return -EBADRQC; } s->info = info; trace_gvt_command(vgpu->id, s->ring_id, s->ip_gma, s->ip_va, - cmd_length(s), s->buf_type); + cmd_length(s), s->buf_type, s->buf_addr_type, + s->workload, info->name); if (info->handler) { ret = info->handler(s); diff --git a/drivers/gpu/drm/i915/gvt/debugfs.c b/drivers/gpu/drm/i915/gvt/debugfs.c index 32a66df..2ec89bc 100644 --- a/drivers/gpu/drm/i915/gvt/debugfs.c +++ b/drivers/gpu/drm/i915/gvt/debugfs.c @@ -122,18 +122,69 @@ static int vgpu_mmio_diff_show(struct seq_file *s, void *unused) seq_printf(s, "Total: %d, Diff: %d\n", param.total, param.diff); return 0; } +DEFINE_SHOW_ATTRIBUTE(vgpu_mmio_diff); -static int vgpu_mmio_diff_open(struct inode *inode, struct file *file) +static int +vgpu_scan_nonprivbb_get(void *data, u64 *val) { - return single_open(file, vgpu_mmio_diff_show, inode->i_private); + struct intel_vgpu *vgpu = (struct intel_vgpu *)data; + *val = vgpu->scan_nonprivbb; + return 0; } -static const struct file_operations vgpu_mmio_diff_fops = { - .open = vgpu_mmio_diff_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +/* + * set/unset bit engine_id of vgpu->scan_nonprivbb to turn on/off scanning + * of non-privileged batch buffer. e.g. + * if vgpu->scan_nonprivbb=3, then it will scan non-privileged batch buffer + * on engine 0 and 1. + */ +static int +vgpu_scan_nonprivbb_set(void *data, u64 val) +{ + struct intel_vgpu *vgpu = (struct intel_vgpu *)data; + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + enum intel_engine_id id; + char buf[128], *s; + int len; + + val &= (1 << I915_NUM_ENGINES) - 1; + + if (vgpu->scan_nonprivbb == val) + return 0; + + if (!val) + goto done; + + len = sprintf(buf, + "gvt: vgpu %d turns on non-privileged batch buffers scanning on Engines:", + vgpu->id); + + s = buf + len; + + for (id = 0; id < I915_NUM_ENGINES; id++) { + struct intel_engine_cs *engine; + + engine = dev_priv->engine[id]; + if (engine && (val & (1 << id))) { + len = snprintf(s, 4, "%d, ", engine->id); + s += len; + } else + val &= ~(1 << id); + } + + if (val) + sprintf(s, "low performance expected."); + + pr_warn("%s\n", buf); + +done: + vgpu->scan_nonprivbb = val; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(vgpu_scan_nonprivbb_fops, + vgpu_scan_nonprivbb_get, vgpu_scan_nonprivbb_set, + "0x%llx\n"); /** * intel_gvt_debugfs_add_vgpu - register debugfs entries for a vGPU @@ -162,6 +213,11 @@ int intel_gvt_debugfs_add_vgpu(struct intel_vgpu *vgpu) if (!ent) return -ENOMEM; + ent = debugfs_create_file("scan_nonprivbb", 0644, vgpu->debugfs, + vgpu, &vgpu_scan_nonprivbb_fops); + if (!ent) + return -ENOMEM; + return 0; } diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index dd96ffc..6d8180e 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -169,6 +169,8 @@ static u8 dpcd_fix_data[DPCD_HEADER_SIZE] = { static void emulate_monitor_status_change(struct intel_vgpu *vgpu) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + int pipe; + vgpu_vreg_t(vgpu, SDEISR) &= ~(SDE_PORTB_HOTPLUG_CPT | SDE_PORTC_HOTPLUG_CPT | SDE_PORTD_HOTPLUG_CPT); @@ -267,6 +269,14 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu) if (IS_BROADWELL(dev_priv)) vgpu_vreg_t(vgpu, PCH_ADPA) &= ~ADPA_CRT_HOTPLUG_MONITOR_MASK; + /* Disable Primary/Sprite/Cursor plane */ + for_each_pipe(dev_priv, pipe) { + vgpu_vreg_t(vgpu, DSPCNTR(pipe)) &= ~DISPLAY_PLANE_ENABLE; + vgpu_vreg_t(vgpu, SPRCTL(pipe)) &= ~SPRITE_ENABLE; + vgpu_vreg_t(vgpu, CURCNTR(pipe)) &= ~CURSOR_MODE; + vgpu_vreg_t(vgpu, CURCNTR(pipe)) |= CURSOR_MODE_DISABLE; + } + vgpu_vreg_t(vgpu, PIPECONF(PIPE_A)) |= PIPECONF_ENABLE; } diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c index b555eb2..6f4f8e9 100644 --- a/drivers/gpu/drm/i915/gvt/dmabuf.c +++ b/drivers/gpu/drm/i915/gvt/dmabuf.c @@ -323,6 +323,7 @@ static void update_fb_info(struct vfio_device_gfx_plane_info *gvt_dmabuf, struct intel_vgpu_fb_info *fb_info) { gvt_dmabuf->drm_format = fb_info->drm_format; + gvt_dmabuf->drm_format_mod = fb_info->drm_format_mod; gvt_dmabuf->width = fb_info->width; gvt_dmabuf->height = fb_info->height; gvt_dmabuf->stride = fb_info->stride; diff --git a/drivers/gpu/drm/i915/gvt/fb_decoder.c b/drivers/gpu/drm/i915/gvt/fb_decoder.c index 6b50fe7..1c12068 100644 --- a/drivers/gpu/drm/i915/gvt/fb_decoder.c +++ b/drivers/gpu/drm/i915/gvt/fb_decoder.c @@ -245,16 +245,13 @@ int intel_vgpu_decode_primary_plane(struct intel_vgpu *vgpu, plane->hw_format = fmt; plane->base = vgpu_vreg_t(vgpu, DSPSURF(pipe)) & I915_GTT_PAGE_MASK; - if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) { - gvt_vgpu_err("invalid gma address: %lx\n", - (unsigned long)plane->base); + if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) return -EINVAL; - } plane->base_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, plane->base); if (plane->base_gpa == INTEL_GVT_INVALID_ADDR) { - gvt_vgpu_err("invalid gma address: %lx\n", - (unsigned long)plane->base); + gvt_vgpu_err("Translate primary plane gma 0x%x to gpa fail\n", + plane->base); return -EINVAL; } @@ -371,16 +368,13 @@ int intel_vgpu_decode_cursor_plane(struct intel_vgpu *vgpu, alpha_plane, alpha_force); plane->base = vgpu_vreg_t(vgpu, CURBASE(pipe)) & I915_GTT_PAGE_MASK; - if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) { - gvt_vgpu_err("invalid gma address: %lx\n", - (unsigned long)plane->base); + if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) return -EINVAL; - } plane->base_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, plane->base); if (plane->base_gpa == INTEL_GVT_INVALID_ADDR) { - gvt_vgpu_err("invalid gma address: %lx\n", - (unsigned long)plane->base); + gvt_vgpu_err("Translate cursor plane gma 0x%x to gpa fail\n", + plane->base); return -EINVAL; } @@ -476,16 +470,13 @@ int intel_vgpu_decode_sprite_plane(struct intel_vgpu *vgpu, plane->drm_format = drm_format; plane->base = vgpu_vreg_t(vgpu, SPRSURF(pipe)) & I915_GTT_PAGE_MASK; - if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) { - gvt_vgpu_err("invalid gma address: %lx\n", - (unsigned long)plane->base); + if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) return -EINVAL; - } plane->base_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, plane->base); if (plane->base_gpa == INTEL_GVT_INVALID_ADDR) { - gvt_vgpu_err("invalid gma address: %lx\n", - (unsigned long)plane->base); + gvt_vgpu_err("Translate sprite plane gma 0x%x to gpa fail\n", + plane->base); return -EINVAL; } diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index d292812..78e55aa 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -530,6 +530,16 @@ static void ggtt_set_guest_entry(struct intel_vgpu_mm *mm, false, 0, mm->vgpu); } +static void ggtt_get_host_entry(struct intel_vgpu_mm *mm, + struct intel_gvt_gtt_entry *entry, unsigned long index) +{ + struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops; + + GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT); + + pte_ops->get_entry(NULL, entry, index, false, 0, mm->vgpu); +} + static void ggtt_set_host_entry(struct intel_vgpu_mm *mm, struct intel_gvt_gtt_entry *entry, unsigned long index) { @@ -1818,6 +1828,18 @@ int intel_vgpu_emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off, return ret; } +static void ggtt_invalidate_pte(struct intel_vgpu *vgpu, + struct intel_gvt_gtt_entry *entry) +{ + struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops; + unsigned long pfn; + + pfn = pte_ops->get_pfn(entry); + if (pfn != vgpu->gvt->gtt.scratch_mfn) + intel_gvt_hypervisor_dma_unmap_guest_page(vgpu, + pfn << PAGE_SHIFT); +} + static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, void *p_data, unsigned int bytes) { @@ -1844,10 +1866,10 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, memcpy((void *)&e.val64 + (off & (info->gtt_entry_size - 1)), p_data, bytes); - m = e; if (ops->test_present(&e)) { gfn = ops->get_pfn(&e); + m = e; /* one PTE update may be issued in multiple writes and the * first write may not construct a valid gfn @@ -1868,8 +1890,12 @@ static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, ops->set_pfn(&m, gvt->gtt.scratch_mfn); } else ops->set_pfn(&m, dma_addr >> PAGE_SHIFT); - } else + } else { + ggtt_get_host_entry(ggtt_mm, &m, g_gtt_index); + ggtt_invalidate_pte(vgpu, &m); ops->set_pfn(&m, gvt->gtt.scratch_mfn); + ops->clear_present(&m); + } out: ggtt_set_host_entry(ggtt_mm, &m, g_gtt_index); @@ -2030,7 +2056,7 @@ int intel_vgpu_init_gtt(struct intel_vgpu *vgpu) return PTR_ERR(gtt->ggtt_mm); } - intel_vgpu_reset_ggtt(vgpu); + intel_vgpu_reset_ggtt(vgpu, false); return create_scratch_page_tree(vgpu); } @@ -2315,17 +2341,19 @@ void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu) /** * intel_vgpu_reset_ggtt - reset the GGTT entry * @vgpu: a vGPU + * @invalidate_old: invalidate old entries * * This function is called at the vGPU create stage * to reset all the GGTT entries. * */ -void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu) +void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu, bool invalidate_old) { struct intel_gvt *gvt = vgpu->gvt; struct drm_i915_private *dev_priv = gvt->dev_priv; struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops; struct intel_gvt_gtt_entry entry = {.type = GTT_TYPE_GGTT_PTE}; + struct intel_gvt_gtt_entry old_entry; u32 index; u32 num_entries; @@ -2334,13 +2362,23 @@ void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu) index = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT; num_entries = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT; - while (num_entries--) + while (num_entries--) { + if (invalidate_old) { + ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index); + ggtt_invalidate_pte(vgpu, &old_entry); + } ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++); + } index = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT; num_entries = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT; - while (num_entries--) + while (num_entries--) { + if (invalidate_old) { + ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index); + ggtt_invalidate_pte(vgpu, &old_entry); + } ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++); + } ggtt_invalidate(dev_priv); } @@ -2360,5 +2398,5 @@ void intel_vgpu_reset_gtt(struct intel_vgpu *vgpu) * removing the shadow pages. */ intel_vgpu_destroy_all_ppgtt_mm(vgpu); - intel_vgpu_reset_ggtt(vgpu); + intel_vgpu_reset_ggtt(vgpu, true); } diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h index a8b369c..3792f2b 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.h +++ b/drivers/gpu/drm/i915/gvt/gtt.h @@ -193,7 +193,7 @@ struct intel_vgpu_gtt { extern int intel_vgpu_init_gtt(struct intel_vgpu *vgpu); extern void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu); -void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu); +void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu, bool invalidate_old); void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu); extern int intel_gvt_init_gtt(struct intel_gvt *gvt); diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index efacd8a..05d15a0 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -99,7 +99,6 @@ struct intel_vgpu_fence { struct intel_vgpu_mmio { void *vreg; void *sreg; - bool disable_warn_untrack; }; #define INTEL_GVT_MAX_BAR_NUM 4 @@ -226,6 +225,7 @@ struct intel_vgpu { struct completion vblank_done; + u32 scan_nonprivbb; }; /* validating GM healthy status*/ diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 8c5d5d0..4b6532f 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -191,6 +191,8 @@ static int sanitize_fence_mmio_access(struct intel_vgpu *vgpu, unsigned int max_fence = vgpu_fence_sz(vgpu); if (fence_num >= max_fence) { + gvt_vgpu_err("access oob fence reg %d/%d\n", + fence_num, max_fence); /* When guest access oob fence regs without access * pv_info first, we treat guest not supporting GVT, @@ -200,11 +202,6 @@ static int sanitize_fence_mmio_access(struct intel_vgpu *vgpu, enter_failsafe_mode(vgpu, GVT_FAILSAFE_UNSUPPORTED_GUEST); - if (!vgpu->mmio.disable_warn_untrack) { - gvt_vgpu_err("found oob fence register access\n"); - gvt_vgpu_err("total fence %d, access fence %d\n", - max_fence, fence_num); - } memset(p_data, 0, bytes); return -EINVAL; } @@ -477,22 +474,28 @@ static int force_nonpriv_write(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { u32 reg_nonpriv = *(u32 *)p_data; + int ring_id = intel_gvt_render_mmio_to_ring_id(vgpu->gvt, offset); + u32 ring_base; + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; int ret = -EINVAL; - if ((bytes != 4) || ((offset & (bytes - 1)) != 0)) { - gvt_err("vgpu(%d) Invalid FORCE_NONPRIV offset %x(%dB)\n", - vgpu->id, offset, bytes); + if ((bytes != 4) || ((offset & (bytes - 1)) != 0) || ring_id < 0) { + gvt_err("vgpu(%d) ring %d Invalid FORCE_NONPRIV offset %x(%dB)\n", + vgpu->id, ring_id, offset, bytes); return ret; } - if (in_whitelist(reg_nonpriv)) { + ring_base = dev_priv->engine[ring_id]->mmio_base; + + if (in_whitelist(reg_nonpriv) || + reg_nonpriv == i915_mmio_reg_offset(RING_NOPID(ring_base))) { ret = intel_vgpu_default_mmio_write(vgpu, offset, p_data, bytes); - } else { - gvt_err("vgpu(%d) Invalid FORCE_NONPRIV write %x\n", - vgpu->id, reg_nonpriv); - } - return ret; + } else + gvt_err("vgpu(%d) Invalid FORCE_NONPRIV write %x at offset %x\n", + vgpu->id, reg_nonpriv, offset); + + return 0; } static int ddi_buf_ctl_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, @@ -1150,6 +1153,7 @@ static int handle_g2v_notification(struct intel_vgpu *vgpu, int notification) switch (notification) { case VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE: root_entry_type = GTT_TYPE_PPGTT_ROOT_L3_ENTRY; + /* fall through */ case VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE: mm = intel_vgpu_get_ppgtt_mm(vgpu, root_entry_type, pdps); return PTR_ERR_OR_ZERO(mm); @@ -3091,9 +3095,7 @@ int intel_vgpu_mmio_reg_rw(struct intel_vgpu *vgpu, unsigned int offset, */ mmio_info = find_mmio_info(gvt, offset); if (!mmio_info) { - if (!vgpu->mmio.disable_warn_untrack) - gvt_vgpu_err("untracked MMIO %08x len %d\n", - offset, bytes); + gvt_dbg_mmio("untracked MMIO %08x len %d\n", offset, bytes); goto default_rw; } diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index c16a492..1466d87 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -1301,7 +1301,7 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, } - return 0; + return -ENOTTY; } static ssize_t diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c index 11b71b3..e4960af 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.c +++ b/drivers/gpu/drm/i915/gvt/mmio.c @@ -244,8 +244,6 @@ void intel_vgpu_reset_mmio(struct intel_vgpu *vgpu, bool dmlr) /* set the bit 0:2(Core C-State ) to C0 */ vgpu_vreg_t(vgpu, GEN6_GT_CORE_STATUS) = 0; - - vgpu->mmio.disable_warn_untrack = false; } else { #define GVT_GEN8_MMIO_RESET_OFFSET (0x44200) /* only reset the engine related, so starting with 0x44200 diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index a5bac83..0f94955 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -448,7 +448,7 @@ static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next, bool is_inhibit_context(struct i915_gem_context *ctx, int ring_id) { - u32 *reg_state = ctx->engine[ring_id].lrc_reg_state; + u32 *reg_state = ctx->__engine[ring_id].lrc_reg_state; u32 inhibit_mask = _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c index 75b7bc7..d053cbe 100644 --- a/drivers/gpu/drm/i915/gvt/sched_policy.c +++ b/drivers/gpu/drm/i915/gvt/sched_policy.c @@ -53,7 +53,6 @@ struct vgpu_sched_data { bool active; ktime_t sched_in_time; - ktime_t sched_out_time; ktime_t sched_time; ktime_t left_ts; ktime_t allocated_ts; @@ -66,17 +65,22 @@ struct gvt_sched_data { struct hrtimer timer; unsigned long period; struct list_head lru_runq_head; + ktime_t expire_time; }; -static void vgpu_update_timeslice(struct intel_vgpu *pre_vgpu) +static void vgpu_update_timeslice(struct intel_vgpu *vgpu, ktime_t cur_time) { ktime_t delta_ts; - struct vgpu_sched_data *vgpu_data = pre_vgpu->sched_data; + struct vgpu_sched_data *vgpu_data; - delta_ts = vgpu_data->sched_out_time - vgpu_data->sched_in_time; + if (!vgpu || vgpu == vgpu->gvt->idle_vgpu) + return; - vgpu_data->sched_time += delta_ts; - vgpu_data->left_ts -= delta_ts; + vgpu_data = vgpu->sched_data; + delta_ts = ktime_sub(cur_time, vgpu_data->sched_in_time); + vgpu_data->sched_time = ktime_add(vgpu_data->sched_time, delta_ts); + vgpu_data->left_ts = ktime_sub(vgpu_data->left_ts, delta_ts); + vgpu_data->sched_in_time = cur_time; } #define GVT_TS_BALANCE_PERIOD_MS 100 @@ -150,11 +154,7 @@ static void try_to_schedule_next_vgpu(struct intel_gvt *gvt) } cur_time = ktime_get(); - if (scheduler->current_vgpu) { - vgpu_data = scheduler->current_vgpu->sched_data; - vgpu_data->sched_out_time = cur_time; - vgpu_update_timeslice(scheduler->current_vgpu); - } + vgpu_update_timeslice(scheduler->current_vgpu, cur_time); vgpu_data = scheduler->next_vgpu->sched_data; vgpu_data->sched_in_time = cur_time; @@ -226,17 +226,22 @@ out: void intel_gvt_schedule(struct intel_gvt *gvt) { struct gvt_sched_data *sched_data = gvt->scheduler.sched_data; - static uint64_t timer_check; + ktime_t cur_time; mutex_lock(&gvt->lock); + cur_time = ktime_get(); if (test_and_clear_bit(INTEL_GVT_REQUEST_SCHED, (void *)&gvt->service_request)) { - if (!(timer_check++ % GVT_TS_BALANCE_PERIOD_MS)) + if (cur_time >= sched_data->expire_time) { gvt_balance_timeslice(sched_data); + sched_data->expire_time = ktime_add_ms( + cur_time, GVT_TS_BALANCE_PERIOD_MS); + } } clear_bit(INTEL_GVT_REQUEST_EVENT_SCHED, (void *)&gvt->service_request); + vgpu_update_timeslice(gvt->scheduler.current_vgpu, cur_time); tbs_sched_func(sched_data); mutex_unlock(&gvt->lock); diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 638abe8..c2d183b 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -58,7 +58,7 @@ static void update_shadow_pdps(struct intel_vgpu_workload *workload) int ring_id = workload->ring_id; struct i915_gem_context *shadow_ctx = vgpu->submission.shadow_ctx; struct drm_i915_gem_object *ctx_obj = - shadow_ctx->engine[ring_id].state->obj; + shadow_ctx->__engine[ring_id].state->obj; struct execlist_ring_context *shadow_ring_context; struct page *page; @@ -97,7 +97,7 @@ static void sr_oa_regs(struct intel_vgpu_workload *workload, i915_mmio_reg_offset(EU_PERF_CNTL6), }; - if (!workload || !reg_state || workload->ring_id != RCS) + if (workload->ring_id != RCS) return; if (save) { @@ -130,7 +130,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload) int ring_id = workload->ring_id; struct i915_gem_context *shadow_ctx = vgpu->submission.shadow_ctx; struct drm_i915_gem_object *ctx_obj = - shadow_ctx->engine[ring_id].state->obj; + shadow_ctx->__engine[ring_id].state->obj; struct execlist_ring_context *shadow_ring_context; struct page *page; void *dst; @@ -283,7 +283,7 @@ static int shadow_context_status_change(struct notifier_block *nb, static void shadow_context_descriptor_update(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(ctx, engine); u64 desc = 0; desc = ce->lrc_desc; @@ -389,7 +389,7 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) * shadow_ctx pages invalid. So gvt need to pin itself. After update * the guest context, gvt can unpin the shadow_ctx safely. */ - ring = engine->context_pin(engine, shadow_ctx); + ring = intel_context_pin(shadow_ctx, engine); if (IS_ERR(ring)) { ret = PTR_ERR(ring); gvt_vgpu_err("fail to pin shadow context\n"); @@ -403,7 +403,7 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) return 0; err_unpin: - engine->context_unpin(engine, shadow_ctx); + intel_context_unpin(shadow_ctx, engine); err_shadow: release_shadow_wa_ctx(&workload->wa_ctx); err_scan: @@ -437,7 +437,7 @@ static int intel_gvt_generate_request(struct intel_vgpu_workload *workload) return 0; err_unpin: - engine->context_unpin(engine, shadow_ctx); + intel_context_unpin(shadow_ctx, engine); release_shadow_wa_ctx(&workload->wa_ctx); return ret; } @@ -452,12 +452,6 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload) int ret; list_for_each_entry(bb, &workload->shadow_bb, list) { - bb->vma = i915_gem_object_ggtt_pin(bb->obj, NULL, 0, 0, 0); - if (IS_ERR(bb->vma)) { - ret = PTR_ERR(bb->vma); - goto err; - } - /* For privilge batch buffer and not wa_ctx, the bb_start_cmd_va * is only updated into ring_scan_buffer, not real ring address * allocated in later copy_workload_to_ring_buffer. pls be noted @@ -469,25 +463,53 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload) bb->bb_start_cmd_va = workload->shadow_ring_buffer_va + bb->bb_offset; - /* relocate shadow batch buffer */ - bb->bb_start_cmd_va[1] = i915_ggtt_offset(bb->vma); - if (gmadr_bytes == 8) - bb->bb_start_cmd_va[2] = 0; + if (bb->ppgtt) { + /* for non-priv bb, scan&shadow is only for + * debugging purpose, so the content of shadow bb + * is the same as original bb. Therefore, + * here, rather than switch to shadow bb's gma + * address, we directly use original batch buffer's + * gma address, and send original bb to hardware + * directly + */ + if (bb->clflush & CLFLUSH_AFTER) { + drm_clflush_virt_range(bb->va, + bb->obj->base.size); + bb->clflush &= ~CLFLUSH_AFTER; + } + i915_gem_obj_finish_shmem_access(bb->obj); + bb->accessing = false; + + } else { + bb->vma = i915_gem_object_ggtt_pin(bb->obj, + NULL, 0, 0, 0); + if (IS_ERR(bb->vma)) { + ret = PTR_ERR(bb->vma); + goto err; + } - /* No one is going to touch shadow bb from now on. */ - if (bb->clflush & CLFLUSH_AFTER) { - drm_clflush_virt_range(bb->va, bb->obj->base.size); - bb->clflush &= ~CLFLUSH_AFTER; - } + /* relocate shadow batch buffer */ + bb->bb_start_cmd_va[1] = i915_ggtt_offset(bb->vma); + if (gmadr_bytes == 8) + bb->bb_start_cmd_va[2] = 0; - ret = i915_gem_object_set_to_gtt_domain(bb->obj, false); - if (ret) - goto err; + /* No one is going to touch shadow bb from now on. */ + if (bb->clflush & CLFLUSH_AFTER) { + drm_clflush_virt_range(bb->va, + bb->obj->base.size); + bb->clflush &= ~CLFLUSH_AFTER; + } - i915_gem_obj_finish_shmem_access(bb->obj); - bb->accessing = false; + ret = i915_gem_object_set_to_gtt_domain(bb->obj, + false); + if (ret) + goto err; - i915_vma_move_to_active(bb->vma, workload->req, 0); + i915_gem_obj_finish_shmem_access(bb->obj); + bb->accessing = false; + + i915_vma_move_to_active(bb->vma, workload->req, 0); + } } return 0; err: @@ -504,7 +526,7 @@ static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx) struct intel_vgpu_submission *s = &workload->vgpu->submission; struct i915_gem_context *shadow_ctx = s->shadow_ctx; struct drm_i915_gem_object *ctx_obj = - shadow_ctx->engine[ring_id].state->obj; + shadow_ctx->__engine[ring_id].state->obj; struct execlist_ring_context *shadow_ring_context; struct page *page; @@ -666,7 +688,7 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) ret = prepare_workload(workload); if (ret) { - engine->context_unpin(engine, shadow_ctx); + intel_context_unpin(shadow_ctx, engine); goto out; } @@ -749,7 +771,7 @@ static void update_guest_context(struct intel_vgpu_workload *workload) struct i915_gem_context *shadow_ctx = s->shadow_ctx; int ring_id = workload->ring_id; struct drm_i915_gem_object *ctx_obj = - shadow_ctx->engine[ring_id].state->obj; + shadow_ctx->__engine[ring_id].state->obj; struct execlist_ring_context *shadow_ring_context; struct page *page; void *src; @@ -876,7 +898,7 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id) } mutex_lock(&dev_priv->drm.struct_mutex); /* unpin shadow ctx as the shadow_ctx update is done */ - engine->context_unpin(engine, s->shadow_ctx); + intel_context_unpin(s->shadow_ctx, engine); mutex_unlock(&dev_priv->drm.struct_mutex); } @@ -1134,9 +1156,6 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) if (IS_ERR(s->shadow_ctx)) return PTR_ERR(s->shadow_ctx); - if (HAS_LOGICAL_RING_PREEMPTION(vgpu->gvt->dev_priv)) - s->shadow_ctx->priority = INT_MAX; - bitmap_zero(s->shadow_ctx_desc_updated, I915_NUM_ENGINES); s->workloads = kmem_cache_create_usercopy("gvt-g_vgpu_workload", diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index 486ed57..6c64478 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -125,6 +125,7 @@ struct intel_vgpu_shadow_bb { unsigned int clflush; bool accessing; unsigned long bb_offset; + bool ppgtt; }; #define workload_q_head(vgpu, ring_id) \ diff --git a/drivers/gpu/drm/i915/gvt/trace.h b/drivers/gpu/drm/i915/gvt/trace.h index 82093f1..1fd6420 100644 --- a/drivers/gpu/drm/i915/gvt/trace.h +++ b/drivers/gpu/drm/i915/gvt/trace.h @@ -224,19 +224,25 @@ TRACE_EVENT(oos_sync, TP_printk("%s", __entry->buf) ); +#define GVT_CMD_STR_LEN 40 TRACE_EVENT(gvt_command, - TP_PROTO(u8 vgpu_id, u8 ring_id, u32 ip_gma, u32 *cmd_va, u32 cmd_len, - u32 buf_type), + TP_PROTO(u8 vgpu_id, u8 ring_id, u32 ip_gma, u32 *cmd_va, + u32 cmd_len, u32 buf_type, u32 buf_addr_type, + void *workload, char *cmd_name), - TP_ARGS(vgpu_id, ring_id, ip_gma, cmd_va, cmd_len, buf_type), + TP_ARGS(vgpu_id, ring_id, ip_gma, cmd_va, cmd_len, buf_type, + buf_addr_type, workload, cmd_name), TP_STRUCT__entry( __field(u8, vgpu_id) __field(u8, ring_id) __field(u32, ip_gma) __field(u32, buf_type) + __field(u32, buf_addr_type) __field(u32, cmd_len) + __field(void*, workload) __dynamic_array(u32, raw_cmd, cmd_len) + __array(char, cmd_name, GVT_CMD_STR_LEN) ), TP_fast_assign( @@ -244,17 +250,25 @@ TRACE_EVENT(gvt_command, __entry->ring_id = ring_id; __entry->ip_gma = ip_gma; __entry->buf_type = buf_type; + __entry->buf_addr_type = buf_addr_type; __entry->cmd_len = cmd_len; + __entry->workload = workload; + snprintf(__entry->cmd_name, GVT_CMD_STR_LEN, "%s", cmd_name); memcpy(__get_dynamic_array(raw_cmd), cmd_va, cmd_len * sizeof(*cmd_va)); ), - TP_printk("vgpu%d ring %d: buf_type %u, ip_gma %08x, raw cmd %s", + TP_printk("vgpu%d ring %d: address_type %u, buf_type %u, ip_gma %08x,cmd (name=%s,len=%u,raw cmd=%s), workload=%p\n", __entry->vgpu_id, __entry->ring_id, + __entry->buf_addr_type, __entry->buf_type, __entry->ip_gma, - __print_array(__get_dynamic_array(raw_cmd), __entry->cmd_len, 4)) + __entry->cmd_name, + __entry->cmd_len, + __print_array(__get_dynamic_array(raw_cmd), + __entry->cmd_len, 4), + __entry->workload) ); #define GVT_TEMP_STR_LEN 10 diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 89f7ff2..13e7b9e 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -377,16 +377,19 @@ static void print_batch_pool_stats(struct seq_file *m, print_file_stats(m, "[k]batch pool", stats); } -static int per_file_ctx_stats(int id, void *ptr, void *data) +static int per_file_ctx_stats(int idx, void *ptr, void *data) { struct i915_gem_context *ctx = ptr; - int n; + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, ctx->i915, id) { + struct intel_context *ce = to_intel_context(ctx, engine); - for (n = 0; n < ARRAY_SIZE(ctx->engine); n++) { - if (ctx->engine[n].state) - per_file_stats(0, ctx->engine[n].state->obj, data); - if (ctx->engine[n].ring) - per_file_stats(0, ctx->engine[n].ring->vma->obj, data); + if (ce->state) + per_file_stats(0, ce->state->obj, data); + if (ce->ring) + per_file_stats(0, ce->ring->vma->obj, data); } return 0; @@ -1215,20 +1218,20 @@ static int i915_frequency_info(struct seq_file *m, void *unused) max_freq = (IS_GEN9_LP(dev_priv) ? rp_state_cap >> 0 : rp_state_cap >> 16) & 0xff; max_freq *= (IS_GEN9_BC(dev_priv) || - IS_CANNONLAKE(dev_priv) ? GEN9_FREQ_SCALER : 1); + INTEL_GEN(dev_priv) >= 10 ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Lowest (RPN) frequency: %dMHz\n", intel_gpu_freq(dev_priv, max_freq)); max_freq = (rp_state_cap & 0xff00) >> 8; max_freq *= (IS_GEN9_BC(dev_priv) || - IS_CANNONLAKE(dev_priv) ? GEN9_FREQ_SCALER : 1); + INTEL_GEN(dev_priv) >= 10 ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Nominal (RP1) frequency: %dMHz\n", intel_gpu_freq(dev_priv, max_freq)); max_freq = (IS_GEN9_LP(dev_priv) ? rp_state_cap >> 16 : rp_state_cap >> 0) & 0xff; max_freq *= (IS_GEN9_BC(dev_priv) || - IS_CANNONLAKE(dev_priv) ? GEN9_FREQ_SCALER : 1); + INTEL_GEN(dev_priv) >= 10 ? GEN9_FREQ_SCALER : 1); seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n", intel_gpu_freq(dev_priv, max_freq)); seq_printf(m, "Max overclocked frequency: %dMHz\n", @@ -1340,10 +1343,9 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused) struct rb_node *rb; seq_printf(m, "%s:\n", engine->name); - seq_printf(m, "\tseqno = %x [current %x, last %x], inflight %d\n", + seq_printf(m, "\tseqno = %x [current %x, last %x]\n", engine->hangcheck.seqno, seqno[id], - intel_engine_last_submit(engine), - engine->timeline->inflight_seqnos); + intel_engine_last_submit(engine)); seq_printf(m, "\twaiters? %s, fake irq active? %s, stalled? %s\n", yesno(intel_engine_has_waiter(engine)), yesno(test_bit(engine->id, @@ -1796,9 +1798,9 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); struct intel_rps *rps = &dev_priv->gt_pm.rps; - int ret = 0; - int gpu_freq, ia_freq; unsigned int max_gpu_freq, min_gpu_freq; + int gpu_freq, ia_freq; + int ret; if (!HAS_LLC(dev_priv)) return -ENODEV; @@ -1809,13 +1811,12 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) if (ret) goto out; - if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { + min_gpu_freq = rps->min_freq; + max_gpu_freq = rps->max_freq; + if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) { /* Convert GT frequency to 50 HZ units */ - min_gpu_freq = rps->min_freq_softlimit / GEN9_FREQ_SCALER; - max_gpu_freq = rps->max_freq_softlimit / GEN9_FREQ_SCALER; - } else { - min_gpu_freq = rps->min_freq_softlimit; - max_gpu_freq = rps->max_freq_softlimit; + min_gpu_freq /= GEN9_FREQ_SCALER; + max_gpu_freq /= GEN9_FREQ_SCALER; } seq_puts(m, "GPU freq (MHz)\tEffective CPU freq (MHz)\tEffective Ring freq (MHz)\n"); @@ -1828,7 +1829,7 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) seq_printf(m, "%d\t\t%d\t\t\t\t%d\n", intel_gpu_freq(dev_priv, (gpu_freq * (IS_GEN9_BC(dev_priv) || - IS_CANNONLAKE(dev_priv) ? + INTEL_GEN(dev_priv) >= 10 ? GEN9_FREQ_SCALER : 1))), ((ia_freq >> 0) & 0xff) * 100, ((ia_freq >> 8) & 0xff) * 100); @@ -1923,8 +1924,8 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data) static void describe_ctx_ring(struct seq_file *m, struct intel_ring *ring) { - seq_printf(m, " (ringbuffer, space: %d, head: %u, tail: %u)", - ring->space, ring->head, ring->tail); + seq_printf(m, " (ringbuffer, space: %d, head: %u, tail: %u, emit: %u)", + ring->space, ring->head, ring->tail, ring->emit); } static int i915_context_status(struct seq_file *m, void *unused) @@ -1961,7 +1962,8 @@ static int i915_context_status(struct seq_file *m, void *unused) seq_putc(m, '\n'); for_each_engine(engine, dev_priv, id) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = + to_intel_context(ctx, engine); seq_printf(m, "%s: ", engine->name); if (ce->state) @@ -2326,30 +2328,45 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data) return 0; } -static void i915_guc_log_info(struct seq_file *m, - struct drm_i915_private *dev_priv) +static const char * +stringify_guc_log_type(enum guc_log_buffer_type type) { - struct intel_guc *guc = &dev_priv->guc; + switch (type) { + case GUC_ISR_LOG_BUFFER: + return "ISR"; + case GUC_DPC_LOG_BUFFER: + return "DPC"; + case GUC_CRASH_DUMP_LOG_BUFFER: + return "CRASH"; + default: + MISSING_CASE(type); + } - seq_puts(m, "\nGuC logging stats:\n"); + return ""; +} - seq_printf(m, "\tISR: flush count %10u, overflow count %10u\n", - guc->log.flush_count[GUC_ISR_LOG_BUFFER], - guc->log.total_overflow_count[GUC_ISR_LOG_BUFFER]); +static void i915_guc_log_info(struct seq_file *m, + struct drm_i915_private *dev_priv) +{ + struct intel_guc_log *log = &dev_priv->guc.log; + enum guc_log_buffer_type type; - seq_printf(m, "\tDPC: flush count %10u, overflow count %10u\n", - guc->log.flush_count[GUC_DPC_LOG_BUFFER], - guc->log.total_overflow_count[GUC_DPC_LOG_BUFFER]); + if (!intel_guc_log_relay_enabled(log)) { + seq_puts(m, "GuC log relay disabled\n"); + return; + } - seq_printf(m, "\tCRASH: flush count %10u, overflow count %10u\n", - guc->log.flush_count[GUC_CRASH_DUMP_LOG_BUFFER], - guc->log.total_overflow_count[GUC_CRASH_DUMP_LOG_BUFFER]); + seq_puts(m, "GuC logging stats:\n"); - seq_printf(m, "\tTotal flush interrupt count: %u\n", - guc->log.flush_interrupt_count); + seq_printf(m, "\tRelay full count: %u\n", + log->relay.full_count); - seq_printf(m, "\tCapture miss count: %u\n", - guc->log.capture_miss_count); + for (type = GUC_ISR_LOG_BUFFER; type < GUC_MAX_LOG_BUFFER; type++) { + seq_printf(m, "\t%s:\tflush count %10u, overflow count %10u\n", + stringify_guc_log_type(type), + log->stats[type].flush, + log->stats[type].sampled_overflow); + } } static void i915_guc_client_info(struct seq_file *m, @@ -2379,14 +2396,19 @@ static int i915_guc_info(struct seq_file *m, void *data) struct drm_i915_private *dev_priv = node_to_i915(m->private); const struct intel_guc *guc = &dev_priv->guc; - if (!USES_GUC_SUBMISSION(dev_priv)) + if (!USES_GUC(dev_priv)) return -ENODEV; + i915_guc_log_info(m, dev_priv); + + if (!USES_GUC_SUBMISSION(dev_priv)) + return 0; + GEM_BUG_ON(!guc->execbuf_client); - seq_printf(m, "Doorbell map:\n"); + seq_printf(m, "\nDoorbell map:\n"); seq_printf(m, "\t%*pb\n", GUC_NUM_DOORBELLS, guc->doorbell_bitmap); - seq_printf(m, "Doorbell next cacheline: 0x%x\n\n", guc->db_cacheline); + seq_printf(m, "Doorbell next cacheline: 0x%x\n", guc->db_cacheline); seq_printf(m, "\nGuC execbuf client @ %p:\n", guc->execbuf_client); i915_guc_client_info(m, dev_priv, guc->execbuf_client); @@ -2396,8 +2418,6 @@ static int i915_guc_info(struct seq_file *m, void *data) i915_guc_client_info(m, dev_priv, guc->preempt_client); } - i915_guc_log_info(m, dev_priv); - /* Add more as required ... */ return 0; @@ -2496,35 +2516,73 @@ static int i915_guc_log_dump(struct seq_file *m, void *data) return 0; } -static int i915_guc_log_control_get(void *data, u64 *val) +static int i915_guc_log_level_get(void *data, u64 *val) { struct drm_i915_private *dev_priv = data; - if (!HAS_GUC(dev_priv)) + if (!USES_GUC(dev_priv)) return -ENODEV; - if (!dev_priv->guc.log.vma) - return -EINVAL; - - *val = i915_modparams.guc_log_level; + *val = intel_guc_log_level_get(&dev_priv->guc.log); return 0; } -static int i915_guc_log_control_set(void *data, u64 val) +static int i915_guc_log_level_set(void *data, u64 val) { struct drm_i915_private *dev_priv = data; - if (!HAS_GUC(dev_priv)) + if (!USES_GUC(dev_priv)) return -ENODEV; - return intel_guc_log_control(&dev_priv->guc, val); + return intel_guc_log_level_set(&dev_priv->guc.log, val); } -DEFINE_SIMPLE_ATTRIBUTE(i915_guc_log_control_fops, - i915_guc_log_control_get, i915_guc_log_control_set, +DEFINE_SIMPLE_ATTRIBUTE(i915_guc_log_level_fops, + i915_guc_log_level_get, i915_guc_log_level_set, "%lld\n"); +static int i915_guc_log_relay_open(struct inode *inode, struct file *file) +{ + struct drm_i915_private *dev_priv = inode->i_private; + + if (!USES_GUC(dev_priv)) + return -ENODEV; + + file->private_data = &dev_priv->guc.log; + + return intel_guc_log_relay_open(&dev_priv->guc.log); +} + +static ssize_t +i915_guc_log_relay_write(struct file *filp, + const char __user *ubuf, + size_t cnt, + loff_t *ppos) +{ + struct intel_guc_log *log = filp->private_data; + + intel_guc_log_relay_flush(log); + + return cnt; +} + +static int i915_guc_log_relay_release(struct inode *inode, struct file *file) +{ + struct drm_i915_private *dev_priv = inode->i_private; + + intel_guc_log_relay_close(&dev_priv->guc.log); + + return 0; +} + +static const struct file_operations i915_guc_log_relay_fops = { + .owner = THIS_MODULE, + .open = i915_guc_log_relay_open, + .write = i915_guc_log_relay_write, + .release = i915_guc_log_relay_release, +}; + static const char *psr2_live_status(u32 val) { static const char * const live_status[] = { @@ -2548,6 +2606,26 @@ static const char *psr2_live_status(u32 val) return "unknown"; } +static const char *psr_sink_status(u8 val) +{ + static const char * const sink_status[] = { + "inactive", + "transition to active, capture and display", + "active, display from RFB", + "active, capture and display on sink device timings", + "transition to inactive, capture and display, timing re-sync", + "reserved", + "reserved", + "sink internal error" + }; + + val &= DP_PSR_SINK_STATE_MASK; + if (val < ARRAY_SIZE(sink_status)) + return sink_status[val]; + + return "unknown"; +} + static int i915_edp_psr_status(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); @@ -2569,14 +2647,13 @@ static int i915_edp_psr_status(struct seq_file *m, void *data) mutex_lock(&dev_priv->psr.lock); seq_printf(m, "Enabled: %s\n", yesno((bool)dev_priv->psr.enabled)); - seq_printf(m, "Active: %s\n", yesno(dev_priv->psr.active)); seq_printf(m, "Busy frontbuffer bits: 0x%03x\n", dev_priv->psr.busy_frontbuffer_bits); seq_printf(m, "Re-enable work scheduled: %s\n", yesno(work_busy(&dev_priv->psr.work.work))); if (HAS_DDI(dev_priv)) { - if (dev_priv->psr.psr2_support) + if (dev_priv->psr.psr2_enabled) enabled = I915_READ(EDP_PSR2_CTL) & EDP_PSR2_ENABLE; else enabled = I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE; @@ -2624,18 +2701,67 @@ static int i915_edp_psr_status(struct seq_file *m, void *data) seq_printf(m, "Performance_Counter: %u\n", psrperf); } - if (dev_priv->psr.psr2_support) { + if (dev_priv->psr.psr2_enabled) { u32 psr2 = I915_READ(EDP_PSR2_STATUS); seq_printf(m, "EDP_PSR2_STATUS: %x [%s]\n", psr2, psr2_live_status(psr2)); } + + if (dev_priv->psr.enabled) { + struct drm_dp_aux *aux = &dev_priv->psr.enabled->aux; + u8 val; + + if (drm_dp_dpcd_readb(aux, DP_PSR_STATUS, &val) == 1) + seq_printf(m, "Sink PSR status: 0x%x [%s]\n", val, + psr_sink_status(val)); + } mutex_unlock(&dev_priv->psr.lock); + if (READ_ONCE(dev_priv->psr.debug)) { + seq_printf(m, "Last attempted entry at: %lld\n", + dev_priv->psr.last_entry_attempt); + seq_printf(m, "Last exit at: %lld\n", + dev_priv->psr.last_exit); + } + intel_runtime_pm_put(dev_priv); return 0; } +static int +i915_edp_psr_debug_set(void *data, u64 val) +{ + struct drm_i915_private *dev_priv = data; + + if (!CAN_PSR(dev_priv)) + return -ENODEV; + + DRM_DEBUG_KMS("PSR debug %s\n", enableddisabled(val)); + + intel_runtime_pm_get(dev_priv); + intel_psr_irq_control(dev_priv, !!val); + intel_runtime_pm_put(dev_priv); + + return 0; +} + +static int +i915_edp_psr_debug_get(void *data, u64 *val) +{ + struct drm_i915_private *dev_priv = data; + + if (!CAN_PSR(dev_priv)) + return -ENODEV; + + *val = READ_ONCE(dev_priv->psr.debug); + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(i915_edp_psr_debug_fops, + i915_edp_psr_debug_get, i915_edp_psr_debug_set, + "%llu\n"); + static int i915_sink_crc(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); @@ -3231,7 +3357,8 @@ static int i915_shared_dplls_info(struct seq_file *m, void *unused) for (i = 0; i < dev_priv->num_shared_dpll; i++) { struct intel_shared_dpll *pll = &dev_priv->shared_dplls[i]; - seq_printf(m, "DPLL%i: %s, id: %i\n", i, pll->name, pll->id); + seq_printf(m, "DPLL%i: %s, id: %i\n", i, pll->info->name, + pll->info->id); seq_printf(m, " crtc_mask: 0x%08x, active: 0x%x, on: %s\n", pll->state.crtc_mask, pll->active_mask, yesno(pll->on)); seq_printf(m, " tracked hardware state:\n"); @@ -3241,6 +3368,28 @@ static int i915_shared_dplls_info(struct seq_file *m, void *unused) seq_printf(m, " fp0: 0x%08x\n", pll->state.hw_state.fp0); seq_printf(m, " fp1: 0x%08x\n", pll->state.hw_state.fp1); seq_printf(m, " wrpll: 0x%08x\n", pll->state.hw_state.wrpll); + seq_printf(m, " cfgcr0: 0x%08x\n", pll->state.hw_state.cfgcr0); + seq_printf(m, " cfgcr1: 0x%08x\n", pll->state.hw_state.cfgcr1); + seq_printf(m, " mg_refclkin_ctl: 0x%08x\n", + pll->state.hw_state.mg_refclkin_ctl); + seq_printf(m, " mg_clktop2_coreclkctl1: 0x%08x\n", + pll->state.hw_state.mg_clktop2_coreclkctl1); + seq_printf(m, " mg_clktop2_hsclkctl: 0x%08x\n", + pll->state.hw_state.mg_clktop2_hsclkctl); + seq_printf(m, " mg_pll_div0: 0x%08x\n", + pll->state.hw_state.mg_pll_div0); + seq_printf(m, " mg_pll_div1: 0x%08x\n", + pll->state.hw_state.mg_pll_div1); + seq_printf(m, " mg_pll_lf: 0x%08x\n", + pll->state.hw_state.mg_pll_lf); + seq_printf(m, " mg_pll_frac_lock: 0x%08x\n", + pll->state.hw_state.mg_pll_frac_lock); + seq_printf(m, " mg_pll_ssc: 0x%08x\n", + pll->state.hw_state.mg_pll_ssc); + seq_printf(m, " mg_pll_bias: 0x%08x\n", + pll->state.hw_state.mg_pll_bias); + seq_printf(m, " mg_pll_tdc_coldst_bias: 0x%08x\n", + pll->state.hw_state.mg_pll_tdc_coldst_bias); } drm_modeset_unlock_all(dev); @@ -3249,24 +3398,13 @@ static int i915_shared_dplls_info(struct seq_file *m, void *unused) static int i915_wa_registers(struct seq_file *m, void *unused) { - int i; - int ret; - struct intel_engine_cs *engine; struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct drm_device *dev = &dev_priv->drm; struct i915_workarounds *workarounds = &dev_priv->workarounds; - enum intel_engine_id id; - - ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; + int i; intel_runtime_pm_get(dev_priv); seq_printf(m, "Workarounds applied: %d\n", workarounds->count); - for_each_engine(engine, dev_priv, id) - seq_printf(m, "HW whitelist count for %s: %d\n", - engine->name, workarounds->hw_whitelist_count[id]); for (i = 0; i < workarounds->count; ++i) { i915_reg_t addr; u32 mask, value, read; @@ -3282,7 +3420,6 @@ static int i915_wa_registers(struct seq_file *m, void *unused) } intel_runtime_pm_put(dev_priv); - mutex_unlock(&dev->struct_mutex); return 0; } @@ -3567,7 +3704,8 @@ static ssize_t i915_displayport_test_active_write(struct file *file, static int i915_displayport_test_active_show(struct seq_file *m, void *data) { - struct drm_device *dev = m->private; + struct drm_i915_private *dev_priv = m->private; + struct drm_device *dev = &dev_priv->drm; struct drm_connector *connector; struct drm_connector_list_iter conn_iter; struct intel_dp *intel_dp; @@ -3601,10 +3739,8 @@ static int i915_displayport_test_active_show(struct seq_file *m, void *data) static int i915_displayport_test_active_open(struct inode *inode, struct file *file) { - struct drm_i915_private *dev_priv = inode->i_private; - return single_open(file, i915_displayport_test_active_show, - &dev_priv->drm); + inode->i_private); } static const struct file_operations i915_displayport_test_active_fops = { @@ -3618,7 +3754,8 @@ static const struct file_operations i915_displayport_test_active_fops = { static int i915_displayport_test_data_show(struct seq_file *m, void *data) { - struct drm_device *dev = m->private; + struct drm_i915_private *dev_priv = m->private; + struct drm_device *dev = &dev_priv->drm; struct drm_connector *connector; struct drm_connector_list_iter conn_iter; struct intel_dp *intel_dp; @@ -3657,26 +3794,12 @@ static int i915_displayport_test_data_show(struct seq_file *m, void *data) return 0; } -static int i915_displayport_test_data_open(struct inode *inode, - struct file *file) -{ - struct drm_i915_private *dev_priv = inode->i_private; - - return single_open(file, i915_displayport_test_data_show, - &dev_priv->drm); -} - -static const struct file_operations i915_displayport_test_data_fops = { - .owner = THIS_MODULE, - .open = i915_displayport_test_data_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release -}; +DEFINE_SHOW_ATTRIBUTE(i915_displayport_test_data); static int i915_displayport_test_type_show(struct seq_file *m, void *data) { - struct drm_device *dev = m->private; + struct drm_i915_private *dev_priv = m->private; + struct drm_device *dev = &dev_priv->drm; struct drm_connector *connector; struct drm_connector_list_iter conn_iter; struct intel_dp *intel_dp; @@ -3703,23 +3826,7 @@ static int i915_displayport_test_type_show(struct seq_file *m, void *data) return 0; } - -static int i915_displayport_test_type_open(struct inode *inode, - struct file *file) -{ - struct drm_i915_private *dev_priv = inode->i_private; - - return single_open(file, i915_displayport_test_type_show, - &dev_priv->drm); -} - -static const struct file_operations i915_displayport_test_type_fops = { - .owner = THIS_MODULE, - .open = i915_displayport_test_type_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release -}; +DEFINE_SHOW_ATTRIBUTE(i915_displayport_test_type); static void wm_latency_show(struct seq_file *m, const uint16_t wm[8]) { @@ -3987,8 +4094,8 @@ i915_wedged_set(void *data, u64 val) engine->hangcheck.stalled = true; } - i915_handle_error(i915, val, "Manually set wedged engine mask = %llx", - val); + i915_handle_error(i915, val, I915_ERROR_CAPTURE, + "Manually set wedged engine mask = %llx", val); wait_on_bit(&i915->gpu_error.flags, I915_RESET_HANDOFF, @@ -4152,119 +4259,6 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_drop_caches_fops, "0x%08llx\n"); static int -i915_max_freq_get(void *data, u64 *val) -{ - struct drm_i915_private *dev_priv = data; - - if (INTEL_GEN(dev_priv) < 6) - return -ENODEV; - - *val = intel_gpu_freq(dev_priv, dev_priv->gt_pm.rps.max_freq_softlimit); - return 0; -} - -static int -i915_max_freq_set(void *data, u64 val) -{ - struct drm_i915_private *dev_priv = data; - struct intel_rps *rps = &dev_priv->gt_pm.rps; - u32 hw_max, hw_min; - int ret; - - if (INTEL_GEN(dev_priv) < 6) - return -ENODEV; - - DRM_DEBUG_DRIVER("Manually setting max freq to %llu\n", val); - - ret = mutex_lock_interruptible(&dev_priv->pcu_lock); - if (ret) - return ret; - - /* - * Turbo will still be enabled, but won't go above the set value. - */ - val = intel_freq_opcode(dev_priv, val); - - hw_max = rps->max_freq; - hw_min = rps->min_freq; - - if (val < hw_min || val > hw_max || val < rps->min_freq_softlimit) { - mutex_unlock(&dev_priv->pcu_lock); - return -EINVAL; - } - - rps->max_freq_softlimit = val; - - if (intel_set_rps(dev_priv, val)) - DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n"); - - mutex_unlock(&dev_priv->pcu_lock); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(i915_max_freq_fops, - i915_max_freq_get, i915_max_freq_set, - "%llu\n"); - -static int -i915_min_freq_get(void *data, u64 *val) -{ - struct drm_i915_private *dev_priv = data; - - if (INTEL_GEN(dev_priv) < 6) - return -ENODEV; - - *val = intel_gpu_freq(dev_priv, dev_priv->gt_pm.rps.min_freq_softlimit); - return 0; -} - -static int -i915_min_freq_set(void *data, u64 val) -{ - struct drm_i915_private *dev_priv = data; - struct intel_rps *rps = &dev_priv->gt_pm.rps; - u32 hw_max, hw_min; - int ret; - - if (INTEL_GEN(dev_priv) < 6) - return -ENODEV; - - DRM_DEBUG_DRIVER("Manually setting min freq to %llu\n", val); - - ret = mutex_lock_interruptible(&dev_priv->pcu_lock); - if (ret) - return ret; - - /* - * Turbo will still be enabled, but won't go below the set value. - */ - val = intel_freq_opcode(dev_priv, val); - - hw_max = rps->max_freq; - hw_min = rps->min_freq; - - if (val < hw_min || - val > hw_max || val > rps->max_freq_softlimit) { - mutex_unlock(&dev_priv->pcu_lock); - return -EINVAL; - } - - rps->min_freq_softlimit = val; - - if (intel_set_rps(dev_priv, val)) - DRM_DEBUG_DRIVER("failed to update RPS to new softlimit\n"); - - mutex_unlock(&dev_priv->pcu_lock); - - return 0; -} - -DEFINE_SIMPLE_ATTRIBUTE(i915_min_freq_fops, - i915_min_freq_get, i915_min_freq_set, - "%llu\n"); - -static int i915_cache_sharing_get(void *data, u64 *val) { struct drm_i915_private *dev_priv = data; @@ -4316,9 +4310,10 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_cache_sharing_fops, static void cherryview_sseu_device_status(struct drm_i915_private *dev_priv, struct sseu_dev_info *sseu) { - int ss_max = 2; +#define SS_MAX 2 + const int ss_max = SS_MAX; + u32 sig1[SS_MAX], sig2[SS_MAX]; int ss; - u32 sig1[ss_max], sig2[ss_max]; sig1[0] = I915_READ(CHV_POWER_SS0_SIG1); sig1[1] = I915_READ(CHV_POWER_SS1_SIG1); @@ -4342,15 +4337,16 @@ static void cherryview_sseu_device_status(struct drm_i915_private *dev_priv, sseu->eu_per_subslice = max_t(unsigned int, sseu->eu_per_subslice, eu_cnt); } +#undef SS_MAX } static void gen10_sseu_device_status(struct drm_i915_private *dev_priv, struct sseu_dev_info *sseu) { +#define SS_MAX 6 const struct intel_device_info *info = INTEL_INFO(dev_priv); + u32 s_reg[SS_MAX], eu_reg[2 * SS_MAX], eu_mask[2]; int s, ss; - u32 s_reg[info->sseu.max_slices]; - u32 eu_reg[2 * info->sseu.max_subslices], eu_mask[2]; for (s = 0; s < info->sseu.max_slices; s++) { /* @@ -4397,15 +4393,16 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv, eu_cnt); } } +#undef SS_MAX } static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, struct sseu_dev_info *sseu) { +#define SS_MAX 3 const struct intel_device_info *info = INTEL_INFO(dev_priv); + u32 s_reg[SS_MAX], eu_reg[2 * SS_MAX], eu_mask[2]; int s, ss; - u32 s_reg[info->sseu.max_slices]; - u32 eu_reg[2 * info->sseu.max_subslices], eu_mask[2]; for (s = 0; s < info->sseu.max_slices; s++) { s_reg[s] = I915_READ(GEN9_SLICE_PGCTL_ACK(s)); @@ -4452,6 +4449,7 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv, eu_cnt); } } +#undef SS_MAX } static void broadwell_sseu_device_status(struct drm_i915_private *dev_priv, @@ -4703,6 +4701,67 @@ static int i915_drrs_ctl_set(void *data, u64 val) DEFINE_SIMPLE_ATTRIBUTE(i915_drrs_ctl_fops, NULL, i915_drrs_ctl_set, "%llu\n"); +static ssize_t +i915_fifo_underrun_reset_write(struct file *filp, + const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct drm_i915_private *dev_priv = filp->private_data; + struct intel_crtc *intel_crtc; + struct drm_device *dev = &dev_priv->drm; + int ret; + bool reset; + + ret = kstrtobool_from_user(ubuf, cnt, &reset); + if (ret) + return ret; + + if (!reset) + return cnt; + + for_each_intel_crtc(dev, intel_crtc) { + struct drm_crtc_commit *commit; + struct intel_crtc_state *crtc_state; + + ret = drm_modeset_lock_single_interruptible(&intel_crtc->base.mutex); + if (ret) + return ret; + + crtc_state = to_intel_crtc_state(intel_crtc->base.state); + commit = crtc_state->base.commit; + if (commit) { + ret = wait_for_completion_interruptible(&commit->hw_done); + if (!ret) + ret = wait_for_completion_interruptible(&commit->flip_done); + } + + if (!ret && crtc_state->base.active) { + DRM_DEBUG_KMS("Re-arming FIFO underruns on pipe %c\n", + pipe_name(intel_crtc->pipe)); + + intel_crtc_arm_fifo_underrun(intel_crtc, crtc_state); + } + + drm_modeset_unlock(&intel_crtc->base.mutex); + + if (ret) + return ret; + } + + ret = intel_fbc_reset_underrun(dev_priv); + if (ret) + return ret; + + return cnt; +} + +static const struct file_operations i915_fifo_underrun_reset_ops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = i915_fifo_underrun_reset_write, + .llseek = default_llseek, +}; + static const struct drm_info_list i915_debugfs_list[] = { {"i915_capabilities", i915_capabilities, 0}, {"i915_gem_objects", i915_gem_object_info, 0}, @@ -4760,8 +4819,6 @@ static const struct i915_debugfs_files { const struct file_operations *fops; } i915_debugfs_files[] = { {"i915_wedged", &i915_wedged_fops}, - {"i915_max_freq", &i915_max_freq_fops}, - {"i915_min_freq", &i915_min_freq_fops}, {"i915_cache_sharing", &i915_cache_sharing_fops}, {"i915_ring_missed_irq", &i915_ring_missed_irq_fops}, {"i915_ring_test_irq", &i915_ring_test_irq_fops}, @@ -4770,6 +4827,7 @@ static const struct i915_debugfs_files { {"i915_error_state", &i915_error_state_fops}, {"i915_gpu_info", &i915_gpu_info_fops}, #endif + {"i915_fifo_underrun_reset", &i915_fifo_underrun_reset_ops}, {"i915_next_seqno", &i915_next_seqno_fops}, {"i915_display_crc_ctl", &i915_display_crc_ctl_fops}, {"i915_pri_wm_latency", &i915_pri_wm_latency_fops}, @@ -4779,10 +4837,12 @@ static const struct i915_debugfs_files { {"i915_dp_test_data", &i915_displayport_test_data_fops}, {"i915_dp_test_type", &i915_displayport_test_type_fops}, {"i915_dp_test_active", &i915_displayport_test_active_fops}, - {"i915_guc_log_control", &i915_guc_log_control_fops}, + {"i915_guc_log_level", &i915_guc_log_level_fops}, + {"i915_guc_log_relay", &i915_guc_log_relay_fops}, {"i915_hpd_storm_ctl", &i915_hpd_storm_ctl_fops}, {"i915_ipc_status", &i915_ipc_status_fops}, - {"i915_drrs_ctl", &i915_drrs_ctl_fops} + {"i915_drrs_ctl", &i915_drrs_ctl_fops}, + {"i915_edp_psr_debug", &i915_edp_psr_debug_fops} }; int i915_debugfs_register(struct drm_i915_private *dev_priv) @@ -4876,19 +4936,7 @@ static int i915_dpcd_show(struct seq_file *m, void *data) return 0; } - -static int i915_dpcd_open(struct inode *inode, struct file *file) -{ - return single_open(file, i915_dpcd_show, inode->i_private); -} - -static const struct file_operations i915_dpcd_fops = { - .owner = THIS_MODULE, - .open = i915_dpcd_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(i915_dpcd); static int i915_panel_show(struct seq_file *m, void *data) { @@ -4910,19 +4958,7 @@ static int i915_panel_show(struct seq_file *m, void *data) return 0; } - -static int i915_panel_open(struct inode *inode, struct file *file) -{ - return single_open(file, i915_panel_show, inode->i_private); -} - -static const struct file_operations i915_panel_fops = { - .owner = THIS_MODULE, - .open = i915_panel_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(i915_panel); /** * i915_debugfs_connector_add - add i915 specific connector debugfs files diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 07c07d5..9c449b8 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -101,7 +101,13 @@ __i915_printk(struct drm_i915_private *dev_priv, const char *level, __builtin_return_address(0), &vaf); if (is_error && !shown_bug_once) { - dev_notice(kdev, "%s", FDO_BUG_MSG); + /* + * Ask the user to file a bug report for the error, except + * if they may have caused the bug by fiddling with unsafe + * module parameters. + */ + if (!test_taint(TAINT_USER)) + dev_notice(kdev, "%s", FDO_BUG_MSG); shown_bug_once = true; } @@ -377,9 +383,9 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data, value = INTEL_INFO(dev_priv)->sseu.min_eu_in_pool; break; case I915_PARAM_HUC_STATUS: - intel_runtime_pm_get(dev_priv); - value = I915_READ(HUC_STATUS2) & HUC_FW_VERIFIED; - intel_runtime_pm_put(dev_priv); + value = intel_huc_check_status(&dev_priv->huc); + if (value < 0) + return value; break; case I915_PARAM_MMAP_GTT_VERSION: /* Though we've started our numbering from 1, and so class all @@ -449,7 +455,10 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data, static int i915_get_bridge_dev(struct drm_i915_private *dev_priv) { - dev_priv->bridge_dev = pci_get_bus_and_slot(0, PCI_DEVFN(0, 0)); + int domain = pci_domain_nr(dev_priv->drm.pdev->bus); + + dev_priv->bridge_dev = + pci_get_domain_bus_and_slot(domain, 0, PCI_DEVFN(0, 0)); if (!dev_priv->bridge_dev) { DRM_ERROR("bridge device not found\n"); return -1; @@ -692,11 +701,9 @@ static int i915_load_modeset_init(struct drm_device *dev) if (ret) goto cleanup_irq; - intel_uc_init_fw(dev_priv); - ret = i915_gem_init(dev_priv); if (ret) - goto cleanup_uc; + goto cleanup_irq; intel_setup_overlay(dev_priv); @@ -716,8 +723,6 @@ cleanup_gem: if (i915_gem_suspend(dev_priv)) DRM_ERROR("failed to idle hardware; continuing to unload!\n"); i915_gem_fini(dev_priv); -cleanup_uc: - intel_uc_fini_fw(dev_priv); cleanup_irq: drm_irq_uninstall(dev); intel_teardown_gmbus(dev_priv); @@ -919,16 +924,21 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, mutex_init(&dev_priv->wm.wm_mutex); mutex_init(&dev_priv->pps_mutex); - intel_uc_init_early(dev_priv); i915_memcpy_init_early(dev_priv); ret = i915_workqueues_init(dev_priv); if (ret < 0) goto err_engines; + ret = i915_gem_init_early(dev_priv); + if (ret < 0) + goto err_workqueues; + /* This must be called before any calls to HAS_PCH_* */ intel_detect_pch(dev_priv); + intel_wopcm_init_early(&dev_priv->wopcm); + intel_uc_init_early(dev_priv); intel_pm_setup(dev_priv); intel_init_dpio(dev_priv); intel_power_domains_init(dev_priv); @@ -937,18 +947,13 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, intel_init_display_hooks(dev_priv); intel_init_clock_gating_hooks(dev_priv); intel_init_audio_hooks(dev_priv); - ret = i915_gem_load_init(dev_priv); - if (ret < 0) - goto err_irq; - intel_display_crc_init(dev_priv); intel_detect_preproduction_hw(dev_priv); return 0; -err_irq: - intel_irq_fini(dev_priv); +err_workqueues: i915_workqueues_cleanup(dev_priv); err_engines: i915_engines_cleanup(dev_priv); @@ -961,8 +966,9 @@ err_engines: */ static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv) { - i915_gem_load_cleanup(dev_priv); intel_irq_fini(dev_priv); + intel_uc_cleanup_early(dev_priv); + i915_gem_cleanup_early(dev_priv); i915_workqueues_cleanup(dev_priv); i915_engines_cleanup(dev_priv); } @@ -1032,6 +1038,10 @@ static int i915_driver_init_mmio(struct drm_i915_private *dev_priv) intel_uncore_init(dev_priv); + intel_device_info_init_mmio(dev_priv); + + intel_uncore_prune(dev_priv); + intel_uc_init_mmio(dev_priv); ret = intel_engines_init_mmio(dev_priv); @@ -1074,8 +1084,6 @@ static void intel_sanitize_options(struct drm_i915_private *dev_priv) i915_modparams.enable_ppgtt); DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915_modparams.enable_ppgtt); - intel_uc_sanitize_options(dev_priv); - intel_gvt_sanitize_options(dev_priv); } @@ -1102,30 +1110,32 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) ret = i915_ggtt_probe_hw(dev_priv); if (ret) - return ret; + goto err_perf; - /* WARNING: Apparently we must kick fbdev drivers before vgacon, - * otherwise the vga fbdev driver falls over. */ + /* + * WARNING: Apparently we must kick fbdev drivers before vgacon, + * otherwise the vga fbdev driver falls over. + */ ret = i915_kick_out_firmware_fb(dev_priv); if (ret) { DRM_ERROR("failed to remove conflicting framebuffer drivers\n"); - goto out_ggtt; + goto err_ggtt; } ret = i915_kick_out_vgacon(dev_priv); if (ret) { DRM_ERROR("failed to remove conflicting VGA console\n"); - goto out_ggtt; + goto err_ggtt; } ret = i915_ggtt_init_hw(dev_priv); if (ret) - return ret; + goto err_ggtt; ret = i915_ggtt_enable_hw(dev_priv); if (ret) { DRM_ERROR("failed to enable GGTT\n"); - goto out_ggtt; + goto err_ggtt; } pci_set_master(pdev); @@ -1136,7 +1146,7 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) if (ret) { DRM_ERROR("failed to set DMA mask\n"); - goto out_ggtt; + goto err_ggtt; } } @@ -1154,7 +1164,7 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) if (ret) { DRM_ERROR("failed to set DMA mask\n"); - goto out_ggtt; + goto err_ggtt; } } @@ -1187,13 +1197,14 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) ret = intel_gvt_init(dev_priv); if (ret) - goto out_ggtt; + goto err_ggtt; return 0; -out_ggtt: +err_ggtt: i915_ggtt_cleanup_hw(dev_priv); - +err_perf: + i915_perf_fini(dev_priv); return ret; } @@ -1238,7 +1249,6 @@ static void i915_driver_register(struct drm_i915_private *dev_priv) /* Reveal our presence to userspace */ if (drm_dev_register(dev, 0) == 0) { i915_debugfs_register(dev_priv); - i915_guc_log_register(dev_priv); i915_setup_sysfs(dev_priv); /* Depends on sysfs having been initialized */ @@ -1298,7 +1308,6 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv) i915_pmu_unregister(dev_priv); i915_teardown_sysfs(dev_priv); - i915_guc_log_unregister(dev_priv); drm_dev_unregister(&dev_priv->drm); i915_gem_shrinker_unregister(dev_priv); @@ -1457,7 +1466,6 @@ void i915_driver_unload(struct drm_device *dev) i915_reset_error_state(dev_priv); i915_gem_fini(dev_priv); - intel_uc_fini_fw(dev_priv); intel_fbc_cleanup_cfb(dev_priv); intel_power_domains_fini(dev_priv); @@ -1870,7 +1878,8 @@ static int i915_resume_switcheroo(struct drm_device *dev) /** * i915_reset - reset chip after a hang * @i915: #drm_i915_private to reset - * @flags: Instructions + * @stalled_mask: mask of the stalled engines with the guilty requests + * @reason: user error message for why we are resetting * * Reset the chip. Useful if a hang is detected. Marks the device as wedged * on failure. @@ -1885,12 +1894,16 @@ static int i915_resume_switcheroo(struct drm_device *dev) * - re-init interrupt state * - re-init display */ -void i915_reset(struct drm_i915_private *i915, unsigned int flags) +void i915_reset(struct drm_i915_private *i915, + unsigned int stalled_mask, + const char *reason) { struct i915_gpu_error *error = &i915->gpu_error; int ret; int i; + GEM_TRACE("flags=%lx\n", error->flags); + might_sleep(); lockdep_assert_held(&i915->drm.struct_mutex); GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &error->flags)); @@ -1902,8 +1915,8 @@ void i915_reset(struct drm_i915_private *i915, unsigned int flags) if (!i915_gem_unset_wedged(i915)) goto wakeup; - if (!(flags & I915_RESET_QUIET)) - dev_notice(i915->drm.dev, "Resetting chip after gpu hang\n"); + if (reason) + dev_notice(i915->drm.dev, "Resetting chip for %s\n", reason); error->reset_count++; disable_irq(i915->drm.irq); @@ -1946,7 +1959,7 @@ void i915_reset(struct drm_i915_private *i915, unsigned int flags) goto error; } - i915_gem_reset(i915); + i915_gem_reset(i915, stalled_mask); intel_overlay_reset(i915); /* @@ -1992,7 +2005,6 @@ taint: error: i915_gem_set_wedged(i915); i915_retire_requests(i915); - intel_gpu_reset(i915, ALL_ENGINES); goto finish; } @@ -2005,7 +2017,7 @@ static inline int intel_gt_reset_engine(struct drm_i915_private *dev_priv, /** * i915_reset_engine - reset GPU engine to recover from a hang * @engine: engine to reset - * @flags: options + * @msg: reason for GPU reset; or NULL for no dev_notice() * * Reset a specific GPU engine. Useful if a hang is detected. * Returns zero on successful reset or otherwise an error code. @@ -2015,12 +2027,13 @@ static inline int intel_gt_reset_engine(struct drm_i915_private *dev_priv, * - reset engine (which will force the engine to idle) * - re-init/configure engine */ -int i915_reset_engine(struct intel_engine_cs *engine, unsigned int flags) +int i915_reset_engine(struct intel_engine_cs *engine, const char *msg) { struct i915_gpu_error *error = &engine->i915->gpu_error; struct i915_request *active_request; int ret; + GEM_TRACE("%s flags=%lx\n", engine->name, error->flags); GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags)); active_request = i915_gem_reset_prepare_engine(engine); @@ -2030,10 +2043,9 @@ int i915_reset_engine(struct intel_engine_cs *engine, unsigned int flags) goto out; } - if (!(flags & I915_RESET_QUIET)) { + if (msg) dev_notice(engine->i915->drm.dev, - "Resetting %s after gpu hang\n", engine->name); - } + "Resetting %s for %s\n", engine->name, msg); error->reset_engine_count[engine->id]++; if (!engine->i915->guc.execbuf_client) @@ -2053,7 +2065,7 @@ int i915_reset_engine(struct intel_engine_cs *engine, unsigned int flags) * active request and can drop it, adjust head to skip the offending * request to resume executing remaining requests in the queue. */ - i915_gem_reset_engine(engine, active_request); + i915_gem_reset_engine(engine, active_request, true); /* * The engine and its registers (and workarounds in case of render) @@ -2462,10 +2474,13 @@ static void vlv_wait_for_gt_wells(struct drm_i915_private *dev_priv, /* * RC6 transitioning can be delayed up to 2 msec (see * valleyview_enable_rps), use 3 msec for safety. + * + * This can fail to turn off the rc6 if the GPU is stuck after a failed + * reset and we are trying to force the machine to sleep. */ if (vlv_wait_for_pw_status(dev_priv, mask, val)) - DRM_ERROR("timeout waiting for GT wells to go %s\n", - onoff(wait_for_on)); + DRM_DEBUG_DRIVER("timeout waiting for GT wells to go %s\n", + onoff(wait_for_on)); } static void vlv_check_no_gt_access(struct drm_i915_private *dev_priv) @@ -2816,10 +2831,10 @@ static const struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF_DRV(I915_GEM_GET_APERTURE, i915_gem_get_aperture_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GET_PIPE_FROM_CRTC_ID, intel_get_pipe_from_crtc_id_ioctl, 0), DRM_IOCTL_DEF_DRV(I915_GEM_MADVISE, i915_gem_madvise_ioctl, DRM_RENDER_ALLOW), - DRM_IOCTL_DEF_DRV(I915_OVERLAY_PUT_IMAGE, intel_overlay_put_image_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW), - DRM_IOCTL_DEF_DRV(I915_OVERLAY_ATTRS, intel_overlay_attrs_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW), - DRM_IOCTL_DEF_DRV(I915_SET_SPRITE_COLORKEY, intel_sprite_set_colorkey_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW), - DRM_IOCTL_DEF_DRV(I915_GET_SPRITE_COLORKEY, drm_noop, DRM_MASTER|DRM_CONTROL_ALLOW), + DRM_IOCTL_DEF_DRV(I915_OVERLAY_PUT_IMAGE, intel_overlay_put_image_ioctl, DRM_MASTER), + DRM_IOCTL_DEF_DRV(I915_OVERLAY_ATTRS, intel_overlay_attrs_ioctl, DRM_MASTER), + DRM_IOCTL_DEF_DRV(I915_SET_SPRITE_COLORKEY, intel_sprite_set_colorkey_ioctl, DRM_MASTER), + DRM_IOCTL_DEF_DRV(I915_GET_SPRITE_COLORKEY, drm_noop, DRM_MASTER), DRM_IOCTL_DEF_DRV(I915_GEM_WAIT, i915_gem_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE, i915_gem_context_create_ioctl, DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_RENDER_ALLOW), diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ce18b6c..34c125e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -64,6 +64,7 @@ #include "intel_opregion.h" #include "intel_ringbuffer.h" #include "intel_uncore.h" +#include "intel_wopcm.h" #include "intel_uc.h" #include "i915_gem.h" @@ -71,9 +72,10 @@ #include "i915_gem_fence_reg.h" #include "i915_gem_object.h" #include "i915_gem_gtt.h" -#include "i915_gem_timeline.h" - +#include "i915_gpu_error.h" #include "i915_request.h" +#include "i915_scheduler.h" +#include "i915_timeline.h" #include "i915_vma.h" #include "intel_gvt.h" @@ -83,8 +85,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20180308" -#define DRIVER_TIMESTAMP 1520513379 +#define DRIVER_DATE "20180514" +#define DRIVER_TIMESTAMP 1526300884 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and * WARN_ON()) for hw state sanity checks to check for unexpected conditions @@ -261,6 +263,7 @@ enum hpd_pin { HPD_PORT_C, HPD_PORT_D, HPD_PORT_E, + HPD_PORT_F, HPD_NUM_PINS }; @@ -453,172 +456,6 @@ struct intel_csr { uint32_t allowed_dc_mask; }; -struct intel_display_error_state; - -struct i915_gpu_state { - struct kref ref; - ktime_t time; - ktime_t boottime; - ktime_t uptime; - - struct drm_i915_private *i915; - - char error_msg[128]; - bool simulated; - bool awake; - bool wakelock; - bool suspended; - int iommu; - u32 reset_count; - u32 suspend_count; - struct intel_device_info device_info; - struct intel_driver_caps driver_caps; - struct i915_params params; - - struct i915_error_uc { - struct intel_uc_fw guc_fw; - struct intel_uc_fw huc_fw; - struct drm_i915_error_object *guc_log; - } uc; - - /* Generic register state */ - u32 eir; - u32 pgtbl_er; - u32 ier; - u32 gtier[4], ngtier; - u32 ccid; - u32 derrmr; - u32 forcewake; - u32 error; /* gen6+ */ - u32 err_int; /* gen7 */ - u32 fault_data0; /* gen8, gen9 */ - u32 fault_data1; /* gen8, gen9 */ - u32 done_reg; - u32 gac_eco; - u32 gam_ecochk; - u32 gab_ctl; - u32 gfx_mode; - - u32 nfence; - u64 fence[I915_MAX_NUM_FENCES]; - struct intel_overlay_error_state *overlay; - struct intel_display_error_state *display; - - struct drm_i915_error_engine { - int engine_id; - /* Software tracked state */ - bool idle; - bool waiting; - int num_waiters; - unsigned long hangcheck_timestamp; - bool hangcheck_stalled; - enum intel_engine_hangcheck_action hangcheck_action; - struct i915_address_space *vm; - int num_requests; - u32 reset_count; - - /* position of active request inside the ring */ - u32 rq_head, rq_post, rq_tail; - - /* our own tracking of ring head and tail */ - u32 cpu_ring_head; - u32 cpu_ring_tail; - - u32 last_seqno; - - /* Register state */ - u32 start; - u32 tail; - u32 head; - u32 ctl; - u32 mode; - u32 hws; - u32 ipeir; - u32 ipehr; - u32 bbstate; - u32 instpm; - u32 instps; - u32 seqno; - u64 bbaddr; - u64 acthd; - u32 fault_reg; - u64 faddr; - u32 rc_psmi; /* sleep state */ - u32 semaphore_mboxes[I915_NUM_ENGINES - 1]; - struct intel_instdone instdone; - - struct drm_i915_error_context { - char comm[TASK_COMM_LEN]; - pid_t pid; - u32 handle; - u32 hw_id; - int priority; - int ban_score; - int active; - int guilty; - bool bannable; - } context; - - struct drm_i915_error_object { - u64 gtt_offset; - u64 gtt_size; - int page_count; - int unused; - u32 *pages[0]; - } *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page; - - struct drm_i915_error_object **user_bo; - long user_bo_count; - - struct drm_i915_error_object *wa_ctx; - struct drm_i915_error_object *default_state; - - struct drm_i915_error_request { - long jiffies; - pid_t pid; - u32 context; - int priority; - int ban_score; - u32 seqno; - u32 head; - u32 tail; - } *requests, execlist[EXECLIST_MAX_PORTS]; - unsigned int num_ports; - - struct drm_i915_error_waiter { - char comm[TASK_COMM_LEN]; - pid_t pid; - u32 seqno; - } *waiters; - - struct { - u32 gfx_mode; - union { - u64 pdp[4]; - u32 pp_dir_base; - }; - } vm_info; - } engine[I915_NUM_ENGINES]; - - struct drm_i915_error_buffer { - u32 size; - u32 name; - u32 rseqno[I915_NUM_ENGINES], wseqno; - u64 gtt_offset; - u32 read_domains; - u32 write_domain; - s32 fence_reg:I915_MAX_NUM_FENCE_BITS; - u32 tiling:2; - u32 dirty:1; - u32 purgeable:1; - u32 userptr:1; - s32 engine:4; - u32 cache_level:3; - } *active_bo[I915_NUM_ENGINES], *pinned_bo; - u32 active_bo_count[I915_NUM_ENGINES], pinned_bo_count; - struct i915_address_space *active_vm[I915_NUM_ENGINES]; -}; - enum i915_cache_level { I915_CACHE_NONE = 0, I915_CACHE_LLC, /* also used for snoopable memory on non-LLC */ @@ -766,12 +603,16 @@ struct i915_psr { bool active; struct delayed_work work; unsigned busy_frontbuffer_bits; - bool psr2_support; - bool aux_frame_sync; + bool sink_psr2_support; bool link_standby; - bool y_cord_support; bool colorimetry_support; bool alpm; + bool has_hw_tracking; + bool psr2_enabled; + u8 sink_sync_latency; + bool debug; + ktime_t last_entry_attempt; + ktime_t last_exit; void (*enable_source)(struct intel_dp *, const struct intel_crtc_state *); @@ -1146,16 +987,6 @@ struct i915_gem_mm { u32 object_count; }; -struct drm_i915_error_state_buf { - struct drm_i915_private *i915; - unsigned bytes; - unsigned size; - int err; - u8 *buf; - loff_t start; - loff_t pos; -}; - #define I915_IDLE_ENGINES_TIMEOUT (200) /* in ms */ #define I915_RESET_TIMEOUT (10 * HZ) /* 10s */ @@ -1164,102 +995,6 @@ struct drm_i915_error_state_buf { #define I915_ENGINE_DEAD_TIMEOUT (4 * HZ) /* Seqno, head and subunits dead */ #define I915_SEQNO_DEAD_TIMEOUT (12 * HZ) /* Seqno dead with active head */ -struct i915_gpu_error { - /* For hangcheck timer */ -#define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */ -#define DRM_I915_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD) - - struct delayed_work hangcheck_work; - - /* For reset and error_state handling. */ - spinlock_t lock; - /* Protected by the above dev->gpu_error.lock. */ - struct i915_gpu_state *first_error; - - atomic_t pending_fb_pin; - - unsigned long missed_irq_rings; - - /** - * State variable controlling the reset flow and count - * - * This is a counter which gets incremented when reset is triggered, - * - * Before the reset commences, the I915_RESET_BACKOFF bit is set - * meaning that any waiters holding onto the struct_mutex should - * relinquish the lock immediately in order for the reset to start. - * - * If reset is not completed succesfully, the I915_WEDGE bit is - * set meaning that hardware is terminally sour and there is no - * recovery. All waiters on the reset_queue will be woken when - * that happens. - * - * This counter is used by the wait_seqno code to notice that reset - * event happened and it needs to restart the entire ioctl (since most - * likely the seqno it waited for won't ever signal anytime soon). - * - * This is important for lock-free wait paths, where no contended lock - * naturally enforces the correct ordering between the bail-out of the - * waiter and the gpu reset work code. - */ - unsigned long reset_count; - - /** - * flags: Control various stages of the GPU reset - * - * #I915_RESET_BACKOFF - When we start a reset, we want to stop any - * other users acquiring the struct_mutex. To do this we set the - * #I915_RESET_BACKOFF bit in the error flags when we detect a reset - * and then check for that bit before acquiring the struct_mutex (in - * i915_mutex_lock_interruptible()?). I915_RESET_BACKOFF serves a - * secondary role in preventing two concurrent global reset attempts. - * - * #I915_RESET_HANDOFF - To perform the actual GPU reset, we need the - * struct_mutex. We try to acquire the struct_mutex in the reset worker, - * but it may be held by some long running waiter (that we cannot - * interrupt without causing trouble). Once we are ready to do the GPU - * reset, we set the I915_RESET_HANDOFF bit and wakeup any waiters. If - * they already hold the struct_mutex and want to participate they can - * inspect the bit and do the reset directly, otherwise the worker - * waits for the struct_mutex. - * - * #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to - * acquire the struct_mutex to reset an engine, we need an explicit - * flag to prevent two concurrent reset attempts in the same engine. - * As the number of engines continues to grow, allocate the flags from - * the most significant bits. - * - * #I915_WEDGED - If reset fails and we can no longer use the GPU, - * we set the #I915_WEDGED bit. Prior to command submission, e.g. - * i915_request_alloc(), this bit is checked and the sequence - * aborted (with -EIO reported to userspace) if set. - */ - unsigned long flags; -#define I915_RESET_BACKOFF 0 -#define I915_RESET_HANDOFF 1 -#define I915_RESET_MODESET 2 -#define I915_WEDGED (BITS_PER_LONG - 1) -#define I915_RESET_ENGINE (I915_WEDGED - I915_NUM_ENGINES) - - /** Number of times an engine has been reset */ - u32 reset_engine_count[I915_NUM_ENGINES]; - - /** - * Waitqueue to signal when a hang is detected. Used to for waiters - * to release the struct_mutex for the reset to procede. - */ - wait_queue_head_t wait_queue; - - /** - * Waitqueue to signal when the reset has completed. Used by clients - * that wait for dev_priv->mm.wedged to settle. - */ - wait_queue_head_t reset_queue; - - /* For missed irq/seqno simulation. */ - unsigned long test_irq_rings; -}; - enum modeset_restore { MODESET_ON_LID_OPEN, MODESET_DONE, @@ -1338,6 +1073,7 @@ struct intel_vbt_data { } edp; struct { + bool enable; bool full_link; bool require_aux_wakeup; int idle_frames; @@ -1451,11 +1187,13 @@ static inline bool skl_ddb_entry_equal(const struct skl_ddb_entry *e1, } struct skl_ddb_allocation { - struct skl_ddb_entry plane[I915_MAX_PIPES][I915_MAX_PLANES]; /* packed/uv */ - struct skl_ddb_entry y_plane[I915_MAX_PIPES][I915_MAX_PLANES]; + /* packed/y */ + struct skl_ddb_entry plane[I915_MAX_PIPES][I915_MAX_PLANES]; + struct skl_ddb_entry uv_plane[I915_MAX_PIPES][I915_MAX_PLANES]; + u8 enabled_slices; /* GEN11 has configurable 2 slices */ }; -struct skl_wm_values { +struct skl_ddb_values { unsigned dirty_pipes; struct skl_ddb_allocation ddb; }; @@ -1470,6 +1208,7 @@ struct skl_wm_level { struct skl_wm_params { bool x_tiled, y_tiled; bool rc_surface; + bool is_planar; uint32_t width; uint8_t cpp; uint32_t plane_pixel_rate; @@ -1564,7 +1303,6 @@ struct i915_wa_reg { struct i915_workarounds { struct i915_wa_reg reg[I915_MAX_WA_REGS]; u32 count; - u32 hw_whitelist_count[I915_NUM_ENGINES]; }; struct i915_virtual_gpu { @@ -1860,6 +1598,8 @@ struct drm_i915_private { struct intel_gvt *gvt; + struct intel_wopcm wopcm; + struct intel_huc huc; struct intel_guc guc; @@ -2152,7 +1892,7 @@ struct drm_i915_private { /* current hardware state */ union { struct ilk_wm_values hw; - struct skl_wm_values skl_hw; + struct skl_ddb_values skl_hw; struct vlv_wm_values vlv; struct g4x_wm_values g4x; }; @@ -2321,8 +2061,11 @@ struct drm_i915_private { void (*cleanup_engine)(struct intel_engine_cs *engine); struct list_head timelines; - struct i915_gem_timeline global_timeline; + + struct list_head active_rings; + struct list_head closed_vma; u32 active_requests; + u32 request_serial; /** * Is the GPU currently considered idle, or busy executing @@ -2392,6 +2135,11 @@ static inline struct drm_i915_private *kdev_to_i915(struct device *kdev) return to_i915(dev_get_drvdata(kdev)); } +static inline struct drm_i915_private *wopcm_to_i915(struct intel_wopcm *wopcm) +{ + return container_of(wopcm, struct drm_i915_private, wopcm); +} + static inline struct drm_i915_private *guc_to_i915(struct intel_guc *guc) { return container_of(guc, struct drm_i915_private, guc); @@ -2411,8 +2159,10 @@ static inline struct drm_i915_private *huc_to_i915(struct intel_huc *huc) /* Iterator over subset of engines selected by mask */ #define for_each_engine_masked(engine__, dev_priv__, mask__, tmp__) \ - for (tmp__ = mask__ & INTEL_INFO(dev_priv__)->ring_mask; \ - tmp__ ? (engine__ = (dev_priv__)->engine[__mask_next_bit(tmp__)]), 1 : 0; ) + for ((tmp__) = (mask__) & INTEL_INFO(dev_priv__)->ring_mask; \ + (tmp__) ? \ + ((engine__) = (dev_priv__)->engine[__mask_next_bit(tmp__)]), 1 : \ + 0;) enum hdmi_force_audio { HDMI_AUDIO_OFF_DVI = -2, /* no aux data for HDMI-DVI converter */ @@ -2720,6 +2470,15 @@ intel_info(const struct drm_i915_private *dev_priv) #define IS_CNL_REVID(p, since, until) \ (IS_CANNONLAKE(p) && IS_REVID(p, since, until)) +#define ICL_REVID_A0 0x0 +#define ICL_REVID_A2 0x1 +#define ICL_REVID_B0 0x3 +#define ICL_REVID_B2 0x4 +#define ICL_REVID_C0 0x5 + +#define IS_ICL_REVID(p, since, until) \ + (IS_ICELAKE(p) && IS_REVID(p, since, until)) + /* * The genX designation typically refers to the render engine, so render * capability related checks should use IS_GEN, while display and other checks @@ -2963,10 +2722,11 @@ extern void i915_driver_unload(struct drm_device *dev); extern int intel_gpu_reset(struct drm_i915_private *dev_priv, u32 engine_mask); extern bool intel_has_gpu_reset(struct drm_i915_private *dev_priv); -#define I915_RESET_QUIET BIT(0) -extern void i915_reset(struct drm_i915_private *i915, unsigned int flags); +extern void i915_reset(struct drm_i915_private *i915, + unsigned int stalled_mask, + const char *reason); extern int i915_reset_engine(struct intel_engine_cs *engine, - unsigned int flags); + const char *reason); extern bool intel_has_reset_engine(struct drm_i915_private *dev_priv); extern int intel_reset_guc(struct drm_i915_private *dev_priv); @@ -3014,10 +2774,12 @@ static inline void i915_queue_hangcheck(struct drm_i915_private *dev_priv) &dev_priv->gpu_error.hangcheck_work, delay); } -__printf(3, 4) +__printf(4, 5) void i915_handle_error(struct drm_i915_private *dev_priv, u32 engine_mask, + unsigned long flags, const char *fmt, ...); +#define I915_ERROR_CAPTURE BIT(0) extern void intel_irq_init(struct drm_i915_private *dev_priv); extern void intel_irq_fini(struct drm_i915_private *dev_priv); @@ -3132,8 +2894,8 @@ int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, int i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); void i915_gem_sanitize(struct drm_i915_private *i915); -int i915_gem_load_init(struct drm_i915_private *dev_priv); -void i915_gem_load_cleanup(struct drm_i915_private *dev_priv); +int i915_gem_init_early(struct drm_i915_private *dev_priv); +void i915_gem_cleanup_early(struct drm_i915_private *dev_priv); void i915_gem_load_init_fences(struct drm_i915_private *dev_priv); int i915_gem_freeze(struct drm_i915_private *dev_priv); int i915_gem_freeze_late(struct drm_i915_private *dev_priv); @@ -3388,13 +3150,15 @@ static inline u32 i915_reset_engine_count(struct i915_gpu_error *error, struct i915_request * i915_gem_reset_prepare_engine(struct intel_engine_cs *engine); int i915_gem_reset_prepare(struct drm_i915_private *dev_priv); -void i915_gem_reset(struct drm_i915_private *dev_priv); +void i915_gem_reset(struct drm_i915_private *dev_priv, + unsigned int stalled_mask); void i915_gem_reset_finish_engine(struct intel_engine_cs *engine); void i915_gem_reset_finish(struct drm_i915_private *dev_priv); void i915_gem_set_wedged(struct drm_i915_private *dev_priv); bool i915_gem_unset_wedged(struct drm_i915_private *dev_priv); void i915_gem_reset_engine(struct intel_engine_cs *engine, - struct i915_request *request); + struct i915_request *request, + bool stalled); void i915_gem_init_mmio(struct drm_i915_private *i915); int __must_check i915_gem_init(struct drm_i915_private *dev_priv); @@ -3412,7 +3176,7 @@ int i915_gem_object_wait(struct drm_i915_gem_object *obj, struct intel_rps_client *rps); int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, unsigned int flags, - int priority); + const struct i915_sched_attr *attr); #define I915_PRIORITY_DISPLAY I915_PRIORITY_MAX int __must_check @@ -3481,16 +3245,6 @@ i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id) return ctx; } -static inline struct intel_timeline * -i915_gem_context_lookup_timeline(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) -{ - struct i915_address_space *vm; - - vm = ctx->ppgtt ? &ctx->ppgtt->base : &ctx->i915->ggtt.base; - return &vm->timeline.engine[engine->id]; -} - int i915_perf_open_ioctl(struct drm_device *dev, void *data, struct drm_file *file); int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, @@ -3589,64 +3343,6 @@ static inline int i915_debugfs_connector_add(struct drm_connector *connector) static inline void intel_display_crc_init(struct drm_i915_private *dev_priv) {} #endif -/* i915_gpu_error.c */ -#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) - -__printf(2, 3) -void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...); -int i915_error_state_to_str(struct drm_i915_error_state_buf *estr, - const struct i915_gpu_state *gpu); -int i915_error_state_buf_init(struct drm_i915_error_state_buf *eb, - struct drm_i915_private *i915, - size_t count, loff_t pos); -static inline void i915_error_state_buf_release( - struct drm_i915_error_state_buf *eb) -{ - kfree(eb->buf); -} - -struct i915_gpu_state *i915_capture_gpu_state(struct drm_i915_private *i915); -void i915_capture_error_state(struct drm_i915_private *dev_priv, - u32 engine_mask, - const char *error_msg); - -static inline struct i915_gpu_state * -i915_gpu_state_get(struct i915_gpu_state *gpu) -{ - kref_get(&gpu->ref); - return gpu; -} - -void __i915_gpu_state_free(struct kref *kref); -static inline void i915_gpu_state_put(struct i915_gpu_state *gpu) -{ - if (gpu) - kref_put(&gpu->ref, __i915_gpu_state_free); -} - -struct i915_gpu_state *i915_first_error_state(struct drm_i915_private *i915); -void i915_reset_error_state(struct drm_i915_private *i915); - -#else - -static inline void i915_capture_error_state(struct drm_i915_private *dev_priv, - u32 engine_mask, - const char *error_msg) -{ -} - -static inline struct i915_gpu_state * -i915_first_error_state(struct drm_i915_private *i915) -{ - return NULL; -} - -static inline void i915_reset_error_state(struct drm_i915_private *i915) -{ -} - -#endif - const char *i915_cache_level_str(struct drm_i915_private *i915, int type); /* i915_cmd_parser.c */ diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 7b5a9d7..0a20701 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -35,6 +35,7 @@ #include "intel_drv.h" #include "intel_frontbuffer.h" #include "intel_mocs.h" +#include "intel_workarounds.h" #include "i915_gemfs.h" #include <linux/dma-fence-array.h> #include <linux/kthread.h> @@ -136,6 +137,102 @@ int i915_mutex_lock_interruptible(struct drm_device *dev) return 0; } +static u32 __i915_gem_park(struct drm_i915_private *i915) +{ + lockdep_assert_held(&i915->drm.struct_mutex); + GEM_BUG_ON(i915->gt.active_requests); + GEM_BUG_ON(!list_empty(&i915->gt.active_rings)); + + if (!i915->gt.awake) + return I915_EPOCH_INVALID; + + GEM_BUG_ON(i915->gt.epoch == I915_EPOCH_INVALID); + + /* + * Be paranoid and flush a concurrent interrupt to make sure + * we don't reactivate any irq tasklets after parking. + * + * FIXME: Note that even though we have waited for execlists to be idle, + * there may still be an in-flight interrupt even though the CSB + * is now empty. synchronize_irq() makes sure that a residual interrupt + * is completed before we continue, but it doesn't prevent the HW from + * raising a spurious interrupt later. To complete the shield we should + * coordinate disabling the CS irq with flushing the interrupts. + */ + synchronize_irq(i915->drm.irq); + + intel_engines_park(i915); + i915_timelines_park(i915); + + i915_pmu_gt_parked(i915); + i915_vma_parked(i915); + + i915->gt.awake = false; + + if (INTEL_GEN(i915) >= 6) + gen6_rps_idle(i915); + + intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ); + + intel_runtime_pm_put(i915); + + return i915->gt.epoch; +} + +void i915_gem_park(struct drm_i915_private *i915) +{ + lockdep_assert_held(&i915->drm.struct_mutex); + GEM_BUG_ON(i915->gt.active_requests); + + if (!i915->gt.awake) + return; + + /* Defer the actual call to __i915_gem_park() to prevent ping-pongs */ + mod_delayed_work(i915->wq, &i915->gt.idle_work, msecs_to_jiffies(100)); +} + +void i915_gem_unpark(struct drm_i915_private *i915) +{ + lockdep_assert_held(&i915->drm.struct_mutex); + GEM_BUG_ON(!i915->gt.active_requests); + + if (i915->gt.awake) + return; + + intel_runtime_pm_get_noresume(i915); + + /* + * It seems that the DMC likes to transition between the DC states a lot + * when there are no connected displays (no active power domains) during + * command submission. + * + * This activity has negative impact on the performance of the chip with + * huge latencies observed in the interrupt handler and elsewhere. + * + * Work around it by grabbing a GT IRQ power domain whilst there is any + * GT activity, preventing any DC state transitions. + */ + intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); + + i915->gt.awake = true; + if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */ + i915->gt.epoch = 1; + + intel_enable_gt_powersave(i915); + i915_update_gfx_val(i915); + if (INTEL_GEN(i915) >= 6) + gen6_rps_busy(i915); + i915_pmu_gt_unparked(i915); + + intel_engines_unpark(i915); + + i915_queue_hangcheck(i915); + + queue_delayed_work(i915->wq, + &i915->gt.retire_work, + round_jiffies_up_relative(HZ)); +} + int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, struct drm_file *file) @@ -469,7 +566,8 @@ i915_gem_object_wait_reservation(struct reservation_object *resv, return timeout; } -static void __fence_set_priority(struct dma_fence *fence, int prio) +static void __fence_set_priority(struct dma_fence *fence, + const struct i915_sched_attr *attr) { struct i915_request *rq; struct intel_engine_cs *engine; @@ -480,13 +578,16 @@ static void __fence_set_priority(struct dma_fence *fence, int prio) rq = to_request(fence); engine = rq->engine; - rcu_read_lock(); + local_bh_disable(); + rcu_read_lock(); /* RCU serialisation for set-wedged protection */ if (engine->schedule) - engine->schedule(rq, prio); + engine->schedule(rq, attr); rcu_read_unlock(); + local_bh_enable(); /* kick the tasklets if queues were reprioritised */ } -static void fence_set_priority(struct dma_fence *fence, int prio) +static void fence_set_priority(struct dma_fence *fence, + const struct i915_sched_attr *attr) { /* Recurse once into a fence-array */ if (dma_fence_is_array(fence)) { @@ -494,16 +595,16 @@ static void fence_set_priority(struct dma_fence *fence, int prio) int i; for (i = 0; i < array->num_fences; i++) - __fence_set_priority(array->fences[i], prio); + __fence_set_priority(array->fences[i], attr); } else { - __fence_set_priority(fence, prio); + __fence_set_priority(fence, attr); } } int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, unsigned int flags, - int prio) + const struct i915_sched_attr *attr) { struct dma_fence *excl; @@ -518,7 +619,7 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, return ret; for (i = 0; i < count; i++) { - fence_set_priority(shared[i], prio); + fence_set_priority(shared[i], attr); dma_fence_put(shared[i]); } @@ -528,7 +629,7 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object *obj, } if (excl) { - fence_set_priority(excl, prio); + fence_set_priority(excl, attr); dma_fence_put(excl); } return 0; @@ -2879,8 +2980,8 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) * extra delay for a recent interrupt is pointless. Hence, we do * not need an engine->irq_seqno_barrier() before the seqno reads. */ - spin_lock_irqsave(&engine->timeline->lock, flags); - list_for_each_entry(request, &engine->timeline->requests, link) { + spin_lock_irqsave(&engine->timeline.lock, flags); + list_for_each_entry(request, &engine->timeline.requests, link) { if (__i915_request_completed(request, request->global_seqno)) continue; @@ -2891,25 +2992,11 @@ i915_gem_find_active_request(struct intel_engine_cs *engine) active = request; break; } - spin_unlock_irqrestore(&engine->timeline->lock, flags); + spin_unlock_irqrestore(&engine->timeline.lock, flags); return active; } -static bool engine_stalled(struct intel_engine_cs *engine) -{ - if (!engine->hangcheck.stalled) - return false; - - /* Check for possible seqno movement after hang declaration */ - if (engine->hangcheck.seqno != intel_engine_get_seqno(engine)) { - DRM_DEBUG_DRIVER("%s pardoned\n", engine->name); - return false; - } - - return true; -} - /* * Ensure irq handler finishes, and not run again. * Also return the active request so that we only search for it once. @@ -2998,6 +3085,7 @@ int i915_gem_reset_prepare(struct drm_i915_private *dev_priv) } i915_gem_revoke_fences(dev_priv); + intel_uc_sanitize(dev_priv); return err; } @@ -3025,15 +3113,15 @@ static void engine_skip_context(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; struct i915_gem_context *hung_ctx = request->ctx; - struct intel_timeline *timeline; + struct i915_timeline *timeline = request->timeline; unsigned long flags; - timeline = i915_gem_context_lookup_timeline(hung_ctx, engine); + GEM_BUG_ON(timeline == &engine->timeline); - spin_lock_irqsave(&engine->timeline->lock, flags); - spin_lock(&timeline->lock); + spin_lock_irqsave(&engine->timeline.lock, flags); + spin_lock_nested(&timeline->lock, SINGLE_DEPTH_NESTING); - list_for_each_entry_continue(request, &engine->timeline->requests, link) + list_for_each_entry_continue(request, &engine->timeline.requests, link) if (request->ctx == hung_ctx) skip_request(request); @@ -3041,13 +3129,14 @@ static void engine_skip_context(struct i915_request *request) skip_request(request); spin_unlock(&timeline->lock); - spin_unlock_irqrestore(&engine->timeline->lock, flags); + spin_unlock_irqrestore(&engine->timeline.lock, flags); } /* Returns the request if it was guilty of the hang */ static struct i915_request * i915_gem_reset_request(struct intel_engine_cs *engine, - struct i915_request *request) + struct i915_request *request, + bool stalled) { /* The guilty request will get skipped on a hung engine. * @@ -3070,7 +3159,15 @@ i915_gem_reset_request(struct intel_engine_cs *engine, * subsequent hangs. */ - if (engine_stalled(engine)) { + if (i915_request_completed(request)) { + GEM_TRACE("%s pardoned global=%d (fence %llx:%d), current %d\n", + engine->name, request->global_seqno, + request->fence.context, request->fence.seqno, + intel_engine_get_seqno(engine)); + stalled = false; + } + + if (stalled) { i915_gem_context_mark_guilty(request->ctx); skip_request(request); @@ -3089,11 +3186,11 @@ i915_gem_reset_request(struct intel_engine_cs *engine, dma_fence_set_error(&request->fence, -EAGAIN); /* Rewind the engine to replay the incomplete rq */ - spin_lock_irq(&engine->timeline->lock); + spin_lock_irq(&engine->timeline.lock); request = list_prev_entry(request, link); - if (&request->link == &engine->timeline->requests) + if (&request->link == &engine->timeline.requests) request = NULL; - spin_unlock_irq(&engine->timeline->lock); + spin_unlock_irq(&engine->timeline.lock); } } @@ -3101,7 +3198,8 @@ i915_gem_reset_request(struct intel_engine_cs *engine, } void i915_gem_reset_engine(struct intel_engine_cs *engine, - struct i915_request *request) + struct i915_request *request, + bool stalled) { /* * Make sure this write is visible before we re-enable the interrupt @@ -3111,7 +3209,7 @@ void i915_gem_reset_engine(struct intel_engine_cs *engine, smp_store_mb(engine->irq_posted, 0); if (request) - request = i915_gem_reset_request(engine, request); + request = i915_gem_reset_request(engine, request, stalled); if (request) { DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n", @@ -3122,7 +3220,8 @@ void i915_gem_reset_engine(struct intel_engine_cs *engine, engine->reset_hw(engine, request); } -void i915_gem_reset(struct drm_i915_private *dev_priv) +void i915_gem_reset(struct drm_i915_private *dev_priv, + unsigned int stalled_mask) { struct intel_engine_cs *engine; enum intel_engine_id id; @@ -3134,10 +3233,12 @@ void i915_gem_reset(struct drm_i915_private *dev_priv) for_each_engine(engine, dev_priv, id) { struct i915_gem_context *ctx; - i915_gem_reset_engine(engine, engine->hangcheck.active_request); + i915_gem_reset_engine(engine, + engine->hangcheck.active_request, + stalled_mask & ENGINE_MASK(id)); ctx = fetch_and_zero(&engine->last_retired_context); if (ctx) - engine->context_unpin(engine, ctx); + intel_context_unpin(ctx, engine); /* * Ostensibily, we always want a context loaded for powersaving, @@ -3160,13 +3261,6 @@ void i915_gem_reset(struct drm_i915_private *dev_priv) } i915_gem_restore_fences(dev_priv); - - if (dev_priv->gt.awake) { - intel_sanitize_gt_powersave(dev_priv); - intel_enable_gt_powersave(dev_priv); - if (INTEL_GEN(dev_priv) >= 6) - gen6_rps_busy(dev_priv); - } } void i915_gem_reset_finish_engine(struct intel_engine_cs *engine) @@ -3192,6 +3286,9 @@ void i915_gem_reset_finish(struct drm_i915_private *dev_priv) static void nop_submit_request(struct i915_request *request) { + GEM_TRACE("%s fence %llx:%d -> -EIO\n", + request->engine->name, + request->fence.context, request->fence.seqno); dma_fence_set_error(&request->fence, -EIO); i915_request_submit(request); @@ -3201,12 +3298,15 @@ static void nop_complete_submit_request(struct i915_request *request) { unsigned long flags; + GEM_TRACE("%s fence %llx:%d -> -EIO\n", + request->engine->name, + request->fence.context, request->fence.seqno); dma_fence_set_error(&request->fence, -EIO); - spin_lock_irqsave(&request->engine->timeline->lock, flags); + spin_lock_irqsave(&request->engine->timeline.lock, flags); __i915_request_submit(request); intel_engine_init_global_seqno(request->engine, request->global_seqno); - spin_unlock_irqrestore(&request->engine->timeline->lock, flags); + spin_unlock_irqrestore(&request->engine->timeline.lock, flags); } void i915_gem_set_wedged(struct drm_i915_private *i915) @@ -3214,7 +3314,9 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) struct intel_engine_cs *engine; enum intel_engine_id id; - if (drm_debug & DRM_UT_DRIVER) { + GEM_TRACE("start\n"); + + if (GEM_SHOW_DEBUG()) { struct drm_printer p = drm_debug_printer(__func__); for_each_engine(engine, i915, id) @@ -3237,6 +3339,9 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) } i915->caps.scheduler = 0; + /* Even if the GPU reset fails, it should still stop the engines */ + intel_gpu_reset(i915, ALL_ENGINES); + /* * Make sure no one is running the old callback before we proceed with * cancelling requests and resetting the completion tracking. Otherwise @@ -3270,27 +3375,31 @@ void i915_gem_set_wedged(struct drm_i915_private *i915) * (lockless) lookup doesn't try and wait upon the request as we * reset it. */ - spin_lock_irqsave(&engine->timeline->lock, flags); + spin_lock_irqsave(&engine->timeline.lock, flags); intel_engine_init_global_seqno(engine, intel_engine_last_submit(engine)); - spin_unlock_irqrestore(&engine->timeline->lock, flags); + spin_unlock_irqrestore(&engine->timeline.lock, flags); i915_gem_reset_finish_engine(engine); } + GEM_TRACE("end\n"); + wake_up_all(&i915->gpu_error.reset_queue); } bool i915_gem_unset_wedged(struct drm_i915_private *i915) { - struct i915_gem_timeline *tl; - int i; + struct i915_timeline *tl; lockdep_assert_held(&i915->drm.struct_mutex); if (!test_bit(I915_WEDGED, &i915->gpu_error.flags)) return true; - /* Before unwedging, make sure that all pending operations + GEM_TRACE("start\n"); + + /* + * Before unwedging, make sure that all pending operations * are flushed and errored out - we may have requests waiting upon * third party fences. We marked all inflight requests as EIO, and * every execbuf since returned EIO, for consistency we want all @@ -3300,31 +3409,33 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915) * No more can be submitted until we reset the wedged bit. */ list_for_each_entry(tl, &i915->gt.timelines, link) { - for (i = 0; i < ARRAY_SIZE(tl->engine); i++) { - struct i915_request *rq; + struct i915_request *rq; - rq = i915_gem_active_peek(&tl->engine[i].last_request, - &i915->drm.struct_mutex); - if (!rq) - continue; + rq = i915_gem_active_peek(&tl->last_request, + &i915->drm.struct_mutex); + if (!rq) + continue; - /* We can't use our normal waiter as we want to - * avoid recursively trying to handle the current - * reset. The basic dma_fence_default_wait() installs - * a callback for dma_fence_signal(), which is - * triggered by our nop handler (indirectly, the - * callback enables the signaler thread which is - * woken by the nop_submit_request() advancing the seqno - * and when the seqno passes the fence, the signaler - * then signals the fence waking us up). - */ - if (dma_fence_default_wait(&rq->fence, true, - MAX_SCHEDULE_TIMEOUT) < 0) - return false; - } + /* + * We can't use our normal waiter as we want to + * avoid recursively trying to handle the current + * reset. The basic dma_fence_default_wait() installs + * a callback for dma_fence_signal(), which is + * triggered by our nop handler (indirectly, the + * callback enables the signaler thread which is + * woken by the nop_submit_request() advancing the seqno + * and when the seqno passes the fence, the signaler + * then signals the fence waking us up). + */ + if (dma_fence_default_wait(&rq->fence, true, + MAX_SCHEDULE_TIMEOUT) < 0) + return false; } + i915_retire_requests(i915); + GEM_BUG_ON(i915->gt.active_requests); - /* Undo nop_submit_request. We prevent all new i915 requests from + /* + * Undo nop_submit_request. We prevent all new i915 requests from * being queued (by disallowing execbuf whilst wedged) so having * waited for all active requests above, we know the system is idle * and do not have to worry about a thread being inside @@ -3335,6 +3446,8 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915) intel_engines_reset_default_submission(i915); i915_gem_contexts_lost(i915); + GEM_TRACE("end\n"); + smp_mb__before_atomic(); /* complete takeover before enabling execbuf */ clear_bit(I915_WEDGED, &i915->gpu_error.flags); @@ -3473,36 +3586,9 @@ i915_gem_idle_work_handler(struct work_struct *work) if (new_requests_since_last_retire(dev_priv)) goto out_unlock; - /* - * Be paranoid and flush a concurrent interrupt to make sure - * we don't reactivate any irq tasklets after parking. - * - * FIXME: Note that even though we have waited for execlists to be idle, - * there may still be an in-flight interrupt even though the CSB - * is now empty. synchronize_irq() makes sure that a residual interrupt - * is completed before we continue, but it doesn't prevent the HW from - * raising a spurious interrupt later. To complete the shield we should - * coordinate disabling the CS irq with flushing the interrupts. - */ - synchronize_irq(dev_priv->drm.irq); - - intel_engines_park(dev_priv); - i915_gem_timelines_park(dev_priv); + epoch = __i915_gem_park(dev_priv); - i915_pmu_gt_parked(dev_priv); - - GEM_BUG_ON(!dev_priv->gt.awake); - dev_priv->gt.awake = false; - epoch = dev_priv->gt.epoch; - GEM_BUG_ON(epoch == I915_EPOCH_INVALID); rearm_hangcheck = false; - - if (INTEL_GEN(dev_priv) >= 6) - gen6_rps_idle(dev_priv); - - intel_display_power_put(dev_priv, POWER_DOMAIN_GT_IRQ); - - intel_runtime_pm_put(dev_priv); out_unlock: mutex_unlock(&dev_priv->drm.struct_mutex); @@ -3648,17 +3734,9 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file) return ret; } -static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags) +static int wait_for_timeline(struct i915_timeline *tl, unsigned int flags) { - int ret, i; - - for (i = 0; i < ARRAY_SIZE(tl->engine); i++) { - ret = i915_gem_active_wait(&tl->engine[i].last_request, flags); - if (ret) - return ret; - } - - return 0; + return i915_gem_active_wait(&tl->last_request, flags); } static int wait_for_engines(struct drm_i915_private *i915) @@ -3666,16 +3744,7 @@ static int wait_for_engines(struct drm_i915_private *i915) if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) { dev_err(i915->drm.dev, "Failed to idle engines, declaring wedged!\n"); - if (drm_debug & DRM_UT_DRIVER) { - struct drm_printer p = drm_debug_printer(__func__); - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine(engine, i915, id) - intel_engine_dump(engine, &p, - "%s\n", engine->name); - } - + GEM_TRACE_DUMP(); i915_gem_set_wedged(i915); return -EIO; } @@ -3685,30 +3754,37 @@ static int wait_for_engines(struct drm_i915_private *i915) int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) { - int ret; - /* If the device is asleep, we have no requests outstanding */ if (!READ_ONCE(i915->gt.awake)) return 0; if (flags & I915_WAIT_LOCKED) { - struct i915_gem_timeline *tl; + struct i915_timeline *tl; + int err; lockdep_assert_held(&i915->drm.struct_mutex); list_for_each_entry(tl, &i915->gt.timelines, link) { - ret = wait_for_timeline(tl, flags); - if (ret) - return ret; + err = wait_for_timeline(tl, flags); + if (err) + return err; } i915_retire_requests(i915); - ret = wait_for_engines(i915); + return wait_for_engines(i915); } else { - ret = wait_for_timeline(&i915->gt.global_timeline, flags); - } + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err; - return ret; + for_each_engine(engine, i915, id) { + err = wait_for_timeline(&engine->timeline, flags); + if (err) + return err; + } + + return 0; + } } static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj) @@ -4088,9 +4164,10 @@ out: } /* - * Prepare buffer for display plane (scanout, cursors, etc). - * Can be called from an uninterruptible phase (modesetting) and allows - * any flushes to be pipelined (for pageflips). + * Prepare buffer for display plane (scanout, cursors, etc). Can be called from + * an uninterruptible phase (modesetting) and allows any flushes to be pipelined + * (for pageflips). We only flush the caches while preparing the buffer for + * display, the callers are responsible for frontbuffer flush. */ struct i915_vma * i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, @@ -4146,9 +4223,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj, vma->display_alignment = max_t(u64, vma->display_alignment, alignment); - /* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */ __i915_gem_object_flush_for_display(obj); - intel_fb_obj_flush(obj, ORIGIN_DIRTYFB); /* It should now be out of any other write domains, and we can update * the domain values for our changes. @@ -4723,7 +4798,7 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, &obj->vma_list, obj_link) { GEM_BUG_ON(i915_vma_is_active(vma)); vma->flags &= ~I915_VMA_PIN_MASK; - i915_vma_close(vma); + i915_vma_destroy(vma); } GEM_BUG_ON(!list_empty(&obj->vma_list)); GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree)); @@ -4878,7 +4953,7 @@ static void assert_kernel_context_is_current(struct drm_i915_private *i915) enum intel_engine_id id; for_each_engine(engine, i915, id) { - GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline->last_request)); + GEM_BUG_ON(__i915_gem_active_peek(&engine->timeline.last_request)); GEM_BUG_ON(engine->last_retired_context != kernel_context); } } @@ -4973,6 +5048,7 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) * machines is a good idea, we don't - just in case it leaves the * machine in an unusable condition. */ + intel_uc_sanitize(dev_priv); i915_gem_sanitize(dev_priv); intel_runtime_pm_put(dev_priv); @@ -5118,6 +5194,8 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv) } } + intel_gt_workarounds_apply(dev_priv); + i915_gem_init_swizzling(dev_priv); /* @@ -5140,6 +5218,12 @@ int i915_gem_init_hw(struct drm_i915_private *dev_priv) goto out; } + ret = intel_wopcm_init_hw(&dev_priv->wopcm); + if (ret) { + DRM_ERROR("Enabling WOPCM failed (%d)\n", ret); + goto out; + } + /* We can't enable contexts until all firmware is loaded */ ret = intel_uc_init_hw(dev_priv); if (ret) { @@ -5207,7 +5291,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) for_each_engine(engine, i915, id) { struct i915_vma *state; - state = ctx->engine[id].state; + state = to_intel_context(ctx, engine)->state; if (!state) continue; @@ -5297,6 +5381,10 @@ int i915_gem_init(struct drm_i915_private *dev_priv) if (ret) return ret; + ret = intel_wopcm_init(&dev_priv->wopcm); + if (ret) + return ret; + ret = intel_uc_init_misc(dev_priv); if (ret) return ret; @@ -5478,8 +5566,7 @@ static void i915_gem_init__mm(struct drm_i915_private *i915) INIT_WORK(&i915->mm.free_work, __i915_gem_free_work); } -int -i915_gem_load_init(struct drm_i915_private *dev_priv) +int i915_gem_init_early(struct drm_i915_private *dev_priv) { int err = -ENOMEM; @@ -5512,12 +5599,9 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) if (!dev_priv->priorities) goto err_dependencies; - mutex_lock(&dev_priv->drm.struct_mutex); INIT_LIST_HEAD(&dev_priv->gt.timelines); - err = i915_gem_timeline_init__global(dev_priv); - mutex_unlock(&dev_priv->drm.struct_mutex); - if (err) - goto err_priorities; + INIT_LIST_HEAD(&dev_priv->gt.active_rings); + INIT_LIST_HEAD(&dev_priv->gt.closed_vma); i915_gem_init__mm(dev_priv); @@ -5538,8 +5622,6 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) return 0; -err_priorities: - kmem_cache_destroy(dev_priv->priorities); err_dependencies: kmem_cache_destroy(dev_priv->dependencies); err_requests: @@ -5554,17 +5636,13 @@ err_out: return err; } -void i915_gem_load_cleanup(struct drm_i915_private *dev_priv) +void i915_gem_cleanup_early(struct drm_i915_private *dev_priv) { i915_gem_drain_freed_objects(dev_priv); GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list)); GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count)); WARN_ON(dev_priv->mm.object_count); - - mutex_lock(&dev_priv->drm.struct_mutex); - i915_gem_timeline_fini(&dev_priv->gt.global_timeline); WARN_ON(!list_empty(&dev_priv->gt.timelines)); - mutex_unlock(&dev_priv->drm.struct_mutex); kmem_cache_destroy(dev_priv->priorities); kmem_cache_destroy(dev_priv->dependencies); diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index f54c4ff..5259204 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -27,7 +27,12 @@ #include <linux/bug.h> +struct drm_i915_private; + #ifdef CONFIG_DRM_I915_DEBUG_GEM + +#define GEM_SHOW_DEBUG() (drm_debug & DRM_UT_DRIVER) + #define GEM_BUG_ON(condition) do { if (unlikely((condition))) { \ pr_err("%s:%d GEM_BUG_ON(%s)\n", \ __func__, __LINE__, __stringify(condition)); \ @@ -43,6 +48,9 @@ #define GEM_DEBUG_BUG_ON(expr) GEM_BUG_ON(expr) #else + +#define GEM_SHOW_DEBUG() (0) + #define GEM_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr) #define GEM_WARN_ON(expr) (BUILD_BUG_ON_INVALID(expr), 0) @@ -53,10 +61,15 @@ #if IS_ENABLED(CONFIG_DRM_I915_TRACE_GEM) #define GEM_TRACE(...) trace_printk(__VA_ARGS__) +#define GEM_TRACE_DUMP() ftrace_dump(DUMP_ALL) #else #define GEM_TRACE(...) do { } while (0) +#define GEM_TRACE_DUMP() do { } while (0) #endif #define I915_NUM_ENGINES 8 +void i915_gem_park(struct drm_i915_private *i915); +void i915_gem_unpark(struct drm_i915_private *i915); + #endif /* __I915_GEM_H__ */ diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c index d3cbe84..f3890b6 100644 --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c @@ -1,29 +1,11 @@ /* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. + * SPDX-License-Identifier: MIT * + * Copyright © 2014-2018 Intel Corporation */ -#include "i915_drv.h" #include "i915_gem_batch_pool.h" +#include "i915_drv.h" /** * DOC: batch pool @@ -41,11 +23,11 @@ /** * i915_gem_batch_pool_init() - initialize a batch buffer pool - * @engine: the associated request submission engine * @pool: the batch buffer pool + * @engine: the associated request submission engine */ -void i915_gem_batch_pool_init(struct intel_engine_cs *engine, - struct i915_gem_batch_pool *pool) +void i915_gem_batch_pool_init(struct i915_gem_batch_pool *pool, + struct intel_engine_cs *engine) { int n; diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.h b/drivers/gpu/drm/i915/i915_gem_batch_pool.h index 10d5ac4..56947da 100644 --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.h +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.h @@ -1,31 +1,13 @@ /* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. + * SPDX-License-Identifier: MIT * + * Copyright © 2014-2018 Intel Corporation */ #ifndef I915_GEM_BATCH_POOL_H #define I915_GEM_BATCH_POOL_H -#include "i915_drv.h" +#include <linux/types.h> struct intel_engine_cs; @@ -34,9 +16,8 @@ struct i915_gem_batch_pool { struct list_head cache_list[4]; }; -/* i915_gem_batch_pool.c */ -void i915_gem_batch_pool_init(struct intel_engine_cs *engine, - struct i915_gem_batch_pool *pool); +void i915_gem_batch_pool_init(struct i915_gem_batch_pool *pool, + struct intel_engine_cs *engine); void i915_gem_batch_pool_fini(struct i915_gem_batch_pool *pool); struct drm_i915_gem_object* i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, size_t size); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index f2cbea7..33f8a4b 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -90,6 +90,7 @@ #include <drm/i915_drm.h> #include "i915_drv.h" #include "i915_trace.h" +#include "intel_workarounds.h" #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1 @@ -116,15 +117,15 @@ static void lut_close(struct i915_gem_context *ctx) static void i915_gem_context_free(struct i915_gem_context *ctx) { - int i; + unsigned int n; lockdep_assert_held(&ctx->i915->drm.struct_mutex); GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); i915_ppgtt_put(ctx->ppgtt); - for (i = 0; i < I915_NUM_ENGINES; i++) { - struct intel_context *ce = &ctx->engine[i]; + for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) { + struct intel_context *ce = &ctx->__engine[n]; if (!ce->state) continue; @@ -280,7 +281,7 @@ __create_hw_context(struct drm_i915_private *dev_priv, kref_init(&ctx->ref); list_add_tail(&ctx->link, &dev_priv->contexts.list); ctx->i915 = dev_priv; - ctx->priority = I915_PRIORITY_NORMAL; + ctx->sched.priority = I915_PRIORITY_NORMAL; INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL); INIT_LIST_HEAD(&ctx->handles_list); @@ -318,12 +319,13 @@ __create_hw_context(struct drm_i915_private *dev_priv, ctx->desc_template = default_desc_template(dev_priv, dev_priv->mm.aliasing_ppgtt); - /* GuC requires the ring to be placed above GUC_WOPCM_TOP. If GuC is not + /* + * GuC requires the ring to be placed in Non-WOPCM memory. If GuC is not * present or not in use we still need a small bias as ring wraparound * at offset 0 sometimes hangs. No idea why. */ if (USES_GUC(dev_priv)) - ctx->ggtt_offset_bias = GUC_WOPCM_TOP; + ctx->ggtt_offset_bias = dev_priv->guc.ggtt_pin_bias; else ctx->ggtt_offset_bias = I915_GTT_PAGE_SIZE; @@ -429,7 +431,7 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio) return ctx; i915_gem_context_clear_bannable(ctx); - ctx->priority = prio; + ctx->sched.priority = prio; ctx->ring_size = PAGE_SIZE; GEM_BUG_ON(!i915_gem_context_is_kernel(ctx)); @@ -458,11 +460,16 @@ static bool needs_preempt_context(struct drm_i915_private *i915) int i915_gem_contexts_init(struct drm_i915_private *dev_priv) { struct i915_gem_context *ctx; + int ret; /* Reassure ourselves we are only called once */ GEM_BUG_ON(dev_priv->kernel_context); GEM_BUG_ON(dev_priv->preempt_context); + ret = intel_ctx_workarounds_init(dev_priv); + if (ret) + return ret; + INIT_LIST_HEAD(&dev_priv->contexts.list); INIT_WORK(&dev_priv->contexts.free_work, contexts_free_worker); init_llist_head(&dev_priv->contexts.free_list); @@ -514,7 +521,7 @@ void i915_gem_contexts_lost(struct drm_i915_private *dev_priv) if (!engine->last_retired_context) continue; - engine->context_unpin(engine, engine->last_retired_context); + intel_context_unpin(engine->last_retired_context, engine); engine->last_retired_context = NULL; } } @@ -570,19 +577,29 @@ void i915_gem_context_close(struct drm_file *file) idr_destroy(&file_priv->context_idr); } -static bool engine_has_idle_kernel_context(struct intel_engine_cs *engine) +static struct i915_request * +last_request_on_engine(struct i915_timeline *timeline, + struct intel_engine_cs *engine) { - struct i915_gem_timeline *timeline; + struct i915_request *rq; - list_for_each_entry(timeline, &engine->i915->gt.timelines, link) { - struct intel_timeline *tl; + if (timeline == &engine->timeline) + return NULL; - if (timeline == &engine->i915->gt.global_timeline) - continue; + rq = i915_gem_active_raw(&timeline->last_request, + &engine->i915->drm.struct_mutex); + if (rq && rq->engine == engine) + return rq; + + return NULL; +} - tl = &timeline->engine[engine->id]; - if (i915_gem_active_peek(&tl->last_request, - &engine->i915->drm.struct_mutex)) +static bool engine_has_idle_kernel_context(struct intel_engine_cs *engine) +{ + struct i915_timeline *timeline; + + list_for_each_entry(timeline, &engine->i915->gt.timelines, link) { + if (last_request_on_engine(timeline, engine)) return false; } @@ -592,7 +609,7 @@ static bool engine_has_idle_kernel_context(struct intel_engine_cs *engine) int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) { struct intel_engine_cs *engine; - struct i915_gem_timeline *timeline; + struct i915_timeline *timeline; enum intel_engine_id id; lockdep_assert_held(&dev_priv->drm.struct_mutex); @@ -612,11 +629,8 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) /* Queue this switch after all other activity */ list_for_each_entry(timeline, &dev_priv->gt.timelines, link) { struct i915_request *prev; - struct intel_timeline *tl; - tl = &timeline->engine[engine->id]; - prev = i915_gem_active_raw(&tl->last_request, - &dev_priv->drm.struct_mutex); + prev = last_request_on_engine(timeline, engine); if (prev) i915_sw_fence_await_sw_fence_gfp(&rq->submit, &prev->submit, @@ -746,7 +760,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data, args->value = i915_gem_context_is_bannable(ctx); break; case I915_CONTEXT_PARAM_PRIORITY: - args->value = ctx->priority; + args->value = ctx->sched.priority; break; default: ret = -EINVAL; @@ -819,7 +833,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data, !capable(CAP_SYS_NICE)) ret = -EPERM; else - ctx->priority = priority; + ctx->sched.priority = priority; } break; diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h index 7854262..ace3b12 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.h +++ b/drivers/gpu/drm/i915/i915_gem_context.h @@ -137,18 +137,7 @@ struct i915_gem_context { */ u32 user_handle; - /** - * @priority: execution and service priority - * - * All clients are equal, but some are more equal than others! - * - * Requests from a context with a greater (more positive) value of - * @priority will be executed before those with a lower @priority - * value, forming a simple QoS. - * - * The &drm_i915_private.kernel_context is assigned the lowest priority. - */ - int priority; + struct i915_sched_attr sched; /** ggtt_offset_bias: placement restriction for context objects */ u32 ggtt_offset_bias; @@ -160,7 +149,7 @@ struct i915_gem_context { u32 *lrc_reg_state; u64 lrc_desc; int pin_count; - } engine[I915_NUM_ENGINES]; + } __engine[I915_NUM_ENGINES]; /** ring_size: size for allocating the per-engine ring buffer */ u32 ring_size; @@ -267,6 +256,34 @@ static inline bool i915_gem_context_is_kernel(struct i915_gem_context *ctx) return !ctx->file_priv; } +static inline struct intel_context * +to_intel_context(struct i915_gem_context *ctx, + const struct intel_engine_cs *engine) +{ + return &ctx->__engine[engine->id]; +} + +static inline struct intel_ring * +intel_context_pin(struct i915_gem_context *ctx, struct intel_engine_cs *engine) +{ + return engine->context_pin(engine, ctx); +} + +static inline void __intel_context_pin(struct i915_gem_context *ctx, + const struct intel_engine_cs *engine) +{ + struct intel_context *ce = to_intel_context(ctx, engine); + + GEM_BUG_ON(!ce->pin_count); + ce->pin_count++; +} + +static inline void intel_context_unpin(struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + engine->context_unpin(engine, ctx); +} + /* i915_gem_context.c */ int __must_check i915_gem_contexts_init(struct drm_i915_private *dev_priv); void i915_gem_contexts_lost(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 8c170db..f627a8c 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -81,6 +81,35 @@ enum { * but this remains just a hint as the kernel may choose a new location for * any object in the future. * + * At the level of talking to the hardware, submitting a batchbuffer for the + * GPU to execute is to add content to a buffer from which the HW + * command streamer is reading. + * + * 1. Add a command to load the HW context. For Logical Ring Contexts, i.e. + * Execlists, this command is not placed on the same buffer as the + * remaining items. + * + * 2. Add a command to invalidate caches to the buffer. + * + * 3. Add a batchbuffer start command to the buffer; the start command is + * essentially a token together with the GPU address of the batchbuffer + * to be executed. + * + * 4. Add a pipeline flush to the buffer. + * + * 5. Add a memory write command to the buffer to record when the GPU + * is done executing the batchbuffer. The memory write writes the + * global sequence number of the request, ``i915_request::global_seqno``; + * the i915 driver uses the current value in the register to determine + * if the GPU has completed the batchbuffer. + * + * 6. Add a user interrupt command to the buffer. This command instructs + * the GPU to issue an interrupt when the command, pipeline flush and + * memory write are completed. + * + * 7. Inform the hardware of the additional commands added to the buffer + * (by updating the tail pointer). + * * Processing an execbuf ioctl is conceptually split up into a few phases. * * 1. Validation - Ensure all the pointers, handles and flags are valid. @@ -728,12 +757,13 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb) err = radix_tree_insert(handles_vma, handle, vma); if (unlikely(err)) { - kfree(lut); + kmem_cache_free(eb->i915->luts, lut); goto err_obj; } /* transfer ref to ctx */ - vma->open_count++; + if (!vma->open_count++) + i915_vma_reopen(vma); list_add(&lut->obj_link, &obj->lut_list); list_add(&lut->ctx_link, &eb->ctx->handles_list); lut->ctx = eb->ctx; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 21d72f6..996ab2a 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -110,7 +110,8 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma); static void gen6_ggtt_invalidate(struct drm_i915_private *dev_priv) { - /* Note that as an uncached mmio write, this should flush the + /* + * Note that as an uncached mmio write, this will flush the * WCB of the writes into the GGTT before it triggers the invalidate. */ I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); @@ -1161,6 +1162,27 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma, vaddr[idx.pde] |= GEN8_PDE_IPS_64K; kunmap_atomic(vaddr); page_size = I915_GTT_PAGE_SIZE_64K; + + /* + * We write all 4K page entries, even when using 64K + * pages. In order to verify that the HW isn't cheating + * by using the 4K PTE instead of the 64K PTE, we want + * to remove all the surplus entries. If the HW skipped + * the 64K PTE, it will read/write into the scratch page + * instead - which we detect as missing results during + * selftests. + */ + if (I915_SELFTEST_ONLY(vma->vm->scrub_64K)) { + u16 i; + + encode = pte_encode | vma->vm->scratch_page.daddr; + vaddr = kmap_atomic_px(pd->page_table[idx.pde]); + + for (i = 1; i < index; i += 16) + memset64(vaddr + i, encode, 15); + + kunmap_atomic(vaddr); + } } vma->page_sizes.gtt |= page_size; @@ -2111,8 +2133,6 @@ static void i915_address_space_init(struct i915_address_space *vm, struct drm_i915_private *dev_priv, const char *name) { - i915_gem_timeline_init(dev_priv, &vm->timeline, name); - drm_mm_init(&vm->mm, 0, vm->total); vm->mm.head_node.color = I915_COLOR_UNEVICTABLE; @@ -2129,7 +2149,6 @@ static void i915_address_space_fini(struct i915_address_space *vm) if (pagevec_count(&vm->free_pages)) vm_free_pages_release(vm, true); - i915_gem_timeline_fini(&vm->timeline); drm_mm_takedown(&vm->mm); list_del(&vm->global_link); } @@ -2140,15 +2159,15 @@ static void gtt_write_workarounds(struct drm_i915_private *dev_priv) * called on driver load and after a GPU reset, so you can place * workarounds here even if they get overwritten by GPU reset. */ - /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl */ + /* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk,cfl,cnl,icl */ if (IS_BROADWELL(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW); else if (IS_CHERRYVIEW(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV); - else if (IS_GEN9_BC(dev_priv) || IS_GEN10(dev_priv)) - I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); else if (IS_GEN9_LP(dev_priv)) I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT); + else if (INTEL_GEN(dev_priv) >= 9) + I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL); /* * To support 64K PTEs we need to first enable the use of the @@ -2222,6 +2241,12 @@ i915_ppgtt_create(struct drm_i915_private *dev_priv, void i915_ppgtt_close(struct i915_address_space *vm) { + GEM_BUG_ON(vm->closed); + vm->closed = true; +} + +static void ppgtt_destroy_vma(struct i915_address_space *vm) +{ struct list_head *phases[] = { &vm->active_list, &vm->inactive_list, @@ -2229,15 +2254,12 @@ void i915_ppgtt_close(struct i915_address_space *vm) NULL, }, **phase; - GEM_BUG_ON(vm->closed); vm->closed = true; - for (phase = phases; *phase; phase++) { struct i915_vma *vma, *vn; list_for_each_entry_safe(vma, vn, *phase, vm_link) - if (!i915_vma_is_closed(vma)) - i915_vma_close(vma); + i915_vma_destroy(vma); } } @@ -2248,7 +2270,8 @@ void i915_ppgtt_release(struct kref *kref) trace_i915_ppgtt_release(&ppgtt->base); - /* vmas should already be unbound and destroyed */ + ppgtt_destroy_vma(&ppgtt->base); + GEM_BUG_ON(!list_empty(&ppgtt->base.active_list)); GEM_BUG_ON(!list_empty(&ppgtt->base.inactive_list)); GEM_BUG_ON(!list_empty(&ppgtt->base.unbound_list)); @@ -2417,11 +2440,9 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm, for_each_sgt_dma(addr, sgt_iter, vma->pages) gen8_set_pte(gtt_entries++, pte_encode | addr); - wmb(); - - /* This next bit makes the above posting read even more important. We - * want to flush the TLBs only after we're certain all the PTE updates - * have finished. + /* + * We want to flush the TLBs only after we're certain all the PTE + * updates have finished. */ ggtt->invalidate(vm->i915); } @@ -2459,11 +2480,10 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm, dma_addr_t addr; for_each_sgt_dma(addr, iter, vma->pages) iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]); - wmb(); - /* This next bit makes the above posting read even more important. We - * want to flush the TLBs only after we're certain all the PTE updates - * have finished. + /* + * We want to flush the TLBs only after we're certain all the PTE + * updates have finished. */ ggtt->invalidate(vm->i915); } @@ -3325,14 +3345,10 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err); pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); - - if (INTEL_GEN(dev_priv) >= 9) { - size = gen8_get_total_gtt_size(snb_gmch_ctl); - } else if (IS_CHERRYVIEW(dev_priv)) { + if (IS_CHERRYVIEW(dev_priv)) size = chv_get_total_gtt_size(snb_gmch_ctl); - } else { + else size = gen8_get_total_gtt_size(snb_gmch_ctl); - } ggtt->base.total = (size / sizeof(gen8_pte_t)) << PAGE_SHIFT; ggtt->base.cleanup = gen6_gmch_remove; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 6efc017..aec4f73 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -38,10 +38,9 @@ #include <linux/mm.h> #include <linux/pagevec.h> -#include "i915_gem_timeline.h" - #include "i915_request.h" #include "i915_selftest.h" +#include "i915_timeline.h" #define I915_GTT_PAGE_SIZE_4K BIT(12) #define I915_GTT_PAGE_SIZE_64K BIT(16) @@ -257,7 +256,6 @@ struct i915_pml4 { struct i915_address_space { struct drm_mm mm; - struct i915_gem_timeline timeline; struct drm_i915_private *i915; struct device *dma; /* Every address space belongs to a struct file - except for the global @@ -344,6 +342,7 @@ struct i915_address_space { void (*clear_pages)(struct i915_vma *vma); I915_SELFTEST_DECLARE(struct fault_attr fault_attr); + I915_SELFTEST_DECLARE(bool scrub_64K); }; #define i915_is_ggtt(V) (!(V)->file) diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 62aa679..ad949cc 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -51,6 +51,10 @@ int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv, if (!drm_mm_initialized(&dev_priv->mm.stolen)) return -ENODEV; + /* WaSkipStolenMemoryFirstPage:bdw+ */ + if (INTEL_GEN(dev_priv) >= 8 && start < 4096) + start = 4096; + mutex_lock(&dev_priv->mm.stolen_lock); ret = drm_mm_insert_node_in_range(&dev_priv->mm.stolen, node, size, alignment, 0, @@ -121,8 +125,8 @@ static int i915_adjust_stolen(struct drm_i915_private *dev_priv, if (stolen[0].start != stolen[1].start || stolen[0].end != stolen[1].end) { - DRM_DEBUG_KMS("GTT within stolen memory at %pR\n", &ggtt_res); - DRM_DEBUG_KMS("Stolen memory adjusted to %pR\n", dsm); + DRM_DEBUG_DRIVER("GTT within stolen memory at %pR\n", &ggtt_res); + DRM_DEBUG_DRIVER("Stolen memory adjusted to %pR\n", dsm); } } @@ -174,18 +178,19 @@ void i915_gem_cleanup_stolen(struct drm_device *dev) } static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv, - resource_size_t *base, resource_size_t *size) + resource_size_t *base, + resource_size_t *size) { - uint32_t reg_val = I915_READ(IS_GM45(dev_priv) ? - CTG_STOLEN_RESERVED : - ELK_STOLEN_RESERVED); + u32 reg_val = I915_READ(IS_GM45(dev_priv) ? + CTG_STOLEN_RESERVED : + ELK_STOLEN_RESERVED); resource_size_t stolen_top = dev_priv->dsm.end + 1; - if ((reg_val & G4X_STOLEN_RESERVED_ENABLE) == 0) { - *base = 0; - *size = 0; + DRM_DEBUG_DRIVER("%s_STOLEN_RESERVED = %08x\n", + IS_GM45(dev_priv) ? "CTG" : "ELK", reg_val); + + if ((reg_val & G4X_STOLEN_RESERVED_ENABLE) == 0) return; - } /* * Whether ILK really reuses the ELK register for this is unclear. @@ -193,30 +198,25 @@ static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv, */ WARN(IS_GEN5(dev_priv), "ILK stolen reserved found? 0x%08x\n", reg_val); - *base = (reg_val & G4X_STOLEN_RESERVED_ADDR2_MASK) << 16; + if (!(reg_val & G4X_STOLEN_RESERVED_ADDR2_MASK)) + return; + *base = (reg_val & G4X_STOLEN_RESERVED_ADDR2_MASK) << 16; WARN_ON((reg_val & G4X_STOLEN_RESERVED_ADDR1_MASK) < *base); - /* On these platforms, the register doesn't have a size field, so the - * size is the distance between the base and the top of the stolen - * memory. We also have the genuine case where base is zero and there's - * nothing reserved. */ - if (*base == 0) - *size = 0; - else - *size = stolen_top - *base; + *size = stolen_top - *base; } static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv, - resource_size_t *base, resource_size_t *size) + resource_size_t *base, + resource_size_t *size) { - uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); + u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); + + DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); - if ((reg_val & GEN6_STOLEN_RESERVED_ENABLE) == 0) { - *base = 0; - *size = 0; + if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE)) return; - } *base = reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK; @@ -239,17 +239,44 @@ static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv, } } -static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv, - resource_size_t *base, resource_size_t *size) +static void vlv_get_stolen_reserved(struct drm_i915_private *dev_priv, + resource_size_t *base, + resource_size_t *size) { - uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); + u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); + resource_size_t stolen_top = dev_priv->dsm.end + 1; + + DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); - if ((reg_val & GEN6_STOLEN_RESERVED_ENABLE) == 0) { - *base = 0; - *size = 0; + if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE)) return; + + switch (reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK) { + default: + MISSING_CASE(reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK); + case GEN7_STOLEN_RESERVED_1M: + *size = 1024 * 1024; + break; } + /* + * On vlv, the ADDR_MASK portion is left as 0 and HW deduces the + * reserved location as (top - size). + */ + *base = stolen_top - *size; +} + +static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv, + resource_size_t *base, + resource_size_t *size) +{ + u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); + + DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); + + if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE)) + return; + *base = reg_val & GEN7_STOLEN_RESERVED_ADDR_MASK; switch (reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK) { @@ -266,15 +293,15 @@ static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv, } static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv, - resource_size_t *base, resource_size_t *size) + resource_size_t *base, + resource_size_t *size) { - uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); + u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); - if ((reg_val & GEN6_STOLEN_RESERVED_ENABLE) == 0) { - *base = 0; - *size = 0; + DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); + + if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE)) return; - } *base = reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK; @@ -298,36 +325,28 @@ static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv, } static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv, - resource_size_t *base, resource_size_t *size) + resource_size_t *base, + resource_size_t *size) { - uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); - resource_size_t stolen_top; + u32 reg_val = I915_READ(GEN6_STOLEN_RESERVED); + resource_size_t stolen_top = dev_priv->dsm.end + 1; - if ((reg_val & GEN6_STOLEN_RESERVED_ENABLE) == 0) { - *base = 0; - *size = 0; + DRM_DEBUG_DRIVER("GEN6_STOLEN_RESERVED = %08x\n", reg_val); + + if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE)) return; - } - stolen_top = dev_priv->dsm.end + 1; + if (!(reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK)) + return; *base = reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK; - - /* On these platforms, the register doesn't have a size field, so the - * size is the distance between the base and the top of the stolen - * memory. We also have the genuine case where base is zero and there's - * nothing reserved. */ - if (*base == 0) - *size = 0; - else - *size = stolen_top - *base; + *size = stolen_top - *base; } int i915_gem_init_stolen(struct drm_i915_private *dev_priv) { resource_size_t reserved_base, stolen_top; resource_size_t reserved_total, reserved_size; - resource_size_t stolen_usable_start; mutex_init(&dev_priv->mm.stolen_lock); @@ -353,7 +372,7 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) GEM_BUG_ON(dev_priv->dsm.end <= dev_priv->dsm.start); stolen_top = dev_priv->dsm.end + 1; - reserved_base = 0; + reserved_base = stolen_top; reserved_size = 0; switch (INTEL_GEN(dev_priv)) { @@ -373,8 +392,12 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) &reserved_base, &reserved_size); break; case 7: - gen7_get_stolen_reserved(dev_priv, - &reserved_base, &reserved_size); + if (IS_VALLEYVIEW(dev_priv)) + vlv_get_stolen_reserved(dev_priv, + &reserved_base, &reserved_size); + else + gen7_get_stolen_reserved(dev_priv, + &reserved_base, &reserved_size); break; default: if (IS_LP(dev_priv)) @@ -386,11 +409,16 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) break; } - /* It is possible for the reserved base to be zero, but the register - * field for size doesn't have a zero option. */ - if (reserved_base == 0) { - reserved_size = 0; + /* + * Our expectation is that the reserved space is at the top of the + * stolen region and *never* at the bottom. If we see !reserved_base, + * it likely means we failed to read the registers correctly. + */ + if (!reserved_base) { + DRM_ERROR("inconsistent reservation %pa + %pa; ignoring\n", + &reserved_base, &reserved_size); reserved_base = stolen_top; + reserved_size = 0; } dev_priv->dsm_reserved = @@ -406,21 +434,15 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) * memory, so just consider the start. */ reserved_total = stolen_top - reserved_base; - DRM_DEBUG_KMS("Memory reserved for graphics device: %lluK, usable: %lluK\n", - (u64)resource_size(&dev_priv->dsm) >> 10, - ((u64)resource_size(&dev_priv->dsm) - reserved_total) >> 10); - - stolen_usable_start = 0; - /* WaSkipStolenMemoryFirstPage:bdw+ */ - if (INTEL_GEN(dev_priv) >= 8) - stolen_usable_start = 4096; + DRM_DEBUG_DRIVER("Memory reserved for graphics device: %lluK, usable: %lluK\n", + (u64)resource_size(&dev_priv->dsm) >> 10, + ((u64)resource_size(&dev_priv->dsm) - reserved_total) >> 10); dev_priv->stolen_usable_size = - resource_size(&dev_priv->dsm) - reserved_total - stolen_usable_start; + resource_size(&dev_priv->dsm) - reserved_total; /* Basic memrange allocator for stolen space. */ - drm_mm_init(&dev_priv->mm.stolen, stolen_usable_start, - dev_priv->stolen_usable_size); + drm_mm_init(&dev_priv->mm.stolen, 0, dev_priv->stolen_usable_size); return 0; } @@ -580,8 +602,8 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv lockdep_assert_held(&dev_priv->drm.struct_mutex); - DRM_DEBUG_KMS("creating preallocated stolen object: stolen_offset=%pa, gtt_offset=%pa, size=%pa\n", - &stolen_offset, >t_offset, &size); + DRM_DEBUG_DRIVER("creating preallocated stolen object: stolen_offset=%pa, gtt_offset=%pa, size=%pa\n", + &stolen_offset, >t_offset, &size); /* KISS and expect everything to be page-aligned */ if (WARN_ON(size == 0) || @@ -599,14 +621,14 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv ret = drm_mm_reserve_node(&dev_priv->mm.stolen, stolen); mutex_unlock(&dev_priv->mm.stolen_lock); if (ret) { - DRM_DEBUG_KMS("failed to allocate stolen space\n"); + DRM_DEBUG_DRIVER("failed to allocate stolen space\n"); kfree(stolen); return NULL; } obj = _i915_gem_object_create_stolen(dev_priv, stolen); if (obj == NULL) { - DRM_DEBUG_KMS("failed to allocate stolen object\n"); + DRM_DEBUG_DRIVER("failed to allocate stolen object\n"); i915_gem_stolen_remove_node(dev_priv, stolen); kfree(stolen); return NULL; @@ -635,7 +657,7 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv size, gtt_offset, obj->cache_level, 0); if (ret) { - DRM_DEBUG_KMS("failed to allocate stolen GTT space\n"); + DRM_DEBUG_DRIVER("failed to allocate stolen GTT space\n"); goto err_pages; } diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.c b/drivers/gpu/drm/i915/i915_gem_timeline.c deleted file mode 100644 index e9fd876..0000000 --- a/drivers/gpu/drm/i915/i915_gem_timeline.c +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Copyright © 2016 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - */ - -#include "i915_drv.h" -#include "i915_syncmap.h" - -static void __intel_timeline_init(struct intel_timeline *tl, - struct i915_gem_timeline *parent, - u64 context, - struct lock_class_key *lockclass, - const char *lockname) -{ - tl->fence_context = context; - tl->common = parent; - spin_lock_init(&tl->lock); - lockdep_set_class_and_name(&tl->lock, lockclass, lockname); - init_request_active(&tl->last_request, NULL); - INIT_LIST_HEAD(&tl->requests); - i915_syncmap_init(&tl->sync); -} - -static void __intel_timeline_fini(struct intel_timeline *tl) -{ - GEM_BUG_ON(!list_empty(&tl->requests)); - - i915_syncmap_free(&tl->sync); -} - -static int __i915_gem_timeline_init(struct drm_i915_private *i915, - struct i915_gem_timeline *timeline, - const char *name, - struct lock_class_key *lockclass, - const char *lockname) -{ - unsigned int i; - u64 fences; - - lockdep_assert_held(&i915->drm.struct_mutex); - - /* - * Ideally we want a set of engines on a single leaf as we expect - * to mostly be tracking synchronisation between engines. It is not - * a huge issue if this is not the case, but we may want to mitigate - * any page crossing penalties if they become an issue. - */ - BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES); - - timeline->i915 = i915; - timeline->name = kstrdup(name ?: "[kernel]", GFP_KERNEL); - if (!timeline->name) - return -ENOMEM; - - list_add(&timeline->link, &i915->gt.timelines); - - /* Called during early_init before we know how many engines there are */ - fences = dma_fence_context_alloc(ARRAY_SIZE(timeline->engine)); - for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) - __intel_timeline_init(&timeline->engine[i], - timeline, fences++, - lockclass, lockname); - - return 0; -} - -int i915_gem_timeline_init(struct drm_i915_private *i915, - struct i915_gem_timeline *timeline, - const char *name) -{ - static struct lock_class_key class; - - return __i915_gem_timeline_init(i915, timeline, name, - &class, "&timeline->lock"); -} - -int i915_gem_timeline_init__global(struct drm_i915_private *i915) -{ - static struct lock_class_key class; - - return __i915_gem_timeline_init(i915, - &i915->gt.global_timeline, - "[execution]", - &class, "&global_timeline->lock"); -} - -/** - * i915_gem_timelines_park - called when the driver idles - * @i915: the drm_i915_private device - * - * When the driver is completely idle, we know that all of our sync points - * have been signaled and our tracking is then entirely redundant. Any request - * to wait upon an older sync point will be completed instantly as we know - * the fence is signaled and therefore we will not even look them up in the - * sync point map. - */ -void i915_gem_timelines_park(struct drm_i915_private *i915) -{ - struct i915_gem_timeline *timeline; - int i; - - lockdep_assert_held(&i915->drm.struct_mutex); - - list_for_each_entry(timeline, &i915->gt.timelines, link) { - for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) { - struct intel_timeline *tl = &timeline->engine[i]; - - /* - * All known fences are completed so we can scrap - * the current sync point tracking and start afresh, - * any attempt to wait upon a previous sync point - * will be skipped as the fence was signaled. - */ - i915_syncmap_free(&tl->sync); - } - } -} - -void i915_gem_timeline_fini(struct i915_gem_timeline *timeline) -{ - int i; - - lockdep_assert_held(&timeline->i915->drm.struct_mutex); - - for (i = 0; i < ARRAY_SIZE(timeline->engine); i++) - __intel_timeline_fini(&timeline->engine[i]); - - list_del(&timeline->link); - kfree(timeline->name); -} - -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) -#include "selftests/mock_timeline.c" -#include "selftests/i915_gem_timeline.c" -#endif diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index d596a83..854bd51 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -778,6 +778,9 @@ i915_gem_userptr_ioctl(struct drm_device *dev, I915_USERPTR_UNSYNCHRONIZED)) return -EINVAL; + if (!args->user_size) + return -EINVAL; + if (offset_in_page(args->user_ptr | args->user_size)) return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index f89ac7a8..df234dc 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -32,6 +32,7 @@ #include <linux/zlib.h> #include <drm/drm_print.h> +#include "i915_gpu_error.h" #include "i915_drv.h" static inline const struct intel_engine_cs * @@ -403,16 +404,17 @@ static const char *bannable(const struct drm_i915_error_context *ctx) static void error_print_request(struct drm_i915_error_state_buf *m, const char *prefix, - const struct drm_i915_error_request *erq) + const struct drm_i915_error_request *erq, + const unsigned long epoch) { if (!erq->seqno) return; - err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms ago, head %08x, tail %08x\n", + err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms, start %08x, head %08x, tail %08x\n", prefix, erq->pid, erq->ban_score, - erq->context, erq->seqno, erq->priority, - jiffies_to_msecs(jiffies - erq->jiffies), - erq->head, erq->tail); + erq->context, erq->seqno, erq->sched_attr.priority, + jiffies_to_msecs(erq->jiffies - epoch), + erq->start, erq->head, erq->tail); } static void error_print_context(struct drm_i915_error_state_buf *m, @@ -421,12 +423,13 @@ static void error_print_context(struct drm_i915_error_state_buf *m, { err_printf(m, "%s%s[%d] user_handle %d hw_id %d, prio %d, ban score %d%s guilty %d active %d\n", header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id, - ctx->priority, ctx->ban_score, bannable(ctx), + ctx->sched_attr.priority, ctx->ban_score, bannable(ctx), ctx->guilty, ctx->active); } static void error_print_engine(struct drm_i915_error_state_buf *m, - const struct drm_i915_error_engine *ee) + const struct drm_i915_error_engine *ee, + const unsigned long epoch) { int n; @@ -496,14 +499,15 @@ static void error_print_engine(struct drm_i915_error_state_buf *m, err_printf(m, " hangcheck stall: %s\n", yesno(ee->hangcheck_stalled)); err_printf(m, " hangcheck action: %s\n", hangcheck_action_to_str(ee->hangcheck_action)); - err_printf(m, " hangcheck action timestamp: %lu, %u ms ago\n", + err_printf(m, " hangcheck action timestamp: %dms (%lu%s)\n", + jiffies_to_msecs(ee->hangcheck_timestamp - epoch), ee->hangcheck_timestamp, - jiffies_to_msecs(jiffies - ee->hangcheck_timestamp)); + ee->hangcheck_timestamp == epoch ? "; epoch" : ""); err_printf(m, " engine reset count: %u\n", ee->reset_count); for (n = 0; n < ee->num_ports; n++) { err_printf(m, " ELSP[%d]:", n); - error_print_request(m, " ", &ee->execlist[n]); + error_print_request(m, " ", &ee->execlist[n], epoch); } error_print_context(m, " Active context: ", &ee->context); @@ -649,6 +653,11 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, ts = ktime_to_timespec64(error->uptime); err_printf(m, "Uptime: %lld s %ld us\n", (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC); + err_printf(m, "Epoch: %lu jiffies (%u HZ)\n", error->epoch, HZ); + err_printf(m, "Capture: %lu jiffies; %d ms ago, %d ms after epoch\n", + error->capture, + jiffies_to_msecs(jiffies - error->capture), + jiffies_to_msecs(error->capture - error->epoch)); for (i = 0; i < ARRAY_SIZE(error->engine); i++) { if (error->engine[i].hangcheck_stalled && @@ -709,7 +718,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, for (i = 0; i < ARRAY_SIZE(error->engine); i++) { if (error->engine[i].engine_id != -1) - error_print_engine(m, &error->engine[i]); + error_print_engine(m, &error->engine[i], error->epoch); } for (i = 0; i < ARRAY_SIZE(error->active_vm); i++) { @@ -768,7 +777,9 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, dev_priv->engine[i]->name, ee->num_requests); for (j = 0; j < ee->num_requests; j++) - error_print_request(m, " ", &ee->requests[j]); + error_print_request(m, " ", + &ee->requests[j], + error->epoch); } if (IS_ERR(ee->waiters)) { @@ -1277,10 +1288,11 @@ static void record_request(struct i915_request *request, struct drm_i915_error_request *erq) { erq->context = request->ctx->hw_id; - erq->priority = request->priotree.priority; + erq->sched_attr = request->sched.attr; erq->ban_score = atomic_read(&request->ctx->ban_score); erq->seqno = request->global_seqno; erq->jiffies = request->emitted_jiffies; + erq->start = i915_ggtt_offset(request->ring->vma); erq->head = request->head; erq->tail = request->tail; @@ -1298,7 +1310,7 @@ static void engine_record_requests(struct intel_engine_cs *engine, count = 0; request = first; - list_for_each_entry_from(request, &engine->timeline->requests, link) + list_for_each_entry_from(request, &engine->timeline.requests, link) count++; if (!count) return; @@ -1311,7 +1323,7 @@ static void engine_record_requests(struct intel_engine_cs *engine, count = 0; request = first; - list_for_each_entry_from(request, &engine->timeline->requests, link) { + list_for_each_entry_from(request, &engine->timeline.requests, link) { if (count >= ee->num_requests) { /* * If the ring request list was changed in @@ -1371,7 +1383,7 @@ static void record_context(struct drm_i915_error_context *e, e->handle = ctx->user_handle; e->hw_id = ctx->hw_id; - e->priority = ctx->priority; + e->sched_attr = ctx->sched; e->ban_score = atomic_read(&ctx->ban_score); e->bannable = i915_gem_context_is_bannable(ctx); e->guilty = atomic_read(&ctx->guilty_count); @@ -1471,7 +1483,8 @@ static void gem_record_rings(struct i915_gpu_state *error) ee->ctx = i915_error_object_create(i915, - request->ctx->engine[i].state); + to_intel_context(request->ctx, + engine)->state); error->simulated |= i915_gem_context_no_error_capture(request->ctx); @@ -1734,6 +1747,22 @@ static void capture_params(struct i915_gpu_state *error) #undef DUP } +static unsigned long capture_find_epoch(const struct i915_gpu_state *error) +{ + unsigned long epoch = error->capture; + int i; + + for (i = 0; i < ARRAY_SIZE(error->engine); i++) { + const struct drm_i915_error_engine *ee = &error->engine[i]; + + if (ee->hangcheck_stalled && + time_before(ee->hangcheck_timestamp, epoch)) + epoch = ee->hangcheck_timestamp; + } + + return epoch; +} + static int capture(void *data) { struct i915_gpu_state *error = data; @@ -1742,6 +1771,7 @@ static int capture(void *data) error->boottime = ktime_get_boottime(); error->uptime = ktime_sub(ktime_get(), error->i915->gt.last_init_time); + error->capture = jiffies; capture_params(error); capture_gen_state(error); @@ -1755,6 +1785,8 @@ static int capture(void *data) error->overlay = intel_overlay_capture_error_state(error->i915); error->display = intel_display_capture_error_state(error->i915); + error->epoch = capture_find_epoch(error); + return 0; } diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h new file mode 100644 index 0000000..dac0f8c --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -0,0 +1,366 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright � 2008-2018 Intel Corporation + */ + +#ifndef _I915_GPU_ERROR_H_ +#define _I915_GPU_ERROR_H_ + +#include <linux/kref.h> +#include <linux/ktime.h> +#include <linux/sched.h> + +#include <drm/drm_mm.h> + +#include "intel_device_info.h" +#include "intel_ringbuffer.h" +#include "intel_uc_fw.h" + +#include "i915_gem.h" +#include "i915_gem_gtt.h" +#include "i915_params.h" +#include "i915_scheduler.h" + +struct drm_i915_private; +struct intel_overlay_error_state; +struct intel_display_error_state; + +struct i915_gpu_state { + struct kref ref; + ktime_t time; + ktime_t boottime; + ktime_t uptime; + unsigned long capture; + unsigned long epoch; + + struct drm_i915_private *i915; + + char error_msg[128]; + bool simulated; + bool awake; + bool wakelock; + bool suspended; + int iommu; + u32 reset_count; + u32 suspend_count; + struct intel_device_info device_info; + struct intel_driver_caps driver_caps; + struct i915_params params; + + struct i915_error_uc { + struct intel_uc_fw guc_fw; + struct intel_uc_fw huc_fw; + struct drm_i915_error_object *guc_log; + } uc; + + /* Generic register state */ + u32 eir; + u32 pgtbl_er; + u32 ier; + u32 gtier[4], ngtier; + u32 ccid; + u32 derrmr; + u32 forcewake; + u32 error; /* gen6+ */ + u32 err_int; /* gen7 */ + u32 fault_data0; /* gen8, gen9 */ + u32 fault_data1; /* gen8, gen9 */ + u32 done_reg; + u32 gac_eco; + u32 gam_ecochk; + u32 gab_ctl; + u32 gfx_mode; + + u32 nfence; + u64 fence[I915_MAX_NUM_FENCES]; + struct intel_overlay_error_state *overlay; + struct intel_display_error_state *display; + + struct drm_i915_error_engine { + int engine_id; + /* Software tracked state */ + bool idle; + bool waiting; + int num_waiters; + unsigned long hangcheck_timestamp; + bool hangcheck_stalled; + enum intel_engine_hangcheck_action hangcheck_action; + struct i915_address_space *vm; + int num_requests; + u32 reset_count; + + /* position of active request inside the ring */ + u32 rq_head, rq_post, rq_tail; + + /* our own tracking of ring head and tail */ + u32 cpu_ring_head; + u32 cpu_ring_tail; + + u32 last_seqno; + + /* Register state */ + u32 start; + u32 tail; + u32 head; + u32 ctl; + u32 mode; + u32 hws; + u32 ipeir; + u32 ipehr; + u32 bbstate; + u32 instpm; + u32 instps; + u32 seqno; + u64 bbaddr; + u64 acthd; + u32 fault_reg; + u64 faddr; + u32 rc_psmi; /* sleep state */ + u32 semaphore_mboxes[I915_NUM_ENGINES - 1]; + struct intel_instdone instdone; + + struct drm_i915_error_context { + char comm[TASK_COMM_LEN]; + pid_t pid; + u32 handle; + u32 hw_id; + int ban_score; + int active; + int guilty; + bool bannable; + struct i915_sched_attr sched_attr; + } context; + + struct drm_i915_error_object { + u64 gtt_offset; + u64 gtt_size; + int page_count; + int unused; + u32 *pages[0]; + } *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page; + + struct drm_i915_error_object **user_bo; + long user_bo_count; + + struct drm_i915_error_object *wa_ctx; + struct drm_i915_error_object *default_state; + + struct drm_i915_error_request { + long jiffies; + pid_t pid; + u32 context; + int ban_score; + u32 seqno; + u32 start; + u32 head; + u32 tail; + struct i915_sched_attr sched_attr; + } *requests, execlist[EXECLIST_MAX_PORTS]; + unsigned int num_ports; + + struct drm_i915_error_waiter { + char comm[TASK_COMM_LEN]; + pid_t pid; + u32 seqno; + } *waiters; + + struct { + u32 gfx_mode; + union { + u64 pdp[4]; + u32 pp_dir_base; + }; + } vm_info; + } engine[I915_NUM_ENGINES]; + + struct drm_i915_error_buffer { + u32 size; + u32 name; + u32 rseqno[I915_NUM_ENGINES], wseqno; + u64 gtt_offset; + u32 read_domains; + u32 write_domain; + s32 fence_reg:I915_MAX_NUM_FENCE_BITS; + u32 tiling:2; + u32 dirty:1; + u32 purgeable:1; + u32 userptr:1; + s32 engine:4; + u32 cache_level:3; + } *active_bo[I915_NUM_ENGINES], *pinned_bo; + u32 active_bo_count[I915_NUM_ENGINES], pinned_bo_count; + struct i915_address_space *active_vm[I915_NUM_ENGINES]; +}; + +struct i915_gpu_error { + /* For hangcheck timer */ +#define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */ +#define DRM_I915_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD) + + struct delayed_work hangcheck_work; + + /* For reset and error_state handling. */ + spinlock_t lock; + /* Protected by the above dev->gpu_error.lock. */ + struct i915_gpu_state *first_error; + + atomic_t pending_fb_pin; + + unsigned long missed_irq_rings; + + /** + * State variable controlling the reset flow and count + * + * This is a counter which gets incremented when reset is triggered, + * + * Before the reset commences, the I915_RESET_BACKOFF bit is set + * meaning that any waiters holding onto the struct_mutex should + * relinquish the lock immediately in order for the reset to start. + * + * If reset is not completed successfully, the I915_WEDGE bit is + * set meaning that hardware is terminally sour and there is no + * recovery. All waiters on the reset_queue will be woken when + * that happens. + * + * This counter is used by the wait_seqno code to notice that reset + * event happened and it needs to restart the entire ioctl (since most + * likely the seqno it waited for won't ever signal anytime soon). + * + * This is important for lock-free wait paths, where no contended lock + * naturally enforces the correct ordering between the bail-out of the + * waiter and the gpu reset work code. + */ + unsigned long reset_count; + + /** + * flags: Control various stages of the GPU reset + * + * #I915_RESET_BACKOFF - When we start a reset, we want to stop any + * other users acquiring the struct_mutex. To do this we set the + * #I915_RESET_BACKOFF bit in the error flags when we detect a reset + * and then check for that bit before acquiring the struct_mutex (in + * i915_mutex_lock_interruptible()?). I915_RESET_BACKOFF serves a + * secondary role in preventing two concurrent global reset attempts. + * + * #I915_RESET_HANDOFF - To perform the actual GPU reset, we need the + * struct_mutex. We try to acquire the struct_mutex in the reset worker, + * but it may be held by some long running waiter (that we cannot + * interrupt without causing trouble). Once we are ready to do the GPU + * reset, we set the I915_RESET_HANDOFF bit and wakeup any waiters. If + * they already hold the struct_mutex and want to participate they can + * inspect the bit and do the reset directly, otherwise the worker + * waits for the struct_mutex. + * + * #I915_RESET_ENGINE[num_engines] - Since the driver doesn't need to + * acquire the struct_mutex to reset an engine, we need an explicit + * flag to prevent two concurrent reset attempts in the same engine. + * As the number of engines continues to grow, allocate the flags from + * the most significant bits. + * + * #I915_WEDGED - If reset fails and we can no longer use the GPU, + * we set the #I915_WEDGED bit. Prior to command submission, e.g. + * i915_request_alloc(), this bit is checked and the sequence + * aborted (with -EIO reported to userspace) if set. + */ + unsigned long flags; +#define I915_RESET_BACKOFF 0 +#define I915_RESET_HANDOFF 1 +#define I915_RESET_MODESET 2 +#define I915_WEDGED (BITS_PER_LONG - 1) +#define I915_RESET_ENGINE (I915_WEDGED - I915_NUM_ENGINES) + + /** Number of times an engine has been reset */ + u32 reset_engine_count[I915_NUM_ENGINES]; + + /** Set of stalled engines with guilty requests, in the current reset */ + u32 stalled_mask; + + /** Reason for the current *global* reset */ + const char *reason; + + /** + * Waitqueue to signal when a hang is detected. Used to for waiters + * to release the struct_mutex for the reset to procede. + */ + wait_queue_head_t wait_queue; + + /** + * Waitqueue to signal when the reset has completed. Used by clients + * that wait for dev_priv->mm.wedged to settle. + */ + wait_queue_head_t reset_queue; + + /* For missed irq/seqno simulation. */ + unsigned long test_irq_rings; +}; + +struct drm_i915_error_state_buf { + struct drm_i915_private *i915; + unsigned int bytes; + unsigned int size; + int err; + u8 *buf; + loff_t start; + loff_t pos; +}; + +#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) + +__printf(2, 3) +void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...); +int i915_error_state_to_str(struct drm_i915_error_state_buf *estr, + const struct i915_gpu_state *gpu); +int i915_error_state_buf_init(struct drm_i915_error_state_buf *eb, + struct drm_i915_private *i915, + size_t count, loff_t pos); + +static inline void +i915_error_state_buf_release(struct drm_i915_error_state_buf *eb) +{ + kfree(eb->buf); +} + +struct i915_gpu_state *i915_capture_gpu_state(struct drm_i915_private *i915); +void i915_capture_error_state(struct drm_i915_private *dev_priv, + u32 engine_mask, + const char *error_msg); + +static inline struct i915_gpu_state * +i915_gpu_state_get(struct i915_gpu_state *gpu) +{ + kref_get(&gpu->ref); + return gpu; +} + +void __i915_gpu_state_free(struct kref *kref); +static inline void i915_gpu_state_put(struct i915_gpu_state *gpu) +{ + if (gpu) + kref_put(&gpu->ref, __i915_gpu_state_free); +} + +struct i915_gpu_state *i915_first_error_state(struct drm_i915_private *i915); +void i915_reset_error_state(struct drm_i915_private *i915); + +#else + +static inline void i915_capture_error_state(struct drm_i915_private *dev_priv, + u32 engine_mask, + const char *error_msg) +{ +} + +static inline struct i915_gpu_state * +i915_first_error_state(struct drm_i915_private *i915) +{ + return NULL; +} + +static inline void i915_reset_error_state(struct drm_i915_private *i915) +{ +} + +#endif /* IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) */ + +#endif /* _I915_GPU_ERROR_H_ */ diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 633c187..f9bc3aa 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -243,6 +243,41 @@ void i915_hotplug_interrupt_update(struct drm_i915_private *dev_priv, spin_unlock_irq(&dev_priv->irq_lock); } +static u32 +gen11_gt_engine_identity(struct drm_i915_private * const i915, + const unsigned int bank, const unsigned int bit); + +bool gen11_reset_one_iir(struct drm_i915_private * const i915, + const unsigned int bank, + const unsigned int bit) +{ + void __iomem * const regs = i915->regs; + u32 dw; + + lockdep_assert_held(&i915->irq_lock); + + dw = raw_reg_read(regs, GEN11_GT_INTR_DW(bank)); + if (dw & BIT(bit)) { + /* + * According to the BSpec, DW_IIR bits cannot be cleared without + * first servicing the Selector & Shared IIR registers. + */ + gen11_gt_engine_identity(i915, bank, bit); + + /* + * We locked GT INT DW by reading it. If we want to (try + * to) recover from this succesfully, we need to clear + * our bit, otherwise we are locking the register for + * everybody. + */ + raw_reg_write(regs, GEN11_GT_INTR_DW(bank), BIT(bit)); + + return true; + } + + return false; +} + /** * ilk_update_display_irq - update DEIMR * @dev_priv: driver private @@ -308,17 +343,29 @@ void gen5_disable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask) static i915_reg_t gen6_pm_iir(struct drm_i915_private *dev_priv) { + WARN_ON_ONCE(INTEL_GEN(dev_priv) >= 11); + return INTEL_GEN(dev_priv) >= 8 ? GEN8_GT_IIR(2) : GEN6_PMIIR; } static i915_reg_t gen6_pm_imr(struct drm_i915_private *dev_priv) { - return INTEL_GEN(dev_priv) >= 8 ? GEN8_GT_IMR(2) : GEN6_PMIMR; + if (INTEL_GEN(dev_priv) >= 11) + return GEN11_GPM_WGBOXPERF_INTR_MASK; + else if (INTEL_GEN(dev_priv) >= 8) + return GEN8_GT_IMR(2); + else + return GEN6_PMIMR; } static i915_reg_t gen6_pm_ier(struct drm_i915_private *dev_priv) { - return INTEL_GEN(dev_priv) >= 8 ? GEN8_GT_IER(2) : GEN6_PMIER; + if (INTEL_GEN(dev_priv) >= 11) + return GEN11_GPM_WGBOXPERF_INTR_ENABLE; + else if (INTEL_GEN(dev_priv) >= 8) + return GEN8_GT_IER(2); + else + return GEN6_PMIER; } /** @@ -400,6 +447,18 @@ static void gen6_disable_pm_irq(struct drm_i915_private *dev_priv, u32 disable_m /* though a barrier is missing here, but don't really need a one */ } +void gen11_reset_rps_interrupts(struct drm_i915_private *dev_priv) +{ + spin_lock_irq(&dev_priv->irq_lock); + + while (gen11_reset_one_iir(dev_priv, 0, GEN11_GTPM)) + ; + + dev_priv->gt_pm.rps.pm_iir = 0; + + spin_unlock_irq(&dev_priv->irq_lock); +} + void gen6_reset_rps_interrupts(struct drm_i915_private *dev_priv) { spin_lock_irq(&dev_priv->irq_lock); @@ -415,12 +474,14 @@ void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv) if (READ_ONCE(rps->interrupts_enabled)) return; - if (WARN_ON_ONCE(IS_GEN11(dev_priv))) - return; - spin_lock_irq(&dev_priv->irq_lock); WARN_ON_ONCE(rps->pm_iir); - WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) & dev_priv->pm_rps_events); + + if (INTEL_GEN(dev_priv) >= 11) + WARN_ON_ONCE(gen11_reset_one_iir(dev_priv, 0, GEN11_GTPM)); + else + WARN_ON_ONCE(I915_READ(gen6_pm_iir(dev_priv)) & dev_priv->pm_rps_events); + rps->interrupts_enabled = true; gen6_enable_pm_irq(dev_priv, dev_priv->pm_rps_events); @@ -434,9 +495,6 @@ void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv) if (!READ_ONCE(rps->interrupts_enabled)) return; - if (WARN_ON_ONCE(IS_GEN11(dev_priv))) - return; - spin_lock_irq(&dev_priv->irq_lock); rps->interrupts_enabled = false; @@ -453,7 +511,10 @@ void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv) * state of the worker can be discarded. */ cancel_work_sync(&rps->work); - gen6_reset_rps_interrupts(dev_priv); + if (INTEL_GEN(dev_priv) >= 11) + gen11_reset_rps_interrupts(dev_priv); + else + gen6_reset_rps_interrupts(dev_priv); } void gen9_reset_guc_interrupts(struct drm_i915_private *dev_priv) @@ -1399,19 +1460,18 @@ static void snb_gt_irq_handler(struct drm_i915_private *dev_priv, } static void -gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift) +gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir) { struct intel_engine_execlists * const execlists = &engine->execlists; bool tasklet = false; - if (iir & (GT_CONTEXT_SWITCH_INTERRUPT << test_shift)) { - if (READ_ONCE(engine->execlists.active)) { - __set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); - tasklet = true; - } + if (iir & GT_CONTEXT_SWITCH_INTERRUPT) { + if (READ_ONCE(engine->execlists.active)) + tasklet = !test_and_set_bit(ENGINE_IRQ_EXECLIST, + &engine->irq_posted); } - if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) { + if (iir & GT_RENDER_USER_INTERRUPT) { notify_ring(engine); tasklet |= USES_GUC_SUBMISSION(engine->i915); } @@ -1466,21 +1526,21 @@ static void gen8_gt_irq_handler(struct drm_i915_private *i915, { if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) { gen8_cs_irq_handler(i915->engine[RCS], - gt_iir[0], GEN8_RCS_IRQ_SHIFT); + gt_iir[0] >> GEN8_RCS_IRQ_SHIFT); gen8_cs_irq_handler(i915->engine[BCS], - gt_iir[0], GEN8_BCS_IRQ_SHIFT); + gt_iir[0] >> GEN8_BCS_IRQ_SHIFT); } if (master_ctl & (GEN8_GT_VCS1_IRQ | GEN8_GT_VCS2_IRQ)) { gen8_cs_irq_handler(i915->engine[VCS], - gt_iir[1], GEN8_VCS1_IRQ_SHIFT); + gt_iir[1] >> GEN8_VCS1_IRQ_SHIFT); gen8_cs_irq_handler(i915->engine[VCS2], - gt_iir[1], GEN8_VCS2_IRQ_SHIFT); + gt_iir[1] >> GEN8_VCS2_IRQ_SHIFT); } if (master_ctl & GEN8_GT_VECS_IRQ) { gen8_cs_irq_handler(i915->engine[VECS], - gt_iir[3], GEN8_VECS_IRQ_SHIFT); + gt_iir[3] >> GEN8_VECS_IRQ_SHIFT); } if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) { @@ -1627,7 +1687,7 @@ static void display_pipe_crc_irq_handler(struct drm_i915_private *dev_priv, int head, tail; spin_lock(&pipe_crc->lock); - if (pipe_crc->source) { + if (pipe_crc->source && !crtc->base.crc.opened) { if (!pipe_crc->entries) { spin_unlock(&pipe_crc->lock); DRM_DEBUG_KMS("spurious interrupt\n"); @@ -1667,7 +1727,7 @@ static void display_pipe_crc_irq_handler(struct drm_i915_private *dev_priv, * On GEN8+ sometimes the second CRC is bonkers as well, so * don't trust that one either. */ - if (pipe_crc->skipped == 0 || + if (pipe_crc->skipped <= 0 || (INTEL_GEN(dev_priv) >= 8 && pipe_crc->skipped == 1)) { pipe_crc->skipped++; spin_unlock(&pipe_crc->lock); @@ -1766,37 +1826,8 @@ static void gen6_rps_irq_handler(struct drm_i915_private *dev_priv, u32 pm_iir) static void gen9_guc_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir) { - if (gt_iir & GEN9_GUC_TO_HOST_INT_EVENT) { - /* Sample the log buffer flush related bits & clear them out now - * itself from the message identity register to minimize the - * probability of losing a flush interrupt, when there are back - * to back flush interrupts. - * There can be a new flush interrupt, for different log buffer - * type (like for ISR), whilst Host is handling one (for DPC). - * Since same bit is used in message register for ISR & DPC, it - * could happen that GuC sets the bit for 2nd interrupt but Host - * clears out the bit on handling the 1st interrupt. - */ - u32 msg, flush; - - msg = I915_READ(SOFT_SCRATCH(15)); - flush = msg & (INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED | - INTEL_GUC_RECV_MSG_FLUSH_LOG_BUFFER); - if (flush) { - /* Clear the message bits that are handled */ - I915_WRITE(SOFT_SCRATCH(15), msg & ~flush); - - /* Handle flush interrupt in bottom half */ - queue_work(dev_priv->guc.log.runtime.flush_wq, - &dev_priv->guc.log.runtime.flush_work); - - dev_priv->guc.log.flush_interrupt_count++; - } else { - /* Not clearing of unhandled event bits won't result in - * re-triggering of the interrupt. - */ - } - } + if (gt_iir & GEN9_GUC_TO_HOST_INT_EVENT) + intel_guc_to_host_event_handler(&dev_priv->guc); } static void i9xx_pipestat_irq_reset(struct drm_i915_private *dev_priv) @@ -2433,6 +2464,13 @@ static void ivb_display_irq_handler(struct drm_i915_private *dev_priv, if (de_iir & DE_ERR_INT_IVB) ivb_err_int_handler(dev_priv); + if (de_iir & DE_EDP_PSR_INT_HSW) { + u32 psr_iir = I915_READ(EDP_PSR_IIR); + + intel_psr_irq_handler(dev_priv, psr_iir); + I915_WRITE(EDP_PSR_IIR, psr_iir); + } + if (de_iir & DE_AUX_CHANNEL_A_IVB) dp_aux_irq_handler(dev_priv); @@ -2562,11 +2600,25 @@ gen8_de_irq_handler(struct drm_i915_private *dev_priv, u32 master_ctl) if (master_ctl & GEN8_DE_MISC_IRQ) { iir = I915_READ(GEN8_DE_MISC_IIR); if (iir) { + bool found = false; + I915_WRITE(GEN8_DE_MISC_IIR, iir); ret = IRQ_HANDLED; - if (iir & GEN8_DE_MISC_GSE) + + if (iir & GEN8_DE_MISC_GSE) { intel_opregion_asle_intr(dev_priv); - else + found = true; + } + + if (iir & GEN8_DE_EDP_PSR) { + u32 psr_iir = I915_READ(EDP_PSR_IIR); + + intel_psr_irq_handler(dev_priv, psr_iir); + I915_WRITE(EDP_PSR_IIR, psr_iir); + found = true; + } + + if (!found) DRM_ERROR("Unexpected DE Misc interrupt\n"); } else @@ -2762,58 +2814,16 @@ static void __fini_wedge(struct wedge_me *w) (W)->i915; \ __fini_wedge((W))) -static __always_inline void -gen11_cs_irq_handler(struct intel_engine_cs * const engine, const u32 iir) -{ - gen8_cs_irq_handler(engine, iir, 0); -} - -static void -gen11_gt_engine_irq_handler(struct drm_i915_private * const i915, - const unsigned int bank, - const unsigned int engine_n, - const u16 iir) -{ - struct intel_engine_cs ** const engine = i915->engine; - - switch (bank) { - case 0: - switch (engine_n) { - - case GEN11_RCS0: - return gen11_cs_irq_handler(engine[RCS], iir); - - case GEN11_BCS: - return gen11_cs_irq_handler(engine[BCS], iir); - } - case 1: - switch (engine_n) { - - case GEN11_VCS(0): - return gen11_cs_irq_handler(engine[_VCS(0)], iir); - case GEN11_VCS(1): - return gen11_cs_irq_handler(engine[_VCS(1)], iir); - case GEN11_VCS(2): - return gen11_cs_irq_handler(engine[_VCS(2)], iir); - case GEN11_VCS(3): - return gen11_cs_irq_handler(engine[_VCS(3)], iir); - - case GEN11_VECS(0): - return gen11_cs_irq_handler(engine[_VECS(0)], iir); - case GEN11_VECS(1): - return gen11_cs_irq_handler(engine[_VECS(1)], iir); - } - } -} - static u32 -gen11_gt_engine_intr(struct drm_i915_private * const i915, - const unsigned int bank, const unsigned int bit) +gen11_gt_engine_identity(struct drm_i915_private * const i915, + const unsigned int bank, const unsigned int bit) { void __iomem * const regs = i915->regs; u32 timeout_ts; u32 ident; + lockdep_assert_held(&i915->irq_lock); + raw_reg_write(regs, GEN11_IIR_REG_SELECTOR(bank), BIT(bit)); /* @@ -2835,42 +2845,101 @@ gen11_gt_engine_intr(struct drm_i915_private * const i915, raw_reg_write(regs, GEN11_INTR_IDENTITY_REG(bank), GEN11_INTR_DATA_VALID); - return ident & GEN11_INTR_ENGINE_MASK; + return ident; } static void -gen11_gt_irq_handler(struct drm_i915_private * const i915, - const u32 master_ctl) +gen11_other_irq_handler(struct drm_i915_private * const i915, + const u8 instance, const u16 iir) +{ + if (instance == OTHER_GTPM_INSTANCE) + return gen6_rps_irq_handler(i915, iir); + + WARN_ONCE(1, "unhandled other interrupt instance=0x%x, iir=0x%x\n", + instance, iir); +} + +static void +gen11_engine_irq_handler(struct drm_i915_private * const i915, + const u8 class, const u8 instance, const u16 iir) +{ + struct intel_engine_cs *engine; + + if (instance <= MAX_ENGINE_INSTANCE) + engine = i915->engine_class[class][instance]; + else + engine = NULL; + + if (likely(engine)) + return gen8_cs_irq_handler(engine, iir); + + WARN_ONCE(1, "unhandled engine interrupt class=0x%x, instance=0x%x\n", + class, instance); +} + +static void +gen11_gt_identity_handler(struct drm_i915_private * const i915, + const u32 identity) +{ + const u8 class = GEN11_INTR_ENGINE_CLASS(identity); + const u8 instance = GEN11_INTR_ENGINE_INSTANCE(identity); + const u16 intr = GEN11_INTR_ENGINE_INTR(identity); + + if (unlikely(!intr)) + return; + + if (class <= COPY_ENGINE_CLASS) + return gen11_engine_irq_handler(i915, class, instance, intr); + + if (class == OTHER_CLASS) + return gen11_other_irq_handler(i915, instance, intr); + + WARN_ONCE(1, "unknown interrupt class=0x%x, instance=0x%x, intr=0x%x\n", + class, instance, intr); +} + +static void +gen11_gt_bank_handler(struct drm_i915_private * const i915, + const unsigned int bank) { void __iomem * const regs = i915->regs; - unsigned int bank; + unsigned long intr_dw; + unsigned int bit; - for (bank = 0; bank < 2; bank++) { - unsigned long intr_dw; - unsigned int bit; + lockdep_assert_held(&i915->irq_lock); - if (!(master_ctl & GEN11_GT_DW_IRQ(bank))) - continue; + intr_dw = raw_reg_read(regs, GEN11_GT_INTR_DW(bank)); - intr_dw = raw_reg_read(regs, GEN11_GT_INTR_DW(bank)); + if (unlikely(!intr_dw)) { + DRM_ERROR("GT_INTR_DW%u blank!\n", bank); + return; + } - if (unlikely(!intr_dw)) { - DRM_ERROR("GT_INTR_DW%u blank!\n", bank); - continue; - } + for_each_set_bit(bit, &intr_dw, 32) { + const u32 ident = gen11_gt_engine_identity(i915, + bank, bit); - for_each_set_bit(bit, &intr_dw, 32) { - const u16 iir = gen11_gt_engine_intr(i915, bank, bit); + gen11_gt_identity_handler(i915, ident); + } - if (unlikely(!iir)) - continue; + /* Clear must be after shared has been served for engine */ + raw_reg_write(regs, GEN11_GT_INTR_DW(bank), intr_dw); +} - gen11_gt_engine_irq_handler(i915, bank, bit, iir); - } +static void +gen11_gt_irq_handler(struct drm_i915_private * const i915, + const u32 master_ctl) +{ + unsigned int bank; - /* Clear must be after shared has been served for engine */ - raw_reg_write(regs, GEN11_GT_INTR_DW(bank), intr_dw); + spin_lock(&i915->irq_lock); + + for (bank = 0; bank < 2; bank++) { + if (master_ctl & GEN11_GT_DW_IRQ(bank)) + gen11_gt_bank_handler(i915, bank); } + + spin_unlock(&i915->irq_lock); } static irqreturn_t gen11_irq_handler(int irq, void *arg) @@ -2912,15 +2981,11 @@ static irqreturn_t gen11_irq_handler(int irq, void *arg) return IRQ_HANDLED; } -/** - * i915_reset_device - do process context error handling work - * @dev_priv: i915 device private - * - * Fire an error uevent so userspace can see that a hang or error - * was detected. - */ -static void i915_reset_device(struct drm_i915_private *dev_priv) +static void i915_reset_device(struct drm_i915_private *dev_priv, + u32 engine_mask, + const char *reason) { + struct i915_gpu_error *error = &dev_priv->gpu_error; struct kobject *kobj = &dev_priv->drm.primary->kdev->kobj; char *error_event[] = { I915_ERROR_UEVENT "=1", NULL }; char *reset_event[] = { I915_RESET_UEVENT "=1", NULL }; @@ -2936,29 +3001,35 @@ static void i915_reset_device(struct drm_i915_private *dev_priv) i915_wedge_on_timeout(&w, dev_priv, 5*HZ) { intel_prepare_reset(dev_priv); + error->reason = reason; + error->stalled_mask = engine_mask; + /* Signal that locked waiters should reset the GPU */ - set_bit(I915_RESET_HANDOFF, &dev_priv->gpu_error.flags); - wake_up_all(&dev_priv->gpu_error.wait_queue); + smp_mb__before_atomic(); + set_bit(I915_RESET_HANDOFF, &error->flags); + wake_up_all(&error->wait_queue); /* Wait for anyone holding the lock to wakeup, without * blocking indefinitely on struct_mutex. */ do { if (mutex_trylock(&dev_priv->drm.struct_mutex)) { - i915_reset(dev_priv, 0); + i915_reset(dev_priv, engine_mask, reason); mutex_unlock(&dev_priv->drm.struct_mutex); } - } while (wait_on_bit_timeout(&dev_priv->gpu_error.flags, + } while (wait_on_bit_timeout(&error->flags, I915_RESET_HANDOFF, TASK_UNINTERRUPTIBLE, 1)); + error->stalled_mask = 0; + error->reason = NULL; + intel_finish_reset(dev_priv); } - if (!test_bit(I915_WEDGED, &dev_priv->gpu_error.flags)) - kobject_uevent_env(kobj, - KOBJ_CHANGE, reset_done_event); + if (!test_bit(I915_WEDGED, &error->flags)) + kobject_uevent_env(kobj, KOBJ_CHANGE, reset_done_event); } static void i915_clear_error_registers(struct drm_i915_private *dev_priv) @@ -2990,6 +3061,7 @@ static void i915_clear_error_registers(struct drm_i915_private *dev_priv) * i915_handle_error - handle a gpu error * @dev_priv: i915 device private * @engine_mask: mask representing engines that are hung + * @flags: control flags * @fmt: Error message format string * * Do some basic checking of register state at error time and @@ -3000,16 +3072,23 @@ static void i915_clear_error_registers(struct drm_i915_private *dev_priv) */ void i915_handle_error(struct drm_i915_private *dev_priv, u32 engine_mask, + unsigned long flags, const char *fmt, ...) { struct intel_engine_cs *engine; unsigned int tmp; - va_list args; char error_msg[80]; + char *msg = NULL; - va_start(args, fmt); - vscnprintf(error_msg, sizeof(error_msg), fmt, args); - va_end(args); + if (fmt) { + va_list args; + + va_start(args, fmt); + vscnprintf(error_msg, sizeof(error_msg), fmt, args); + va_end(args); + + msg = error_msg; + } /* * In most cases it's guaranteed that we get here with an RPM @@ -3020,8 +3099,12 @@ void i915_handle_error(struct drm_i915_private *dev_priv, */ intel_runtime_pm_get(dev_priv); - i915_capture_error_state(dev_priv, engine_mask, error_msg); - i915_clear_error_registers(dev_priv); + engine_mask &= INTEL_INFO(dev_priv)->ring_mask; + + if (flags & I915_ERROR_CAPTURE) { + i915_capture_error_state(dev_priv, engine_mask, msg); + i915_clear_error_registers(dev_priv); + } /* * Try engine reset when available. We fall back to full reset if @@ -3034,7 +3117,7 @@ void i915_handle_error(struct drm_i915_private *dev_priv, &dev_priv->gpu_error.flags)) continue; - if (i915_reset_engine(engine, 0) == 0) + if (i915_reset_engine(engine, msg) == 0) engine_mask &= ~intel_engine_flag(engine); clear_bit(I915_RESET_ENGINE + engine->id, @@ -3064,7 +3147,7 @@ void i915_handle_error(struct drm_i915_private *dev_priv, TASK_UNINTERRUPTIBLE); } - i915_reset_device(dev_priv); + i915_reset_device(dev_priv, engine_mask, msg); for_each_engine(engine, dev_priv, tmp) { clear_bit(I915_RESET_ENGINE + engine->id, @@ -3286,6 +3369,11 @@ static void ironlake_irq_reset(struct drm_device *dev) if (IS_GEN7(dev_priv)) I915_WRITE(GEN7_ERR_INT, 0xffffffff); + if (IS_HASWELL(dev_priv)) { + I915_WRITE(EDP_PSR_IMR, 0xffffffff); + I915_WRITE(EDP_PSR_IIR, 0xffffffff); + } + gen5_gt_irq_reset(dev_priv); ibx_irq_reset(dev_priv); @@ -3324,6 +3412,9 @@ static void gen8_irq_reset(struct drm_device *dev) gen8_gt_irq_reset(dev_priv); + I915_WRITE(EDP_PSR_IMR, 0xffffffff); + I915_WRITE(EDP_PSR_IIR, 0xffffffff); + for_each_pipe(dev_priv, pipe) if (intel_display_power_is_enabled(dev_priv, POWER_DOMAIN_PIPE(pipe))) @@ -3349,6 +3440,9 @@ static void gen11_gt_irq_reset(struct drm_i915_private *dev_priv) I915_WRITE(GEN11_VCS0_VCS1_INTR_MASK, ~0); I915_WRITE(GEN11_VCS2_VCS3_INTR_MASK, ~0); I915_WRITE(GEN11_VECS0_VECS1_INTR_MASK, ~0); + + I915_WRITE(GEN11_GPM_WGBOXPERF_INTR_ENABLE, 0); + I915_WRITE(GEN11_GPM_WGBOXPERF_INTR_MASK, ~0); } static void gen11_irq_reset(struct drm_device *dev) @@ -3697,6 +3791,12 @@ static int ironlake_irq_postinstall(struct drm_device *dev) DE_DP_A_HOTPLUG); } + if (IS_HASWELL(dev_priv)) { + gen3_assert_iir_is_zero(dev_priv, EDP_PSR_IIR); + intel_psr_irq_control(dev_priv, dev_priv->psr.debug); + display_mask |= DE_EDP_PSR_INT_HSW; + } + dev_priv->irq_mask = ~display_mask; ibx_irq_pre_postinstall(dev); @@ -3807,7 +3907,7 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) uint32_t de_pipe_enables; u32 de_port_masked = GEN8_AUX_CHANNEL_A; u32 de_port_enables; - u32 de_misc_masked = GEN8_DE_MISC_GSE; + u32 de_misc_masked = GEN8_DE_MISC_GSE | GEN8_DE_EDP_PSR; enum pipe pipe; if (INTEL_GEN(dev_priv) >= 9) { @@ -3832,6 +3932,9 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv) else if (IS_BROADWELL(dev_priv)) de_port_enables |= GEN8_PORT_DP_A_HOTPLUG; + gen3_assert_iir_is_zero(dev_priv, EDP_PSR_IIR); + intel_psr_irq_control(dev_priv, dev_priv->psr.debug); + for_each_pipe(dev_priv, pipe) { dev_priv->de_irq_mask[pipe] = ~de_pipe_masked; @@ -3887,7 +3990,14 @@ static void gen11_gt_irq_postinstall(struct drm_i915_private *dev_priv) I915_WRITE(GEN11_VCS2_VCS3_INTR_MASK, ~(irqs | irqs << 16)); I915_WRITE(GEN11_VECS0_VECS1_INTR_MASK, ~(irqs | irqs << 16)); - dev_priv->pm_imr = 0xffffffff; /* TODO */ + /* + * RPS interrupts will get enabled/disabled on demand when RPS itself + * is enabled/disabled. + */ + dev_priv->pm_ier = 0x0; + dev_priv->pm_imr = ~dev_priv->pm_ier; + I915_WRITE(GEN11_GPM_WGBOXPERF_INTR_ENABLE, 0); + I915_WRITE(GEN11_GPM_WGBOXPERF_INTR_MASK, ~0); } static int gen11_irq_postinstall(struct drm_device *dev) diff --git a/drivers/gpu/drm/i915/i915_oa_icl.c b/drivers/gpu/drm/i915/i915_oa_icl.c new file mode 100644 index 0000000..a566792 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_oa_icl.c @@ -0,0 +1,118 @@ +/* + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! + * + * + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include <linux/sysfs.h> + +#include "i915_drv.h" +#include "i915_oa_icl.h" + +static const struct i915_oa_reg b_counter_config_test_oa[] = { + { _MMIO(0x2740), 0x00000000 }, + { _MMIO(0x2710), 0x00000000 }, + { _MMIO(0x2714), 0xf0800000 }, + { _MMIO(0x2720), 0x00000000 }, + { _MMIO(0x2724), 0xf0800000 }, + { _MMIO(0x2770), 0x00000004 }, + { _MMIO(0x2774), 0x0000ffff }, + { _MMIO(0x2778), 0x00000003 }, + { _MMIO(0x277c), 0x0000ffff }, + { _MMIO(0x2780), 0x00000007 }, + { _MMIO(0x2784), 0x0000ffff }, + { _MMIO(0x2788), 0x00100002 }, + { _MMIO(0x278c), 0x0000fff7 }, + { _MMIO(0x2790), 0x00100002 }, + { _MMIO(0x2794), 0x0000ffcf }, + { _MMIO(0x2798), 0x00100082 }, + { _MMIO(0x279c), 0x0000ffef }, + { _MMIO(0x27a0), 0x001000c2 }, + { _MMIO(0x27a4), 0x0000ffe7 }, + { _MMIO(0x27a8), 0x00100001 }, + { _MMIO(0x27ac), 0x0000ffe7 }, +}; + +static const struct i915_oa_reg flex_eu_config_test_oa[] = { +}; + +static const struct i915_oa_reg mux_config_test_oa[] = { + { _MMIO(0xd04), 0x00000200 }, + { _MMIO(0x9840), 0x00000000 }, + { _MMIO(0x9884), 0x00000000 }, + { _MMIO(0x9888), 0x10060000 }, + { _MMIO(0x9888), 0x22060000 }, + { _MMIO(0x9888), 0x16060000 }, + { _MMIO(0x9888), 0x24060000 }, + { _MMIO(0x9888), 0x18060000 }, + { _MMIO(0x9888), 0x1a060000 }, + { _MMIO(0x9888), 0x12060000 }, + { _MMIO(0x9888), 0x14060000 }, + { _MMIO(0x9888), 0x10060000 }, + { _MMIO(0x9888), 0x22060000 }, + { _MMIO(0x9884), 0x00000003 }, + { _MMIO(0x9888), 0x16130000 }, + { _MMIO(0x9888), 0x24000001 }, + { _MMIO(0x9888), 0x0e130056 }, + { _MMIO(0x9888), 0x10130000 }, + { _MMIO(0x9888), 0x1a130000 }, + { _MMIO(0x9888), 0x541f0001 }, + { _MMIO(0x9888), 0x181f0000 }, + { _MMIO(0x9888), 0x4c1f0000 }, + { _MMIO(0x9888), 0x301f0000 }, +}; + +static ssize_t +show_test_oa_id(struct device *kdev, struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "1\n"); +} + +void +i915_perf_load_test_config_icl(struct drm_i915_private *dev_priv) +{ + strlcpy(dev_priv->perf.oa.test_config.uuid, + "a291665e-244b-4b76-9b9a-01de9d3c8068", + sizeof(dev_priv->perf.oa.test_config.uuid)); + dev_priv->perf.oa.test_config.id = 1; + + dev_priv->perf.oa.test_config.mux_regs = mux_config_test_oa; + dev_priv->perf.oa.test_config.mux_regs_len = ARRAY_SIZE(mux_config_test_oa); + + dev_priv->perf.oa.test_config.b_counter_regs = b_counter_config_test_oa; + dev_priv->perf.oa.test_config.b_counter_regs_len = ARRAY_SIZE(b_counter_config_test_oa); + + dev_priv->perf.oa.test_config.flex_regs = flex_eu_config_test_oa; + dev_priv->perf.oa.test_config.flex_regs_len = ARRAY_SIZE(flex_eu_config_test_oa); + + dev_priv->perf.oa.test_config.sysfs_metric.name = "a291665e-244b-4b76-9b9a-01de9d3c8068"; + dev_priv->perf.oa.test_config.sysfs_metric.attrs = dev_priv->perf.oa.test_config.attrs; + + dev_priv->perf.oa.test_config.attrs[0] = &dev_priv->perf.oa.test_config.sysfs_metric_id.attr; + + dev_priv->perf.oa.test_config.sysfs_metric_id.attr.name = "id"; + dev_priv->perf.oa.test_config.sysfs_metric_id.attr.mode = 0444; + dev_priv->perf.oa.test_config.sysfs_metric_id.show = show_test_oa_id; +} diff --git a/drivers/gpu/drm/i915/i915_oa_icl.h b/drivers/gpu/drm/i915/i915_oa_icl.h new file mode 100644 index 0000000..ae1c24a --- /dev/null +++ b/drivers/gpu/drm/i915/i915_oa_icl.h @@ -0,0 +1,34 @@ +/* + * Autogenerated file by GPU Top : https://github.com/rib/gputop + * DO NOT EDIT manually! + * + * + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef __I915_OA_ICL_H__ +#define __I915_OA_ICL_H__ + +extern void i915_perf_load_test_config_icl(struct drm_i915_private *dev_priv); + +#endif diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 08108ce..66ea355 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -164,6 +164,9 @@ i915_param_named_unsafe(guc_firmware_path, charp, 0400, i915_param_named_unsafe(huc_firmware_path, charp, 0400, "HuC firmware path to use instead of the default one"); +i915_param_named_unsafe(dmc_firmware_path, charp, 0400, + "DMC firmware path to use instead of the default one"); + i915_param_named_unsafe(enable_dp_mst, bool, 0600, "Enable multi-stream transport (MST) for new DisplayPort sinks. (default: true)"); diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index 430f5f9..6684025 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -48,9 +48,10 @@ struct drm_printer; param(int, enable_ips, 1) \ param(int, invert_brightness, 0) \ param(int, enable_guc, 0) \ - param(int, guc_log_level, 0) \ + param(int, guc_log_level, -1) \ param(char *, guc_firmware_path, NULL) \ param(char *, huc_firmware_path, NULL) \ + param(char *, dmc_firmware_path, NULL) \ param(int, mmio_debug, 0) \ param(int, edp_vswing, 0) \ param(int, reset, 2) \ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 062e91b..4364922 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -602,6 +602,7 @@ static const struct intel_device_info intel_icelake_11_info = { PLATFORM(INTEL_ICELAKE), .is_alpha_support = 1, .has_resource_streamer = 0, + .ring_mask = RENDER_RING | BLT_RING | VEBOX_RING | BSD_RING | BSD3_RING, }; #undef GEN diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index abaca6e..019bd2d 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -209,6 +209,7 @@ #include "i915_oa_cflgt2.h" #include "i915_oa_cflgt3.h" #include "i915_oa_cnl.h" +#include "i915_oa_icl.h" /* HW requires this to be a power of two, between 128k and 16M, though driver * is currently generally designed assuming the largest 16M size is used such @@ -1042,7 +1043,7 @@ static int gen7_append_oa_reports(struct i915_perf_stream *stream, I915_WRITE(GEN7_OASTATUS2, ((head & GEN7_OASTATUS2_HEAD_MASK) | - OA_MEM_SELECT_GGTT)); + GEN7_OASTATUS2_MEM_SELECT_GGTT)); dev_priv->perf.oa.oa_buffer.head = head; spin_unlock_irqrestore(&dev_priv->perf.oa.oa_buffer.ptr_lock, flags); @@ -1233,7 +1234,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) * * NB: implied RCS engine... */ - ring = engine->context_pin(engine, stream->ctx); + ring = intel_context_pin(stream->ctx, engine); mutex_unlock(&dev_priv->drm.struct_mutex); if (IS_ERR(ring)) return PTR_ERR(ring); @@ -1245,7 +1246,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream) * with gen8+ and execlists */ dev_priv->perf.oa.specific_ctx_id = - i915_ggtt_offset(stream->ctx->engine[engine->id].state); + i915_ggtt_offset(to_intel_context(stream->ctx, engine)->state); } return 0; @@ -1270,7 +1271,7 @@ static void oa_put_render_ctx_id(struct i915_perf_stream *stream) mutex_lock(&dev_priv->drm.struct_mutex); dev_priv->perf.oa.specific_ctx_id = INVALID_CTX_ID; - engine->context_unpin(engine, stream->ctx); + intel_context_unpin(stream->ctx, engine); mutex_unlock(&dev_priv->drm.struct_mutex); } @@ -1332,7 +1333,8 @@ static void gen7_init_oa_buffer(struct drm_i915_private *dev_priv) /* Pre-DevBDW: OABUFFER must be set with counters off, * before OASTATUS1, but after OASTATUS2 */ - I915_WRITE(GEN7_OASTATUS2, gtt_offset | OA_MEM_SELECT_GGTT); /* head */ + I915_WRITE(GEN7_OASTATUS2, + gtt_offset | GEN7_OASTATUS2_MEM_SELECT_GGTT); /* head */ dev_priv->perf.oa.oa_buffer.head = gtt_offset; I915_WRITE(GEN7_OABUFFER, gtt_offset); @@ -1392,7 +1394,7 @@ static void gen8_init_oa_buffer(struct drm_i915_private *dev_priv) * bit." */ I915_WRITE(GEN8_OABUFFER, gtt_offset | - OABUFFER_SIZE_16M | OA_MEM_SELECT_GGTT); + OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT); I915_WRITE(GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK); /* Mark that we need updated tail pointers to read from... */ @@ -1693,7 +1695,7 @@ static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_pr const struct i915_oa_config *oa_config) { struct intel_engine_cs *engine = dev_priv->engine[RCS]; - struct i915_gem_timeline *timeline; + struct i915_timeline *timeline; struct i915_request *rq; int ret; @@ -1714,15 +1716,11 @@ static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_pr /* Queue this switch after all other activity */ list_for_each_entry(timeline, &dev_priv->gt.timelines, link) { struct i915_request *prev; - struct intel_timeline *tl; - tl = &timeline->engine[engine->id]; - prev = i915_gem_active_raw(&tl->last_request, + prev = i915_gem_active_raw(&timeline->last_request, &dev_priv->drm.struct_mutex); if (prev) - i915_sw_fence_await_sw_fence_gfp(&rq->submit, - &prev->submit, - GFP_KERNEL); + i915_request_await_dma_fence(rq, &prev->fence); } i915_request_add(rq); @@ -1757,6 +1755,7 @@ static int gen8_switch_to_updated_kernel_context(struct drm_i915_private *dev_pr static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, const struct i915_oa_config *oa_config) { + struct intel_engine_cs *engine = dev_priv->engine[RCS]; struct i915_gem_context *ctx; int ret; unsigned int wait_flags = I915_WAIT_LOCKED; @@ -1787,7 +1786,7 @@ static int gen8_configure_all_contexts(struct drm_i915_private *dev_priv, /* Update all contexts now that we've stalled the submission. */ list_for_each_entry(ctx, &dev_priv->contexts.list, link) { - struct intel_context *ce = &ctx->engine[RCS]; + struct intel_context *ce = to_intel_context(ctx, engine); u32 *regs; /* OA settings will be set upon first use */ @@ -1840,7 +1839,7 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv, * be read back from automatically triggered reports, as part of the * RPT_ID field. */ - if (IS_GEN9(dev_priv) || IS_GEN10(dev_priv)) { + if (IS_GEN(dev_priv, 9, 11)) { I915_WRITE(GEN8_OA_DEBUG, _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS | GEN9_OA_DEBUG_INCLUDE_CLK_RATIO)); @@ -1870,7 +1869,6 @@ static void gen8_disable_metric_set(struct drm_i915_private *dev_priv) I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) & ~GT_NOA_ENABLE)); - } static void gen10_disable_metric_set(struct drm_i915_private *dev_priv) @@ -1885,6 +1883,13 @@ static void gen10_disable_metric_set(struct drm_i915_private *dev_priv) static void gen7_oa_enable(struct drm_i915_private *dev_priv) { + struct i915_gem_context *ctx = + dev_priv->perf.oa.exclusive_stream->ctx; + u32 ctx_id = dev_priv->perf.oa.specific_ctx_id; + bool periodic = dev_priv->perf.oa.periodic; + u32 period_exponent = dev_priv->perf.oa.period_exponent; + u32 report_format = dev_priv->perf.oa.oa_buffer.format; + /* * Reset buf pointers so we don't forward reports from before now. * @@ -1896,25 +1901,14 @@ static void gen7_oa_enable(struct drm_i915_private *dev_priv) */ gen7_init_oa_buffer(dev_priv); - if (dev_priv->perf.oa.exclusive_stream->enabled) { - struct i915_gem_context *ctx = - dev_priv->perf.oa.exclusive_stream->ctx; - u32 ctx_id = dev_priv->perf.oa.specific_ctx_id; - - bool periodic = dev_priv->perf.oa.periodic; - u32 period_exponent = dev_priv->perf.oa.period_exponent; - u32 report_format = dev_priv->perf.oa.oa_buffer.format; - - I915_WRITE(GEN7_OACONTROL, - (ctx_id & GEN7_OACONTROL_CTX_MASK) | - (period_exponent << - GEN7_OACONTROL_TIMER_PERIOD_SHIFT) | - (periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) | - (report_format << GEN7_OACONTROL_FORMAT_SHIFT) | - (ctx ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) | - GEN7_OACONTROL_ENABLE); - } else - I915_WRITE(GEN7_OACONTROL, 0); + I915_WRITE(GEN7_OACONTROL, + (ctx_id & GEN7_OACONTROL_CTX_MASK) | + (period_exponent << + GEN7_OACONTROL_TIMER_PERIOD_SHIFT) | + (periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) | + (report_format << GEN7_OACONTROL_FORMAT_SHIFT) | + (ctx ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) | + GEN7_OACONTROL_ENABLE); } static void gen8_oa_enable(struct drm_i915_private *dev_priv) @@ -1966,11 +1960,19 @@ static void i915_oa_stream_enable(struct i915_perf_stream *stream) static void gen7_oa_disable(struct drm_i915_private *dev_priv) { I915_WRITE(GEN7_OACONTROL, 0); + if (intel_wait_for_register(dev_priv, + GEN7_OACONTROL, GEN7_OACONTROL_ENABLE, 0, + 50)) + DRM_ERROR("wait for OA to be disabled timed out\n"); } static void gen8_oa_disable(struct drm_i915_private *dev_priv) { I915_WRITE(GEN8_OACONTROL, 0); + if (intel_wait_for_register(dev_priv, + GEN8_OACONTROL, GEN8_OA_COUNTER_ENABLE, 0, + 50)) + DRM_ERROR("wait for OA to be disabled timed out\n"); } /** @@ -2099,13 +2101,17 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, if (stream->ctx) { ret = oa_get_render_ctx_id(stream); - if (ret) + if (ret) { + DRM_DEBUG("Invalid context id to filter with\n"); return ret; + } } ret = get_oa_config(dev_priv, props->metrics_set, &stream->oa_config); - if (ret) + if (ret) { + DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set); goto err_config; + } /* PRM - observability performance counters: * @@ -2132,8 +2138,10 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, ret = dev_priv->perf.oa.ops.enable_metric_set(dev_priv, stream->oa_config); - if (ret) + if (ret) { + DRM_DEBUG("Unable to enable metric set\n"); goto err_enable; + } stream->ops = &i915_oa_stream_ops; @@ -2745,7 +2753,8 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv, props->ctx_handle = value; break; case DRM_I915_PERF_PROP_SAMPLE_OA: - props->sample_flags |= SAMPLE_OA_REPORT; + if (value) + props->sample_flags |= SAMPLE_OA_REPORT; break; case DRM_I915_PERF_PROP_OA_METRICS_SET: if (value == 0) { @@ -2935,6 +2944,8 @@ void i915_perf_register(struct drm_i915_private *dev_priv) i915_perf_load_test_config_cflgt3(dev_priv); } else if (IS_CANNONLAKE(dev_priv)) { i915_perf_load_test_config_cnl(dev_priv); + } else if (IS_ICELAKE(dev_priv)) { + i915_perf_load_test_config_icl(dev_priv); } if (dev_priv->perf.oa.test_config.id == 0) @@ -3292,6 +3303,8 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, mutex_unlock(&dev_priv->perf.metrics_lock); + DRM_DEBUG("Added config %s id=%i\n", oa_config->uuid, oa_config->id); + return oa_config->id; sysfs_err: @@ -3348,6 +3361,9 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, &oa_config->sysfs_metric); idr_remove(&dev_priv->perf.metrics_idr, *arg); + + DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id); + put_oa_config(dev_priv, oa_config); config_err: @@ -3467,7 +3483,7 @@ void i915_perf_init(struct drm_i915_private *dev_priv) dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16); } - } else if (IS_GEN10(dev_priv)) { + } else if (IS_GEN(dev_priv, 10, 11)) { dev_priv->perf.oa.ops.is_valid_b_counter_reg = gen7_is_valid_b_counter_addr; dev_priv->perf.oa.ops.is_valid_mux_reg = diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index d8feb90..dc87797 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -1,33 +1,12 @@ /* - * Copyright © 2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. + * SPDX-License-Identifier: MIT * + * Copyright © 2017-2018 Intel Corporation */ -#include <linux/perf_event.h> -#include <linux/pm_runtime.h> - -#include "i915_drv.h" #include "i915_pmu.h" #include "intel_ringbuffer.h" +#include "i915_drv.h" /* Frequency for the sampling timer for events which need it. */ #define FREQUENCY 200 @@ -473,20 +452,37 @@ static u64 get_rc6(struct drm_i915_private *i915) spin_lock_irqsave(&i915->pmu.lock, flags); spin_lock(&kdev->power.lock); - if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) - i915->pmu.suspended_jiffies_last = - kdev->power.suspended_jiffies; + /* + * After the above branch intel_runtime_pm_get_if_in_use failed + * to get the runtime PM reference we cannot assume we are in + * runtime suspend since we can either: a) race with coming out + * of it before we took the power.lock, or b) there are other + * states than suspended which can bring us here. + * + * We need to double-check that we are indeed currently runtime + * suspended and if not we cannot do better than report the last + * known RC6 value. + */ + if (kdev->power.runtime_status == RPM_SUSPENDED) { + if (!i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) + i915->pmu.suspended_jiffies_last = + kdev->power.suspended_jiffies; - val = kdev->power.suspended_jiffies - - i915->pmu.suspended_jiffies_last; - val += jiffies - kdev->power.accounting_timestamp; + val = kdev->power.suspended_jiffies - + i915->pmu.suspended_jiffies_last; + val += jiffies - kdev->power.accounting_timestamp; - spin_unlock(&kdev->power.lock); + val = jiffies_to_nsecs(val); + val += i915->pmu.sample[__I915_SAMPLE_RC6].cur; - val = jiffies_to_nsecs(val); - val += i915->pmu.sample[__I915_SAMPLE_RC6].cur; - i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val; + i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val; + } else if (i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur) { + val = i915->pmu.sample[__I915_SAMPLE_RC6_ESTIMATED].cur; + } else { + val = i915->pmu.sample[__I915_SAMPLE_RC6].cur; + } + spin_unlock(&kdev->power.lock); spin_unlock_irqrestore(&i915->pmu.lock, flags); } diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h index aa1b1a9..2ba7352 100644 --- a/drivers/gpu/drm/i915/i915_pmu.h +++ b/drivers/gpu/drm/i915/i915_pmu.h @@ -1,29 +1,19 @@ /* - * Copyright © 2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. + * SPDX-License-Identifier: MIT * + * Copyright © 2017-2018 Intel Corporation */ + #ifndef __I915_PMU_H__ #define __I915_PMU_H__ +#include <linux/hrtimer.h> +#include <linux/perf_event.h> +#include <linux/spinlock_types.h> +#include <drm/i915_drm.h> + +struct drm_i915_private; + enum { __I915_SAMPLE_FREQ_ACT = 0, __I915_SAMPLE_FREQ_REQ, diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index e6a8c0e..f11bb21 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -153,9 +153,6 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define _MMIO_PORT3(pipe, a, b, c) _MMIO(_PICK(pipe, a, b, c)) #define _PLL(pll, a, b) ((a) + (pll)*((b)-(a))) #define _MMIO_PLL(pll, a, b) _MMIO(_PLL(pll, a, b)) -#define _MMIO_PORT6(port, a, b, c, d, e, f) _MMIO(_PICK(port, a, b, c, d, e, f)) -#define _MMIO_PORT6_LN(port, ln, a0, a1, b, c, d, e, f) \ - _MMIO(_PICK(port, a0, b, c, d, e, f) + (ln * (a1 - a0))) #define _PHY3(phy, ...) _PICK(phy, __VA_ARGS__) #define _MMIO_PHY3(phy, a, b, c) _MMIO(_PHY3(phy, a, b, c)) @@ -191,6 +188,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define OTHER_CLASS 4 #define MAX_ENGINE_CLASS 4 +#define OTHER_GTPM_INSTANCE 1 #define MAX_ENGINE_INSTANCE 3 /* PCI config space */ @@ -304,6 +302,17 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN6_GRDOM_VECS (1 << 4) #define GEN9_GRDOM_GUC (1 << 5) #define GEN8_GRDOM_MEDIA2 (1 << 7) +/* GEN11 changed all bit defs except for FULL & RENDER */ +#define GEN11_GRDOM_FULL GEN6_GRDOM_FULL +#define GEN11_GRDOM_RENDER GEN6_GRDOM_RENDER +#define GEN11_GRDOM_BLT (1 << 2) +#define GEN11_GRDOM_GUC (1 << 3) +#define GEN11_GRDOM_MEDIA (1 << 5) +#define GEN11_GRDOM_MEDIA2 (1 << 6) +#define GEN11_GRDOM_MEDIA3 (1 << 7) +#define GEN11_GRDOM_MEDIA4 (1 << 8) +#define GEN11_GRDOM_VECS (1 << 13) +#define GEN11_GRDOM_VECS2 (1 << 14) #define RING_PP_DIR_BASE(engine) _MMIO((engine)->mmio_base+0x228) #define RING_PP_DIR_BASE_READ(engine) _MMIO((engine)->mmio_base+0x518) @@ -430,145 +439,6 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define VGA_CR_INDEX_CGA 0x3d4 #define VGA_CR_DATA_CGA 0x3d5 -/* - * Instruction field definitions used by the command parser - */ -#define INSTR_CLIENT_SHIFT 29 -#define INSTR_MI_CLIENT 0x0 -#define INSTR_BC_CLIENT 0x2 -#define INSTR_RC_CLIENT 0x3 -#define INSTR_SUBCLIENT_SHIFT 27 -#define INSTR_SUBCLIENT_MASK 0x18000000 -#define INSTR_MEDIA_SUBCLIENT 0x2 -#define INSTR_26_TO_24_MASK 0x7000000 -#define INSTR_26_TO_24_SHIFT 24 - -/* - * Memory interface instructions used by the kernel - */ -#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags)) -/* Many MI commands use bit 22 of the header dword for GGTT vs PPGTT */ -#define MI_GLOBAL_GTT (1<<22) - -#define MI_NOOP MI_INSTR(0, 0) -#define MI_USER_INTERRUPT MI_INSTR(0x02, 0) -#define MI_WAIT_FOR_EVENT MI_INSTR(0x03, 0) -#define MI_WAIT_FOR_OVERLAY_FLIP (1<<16) -#define MI_WAIT_FOR_PLANE_B_FLIP (1<<6) -#define MI_WAIT_FOR_PLANE_A_FLIP (1<<2) -#define MI_WAIT_FOR_PLANE_A_SCANLINES (1<<1) -#define MI_FLUSH MI_INSTR(0x04, 0) -#define MI_READ_FLUSH (1 << 0) -#define MI_EXE_FLUSH (1 << 1) -#define MI_NO_WRITE_FLUSH (1 << 2) -#define MI_SCENE_COUNT (1 << 3) /* just increment scene count */ -#define MI_END_SCENE (1 << 4) /* flush binner and incr scene count */ -#define MI_INVALIDATE_ISP (1 << 5) /* invalidate indirect state pointers */ -#define MI_REPORT_HEAD MI_INSTR(0x07, 0) -#define MI_ARB_ON_OFF MI_INSTR(0x08, 0) -#define MI_ARB_ENABLE (1<<0) -#define MI_ARB_DISABLE (0<<0) -#define MI_BATCH_BUFFER_END MI_INSTR(0x0a, 0) -#define MI_SUSPEND_FLUSH MI_INSTR(0x0b, 0) -#define MI_SUSPEND_FLUSH_EN (1<<0) -#define MI_SET_APPID MI_INSTR(0x0e, 0) -#define MI_OVERLAY_FLIP MI_INSTR(0x11, 0) -#define MI_OVERLAY_CONTINUE (0x0<<21) -#define MI_OVERLAY_ON (0x1<<21) -#define MI_OVERLAY_OFF (0x2<<21) -#define MI_LOAD_SCAN_LINES_INCL MI_INSTR(0x12, 0) -#define MI_DISPLAY_FLIP MI_INSTR(0x14, 2) -#define MI_DISPLAY_FLIP_I915 MI_INSTR(0x14, 1) -#define MI_DISPLAY_FLIP_PLANE(n) ((n) << 20) -/* IVB has funny definitions for which plane to flip. */ -#define MI_DISPLAY_FLIP_IVB_PLANE_A (0 << 19) -#define MI_DISPLAY_FLIP_IVB_PLANE_B (1 << 19) -#define MI_DISPLAY_FLIP_IVB_SPRITE_A (2 << 19) -#define MI_DISPLAY_FLIP_IVB_SPRITE_B (3 << 19) -#define MI_DISPLAY_FLIP_IVB_PLANE_C (4 << 19) -#define MI_DISPLAY_FLIP_IVB_SPRITE_C (5 << 19) -/* SKL ones */ -#define MI_DISPLAY_FLIP_SKL_PLANE_1_A (0 << 8) -#define MI_DISPLAY_FLIP_SKL_PLANE_1_B (1 << 8) -#define MI_DISPLAY_FLIP_SKL_PLANE_1_C (2 << 8) -#define MI_DISPLAY_FLIP_SKL_PLANE_2_A (4 << 8) -#define MI_DISPLAY_FLIP_SKL_PLANE_2_B (5 << 8) -#define MI_DISPLAY_FLIP_SKL_PLANE_2_C (6 << 8) -#define MI_DISPLAY_FLIP_SKL_PLANE_3_A (7 << 8) -#define MI_DISPLAY_FLIP_SKL_PLANE_3_B (8 << 8) -#define MI_DISPLAY_FLIP_SKL_PLANE_3_C (9 << 8) -#define MI_SEMAPHORE_MBOX MI_INSTR(0x16, 1) /* gen6, gen7 */ -#define MI_SEMAPHORE_GLOBAL_GTT (1<<22) -#define MI_SEMAPHORE_UPDATE (1<<21) -#define MI_SEMAPHORE_COMPARE (1<<20) -#define MI_SEMAPHORE_REGISTER (1<<18) -#define MI_SEMAPHORE_SYNC_VR (0<<16) /* RCS wait for VCS (RVSYNC) */ -#define MI_SEMAPHORE_SYNC_VER (1<<16) /* RCS wait for VECS (RVESYNC) */ -#define MI_SEMAPHORE_SYNC_BR (2<<16) /* RCS wait for BCS (RBSYNC) */ -#define MI_SEMAPHORE_SYNC_BV (0<<16) /* VCS wait for BCS (VBSYNC) */ -#define MI_SEMAPHORE_SYNC_VEV (1<<16) /* VCS wait for VECS (VVESYNC) */ -#define MI_SEMAPHORE_SYNC_RV (2<<16) /* VCS wait for RCS (VRSYNC) */ -#define MI_SEMAPHORE_SYNC_RB (0<<16) /* BCS wait for RCS (BRSYNC) */ -#define MI_SEMAPHORE_SYNC_VEB (1<<16) /* BCS wait for VECS (BVESYNC) */ -#define MI_SEMAPHORE_SYNC_VB (2<<16) /* BCS wait for VCS (BVSYNC) */ -#define MI_SEMAPHORE_SYNC_BVE (0<<16) /* VECS wait for BCS (VEBSYNC) */ -#define MI_SEMAPHORE_SYNC_VVE (1<<16) /* VECS wait for VCS (VEVSYNC) */ -#define MI_SEMAPHORE_SYNC_RVE (2<<16) /* VECS wait for RCS (VERSYNC) */ -#define MI_SEMAPHORE_SYNC_INVALID (3<<16) -#define MI_SEMAPHORE_SYNC_MASK (3<<16) -#define MI_SET_CONTEXT MI_INSTR(0x18, 0) -#define MI_MM_SPACE_GTT (1<<8) -#define MI_MM_SPACE_PHYSICAL (0<<8) -#define MI_SAVE_EXT_STATE_EN (1<<3) -#define MI_RESTORE_EXT_STATE_EN (1<<2) -#define MI_FORCE_RESTORE (1<<1) -#define MI_RESTORE_INHIBIT (1<<0) -#define HSW_MI_RS_SAVE_STATE_EN (1<<3) -#define HSW_MI_RS_RESTORE_STATE_EN (1<<2) -#define MI_SEMAPHORE_SIGNAL MI_INSTR(0x1b, 0) /* GEN8+ */ -#define MI_SEMAPHORE_TARGET(engine) ((engine)<<15) -#define MI_SEMAPHORE_WAIT MI_INSTR(0x1c, 2) /* GEN8+ */ -#define MI_SEMAPHORE_POLL (1<<15) -#define MI_SEMAPHORE_SAD_GTE_SDD (1<<12) -#define MI_STORE_DWORD_IMM MI_INSTR(0x20, 1) -#define MI_STORE_DWORD_IMM_GEN4 MI_INSTR(0x20, 2) -#define MI_MEM_VIRTUAL (1 << 22) /* 945,g33,965 */ -#define MI_USE_GGTT (1 << 22) /* g4x+ */ -#define MI_STORE_DWORD_INDEX MI_INSTR(0x21, 1) -#define MI_STORE_DWORD_INDEX_SHIFT 2 -/* Official intel docs are somewhat sloppy concerning MI_LOAD_REGISTER_IMM: - * - Always issue a MI_NOOP _before_ the MI_LOAD_REGISTER_IMM - otherwise hw - * simply ignores the register load under certain conditions. - * - One can actually load arbitrary many arbitrary registers: Simply issue x - * address/value pairs. Don't overdue it, though, x <= 2^4 must hold! - */ -#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1) -#define MI_LRI_FORCE_POSTED (1<<12) -#define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1) -#define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2) -#define MI_SRM_LRM_GLOBAL_GTT (1<<22) -#define MI_FLUSH_DW MI_INSTR(0x26, 1) /* for GEN6 */ -#define MI_FLUSH_DW_STORE_INDEX (1<<21) -#define MI_INVALIDATE_TLB (1<<18) -#define MI_FLUSH_DW_OP_STOREDW (1<<14) -#define MI_FLUSH_DW_OP_MASK (3<<14) -#define MI_FLUSH_DW_NOTIFY (1<<8) -#define MI_INVALIDATE_BSD (1<<7) -#define MI_FLUSH_DW_USE_GTT (1<<2) -#define MI_FLUSH_DW_USE_PPGTT (0<<2) -#define MI_LOAD_REGISTER_MEM MI_INSTR(0x29, 1) -#define MI_LOAD_REGISTER_MEM_GEN8 MI_INSTR(0x29, 2) -#define MI_BATCH_BUFFER MI_INSTR(0x30, 1) -#define MI_BATCH_NON_SECURE (1) -/* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */ -#define MI_BATCH_NON_SECURE_I965 (1<<8) -#define MI_BATCH_PPGTT_HSW (1<<8) -#define MI_BATCH_NON_SECURE_HSW (1<<13) -#define MI_BATCH_BUFFER_START MI_INSTR(0x31, 0) -#define MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */ -#define MI_BATCH_BUFFER_START_GEN8 MI_INSTR(0x31, 1) -#define MI_BATCH_RESOURCE_STREAMER (1<<10) - #define MI_PREDICATE_SRC0 _MMIO(0x2400) #define MI_PREDICATE_SRC0_UDW _MMIO(0x2400 + 4) #define MI_PREDICATE_SRC1 _MMIO(0x2408) @@ -579,130 +449,6 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define LOWER_SLICE_DISABLED (0<<0) /* - * 3D instructions used by the kernel - */ -#define GFX_INSTR(opcode, flags) ((0x3 << 29) | ((opcode) << 24) | (flags)) - -#define GEN9_MEDIA_POOL_STATE ((0x3 << 29) | (0x2 << 27) | (0x5 << 16) | 4) -#define GEN9_MEDIA_POOL_ENABLE (1 << 31) -#define GFX_OP_RASTER_RULES ((0x3<<29)|(0x7<<24)) -#define GFX_OP_SCISSOR ((0x3<<29)|(0x1c<<24)|(0x10<<19)) -#define SC_UPDATE_SCISSOR (0x1<<1) -#define SC_ENABLE_MASK (0x1<<0) -#define SC_ENABLE (0x1<<0) -#define GFX_OP_LOAD_INDIRECT ((0x3<<29)|(0x1d<<24)|(0x7<<16)) -#define GFX_OP_SCISSOR_INFO ((0x3<<29)|(0x1d<<24)|(0x81<<16)|(0x1)) -#define SCI_YMIN_MASK (0xffff<<16) -#define SCI_XMIN_MASK (0xffff<<0) -#define SCI_YMAX_MASK (0xffff<<16) -#define SCI_XMAX_MASK (0xffff<<0) -#define GFX_OP_SCISSOR_ENABLE ((0x3<<29)|(0x1c<<24)|(0x10<<19)) -#define GFX_OP_SCISSOR_RECT ((0x3<<29)|(0x1d<<24)|(0x81<<16)|1) -#define GFX_OP_COLOR_FACTOR ((0x3<<29)|(0x1d<<24)|(0x1<<16)|0x0) -#define GFX_OP_STIPPLE ((0x3<<29)|(0x1d<<24)|(0x83<<16)) -#define GFX_OP_MAP_INFO ((0x3<<29)|(0x1d<<24)|0x4) -#define GFX_OP_DESTBUFFER_VARS ((0x3<<29)|(0x1d<<24)|(0x85<<16)|0x0) -#define GFX_OP_DESTBUFFER_INFO ((0x3<<29)|(0x1d<<24)|(0x8e<<16)|1) -#define GFX_OP_DRAWRECT_INFO ((0x3<<29)|(0x1d<<24)|(0x80<<16)|(0x3)) -#define GFX_OP_DRAWRECT_INFO_I965 ((0x7900<<16)|0x2) - -#define COLOR_BLT_CMD (2<<29 | 0x40<<22 | (5-2)) -#define SRC_COPY_BLT_CMD ((2<<29)|(0x43<<22)|4) -#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6) -#define XY_MONO_SRC_COPY_IMM_BLT ((2<<29)|(0x71<<22)|5) -#define BLT_WRITE_A (2<<20) -#define BLT_WRITE_RGB (1<<20) -#define BLT_WRITE_RGBA (BLT_WRITE_RGB | BLT_WRITE_A) -#define BLT_DEPTH_8 (0<<24) -#define BLT_DEPTH_16_565 (1<<24) -#define BLT_DEPTH_16_1555 (2<<24) -#define BLT_DEPTH_32 (3<<24) -#define BLT_ROP_SRC_COPY (0xcc<<16) -#define BLT_ROP_COLOR_COPY (0xf0<<16) -#define XY_SRC_COPY_BLT_SRC_TILED (1<<15) /* 965+ only */ -#define XY_SRC_COPY_BLT_DST_TILED (1<<11) /* 965+ only */ -#define CMD_OP_DISPLAYBUFFER_INFO ((0x0<<29)|(0x14<<23)|2) -#define ASYNC_FLIP (1<<22) -#define DISPLAY_PLANE_A (0<<20) -#define DISPLAY_PLANE_B (1<<20) -#define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2)) -#define PIPE_CONTROL_FLUSH_L3 (1<<27) -#define PIPE_CONTROL_GLOBAL_GTT_IVB (1<<24) /* gen7+ */ -#define PIPE_CONTROL_MMIO_WRITE (1<<23) -#define PIPE_CONTROL_STORE_DATA_INDEX (1<<21) -#define PIPE_CONTROL_CS_STALL (1<<20) -#define PIPE_CONTROL_TLB_INVALIDATE (1<<18) -#define PIPE_CONTROL_MEDIA_STATE_CLEAR (1<<16) -#define PIPE_CONTROL_QW_WRITE (1<<14) -#define PIPE_CONTROL_POST_SYNC_OP_MASK (3<<14) -#define PIPE_CONTROL_DEPTH_STALL (1<<13) -#define PIPE_CONTROL_WRITE_FLUSH (1<<12) -#define PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH (1<<12) /* gen6+ */ -#define PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE (1<<11) /* MBZ on Ironlake */ -#define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1<<10) /* GM45+ only */ -#define PIPE_CONTROL_INDIRECT_STATE_DISABLE (1<<9) -#define PIPE_CONTROL_NOTIFY (1<<8) -#define PIPE_CONTROL_FLUSH_ENABLE (1<<7) /* gen7+ */ -#define PIPE_CONTROL_DC_FLUSH_ENABLE (1<<5) -#define PIPE_CONTROL_VF_CACHE_INVALIDATE (1<<4) -#define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1<<3) -#define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1<<2) -#define PIPE_CONTROL_STALL_AT_SCOREBOARD (1<<1) -#define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1<<0) -#define PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */ - -/* - * Commands used only by the command parser - */ -#define MI_SET_PREDICATE MI_INSTR(0x01, 0) -#define MI_ARB_CHECK MI_INSTR(0x05, 0) -#define MI_RS_CONTROL MI_INSTR(0x06, 0) -#define MI_URB_ATOMIC_ALLOC MI_INSTR(0x09, 0) -#define MI_PREDICATE MI_INSTR(0x0C, 0) -#define MI_RS_CONTEXT MI_INSTR(0x0F, 0) -#define MI_TOPOLOGY_FILTER MI_INSTR(0x0D, 0) -#define MI_LOAD_SCAN_LINES_EXCL MI_INSTR(0x13, 0) -#define MI_URB_CLEAR MI_INSTR(0x19, 0) -#define MI_UPDATE_GTT MI_INSTR(0x23, 0) -#define MI_CLFLUSH MI_INSTR(0x27, 0) -#define MI_REPORT_PERF_COUNT MI_INSTR(0x28, 0) -#define MI_REPORT_PERF_COUNT_GGTT (1<<0) -#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 0) -#define MI_RS_STORE_DATA_IMM MI_INSTR(0x2B, 0) -#define MI_LOAD_URB_MEM MI_INSTR(0x2C, 0) -#define MI_STORE_URB_MEM MI_INSTR(0x2D, 0) -#define MI_CONDITIONAL_BATCH_BUFFER_END MI_INSTR(0x36, 0) - -#define PIPELINE_SELECT ((0x3<<29)|(0x1<<27)|(0x1<<24)|(0x4<<16)) -#define GFX_OP_3DSTATE_VF_STATISTICS ((0x3<<29)|(0x1<<27)|(0x0<<24)|(0xB<<16)) -#define MEDIA_VFE_STATE ((0x3<<29)|(0x2<<27)|(0x0<<24)|(0x0<<16)) -#define MEDIA_VFE_STATE_MMIO_ACCESS_MASK (0x18) -#define GPGPU_OBJECT ((0x3<<29)|(0x2<<27)|(0x1<<24)|(0x4<<16)) -#define GPGPU_WALKER ((0x3<<29)|(0x2<<27)|(0x1<<24)|(0x5<<16)) -#define GFX_OP_3DSTATE_DX9_CONSTANTF_VS \ - ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x39<<16)) -#define GFX_OP_3DSTATE_DX9_CONSTANTF_PS \ - ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x3A<<16)) -#define GFX_OP_3DSTATE_SO_DECL_LIST \ - ((0x3<<29)|(0x3<<27)|(0x1<<24)|(0x17<<16)) - -#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_VS \ - ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x43<<16)) -#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_GS \ - ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x44<<16)) -#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_HS \ - ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x45<<16)) -#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_DS \ - ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x46<<16)) -#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS \ - ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x47<<16)) - -#define MFX_WAIT ((0x3<<29)|(0x1<<27)|(0x0<<16)) - -#define COLOR_BLT ((0x2<<29)|(0x40<<22)) -#define SRC_COPY_BLT ((0x2<<29)|(0x43<<22)) - -/* * Registers used only by the command parser */ #define BCS_SWCTRL _MMIO(0x22200) @@ -802,6 +548,7 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN8_OABUFFER_UDW _MMIO(0x23b4) #define GEN8_OABUFFER _MMIO(0x2b14) +#define GEN8_OABUFFER_MEM_SELECT_GGTT (1 << 0) /* 0: PPGTT, 1: GGTT */ #define GEN7_OASTATUS1 _MMIO(0x2364) #define GEN7_OASTATUS1_TAIL_MASK 0xffffffc0 @@ -810,7 +557,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN7_OASTATUS1_REPORT_LOST (1<<0) #define GEN7_OASTATUS2 _MMIO(0x2368) -#define GEN7_OASTATUS2_HEAD_MASK 0xffffffc0 +#define GEN7_OASTATUS2_HEAD_MASK 0xffffffc0 +#define GEN7_OASTATUS2_MEM_SELECT_GGTT (1 << 0) /* 0: PPGTT, 1: GGTT */ #define GEN8_OASTATUS _MMIO(0x2b08) #define GEN8_OASTATUS_OVERRUN_STATUS (1<<3) @@ -832,8 +580,6 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define OABUFFER_SIZE_8M (6<<3) #define OABUFFER_SIZE_16M (7<<3) -#define OA_MEM_SELECT_GGTT (1<<0) - /* * Flexible, Aggregate EU Counter Registers. * Note: these aren't contiguous @@ -1127,6 +873,12 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg) #define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK (1 << GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT) #define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ 0 #define GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ 1 +#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT 3 +#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK (0x7 << GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT) +#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ 0 +#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ 1 +#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ 2 +#define GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ 3 #define GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT 1 #define GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK (0x3 << GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT) @@ -1948,79 +1700,100 @@ enum i915_power_well_id { #define _CNL_PORT_PCS_DW1_LN0_C 0x162C04 #define _CNL_PORT_PCS_DW1_LN0_D 0x162E04 #define _CNL_PORT_PCS_DW1_LN0_F 0x162804 -#define CNL_PORT_PCS_DW1_GRP(port) _MMIO_PORT6(port, \ +#define CNL_PORT_PCS_DW1_GRP(port) _MMIO(_PICK(port, \ _CNL_PORT_PCS_DW1_GRP_AE, \ _CNL_PORT_PCS_DW1_GRP_B, \ _CNL_PORT_PCS_DW1_GRP_C, \ _CNL_PORT_PCS_DW1_GRP_D, \ _CNL_PORT_PCS_DW1_GRP_AE, \ - _CNL_PORT_PCS_DW1_GRP_F) -#define CNL_PORT_PCS_DW1_LN0(port) _MMIO_PORT6(port, \ + _CNL_PORT_PCS_DW1_GRP_F)) + +#define CNL_PORT_PCS_DW1_LN0(port) _MMIO(_PICK(port, \ _CNL_PORT_PCS_DW1_LN0_AE, \ _CNL_PORT_PCS_DW1_LN0_B, \ _CNL_PORT_PCS_DW1_LN0_C, \ _CNL_PORT_PCS_DW1_LN0_D, \ _CNL_PORT_PCS_DW1_LN0_AE, \ - _CNL_PORT_PCS_DW1_LN0_F) + _CNL_PORT_PCS_DW1_LN0_F)) +#define _ICL_PORT_PCS_DW1_GRP_A 0x162604 +#define _ICL_PORT_PCS_DW1_GRP_B 0x6C604 +#define _ICL_PORT_PCS_DW1_LN0_A 0x162804 +#define _ICL_PORT_PCS_DW1_LN0_B 0x6C804 +#define ICL_PORT_PCS_DW1_GRP(port) _MMIO_PORT(port,\ + _ICL_PORT_PCS_DW1_GRP_A, \ + _ICL_PORT_PCS_DW1_GRP_B) +#define ICL_PORT_PCS_DW1_LN0(port) _MMIO_PORT(port, \ + _ICL_PORT_PCS_DW1_LN0_A, \ + _ICL_PORT_PCS_DW1_LN0_B) #define COMMON_KEEPER_EN (1 << 26) -#define _CNL_PORT_TX_DW2_GRP_AE 0x162348 -#define _CNL_PORT_TX_DW2_GRP_B 0x1623C8 -#define _CNL_PORT_TX_DW2_GRP_C 0x162B48 -#define _CNL_PORT_TX_DW2_GRP_D 0x162BC8 -#define _CNL_PORT_TX_DW2_GRP_F 0x162A48 -#define _CNL_PORT_TX_DW2_LN0_AE 0x162448 -#define _CNL_PORT_TX_DW2_LN0_B 0x162648 -#define _CNL_PORT_TX_DW2_LN0_C 0x162C48 -#define _CNL_PORT_TX_DW2_LN0_D 0x162E48 -#define _CNL_PORT_TX_DW2_LN0_F 0x162848 -#define CNL_PORT_TX_DW2_GRP(port) _MMIO_PORT6(port, \ - _CNL_PORT_TX_DW2_GRP_AE, \ - _CNL_PORT_TX_DW2_GRP_B, \ - _CNL_PORT_TX_DW2_GRP_C, \ - _CNL_PORT_TX_DW2_GRP_D, \ - _CNL_PORT_TX_DW2_GRP_AE, \ - _CNL_PORT_TX_DW2_GRP_F) -#define CNL_PORT_TX_DW2_LN0(port) _MMIO_PORT6(port, \ - _CNL_PORT_TX_DW2_LN0_AE, \ - _CNL_PORT_TX_DW2_LN0_B, \ - _CNL_PORT_TX_DW2_LN0_C, \ - _CNL_PORT_TX_DW2_LN0_D, \ - _CNL_PORT_TX_DW2_LN0_AE, \ - _CNL_PORT_TX_DW2_LN0_F) -#define SWING_SEL_UPPER(x) ((x >> 3) << 15) +/* CNL Port TX registers */ +#define _CNL_PORT_TX_AE_GRP_OFFSET 0x162340 +#define _CNL_PORT_TX_B_GRP_OFFSET 0x1623C0 +#define _CNL_PORT_TX_C_GRP_OFFSET 0x162B40 +#define _CNL_PORT_TX_D_GRP_OFFSET 0x162BC0 +#define _CNL_PORT_TX_F_GRP_OFFSET 0x162A40 +#define _CNL_PORT_TX_AE_LN0_OFFSET 0x162440 +#define _CNL_PORT_TX_B_LN0_OFFSET 0x162640 +#define _CNL_PORT_TX_C_LN0_OFFSET 0x162C40 +#define _CNL_PORT_TX_D_LN0_OFFSET 0x162E40 +#define _CNL_PORT_TX_F_LN0_OFFSET 0x162840 +#define _CNL_PORT_TX_DW_GRP(port, dw) (_PICK((port), \ + _CNL_PORT_TX_AE_GRP_OFFSET, \ + _CNL_PORT_TX_B_GRP_OFFSET, \ + _CNL_PORT_TX_B_GRP_OFFSET, \ + _CNL_PORT_TX_D_GRP_OFFSET, \ + _CNL_PORT_TX_AE_GRP_OFFSET, \ + _CNL_PORT_TX_F_GRP_OFFSET) + \ + 4*(dw)) +#define _CNL_PORT_TX_DW_LN0(port, dw) (_PICK((port), \ + _CNL_PORT_TX_AE_LN0_OFFSET, \ + _CNL_PORT_TX_B_LN0_OFFSET, \ + _CNL_PORT_TX_B_LN0_OFFSET, \ + _CNL_PORT_TX_D_LN0_OFFSET, \ + _CNL_PORT_TX_AE_LN0_OFFSET, \ + _CNL_PORT_TX_F_LN0_OFFSET) + \ + 4*(dw)) + +#define CNL_PORT_TX_DW2_GRP(port) _MMIO(_CNL_PORT_TX_DW_GRP((port), 2)) +#define CNL_PORT_TX_DW2_LN0(port) _MMIO(_CNL_PORT_TX_DW_LN0((port), 2)) +#define _ICL_PORT_TX_DW2_GRP_A 0x162688 +#define _ICL_PORT_TX_DW2_GRP_B 0x6C688 +#define _ICL_PORT_TX_DW2_LN0_A 0x162888 +#define _ICL_PORT_TX_DW2_LN0_B 0x6C888 +#define ICL_PORT_TX_DW2_GRP(port) _MMIO_PORT(port, \ + _ICL_PORT_TX_DW2_GRP_A, \ + _ICL_PORT_TX_DW2_GRP_B) +#define ICL_PORT_TX_DW2_LN0(port) _MMIO_PORT(port, \ + _ICL_PORT_TX_DW2_LN0_A, \ + _ICL_PORT_TX_DW2_LN0_B) +#define SWING_SEL_UPPER(x) (((x) >> 3) << 15) #define SWING_SEL_UPPER_MASK (1 << 15) -#define SWING_SEL_LOWER(x) ((x & 0x7) << 11) +#define SWING_SEL_LOWER(x) (((x) & 0x7) << 11) #define SWING_SEL_LOWER_MASK (0x7 << 11) #define RCOMP_SCALAR(x) ((x) << 0) #define RCOMP_SCALAR_MASK (0xFF << 0) -#define _CNL_PORT_TX_DW4_GRP_AE 0x162350 -#define _CNL_PORT_TX_DW4_GRP_B 0x1623D0 -#define _CNL_PORT_TX_DW4_GRP_C 0x162B50 -#define _CNL_PORT_TX_DW4_GRP_D 0x162BD0 -#define _CNL_PORT_TX_DW4_GRP_F 0x162A50 #define _CNL_PORT_TX_DW4_LN0_AE 0x162450 #define _CNL_PORT_TX_DW4_LN1_AE 0x1624D0 -#define _CNL_PORT_TX_DW4_LN0_B 0x162650 -#define _CNL_PORT_TX_DW4_LN0_C 0x162C50 -#define _CNL_PORT_TX_DW4_LN0_D 0x162E50 -#define _CNL_PORT_TX_DW4_LN0_F 0x162850 -#define CNL_PORT_TX_DW4_GRP(port) _MMIO_PORT6(port, \ - _CNL_PORT_TX_DW4_GRP_AE, \ - _CNL_PORT_TX_DW4_GRP_B, \ - _CNL_PORT_TX_DW4_GRP_C, \ - _CNL_PORT_TX_DW4_GRP_D, \ - _CNL_PORT_TX_DW4_GRP_AE, \ - _CNL_PORT_TX_DW4_GRP_F) -#define CNL_PORT_TX_DW4_LN(port, ln) _MMIO_PORT6_LN(port, ln, \ - _CNL_PORT_TX_DW4_LN0_AE, \ - _CNL_PORT_TX_DW4_LN1_AE, \ - _CNL_PORT_TX_DW4_LN0_B, \ - _CNL_PORT_TX_DW4_LN0_C, \ - _CNL_PORT_TX_DW4_LN0_D, \ - _CNL_PORT_TX_DW4_LN0_AE, \ - _CNL_PORT_TX_DW4_LN0_F) +#define CNL_PORT_TX_DW4_GRP(port) _MMIO(_CNL_PORT_TX_DW_GRP((port), 4)) +#define CNL_PORT_TX_DW4_LN0(port) _MMIO(_CNL_PORT_TX_DW_LN0((port), 4)) +#define CNL_PORT_TX_DW4_LN(port, ln) _MMIO(_CNL_PORT_TX_DW_LN0((port), 4) + \ + (ln * (_CNL_PORT_TX_DW4_LN1_AE - \ + _CNL_PORT_TX_DW4_LN0_AE))) +#define _ICL_PORT_TX_DW4_GRP_A 0x162690 +#define _ICL_PORT_TX_DW4_GRP_B 0x6C690 +#define _ICL_PORT_TX_DW4_LN0_A 0x162890 +#define _ICL_PORT_TX_DW4_LN1_A 0x162990 +#define _ICL_PORT_TX_DW4_LN0_B 0x6C890 +#define ICL_PORT_TX_DW4_GRP(port) _MMIO_PORT(port, \ + _ICL_PORT_TX_DW4_GRP_A, \ + _ICL_PORT_TX_DW4_GRP_B) +#define ICL_PORT_TX_DW4_LN(port, ln) _MMIO(_PORT(port, \ + _ICL_PORT_TX_DW4_LN0_A, \ + _ICL_PORT_TX_DW4_LN0_B) + \ + (ln * (_ICL_PORT_TX_DW4_LN1_A - \ + _ICL_PORT_TX_DW4_LN0_A))) #define LOADGEN_SELECT (1 << 31) #define POST_CURSOR_1(x) ((x) << 12) #define POST_CURSOR_1_MASK (0x3F << 12) @@ -2029,64 +1802,147 @@ enum i915_power_well_id { #define CURSOR_COEFF(x) ((x) << 0) #define CURSOR_COEFF_MASK (0x3F << 0) -#define _CNL_PORT_TX_DW5_GRP_AE 0x162354 -#define _CNL_PORT_TX_DW5_GRP_B 0x1623D4 -#define _CNL_PORT_TX_DW5_GRP_C 0x162B54 -#define _CNL_PORT_TX_DW5_GRP_D 0x162BD4 -#define _CNL_PORT_TX_DW5_GRP_F 0x162A54 -#define _CNL_PORT_TX_DW5_LN0_AE 0x162454 -#define _CNL_PORT_TX_DW5_LN0_B 0x162654 -#define _CNL_PORT_TX_DW5_LN0_C 0x162C54 -#define _CNL_PORT_TX_DW5_LN0_D 0x162E54 -#define _CNL_PORT_TX_DW5_LN0_F 0x162854 -#define CNL_PORT_TX_DW5_GRP(port) _MMIO_PORT6(port, \ - _CNL_PORT_TX_DW5_GRP_AE, \ - _CNL_PORT_TX_DW5_GRP_B, \ - _CNL_PORT_TX_DW5_GRP_C, \ - _CNL_PORT_TX_DW5_GRP_D, \ - _CNL_PORT_TX_DW5_GRP_AE, \ - _CNL_PORT_TX_DW5_GRP_F) -#define CNL_PORT_TX_DW5_LN0(port) _MMIO_PORT6(port, \ - _CNL_PORT_TX_DW5_LN0_AE, \ - _CNL_PORT_TX_DW5_LN0_B, \ - _CNL_PORT_TX_DW5_LN0_C, \ - _CNL_PORT_TX_DW5_LN0_D, \ - _CNL_PORT_TX_DW5_LN0_AE, \ - _CNL_PORT_TX_DW5_LN0_F) +#define CNL_PORT_TX_DW5_GRP(port) _MMIO(_CNL_PORT_TX_DW_GRP((port), 5)) +#define CNL_PORT_TX_DW5_LN0(port) _MMIO(_CNL_PORT_TX_DW_LN0((port), 5)) +#define _ICL_PORT_TX_DW5_GRP_A 0x162694 +#define _ICL_PORT_TX_DW5_GRP_B 0x6C694 +#define _ICL_PORT_TX_DW5_LN0_A 0x162894 +#define _ICL_PORT_TX_DW5_LN0_B 0x6C894 +#define ICL_PORT_TX_DW5_GRP(port) _MMIO_PORT(port, \ + _ICL_PORT_TX_DW5_GRP_A, \ + _ICL_PORT_TX_DW5_GRP_B) +#define ICL_PORT_TX_DW5_LN0(port) _MMIO_PORT(port, \ + _ICL_PORT_TX_DW5_LN0_A, \ + _ICL_PORT_TX_DW5_LN0_B) #define TX_TRAINING_EN (1 << 31) +#define TAP2_DISABLE (1 << 30) #define TAP3_DISABLE (1 << 29) #define SCALING_MODE_SEL(x) ((x) << 18) #define SCALING_MODE_SEL_MASK (0x7 << 18) #define RTERM_SELECT(x) ((x) << 3) #define RTERM_SELECT_MASK (0x7 << 3) -#define _CNL_PORT_TX_DW7_GRP_AE 0x16235C -#define _CNL_PORT_TX_DW7_GRP_B 0x1623DC -#define _CNL_PORT_TX_DW7_GRP_C 0x162B5C -#define _CNL_PORT_TX_DW7_GRP_D 0x162BDC -#define _CNL_PORT_TX_DW7_GRP_F 0x162A5C -#define _CNL_PORT_TX_DW7_LN0_AE 0x16245C -#define _CNL_PORT_TX_DW7_LN0_B 0x16265C -#define _CNL_PORT_TX_DW7_LN0_C 0x162C5C -#define _CNL_PORT_TX_DW7_LN0_D 0x162E5C -#define _CNL_PORT_TX_DW7_LN0_F 0x16285C -#define CNL_PORT_TX_DW7_GRP(port) _MMIO_PORT6(port, \ - _CNL_PORT_TX_DW7_GRP_AE, \ - _CNL_PORT_TX_DW7_GRP_B, \ - _CNL_PORT_TX_DW7_GRP_C, \ - _CNL_PORT_TX_DW7_GRP_D, \ - _CNL_PORT_TX_DW7_GRP_AE, \ - _CNL_PORT_TX_DW7_GRP_F) -#define CNL_PORT_TX_DW7_LN0(port) _MMIO_PORT6(port, \ - _CNL_PORT_TX_DW7_LN0_AE, \ - _CNL_PORT_TX_DW7_LN0_B, \ - _CNL_PORT_TX_DW7_LN0_C, \ - _CNL_PORT_TX_DW7_LN0_D, \ - _CNL_PORT_TX_DW7_LN0_AE, \ - _CNL_PORT_TX_DW7_LN0_F) +#define CNL_PORT_TX_DW7_GRP(port) _MMIO(_CNL_PORT_TX_DW_GRP((port), 7)) +#define CNL_PORT_TX_DW7_LN0(port) _MMIO(_CNL_PORT_TX_DW_LN0((port), 7)) #define N_SCALAR(x) ((x) << 24) #define N_SCALAR_MASK (0x7F << 24) +#define _ICL_MG_PHY_PORT_LN(port, ln, ln0p1, ln0p2, ln1p1) \ + _MMIO(_PORT((port) - PORT_C, ln0p1, ln0p2) + (ln) * ((ln1p1) - (ln0p1))) + +#define _ICL_MG_TX_LINK_PARAMS_TX1LN0_PORT1 0x16812C +#define _ICL_MG_TX_LINK_PARAMS_TX1LN1_PORT1 0x16852C +#define _ICL_MG_TX_LINK_PARAMS_TX1LN0_PORT2 0x16912C +#define _ICL_MG_TX_LINK_PARAMS_TX1LN1_PORT2 0x16952C +#define _ICL_MG_TX_LINK_PARAMS_TX1LN0_PORT3 0x16A12C +#define _ICL_MG_TX_LINK_PARAMS_TX1LN1_PORT3 0x16A52C +#define _ICL_MG_TX_LINK_PARAMS_TX1LN0_PORT4 0x16B12C +#define _ICL_MG_TX_LINK_PARAMS_TX1LN1_PORT4 0x16B52C +#define ICL_PORT_MG_TX1_LINK_PARAMS(port, ln) \ + _ICL_MG_PHY_PORT_LN(port, ln, _ICL_MG_TX_LINK_PARAMS_TX1LN0_PORT1, \ + _ICL_MG_TX_LINK_PARAMS_TX1LN0_PORT2, \ + _ICL_MG_TX_LINK_PARAMS_TX1LN1_PORT1) + +#define _ICL_MG_TX_LINK_PARAMS_TX2LN0_PORT1 0x1680AC +#define _ICL_MG_TX_LINK_PARAMS_TX2LN1_PORT1 0x1684AC +#define _ICL_MG_TX_LINK_PARAMS_TX2LN0_PORT2 0x1690AC +#define _ICL_MG_TX_LINK_PARAMS_TX2LN1_PORT2 0x1694AC +#define _ICL_MG_TX_LINK_PARAMS_TX2LN0_PORT3 0x16A0AC +#define _ICL_MG_TX_LINK_PARAMS_TX2LN1_PORT3 0x16A4AC +#define _ICL_MG_TX_LINK_PARAMS_TX2LN0_PORT4 0x16B0AC +#define _ICL_MG_TX_LINK_PARAMS_TX2LN1_PORT4 0x16B4AC +#define ICL_PORT_MG_TX2_LINK_PARAMS(port, ln) \ + _ICL_MG_PHY_PORT_LN(port, ln, _ICL_MG_TX_LINK_PARAMS_TX2LN0_PORT1, \ + _ICL_MG_TX_LINK_PARAMS_TX2LN0_PORT2, \ + _ICL_MG_TX_LINK_PARAMS_TX2LN1_PORT1) +#define CRI_USE_FS32 (1 << 5) + +#define _ICL_MG_TX_PISO_READLOAD_TX1LN0_PORT1 0x16814C +#define _ICL_MG_TX_PISO_READLOAD_TX1LN1_PORT1 0x16854C +#define _ICL_MG_TX_PISO_READLOAD_TX1LN0_PORT2 0x16914C +#define _ICL_MG_TX_PISO_READLOAD_TX1LN1_PORT2 0x16954C +#define _ICL_MG_TX_PISO_READLOAD_TX1LN0_PORT3 0x16A14C +#define _ICL_MG_TX_PISO_READLOAD_TX1LN1_PORT3 0x16A54C +#define _ICL_MG_TX_PISO_READLOAD_TX1LN0_PORT4 0x16B14C +#define _ICL_MG_TX_PISO_READLOAD_TX1LN1_PORT4 0x16B54C +#define ICL_PORT_MG_TX1_PISO_READLOAD(port, ln) \ + _ICL_MG_PHY_PORT_LN(port, ln, _ICL_MG_TX_PISO_READLOAD_TX1LN0_PORT1, \ + _ICL_MG_TX_PISO_READLOAD_TX1LN0_PORT2, \ + _ICL_MG_TX_PISO_READLOAD_TX1LN1_PORT1) + +#define _ICL_MG_TX_PISO_READLOAD_TX2LN0_PORT1 0x1680CC +#define _ICL_MG_TX_PISO_READLOAD_TX2LN1_PORT1 0x1684CC +#define _ICL_MG_TX_PISO_READLOAD_TX2LN0_PORT2 0x1690CC +#define _ICL_MG_TX_PISO_READLOAD_TX2LN1_PORT2 0x1694CC +#define _ICL_MG_TX_PISO_READLOAD_TX2LN0_PORT3 0x16A0CC +#define _ICL_MG_TX_PISO_READLOAD_TX2LN1_PORT3 0x16A4CC +#define _ICL_MG_TX_PISO_READLOAD_TX2LN0_PORT4 0x16B0CC +#define _ICL_MG_TX_PISO_READLOAD_TX2LN1_PORT4 0x16B4CC +#define ICL_PORT_MG_TX2_PISO_READLOAD(port, ln) \ + _ICL_MG_PHY_PORT_LN(port, ln, _ICL_MG_TX_PISO_READLOAD_TX2LN0_PORT1, \ + _ICL_MG_TX_PISO_READLOAD_TX2LN0_PORT2, \ + _ICL_MG_TX_PISO_READLOAD_TX2LN1_PORT1) +#define CRI_CALCINIT (1 << 1) + +#define _ICL_MG_TX_SWINGCTRL_TX1LN0_PORT1 0x168148 +#define _ICL_MG_TX_SWINGCTRL_TX1LN1_PORT1 0x168548 +#define _ICL_MG_TX_SWINGCTRL_TX1LN0_PORT2 0x169148 +#define _ICL_MG_TX_SWINGCTRL_TX1LN1_PORT2 0x169548 +#define _ICL_MG_TX_SWINGCTRL_TX1LN0_PORT3 0x16A148 +#define _ICL_MG_TX_SWINGCTRL_TX1LN1_PORT3 0x16A548 +#define _ICL_MG_TX_SWINGCTRL_TX1LN0_PORT4 0x16B148 +#define _ICL_MG_TX_SWINGCTRL_TX1LN1_PORT4 0x16B548 +#define ICL_PORT_MG_TX1_SWINGCTRL(port, ln) \ + _ICL_MG_PHY_PORT_LN(port, ln, _ICL_MG_TX_SWINGCTRL_TX1LN0_PORT1, \ + _ICL_MG_TX_SWINGCTRL_TX1LN0_PORT2, \ + _ICL_MG_TX_SWINGCTRL_TX1LN1_PORT1) + +#define _ICL_MG_TX_SWINGCTRL_TX2LN0_PORT1 0x1680C8 +#define _ICL_MG_TX_SWINGCTRL_TX2LN1_PORT1 0x1684C8 +#define _ICL_MG_TX_SWINGCTRL_TX2LN0_PORT2 0x1690C8 +#define _ICL_MG_TX_SWINGCTRL_TX2LN1_PORT2 0x1694C8 +#define _ICL_MG_TX_SWINGCTRL_TX2LN0_PORT3 0x16A0C8 +#define _ICL_MG_TX_SWINGCTRL_TX2LN1_PORT3 0x16A4C8 +#define _ICL_MG_TX_SWINGCTRL_TX2LN0_PORT4 0x16B0C8 +#define _ICL_MG_TX_SWINGCTRL_TX2LN1_PORT4 0x16B4C8 +#define ICL_PORT_MG_TX2_SWINGCTRL(port, ln) \ + _ICL_MG_PHY_PORT_LN(port, ln, _ICL_MG_TX_SWINGCTRL_TX2LN0_PORT1, \ + _ICL_MG_TX_SWINGCTRL_TX2LN0_PORT2, \ + _ICL_MG_TX_SWINGCTRL_TX2LN1_PORT1) +#define CRI_TXDEEMPH_OVERRIDE_17_12(x) ((x) << 0) +#define CRI_TXDEEMPH_OVERRIDE_17_12_MASK (0x3F << 0) + +#define _ICL_MG_TX_DRVCTRL_TX1LN0_PORT1 0x168144 +#define _ICL_MG_TX_DRVCTRL_TX1LN1_PORT1 0x168544 +#define _ICL_MG_TX_DRVCTRL_TX1LN0_PORT2 0x169144 +#define _ICL_MG_TX_DRVCTRL_TX1LN1_PORT2 0x169544 +#define _ICL_MG_TX_DRVCTRL_TX1LN0_PORT3 0x16A144 +#define _ICL_MG_TX_DRVCTRL_TX1LN1_PORT3 0x16A544 +#define _ICL_MG_TX_DRVCTRL_TX1LN0_PORT4 0x16B144 +#define _ICL_MG_TX_DRVCTRL_TX1LN1_PORT4 0x16B544 +#define ICL_PORT_MG_TX1_DRVCTRL(port, ln) \ + _ICL_MG_PHY_PORT_LN(port, ln, _ICL_MG_TX_DRVCTRL_TX1LN0_PORT1, \ + _ICL_MG_TX_DRVCTRL_TX1LN0_PORT2, \ + _ICL_MG_TX_DRVCTRL_TX1LN1_PORT1) + +#define _ICL_MG_TX_DRVCTRL_TX2LN0_PORT1 0x1680C4 +#define _ICL_MG_TX_DRVCTRL_TX2LN1_PORT1 0x1684C4 +#define _ICL_MG_TX_DRVCTRL_TX2LN0_PORT2 0x1690C4 +#define _ICL_MG_TX_DRVCTRL_TX2LN1_PORT2 0x1694C4 +#define _ICL_MG_TX_DRVCTRL_TX2LN0_PORT3 0x16A0C4 +#define _ICL_MG_TX_DRVCTRL_TX2LN1_PORT3 0x16A4C4 +#define _ICL_MG_TX_DRVCTRL_TX2LN0_PORT4 0x16B0C4 +#define _ICL_MG_TX_DRVCTRL_TX2LN1_PORT4 0x16B4C4 +#define ICL_PORT_MG_TX2_DRVCTRL(port, ln) \ + _ICL_MG_PHY_PORT_LN(port, ln, _ICL_MG_TX_DRVCTRL_TX2LN0_PORT1, \ + _ICL_MG_TX_DRVCTRL_TX2LN0_PORT2, \ + _ICL_MG_TX_DRVCTRL_TX2LN1_PORT1) +#define CRI_TXDEEMPH_OVERRIDE_11_6(x) ((x) << 24) +#define CRI_TXDEEMPH_OVERRIDE_11_6_MASK (0x3F << 24) +#define CRI_TXDEEMPH_OVERRIDE_EN (1 << 22) +#define CRI_TXDEEMPH_OVERRIDE_5_0(x) ((x) << 16) +#define CRI_TXDEEMPH_OVERRIDE_5_0_MASK (0x3F << 16) + /* The spec defines this only for BXT PHY0, but lets assume that this * would exist for PHY1 too if it had a second channel. */ @@ -2473,6 +2329,10 @@ enum i915_power_well_id { #define GEN8_MCR_SLICE_MASK GEN8_MCR_SLICE(3) #define GEN8_MCR_SUBSLICE(subslice) (((subslice) & 3) << 24) #define GEN8_MCR_SUBSLICE_MASK GEN8_MCR_SUBSLICE(3) +#define GEN11_MCR_SLICE(slice) (((slice) & 0xf) << 27) +#define GEN11_MCR_SLICE_MASK GEN11_MCR_SLICE(0xf) +#define GEN11_MCR_SUBSLICE(subslice) (((subslice) & 0x7) << 24) +#define GEN11_MCR_SUBSLICE_MASK GEN11_MCR_SUBSLICE(0x7) #define RING_IPEIR(base) _MMIO((base)+0x64) #define RING_IPEHR(base) _MMIO((base)+0x68) /* @@ -2867,6 +2727,19 @@ enum i915_power_well_id { #define GEN10_EU_DISABLE3 _MMIO(0x9140) #define GEN10_EU_DIS_SS_MASK 0xff +#define GEN11_GT_VEBOX_VDBOX_DISABLE _MMIO(0x9140) +#define GEN11_GT_VDBOX_DISABLE_MASK 0xff +#define GEN11_GT_VEBOX_DISABLE_SHIFT 16 +#define GEN11_GT_VEBOX_DISABLE_MASK (0xff << GEN11_GT_VEBOX_DISABLE_SHIFT) + +#define GEN11_EU_DISABLE _MMIO(0x9134) +#define GEN11_EU_DIS_MASK 0xFF + +#define GEN11_GT_SLICE_ENABLE _MMIO(0x9138) +#define GEN11_GT_S_ENA_MASK 0xFF + +#define GEN11_GT_SUBSLICE_DISABLE _MMIO(0x913C) + #define GEN6_BSD_SLEEP_PSMI_CONTROL _MMIO(0x12050) #define GEN6_BSD_SLEEP_MSG_DISABLE (1 << 0) #define GEN6_BSD_SLEEP_FLUSH_DISABLE (1 << 2) @@ -3951,6 +3824,9 @@ enum { #define _CLKGATE_DIS_PSL_A 0x46520 #define _CLKGATE_DIS_PSL_B 0x46524 #define _CLKGATE_DIS_PSL_C 0x46528 +#define DUPS1_GATING_DIS (1 << 15) +#define DUPS2_GATING_DIS (1 << 19) +#define DUPS3_GATING_DIS (1 << 23) #define DPF_GATING_DIS (1 << 10) #define DPF_RAM_GATING_DIS (1 << 9) #define DPFR_GATING_DIS (1 << 8) @@ -3964,6 +3840,7 @@ enum { #define SLICE_UNIT_LEVEL_CLKGATE _MMIO(0x94d4) #define SARBUNIT_CLKGATE_DIS (1 << 5) #define RCCUNIT_CLKGATE_DIS (1 << 7) +#define MSCUNIT_CLKGATE_DIS (1 << 10) #define SUBSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9524) #define GWUNIT_CLKGATE_DIS (1 << 16) @@ -3971,6 +3848,9 @@ enum { #define UNSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9434) #define VFUNIT_CLKGATE_DIS (1 << 20) +#define INF_UNIT_LEVEL_CLKGATE _MMIO(0x9560) +#define CGPSF_CLKGATE_DIS (1 << 3) + /* * Display engine regs */ @@ -4150,7 +4030,20 @@ enum { #define EDP_PSR_TP1_TIME_0us (3<<4) #define EDP_PSR_IDLE_FRAME_SHIFT 0 +/* Bspec claims those aren't shifted but stay at 0x64800 */ +#define EDP_PSR_IMR _MMIO(0x64834) +#define EDP_PSR_IIR _MMIO(0x64838) +#define EDP_PSR_ERROR(trans) (1 << (((trans) * 8 + 10) & 31)) +#define EDP_PSR_POST_EXIT(trans) (1 << (((trans) * 8 + 9) & 31)) +#define EDP_PSR_PRE_ENTRY(trans) (1 << (((trans) * 8 + 8) & 31)) + #define EDP_PSR_AUX_CTL _MMIO(dev_priv->psr_mmio_base + 0x10) +#define EDP_PSR_AUX_CTL_TIME_OUT_MASK (3 << 26) +#define EDP_PSR_AUX_CTL_MESSAGE_SIZE_MASK (0x1f << 20) +#define EDP_PSR_AUX_CTL_PRECHARGE_2US_MASK (0xf << 16) +#define EDP_PSR_AUX_CTL_ERROR_INTERRUPT (1 << 11) +#define EDP_PSR_AUX_CTL_BIT_CLOCK_2X_MASK (0x7ff) + #define EDP_PSR_AUX_DATA(i) _MMIO(dev_priv->psr_mmio_base + 0x14 + (i) * 4) /* 5 registers */ #define EDP_PSR_STATUS _MMIO(dev_priv->psr_mmio_base + 0x40) @@ -4180,17 +4073,19 @@ enum { #define EDP_PSR_PERF_CNT _MMIO(dev_priv->psr_mmio_base + 0x44) #define EDP_PSR_PERF_CNT_MASK 0xffffff -#define EDP_PSR_DEBUG _MMIO(dev_priv->psr_mmio_base + 0x60) +#define EDP_PSR_DEBUG _MMIO(dev_priv->psr_mmio_base + 0x60) /* PSR_MASK on SKL+ */ #define EDP_PSR_DEBUG_MASK_MAX_SLEEP (1<<28) #define EDP_PSR_DEBUG_MASK_LPSP (1<<27) #define EDP_PSR_DEBUG_MASK_MEMUP (1<<26) #define EDP_PSR_DEBUG_MASK_HPD (1<<25) #define EDP_PSR_DEBUG_MASK_DISP_REG_WRITE (1<<16) -#define EDP_PSR_DEBUG_EXIT_ON_PIXEL_UNDERRUN (1<<15) +#define EDP_PSR_DEBUG_EXIT_ON_PIXEL_UNDERRUN (1<<15) /* SKL+ */ #define EDP_PSR2_CTL _MMIO(0x6f900) #define EDP_PSR2_ENABLE (1<<31) #define EDP_SU_TRACK_ENABLE (1<<30) +#define EDP_Y_COORDINATE_VALID (1<<26) /* GLK and CNL+ */ +#define EDP_Y_COORDINATE_ENABLE (1<<25) /* GLK and CNL+ */ #define EDP_MAX_SU_DISABLE_TIME(t) ((t)<<20) #define EDP_MAX_SU_DISABLE_TIME_MASK (0x1f<<20) #define EDP_PSR2_TP2_TIME_500 (0<<8) @@ -4200,8 +4095,32 @@ enum { #define EDP_PSR2_TP2_TIME_MASK (3<<8) #define EDP_PSR2_FRAME_BEFORE_SU_SHIFT 4 #define EDP_PSR2_FRAME_BEFORE_SU_MASK (0xf<<4) -#define EDP_PSR2_IDLE_MASK 0xf #define EDP_PSR2_FRAME_BEFORE_SU(a) ((a)<<4) +#define EDP_PSR2_IDLE_FRAME_MASK 0xf +#define EDP_PSR2_IDLE_FRAME_SHIFT 0 + +#define _PSR_EVENT_TRANS_A 0x60848 +#define _PSR_EVENT_TRANS_B 0x61848 +#define _PSR_EVENT_TRANS_C 0x62848 +#define _PSR_EVENT_TRANS_D 0x63848 +#define _PSR_EVENT_TRANS_EDP 0x6F848 +#define PSR_EVENT(trans) _MMIO_TRANS2(trans, _PSR_EVENT_TRANS_A) +#define PSR_EVENT_PSR2_WD_TIMER_EXPIRE (1 << 17) +#define PSR_EVENT_PSR2_DISABLED (1 << 16) +#define PSR_EVENT_SU_DIRTY_FIFO_UNDERRUN (1 << 15) +#define PSR_EVENT_SU_CRC_FIFO_UNDERRUN (1 << 14) +#define PSR_EVENT_GRAPHICS_RESET (1 << 12) +#define PSR_EVENT_PCH_INTERRUPT (1 << 11) +#define PSR_EVENT_MEMORY_UP (1 << 10) +#define PSR_EVENT_FRONT_BUFFER_MODIFY (1 << 9) +#define PSR_EVENT_WD_TIMER_EXPIRE (1 << 8) +#define PSR_EVENT_PIPE_REGISTERS_UPDATE (1 << 6) +#define PSR_EVENT_REGISTER_UPDATE (1 << 5) +#define PSR_EVENT_HDCP_ENABLE (1 << 4) +#define PSR_EVENT_KVMR_SESSION_ENABLE (1 << 3) +#define PSR_EVENT_VBI_ENABLE (1 << 2) +#define PSR_EVENT_LPSP_MODE_EXIT (1 << 1) +#define PSR_EVENT_PSR_DISABLE (1 << 0) #define EDP_PSR2_STATUS _MMIO(0x6f940) #define EDP_PSR2_STATUS_STATE_MASK (0xf<<28) @@ -5265,8 +5184,6 @@ enum { #define DP_LINK_TRAIN_OFF (3 << 28) #define DP_LINK_TRAIN_MASK (3 << 28) #define DP_LINK_TRAIN_SHIFT 28 -#define DP_LINK_TRAIN_PAT_3_CHV (1 << 14) -#define DP_LINK_TRAIN_MASK_CHV ((3 << 28)|(1<<14)) /* CPT Link training mode */ #define DP_LINK_TRAIN_PAT_1_CPT (0 << 8) @@ -6009,6 +5926,7 @@ enum { #define CURSIZE _MMIO(0x700a0) /* 845/865 */ #define _CUR_FBC_CTL_A 0x700a0 /* ivb+ */ #define CUR_FBC_CTL_EN (1 << 31) +#define _CURASURFLIVE 0x700ac /* g4x+ */ #define _CURBCNTR 0x700c0 #define _CURBBASE 0x700c4 #define _CURBPOS 0x700c8 @@ -6025,6 +5943,7 @@ enum { #define CURBASE(pipe) _CURSOR2(pipe, _CURABASE) #define CURPOS(pipe) _CURSOR2(pipe, _CURAPOS) #define CUR_FBC_CTL(pipe) _CURSOR2(pipe, _CUR_FBC_CTL_A) +#define CURSURFLIVE(pipe) _CURSOR2(pipe, _CURASURFLIVE) #define CURSOR_A_OFFSET 0x70080 #define CURSOR_B_OFFSET 0x700c0 @@ -6492,9 +6411,9 @@ enum { #define _PLANE_COLOR_CTL_1_A 0x701CC /* GLK+ */ #define _PLANE_COLOR_CTL_2_A 0x702CC /* GLK+ */ #define _PLANE_COLOR_CTL_3_A 0x703CC /* GLK+ */ -#define PLANE_COLOR_PIPE_GAMMA_ENABLE (1 << 30) +#define PLANE_COLOR_PIPE_GAMMA_ENABLE (1 << 30) /* Pre-ICL */ #define PLANE_COLOR_YUV_RANGE_CORRECTION_DISABLE (1 << 28) -#define PLANE_COLOR_PIPE_CSC_ENABLE (1 << 23) +#define PLANE_COLOR_PIPE_CSC_ENABLE (1 << 23) /* Pre-ICL */ #define PLANE_COLOR_CSC_MODE_BYPASS (0 << 17) #define PLANE_COLOR_CSC_MODE_YUV601_TO_RGB709 (1 << 17) #define PLANE_COLOR_CSC_MODE_YUV709_TO_RGB709 (2 << 17) @@ -6589,6 +6508,9 @@ enum { #define _PLANE_BUF_CFG_1_B 0x7127c #define _PLANE_BUF_CFG_2_B 0x7137c +#define SKL_DDB_ENTRY_MASK 0x3FF +#define ICL_DDB_ENTRY_MASK 0x7FF +#define DDB_ENTRY_END_SHIFT 16 #define _PLANE_BUF_CFG_1(pipe) \ _PIPE(pipe, _PLANE_BUF_CFG_1_A, _PLANE_BUF_CFG_1_B) #define _PLANE_BUF_CFG_2(pipe) \ @@ -6779,6 +6701,8 @@ enum { #define PS_SCALER_MODE_MASK (3 << 28) #define PS_SCALER_MODE_DYN (0 << 28) #define PS_SCALER_MODE_HQ (1 << 28) +#define SKL_PS_SCALER_MODE_NV12 (2 << 28) +#define PS_SCALER_MODE_PLANAR (1 << 29) #define PS_PLANE_SEL_MASK (7 << 25) #define PS_PLANE_SEL(plane) (((plane) + 1) << 25) #define PS_FILTER_MASK (3 << 23) @@ -6950,6 +6874,7 @@ enum { #define DE_PCH_EVENT_IVB (1<<28) #define DE_DP_A_HOTPLUG_IVB (1<<27) #define DE_AUX_CHANNEL_A_IVB (1<<26) +#define DE_EDP_PSR_INT_HSW (1<<19) #define DE_SPRITEC_FLIP_DONE_IVB (1<<14) #define DE_PLANEC_FLIP_DONE_IVB (1<<13) #define DE_PIPEC_VBLANK_IVB (1<<10) @@ -7074,6 +6999,7 @@ enum { #define GEN8_DE_MISC_IIR _MMIO(0x44468) #define GEN8_DE_MISC_IER _MMIO(0x4446c) #define GEN8_DE_MISC_GSE (1 << 27) +#define GEN8_DE_EDP_PSR (1 << 19) #define GEN8_PCU_ISR _MMIO(0x444e0) #define GEN8_PCU_IMR _MMIO(0x444e4) @@ -7117,7 +7043,9 @@ enum { #define GEN11_INTR_IDENTITY_REG0 _MMIO(0x190060) #define GEN11_INTR_IDENTITY_REG1 _MMIO(0x190064) #define GEN11_INTR_DATA_VALID (1 << 31) -#define GEN11_INTR_ENGINE_MASK (0xffff) +#define GEN11_INTR_ENGINE_CLASS(x) (((x) & GENMASK(18, 16)) >> 16) +#define GEN11_INTR_ENGINE_INSTANCE(x) (((x) & GENMASK(25, 20)) >> 20) +#define GEN11_INTR_ENGINE_INTR(x) ((x) & 0xffff) #define GEN11_INTR_IDENTITY_REG(x) _MMIO(0x190060 + (x * 4)) @@ -7197,6 +7125,7 @@ enum { #define CHICKEN_TRANS_A 0x420c0 #define CHICKEN_TRANS_B 0x420c4 #define CHICKEN_TRANS(trans) _MMIO_TRANS(trans, CHICKEN_TRANS_A, CHICKEN_TRANS_B) +#define VSC_DATA_SEL_SOFTWARE_CONTROL (1<<25) /* GLK and CNL+ */ #define DDI_TRAINING_OVERRIDE_ENABLE (1<<19) #define DDI_TRAINING_OVERRIDE_VALUE (1<<18) #define DDIE_TRAINING_OVERRIDE_ENABLE (1<<17) /* CHICKEN_TRANS_A only */ @@ -7301,18 +7230,22 @@ enum { #define GEN7_L3CNTLREG3 _MMIO(0xB024) #define GEN7_L3_CHICKEN_MODE_REGISTER _MMIO(0xB030) -#define GEN7_WA_L3_CHICKEN_MODE 0x20000000 +#define GEN7_WA_L3_CHICKEN_MODE 0x20000000 +#define GEN10_L3_CHICKEN_MODE_REGISTER _MMIO(0xB114) +#define GEN11_I2M_WRITE_DISABLE (1 << 28) #define GEN7_L3SQCREG4 _MMIO(0xb034) #define L3SQ_URB_READ_CAM_MATCH_DISABLE (1<<27) #define GEN8_L3SQCREG4 _MMIO(0xb118) -#define GEN8_LQSC_RO_PERF_DIS (1<<27) -#define GEN8_LQSC_FLUSH_COHERENT_LINES (1<<21) +#define GEN11_LQSC_CLEAN_EVICT_DISABLE (1 << 6) +#define GEN8_LQSC_RO_PERF_DIS (1 << 27) +#define GEN8_LQSC_FLUSH_COHERENT_LINES (1 << 21) /* GEN8 chicken */ #define HDC_CHICKEN0 _MMIO(0x7300) #define CNL_HDC_CHICKEN0 _MMIO(0xE5F0) +#define ICL_HDC_MODE _MMIO(0xE5F4) #define HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE (1<<15) #define HDC_FENCE_DEST_SLM_DISABLE (1<<14) #define HDC_DONOT_FETCH_MEM_WHEN_MASKED (1<<11) @@ -7326,6 +7259,9 @@ enum { #define SLICE_ECO_CHICKEN0 _MMIO(0x7308) #define PIXEL_MASK_CAMMING_DISABLE (1 << 14) +#define GEN9_WM_CHICKEN3 _MMIO(0x5588) +#define GEN9_FACTOR_IN_CLR_VAL_HIZ (1 << 9) + /* WaCatErrorRejectionIssue */ #define GEN7_SQ_CHICKEN_MBCUNIT_CONFIG _MMIO(0x9030) #define GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB (1<<11) @@ -8324,8 +8260,30 @@ enum { #define GEN8_DOP_CLOCK_GATE_GUC_ENABLE (1<<4) #define GEN8_DOP_CLOCK_GATE_MEDIA_ENABLE (1<<6) -#define GEN8_GARBCNTL _MMIO(0xB004) -#define GEN9_GAPS_TSV_CREDIT_DISABLE (1<<7) +#define GEN8_GARBCNTL _MMIO(0xB004) +#define GEN9_GAPS_TSV_CREDIT_DISABLE (1 << 7) +#define GEN11_ARBITRATION_PRIO_ORDER_MASK (0x3f << 22) +#define GEN11_HASH_CTRL_EXCL_MASK (0x7f << 0) +#define GEN11_HASH_CTRL_EXCL_BIT0 (1 << 0) + +#define GEN11_GLBLINVL _MMIO(0xB404) +#define GEN11_BANK_HASH_ADDR_EXCL_MASK (0x7f << 5) +#define GEN11_BANK_HASH_ADDR_EXCL_BIT0 (1 << 5) + +#define GEN10_DFR_RATIO_EN_AND_CHICKEN _MMIO(0x9550) +#define DFR_DISABLE (1 << 9) + +#define GEN11_GACB_PERF_CTRL _MMIO(0x4B80) +#define GEN11_HASH_CTRL_MASK (0x3 << 12 | 0xf << 0) +#define GEN11_HASH_CTRL_BIT0 (1 << 0) +#define GEN11_HASH_CTRL_BIT4 (1 << 12) + +#define GEN11_LSN_UNSLCVC _MMIO(0xB43C) +#define GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC (1 << 9) +#define GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC (1 << 7) + +#define GAMW_ECO_DEV_RW_IA_REG _MMIO(0x4080) +#define GAMW_ECO_DEV_CTX_RELOAD_DISABLE (1 << 7) /* IVYBRIDGE DPF */ #define GEN7_L3CDERRST1(slice) _MMIO(0xB008 + (slice) * 0x200) /* L3CD Error Status 1 */ @@ -8834,6 +8792,12 @@ enum skl_power_gate { #define PORT_CLK_SEL_NONE (7<<29) #define PORT_CLK_SEL_MASK (7<<29) +/* On ICL+ this is the same as PORT_CLK_SEL, but all bits change. */ +#define DDI_CLK_SEL(port) PORT_CLK_SEL(port) +#define DDI_CLK_SEL_NONE (0x0 << 28) +#define DDI_CLK_SEL_MG (0x8 << 28) +#define DDI_CLK_SEL_MASK (0xF << 28) + /* Transcoder clock selection */ #define _TRANS_CLK_SEL_A 0x46140 #define _TRANS_CLK_SEL_B 0x46144 @@ -8964,6 +8928,7 @@ enum skl_power_gate { * CNL Clocks */ #define DPCLKA_CFGCR0 _MMIO(0x6C200) +#define DPCLKA_CFGCR0_ICL _MMIO(0x164280) #define DPCLKA_CFGCR0_DDI_CLK_OFF(port) (1 << ((port) == PORT_F ? 23 : \ (port)+10)) #define DPCLKA_CFGCR0_DDI_CLK_SEL_SHIFT(port) ((port) == PORT_F ? 21 : \ @@ -8980,10 +8945,141 @@ enum skl_power_gate { #define PLL_POWER_STATE (1 << 26) #define CNL_DPLL_ENABLE(pll) _MMIO_PLL(pll, DPLL0_ENABLE, DPLL1_ENABLE) +#define _MG_PLL1_ENABLE 0x46030 +#define _MG_PLL2_ENABLE 0x46034 +#define _MG_PLL3_ENABLE 0x46038 +#define _MG_PLL4_ENABLE 0x4603C +/* Bits are the same as DPLL0_ENABLE */ +#define MG_PLL_ENABLE(port) _MMIO_PORT((port) - PORT_C, _MG_PLL1_ENABLE, \ + _MG_PLL2_ENABLE) + +#define _MG_REFCLKIN_CTL_PORT1 0x16892C +#define _MG_REFCLKIN_CTL_PORT2 0x16992C +#define _MG_REFCLKIN_CTL_PORT3 0x16A92C +#define _MG_REFCLKIN_CTL_PORT4 0x16B92C +#define MG_REFCLKIN_CTL_OD_2_MUX(x) ((x) << 8) +#define MG_REFCLKIN_CTL(port) _MMIO_PORT((port) - PORT_C, \ + _MG_REFCLKIN_CTL_PORT1, \ + _MG_REFCLKIN_CTL_PORT2) + +#define _MG_CLKTOP2_CORECLKCTL1_PORT1 0x1688D8 +#define _MG_CLKTOP2_CORECLKCTL1_PORT2 0x1698D8 +#define _MG_CLKTOP2_CORECLKCTL1_PORT3 0x16A8D8 +#define _MG_CLKTOP2_CORECLKCTL1_PORT4 0x16B8D8 +#define MG_CLKTOP2_CORECLKCTL1_B_DIVRATIO(x) ((x) << 16) +#define MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO(x) ((x) << 8) +#define MG_CLKTOP2_CORECLKCTL1(port) _MMIO_PORT((port) - PORT_C, \ + _MG_CLKTOP2_CORECLKCTL1_PORT1, \ + _MG_CLKTOP2_CORECLKCTL1_PORT2) + +#define _MG_CLKTOP2_HSCLKCTL_PORT1 0x1688D4 +#define _MG_CLKTOP2_HSCLKCTL_PORT2 0x1698D4 +#define _MG_CLKTOP2_HSCLKCTL_PORT3 0x16A8D4 +#define _MG_CLKTOP2_HSCLKCTL_PORT4 0x16B8D4 +#define MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL(x) ((x) << 16) +#define MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL(x) ((x) << 14) +#define MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO(x) ((x) << 12) +#define MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO(x) ((x) << 8) +#define MG_CLKTOP2_HSCLKCTL(port) _MMIO_PORT((port) - PORT_C, \ + _MG_CLKTOP2_HSCLKCTL_PORT1, \ + _MG_CLKTOP2_HSCLKCTL_PORT2) + +#define _MG_PLL_DIV0_PORT1 0x168A00 +#define _MG_PLL_DIV0_PORT2 0x169A00 +#define _MG_PLL_DIV0_PORT3 0x16AA00 +#define _MG_PLL_DIV0_PORT4 0x16BA00 +#define MG_PLL_DIV0_FRACNEN_H (1 << 30) +#define MG_PLL_DIV0_FBDIV_FRAC(x) ((x) << 8) +#define MG_PLL_DIV0_FBDIV_INT(x) ((x) << 0) +#define MG_PLL_DIV0(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_DIV0_PORT1, \ + _MG_PLL_DIV0_PORT2) + +#define _MG_PLL_DIV1_PORT1 0x168A04 +#define _MG_PLL_DIV1_PORT2 0x169A04 +#define _MG_PLL_DIV1_PORT3 0x16AA04 +#define _MG_PLL_DIV1_PORT4 0x16BA04 +#define MG_PLL_DIV1_IREF_NDIVRATIO(x) ((x) << 16) +#define MG_PLL_DIV1_DITHER_DIV_1 (0 << 12) +#define MG_PLL_DIV1_DITHER_DIV_2 (1 << 12) +#define MG_PLL_DIV1_DITHER_DIV_4 (2 << 12) +#define MG_PLL_DIV1_DITHER_DIV_8 (3 << 12) +#define MG_PLL_DIV1_NDIVRATIO(x) ((x) << 4) +#define MG_PLL_DIV1_FBPREDIV(x) ((x) << 0) +#define MG_PLL_DIV1(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_DIV1_PORT1, \ + _MG_PLL_DIV1_PORT2) + +#define _MG_PLL_LF_PORT1 0x168A08 +#define _MG_PLL_LF_PORT2 0x169A08 +#define _MG_PLL_LF_PORT3 0x16AA08 +#define _MG_PLL_LF_PORT4 0x16BA08 +#define MG_PLL_LF_TDCTARGETCNT(x) ((x) << 24) +#define MG_PLL_LF_AFCCNTSEL_256 (0 << 20) +#define MG_PLL_LF_AFCCNTSEL_512 (1 << 20) +#define MG_PLL_LF_GAINCTRL(x) ((x) << 16) +#define MG_PLL_LF_INT_COEFF(x) ((x) << 8) +#define MG_PLL_LF_PROP_COEFF(x) ((x) << 0) +#define MG_PLL_LF(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_LF_PORT1, \ + _MG_PLL_LF_PORT2) + +#define _MG_PLL_FRAC_LOCK_PORT1 0x168A0C +#define _MG_PLL_FRAC_LOCK_PORT2 0x169A0C +#define _MG_PLL_FRAC_LOCK_PORT3 0x16AA0C +#define _MG_PLL_FRAC_LOCK_PORT4 0x16BA0C +#define MG_PLL_FRAC_LOCK_TRUELOCK_CRIT_32 (1 << 18) +#define MG_PLL_FRAC_LOCK_EARLYLOCK_CRIT_32 (1 << 16) +#define MG_PLL_FRAC_LOCK_LOCKTHRESH(x) ((x) << 11) +#define MG_PLL_FRAC_LOCK_DCODITHEREN (1 << 10) +#define MG_PLL_FRAC_LOCK_FEEDFWRDCAL_EN (1 << 8) +#define MG_PLL_FRAC_LOCK_FEEDFWRDGAIN(x) ((x) << 0) +#define MG_PLL_FRAC_LOCK(port) _MMIO_PORT((port) - PORT_C, \ + _MG_PLL_FRAC_LOCK_PORT1, \ + _MG_PLL_FRAC_LOCK_PORT2) + +#define _MG_PLL_SSC_PORT1 0x168A10 +#define _MG_PLL_SSC_PORT2 0x169A10 +#define _MG_PLL_SSC_PORT3 0x16AA10 +#define _MG_PLL_SSC_PORT4 0x16BA10 +#define MG_PLL_SSC_EN (1 << 28) +#define MG_PLL_SSC_TYPE(x) ((x) << 26) +#define MG_PLL_SSC_STEPLENGTH(x) ((x) << 16) +#define MG_PLL_SSC_STEPNUM(x) ((x) << 10) +#define MG_PLL_SSC_FLLEN (1 << 9) +#define MG_PLL_SSC_STEPSIZE(x) ((x) << 0) +#define MG_PLL_SSC(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_SSC_PORT1, \ + _MG_PLL_SSC_PORT2) + +#define _MG_PLL_BIAS_PORT1 0x168A14 +#define _MG_PLL_BIAS_PORT2 0x169A14 +#define _MG_PLL_BIAS_PORT3 0x16AA14 +#define _MG_PLL_BIAS_PORT4 0x16BA14 +#define MG_PLL_BIAS_BIAS_GB_SEL(x) ((x) << 30) +#define MG_PLL_BIAS_INIT_DCOAMP(x) ((x) << 24) +#define MG_PLL_BIAS_BIAS_BONUS(x) ((x) << 16) +#define MG_PLL_BIAS_BIASCAL_EN (1 << 15) +#define MG_PLL_BIAS_CTRIM(x) ((x) << 8) +#define MG_PLL_BIAS_VREF_RDAC(x) ((x) << 5) +#define MG_PLL_BIAS_IREFTRIM(x) ((x) << 0) +#define MG_PLL_BIAS(port) _MMIO_PORT((port) - PORT_C, _MG_PLL_BIAS_PORT1, \ + _MG_PLL_BIAS_PORT2) + +#define _MG_PLL_TDC_COLDST_BIAS_PORT1 0x168A18 +#define _MG_PLL_TDC_COLDST_BIAS_PORT2 0x169A18 +#define _MG_PLL_TDC_COLDST_BIAS_PORT3 0x16AA18 +#define _MG_PLL_TDC_COLDST_BIAS_PORT4 0x16BA18 +#define MG_PLL_TDC_COLDST_IREFINT_EN (1 << 27) +#define MG_PLL_TDC_COLDST_REFBIAS_START_PULSE_W(x) ((x) << 17) +#define MG_PLL_TDC_COLDST_COLDSTART (1 << 16) +#define MG_PLL_TDC_TDCOVCCORR_EN (1 << 2) +#define MG_PLL_TDC_TDCSEL(x) ((x) << 0) +#define MG_PLL_TDC_COLDST_BIAS(port) _MMIO_PORT((port) - PORT_C, \ + _MG_PLL_TDC_COLDST_BIAS_PORT1, \ + _MG_PLL_TDC_COLDST_BIAS_PORT2) + #define _CNL_DPLL0_CFGCR0 0x6C000 #define _CNL_DPLL1_CFGCR0 0x6C080 #define DPLL_CFGCR0_HDMI_MODE (1 << 30) #define DPLL_CFGCR0_SSC_ENABLE (1 << 29) +#define DPLL_CFGCR0_SSC_ENABLE_ICL (1 << 25) #define DPLL_CFGCR0_LINK_RATE_MASK (0xf << 25) #define DPLL_CFGCR0_LINK_RATE_2700 (0 << 25) #define DPLL_CFGCR0_LINK_RATE_1350 (1 << 25) @@ -9017,8 +9113,19 @@ enum skl_power_gate { #define DPLL_CFGCR1_PDIV_5 (4 << 2) #define DPLL_CFGCR1_PDIV_7 (8 << 2) #define DPLL_CFGCR1_CENTRAL_FREQ (3 << 0) +#define DPLL_CFGCR1_CENTRAL_FREQ_8400 (3 << 0) #define CNL_DPLL_CFGCR1(pll) _MMIO_PLL(pll, _CNL_DPLL0_CFGCR1, _CNL_DPLL1_CFGCR1) +#define _ICL_DPLL0_CFGCR0 0x164000 +#define _ICL_DPLL1_CFGCR0 0x164080 +#define ICL_DPLL_CFGCR0(pll) _MMIO_PLL(pll, _ICL_DPLL0_CFGCR0, \ + _ICL_DPLL1_CFGCR0) + +#define _ICL_DPLL0_CFGCR1 0x164004 +#define _ICL_DPLL1_CFGCR1 0x164084 +#define ICL_DPLL_CFGCR1(pll) _MMIO_PLL(pll, _ICL_DPLL0_CFGCR1, \ + _ICL_DPLL1_CFGCR1) + /* BXT display engine PLL */ #define BXT_DE_PLL_CTL _MMIO(0x6d000) #define BXT_DE_PLL_RATIO(x) (x) /* {60,65,100} * 19.2MHz */ @@ -9790,6 +9897,13 @@ enum skl_power_gate { #define GEN9_MFX1_MOCS(i) _MMIO(0xca00 + (i) * 4) /* Media 1 MOCS registers */ #define GEN9_VEBOX_MOCS(i) _MMIO(0xcb00 + (i) * 4) /* Video MOCS registers */ #define GEN9_BLT_MOCS(i) _MMIO(0xcc00 + (i) * 4) /* Blitter MOCS registers */ +/* Media decoder 2 MOCS registers */ +#define GEN11_MFX2_MOCS(i) _MMIO(0x10000 + (i) * 4) + +#define GEN10_SCRATCH_LNCF2 _MMIO(0xb0a0) +#define PMFLUSHDONE_LNICRSDROP (1 << 20) +#define PMFLUSH_GAPL3UNBLOCK (1 << 21) +#define PMFLUSHDONE_LNEBLK (1 << 22) /* gamt regs */ #define GEN8_L3_LRA_1_GPGPU _MMIO(0x4dd4) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 282f576..8928894 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -49,7 +49,7 @@ static const char *i915_fence_get_timeline_name(struct dma_fence *fence) if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) return "signaled"; - return to_request(fence)->timeline->common->name; + return to_request(fence)->timeline->name; } static bool i915_fence_signaled(struct dma_fence *fence) @@ -59,11 +59,7 @@ static bool i915_fence_signaled(struct dma_fence *fence) static bool i915_fence_enable_signaling(struct dma_fence *fence) { - if (i915_fence_signaled(fence)) - return false; - - intel_engine_enable_signaling(to_request(fence), true); - return !i915_fence_signaled(fence); + return intel_engine_enable_signaling(to_request(fence), true); } static signed long i915_fence_wait(struct dma_fence *fence, @@ -129,22 +125,22 @@ i915_dependency_free(struct drm_i915_private *i915, } static void -__i915_priotree_add_dependency(struct i915_priotree *pt, - struct i915_priotree *signal, - struct i915_dependency *dep, - unsigned long flags) +__i915_sched_node_add_dependency(struct i915_sched_node *node, + struct i915_sched_node *signal, + struct i915_dependency *dep, + unsigned long flags) { INIT_LIST_HEAD(&dep->dfs_link); list_add(&dep->wait_link, &signal->waiters_list); - list_add(&dep->signal_link, &pt->signalers_list); + list_add(&dep->signal_link, &node->signalers_list); dep->signaler = signal; dep->flags = flags; } static int -i915_priotree_add_dependency(struct drm_i915_private *i915, - struct i915_priotree *pt, - struct i915_priotree *signal) +i915_sched_node_add_dependency(struct drm_i915_private *i915, + struct i915_sched_node *node, + struct i915_sched_node *signal) { struct i915_dependency *dep; @@ -152,16 +148,18 @@ i915_priotree_add_dependency(struct drm_i915_private *i915, if (!dep) return -ENOMEM; - __i915_priotree_add_dependency(pt, signal, dep, I915_DEPENDENCY_ALLOC); + __i915_sched_node_add_dependency(node, signal, dep, + I915_DEPENDENCY_ALLOC); return 0; } static void -i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt) +i915_sched_node_fini(struct drm_i915_private *i915, + struct i915_sched_node *node) { - struct i915_dependency *dep, *next; + struct i915_dependency *dep, *tmp; - GEM_BUG_ON(!list_empty(&pt->link)); + GEM_BUG_ON(!list_empty(&node->link)); /* * Everyone we depended upon (the fences we wait to be signaled) @@ -169,8 +167,8 @@ i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt) * However, retirement is run independently on each timeline and * so we may be called out-of-order. */ - list_for_each_entry_safe(dep, next, &pt->signalers_list, signal_link) { - GEM_BUG_ON(!i915_priotree_signaled(dep->signaler)); + list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) { + GEM_BUG_ON(!i915_sched_node_signaled(dep->signaler)); GEM_BUG_ON(!list_empty(&dep->dfs_link)); list_del(&dep->wait_link); @@ -179,8 +177,8 @@ i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt) } /* Remove ourselves from everyone who depends upon us */ - list_for_each_entry_safe(dep, next, &pt->waiters_list, wait_link) { - GEM_BUG_ON(dep->signaler != pt); + list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) { + GEM_BUG_ON(dep->signaler != node); GEM_BUG_ON(!list_empty(&dep->dfs_link)); list_del(&dep->signal_link); @@ -190,17 +188,18 @@ i915_priotree_fini(struct drm_i915_private *i915, struct i915_priotree *pt) } static void -i915_priotree_init(struct i915_priotree *pt) +i915_sched_node_init(struct i915_sched_node *node) { - INIT_LIST_HEAD(&pt->signalers_list); - INIT_LIST_HEAD(&pt->waiters_list); - INIT_LIST_HEAD(&pt->link); - pt->priority = I915_PRIORITY_INVALID; + INIT_LIST_HEAD(&node->signalers_list); + INIT_LIST_HEAD(&node->waiters_list); + INIT_LIST_HEAD(&node->link); + node->attr.priority = I915_PRIORITY_INVALID; } static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) { struct intel_engine_cs *engine; + struct i915_timeline *timeline; enum intel_engine_id id; int ret; @@ -211,30 +210,37 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) if (ret) return ret; + GEM_BUG_ON(i915->gt.active_requests); + /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ for_each_engine(engine, i915, id) { - struct i915_gem_timeline *timeline; - struct intel_timeline *tl = engine->timeline; + GEM_TRACE("%s seqno %d (current %d) -> %d\n", + engine->name, + engine->timeline.seqno, + intel_engine_get_seqno(engine), + seqno); - if (!i915_seqno_passed(seqno, tl->seqno)) { + if (!i915_seqno_passed(seqno, engine->timeline.seqno)) { /* Flush any waiters before we reuse the seqno */ intel_engine_disarm_breadcrumbs(engine); + intel_engine_init_hangcheck(engine); GEM_BUG_ON(!list_empty(&engine->breadcrumbs.signals)); } /* Check we are idle before we fiddle with hw state! */ GEM_BUG_ON(!intel_engine_is_idle(engine)); - GEM_BUG_ON(i915_gem_active_isset(&engine->timeline->last_request)); + GEM_BUG_ON(i915_gem_active_isset(&engine->timeline.last_request)); /* Finally reset hw state */ intel_engine_init_global_seqno(engine, seqno); - tl->seqno = seqno; - - list_for_each_entry(timeline, &i915->gt.timelines, link) - memset(timeline->engine[id].global_sync, 0, - sizeof(timeline->engine[id].global_sync)); + engine->timeline.seqno = seqno; } + list_for_each_entry(timeline, &i915->gt.timelines, link) + memset(timeline->global_sync, 0, sizeof(timeline->global_sync)); + + i915->gt.request_serial = seqno; + return 0; } @@ -251,83 +257,37 @@ int i915_gem_set_global_seqno(struct drm_device *dev, u32 seqno) return reset_all_global_seqno(i915, seqno - 1); } -static void mark_busy(struct drm_i915_private *i915) +static int reserve_gt(struct drm_i915_private *i915) { - if (i915->gt.awake) - return; - - GEM_BUG_ON(!i915->gt.active_requests); - - intel_runtime_pm_get_noresume(i915); + int ret; /* - * It seems that the DMC likes to transition between the DC states a lot - * when there are no connected displays (no active power domains) during - * command submission. - * - * This activity has negative impact on the performance of the chip with - * huge latencies observed in the interrupt handler and elsewhere. + * Reservation is fine until we may need to wrap around * - * Work around it by grabbing a GT IRQ power domain whilst there is any - * GT activity, preventing any DC state transitions. + * By incrementing the serial for every request, we know that no + * individual engine may exceed that serial (as each is reset to 0 + * on any wrap). This protects even the most pessimistic of migrations + * of every request from all engines onto just one. */ - intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); - - i915->gt.awake = true; - if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */ - i915->gt.epoch = 1; - - intel_enable_gt_powersave(i915); - i915_update_gfx_val(i915); - if (INTEL_GEN(i915) >= 6) - gen6_rps_busy(i915); - i915_pmu_gt_unparked(i915); - - intel_engines_unpark(i915); - - i915_queue_hangcheck(i915); - - queue_delayed_work(i915->wq, - &i915->gt.retire_work, - round_jiffies_up_relative(HZ)); -} - -static int reserve_engine(struct intel_engine_cs *engine) -{ - struct drm_i915_private *i915 = engine->i915; - u32 active = ++engine->timeline->inflight_seqnos; - u32 seqno = engine->timeline->seqno; - int ret; - - /* Reservation is fine until we need to wrap around */ - if (unlikely(add_overflows(seqno, active))) { + while (unlikely(++i915->gt.request_serial == 0)) { ret = reset_all_global_seqno(i915, 0); if (ret) { - engine->timeline->inflight_seqnos--; + i915->gt.request_serial--; return ret; } } if (!i915->gt.active_requests++) - mark_busy(i915); + i915_gem_unpark(i915); return 0; } -static void unreserve_engine(struct intel_engine_cs *engine) +static void unreserve_gt(struct drm_i915_private *i915) { - struct drm_i915_private *i915 = engine->i915; - - if (!--i915->gt.active_requests) { - /* Cancel the mark_busy() from our reserve_engine() */ - GEM_BUG_ON(!i915->gt.awake); - mod_delayed_work(i915->wq, - &i915->gt.idle_work, - msecs_to_jiffies(100)); - } - - GEM_BUG_ON(!engine->timeline->inflight_seqnos); - engine->timeline->inflight_seqnos--; + GEM_BUG_ON(!i915->gt.active_requests); + if (!--i915->gt.active_requests) + i915_gem_park(i915); } void i915_gem_retire_noop(struct i915_gem_active *active, @@ -338,6 +298,7 @@ void i915_gem_retire_noop(struct i915_gem_active *active, static void advance_ring(struct i915_request *request) { + struct intel_ring *ring = request->ring; unsigned int tail; /* @@ -349,7 +310,8 @@ static void advance_ring(struct i915_request *request) * Note this requires that we are always called in request * completion order. */ - if (list_is_last(&request->ring_link, &request->ring->request_list)) { + GEM_BUG_ON(!list_is_first(&request->ring_link, &ring->request_list)); + if (list_is_last(&request->ring_link, &ring->request_list)) { /* * We may race here with execlists resubmitting this request * as we retire it. The resubmission will move the ring->tail @@ -358,13 +320,14 @@ static void advance_ring(struct i915_request *request) * is just about to be. Either works, if we miss the last two * noops - they are safe to be replayed on a reset. */ - tail = READ_ONCE(request->ring->tail); + tail = READ_ONCE(request->tail); + list_del(&ring->active_link); } else { tail = request->postfix; } - list_del(&request->ring_link); + list_del_init(&request->ring_link); - request->ring->head = tail; + ring->head = tail; } static void free_capture_list(struct i915_request *request) @@ -380,25 +343,84 @@ static void free_capture_list(struct i915_request *request) } } +static void __retire_engine_request(struct intel_engine_cs *engine, + struct i915_request *rq) +{ + GEM_TRACE("%s(%s) fence %llx:%d, global=%d, current %d\n", + __func__, engine->name, + rq->fence.context, rq->fence.seqno, + rq->global_seqno, + intel_engine_get_seqno(engine)); + + GEM_BUG_ON(!i915_request_completed(rq)); + + local_irq_disable(); + + spin_lock(&engine->timeline.lock); + GEM_BUG_ON(!list_is_first(&rq->link, &engine->timeline.requests)); + list_del_init(&rq->link); + spin_unlock(&engine->timeline.lock); + + spin_lock(&rq->lock); + if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)) + dma_fence_signal_locked(&rq->fence); + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags)) + intel_engine_cancel_signaling(rq); + if (rq->waitboost) { + GEM_BUG_ON(!atomic_read(&rq->i915->gt_pm.rps.num_waiters)); + atomic_dec(&rq->i915->gt_pm.rps.num_waiters); + } + spin_unlock(&rq->lock); + + local_irq_enable(); + + /* + * The backing object for the context is done after switching to the + * *next* context. Therefore we cannot retire the previous context until + * the next context has already started running. However, since we + * cannot take the required locks at i915_request_submit() we + * defer the unpinning of the active context to now, retirement of + * the subsequent request. + */ + if (engine->last_retired_context) + intel_context_unpin(engine->last_retired_context, engine); + engine->last_retired_context = rq->ctx; +} + +static void __retire_engine_upto(struct intel_engine_cs *engine, + struct i915_request *rq) +{ + struct i915_request *tmp; + + if (list_empty(&rq->link)) + return; + + do { + tmp = list_first_entry(&engine->timeline.requests, + typeof(*tmp), link); + + GEM_BUG_ON(tmp->engine != engine); + __retire_engine_request(engine, tmp); + } while (tmp != rq); +} + static void i915_request_retire(struct i915_request *request) { - struct intel_engine_cs *engine = request->engine; struct i915_gem_active *active, *next; + GEM_TRACE("%s fence %llx:%d, global=%d, current %d\n", + request->engine->name, + request->fence.context, request->fence.seqno, + request->global_seqno, + intel_engine_get_seqno(request->engine)); + lockdep_assert_held(&request->i915->drm.struct_mutex); GEM_BUG_ON(!i915_sw_fence_signaled(&request->submit)); GEM_BUG_ON(!i915_request_completed(request)); - GEM_BUG_ON(!request->i915->gt.active_requests); trace_i915_request_retire(request); - spin_lock_irq(&engine->timeline->lock); - list_del_init(&request->link); - spin_unlock_irq(&engine->timeline->lock); - - unreserve_engine(request->engine); advance_ring(request); - free_capture_list(request); /* @@ -434,73 +456,74 @@ static void i915_request_retire(struct i915_request *request) /* Retirement decays the ban score as it is a sign of ctx progress */ atomic_dec_if_positive(&request->ctx->ban_score); + intel_context_unpin(request->ctx, request->engine); - /* - * The backing object for the context is done after switching to the - * *next* context. Therefore we cannot retire the previous context until - * the next context has already started running. However, since we - * cannot take the required locks at i915_request_submit() we - * defer the unpinning of the active context to now, retirement of - * the subsequent request. - */ - if (engine->last_retired_context) - engine->context_unpin(engine, engine->last_retired_context); - engine->last_retired_context = request->ctx; + __retire_engine_upto(request->engine, request); - spin_lock_irq(&request->lock); - if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags)) - dma_fence_signal_locked(&request->fence); - if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags)) - intel_engine_cancel_signaling(request); - if (request->waitboost) { - GEM_BUG_ON(!atomic_read(&request->i915->gt_pm.rps.num_waiters)); - atomic_dec(&request->i915->gt_pm.rps.num_waiters); - } - spin_unlock_irq(&request->lock); + unreserve_gt(request->i915); - i915_priotree_fini(request->i915, &request->priotree); + i915_sched_node_fini(request->i915, &request->sched); i915_request_put(request); } void i915_request_retire_upto(struct i915_request *rq) { - struct intel_engine_cs *engine = rq->engine; + struct intel_ring *ring = rq->ring; struct i915_request *tmp; + GEM_TRACE("%s fence %llx:%d, global=%d, current %d\n", + rq->engine->name, + rq->fence.context, rq->fence.seqno, + rq->global_seqno, + intel_engine_get_seqno(rq->engine)); + lockdep_assert_held(&rq->i915->drm.struct_mutex); GEM_BUG_ON(!i915_request_completed(rq)); - if (list_empty(&rq->link)) + if (list_empty(&rq->ring_link)) return; do { - tmp = list_first_entry(&engine->timeline->requests, - typeof(*tmp), link); + tmp = list_first_entry(&ring->request_list, + typeof(*tmp), ring_link); i915_request_retire(tmp); } while (tmp != rq); } -static u32 timeline_get_seqno(struct intel_timeline *tl) +static u32 timeline_get_seqno(struct i915_timeline *tl) { return ++tl->seqno; } +static void move_to_timeline(struct i915_request *request, + struct i915_timeline *timeline) +{ + GEM_BUG_ON(request->timeline == &request->engine->timeline); + lockdep_assert_held(&request->engine->timeline.lock); + + spin_lock_nested(&request->timeline->lock, SINGLE_DEPTH_NESTING); + list_move_tail(&request->link, &timeline->requests); + spin_unlock(&request->timeline->lock); +} + void __i915_request_submit(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; - struct intel_timeline *timeline; u32 seqno; + GEM_TRACE("%s fence %llx:%d -> global=%d, current %d\n", + engine->name, + request->fence.context, request->fence.seqno, + engine->timeline.seqno + 1, + intel_engine_get_seqno(engine)); + GEM_BUG_ON(!irqs_disabled()); - lockdep_assert_held(&engine->timeline->lock); + lockdep_assert_held(&engine->timeline.lock); - /* Transfer from per-context onto the global per-engine timeline */ - timeline = engine->timeline; - GEM_BUG_ON(timeline == request->timeline); GEM_BUG_ON(request->global_seqno); - seqno = timeline_get_seqno(timeline); + seqno = timeline_get_seqno(&engine->timeline); GEM_BUG_ON(!seqno); GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), seqno)); @@ -514,9 +537,8 @@ void __i915_request_submit(struct i915_request *request) engine->emit_breadcrumb(request, request->ring->vaddr + request->postfix); - spin_lock(&request->timeline->lock); - list_move_tail(&request->link, &timeline->requests); - spin_unlock(&request->timeline->lock); + /* Transfer from per-context onto the global per-engine timeline */ + move_to_timeline(request, &engine->timeline); trace_i915_request_execute(request); @@ -529,30 +551,35 @@ void i915_request_submit(struct i915_request *request) unsigned long flags; /* Will be called from irq-context when using foreign fences. */ - spin_lock_irqsave(&engine->timeline->lock, flags); + spin_lock_irqsave(&engine->timeline.lock, flags); __i915_request_submit(request); - spin_unlock_irqrestore(&engine->timeline->lock, flags); + spin_unlock_irqrestore(&engine->timeline.lock, flags); } void __i915_request_unsubmit(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; - struct intel_timeline *timeline; + + GEM_TRACE("%s fence %llx:%d <- global=%d, current %d\n", + engine->name, + request->fence.context, request->fence.seqno, + request->global_seqno, + intel_engine_get_seqno(engine)); GEM_BUG_ON(!irqs_disabled()); - lockdep_assert_held(&engine->timeline->lock); + lockdep_assert_held(&engine->timeline.lock); /* * Only unwind in reverse order, required so that the per-context list * is kept in seqno/ring order. */ GEM_BUG_ON(!request->global_seqno); - GEM_BUG_ON(request->global_seqno != engine->timeline->seqno); + GEM_BUG_ON(request->global_seqno != engine->timeline.seqno); GEM_BUG_ON(i915_seqno_passed(intel_engine_get_seqno(engine), request->global_seqno)); - engine->timeline->seqno--; + engine->timeline.seqno--; /* We may be recursing from the signal callback of another i915 fence */ spin_lock_nested(&request->lock, SINGLE_DEPTH_NESTING); @@ -562,12 +589,7 @@ void __i915_request_unsubmit(struct i915_request *request) spin_unlock(&request->lock); /* Transfer back from the global per-engine timeline to per-context */ - timeline = request->timeline; - GEM_BUG_ON(timeline == engine->timeline); - - spin_lock(&timeline->lock); - list_move(&request->link, &timeline->requests); - spin_unlock(&timeline->lock); + move_to_timeline(request, request->timeline); /* * We don't need to wake_up any waiters on request->execute, they @@ -584,11 +606,11 @@ void i915_request_unsubmit(struct i915_request *request) unsigned long flags; /* Will be called from irq-context when using foreign fences. */ - spin_lock_irqsave(&engine->timeline->lock, flags); + spin_lock_irqsave(&engine->timeline.lock, flags); __i915_request_unsubmit(request); - spin_unlock_irqrestore(&engine->timeline->lock, flags); + spin_unlock_irqrestore(&engine->timeline.lock, flags); } static int __i915_sw_fence_call @@ -659,12 +681,12 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) * GGTT space, so do this first before we reserve a seqno for * ourselves. */ - ring = engine->context_pin(engine, ctx); + ring = intel_context_pin(ctx, engine); if (IS_ERR(ring)) return ERR_CAST(ring); GEM_BUG_ON(!ring); - ret = reserve_engine(engine); + ret = reserve_gt(i915); if (ret) goto err_unpin; @@ -672,10 +694,10 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) if (ret) goto err_unreserve; - /* Move the oldest request to the slab-cache (if not in use!) */ - rq = list_first_entry_or_null(&engine->timeline->requests, - typeof(*rq), link); - if (rq && i915_request_completed(rq)) + /* Move our oldest request to the slab-cache (if not in use!) */ + rq = list_first_entry(&ring->request_list, typeof(*rq), ring_link); + if (!list_is_last(&rq->ring_link, &ring->request_list) && + i915_request_completed(rq)) i915_request_retire(rq); /* @@ -735,8 +757,13 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) } } - rq->timeline = i915_gem_context_lookup_timeline(ctx, engine); - GEM_BUG_ON(rq->timeline == engine->timeline); + INIT_LIST_HEAD(&rq->active_list); + rq->i915 = i915; + rq->engine = engine; + rq->ctx = ctx; + rq->ring = ring; + rq->timeline = ring->timeline; + GEM_BUG_ON(rq->timeline == &engine->timeline); spin_lock_init(&rq->lock); dma_fence_init(&rq->fence, @@ -749,13 +776,7 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify); init_waitqueue_head(&rq->execute); - i915_priotree_init(&rq->priotree); - - INIT_LIST_HEAD(&rq->active_list); - rq->i915 = i915; - rq->engine = engine; - rq->ctx = ctx; - rq->ring = ring; + i915_sched_node_init(&rq->sched); /* No zalloc, must clear what we need by hand */ rq->global_seqno = 0; @@ -792,6 +813,9 @@ i915_request_alloc(struct intel_engine_cs *engine, struct i915_gem_context *ctx) if (ret) goto err_unwind; + /* Keep a second pin for the dual retirement along engine and ring */ + __intel_context_pin(rq->ctx, engine); + /* Check that we didn't interrupt ourselves with a new request */ GEM_BUG_ON(rq->timeline->seqno != rq->fence.seqno); return rq; @@ -801,14 +825,14 @@ err_unwind: /* Make sure we didn't add ourselves to external state before freeing */ GEM_BUG_ON(!list_empty(&rq->active_list)); - GEM_BUG_ON(!list_empty(&rq->priotree.signalers_list)); - GEM_BUG_ON(!list_empty(&rq->priotree.waiters_list)); + GEM_BUG_ON(!list_empty(&rq->sched.signalers_list)); + GEM_BUG_ON(!list_empty(&rq->sched.waiters_list)); kmem_cache_free(i915->requests, rq); err_unreserve: - unreserve_engine(engine); + unreserve_gt(i915); err_unpin: - engine->context_unpin(engine, ctx); + intel_context_unpin(ctx, engine); return ERR_PTR(ret); } @@ -824,9 +848,9 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from) return 0; if (to->engine->schedule) { - ret = i915_priotree_add_dependency(to->i915, - &to->priotree, - &from->priotree); + ret = i915_sched_node_add_dependency(to->i915, + &to->sched, + &from->sched); if (ret < 0) return ret; } @@ -904,7 +928,7 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) /* Squash repeated waits to the same timelines */ if (fence->context != rq->i915->mm.unordered_timeline && - intel_timeline_sync_is_later(rq->timeline, fence)) + i915_timeline_sync_is_later(rq->timeline, fence)) continue; if (dma_fence_is_i915(fence)) @@ -918,7 +942,7 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) /* Record the latest fence used against each timeline */ if (fence->context != rq->i915->mm.unordered_timeline) - intel_timeline_sync_set(rq->timeline, fence); + i915_timeline_sync_set(rq->timeline, fence); } while (--nchild); return 0; @@ -995,11 +1019,14 @@ void __i915_request_add(struct i915_request *request, bool flush_caches) { struct intel_engine_cs *engine = request->engine; struct intel_ring *ring = request->ring; - struct intel_timeline *timeline = request->timeline; + struct i915_timeline *timeline = request->timeline; struct i915_request *prev; u32 *cs; int err; + GEM_TRACE("%s fence %llx:%d\n", + engine->name, request->fence.context, request->fence.seqno); + lockdep_assert_held(&request->i915->drm.struct_mutex); trace_i915_request_add(request); @@ -1054,10 +1081,10 @@ void __i915_request_add(struct i915_request *request, bool flush_caches) i915_sw_fence_await_sw_fence(&request->submit, &prev->submit, &request->submitq); if (engine->schedule) - __i915_priotree_add_dependency(&request->priotree, - &prev->priotree, - &request->dep, - 0); + __i915_sched_node_add_dependency(&request->sched, + &prev->sched, + &request->dep, + 0); } spin_lock_irq(&timeline->lock); @@ -1068,6 +1095,8 @@ void __i915_request_add(struct i915_request *request, bool flush_caches) i915_gem_active_set(&timeline->last_request, request); list_add_tail(&request->ring_link, &ring->request_list); + if (list_is_first(&request->ring_link, &ring->request_list)) + list_add(&ring->active_link, &request->i915->gt.active_rings); request->emitted_jiffies = jiffies; /* @@ -1081,12 +1110,11 @@ void __i915_request_add(struct i915_request *request, bool flush_caches) * decide whether to preempt the entire chain so that it is ready to * run at the earliest possible convenience. */ - rcu_read_lock(); + local_bh_disable(); + rcu_read_lock(); /* RCU serialisation for set-wedged protection */ if (engine->schedule) - engine->schedule(request, request->ctx->priority); + engine->schedule(request, &request->ctx->sched); rcu_read_unlock(); - - local_bh_disable(); i915_sw_fence_commit(&request->submit); local_bh_enable(); /* Kick the execlists tasklet if just scheduled */ @@ -1206,11 +1234,13 @@ static bool __i915_spin_request(const struct i915_request *rq, static bool __i915_wait_request_check_and_reset(struct i915_request *request) { - if (likely(!i915_reset_handoff(&request->i915->gpu_error))) + struct i915_gpu_error *error = &request->i915->gpu_error; + + if (likely(!i915_reset_handoff(error))) return false; __set_current_state(TASK_RUNNING); - i915_reset(request->i915, 0); + i915_reset(request->i915, error->stalled_mask, error->reason); return true; } @@ -1373,38 +1403,30 @@ complete: return timeout; } -static void engine_retire_requests(struct intel_engine_cs *engine) +static void ring_retire_requests(struct intel_ring *ring) { struct i915_request *request, *next; - u32 seqno = intel_engine_get_seqno(engine); - LIST_HEAD(retire); - spin_lock_irq(&engine->timeline->lock); list_for_each_entry_safe(request, next, - &engine->timeline->requests, link) { - if (!i915_seqno_passed(seqno, request->global_seqno)) + &ring->request_list, ring_link) { + if (!i915_request_completed(request)) break; - list_move_tail(&request->link, &retire); - } - spin_unlock_irq(&engine->timeline->lock); - - list_for_each_entry_safe(request, next, &retire, link) i915_request_retire(request); + } } void i915_retire_requests(struct drm_i915_private *i915) { - struct intel_engine_cs *engine; - enum intel_engine_id id; + struct intel_ring *ring, *tmp; lockdep_assert_held(&i915->drm.struct_mutex); if (!i915->gt.active_requests) return; - for_each_engine(engine, i915, id) - engine_retire_requests(engine); + list_for_each_entry_safe(ring, tmp, &i915->gt.active_rings, active_link) + ring_retire_requests(ring); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 7d6eb82..eddbd424 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -28,13 +28,16 @@ #include <linux/dma-fence.h> #include "i915_gem.h" +#include "i915_scheduler.h" #include "i915_sw_fence.h" +#include "i915_scheduler.h" #include <uapi/drm/i915_drm.h> struct drm_file; struct drm_i915_gem_object; struct i915_request; +struct i915_timeline; struct intel_wait { struct rb_node node; @@ -48,44 +51,6 @@ struct intel_signal_node { struct list_head link; }; -struct i915_dependency { - struct i915_priotree *signaler; - struct list_head signal_link; - struct list_head wait_link; - struct list_head dfs_link; - unsigned long flags; -#define I915_DEPENDENCY_ALLOC BIT(0) -}; - -/* - * "People assume that time is a strict progression of cause to effect, but - * actually, from a nonlinear, non-subjective viewpoint, it's more like a big - * ball of wibbly-wobbly, timey-wimey ... stuff." -The Doctor, 2015 - * - * Requests exist in a complex web of interdependencies. Each request - * has to wait for some other request to complete before it is ready to be run - * (e.g. we have to wait until the pixels have been rendering into a texture - * before we can copy from it). We track the readiness of a request in terms - * of fences, but we also need to keep the dependency tree for the lifetime - * of the request (beyond the life of an individual fence). We use the tree - * at various points to reorder the requests whilst keeping the requests - * in order with respect to their various dependencies. - */ -struct i915_priotree { - struct list_head signalers_list; /* those before us, we depend upon */ - struct list_head waiters_list; /* those after us, they depend upon us */ - struct list_head link; - int priority; -}; - -enum { - I915_PRIORITY_MIN = I915_CONTEXT_MIN_USER_PRIORITY - 1, - I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY, - I915_PRIORITY_MAX = I915_CONTEXT_MAX_USER_PRIORITY + 1, - - I915_PRIORITY_INVALID = INT_MIN -}; - struct i915_capture_list { struct i915_capture_list *next; struct i915_vma *vma; @@ -131,7 +96,7 @@ struct i915_request { struct i915_gem_context *ctx; struct intel_engine_cs *engine; struct intel_ring *ring; - struct intel_timeline *timeline; + struct i915_timeline *timeline; struct intel_signal_node signaling; /* @@ -154,7 +119,7 @@ struct i915_request { * to retirement), i.e. bidirectional dependency information for the * request not tied to individual fences. */ - struct i915_priotree priotree; + struct i915_sched_node sched; struct i915_dependency dep; /** @@ -343,10 +308,10 @@ static inline bool i915_request_started(const struct i915_request *rq) seqno - 1); } -static inline bool i915_priotree_signaled(const struct i915_priotree *pt) +static inline bool i915_sched_node_signaled(const struct i915_sched_node *node) { const struct i915_request *rq = - container_of(pt, const struct i915_request, priotree); + container_of(node, const struct i915_request, sched); return i915_request_completed(rq); } diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h new file mode 100644 index 0000000..70a4222 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -0,0 +1,72 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#ifndef _I915_SCHEDULER_H_ +#define _I915_SCHEDULER_H_ + +#include <linux/bitops.h> + +#include <uapi/drm/i915_drm.h> + +enum { + I915_PRIORITY_MIN = I915_CONTEXT_MIN_USER_PRIORITY - 1, + I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY, + I915_PRIORITY_MAX = I915_CONTEXT_MAX_USER_PRIORITY + 1, + + I915_PRIORITY_INVALID = INT_MIN +}; + +struct i915_sched_attr { + /** + * @priority: execution and service priority + * + * All clients are equal, but some are more equal than others! + * + * Requests from a context with a greater (more positive) value of + * @priority will be executed before those with a lower @priority + * value, forming a simple QoS. + * + * The &drm_i915_private.kernel_context is assigned the lowest priority. + */ + int priority; +}; + +/* + * "People assume that time is a strict progression of cause to effect, but + * actually, from a nonlinear, non-subjective viewpoint, it's more like a big + * ball of wibbly-wobbly, timey-wimey ... stuff." -The Doctor, 2015 + * + * Requests exist in a complex web of interdependencies. Each request + * has to wait for some other request to complete before it is ready to be run + * (e.g. we have to wait until the pixels have been rendering into a texture + * before we can copy from it). We track the readiness of a request in terms + * of fences, but we also need to keep the dependency tree for the lifetime + * of the request (beyond the life of an individual fence). We use the tree + * at various points to reorder the requests whilst keeping the requests + * in order with respect to their various dependencies. + * + * There is no active component to the "scheduler". As we know the dependency + * DAG of each request, we are able to insert it into a sorted queue when it + * is ready, and are able to reorder its portion of the graph to accommodate + * dynamic priority changes. + */ +struct i915_sched_node { + struct list_head signalers_list; /* those before us, we depend upon */ + struct list_head waiters_list; /* those after us, they depend upon us */ + struct list_head link; + struct i915_sched_attr attr; +}; + +struct i915_dependency { + struct i915_sched_node *signaler; + struct list_head signal_link; + struct list_head wait_link; + struct list_head dfs_link; + unsigned long flags; +#define I915_DEPENDENCY_ALLOC BIT(0) +}; + +#endif /* _I915_SCHEDULER_H_ */ diff --git a/drivers/gpu/drm/i915/i915_timeline.c b/drivers/gpu/drm/i915/i915_timeline.c new file mode 100644 index 0000000..4667cc0 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_timeline.c @@ -0,0 +1,105 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2016-2018 Intel Corporation + */ + +#include "i915_drv.h" + +#include "i915_timeline.h" +#include "i915_syncmap.h" + +void i915_timeline_init(struct drm_i915_private *i915, + struct i915_timeline *timeline, + const char *name) +{ + lockdep_assert_held(&i915->drm.struct_mutex); + + /* + * Ideally we want a set of engines on a single leaf as we expect + * to mostly be tracking synchronisation between engines. It is not + * a huge issue if this is not the case, but we may want to mitigate + * any page crossing penalties if they become an issue. + */ + BUILD_BUG_ON(KSYNCMAP < I915_NUM_ENGINES); + + timeline->name = name; + + list_add(&timeline->link, &i915->gt.timelines); + + /* Called during early_init before we know how many engines there are */ + + timeline->fence_context = dma_fence_context_alloc(1); + + spin_lock_init(&timeline->lock); + + init_request_active(&timeline->last_request, NULL); + INIT_LIST_HEAD(&timeline->requests); + + i915_syncmap_init(&timeline->sync); +} + +/** + * i915_timelines_park - called when the driver idles + * @i915: the drm_i915_private device + * + * When the driver is completely idle, we know that all of our sync points + * have been signaled and our tracking is then entirely redundant. Any request + * to wait upon an older sync point will be completed instantly as we know + * the fence is signaled and therefore we will not even look them up in the + * sync point map. + */ +void i915_timelines_park(struct drm_i915_private *i915) +{ + struct i915_timeline *timeline; + + lockdep_assert_held(&i915->drm.struct_mutex); + + list_for_each_entry(timeline, &i915->gt.timelines, link) { + /* + * All known fences are completed so we can scrap + * the current sync point tracking and start afresh, + * any attempt to wait upon a previous sync point + * will be skipped as the fence was signaled. + */ + i915_syncmap_free(&timeline->sync); + } +} + +void i915_timeline_fini(struct i915_timeline *timeline) +{ + GEM_BUG_ON(!list_empty(&timeline->requests)); + + i915_syncmap_free(&timeline->sync); + + list_del(&timeline->link); +} + +struct i915_timeline * +i915_timeline_create(struct drm_i915_private *i915, const char *name) +{ + struct i915_timeline *timeline; + + timeline = kzalloc(sizeof(*timeline), GFP_KERNEL); + if (!timeline) + return ERR_PTR(-ENOMEM); + + i915_timeline_init(i915, timeline, name); + kref_init(&timeline->kref); + + return timeline; +} + +void __i915_timeline_free(struct kref *kref) +{ + struct i915_timeline *timeline = + container_of(kref, typeof(*timeline), kref); + + i915_timeline_fini(timeline); + kfree(timeline); +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/mock_timeline.c" +#include "selftests/i915_timeline.c" +#endif diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.h b/drivers/gpu/drm/i915/i915_timeline.h index 33e01bf..dc2a463 100644 --- a/drivers/gpu/drm/i915/i915_gem_timeline.h +++ b/drivers/gpu/drm/i915/i915_timeline.h @@ -22,27 +22,20 @@ * */ -#ifndef I915_GEM_TIMELINE_H -#define I915_GEM_TIMELINE_H +#ifndef I915_TIMELINE_H +#define I915_TIMELINE_H #include <linux/list.h> +#include <linux/kref.h> #include "i915_request.h" #include "i915_syncmap.h" #include "i915_utils.h" -struct i915_gem_timeline; - -struct intel_timeline { +struct i915_timeline { u64 fence_context; u32 seqno; - /** - * Count of outstanding requests, from the time they are constructed - * to the moment they are retired. Loosely coupled to hardware. - */ - u32 inflight_seqnos; - spinlock_t lock; /** @@ -77,47 +70,57 @@ struct intel_timeline { */ u32 global_sync[I915_NUM_ENGINES]; - struct i915_gem_timeline *common; -}; - -struct i915_gem_timeline { struct list_head link; - - struct drm_i915_private *i915; const char *name; - struct intel_timeline engine[I915_NUM_ENGINES]; + struct kref kref; }; -int i915_gem_timeline_init(struct drm_i915_private *i915, - struct i915_gem_timeline *tl, - const char *name); -int i915_gem_timeline_init__global(struct drm_i915_private *i915); -void i915_gem_timelines_park(struct drm_i915_private *i915); -void i915_gem_timeline_fini(struct i915_gem_timeline *tl); +void i915_timeline_init(struct drm_i915_private *i915, + struct i915_timeline *tl, + const char *name); +void i915_timeline_fini(struct i915_timeline *tl); + +struct i915_timeline * +i915_timeline_create(struct drm_i915_private *i915, const char *name); -static inline int __intel_timeline_sync_set(struct intel_timeline *tl, - u64 context, u32 seqno) +static inline struct i915_timeline * +i915_timeline_get(struct i915_timeline *timeline) +{ + kref_get(&timeline->kref); + return timeline; +} + +void __i915_timeline_free(struct kref *kref); +static inline void i915_timeline_put(struct i915_timeline *timeline) +{ + kref_put(&timeline->kref, __i915_timeline_free); +} + +static inline int __i915_timeline_sync_set(struct i915_timeline *tl, + u64 context, u32 seqno) { return i915_syncmap_set(&tl->sync, context, seqno); } -static inline int intel_timeline_sync_set(struct intel_timeline *tl, - const struct dma_fence *fence) +static inline int i915_timeline_sync_set(struct i915_timeline *tl, + const struct dma_fence *fence) { - return __intel_timeline_sync_set(tl, fence->context, fence->seqno); + return __i915_timeline_sync_set(tl, fence->context, fence->seqno); } -static inline bool __intel_timeline_sync_is_later(struct intel_timeline *tl, - u64 context, u32 seqno) +static inline bool __i915_timeline_sync_is_later(struct i915_timeline *tl, + u64 context, u32 seqno) { return i915_syncmap_is_later(&tl->sync, context, seqno); } -static inline bool intel_timeline_sync_is_later(struct intel_timeline *tl, - const struct dma_fence *fence) +static inline bool i915_timeline_sync_is_later(struct i915_timeline *tl, + const struct dma_fence *fence) { - return __intel_timeline_sync_is_later(tl, fence->context, fence->seqno); + return __i915_timeline_sync_is_later(tl, fence->context, fence->seqno); } +void i915_timelines_park(struct drm_i915_private *i915); + #endif diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 408827b..8cc3a25 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -679,45 +679,68 @@ DEFINE_EVENT(i915_request, i915_request_execute, TP_ARGS(rq) ); -DECLARE_EVENT_CLASS(i915_request_hw, - TP_PROTO(struct i915_request *rq, unsigned int port), - TP_ARGS(rq, port), - - TP_STRUCT__entry( - __field(u32, dev) - __field(u32, hw_id) - __field(u32, ring) - __field(u32, ctx) - __field(u32, seqno) - __field(u32, global_seqno) - __field(u32, port) - ), - - TP_fast_assign( - __entry->dev = rq->i915->drm.primary->index; - __entry->hw_id = rq->ctx->hw_id; - __entry->ring = rq->engine->id; - __entry->ctx = rq->fence.context; - __entry->seqno = rq->fence.seqno; - __entry->global_seqno = rq->global_seqno; - __entry->port = port; - ), - - TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, global=%u, port=%u", - __entry->dev, __entry->hw_id, __entry->ring, - __entry->ctx, __entry->seqno, - __entry->global_seqno, __entry->port) -); +TRACE_EVENT(i915_request_in, + TP_PROTO(struct i915_request *rq, unsigned int port), + TP_ARGS(rq, port), + + TP_STRUCT__entry( + __field(u32, dev) + __field(u32, hw_id) + __field(u32, ring) + __field(u32, ctx) + __field(u32, seqno) + __field(u32, global_seqno) + __field(u32, port) + __field(u32, prio) + ), + + TP_fast_assign( + __entry->dev = rq->i915->drm.primary->index; + __entry->hw_id = rq->ctx->hw_id; + __entry->ring = rq->engine->id; + __entry->ctx = rq->fence.context; + __entry->seqno = rq->fence.seqno; + __entry->global_seqno = rq->global_seqno; + __entry->prio = rq->sched.attr.priority; + __entry->port = port; + ), -DEFINE_EVENT(i915_request_hw, i915_request_in, - TP_PROTO(struct i915_request *rq, unsigned int port), - TP_ARGS(rq, port) + TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, prio=%u, global=%u, port=%u", + __entry->dev, __entry->hw_id, __entry->ring, __entry->ctx, + __entry->seqno, __entry->prio, __entry->global_seqno, + __entry->port) ); -DEFINE_EVENT(i915_request, i915_request_out, - TP_PROTO(struct i915_request *rq), - TP_ARGS(rq) +TRACE_EVENT(i915_request_out, + TP_PROTO(struct i915_request *rq), + TP_ARGS(rq), + + TP_STRUCT__entry( + __field(u32, dev) + __field(u32, hw_id) + __field(u32, ring) + __field(u32, ctx) + __field(u32, seqno) + __field(u32, global_seqno) + __field(u32, completed) + ), + + TP_fast_assign( + __entry->dev = rq->i915->drm.primary->index; + __entry->hw_id = rq->ctx->hw_id; + __entry->ring = rq->engine->id; + __entry->ctx = rq->fence.context; + __entry->seqno = rq->fence.seqno; + __entry->global_seqno = rq->global_seqno; + __entry->completed = i915_request_completed(rq); + ), + + TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, global=%u, completed?=%u", + __entry->dev, __entry->hw_id, __entry->ring, + __entry->ctx, __entry->seqno, + __entry->global_seqno, __entry->completed) ); + #else #if !defined(TRACE_HEADER_MULTI_READ) static inline void @@ -811,42 +834,6 @@ DEFINE_EVENT(i915_request, i915_request_wait_end, TP_ARGS(rq) ); -TRACE_EVENT(i915_flip_request, - TP_PROTO(int plane, struct drm_i915_gem_object *obj), - - TP_ARGS(plane, obj), - - TP_STRUCT__entry( - __field(int, plane) - __field(struct drm_i915_gem_object *, obj) - ), - - TP_fast_assign( - __entry->plane = plane; - __entry->obj = obj; - ), - - TP_printk("plane=%d, obj=%p", __entry->plane, __entry->obj) -); - -TRACE_EVENT(i915_flip_complete, - TP_PROTO(int plane, struct drm_i915_gem_object *obj), - - TP_ARGS(plane, obj), - - TP_STRUCT__entry( - __field(int, plane) - __field(struct drm_i915_gem_object *, obj) - ), - - TP_fast_assign( - __entry->plane = plane; - __entry->obj = obj; - ), - - TP_printk("plane=%d, obj=%p", __entry->plane, __entry->obj) -); - TRACE_EVENT_CONDITION(i915_reg_rw, TP_PROTO(bool write, i915_reg_t reg, u64 val, int len, bool trace), diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 51dbfe5..00165ad 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -40,8 +40,8 @@ #undef WARN_ON_ONCE #define WARN_ON_ONCE(x) WARN_ONCE((x), "%s", "WARN_ON_ONCE(" __stringify(x) ")") -#define MISSING_CASE(x) WARN(1, "Missing switch case (%lu) in %s\n", \ - (long)(x), __func__) +#define MISSING_CASE(x) WARN(1, "Missing case (%s == %ld)\n", \ + __stringify(x), (long)(x)) #if GCC_VERSION >= 70000 #define add_overflows(A, B) \ @@ -120,6 +120,12 @@ static inline u64 ptr_to_u64(const void *ptr) #include <linux/list.h> +static inline int list_is_first(const struct list_head *list, + const struct list_head *head) +{ + return head->next == list; +} + static inline void __list_del_many(struct list_head *head, struct list_head *first) { diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 4bda3bd..9324d47 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -46,8 +46,6 @@ i915_vma_retire(struct i915_gem_active *active, struct i915_request *rq) GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); list_move_tail(&vma->vm_link, &vma->vm->inactive_list); - if (unlikely(i915_vma_is_closed(vma) && !i915_vma_is_pinned(vma))) - WARN_ON(i915_vma_unbind(vma)); GEM_BUG_ON(!i915_gem_object_is_active(obj)); if (--obj->active_count) @@ -232,7 +230,6 @@ i915_vma_instance(struct drm_i915_gem_object *obj, if (!vma) vma = vma_create(obj, vm, view); - GEM_BUG_ON(!IS_ERR(vma) && i915_vma_is_closed(vma)); GEM_BUG_ON(!IS_ERR(vma) && i915_vma_compare(vma, vm, view)); GEM_BUG_ON(!IS_ERR(vma) && vma_lookup(obj, vm, view) != vma); return vma; @@ -684,13 +681,43 @@ err_unpin: return ret; } -static void i915_vma_destroy(struct i915_vma *vma) +void i915_vma_close(struct i915_vma *vma) +{ + lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); + + GEM_BUG_ON(i915_vma_is_closed(vma)); + vma->flags |= I915_VMA_CLOSED; + + /* + * We defer actually closing, unbinding and destroying the VMA until + * the next idle point, or if the object is freed in the meantime. By + * postponing the unbind, we allow for it to be resurrected by the + * client, avoiding the work required to rebind the VMA. This is + * advantageous for DRI, where the client/server pass objects + * between themselves, temporarily opening a local VMA to the + * object, and then closing it again. The same object is then reused + * on the next frame (or two, depending on the depth of the swap queue) + * causing us to rebind the VMA once more. This ends up being a lot + * of wasted work for the steady state. + */ + list_add_tail(&vma->closed_link, &vma->vm->i915->gt.closed_vma); +} + +void i915_vma_reopen(struct i915_vma *vma) +{ + lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); + + if (vma->flags & I915_VMA_CLOSED) { + vma->flags &= ~I915_VMA_CLOSED; + list_del(&vma->closed_link); + } +} + +static void __i915_vma_destroy(struct i915_vma *vma) { int i; GEM_BUG_ON(vma->node.allocated); - GEM_BUG_ON(i915_vma_is_active(vma)); - GEM_BUG_ON(!i915_vma_is_closed(vma)); GEM_BUG_ON(vma->fence); for (i = 0; i < ARRAY_SIZE(vma->last_read); i++) @@ -699,6 +726,7 @@ static void i915_vma_destroy(struct i915_vma *vma) list_del(&vma->obj_link); list_del(&vma->vm_link); + rb_erase(&vma->obj_node, &vma->obj->vma_tree); if (!i915_vma_is_ggtt(vma)) i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); @@ -706,15 +734,30 @@ static void i915_vma_destroy(struct i915_vma *vma) kmem_cache_free(to_i915(vma->obj->base.dev)->vmas, vma); } -void i915_vma_close(struct i915_vma *vma) +void i915_vma_destroy(struct i915_vma *vma) { - GEM_BUG_ON(i915_vma_is_closed(vma)); - vma->flags |= I915_VMA_CLOSED; + lockdep_assert_held(&vma->vm->i915->drm.struct_mutex); - rb_erase(&vma->obj_node, &vma->obj->vma_tree); + GEM_BUG_ON(i915_vma_is_active(vma)); + GEM_BUG_ON(i915_vma_is_pinned(vma)); + + if (i915_vma_is_closed(vma)) + list_del(&vma->closed_link); + + WARN_ON(i915_vma_unbind(vma)); + __i915_vma_destroy(vma); +} + +void i915_vma_parked(struct drm_i915_private *i915) +{ + struct i915_vma *vma, *next; - if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma)) - WARN_ON(i915_vma_unbind(vma)); + list_for_each_entry_safe(vma, next, &i915->gt.closed_vma, closed_link) { + GEM_BUG_ON(!i915_vma_is_closed(vma)); + i915_vma_destroy(vma); + } + + GEM_BUG_ON(!list_empty(&i915->gt.closed_vma)); } static void __i915_vma_iounmap(struct i915_vma *vma) @@ -804,7 +847,7 @@ int i915_vma_unbind(struct i915_vma *vma) return -EBUSY; if (!drm_mm_node_allocated(&vma->node)) - goto destroy; + return 0; GEM_BUG_ON(obj->bind_count == 0); GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); @@ -841,10 +884,6 @@ int i915_vma_unbind(struct i915_vma *vma) i915_vma_remove(vma); -destroy: - if (unlikely(i915_vma_is_closed(vma))) - i915_vma_destroy(vma); - return 0; } diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 8c50220..fc4294c 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -119,6 +119,8 @@ struct i915_vma { /** This vma's place in the eviction list */ struct list_head evict_link; + struct list_head closed_link; + /** * Used for performing relocations during execbuffer insertion. */ @@ -285,6 +287,8 @@ void i915_vma_revoke_mmap(struct i915_vma *vma); int __must_check i915_vma_unbind(struct i915_vma *vma); void i915_vma_unlink_ctx(struct i915_vma *vma); void i915_vma_close(struct i915_vma *vma); +void i915_vma_reopen(struct i915_vma *vma); +void i915_vma_destroy(struct i915_vma *vma); int __i915_vma_do_pin(struct i915_vma *vma, u64 size, u64 alignment, u64 flags); @@ -408,6 +412,8 @@ i915_vma_unpin_fence(struct i915_vma *vma) __i915_vma_unpin_fence(vma); } +void i915_vma_parked(struct drm_i915_private *i915); + #define for_each_until(cond) if (cond) break; else /** diff --git a/drivers/gpu/drm/i915/intel_atomic.c b/drivers/gpu/drm/i915/intel_atomic.c index e9fb6920..40285d1 100644 --- a/drivers/gpu/drm/i915/intel_atomic.c +++ b/drivers/gpu/drm/i915/intel_atomic.c @@ -227,6 +227,7 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, struct intel_crtc_scaler_state *scaler_state = &crtc_state->scaler_state; struct drm_atomic_state *drm_state = crtc_state->base.state; + struct intel_atomic_state *intel_state = to_intel_atomic_state(drm_state); int num_scalers_need; int i, j; @@ -304,8 +305,8 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, continue; } - plane_state = intel_atomic_get_existing_plane_state(drm_state, - intel_plane); + plane_state = intel_atomic_get_new_plane_state(intel_state, + intel_plane); scaler_id = &plane_state->scaler_id; } @@ -328,8 +329,18 @@ int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, } /* set scaler mode */ - if (IS_GEMINILAKE(dev_priv) || IS_CANNONLAKE(dev_priv)) { - scaler_state->scalers[*scaler_id].mode = 0; + if ((INTEL_GEN(dev_priv) >= 9) && + plane_state && plane_state->base.fb && + plane_state->base.fb->format->format == + DRM_FORMAT_NV12) { + if (INTEL_GEN(dev_priv) == 9 && + !IS_GEMINILAKE(dev_priv) && + !IS_SKYLAKE(dev_priv)) + scaler_state->scalers[*scaler_id].mode = + SKL_PS_SCALER_MODE_NV12; + else + scaler_state->scalers[*scaler_id].mode = + PS_SCALER_MODE_PLANAR; } else if (num_scalers_need == 1 && intel_crtc->pipe != PIPE_C) { /* * when only 1 scaler is in use on either pipe A or B, diff --git a/drivers/gpu/drm/i915/intel_atomic_plane.c b/drivers/gpu/drm/i915/intel_atomic_plane.c index 7481ce8..6d06878 100644 --- a/drivers/gpu/drm/i915/intel_atomic_plane.c +++ b/drivers/gpu/drm/i915/intel_atomic_plane.c @@ -183,11 +183,16 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_ } /* FIXME pre-g4x don't work like this */ - if (intel_state->base.visible) + if (state->visible) crtc_state->active_planes |= BIT(intel_plane->id); else crtc_state->active_planes &= ~BIT(intel_plane->id); + if (state->visible && state->fb->format->format == DRM_FORMAT_NV12) + crtc_state->nv12_planes |= BIT(intel_plane->id); + else + crtc_state->nv12_planes &= ~BIT(intel_plane->id); + return intel_plane_atomic_calc_changes(old_crtc_state, &crtc_state->base, old_plane_state, diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index 709d6ca..3ea566f 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -729,7 +729,7 @@ static void i915_audio_component_codec_wake_override(struct device *kdev, struct drm_i915_private *dev_priv = kdev_to_i915(kdev); u32 tmp; - if (!IS_GEN9_BC(dev_priv)) + if (!IS_GEN9(dev_priv)) return; i915_audio_component_get_power(kdev); diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index c5c7530..54270bd 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -530,6 +530,7 @@ parse_driver_features(struct drm_i915_private *dev_priv, */ if (!driver->drrs_enabled) dev_priv->vbt.drrs_type = DRRS_NOT_SUPPORTED; + dev_priv->vbt.psr.enable = driver->psr_enabled; } static void @@ -1215,10 +1216,8 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, { struct child_device_config *it, *child = NULL; struct ddi_vbt_port_info *info = &dev_priv->vbt.ddi_port_info[port]; - uint8_t hdmi_level_shift; int i, j; bool is_dvi, is_hdmi, is_dp, is_edp, is_crt; - uint8_t aux_channel, ddc_pin; /* Each DDI port can have more than one value on the "DVO Port" field, * so look for all the possible values for each port. */ @@ -1255,9 +1254,6 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, if (!child) return; - aux_channel = child->aux_channel; - ddc_pin = child->ddc_pin; - is_dvi = child->device_type & DEVICE_TYPE_TMDS_DVI_SIGNALING; is_dp = child->device_type & DEVICE_TYPE_DISPLAYPORT_OUTPUT; is_crt = child->device_type & DEVICE_TYPE_ANALOG_OUTPUT; @@ -1271,13 +1267,6 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, is_hdmi = false; } - if (port == PORT_A && is_dvi) { - DRM_DEBUG_KMS("VBT claims port A supports DVI%s, ignoring\n", - is_hdmi ? "/HDMI" : ""); - is_dvi = false; - is_hdmi = false; - } - info->supports_dvi = is_dvi; info->supports_hdmi = is_hdmi; info->supports_dp = is_dp; @@ -1303,20 +1292,28 @@ static void parse_ddi_port(struct drm_i915_private *dev_priv, enum port port, DRM_DEBUG_KMS("Port %c is internal DP\n", port_name(port)); if (is_dvi) { - info->alternate_ddc_pin = map_ddc_pin(dev_priv, ddc_pin); + u8 ddc_pin; - sanitize_ddc_pin(dev_priv, port); + ddc_pin = map_ddc_pin(dev_priv, child->ddc_pin); + if (intel_gmbus_is_valid_pin(dev_priv, ddc_pin)) { + info->alternate_ddc_pin = ddc_pin; + sanitize_ddc_pin(dev_priv, port); + } else { + DRM_DEBUG_KMS("Port %c has invalid DDC pin %d, " + "sticking to defaults\n", + port_name(port), ddc_pin); + } } if (is_dp) { - info->alternate_aux_channel = aux_channel; + info->alternate_aux_channel = child->aux_channel; sanitize_aux_ch(dev_priv, port); } if (bdb_version >= 158) { /* The VBT HDMI level shift values match the table we have. */ - hdmi_level_shift = child->hdmi_level_shifter_value; + u8 hdmi_level_shift = child->hdmi_level_shifter_value; DRM_DEBUG_KMS("VBT HDMI level shift for port %c: %d\n", port_name(port), hdmi_level_shift); diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 1f79e7a..18e643d 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -82,7 +82,7 @@ static unsigned long wait_timeout(void) static noinline void missed_breadcrumb(struct intel_engine_cs *engine) { - if (drm_debug & DRM_UT_DRIVER) { + if (GEM_SHOW_DEBUG()) { struct drm_printer p = drm_debug_printer(__func__); intel_engine_dump(engine, &p, @@ -130,11 +130,12 @@ static void intel_breadcrumbs_hangcheck(struct timer_list *t) static void intel_breadcrumbs_fake_irq(struct timer_list *t) { - struct intel_engine_cs *engine = from_timer(engine, t, - breadcrumbs.fake_irq); + struct intel_engine_cs *engine = + from_timer(engine, t, breadcrumbs.fake_irq); struct intel_breadcrumbs *b = &engine->breadcrumbs; - /* The timer persists in case we cannot enable interrupts, + /* + * The timer persists in case we cannot enable interrupts, * or if we have previously seen seqno/interrupt incoherency * ("missed interrupt" syndrome, better known as a "missed breadcrumb"). * Here the worker will wake up every jiffie in order to kick the @@ -148,6 +149,12 @@ static void intel_breadcrumbs_fake_irq(struct timer_list *t) if (!b->irq_armed) return; + /* If the user has disabled the fake-irq, restore the hangchecking */ + if (!test_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings)) { + mod_timer(&b->hangcheck, wait_timeout()); + return; + } + mod_timer(&b->fake_irq, jiffies + 1); } @@ -730,10 +737,11 @@ static void insert_signal(struct intel_breadcrumbs *b, list_add(&request->signaling.link, &iter->signaling.link); } -void intel_engine_enable_signaling(struct i915_request *request, bool wakeup) +bool intel_engine_enable_signaling(struct i915_request *request, bool wakeup) { struct intel_engine_cs *engine = request->engine; struct intel_breadcrumbs *b = &engine->breadcrumbs; + struct intel_wait *wait = &request->signaling.wait; u32 seqno; /* @@ -750,12 +758,12 @@ void intel_engine_enable_signaling(struct i915_request *request, bool wakeup) seqno = i915_request_global_seqno(request); if (!seqno) /* will be enabled later upon execution */ - return; + return true; - GEM_BUG_ON(request->signaling.wait.seqno); - request->signaling.wait.tsk = b->signaler; - request->signaling.wait.request = request; - request->signaling.wait.seqno = seqno; + GEM_BUG_ON(wait->seqno); + wait->tsk = b->signaler; + wait->request = request; + wait->seqno = seqno; /* * Add ourselves into the list of waiters, but registering our @@ -768,11 +776,15 @@ void intel_engine_enable_signaling(struct i915_request *request, bool wakeup) */ spin_lock(&b->rb_lock); insert_signal(b, request, seqno); - wakeup &= __intel_engine_add_wait(engine, &request->signaling.wait); + wakeup &= __intel_engine_add_wait(engine, wait); spin_unlock(&b->rb_lock); - if (wakeup) + if (wakeup) { wake_up_process(b->signaler); + return !intel_wait_complete(wait); + } + + return true; } void intel_engine_cancel_signaling(struct i915_request *request) @@ -826,8 +838,8 @@ static void cancel_fake_irq(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; + del_timer_sync(&b->fake_irq); /* may queue b->hangcheck */ del_timer_sync(&b->hangcheck); - del_timer_sync(&b->fake_irq); clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); } @@ -835,15 +847,22 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; - cancel_fake_irq(engine); spin_lock_irq(&b->irq_lock); + /* + * Leave the fake_irq timer enabled (if it is running), but clear the + * bit so that it turns itself off on its next wake up and goes back + * to the long hangcheck interval if still required. + */ + clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); + if (b->irq_enabled) irq_enable(engine); else irq_disable(engine); - /* We set the IRQ_BREADCRUMB bit when we enable the irq presuming the + /* + * We set the IRQ_BREADCRUMB bit when we enable the irq presuming the * GPU is active and may have already executed the MI_USER_INTERRUPT * before the CPU is ready to receive. However, the engine is currently * idle (we haven't started it yet), there is no possibility for a @@ -852,9 +871,6 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine) */ clear_bit(ENGINE_IRQ_BREADCRUMB, &engine->irq_posted); - if (b->irq_armed) - enable_fake_irq(b); - spin_unlock_irq(&b->irq_lock); } diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c index dc7db8a..704ddb4 100644 --- a/drivers/gpu/drm/i915/intel_cdclk.c +++ b/drivers/gpu/drm/i915/intel_cdclk.c @@ -1626,7 +1626,7 @@ static void cnl_cdclk_pll_disable(struct drm_i915_private *dev_priv) /* Timeout 200us */ if (wait_for((I915_READ(BXT_DE_PLL_ENABLE) & BXT_DE_PLL_LOCK) == 0, 1)) - DRM_ERROR("timout waiting for CDCLK PLL unlock\n"); + DRM_ERROR("timeout waiting for CDCLK PLL unlock\n"); dev_priv->cdclk.hw.vco = 0; } @@ -1644,7 +1644,7 @@ static void cnl_cdclk_pll_enable(struct drm_i915_private *dev_priv, int vco) /* Timeout 200us */ if (wait_for((I915_READ(BXT_DE_PLL_ENABLE) & BXT_DE_PLL_LOCK) != 0, 1)) - DRM_ERROR("timout waiting for CDCLK PLL lock\n"); + DRM_ERROR("timeout waiting for CDCLK PLL lock\n"); dev_priv->cdclk.hw.vco = vco; } @@ -2140,10 +2140,22 @@ int intel_crtc_compute_min_cdclk(const struct intel_crtc_state *crtc_state) } } - /* According to BSpec, "The CD clock frequency must be at least twice + /* + * According to BSpec, "The CD clock frequency must be at least twice * the frequency of the Azalia BCLK." and BCLK is 96 MHz by default. + * + * FIXME: Check the actual, not default, BCLK being used. + * + * FIXME: This does not depend on ->has_audio because the higher CDCLK + * is required for audio probe, also when there are no audio capable + * displays connected at probe time. This leads to unnecessarily high + * CDCLK when audio is not required. + * + * FIXME: This limit is only applied when there are displays connected + * at probe time. If we probe without displays, we'll still end up using + * the platform minimum CDCLK, failing audio probe. */ - if (crtc_state->has_audio && INTEL_GEN(dev_priv) >= 9) + if (INTEL_GEN(dev_priv) >= 9) min_cdclk = max(2 * 96000, min_cdclk); /* @@ -2290,9 +2302,44 @@ static int bdw_modeset_calc_cdclk(struct drm_atomic_state *state) return 0; } +static int skl_dpll0_vco(struct intel_atomic_state *intel_state) +{ + struct drm_i915_private *dev_priv = to_i915(intel_state->base.dev); + struct intel_crtc *crtc; + struct intel_crtc_state *crtc_state; + int vco, i; + + vco = intel_state->cdclk.logical.vco; + if (!vco) + vco = dev_priv->skl_preferred_vco_freq; + + for_each_new_intel_crtc_in_state(intel_state, crtc, crtc_state, i) { + if (!crtc_state->base.enable) + continue; + + if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_EDP)) + continue; + + /* + * DPLL0 VCO may need to be adjusted to get the correct + * clock for eDP. This will affect cdclk as well. + */ + switch (crtc_state->port_clock / 2) { + case 108000: + case 216000: + vco = 8640000; + break; + default: + vco = 8100000; + break; + } + } + + return vco; +} + static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) { - struct drm_i915_private *dev_priv = to_i915(state->dev); struct intel_atomic_state *intel_state = to_intel_atomic_state(state); int min_cdclk, cdclk, vco; @@ -2300,9 +2347,7 @@ static int skl_modeset_calc_cdclk(struct drm_atomic_state *state) if (min_cdclk < 0) return min_cdclk; - vco = intel_state->cdclk.logical.vco; - if (!vco) - vco = dev_priv->skl_preferred_vco_freq; + vco = skl_dpll0_vco(intel_state); /* * FIXME should also account for plane ratio diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index c0a8805..de0e223 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -748,6 +748,11 @@ intel_crt_detect(struct drm_connector *connector, connector->base.id, connector->name, force); + if (i915_modparams.load_detect_test) { + intel_display_power_get(dev_priv, intel_encoder->power_domain); + goto load_detect; + } + /* Skip machines without VGA that falsely report hotplug events */ if (dmi_check_system(intel_spurious_crt_detect)) return connector_status_disconnected; @@ -776,11 +781,12 @@ intel_crt_detect(struct drm_connector *connector, * broken monitor (without edid) to work behind a broken kvm (that fails * to have the right resistors for HP detection) needs to fix this up. * For now just bail out. */ - if (I915_HAS_HOTPLUG(dev_priv) && !i915_modparams.load_detect_test) { + if (I915_HAS_HOTPLUG(dev_priv)) { status = connector_status_disconnected; goto out; } +load_detect: if (!force) { status = connector->status; goto out; diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index 41e6c75..cf9b600 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -35,6 +35,7 @@ */ #define I915_CSR_GLK "i915/glk_dmc_ver1_04.bin" +MODULE_FIRMWARE(I915_CSR_GLK); #define GLK_CSR_VERSION_REQUIRED CSR_VERSION(1, 4) #define I915_CSR_CNL "i915/cnl_dmc_ver1_07.bin" @@ -297,7 +298,10 @@ static uint32_t *parse_csr_fw(struct drm_i915_private *dev_priv, csr->version = css_header->version; - if (IS_CANNONLAKE(dev_priv)) { + if (csr->fw_path == i915_modparams.dmc_firmware_path) { + /* Bypass version check for firmware override. */ + required_version = csr->version; + } else if (IS_CANNONLAKE(dev_priv)) { required_version = CNL_CSR_VERSION_REQUIRED; } else if (IS_GEMINILAKE(dev_priv)) { required_version = GLK_CSR_VERSION_REQUIRED; @@ -452,7 +456,9 @@ void intel_csr_ucode_init(struct drm_i915_private *dev_priv) if (!HAS_CSR(dev_priv)) return; - if (IS_CANNONLAKE(dev_priv)) + if (i915_modparams.dmc_firmware_path) + csr->fw_path = i915_modparams.dmc_firmware_path; + else if (IS_CANNONLAKE(dev_priv)) csr->fw_path = I915_CSR_CNL; else if (IS_GEMINILAKE(dev_priv)) csr->fw_path = I915_CSR_GLK; diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 8c2d778..b98ac054 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -493,6 +493,125 @@ static const struct cnl_ddi_buf_trans cnl_ddi_translations_edp_1_05V[] = { { 0x2, 0x7F, 0x3F, 0x00, 0x00 }, /* 400 400 0.0 */ }; +struct icl_combo_phy_ddi_buf_trans { + u32 dw2_swing_select; + u32 dw2_swing_scalar; + u32 dw4_scaling; +}; + +/* Voltage Swing Programming for VccIO 0.85V for DP */ +static const struct icl_combo_phy_ddi_buf_trans icl_combo_phy_ddi_translations_dp_hdmi_0_85V[] = { + /* Voltage mV db */ + { 0x2, 0x98, 0x0018 }, /* 400 0.0 */ + { 0x2, 0x98, 0x3015 }, /* 400 3.5 */ + { 0x2, 0x98, 0x6012 }, /* 400 6.0 */ + { 0x2, 0x98, 0x900F }, /* 400 9.5 */ + { 0xB, 0x70, 0x0018 }, /* 600 0.0 */ + { 0xB, 0x70, 0x3015 }, /* 600 3.5 */ + { 0xB, 0x70, 0x6012 }, /* 600 6.0 */ + { 0x5, 0x00, 0x0018 }, /* 800 0.0 */ + { 0x5, 0x00, 0x3015 }, /* 800 3.5 */ + { 0x6, 0x98, 0x0018 }, /* 1200 0.0 */ +}; + +/* FIXME - After table is updated in Bspec */ +/* Voltage Swing Programming for VccIO 0.85V for eDP */ +static const struct icl_combo_phy_ddi_buf_trans icl_combo_phy_ddi_translations_edp_0_85V[] = { + /* Voltage mV db */ + { 0x0, 0x00, 0x00 }, /* 200 0.0 */ + { 0x0, 0x00, 0x00 }, /* 200 1.5 */ + { 0x0, 0x00, 0x00 }, /* 200 4.0 */ + { 0x0, 0x00, 0x00 }, /* 200 6.0 */ + { 0x0, 0x00, 0x00 }, /* 250 0.0 */ + { 0x0, 0x00, 0x00 }, /* 250 1.5 */ + { 0x0, 0x00, 0x00 }, /* 250 4.0 */ + { 0x0, 0x00, 0x00 }, /* 300 0.0 */ + { 0x0, 0x00, 0x00 }, /* 300 1.5 */ + { 0x0, 0x00, 0x00 }, /* 350 0.0 */ +}; + +/* Voltage Swing Programming for VccIO 0.95V for DP */ +static const struct icl_combo_phy_ddi_buf_trans icl_combo_phy_ddi_translations_dp_hdmi_0_95V[] = { + /* Voltage mV db */ + { 0x2, 0x98, 0x0018 }, /* 400 0.0 */ + { 0x2, 0x98, 0x3015 }, /* 400 3.5 */ + { 0x2, 0x98, 0x6012 }, /* 400 6.0 */ + { 0x2, 0x98, 0x900F }, /* 400 9.5 */ + { 0x4, 0x98, 0x0018 }, /* 600 0.0 */ + { 0x4, 0x98, 0x3015 }, /* 600 3.5 */ + { 0x4, 0x98, 0x6012 }, /* 600 6.0 */ + { 0x5, 0x76, 0x0018 }, /* 800 0.0 */ + { 0x5, 0x76, 0x3015 }, /* 800 3.5 */ + { 0x6, 0x98, 0x0018 }, /* 1200 0.0 */ +}; + +/* FIXME - After table is updated in Bspec */ +/* Voltage Swing Programming for VccIO 0.95V for eDP */ +static const struct icl_combo_phy_ddi_buf_trans icl_combo_phy_ddi_translations_edp_0_95V[] = { + /* Voltage mV db */ + { 0x0, 0x00, 0x00 }, /* 200 0.0 */ + { 0x0, 0x00, 0x00 }, /* 200 1.5 */ + { 0x0, 0x00, 0x00 }, /* 200 4.0 */ + { 0x0, 0x00, 0x00 }, /* 200 6.0 */ + { 0x0, 0x00, 0x00 }, /* 250 0.0 */ + { 0x0, 0x00, 0x00 }, /* 250 1.5 */ + { 0x0, 0x00, 0x00 }, /* 250 4.0 */ + { 0x0, 0x00, 0x00 }, /* 300 0.0 */ + { 0x0, 0x00, 0x00 }, /* 300 1.5 */ + { 0x0, 0x00, 0x00 }, /* 350 0.0 */ +}; + +/* Voltage Swing Programming for VccIO 1.05V for DP */ +static const struct icl_combo_phy_ddi_buf_trans icl_combo_phy_ddi_translations_dp_hdmi_1_05V[] = { + /* Voltage mV db */ + { 0x2, 0x98, 0x0018 }, /* 400 0.0 */ + { 0x2, 0x98, 0x3015 }, /* 400 3.5 */ + { 0x2, 0x98, 0x6012 }, /* 400 6.0 */ + { 0x2, 0x98, 0x900F }, /* 400 9.5 */ + { 0x4, 0x98, 0x0018 }, /* 600 0.0 */ + { 0x4, 0x98, 0x3015 }, /* 600 3.5 */ + { 0x4, 0x98, 0x6012 }, /* 600 6.0 */ + { 0x5, 0x71, 0x0018 }, /* 800 0.0 */ + { 0x5, 0x71, 0x3015 }, /* 800 3.5 */ + { 0x6, 0x98, 0x0018 }, /* 1200 0.0 */ +}; + +/* FIXME - After table is updated in Bspec */ +/* Voltage Swing Programming for VccIO 1.05V for eDP */ +static const struct icl_combo_phy_ddi_buf_trans icl_combo_phy_ddi_translations_edp_1_05V[] = { + /* Voltage mV db */ + { 0x0, 0x00, 0x00 }, /* 200 0.0 */ + { 0x0, 0x00, 0x00 }, /* 200 1.5 */ + { 0x0, 0x00, 0x00 }, /* 200 4.0 */ + { 0x0, 0x00, 0x00 }, /* 200 6.0 */ + { 0x0, 0x00, 0x00 }, /* 250 0.0 */ + { 0x0, 0x00, 0x00 }, /* 250 1.5 */ + { 0x0, 0x00, 0x00 }, /* 250 4.0 */ + { 0x0, 0x00, 0x00 }, /* 300 0.0 */ + { 0x0, 0x00, 0x00 }, /* 300 1.5 */ + { 0x0, 0x00, 0x00 }, /* 350 0.0 */ +}; + +struct icl_mg_phy_ddi_buf_trans { + u32 cri_txdeemph_override_5_0; + u32 cri_txdeemph_override_11_6; + u32 cri_txdeemph_override_17_12; +}; + +static const struct icl_mg_phy_ddi_buf_trans icl_mg_phy_ddi_translations[] = { + /* Voltage swing pre-emphasis */ + { 0x0, 0x1B, 0x00 }, /* 0 0 */ + { 0x0, 0x23, 0x08 }, /* 0 1 */ + { 0x0, 0x2D, 0x12 }, /* 0 2 */ + { 0x0, 0x00, 0x00 }, /* 0 3 */ + { 0x0, 0x23, 0x00 }, /* 1 0 */ + { 0x0, 0x2B, 0x09 }, /* 1 1 */ + { 0x0, 0x2E, 0x11 }, /* 1 2 */ + { 0x0, 0x2F, 0x00 }, /* 2 0 */ + { 0x0, 0x33, 0x0C }, /* 2 1 */ + { 0x0, 0x00, 0x00 }, /* 3 0 */ +}; + static const struct ddi_buf_trans * bdw_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries) { @@ -751,6 +870,45 @@ cnl_get_buf_trans_edp(struct drm_i915_private *dev_priv, int *n_entries) } } +static const struct icl_combo_phy_ddi_buf_trans * +icl_get_combo_buf_trans(struct drm_i915_private *dev_priv, enum port port, + int type, int *n_entries) +{ + u32 voltage = I915_READ(ICL_PORT_COMP_DW3(port)) & VOLTAGE_INFO_MASK; + + if (type == INTEL_OUTPUT_EDP && dev_priv->vbt.edp.low_vswing) { + switch (voltage) { + case VOLTAGE_INFO_0_85V: + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_0_85V); + return icl_combo_phy_ddi_translations_edp_0_85V; + case VOLTAGE_INFO_0_95V: + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_0_95V); + return icl_combo_phy_ddi_translations_edp_0_95V; + case VOLTAGE_INFO_1_05V: + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_edp_1_05V); + return icl_combo_phy_ddi_translations_edp_1_05V; + default: + MISSING_CASE(voltage); + return NULL; + } + } else { + switch (voltage) { + case VOLTAGE_INFO_0_85V: + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_dp_hdmi_0_85V); + return icl_combo_phy_ddi_translations_dp_hdmi_0_85V; + case VOLTAGE_INFO_0_95V: + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_dp_hdmi_0_95V); + return icl_combo_phy_ddi_translations_dp_hdmi_0_95V; + case VOLTAGE_INFO_1_05V: + *n_entries = ARRAY_SIZE(icl_combo_phy_ddi_translations_dp_hdmi_1_05V); + return icl_combo_phy_ddi_translations_dp_hdmi_1_05V; + default: + MISSING_CASE(voltage); + return NULL; + } + } +} + static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port port) { int n_entries, level, default_entry; @@ -875,7 +1033,7 @@ static void intel_wait_ddi_buf_idle(struct drm_i915_private *dev_priv, static uint32_t hsw_pll_to_ddi_pll_sel(const struct intel_shared_dpll *pll) { - switch (pll->id) { + switch (pll->info->id) { case DPLL_ID_WRPLL1: return PORT_CLK_SEL_WRPLL1; case DPLL_ID_WRPLL2: @@ -889,11 +1047,30 @@ static uint32_t hsw_pll_to_ddi_pll_sel(const struct intel_shared_dpll *pll) case DPLL_ID_LCPLL_2700: return PORT_CLK_SEL_LCPLL_2700; default: - MISSING_CASE(pll->id); + MISSING_CASE(pll->info->id); return PORT_CLK_SEL_NONE; } } +static uint32_t icl_pll_to_ddi_pll_sel(struct intel_encoder *encoder, + const struct intel_shared_dpll *pll) +{ + const enum intel_dpll_id id = pll->info->id; + + switch (id) { + default: + MISSING_CASE(id); + case DPLL_ID_ICL_DPLL0: + case DPLL_ID_ICL_DPLL1: + return DDI_CLK_SEL_NONE; + case DPLL_ID_ICL_MGPLL1: + case DPLL_ID_ICL_MGPLL2: + case DPLL_ID_ICL_MGPLL3: + case DPLL_ID_ICL_MGPLL4: + return DDI_CLK_SEL_MG; + } +} + /* Starting with Haswell, different DDI ports can work in FDI mode for * connection to the PCH-located connectors. For this, it is necessary to train * both the DDI port and PCH receiver for the desired DDI buffer settings. @@ -1906,7 +2083,13 @@ u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder) enum port port = encoder->port; int n_entries; - if (IS_CANNONLAKE(dev_priv)) { + if (IS_ICELAKE(dev_priv)) { + if (port == PORT_A || port == PORT_B) + icl_get_combo_buf_trans(dev_priv, port, encoder->type, + &n_entries); + else + n_entries = ARRAY_SIZE(icl_mg_phy_ddi_translations); + } else if (IS_CANNONLAKE(dev_priv)) { if (encoder->type == INTEL_OUTPUT_EDP) cnl_get_buf_trans_edp(dev_priv, &n_entries); else @@ -2063,6 +2246,146 @@ static void cnl_ddi_vswing_sequence(struct intel_encoder *encoder, I915_WRITE(CNL_PORT_TX_DW5_GRP(port), val); } +static void icl_ddi_combo_vswing_program(struct drm_i915_private *dev_priv, + u32 level, enum port port, int type) +{ + const struct icl_combo_phy_ddi_buf_trans *ddi_translations = NULL; + u32 n_entries, val; + int ln; + + ddi_translations = icl_get_combo_buf_trans(dev_priv, port, type, + &n_entries); + if (!ddi_translations) + return; + + if (level >= n_entries) { + DRM_DEBUG_KMS("DDI translation not found for level %d. Using %d instead.", level, n_entries - 1); + level = n_entries - 1; + } + + /* Set PORT_TX_DW5 Rterm Sel to 110b. */ + val = I915_READ(ICL_PORT_TX_DW5_LN0(port)); + val &= ~RTERM_SELECT_MASK; + val |= RTERM_SELECT(0x6); + I915_WRITE(ICL_PORT_TX_DW5_GRP(port), val); + + /* Program PORT_TX_DW5 */ + val = I915_READ(ICL_PORT_TX_DW5_LN0(port)); + /* Set DisableTap2 and DisableTap3 if MIPI DSI + * Clear DisableTap2 and DisableTap3 for all other Ports + */ + if (type == INTEL_OUTPUT_DSI) { + val |= TAP2_DISABLE; + val |= TAP3_DISABLE; + } else { + val &= ~TAP2_DISABLE; + val &= ~TAP3_DISABLE; + } + I915_WRITE(ICL_PORT_TX_DW5_GRP(port), val); + + /* Program PORT_TX_DW2 */ + val = I915_READ(ICL_PORT_TX_DW2_LN0(port)); + val &= ~(SWING_SEL_LOWER_MASK | SWING_SEL_UPPER_MASK | + RCOMP_SCALAR_MASK); + val |= SWING_SEL_UPPER(ddi_translations[level].dw2_swing_select); + val |= SWING_SEL_LOWER(ddi_translations[level].dw2_swing_select); + /* Program Rcomp scalar for every table entry */ + val |= RCOMP_SCALAR(ddi_translations[level].dw2_swing_scalar); + I915_WRITE(ICL_PORT_TX_DW2_GRP(port), val); + + /* Program PORT_TX_DW4 */ + /* We cannot write to GRP. It would overwrite individual loadgen. */ + for (ln = 0; ln <= 3; ln++) { + val = I915_READ(ICL_PORT_TX_DW4_LN(port, ln)); + val &= ~(POST_CURSOR_1_MASK | POST_CURSOR_2_MASK | + CURSOR_COEFF_MASK); + val |= ddi_translations[level].dw4_scaling; + I915_WRITE(ICL_PORT_TX_DW4_LN(port, ln), val); + } +} + +static void icl_combo_phy_ddi_vswing_sequence(struct intel_encoder *encoder, + u32 level, + enum intel_output_type type) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + enum port port = encoder->port; + int width = 0; + int rate = 0; + u32 val; + int ln = 0; + + if (type == INTEL_OUTPUT_HDMI) { + width = 4; + /* Rate is always < than 6GHz for HDMI */ + } else { + struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + + width = intel_dp->lane_count; + rate = intel_dp->link_rate; + } + + /* + * 1. If port type is eDP or DP, + * set PORT_PCS_DW1 cmnkeeper_enable to 1b, + * else clear to 0b. + */ + val = I915_READ(ICL_PORT_PCS_DW1_LN0(port)); + if (type == INTEL_OUTPUT_HDMI) + val &= ~COMMON_KEEPER_EN; + else + val |= COMMON_KEEPER_EN; + I915_WRITE(ICL_PORT_PCS_DW1_GRP(port), val); + + /* 2. Program loadgen select */ + /* + * Program PORT_TX_DW4_LN depending on Bit rate and used lanes + * <= 6 GHz and 4 lanes (LN0=0, LN1=1, LN2=1, LN3=1) + * <= 6 GHz and 1,2 lanes (LN0=0, LN1=1, LN2=1, LN3=0) + * > 6 GHz (LN0=0, LN1=0, LN2=0, LN3=0) + */ + for (ln = 0; ln <= 3; ln++) { + val = I915_READ(ICL_PORT_TX_DW4_LN(port, ln)); + val &= ~LOADGEN_SELECT; + + if ((rate <= 600000 && width == 4 && ln >= 1) || + (rate <= 600000 && width < 4 && (ln == 1 || ln == 2))) { + val |= LOADGEN_SELECT; + } + I915_WRITE(ICL_PORT_TX_DW4_LN(port, ln), val); + } + + /* 3. Set PORT_CL_DW5 SUS Clock Config to 11b */ + val = I915_READ(ICL_PORT_CL_DW5(port)); + val |= SUS_CLOCK_CONFIG; + I915_WRITE(ICL_PORT_CL_DW5(port), val); + + /* 4. Clear training enable to change swing values */ + val = I915_READ(ICL_PORT_TX_DW5_LN0(port)); + val &= ~TX_TRAINING_EN; + I915_WRITE(ICL_PORT_TX_DW5_GRP(port), val); + + /* 5. Program swing and de-emphasis */ + icl_ddi_combo_vswing_program(dev_priv, level, port, type); + + /* 6. Set training enable to trigger update */ + val = I915_READ(ICL_PORT_TX_DW5_LN0(port)); + val |= TX_TRAINING_EN; + I915_WRITE(ICL_PORT_TX_DW5_GRP(port), val); +} + +static void icl_ddi_vswing_sequence(struct intel_encoder *encoder, u32 level, + enum intel_output_type type) +{ + enum port port = encoder->port; + + if (port == PORT_A || port == PORT_B) + icl_combo_phy_ddi_vswing_sequence(encoder, level, type); + else + /* Not Implemented Yet */ + WARN_ON(1); +} + static uint32_t translate_signal_level(int signal_levels) { int i; @@ -2094,7 +2417,9 @@ u32 bxt_signal_levels(struct intel_dp *intel_dp) struct intel_encoder *encoder = &dport->base; int level = intel_ddi_dp_level(intel_dp); - if (IS_CANNONLAKE(dev_priv)) + if (IS_ICELAKE(dev_priv)) + icl_ddi_vswing_sequence(encoder, level, encoder->type); + else if (IS_CANNONLAKE(dev_priv)) cnl_ddi_vswing_sequence(encoder, level, encoder->type); else bxt_ddi_vswing_sequence(encoder, level, encoder->type); @@ -2115,6 +2440,69 @@ uint32_t ddi_signal_levels(struct intel_dp *intel_dp) return DDI_BUF_TRANS_SELECT(level); } +void icl_map_plls_to_ports(struct drm_crtc *crtc, + struct intel_crtc_state *crtc_state, + struct drm_atomic_state *old_state) +{ + struct intel_shared_dpll *pll = crtc_state->shared_dpll; + struct drm_i915_private *dev_priv = to_i915(crtc->dev); + struct drm_connector_state *conn_state; + struct drm_connector *conn; + int i; + + for_each_new_connector_in_state(old_state, conn, conn_state, i) { + struct intel_encoder *encoder = + to_intel_encoder(conn_state->best_encoder); + enum port port = encoder->port; + uint32_t val; + + if (conn_state->crtc != crtc) + continue; + + mutex_lock(&dev_priv->dpll_lock); + + val = I915_READ(DPCLKA_CFGCR0_ICL); + WARN_ON((val & DPCLKA_CFGCR0_DDI_CLK_OFF(port)) == 0); + + if (port == PORT_A || port == PORT_B) { + val &= ~DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(port); + val |= DPCLKA_CFGCR0_DDI_CLK_SEL(pll->info->id, port); + I915_WRITE(DPCLKA_CFGCR0_ICL, val); + POSTING_READ(DPCLKA_CFGCR0_ICL); + } + + val &= ~DPCLKA_CFGCR0_DDI_CLK_OFF(port); + I915_WRITE(DPCLKA_CFGCR0_ICL, val); + + mutex_unlock(&dev_priv->dpll_lock); + } +} + +void icl_unmap_plls_to_ports(struct drm_crtc *crtc, + struct intel_crtc_state *crtc_state, + struct drm_atomic_state *old_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc->dev); + struct drm_connector_state *old_conn_state; + struct drm_connector *conn; + int i; + + for_each_old_connector_in_state(old_state, conn, old_conn_state, i) { + struct intel_encoder *encoder = + to_intel_encoder(old_conn_state->best_encoder); + enum port port = encoder->port; + + if (old_conn_state->crtc != crtc) + continue; + + mutex_lock(&dev_priv->dpll_lock); + I915_WRITE(DPCLKA_CFGCR0_ICL, + I915_READ(DPCLKA_CFGCR0_ICL) | + DPCLKA_CFGCR0_DDI_CLK_OFF(port)); + mutex_unlock(&dev_priv->dpll_lock); + } +} + static void intel_ddi_clk_select(struct intel_encoder *encoder, const struct intel_shared_dpll *pll) { @@ -2127,11 +2515,15 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder, mutex_lock(&dev_priv->dpll_lock); - if (IS_CANNONLAKE(dev_priv)) { + if (IS_ICELAKE(dev_priv)) { + if (port >= PORT_C) + I915_WRITE(DDI_CLK_SEL(port), + icl_pll_to_ddi_pll_sel(encoder, pll)); + } else if (IS_CANNONLAKE(dev_priv)) { /* Configure DPCLKA_CFGCR0 to map the DPLL to the DDI. */ val = I915_READ(DPCLKA_CFGCR0); val &= ~DPCLKA_CFGCR0_DDI_CLK_SEL_MASK(port); - val |= DPCLKA_CFGCR0_DDI_CLK_SEL(pll->id, port); + val |= DPCLKA_CFGCR0_DDI_CLK_SEL(pll->info->id, port); I915_WRITE(DPCLKA_CFGCR0, val); /* @@ -2148,7 +2540,7 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder, val &= ~(DPLL_CTRL2_DDI_CLK_OFF(port) | DPLL_CTRL2_DDI_CLK_SEL_MASK(port)); - val |= (DPLL_CTRL2_DDI_CLK_SEL(pll->id, port) | + val |= (DPLL_CTRL2_DDI_CLK_SEL(pll->info->id, port) | DPLL_CTRL2_DDI_SEL_OVERRIDE(port)); I915_WRITE(DPLL_CTRL2, val); @@ -2165,14 +2557,18 @@ static void intel_ddi_clk_disable(struct intel_encoder *encoder) struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); enum port port = encoder->port; - if (IS_CANNONLAKE(dev_priv)) + if (IS_ICELAKE(dev_priv)) { + if (port >= PORT_C) + I915_WRITE(DDI_CLK_SEL(port), DDI_CLK_SEL_NONE); + } else if (IS_CANNONLAKE(dev_priv)) { I915_WRITE(DPCLKA_CFGCR0, I915_READ(DPCLKA_CFGCR0) | DPCLKA_CFGCR0_DDI_CLK_OFF(port)); - else if (IS_GEN9_BC(dev_priv)) + } else if (IS_GEN9_BC(dev_priv)) { I915_WRITE(DPLL_CTRL2, I915_READ(DPLL_CTRL2) | DPLL_CTRL2_DDI_CLK_OFF(port)); - else if (INTEL_GEN(dev_priv) < 9) + } else if (INTEL_GEN(dev_priv) < 9) { I915_WRITE(PORT_CLK_SEL(port), PORT_CLK_SEL_NONE); + } } static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, @@ -2197,7 +2593,9 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain); - if (IS_CANNONLAKE(dev_priv)) + if (IS_ICELAKE(dev_priv)) + icl_ddi_vswing_sequence(encoder, level, encoder->type); + else if (IS_CANNONLAKE(dev_priv)) cnl_ddi_vswing_sequence(encoder, level, encoder->type); else if (IS_GEN9_LP(dev_priv)) bxt_ddi_vswing_sequence(encoder, level, encoder->type); @@ -2205,7 +2603,8 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder, intel_prepare_dp_ddi_buffers(encoder, crtc_state); intel_ddi_init_dp_buf_reg(encoder); - intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); + if (!is_mst) + intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); intel_dp_start_link_train(intel_dp); if (port != PORT_A || INTEL_GEN(dev_priv) >= 9) intel_dp_stop_link_train(intel_dp); @@ -2227,7 +2626,9 @@ static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder, intel_display_power_get(dev_priv, dig_port->ddi_io_power_domain); - if (IS_CANNONLAKE(dev_priv)) + if (IS_ICELAKE(dev_priv)) + icl_ddi_vswing_sequence(encoder, level, INTEL_OUTPUT_HDMI); + else if (IS_CANNONLAKE(dev_priv)) cnl_ddi_vswing_sequence(encoder, level, INTEL_OUTPUT_HDMI); else if (IS_GEN9_LP(dev_priv)) bxt_ddi_vswing_sequence(encoder, level, INTEL_OUTPUT_HDMI); @@ -2303,12 +2704,15 @@ static void intel_ddi_post_disable_dp(struct intel_encoder *encoder, struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); struct intel_dp *intel_dp = &dig_port->dp; + bool is_mst = intel_crtc_has_type(old_crtc_state, + INTEL_OUTPUT_DP_MST); /* * Power down sink before disabling the port, otherwise we end * up getting interrupts from the sink on detecting link loss. */ - intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); + if (!is_mst) + intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); intel_disable_ddi_buf(encoder); @@ -2424,12 +2828,14 @@ static void intel_enable_ddi_hdmi(struct intel_encoder *encoder, { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_digital_port *dig_port = enc_to_dig_port(&encoder->base); + struct drm_connector *connector = conn_state->connector; enum port port = encoder->port; - intel_hdmi_handle_sink_scrambling(encoder, - conn_state->connector, - crtc_state->hdmi_high_tmds_clock_ratio, - crtc_state->hdmi_scrambling); + if (!intel_hdmi_handle_sink_scrambling(encoder, connector, + crtc_state->hdmi_high_tmds_clock_ratio, + crtc_state->hdmi_scrambling)) + DRM_ERROR("[CONNECTOR:%d:%s] Failed to configure sink scrambling/TMDS bit clock ratio\n", + connector->base.id, connector->name); /* Display WA #1143: skl,kbl,cfl */ if (IS_GEN9_BC(dev_priv)) { @@ -2520,13 +2926,16 @@ static void intel_disable_ddi_hdmi(struct intel_encoder *encoder, const struct intel_crtc_state *old_crtc_state, const struct drm_connector_state *old_conn_state) { + struct drm_connector *connector = old_conn_state->connector; + if (old_crtc_state->has_audio) intel_audio_codec_disable(encoder, old_crtc_state, old_conn_state); - intel_hdmi_handle_sink_scrambling(encoder, - old_conn_state->connector, - false, false); + if (!intel_hdmi_handle_sink_scrambling(encoder, connector, + false, false)) + DRM_DEBUG_KMS("[CONNECTOR:%d:%s] Failed to reset sink scrambling/TMDS bit clock ratio\n", + connector->base.id, connector->name); } static void intel_disable_ddi(struct intel_encoder *encoder, diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 3dd350f..0fd13df 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -83,11 +83,11 @@ static void sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p) { int s; - drm_printf(p, "slice mask: %04x\n", sseu->slice_mask); - drm_printf(p, "slice total: %u\n", hweight8(sseu->slice_mask)); + drm_printf(p, "slice total: %u, mask=%04x\n", + hweight8(sseu->slice_mask), sseu->slice_mask); drm_printf(p, "subslice total: %u\n", sseu_subslice_total(sseu)); - for (s = 0; s < ARRAY_SIZE(sseu->subslice_mask); s++) { - drm_printf(p, "slice%d %u subslices mask=%04x\n", + for (s = 0; s < sseu->max_slices; s++) { + drm_printf(p, "slice%d: %u subslices, mask=%04x\n", s, hweight8(sseu->subslice_mask[s]), sseu->subslice_mask[s]); } @@ -158,6 +158,45 @@ static u16 compute_eu_total(const struct sseu_dev_info *sseu) return total; } +static void gen11_sseu_info_init(struct drm_i915_private *dev_priv) +{ + struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu; + u8 s_en; + u32 ss_en, ss_en_mask; + u8 eu_en; + int s; + + sseu->max_slices = 1; + sseu->max_subslices = 8; + sseu->max_eus_per_subslice = 8; + + s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK; + ss_en = ~I915_READ(GEN11_GT_SUBSLICE_DISABLE); + ss_en_mask = BIT(sseu->max_subslices) - 1; + eu_en = ~(I915_READ(GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK); + + for (s = 0; s < sseu->max_slices; s++) { + if (s_en & BIT(s)) { + int ss_idx = sseu->max_subslices * s; + int ss; + + sseu->slice_mask |= BIT(s); + sseu->subslice_mask[s] = (ss_en >> ss_idx) & ss_en_mask; + for (ss = 0; ss < sseu->max_subslices; ss++) { + if (sseu->subslice_mask[s] & BIT(ss)) + sseu_set_eus(sseu, s, ss, eu_en); + } + } + } + sseu->eu_per_subslice = hweight8(eu_en); + sseu->eu_total = compute_eu_total(sseu); + + /* ICL has no power gating restrictions. */ + sseu->has_slice_pg = 1; + sseu->has_subslice_pg = 1; + sseu->has_eu_pg = 1; +} + static void gen10_sseu_info_init(struct drm_i915_private *dev_priv) { struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu; @@ -557,6 +596,52 @@ static u32 read_reference_ts_freq(struct drm_i915_private *dev_priv) return base_freq + frac_freq; } +static u32 gen10_get_crystal_clock_freq(struct drm_i915_private *dev_priv, + u32 rpm_config_reg) +{ + u32 f19_2_mhz = 19200; + u32 f24_mhz = 24000; + u32 crystal_clock = (rpm_config_reg & + GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >> + GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT; + + switch (crystal_clock) { + case GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ: + return f19_2_mhz; + case GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ: + return f24_mhz; + default: + MISSING_CASE(crystal_clock); + return 0; + } +} + +static u32 gen11_get_crystal_clock_freq(struct drm_i915_private *dev_priv, + u32 rpm_config_reg) +{ + u32 f19_2_mhz = 19200; + u32 f24_mhz = 24000; + u32 f25_mhz = 25000; + u32 f38_4_mhz = 38400; + u32 crystal_clock = (rpm_config_reg & + GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >> + GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT; + + switch (crystal_clock) { + case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ: + return f24_mhz; + case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ: + return f19_2_mhz; + case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ: + return f38_4_mhz; + case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ: + return f25_mhz; + default: + MISSING_CASE(crystal_clock); + return 0; + } +} + static u32 read_timestamp_frequency(struct drm_i915_private *dev_priv) { u32 f12_5_mhz = 12500; @@ -597,10 +682,9 @@ static u32 read_timestamp_frequency(struct drm_i915_private *dev_priv) } return freq; - } else if (INTEL_GEN(dev_priv) <= 10) { + } else if (INTEL_GEN(dev_priv) <= 11) { u32 ctc_reg = I915_READ(CTC_MODE); u32 freq = 0; - u32 rpm_config_reg = 0; /* First figure out the reference frequency. There are 2 ways * we can compute the frequency, either through the @@ -610,20 +694,14 @@ static u32 read_timestamp_frequency(struct drm_i915_private *dev_priv) if ((ctc_reg & CTC_SOURCE_PARAMETER_MASK) == CTC_SOURCE_DIVIDE_LOGIC) { freq = read_reference_ts_freq(dev_priv); } else { - u32 crystal_clock; - - rpm_config_reg = I915_READ(RPM_CONFIG0); - crystal_clock = (rpm_config_reg & - GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >> - GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT; - switch (crystal_clock) { - case GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ: - freq = f19_2_mhz; - break; - case GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ: - freq = f24_mhz; - break; - } + u32 rpm_config_reg = I915_READ(RPM_CONFIG0); + + if (INTEL_GEN(dev_priv) <= 10) + freq = gen10_get_crystal_clock_freq(dev_priv, + rpm_config_reg); + else + freq = gen11_get_crystal_clock_freq(dev_priv, + rpm_config_reg); /* Now figure out how the command stream's timestamp * register increments from this frequency (it might @@ -768,8 +846,10 @@ void intel_device_info_runtime_init(struct intel_device_info *info) broadwell_sseu_info_init(dev_priv); else if (INTEL_GEN(dev_priv) == 9) gen9_sseu_info_init(dev_priv); - else if (INTEL_GEN(dev_priv) >= 10) + else if (INTEL_GEN(dev_priv) == 10) gen10_sseu_info_init(dev_priv); + else if (INTEL_GEN(dev_priv) >= 11) + gen11_sseu_info_init(dev_priv); /* Initialize command stream timestamp frequency */ info->cs_timestamp_frequency_khz = read_timestamp_frequency(dev_priv); @@ -780,3 +860,50 @@ void intel_driver_caps_print(const struct intel_driver_caps *caps, { drm_printf(p, "scheduler: %x\n", caps->scheduler); } + +/* + * Determine which engines are fused off in our particular hardware. Since the + * fuse register is in the blitter powerwell, we need forcewake to be ready at + * this point (but later we need to prune the forcewake domains for engines that + * are indeed fused off). + */ +void intel_device_info_init_mmio(struct drm_i915_private *dev_priv) +{ + struct intel_device_info *info = mkwrite_device_info(dev_priv); + u8 vdbox_disable, vebox_disable; + u32 media_fuse; + int i; + + if (INTEL_GEN(dev_priv) < 11) + return; + + media_fuse = I915_READ(GEN11_GT_VEBOX_VDBOX_DISABLE); + + vdbox_disable = media_fuse & GEN11_GT_VDBOX_DISABLE_MASK; + vebox_disable = (media_fuse & GEN11_GT_VEBOX_DISABLE_MASK) >> + GEN11_GT_VEBOX_DISABLE_SHIFT; + + DRM_DEBUG_DRIVER("vdbox disable: %04x\n", vdbox_disable); + for (i = 0; i < I915_MAX_VCS; i++) { + if (!HAS_ENGINE(dev_priv, _VCS(i))) + continue; + + if (!(BIT(i) & vdbox_disable)) + continue; + + info->ring_mask &= ~ENGINE_MASK(_VCS(i)); + DRM_DEBUG_DRIVER("vcs%u fused off\n", i); + } + + DRM_DEBUG_DRIVER("vebox disable: %04x\n", vebox_disable); + for (i = 0; i < I915_MAX_VECS; i++) { + if (!HAS_ENGINE(dev_priv, _VECS(i))) + continue; + + if (!(BIT(i) & vebox_disable)) + continue; + + info->ring_mask &= ~ENGINE_MASK(_VECS(i)); + DRM_DEBUG_DRIVER("vecs%u fused off\n", i); + } +} diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index 0835752..933e316 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -114,7 +114,7 @@ enum intel_platform { func(has_ipc); #define GEN_MAX_SLICES (6) /* CNL upper bound */ -#define GEN_MAX_SUBSLICES (7) +#define GEN_MAX_SUBSLICES (8) /* ICL upper bound */ struct sseu_dev_info { u8 slice_mask; @@ -247,6 +247,8 @@ void intel_device_info_dump_runtime(const struct intel_device_info *info, void intel_device_info_dump_topology(const struct sseu_dev_info *sseu, struct drm_printer *p); +void intel_device_info_init_mmio(struct drm_i915_private *dev_priv); + void intel_driver_caps_print(const struct intel_driver_caps *caps, struct drm_printer *p); diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 3b48fd2..ad588d5 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -88,6 +88,22 @@ static const uint32_t skl_primary_formats[] = { DRM_FORMAT_VYUY, }; +static const uint32_t skl_pri_planar_formats[] = { + DRM_FORMAT_C8, + DRM_FORMAT_RGB565, + DRM_FORMAT_XRGB8888, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_ABGR8888, + DRM_FORMAT_XRGB2101010, + DRM_FORMAT_XBGR2101010, + DRM_FORMAT_YUYV, + DRM_FORMAT_YVYU, + DRM_FORMAT_UYVY, + DRM_FORMAT_VYUY, + DRM_FORMAT_NV12, +}; + static const uint64_t skl_format_modifiers_noccs[] = { I915_FORMAT_MOD_Yf_TILED, I915_FORMAT_MOD_Y_TILED, @@ -488,6 +504,33 @@ static const struct intel_limit intel_limits_bxt = { .p2 = { .p2_slow = 1, .p2_fast = 20 }, }; +static void +skl_wa_528(struct drm_i915_private *dev_priv, int pipe, bool enable) +{ + if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv)) + return; + + if (enable) + I915_WRITE(CHICKEN_PIPESL_1(pipe), HSW_FBCQ_DIS); + else + I915_WRITE(CHICKEN_PIPESL_1(pipe), 0); +} + +static void +skl_wa_clkgate(struct drm_i915_private *dev_priv, int pipe, bool enable) +{ + if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv)) + return; + + if (enable) + I915_WRITE(CLKGATE_DIS_PSL(pipe), + DUPS1_GATING_DIS | DUPS2_GATING_DIS); + else + I915_WRITE(CLKGATE_DIS_PSL(pipe), + I915_READ(CLKGATE_DIS_PSL(pipe)) & + ~(DUPS1_GATING_DIS | DUPS2_GATING_DIS)); +} + static bool needs_modeset(const struct drm_crtc_state *state) { @@ -2657,11 +2700,13 @@ static int i9xx_format_to_fourcc(int format) } } -static int skl_format_to_fourcc(int format, bool rgb_order, bool alpha) +int skl_format_to_fourcc(int format, bool rgb_order, bool alpha) { switch (format) { case PLANE_CTL_FORMAT_RGB_565: return DRM_FORMAT_RGB565; + case PLANE_CTL_FORMAT_NV12: + return DRM_FORMAT_NV12; default: case PLANE_CTL_FORMAT_XRGB_8888: if (rgb_order) { @@ -2824,7 +2869,7 @@ intel_find_initial_plane_obj(struct intel_crtc *intel_crtc, continue; if (intel_plane_ggtt_offset(state) == plane_config->base) { - fb = c->primary->fb; + fb = state->base.fb; drm_framebuffer_get(fb); goto valid_fb; } @@ -2858,6 +2903,9 @@ valid_fb: return; } + obj = intel_fb_obj(fb); + intel_fb_obj_flush(obj, ORIGIN_DIRTYFB); + plane_state->src_x = 0; plane_state->src_y = 0; plane_state->src_w = fb->width << 16; @@ -2871,7 +2919,6 @@ valid_fb: intel_state->base.src = drm_plane_state_src(plane_state); intel_state->base.dst = drm_plane_state_dest(plane_state); - obj = intel_fb_obj(fb); if (i915_gem_object_is_tiled(obj)) dev_priv->preserve_bios_swizzle = true; @@ -3071,6 +3118,29 @@ static int skl_check_main_surface(const struct intel_crtc_state *crtc_state, return 0; } +static int +skl_check_nv12_surface(const struct intel_crtc_state *crtc_state, + struct intel_plane_state *plane_state) +{ + /* Display WA #1106 */ + if (plane_state->base.rotation != + (DRM_MODE_REFLECT_X | DRM_MODE_ROTATE_90) && + plane_state->base.rotation != DRM_MODE_ROTATE_270) + return 0; + + /* + * src coordinates are rotated here. + * We check height but report it as width + */ + if (((drm_rect_height(&plane_state->base.src) >> 16) % 4) != 0) { + DRM_DEBUG_KMS("src width must be multiple " + "of 4 for rotated NV12\n"); + return -EINVAL; + } + + return 0; +} + static int skl_check_nv12_aux_surface(struct intel_plane_state *plane_state) { const struct drm_framebuffer *fb = plane_state->base.fb; @@ -3154,6 +3224,9 @@ int skl_check_plane_surface(const struct intel_crtc_state *crtc_state, * the main surface setup depends on it. */ if (fb->format->format == DRM_FORMAT_NV12) { + ret = skl_check_nv12_surface(crtc_state, plane_state); + if (ret) + return ret; ret = skl_check_nv12_aux_surface(plane_state); if (ret) return ret; @@ -3464,6 +3537,8 @@ static u32 skl_plane_ctl_format(uint32_t pixel_format) return PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_UYVY; case DRM_FORMAT_VYUY: return PLANE_CTL_FORMAT_YUV422 | PLANE_CTL_YUV422_VYUY; + case DRM_FORMAT_NV12: + return PLANE_CTL_FORMAT_NV12; default: MISSING_CASE(pixel_format); } @@ -3602,15 +3677,24 @@ u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, u32 glk_plane_color_ctl(const struct intel_crtc_state *crtc_state, const struct intel_plane_state *plane_state) { + struct drm_i915_private *dev_priv = + to_i915(plane_state->base.plane->dev); const struct drm_framebuffer *fb = plane_state->base.fb; u32 plane_color_ctl = 0; - plane_color_ctl |= PLANE_COLOR_PIPE_GAMMA_ENABLE; - plane_color_ctl |= PLANE_COLOR_PIPE_CSC_ENABLE; + if (INTEL_GEN(dev_priv) < 11) { + plane_color_ctl |= PLANE_COLOR_PIPE_GAMMA_ENABLE; + plane_color_ctl |= PLANE_COLOR_PIPE_CSC_ENABLE; + } plane_color_ctl |= PLANE_COLOR_PLANE_GAMMA_DISABLE; plane_color_ctl |= glk_plane_color_ctl_alpha(fb->format->format); if (intel_format_is_yuv(fb->format->format)) { + if (fb->format->format == DRM_FORMAT_NV12) { + plane_color_ctl |= + PLANE_COLOR_CSC_MODE_YUV709_TO_RGB709; + goto out; + } if (plane_state->base.color_encoding == DRM_COLOR_YCBCR_BT709) plane_color_ctl |= PLANE_COLOR_CSC_MODE_YUV709_TO_RGB709; else @@ -3619,7 +3703,7 @@ u32 glk_plane_color_ctl(const struct intel_crtc_state *crtc_state, if (plane_state->base.color_range == DRM_COLOR_YCBCR_FULL_RANGE) plane_color_ctl |= PLANE_COLOR_YUV_RANGE_CORRECTION_DISABLE; } - +out: return plane_color_ctl; } @@ -3675,7 +3759,6 @@ void intel_prepare_reset(struct drm_i915_private *dev_priv) struct drm_atomic_state *state; int ret; - /* reset doesn't touch the display */ if (!i915_modparams.force_reset_modeset_test && !gpu_reset_clobbers_display(dev_priv)) @@ -3729,19 +3812,17 @@ void intel_finish_reset(struct drm_i915_private *dev_priv) { struct drm_device *dev = &dev_priv->drm; struct drm_modeset_acquire_ctx *ctx = &dev_priv->reset_ctx; - struct drm_atomic_state *state = dev_priv->modeset_restore_state; + struct drm_atomic_state *state; int ret; /* reset doesn't touch the display */ - if (!i915_modparams.force_reset_modeset_test && - !gpu_reset_clobbers_display(dev_priv)) + if (!test_bit(I915_RESET_MODESET, &dev_priv->gpu_error.flags)) return; + state = fetch_and_zero(&dev_priv->modeset_restore_state); if (!state) goto unlock; - dev_priv->modeset_restore_state = NULL; - /* reset doesn't touch the display */ if (!gpu_reset_clobbers_display(dev_priv)) { /* for testing only restore the display */ @@ -4703,7 +4784,9 @@ static void cpt_verify_modeset(struct drm_device *dev, int pipe) static int skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach, unsigned int scaler_user, int *scaler_id, - int src_w, int src_h, int dst_w, int dst_h) + int src_w, int src_h, int dst_w, int dst_h, + bool plane_scaler_check, + uint32_t pixel_format) { struct intel_crtc_scaler_state *scaler_state = &crtc_state->scaler_state; @@ -4721,6 +4804,10 @@ skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach, */ need_scaling = src_w != dst_w || src_h != dst_h; + if (plane_scaler_check) + if (pixel_format == DRM_FORMAT_NV12) + need_scaling = true; + if (crtc_state->ycbcr420 && scaler_user == SKL_CRTC_INDEX) need_scaling = true; @@ -4760,12 +4847,21 @@ skl_update_scaler(struct intel_crtc_state *crtc_state, bool force_detach, return 0; } + if (plane_scaler_check && pixel_format == DRM_FORMAT_NV12 && + (src_h < SKL_MIN_YUV_420_SRC_H || src_w < SKL_MIN_YUV_420_SRC_W)) { + DRM_DEBUG_KMS("NV12: src dimensions not met\n"); + return -EINVAL; + } + /* range checks */ if (src_w < SKL_MIN_SRC_W || src_h < SKL_MIN_SRC_H || - dst_w < SKL_MIN_DST_W || dst_h < SKL_MIN_DST_H || - - src_w > SKL_MAX_SRC_W || src_h > SKL_MAX_SRC_H || - dst_w > SKL_MAX_DST_W || dst_h > SKL_MAX_DST_H) { + dst_w < SKL_MIN_DST_W || dst_h < SKL_MIN_DST_H || + (IS_GEN11(dev_priv) && + (src_w > ICL_MAX_SRC_W || src_h > ICL_MAX_SRC_H || + dst_w > ICL_MAX_DST_W || dst_h > ICL_MAX_DST_H)) || + (!IS_GEN11(dev_priv) && + (src_w > SKL_MAX_SRC_W || src_h > SKL_MAX_SRC_H || + dst_w > SKL_MAX_DST_W || dst_h > SKL_MAX_DST_H))) { DRM_DEBUG_KMS("scaler_user index %u.%u: src %ux%u dst %ux%u " "size is out of scaler range\n", intel_crtc->pipe, scaler_user, src_w, src_h, dst_w, dst_h); @@ -4796,9 +4892,10 @@ int skl_update_scaler_crtc(struct intel_crtc_state *state) const struct drm_display_mode *adjusted_mode = &state->base.adjusted_mode; return skl_update_scaler(state, !state->base.active, SKL_CRTC_INDEX, - &state->scaler_state.scaler_id, - state->pipe_src_w, state->pipe_src_h, - adjusted_mode->crtc_hdisplay, adjusted_mode->crtc_vdisplay); + &state->scaler_state.scaler_id, + state->pipe_src_w, state->pipe_src_h, + adjusted_mode->crtc_hdisplay, + adjusted_mode->crtc_vdisplay, false, 0); } /** @@ -4827,7 +4924,8 @@ static int skl_update_scaler_plane(struct intel_crtc_state *crtc_state, drm_rect_width(&plane_state->base.src) >> 16, drm_rect_height(&plane_state->base.src) >> 16, drm_rect_width(&plane_state->base.dst), - drm_rect_height(&plane_state->base.dst)); + drm_rect_height(&plane_state->base.dst), + fb ? true : false, fb ? fb->format->format : 0); if (ret || plane_state->scaler_id < 0) return ret; @@ -4853,6 +4951,7 @@ static int skl_update_scaler_plane(struct intel_crtc_state *crtc_state, case DRM_FORMAT_YVYU: case DRM_FORMAT_UYVY: case DRM_FORMAT_VYUY: + case DRM_FORMAT_NV12: break; default: DRM_DEBUG_KMS("[PLANE:%d:%s] FB:%d unsupported scaling format 0x%x\n", @@ -5096,16 +5195,34 @@ static bool hsw_post_update_enable_ips(const struct intel_crtc_state *old_crtc_s return !old_crtc_state->ips_enabled; } +static bool needs_nv12_wa(struct drm_i915_private *dev_priv, + const struct intel_crtc_state *crtc_state) +{ + if (!crtc_state->nv12_planes) + return false; + + if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv)) + return false; + + if ((INTEL_GEN(dev_priv) == 9 && !IS_GEMINILAKE(dev_priv)) || + IS_CANNONLAKE(dev_priv)) + return true; + + return false; +} + static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state) { struct intel_crtc *crtc = to_intel_crtc(old_crtc_state->base.crtc); + struct drm_device *dev = crtc->base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); struct drm_atomic_state *old_state = old_crtc_state->base.state; struct intel_crtc_state *pipe_config = intel_atomic_get_new_crtc_state(to_intel_atomic_state(old_state), crtc); struct drm_plane *primary = crtc->base.primary; - struct drm_plane_state *old_pri_state = - drm_atomic_get_existing_plane_state(old_state, primary); + struct drm_plane_state *old_primary_state = + drm_atomic_get_old_plane_state(old_state, primary); intel_frontbuffer_flip(to_i915(crtc->base.dev), pipe_config->fb_bits); @@ -5115,20 +5232,24 @@ static void intel_post_plane_update(struct intel_crtc_state *old_crtc_state) if (hsw_post_update_enable_ips(old_crtc_state, pipe_config)) hsw_enable_ips(pipe_config); - if (old_pri_state) { - struct intel_plane_state *primary_state = - intel_atomic_get_new_plane_state(to_intel_atomic_state(old_state), - to_intel_plane(primary)); - struct intel_plane_state *old_primary_state = - to_intel_plane_state(old_pri_state); + if (old_primary_state) { + struct drm_plane_state *new_primary_state = + drm_atomic_get_new_plane_state(old_state, primary); intel_fbc_post_update(crtc); - if (primary_state->base.visible && + if (new_primary_state->visible && (needs_modeset(&pipe_config->base) || - !old_primary_state->base.visible)) + !old_primary_state->visible)) intel_post_enable_primary(&crtc->base, pipe_config); } + + /* Display WA 827 */ + if (needs_nv12_wa(dev_priv, old_crtc_state) && + !needs_nv12_wa(dev_priv, pipe_config)) { + skl_wa_clkgate(dev_priv, crtc->pipe, false); + skl_wa_528(dev_priv, crtc->pipe, false); + } } static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state, @@ -5139,8 +5260,8 @@ static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state, struct drm_i915_private *dev_priv = to_i915(dev); struct drm_atomic_state *old_state = old_crtc_state->base.state; struct drm_plane *primary = crtc->base.primary; - struct drm_plane_state *old_pri_state = - drm_atomic_get_existing_plane_state(old_state, primary); + struct drm_plane_state *old_primary_state = + drm_atomic_get_old_plane_state(old_state, primary); bool modeset = needs_modeset(&pipe_config->base); struct intel_atomic_state *old_intel_state = to_intel_atomic_state(old_state); @@ -5148,23 +5269,28 @@ static void intel_pre_plane_update(struct intel_crtc_state *old_crtc_state, if (hsw_pre_update_disable_ips(old_crtc_state, pipe_config)) hsw_disable_ips(old_crtc_state); - if (old_pri_state) { - struct intel_plane_state *primary_state = + if (old_primary_state) { + struct intel_plane_state *new_primary_state = intel_atomic_get_new_plane_state(old_intel_state, to_intel_plane(primary)); - struct intel_plane_state *old_primary_state = - to_intel_plane_state(old_pri_state); - intel_fbc_pre_update(crtc, pipe_config, primary_state); + intel_fbc_pre_update(crtc, pipe_config, new_primary_state); /* * Gen2 reports pipe underruns whenever all planes are disabled. * So disable underrun reporting before all the planes get disabled. */ - if (IS_GEN2(dev_priv) && old_primary_state->base.visible && - (modeset || !primary_state->base.visible)) + if (IS_GEN2(dev_priv) && old_primary_state->visible && + (modeset || !new_primary_state->base.visible)) intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, false); } + /* Display WA 827 */ + if (!needs_nv12_wa(dev_priv, old_crtc_state) && + needs_nv12_wa(dev_priv, pipe_config)) { + skl_wa_clkgate(dev_priv, crtc->pipe, true); + skl_wa_528(dev_priv, crtc->pipe, true); + } + /* * Vblank time updates from the shadow to live plane control register * are blocked if the memory self-refresh mode is active at that @@ -5499,6 +5625,9 @@ static void haswell_crtc_enable(struct intel_crtc_state *pipe_config, if (intel_crtc->config->shared_dpll) intel_enable_shared_dpll(intel_crtc); + if (INTEL_GEN(dev_priv) >= 11) + icl_map_plls_to_ports(crtc, pipe_config, old_state); + if (intel_crtc_has_dp_encoder(intel_crtc->config)) intel_dp_set_m_n(intel_crtc, M1_N1); @@ -5696,6 +5825,9 @@ static void haswell_crtc_disable(struct intel_crtc_state *old_crtc_state, intel_ddi_disable_pipe_clock(intel_crtc->config); intel_encoders_post_disable(crtc, old_crtc_state, old_state); + + if (INTEL_GEN(dev_priv) >= 11) + icl_unmap_plls_to_ports(crtc, old_crtc_state, old_state); } static void i9xx_pfit_enable(struct intel_crtc *crtc) @@ -8766,8 +8898,8 @@ static bool ironlake_get_pipe_config(struct intel_crtc *crtc, intel_get_shared_dpll_by_id(dev_priv, pll_id); pll = pipe_config->shared_dpll; - WARN_ON(!pll->funcs.get_hw_state(dev_priv, pll, - &pipe_config->dpll_hw_state)); + WARN_ON(!pll->info->funcs->get_hw_state(dev_priv, pll, + &pipe_config->dpll_hw_state)); tmp = pipe_config->dpll_hw_state.dpll; pipe_config->pixel_multiplier = @@ -9243,8 +9375,8 @@ static void haswell_get_ddi_port_state(struct intel_crtc *crtc, pll = pipe_config->shared_dpll; if (pll) { - WARN_ON(!pll->funcs.get_hw_state(dev_priv, pll, - &pipe_config->dpll_hw_state)); + WARN_ON(!pll->info->funcs->get_hw_state(dev_priv, pll, + &pipe_config->dpll_hw_state)); } /* @@ -9974,6 +10106,8 @@ found: ret = PTR_ERR_OR_ZERO(drm_atomic_get_connector_state(restore_state, connector)); if (!ret) ret = PTR_ERR_OR_ZERO(drm_atomic_get_crtc_state(restore_state, crtc)); + if (!ret) + ret = drm_atomic_add_affected_planes(restore_state, crtc); if (ret) { DRM_DEBUG_KMS("Failed to create a copy of old state to restore: %i\n", ret); goto fail; @@ -10773,7 +10907,7 @@ static bool check_digital_port_conflicts(struct drm_atomic_state *state) struct drm_connector_state *connector_state; struct intel_encoder *encoder; - connector_state = drm_atomic_get_existing_connector_state(state, connector); + connector_state = drm_atomic_get_new_connector_state(state, connector); if (!connector_state) connector_state = connector->state; @@ -11085,39 +11219,42 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, (current_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED) && !(pipe_config->base.mode.private_flags & I915_MODE_FLAG_INHERITED); -#define PIPE_CONF_CHECK_X(name) \ +#define PIPE_CONF_CHECK_X(name) do { \ if (current_config->name != pipe_config->name) { \ pipe_config_err(adjust, __stringify(name), \ "(expected 0x%08x, found 0x%08x)\n", \ current_config->name, \ pipe_config->name); \ ret = false; \ - } + } \ +} while (0) -#define PIPE_CONF_CHECK_I(name) \ +#define PIPE_CONF_CHECK_I(name) do { \ if (current_config->name != pipe_config->name) { \ pipe_config_err(adjust, __stringify(name), \ "(expected %i, found %i)\n", \ current_config->name, \ pipe_config->name); \ ret = false; \ - } + } \ +} while (0) -#define PIPE_CONF_CHECK_BOOL(name) \ +#define PIPE_CONF_CHECK_BOOL(name) do { \ if (current_config->name != pipe_config->name) { \ pipe_config_err(adjust, __stringify(name), \ "(expected %s, found %s)\n", \ yesno(current_config->name), \ yesno(pipe_config->name)); \ ret = false; \ - } + } \ +} while (0) /* * Checks state where we only read out the enabling, but not the entire * state itself (like full infoframes or ELD for audio). These states * require a full modeset on bootup to fix up. */ -#define PIPE_CONF_CHECK_BOOL_INCOMPLETE(name) \ +#define PIPE_CONF_CHECK_BOOL_INCOMPLETE(name) do { \ if (!fixup_inherited || (!current_config->name && !pipe_config->name)) { \ PIPE_CONF_CHECK_BOOL(name); \ } else { \ @@ -11126,18 +11263,20 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, yesno(current_config->name), \ yesno(pipe_config->name)); \ ret = false; \ - } + } \ +} while (0) -#define PIPE_CONF_CHECK_P(name) \ +#define PIPE_CONF_CHECK_P(name) do { \ if (current_config->name != pipe_config->name) { \ pipe_config_err(adjust, __stringify(name), \ "(expected %p, found %p)\n", \ current_config->name, \ pipe_config->name); \ ret = false; \ - } + } \ +} while (0) -#define PIPE_CONF_CHECK_M_N(name) \ +#define PIPE_CONF_CHECK_M_N(name) do { \ if (!intel_compare_link_m_n(¤t_config->name, \ &pipe_config->name,\ adjust)) { \ @@ -11155,14 +11294,15 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, pipe_config->name.link_m, \ pipe_config->name.link_n); \ ret = false; \ - } + } \ +} while (0) /* This is required for BDW+ where there is only one set of registers for * switching between high and low RR. * This macro can be used whenever a comparison has to be made between one * hw state and multiple sw state variables. */ -#define PIPE_CONF_CHECK_M_N_ALT(name, alt_name) \ +#define PIPE_CONF_CHECK_M_N_ALT(name, alt_name) do { \ if (!intel_compare_link_m_n(¤t_config->name, \ &pipe_config->name, adjust) && \ !intel_compare_link_m_n(¤t_config->alt_name, \ @@ -11187,9 +11327,10 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, pipe_config->name.link_m, \ pipe_config->name.link_n); \ ret = false; \ - } + } \ +} while (0) -#define PIPE_CONF_CHECK_FLAGS(name, mask) \ +#define PIPE_CONF_CHECK_FLAGS(name, mask) do { \ if ((current_config->name ^ pipe_config->name) & (mask)) { \ pipe_config_err(adjust, __stringify(name), \ "(%x) (expected %i, found %i)\n", \ @@ -11197,16 +11338,18 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, current_config->name & (mask), \ pipe_config->name & (mask)); \ ret = false; \ - } + } \ +} while (0) -#define PIPE_CONF_CHECK_CLOCK_FUZZY(name) \ +#define PIPE_CONF_CHECK_CLOCK_FUZZY(name) do { \ if (!intel_fuzzy_clock_check(current_config->name, pipe_config->name)) { \ pipe_config_err(adjust, __stringify(name), \ "(expected %i, found %i)\n", \ current_config->name, \ pipe_config->name); \ ret = false; \ - } + } \ +} while (0) #define PIPE_CONF_QUIRK(quirk) \ ((current_config->quirks | pipe_config->quirks) & (quirk)) @@ -11315,6 +11458,16 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv, PIPE_CONF_CHECK_X(dpll_hw_state.pll9); PIPE_CONF_CHECK_X(dpll_hw_state.pll10); PIPE_CONF_CHECK_X(dpll_hw_state.pcsdw12); + PIPE_CONF_CHECK_X(dpll_hw_state.mg_refclkin_ctl); + PIPE_CONF_CHECK_X(dpll_hw_state.mg_clktop2_coreclkctl1); + PIPE_CONF_CHECK_X(dpll_hw_state.mg_clktop2_hsclkctl); + PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_div0); + PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_div1); + PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_lf); + PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_frac_lock); + PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_ssc); + PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_bias); + PIPE_CONF_CHECK_X(dpll_hw_state.mg_pll_tdc_coldst_bias); PIPE_CONF_CHECK_X(dsi_pll.ctrl); PIPE_CONF_CHECK_X(dsi_pll.div); @@ -11378,6 +11531,11 @@ static void verify_wm_state(struct drm_crtc *crtc, skl_ddb_get_hw_state(dev_priv, &hw_ddb); sw_ddb = &dev_priv->wm.skl_hw.ddb; + if (INTEL_GEN(dev_priv) >= 11) + if (hw_ddb.enabled_slices != sw_ddb->enabled_slices) + DRM_ERROR("mismatch in DBUF Slices (expected %u, got %u)\n", + sw_ddb->enabled_slices, + hw_ddb.enabled_slices); /* planes */ for_each_universal_plane(dev_priv, pipe, plane) { hw_plane_wm = &hw_wm.planes[plane]; @@ -11643,11 +11801,11 @@ verify_single_dpll_state(struct drm_i915_private *dev_priv, memset(&dpll_hw_state, 0, sizeof(dpll_hw_state)); - DRM_DEBUG_KMS("%s\n", pll->name); + DRM_DEBUG_KMS("%s\n", pll->info->name); - active = pll->funcs.get_hw_state(dev_priv, pll, &dpll_hw_state); + active = pll->info->funcs->get_hw_state(dev_priv, pll, &dpll_hw_state); - if (!(pll->flags & INTEL_DPLL_ALWAYS_ON)) { + if (!(pll->info->flags & INTEL_DPLL_ALWAYS_ON)) { I915_STATE_WARN(!pll->on && pll->active_mask, "pll in active use but not on in sw tracking\n"); I915_STATE_WARN(pll->on && !pll->active_mask, @@ -12136,20 +12294,23 @@ static void intel_update_crtc(struct drm_crtc *crtc, struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_crtc_state *pipe_config = to_intel_crtc_state(new_crtc_state); bool modeset = needs_modeset(new_crtc_state); + struct intel_plane_state *new_plane_state = + intel_atomic_get_new_plane_state(to_intel_atomic_state(state), + to_intel_plane(crtc->primary)); if (modeset) { update_scanline_offset(intel_crtc); dev_priv->display.crtc_enable(pipe_config, state); + + /* vblanks work again, re-enable pipe CRC. */ + intel_crtc_enable_pipe_crc(intel_crtc); } else { intel_pre_plane_update(to_intel_crtc_state(old_crtc_state), pipe_config); } - if (drm_atomic_get_existing_plane_state(state, crtc->primary)) { - intel_fbc_enable( - intel_crtc, pipe_config, - to_intel_plane_state(crtc->primary->state)); - } + if (new_plane_state) + intel_fbc_enable(intel_crtc, pipe_config, new_plane_state); drm_atomic_helper_commit_planes_on_crtc(old_crtc_state); } @@ -12181,6 +12342,8 @@ static void skl_update_crtcs(struct drm_atomic_state *state) bool progress; enum pipe pipe; int i; + u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices; + u8 required_slices = intel_state->wm_results.ddb.enabled_slices; const struct skl_ddb_entry *entries[I915_MAX_PIPES] = {}; @@ -12189,6 +12352,10 @@ static void skl_update_crtcs(struct drm_atomic_state *state) if (new_crtc_state->active) entries[i] = &to_intel_crtc_state(old_crtc_state)->wm.skl.ddb; + /* If 2nd DBuf slice required, enable it here */ + if (INTEL_GEN(dev_priv) >= 11 && required_slices > hw_enabled_slices) + icl_dbuf_slices_update(dev_priv, required_slices); + /* * Whenever the number of active pipes changes, we need to make sure we * update the pipes in the right order so that their ddb allocations @@ -12239,6 +12406,10 @@ static void skl_update_crtcs(struct drm_atomic_state *state) progress = true; } } while (progress); + + /* If 2nd DBuf slice is no more required disable it */ + if (INTEL_GEN(dev_priv) >= 11 && required_slices < hw_enabled_slices) + icl_dbuf_slices_update(dev_priv, required_slices); } static void intel_atomic_helper_free_state(struct drm_i915_private *dev_priv) @@ -12320,6 +12491,13 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) if (old_crtc_state->active) { intel_crtc_disable_planes(crtc, old_crtc_state->plane_mask); + + /* + * We need to disable pipe CRC before disabling the pipe, + * or we race against vblank off. + */ + intel_crtc_disable_pipe_crc(intel_crtc); + dev_priv->display.crtc_disable(to_intel_crtc_state(old_crtc_state), state); intel_crtc->active = false; intel_fbc_disable(intel_crtc); @@ -12695,6 +12873,15 @@ static void intel_plane_unpin_fb(struct intel_plane_state *old_plane_state) intel_unpin_fb_vma(vma, old_plane_state->flags); } +static void fb_obj_bump_render_priority(struct drm_i915_gem_object *obj) +{ + struct i915_sched_attr attr = { + .priority = I915_PRIORITY_DISPLAY, + }; + + i915_gem_object_wait_priority(obj, 0, &attr); +} + /** * intel_prepare_plane_fb - Prepare fb for usage on plane * @plane: drm plane to prepare for @@ -12723,8 +12910,8 @@ intel_prepare_plane_fb(struct drm_plane *plane, if (old_obj) { struct drm_crtc_state *crtc_state = - drm_atomic_get_existing_crtc_state(new_state->state, - plane->state->crtc); + drm_atomic_get_new_crtc_state(new_state->state, + plane->state->crtc); /* Big Hammer, we also need to ensure that any pending * MI_WAIT_FOR_EVENT inside a user batch buffer on the @@ -12771,13 +12958,15 @@ intel_prepare_plane_fb(struct drm_plane *plane, ret = intel_plane_pin_fb(to_intel_plane_state(new_state)); - i915_gem_object_wait_priority(obj, 0, I915_PRIORITY_DISPLAY); + fb_obj_bump_render_priority(obj); mutex_unlock(&dev_priv->drm.struct_mutex); i915_gem_object_unpin_pages(obj); if (ret) return ret; + intel_fb_obj_flush(obj, ORIGIN_DIRTYFB); + if (!new_state->fence) { /* implicit fencing */ struct dma_fence *fence; @@ -12822,11 +13011,13 @@ intel_cleanup_plane_fb(struct drm_plane *plane, } int -skl_max_scale(struct intel_crtc *intel_crtc, struct intel_crtc_state *crtc_state) +skl_max_scale(struct intel_crtc *intel_crtc, + struct intel_crtc_state *crtc_state, + uint32_t pixel_format) { struct drm_i915_private *dev_priv; - int max_scale; - int crtc_clock, max_dotclk; + int max_scale, mult; + int crtc_clock, max_dotclk, tmpclk1, tmpclk2; if (!intel_crtc || !crtc_state->base.enable) return DRM_PLANE_HELPER_NO_SCALING; @@ -12848,8 +13039,10 @@ skl_max_scale(struct intel_crtc *intel_crtc, struct intel_crtc_state *crtc_state * or * cdclk/crtc_clock */ - max_scale = min((1 << 16) * 3 - 1, - (1 << 8) * ((max_dotclk << 8) / crtc_clock)); + mult = pixel_format == DRM_FORMAT_NV12 ? 2 : 3; + tmpclk1 = (1 << 16) * mult - 1; + tmpclk2 = (1 << 8) * ((max_dotclk << 8) / crtc_clock); + max_scale = min(tmpclk1, tmpclk2); return max_scale; } @@ -12865,12 +13058,16 @@ intel_check_primary_plane(struct intel_plane *plane, int max_scale = DRM_PLANE_HELPER_NO_SCALING; bool can_position = false; int ret; + uint32_t pixel_format = 0; if (INTEL_GEN(dev_priv) >= 9) { /* use scaler when colorkey is not required */ if (!state->ckey.flags) { min_scale = 1; - max_scale = skl_max_scale(to_intel_crtc(crtc), crtc_state); + if (state->base.fb) + pixel_format = state->base.fb->format->format; + max_scale = skl_max_scale(to_intel_crtc(crtc), + crtc_state, pixel_format); } can_position = true; } @@ -12943,10 +13140,25 @@ out: intel_cstate); } +void intel_crtc_arm_fifo_underrun(struct intel_crtc *crtc, + struct intel_crtc_state *crtc_state) +{ + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev); + + if (!IS_GEN2(dev_priv)) + intel_set_cpu_fifo_underrun_reporting(dev_priv, crtc->pipe, true); + + if (crtc_state->has_pch_encoder) { + enum pipe pch_transcoder = + intel_crtc_pch_transcoder(crtc); + + intel_set_pch_fifo_underrun_reporting(dev_priv, pch_transcoder, true); + } +} + static void intel_finish_crtc_commit(struct drm_crtc *crtc, struct drm_crtc_state *old_crtc_state) { - struct drm_i915_private *dev_priv = to_i915(crtc->dev); struct intel_crtc *intel_crtc = to_intel_crtc(crtc); struct intel_atomic_state *old_intel_state = to_intel_atomic_state(old_crtc_state->state); @@ -12957,17 +13169,8 @@ static void intel_finish_crtc_commit(struct drm_crtc *crtc, if (new_crtc_state->update_pipe && !needs_modeset(&new_crtc_state->base) && - old_crtc_state->mode.private_flags & I915_MODE_FLAG_INHERITED) { - if (!IS_GEN2(dev_priv)) - intel_set_cpu_fifo_underrun_reporting(dev_priv, intel_crtc->pipe, true); - - if (new_crtc_state->has_pch_encoder) { - enum pipe pch_transcoder = - intel_crtc_pch_transcoder(intel_crtc); - - intel_set_pch_fifo_underrun_reporting(dev_priv, pch_transcoder, true); - } - } + old_crtc_state->mode.private_flags & I915_MODE_FLAG_INHERITED) + intel_crtc_arm_fifo_underrun(intel_crtc, new_crtc_state); } /** @@ -13031,6 +13234,7 @@ static bool skl_mod_supported(uint32_t format, uint64_t modifier) case DRM_FORMAT_YVYU: case DRM_FORMAT_UYVY: case DRM_FORMAT_VYUY: + case DRM_FORMAT_NV12: if (modifier == I915_FORMAT_MOD_Yf_TILED) return true; /* fall through */ @@ -13165,8 +13369,9 @@ intel_legacy_cursor_update(struct drm_plane *plane, if (ret) goto out_unlock; - old_fb = old_plane_state->fb; + intel_fb_obj_flush(intel_fb_obj(fb), ORIGIN_FLIP); + old_fb = old_plane_state->fb; i915_gem_track_fb(intel_fb_obj(old_fb), intel_fb_obj(fb), intel_plane->frontbuffer_bit); @@ -13237,6 +13442,30 @@ static bool skl_plane_has_fbc(struct drm_i915_private *dev_priv, return pipe == PIPE_A && plane_id == PLANE_PRIMARY; } +bool skl_plane_has_planar(struct drm_i915_private *dev_priv, + enum pipe pipe, enum plane_id plane_id) +{ + if (plane_id == PLANE_PRIMARY) { + if (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv)) + return false; + else if ((INTEL_GEN(dev_priv) == 9 && pipe == PIPE_C) && + !IS_GEMINILAKE(dev_priv)) + return false; + } else if (plane_id >= PLANE_SPRITE0) { + if (plane_id == PLANE_CURSOR) + return false; + if (IS_GEMINILAKE(dev_priv) || INTEL_GEN(dev_priv) == 10) { + if (plane_id != PLANE_SPRITE0) + return false; + } else { + if (plane_id != PLANE_SPRITE0 || pipe == PIPE_C || + IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv)) + return false; + } + } + return true; +} + static struct intel_plane * intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) { @@ -13297,8 +13526,13 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) primary->check_plane = intel_check_primary_plane; if (INTEL_GEN(dev_priv) >= 9) { - intel_primary_formats = skl_primary_formats; - num_formats = ARRAY_SIZE(skl_primary_formats); + if (skl_plane_has_planar(dev_priv, pipe, PLANE_PRIMARY)) { + intel_primary_formats = skl_pri_planar_formats; + num_formats = ARRAY_SIZE(skl_pri_planar_formats); + } else { + intel_primary_formats = skl_primary_formats; + num_formats = ARRAY_SIZE(skl_primary_formats); + } if (skl_plane_has_ccs(dev_priv, pipe, PLANE_PRIMARY)) modifiers = skl_format_modifiers_ccs; @@ -13553,10 +13787,17 @@ static int intel_crtc_init(struct drm_i915_private *dev_priv, enum pipe pipe) /* initialize shared scalers */ intel_crtc_init_scalers(intel_crtc, crtc_state); - BUG_ON(pipe >= ARRAY_SIZE(dev_priv->plane_to_crtc_mapping) || - dev_priv->plane_to_crtc_mapping[primary->i9xx_plane] != NULL); - dev_priv->plane_to_crtc_mapping[primary->i9xx_plane] = intel_crtc; - dev_priv->pipe_to_crtc_mapping[intel_crtc->pipe] = intel_crtc; + BUG_ON(pipe >= ARRAY_SIZE(dev_priv->pipe_to_crtc_mapping) || + dev_priv->pipe_to_crtc_mapping[pipe] != NULL); + dev_priv->pipe_to_crtc_mapping[pipe] = intel_crtc; + + if (INTEL_GEN(dev_priv) < 9) { + enum i9xx_plane_id i9xx_plane = primary->i9xx_plane; + + BUG_ON(i9xx_plane >= ARRAY_SIZE(dev_priv->plane_to_crtc_mapping) || + dev_priv->plane_to_crtc_mapping[i9xx_plane] != NULL); + dev_priv->plane_to_crtc_mapping[i9xx_plane] = intel_crtc; + } drm_crtc_helper_add(&intel_crtc->base, &intel_helper_funcs); @@ -14112,6 +14353,20 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, goto err; } break; + case DRM_FORMAT_NV12: + if (mode_cmd->modifier[0] == I915_FORMAT_MOD_Y_TILED_CCS || + mode_cmd->modifier[0] == I915_FORMAT_MOD_Yf_TILED_CCS) { + DRM_DEBUG_KMS("RC not to be enabled with NV12\n"); + goto err; + } + if (INTEL_GEN(dev_priv) < 9 || IS_SKYLAKE(dev_priv) || + IS_BROXTON(dev_priv)) { + DRM_DEBUG_KMS("unsupported pixel format: %s\n", + drm_get_format_name(mode_cmd->pixel_format, + &format_name)); + goto err; + } + break; default: DRM_DEBUG_KMS("unsupported pixel format: %s\n", drm_get_format_name(mode_cmd->pixel_format, &format_name)); @@ -14124,6 +14379,14 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb, drm_helper_mode_fill_fb_struct(&dev_priv->drm, fb, mode_cmd); + if (fb->format->format == DRM_FORMAT_NV12 && + (fb->width < SKL_MIN_YUV_420_SRC_W || + fb->height < SKL_MIN_YUV_420_SRC_H || + (fb->width % 4) != 0 || (fb->height % 4) != 0)) { + DRM_DEBUG_KMS("src dimensions not correct for NV12\n"); + return -EINVAL; + } + for (i = 0; i < fb->format->num_planes; i++) { u32 stride_alignment; @@ -15101,8 +15364,8 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) for (i = 0; i < dev_priv->num_shared_dpll; i++) { struct intel_shared_dpll *pll = &dev_priv->shared_dplls[i]; - pll->on = pll->funcs.get_hw_state(dev_priv, pll, - &pll->state.hw_state); + pll->on = pll->info->funcs->get_hw_state(dev_priv, pll, + &pll->state.hw_state); pll->state.crtc_mask = 0; for_each_intel_crtc(dev, crtc) { struct intel_crtc_state *crtc_state = @@ -15115,7 +15378,7 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) pll->active_mask = pll->state.crtc_mask; DRM_DEBUG_KMS("%s hw state readout: crtc_mask 0x%08x, on %i\n", - pll->name, pll->state.crtc_mask, pll->on); + pll->info->name, pll->state.crtc_mask, pll->on); } for_each_intel_encoder(dev, encoder) { @@ -15178,6 +15441,8 @@ static void intel_modeset_readout_hw_state(struct drm_device *dev) memset(&crtc->base.mode, 0, sizeof(crtc->base.mode)); if (crtc_state->base.active) { intel_mode_from_pipe_config(&crtc->base.mode, crtc_state); + crtc->base.mode.hdisplay = crtc_state->pipe_src_w; + crtc->base.mode.vdisplay = crtc_state->pipe_src_h; intel_mode_from_pipe_config(&crtc_state->base.adjusted_mode, crtc_state); WARN_ON(drm_atomic_set_mode_for_crtc(crtc->base.state, &crtc->base.mode)); @@ -15289,9 +15554,10 @@ intel_modeset_setup_hw_state(struct drm_device *dev, if (!pll->on || pll->active_mask) continue; - DRM_DEBUG_KMS("%s enabled but not in use, disabling\n", pll->name); + DRM_DEBUG_KMS("%s enabled but not in use, disabling\n", + pll->info->name); - pll->funcs.disable(dev_priv, pll); + pll->info->funcs->disable(dev_priv, pll); pll->on = false; } diff --git a/drivers/gpu/drm/i915/intel_display.h b/drivers/gpu/drm/i915/intel_display.h index 4e7418b..2ef3161 100644 --- a/drivers/gpu/drm/i915/intel_display.h +++ b/drivers/gpu/drm/i915/intel_display.h @@ -218,6 +218,10 @@ struct intel_link_m_n { for ((__p) = 0; (__p) < INTEL_INFO(__dev_priv)->num_pipes; (__p)++) \ for_each_if((__mask) & BIT(__p)) +#define for_each_cpu_transcoder_masked(__dev_priv, __t, __mask) \ + for ((__t) = 0; (__t) < I915_MAX_TRANSCODERS; (__t)++) \ + for_each_if ((__mask) & (1 << (__t))) + #define for_each_universal_plane(__dev_priv, __pipe, __p) \ for ((__p) = 0; \ (__p) < INTEL_INFO(__dev_priv)->num_sprites[(__pipe)] + 1; \ diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 9a4a51e..dde92e4 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -43,7 +43,6 @@ #include <drm/i915_drm.h> #include "i915_drv.h" -#define DP_LINK_CHECK_TIMEOUT (10 * 1000) #define DP_DPRX_ESI_LEN 14 /* Compliance test status bits */ @@ -92,8 +91,6 @@ static const struct dp_link_dpll chv_dpll[] = { { .p1 = 4, .p2 = 2, .n = 1, .m1 = 2, .m2 = 0x819999a } }, { 270000, /* m2_int = 27, m2_fraction = 0 */ { .p1 = 4, .p2 = 1, .n = 1, .m1 = 2, .m2 = 0x6c00000 } }, - { 540000, /* m2_int = 27, m2_fraction = 0 */ - { .p1 = 2, .p2 = 1, .n = 1, .m1 = 2, .m2 = 0x6c00000 } } }; /** @@ -1650,9 +1647,17 @@ void intel_dp_compute_rate(struct intel_dp *intel_dp, int port_clock, } } +struct link_config_limits { + int min_clock, max_clock; + int min_lane_count, max_lane_count; + int min_bpp, max_bpp; +}; + static int intel_dp_compute_bpp(struct intel_dp *intel_dp, struct intel_crtc_state *pipe_config) { + struct drm_i915_private *dev_priv = to_i915(intel_dp_to_dev(intel_dp)); + struct intel_connector *intel_connector = intel_dp->attached_connector; int bpp, bpc; bpp = pipe_config->pipe_bpp; @@ -1661,13 +1666,16 @@ static int intel_dp_compute_bpp(struct intel_dp *intel_dp, if (bpc > 0) bpp = min(bpp, 3*bpc); - /* For DP Compliance we override the computed bpp for the pipe */ - if (intel_dp->compliance.test_data.bpc != 0) { - pipe_config->pipe_bpp = 3*intel_dp->compliance.test_data.bpc; - pipe_config->dither_force_disable = pipe_config->pipe_bpp == 6*3; - DRM_DEBUG_KMS("Setting pipe_bpp to %d\n", - pipe_config->pipe_bpp); + if (intel_dp_is_edp(intel_dp)) { + /* Get bpp from vbt only for panels that dont have bpp in edid */ + if (intel_connector->base.display_info.bpc == 0 && + dev_priv->vbt.edp.bpp && dev_priv->vbt.edp.bpp < bpp) { + DRM_DEBUG_KMS("clamping bpp for eDP panel to BIOS-provided %i\n", + dev_priv->vbt.edp.bpp); + bpp = dev_priv->vbt.edp.bpp; + } } + return bpp; } @@ -1688,6 +1696,142 @@ static bool intel_edp_compare_alt_mode(struct drm_display_mode *m1, return bres; } +/* Adjust link config limits based on compliance test requests. */ +static void +intel_dp_adjust_compliance_config(struct intel_dp *intel_dp, + struct intel_crtc_state *pipe_config, + struct link_config_limits *limits) +{ + /* For DP Compliance we override the computed bpp for the pipe */ + if (intel_dp->compliance.test_data.bpc != 0) { + int bpp = 3 * intel_dp->compliance.test_data.bpc; + + limits->min_bpp = limits->max_bpp = bpp; + pipe_config->dither_force_disable = bpp == 6 * 3; + + DRM_DEBUG_KMS("Setting pipe_bpp to %d\n", bpp); + } + + /* Use values requested by Compliance Test Request */ + if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) { + int index; + + /* Validate the compliance test data since max values + * might have changed due to link train fallback. + */ + if (intel_dp_link_params_valid(intel_dp, intel_dp->compliance.test_link_rate, + intel_dp->compliance.test_lane_count)) { + index = intel_dp_rate_index(intel_dp->common_rates, + intel_dp->num_common_rates, + intel_dp->compliance.test_link_rate); + if (index >= 0) + limits->min_clock = limits->max_clock = index; + limits->min_lane_count = limits->max_lane_count = + intel_dp->compliance.test_lane_count; + } + } +} + +/* Optimize link config in order: max bpp, min clock, min lanes */ +static bool +intel_dp_compute_link_config_wide(struct intel_dp *intel_dp, + struct intel_crtc_state *pipe_config, + const struct link_config_limits *limits) +{ + struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; + int bpp, clock, lane_count; + int mode_rate, link_clock, link_avail; + + for (bpp = limits->max_bpp; bpp >= limits->min_bpp; bpp -= 2 * 3) { + mode_rate = intel_dp_link_required(adjusted_mode->crtc_clock, + bpp); + + for (clock = limits->min_clock; clock <= limits->max_clock; clock++) { + for (lane_count = limits->min_lane_count; + lane_count <= limits->max_lane_count; + lane_count <<= 1) { + link_clock = intel_dp->common_rates[clock]; + link_avail = intel_dp_max_data_rate(link_clock, + lane_count); + + if (mode_rate <= link_avail) { + pipe_config->lane_count = lane_count; + pipe_config->pipe_bpp = bpp; + pipe_config->port_clock = link_clock; + + return true; + } + } + } + } + + return false; +} + +static bool +intel_dp_compute_link_config(struct intel_encoder *encoder, + struct intel_crtc_state *pipe_config) +{ + struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode; + struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base); + struct link_config_limits limits; + int common_len; + + common_len = intel_dp_common_len_rate_limit(intel_dp, + intel_dp->max_link_rate); + + /* No common link rates between source and sink */ + WARN_ON(common_len <= 0); + + limits.min_clock = 0; + limits.max_clock = common_len - 1; + + limits.min_lane_count = 1; + limits.max_lane_count = intel_dp_max_lane_count(intel_dp); + + limits.min_bpp = 6 * 3; + limits.max_bpp = intel_dp_compute_bpp(intel_dp, pipe_config); + + if (intel_dp_is_edp(intel_dp)) { + /* + * Use the maximum clock and number of lanes the eDP panel + * advertizes being capable of. The panels are generally + * designed to support only a single clock and lane + * configuration, and typically these values correspond to the + * native resolution of the panel. + */ + limits.min_lane_count = limits.max_lane_count; + limits.min_clock = limits.max_clock; + } + + intel_dp_adjust_compliance_config(intel_dp, pipe_config, &limits); + + DRM_DEBUG_KMS("DP link computation with max lane count %i " + "max rate %d max bpp %d pixel clock %iKHz\n", + limits.max_lane_count, + intel_dp->common_rates[limits.max_clock], + limits.max_bpp, adjusted_mode->crtc_clock); + + /* + * Optimize for slow and wide. This is the place to add alternative + * optimization policy. + */ + if (!intel_dp_compute_link_config_wide(intel_dp, pipe_config, &limits)) + return false; + + DRM_DEBUG_KMS("DP lane count %d clock %d bpp %d\n", + pipe_config->lane_count, pipe_config->port_clock, + pipe_config->pipe_bpp); + + DRM_DEBUG_KMS("DP link rate required %i available %i\n", + intel_dp_link_required(adjusted_mode->crtc_clock, + pipe_config->pipe_bpp), + intel_dp_max_data_rate(pipe_config->port_clock, + pipe_config->lane_count)); + + return true; +} + bool intel_dp_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, @@ -1701,27 +1845,9 @@ intel_dp_compute_config(struct intel_encoder *encoder, struct intel_connector *intel_connector = intel_dp->attached_connector; struct intel_digital_connector_state *intel_conn_state = to_intel_digital_connector_state(conn_state); - int lane_count, clock; - int min_lane_count = 1; - int max_lane_count = intel_dp_max_lane_count(intel_dp); - /* Conveniently, the link BW constants become indices with a shift...*/ - int min_clock = 0; - int max_clock; - int bpp, mode_rate; - int link_avail, link_clock; - int common_len; - uint8_t link_bw, rate_select; bool reduce_m_n = drm_dp_has_quirk(&intel_dp->desc, DP_DPCD_QUIRK_LIMITED_M_N); - common_len = intel_dp_common_len_rate_limit(intel_dp, - intel_dp->max_link_rate); - - /* No common link rates between source and sink */ - WARN_ON(common_len <= 0); - - max_clock = common_len - 1; - if (HAS_PCH_SPLIT(dev_priv) && !HAS_DDI(dev_priv) && port != PORT_A) pipe_config->has_pch_encoder = true; @@ -1747,6 +1873,7 @@ intel_dp_compute_config(struct intel_encoder *encoder, if (INTEL_GEN(dev_priv) >= 9) { int ret; + ret = skl_update_scaler_crtc(pipe_config); if (ret) return ret; @@ -1767,75 +1894,9 @@ intel_dp_compute_config(struct intel_encoder *encoder, if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK) return false; - /* Use values requested by Compliance Test Request */ - if (intel_dp->compliance.test_type == DP_TEST_LINK_TRAINING) { - int index; - - /* Validate the compliance test data since max values - * might have changed due to link train fallback. - */ - if (intel_dp_link_params_valid(intel_dp, intel_dp->compliance.test_link_rate, - intel_dp->compliance.test_lane_count)) { - index = intel_dp_rate_index(intel_dp->common_rates, - intel_dp->num_common_rates, - intel_dp->compliance.test_link_rate); - if (index >= 0) - min_clock = max_clock = index; - min_lane_count = max_lane_count = intel_dp->compliance.test_lane_count; - } - } - DRM_DEBUG_KMS("DP link computation with max lane count %i " - "max bw %d pixel clock %iKHz\n", - max_lane_count, intel_dp->common_rates[max_clock], - adjusted_mode->crtc_clock); - - /* Walk through all bpp values. Luckily they're all nicely spaced with 2 - * bpc in between. */ - bpp = intel_dp_compute_bpp(intel_dp, pipe_config); - if (intel_dp_is_edp(intel_dp)) { - - /* Get bpp from vbt only for panels that dont have bpp in edid */ - if (intel_connector->base.display_info.bpc == 0 && - (dev_priv->vbt.edp.bpp && dev_priv->vbt.edp.bpp < bpp)) { - DRM_DEBUG_KMS("clamping bpp for eDP panel to BIOS-provided %i\n", - dev_priv->vbt.edp.bpp); - bpp = dev_priv->vbt.edp.bpp; - } - - /* - * Use the maximum clock and number of lanes the eDP panel - * advertizes being capable of. The panels are generally - * designed to support only a single clock and lane - * configuration, and typically these values correspond to the - * native resolution of the panel. - */ - min_lane_count = max_lane_count; - min_clock = max_clock; - } - - for (; bpp >= 6*3; bpp -= 2*3) { - mode_rate = intel_dp_link_required(adjusted_mode->crtc_clock, - bpp); - - for (clock = min_clock; clock <= max_clock; clock++) { - for (lane_count = min_lane_count; - lane_count <= max_lane_count; - lane_count <<= 1) { - - link_clock = intel_dp->common_rates[clock]; - link_avail = intel_dp_max_data_rate(link_clock, - lane_count); - - if (mode_rate <= link_avail) { - goto found; - } - } - } - } - - return false; + if (!intel_dp_compute_link_config(encoder, pipe_config)) + return false; -found: if (intel_conn_state->broadcast_rgb == INTEL_BROADCAST_RGB_AUTO) { /* * See: @@ -1843,7 +1904,7 @@ found: * VESA DisplayPort Ver.1.2a - 5.1.1.1 Video Colorimetry */ pipe_config->limited_color_range = - bpp != 18 && + pipe_config->pipe_bpp != 18 && drm_default_rgb_quant_range(adjusted_mode) == HDMI_QUANTIZATION_RANGE_LIMITED; } else { @@ -1851,21 +1912,7 @@ found: intel_conn_state->broadcast_rgb == INTEL_BROADCAST_RGB_LIMITED; } - pipe_config->lane_count = lane_count; - - pipe_config->pipe_bpp = bpp; - pipe_config->port_clock = intel_dp->common_rates[clock]; - - intel_dp_compute_rate(intel_dp, pipe_config->port_clock, - &link_bw, &rate_select); - - DRM_DEBUG_KMS("DP link bw %02x rate select %02x lane count %d clock %d bpp %d\n", - link_bw, rate_select, pipe_config->lane_count, - pipe_config->port_clock, bpp); - DRM_DEBUG_KMS("DP link bw required %i available %i\n", - mode_rate, link_avail); - - intel_link_compute_m_n(bpp, lane_count, + intel_link_compute_m_n(pipe_config->pipe_bpp, pipe_config->lane_count, adjusted_mode->crtc_clock, pipe_config->port_clock, &pipe_config->dp_m_n, @@ -1874,31 +1921,12 @@ found: if (intel_connector->panel.downclock_mode != NULL && dev_priv->drrs.type == SEAMLESS_DRRS_SUPPORT) { pipe_config->has_drrs = true; - intel_link_compute_m_n(bpp, lane_count, - intel_connector->panel.downclock_mode->clock, - pipe_config->port_clock, - &pipe_config->dp_m2_n2, - reduce_m_n); - } - - /* - * DPLL0 VCO may need to be adjusted to get the correct - * clock for eDP. This will affect cdclk as well. - */ - if (intel_dp_is_edp(intel_dp) && IS_GEN9_BC(dev_priv)) { - int vco; - - switch (pipe_config->port_clock / 2) { - case 108000: - case 216000: - vco = 8640000; - break; - default: - vco = 8100000; - break; - } - - to_intel_atomic_state(pipe_config->base.state)->cdclk.logical.vco = vco; + intel_link_compute_m_n(pipe_config->pipe_bpp, + pipe_config->lane_count, + intel_connector->panel.downclock_mode->clock, + pipe_config->port_clock, + &pipe_config->dp_m2_n2, + reduce_m_n); } if (!HAS_DDI(dev_priv)) @@ -2901,10 +2929,7 @@ _intel_dp_set_link_train(struct intel_dp *intel_dp, } } else { - if (IS_CHERRYVIEW(dev_priv)) - *DP &= ~DP_LINK_TRAIN_MASK_CHV; - else - *DP &= ~DP_LINK_TRAIN_MASK; + *DP &= ~DP_LINK_TRAIN_MASK; switch (dp_train_pat & DP_TRAINING_PATTERN_MASK) { case DP_TRAINING_PATTERN_DISABLE: @@ -2917,12 +2942,8 @@ _intel_dp_set_link_train(struct intel_dp *intel_dp, *DP |= DP_LINK_TRAIN_PAT_2; break; case DP_TRAINING_PATTERN_3: - if (IS_CHERRYVIEW(dev_priv)) { - *DP |= DP_LINK_TRAIN_PAT_3_CHV; - } else { - DRM_DEBUG_KMS("TPS3 not supported, using TPS2 instead\n"); - *DP |= DP_LINK_TRAIN_PAT_2; - } + DRM_DEBUG_KMS("TPS3 not supported, using TPS2 instead\n"); + *DP |= DP_LINK_TRAIN_PAT_2; break; } } @@ -3661,10 +3682,7 @@ intel_dp_link_down(struct intel_encoder *encoder, DP &= ~DP_LINK_TRAIN_MASK_CPT; DP |= DP_LINK_TRAIN_PAT_IDLE_CPT; } else { - if (IS_CHERRYVIEW(dev_priv)) - DP &= ~DP_LINK_TRAIN_MASK_CHV; - else - DP &= ~DP_LINK_TRAIN_MASK; + DP &= ~DP_LINK_TRAIN_MASK; DP |= DP_LINK_TRAIN_PAT_IDLE; } I915_WRITE(intel_dp->output_reg, DP); diff --git a/drivers/gpu/drm/i915/intel_dp_link_training.c b/drivers/gpu/drm/i915/intel_dp_link_training.c index f59b59b..3fcaa98 100644 --- a/drivers/gpu/drm/i915/intel_dp_link_training.c +++ b/drivers/gpu/drm/i915/intel_dp_link_training.c @@ -139,6 +139,11 @@ intel_dp_link_training_clock_recovery(struct intel_dp *intel_dp) intel_dp_compute_rate(intel_dp, intel_dp->link_rate, &link_bw, &rate_select); + if (link_bw) + DRM_DEBUG_KMS("Using LINK_BW_SET value %02x\n", link_bw); + else + DRM_DEBUG_KMS("Using LINK_RATE_SET value %02x\n", rate_select); + /* Write the link configuration data */ link_config[0] = link_bw; link_config[1] = intel_dp->lane_count; diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c index c3de091..9e6956c 100644 --- a/drivers/gpu/drm/i915/intel_dp_mst.c +++ b/drivers/gpu/drm/i915/intel_dp_mst.c @@ -180,9 +180,11 @@ static void intel_mst_post_disable_dp(struct intel_encoder *encoder, intel_dp->active_mst_links--; intel_mst->connector = NULL; - if (intel_dp->active_mst_links == 0) + if (intel_dp->active_mst_links == 0) { + intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF); intel_dig_port->base.post_disable(&intel_dig_port->base, old_crtc_state, NULL); + } DRM_DEBUG_KMS("active links %d\n", intel_dp->active_mst_links); } @@ -223,7 +225,11 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder, DRM_DEBUG_KMS("active links %d\n", intel_dp->active_mst_links); + if (intel_dp->active_mst_links == 0) + intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON); + drm_dp_send_power_updown_phy(&intel_dp->mst_mgr, connector->port, true); + if (intel_dp->active_mst_links == 0) intel_dig_port->base.pre_enable(&intel_dig_port->base, pipe_config, NULL); diff --git a/drivers/gpu/drm/i915/intel_dpio_phy.c b/drivers/gpu/drm/i915/intel_dpio_phy.c index c8e9e44..00b3ab6 100644 --- a/drivers/gpu/drm/i915/intel_dpio_phy.c +++ b/drivers/gpu/drm/i915/intel_dpio_phy.c @@ -380,13 +380,14 @@ static void _bxt_ddi_phy_init(struct drm_i915_private *dev_priv, * all 1s. Eventually they become accessible as they power up, then * the reserved bit will give the default 0. Poll on the reserved bit * becoming 0 to find when the PHY is accessible. - * HW team confirmed that the time to reach phypowergood status is - * anywhere between 50 us and 100us. + * The flag should get set in 100us according to the HW team, but + * use 1ms due to occasional timeouts observed with that. */ - if (wait_for_us(((I915_READ(BXT_PORT_CL1CM_DW0(phy)) & - (PHY_RESERVED | PHY_POWER_GOOD)) == PHY_POWER_GOOD), 100)) { + if (intel_wait_for_register_fw(dev_priv, BXT_PORT_CL1CM_DW0(phy), + PHY_RESERVED | PHY_POWER_GOOD, + PHY_POWER_GOOD, + 1)) DRM_ERROR("timeout during PHY%d power on\n", phy); - } /* Program PLL Rcomp code offset */ val = I915_READ(BXT_PORT_CL1CM_DW9(phy)); diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c index 51c5ae4..383fbc1 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c @@ -118,10 +118,10 @@ void assert_shared_dpll(struct drm_i915_private *dev_priv, if (WARN(!pll, "asserting DPLL %s with no DPLL\n", onoff(state))) return; - cur_state = pll->funcs.get_hw_state(dev_priv, pll, &hw_state); + cur_state = pll->info->funcs->get_hw_state(dev_priv, pll, &hw_state); I915_STATE_WARN(cur_state != state, "%s assertion failure (expected %s, current %s)\n", - pll->name, onoff(state), onoff(cur_state)); + pll->info->name, onoff(state), onoff(cur_state)); } /** @@ -143,11 +143,11 @@ void intel_prepare_shared_dpll(struct intel_crtc *crtc) mutex_lock(&dev_priv->dpll_lock); WARN_ON(!pll->state.crtc_mask); if (!pll->active_mask) { - DRM_DEBUG_DRIVER("setting up %s\n", pll->name); + DRM_DEBUG_DRIVER("setting up %s\n", pll->info->name); WARN_ON(pll->on); assert_shared_dpll_disabled(dev_priv, pll); - pll->funcs.prepare(dev_priv, pll); + pll->info->funcs->prepare(dev_priv, pll); } mutex_unlock(&dev_priv->dpll_lock); } @@ -179,7 +179,7 @@ void intel_enable_shared_dpll(struct intel_crtc *crtc) pll->active_mask |= crtc_mask; DRM_DEBUG_KMS("enable %s (active %x, on? %d) for crtc %d\n", - pll->name, pll->active_mask, pll->on, + pll->info->name, pll->active_mask, pll->on, crtc->base.base.id); if (old_mask) { @@ -189,8 +189,8 @@ void intel_enable_shared_dpll(struct intel_crtc *crtc) } WARN_ON(pll->on); - DRM_DEBUG_KMS("enabling %s\n", pll->name); - pll->funcs.enable(dev_priv, pll); + DRM_DEBUG_KMS("enabling %s\n", pll->info->name); + pll->info->funcs->enable(dev_priv, pll); pll->on = true; out: @@ -221,7 +221,7 @@ void intel_disable_shared_dpll(struct intel_crtc *crtc) goto out; DRM_DEBUG_KMS("disable %s (active %x, on? %d) for crtc %d\n", - pll->name, pll->active_mask, pll->on, + pll->info->name, pll->active_mask, pll->on, crtc->base.base.id); assert_shared_dpll_enabled(dev_priv, pll); @@ -231,8 +231,8 @@ void intel_disable_shared_dpll(struct intel_crtc *crtc) if (pll->active_mask) goto out; - DRM_DEBUG_KMS("disabling %s\n", pll->name); - pll->funcs.disable(dev_priv, pll); + DRM_DEBUG_KMS("disabling %s\n", pll->info->name); + pll->info->funcs->disable(dev_priv, pll); pll->on = false; out: @@ -263,7 +263,8 @@ intel_find_shared_dpll(struct intel_crtc *crtc, &shared_dpll[i].hw_state, sizeof(crtc_state->dpll_hw_state)) == 0) { DRM_DEBUG_KMS("[CRTC:%d:%s] sharing existing %s (crtc mask 0x%08x, active %x)\n", - crtc->base.base.id, crtc->base.name, pll->name, + crtc->base.base.id, crtc->base.name, + pll->info->name, shared_dpll[i].crtc_mask, pll->active_mask); return pll; @@ -275,7 +276,8 @@ intel_find_shared_dpll(struct intel_crtc *crtc, pll = &dev_priv->shared_dplls[i]; if (shared_dpll[i].crtc_mask == 0) { DRM_DEBUG_KMS("[CRTC:%d:%s] allocated %s\n", - crtc->base.base.id, crtc->base.name, pll->name); + crtc->base.base.id, crtc->base.name, + pll->info->name); return pll; } } @@ -289,19 +291,19 @@ intel_reference_shared_dpll(struct intel_shared_dpll *pll, { struct intel_shared_dpll_state *shared_dpll; struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); - enum intel_dpll_id i = pll->id; + const enum intel_dpll_id id = pll->info->id; shared_dpll = intel_atomic_get_shared_dpll_state(crtc_state->base.state); - if (shared_dpll[i].crtc_mask == 0) - shared_dpll[i].hw_state = + if (shared_dpll[id].crtc_mask == 0) + shared_dpll[id].hw_state = crtc_state->dpll_hw_state; crtc_state->shared_dpll = pll; - DRM_DEBUG_DRIVER("using %s for pipe %c\n", pll->name, + DRM_DEBUG_DRIVER("using %s for pipe %c\n", pll->info->name, pipe_name(crtc->pipe)); - shared_dpll[pll->id].crtc_mask |= 1 << crtc->pipe; + shared_dpll[id].crtc_mask |= 1 << crtc->pipe; } /** @@ -341,15 +343,16 @@ static bool ibx_pch_dpll_get_hw_state(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll, struct intel_dpll_hw_state *hw_state) { + const enum intel_dpll_id id = pll->info->id; uint32_t val; if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS)) return false; - val = I915_READ(PCH_DPLL(pll->id)); + val = I915_READ(PCH_DPLL(id)); hw_state->dpll = val; - hw_state->fp0 = I915_READ(PCH_FP0(pll->id)); - hw_state->fp1 = I915_READ(PCH_FP1(pll->id)); + hw_state->fp0 = I915_READ(PCH_FP0(id)); + hw_state->fp1 = I915_READ(PCH_FP1(id)); intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS); @@ -359,8 +362,10 @@ static bool ibx_pch_dpll_get_hw_state(struct drm_i915_private *dev_priv, static void ibx_pch_dpll_prepare(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { - I915_WRITE(PCH_FP0(pll->id), pll->state.hw_state.fp0); - I915_WRITE(PCH_FP1(pll->id), pll->state.hw_state.fp1); + const enum intel_dpll_id id = pll->info->id; + + I915_WRITE(PCH_FP0(id), pll->state.hw_state.fp0); + I915_WRITE(PCH_FP1(id), pll->state.hw_state.fp1); } static void ibx_assert_pch_refclk_enabled(struct drm_i915_private *dev_priv) @@ -379,13 +384,15 @@ static void ibx_assert_pch_refclk_enabled(struct drm_i915_private *dev_priv) static void ibx_pch_dpll_enable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { + const enum intel_dpll_id id = pll->info->id; + /* PCH refclock must be enabled first */ ibx_assert_pch_refclk_enabled(dev_priv); - I915_WRITE(PCH_DPLL(pll->id), pll->state.hw_state.dpll); + I915_WRITE(PCH_DPLL(id), pll->state.hw_state.dpll); /* Wait for the clocks to stabilize. */ - POSTING_READ(PCH_DPLL(pll->id)); + POSTING_READ(PCH_DPLL(id)); udelay(150); /* The pixel multiplier can only be updated once the @@ -393,14 +400,15 @@ static void ibx_pch_dpll_enable(struct drm_i915_private *dev_priv, * * So write it again. */ - I915_WRITE(PCH_DPLL(pll->id), pll->state.hw_state.dpll); - POSTING_READ(PCH_DPLL(pll->id)); + I915_WRITE(PCH_DPLL(id), pll->state.hw_state.dpll); + POSTING_READ(PCH_DPLL(id)); udelay(200); } static void ibx_pch_dpll_disable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { + const enum intel_dpll_id id = pll->info->id; struct drm_device *dev = &dev_priv->drm; struct intel_crtc *crtc; @@ -410,8 +418,8 @@ static void ibx_pch_dpll_disable(struct drm_i915_private *dev_priv, assert_pch_transcoder_disabled(dev_priv, crtc->pipe); } - I915_WRITE(PCH_DPLL(pll->id), 0); - POSTING_READ(PCH_DPLL(pll->id)); + I915_WRITE(PCH_DPLL(id), 0); + POSTING_READ(PCH_DPLL(id)); udelay(200); } @@ -429,7 +437,8 @@ ibx_get_dpll(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, pll = &dev_priv->shared_dplls[i]; DRM_DEBUG_KMS("[CRTC:%d:%s] using pre-allocated %s\n", - crtc->base.base.id, crtc->base.name, pll->name); + crtc->base.base.id, crtc->base.name, + pll->info->name); } else { pll = intel_find_shared_dpll(crtc, crtc_state, DPLL_ID_PCH_PLL_A, @@ -466,8 +475,10 @@ static const struct intel_shared_dpll_funcs ibx_pch_dpll_funcs = { static void hsw_ddi_wrpll_enable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { - I915_WRITE(WRPLL_CTL(pll->id), pll->state.hw_state.wrpll); - POSTING_READ(WRPLL_CTL(pll->id)); + const enum intel_dpll_id id = pll->info->id; + + I915_WRITE(WRPLL_CTL(id), pll->state.hw_state.wrpll); + POSTING_READ(WRPLL_CTL(id)); udelay(20); } @@ -482,11 +493,12 @@ static void hsw_ddi_spll_enable(struct drm_i915_private *dev_priv, static void hsw_ddi_wrpll_disable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { + const enum intel_dpll_id id = pll->info->id; uint32_t val; - val = I915_READ(WRPLL_CTL(pll->id)); - I915_WRITE(WRPLL_CTL(pll->id), val & ~WRPLL_PLL_ENABLE); - POSTING_READ(WRPLL_CTL(pll->id)); + val = I915_READ(WRPLL_CTL(id)); + I915_WRITE(WRPLL_CTL(id), val & ~WRPLL_PLL_ENABLE); + POSTING_READ(WRPLL_CTL(id)); } static void hsw_ddi_spll_disable(struct drm_i915_private *dev_priv, @@ -503,12 +515,13 @@ static bool hsw_ddi_wrpll_get_hw_state(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll, struct intel_dpll_hw_state *hw_state) { + const enum intel_dpll_id id = pll->info->id; uint32_t val; if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS)) return false; - val = I915_READ(WRPLL_CTL(pll->id)); + val = I915_READ(WRPLL_CTL(id)); hw_state->wrpll = val; intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS); @@ -914,13 +927,15 @@ static const struct skl_dpll_regs skl_dpll_regs[4] = { static void skl_ddi_pll_write_ctrl1(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { + const enum intel_dpll_id id = pll->info->id; uint32_t val; val = I915_READ(DPLL_CTRL1); - val &= ~(DPLL_CTRL1_HDMI_MODE(pll->id) | DPLL_CTRL1_SSC(pll->id) | - DPLL_CTRL1_LINK_RATE_MASK(pll->id)); - val |= pll->state.hw_state.ctrl1 << (pll->id * 6); + val &= ~(DPLL_CTRL1_HDMI_MODE(id) | + DPLL_CTRL1_SSC(id) | + DPLL_CTRL1_LINK_RATE_MASK(id)); + val |= pll->state.hw_state.ctrl1 << (id * 6); I915_WRITE(DPLL_CTRL1, val); POSTING_READ(DPLL_CTRL1); @@ -930,24 +945,25 @@ static void skl_ddi_pll_enable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { const struct skl_dpll_regs *regs = skl_dpll_regs; + const enum intel_dpll_id id = pll->info->id; skl_ddi_pll_write_ctrl1(dev_priv, pll); - I915_WRITE(regs[pll->id].cfgcr1, pll->state.hw_state.cfgcr1); - I915_WRITE(regs[pll->id].cfgcr2, pll->state.hw_state.cfgcr2); - POSTING_READ(regs[pll->id].cfgcr1); - POSTING_READ(regs[pll->id].cfgcr2); + I915_WRITE(regs[id].cfgcr1, pll->state.hw_state.cfgcr1); + I915_WRITE(regs[id].cfgcr2, pll->state.hw_state.cfgcr2); + POSTING_READ(regs[id].cfgcr1); + POSTING_READ(regs[id].cfgcr2); /* the enable bit is always bit 31 */ - I915_WRITE(regs[pll->id].ctl, - I915_READ(regs[pll->id].ctl) | LCPLL_PLL_ENABLE); + I915_WRITE(regs[id].ctl, + I915_READ(regs[id].ctl) | LCPLL_PLL_ENABLE); if (intel_wait_for_register(dev_priv, DPLL_STATUS, - DPLL_LOCK(pll->id), - DPLL_LOCK(pll->id), + DPLL_LOCK(id), + DPLL_LOCK(id), 5)) - DRM_ERROR("DPLL %d not locked\n", pll->id); + DRM_ERROR("DPLL %d not locked\n", id); } static void skl_ddi_dpll0_enable(struct drm_i915_private *dev_priv, @@ -960,11 +976,12 @@ static void skl_ddi_pll_disable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { const struct skl_dpll_regs *regs = skl_dpll_regs; + const enum intel_dpll_id id = pll->info->id; /* the enable bit is always bit 31 */ - I915_WRITE(regs[pll->id].ctl, - I915_READ(regs[pll->id].ctl) & ~LCPLL_PLL_ENABLE); - POSTING_READ(regs[pll->id].ctl); + I915_WRITE(regs[id].ctl, + I915_READ(regs[id].ctl) & ~LCPLL_PLL_ENABLE); + POSTING_READ(regs[id].ctl); } static void skl_ddi_dpll0_disable(struct drm_i915_private *dev_priv, @@ -978,6 +995,7 @@ static bool skl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv, { uint32_t val; const struct skl_dpll_regs *regs = skl_dpll_regs; + const enum intel_dpll_id id = pll->info->id; bool ret; if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS)) @@ -985,17 +1003,17 @@ static bool skl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv, ret = false; - val = I915_READ(regs[pll->id].ctl); + val = I915_READ(regs[id].ctl); if (!(val & LCPLL_PLL_ENABLE)) goto out; val = I915_READ(DPLL_CTRL1); - hw_state->ctrl1 = (val >> (pll->id * 6)) & 0x3f; + hw_state->ctrl1 = (val >> (id * 6)) & 0x3f; /* avoid reading back stale values if HDMI mode is not enabled */ - if (val & DPLL_CTRL1_HDMI_MODE(pll->id)) { - hw_state->cfgcr1 = I915_READ(regs[pll->id].cfgcr1); - hw_state->cfgcr2 = I915_READ(regs[pll->id].cfgcr2); + if (val & DPLL_CTRL1_HDMI_MODE(id)) { + hw_state->cfgcr1 = I915_READ(regs[id].cfgcr1); + hw_state->cfgcr2 = I915_READ(regs[id].cfgcr2); } ret = true; @@ -1011,6 +1029,7 @@ static bool skl_ddi_dpll0_get_hw_state(struct drm_i915_private *dev_priv, { uint32_t val; const struct skl_dpll_regs *regs = skl_dpll_regs; + const enum intel_dpll_id id = pll->info->id; bool ret; if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS)) @@ -1019,12 +1038,12 @@ static bool skl_ddi_dpll0_get_hw_state(struct drm_i915_private *dev_priv, ret = false; /* DPLL0 is always enabled since it drives CDCLK */ - val = I915_READ(regs[pll->id].ctl); + val = I915_READ(regs[id].ctl); if (WARN_ON(!(val & LCPLL_PLL_ENABLE))) goto out; val = I915_READ(DPLL_CTRL1); - hw_state->ctrl1 = (val >> (pll->id * 6)) & 0x3f; + hw_state->ctrl1 = (val >> (id * 6)) & 0x3f; ret = true; @@ -1424,7 +1443,7 @@ static void bxt_ddi_pll_enable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { uint32_t temp; - enum port port = (enum port)pll->id; /* 1:1 port->PLL mapping */ + enum port port = (enum port)pll->info->id; /* 1:1 port->PLL mapping */ enum dpio_phy phy; enum dpio_channel ch; @@ -1543,7 +1562,7 @@ static void bxt_ddi_pll_enable(struct drm_i915_private *dev_priv, static void bxt_ddi_pll_disable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { - enum port port = (enum port)pll->id; /* 1:1 port->PLL mapping */ + enum port port = (enum port)pll->info->id; /* 1:1 port->PLL mapping */ uint32_t temp; temp = I915_READ(BXT_PORT_PLL_ENABLE(port)); @@ -1566,7 +1585,7 @@ static bool bxt_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll, struct intel_dpll_hw_state *hw_state) { - enum port port = (enum port)pll->id; /* 1:1 port->PLL mapping */ + enum port port = (enum port)pll->info->id; /* 1:1 port->PLL mapping */ uint32_t val; bool ret; enum dpio_phy phy; @@ -1824,7 +1843,7 @@ bxt_get_dpll(struct intel_crtc *crtc, pll = intel_get_shared_dpll_by_id(dev_priv, i); DRM_DEBUG_KMS("[CRTC:%d:%s] using pre-allocated %s\n", - crtc->base.base.id, crtc->base.name, pll->name); + crtc->base.base.id, crtc->base.name, pll->info->name); intel_reference_shared_dpll(pll, crtc_state); @@ -1877,13 +1896,6 @@ static void intel_ddi_pll_init(struct drm_device *dev) } } -struct dpll_info { - const char *name; - const int id; - const struct intel_shared_dpll_funcs *funcs; - uint32_t flags; -}; - struct intel_dpll_mgr { const struct dpll_info *dpll_info; @@ -1896,9 +1908,9 @@ struct intel_dpll_mgr { }; static const struct dpll_info pch_plls[] = { - { "PCH DPLL A", DPLL_ID_PCH_PLL_A, &ibx_pch_dpll_funcs, 0 }, - { "PCH DPLL B", DPLL_ID_PCH_PLL_B, &ibx_pch_dpll_funcs, 0 }, - { NULL, -1, NULL, 0 }, + { "PCH DPLL A", &ibx_pch_dpll_funcs, DPLL_ID_PCH_PLL_A, 0 }, + { "PCH DPLL B", &ibx_pch_dpll_funcs, DPLL_ID_PCH_PLL_B, 0 }, + { }, }; static const struct intel_dpll_mgr pch_pll_mgr = { @@ -1908,13 +1920,13 @@ static const struct intel_dpll_mgr pch_pll_mgr = { }; static const struct dpll_info hsw_plls[] = { - { "WRPLL 1", DPLL_ID_WRPLL1, &hsw_ddi_wrpll_funcs, 0 }, - { "WRPLL 2", DPLL_ID_WRPLL2, &hsw_ddi_wrpll_funcs, 0 }, - { "SPLL", DPLL_ID_SPLL, &hsw_ddi_spll_funcs, 0 }, - { "LCPLL 810", DPLL_ID_LCPLL_810, &hsw_ddi_lcpll_funcs, INTEL_DPLL_ALWAYS_ON }, - { "LCPLL 1350", DPLL_ID_LCPLL_1350, &hsw_ddi_lcpll_funcs, INTEL_DPLL_ALWAYS_ON }, - { "LCPLL 2700", DPLL_ID_LCPLL_2700, &hsw_ddi_lcpll_funcs, INTEL_DPLL_ALWAYS_ON }, - { NULL, -1, NULL, }, + { "WRPLL 1", &hsw_ddi_wrpll_funcs, DPLL_ID_WRPLL1, 0 }, + { "WRPLL 2", &hsw_ddi_wrpll_funcs, DPLL_ID_WRPLL2, 0 }, + { "SPLL", &hsw_ddi_spll_funcs, DPLL_ID_SPLL, 0 }, + { "LCPLL 810", &hsw_ddi_lcpll_funcs, DPLL_ID_LCPLL_810, INTEL_DPLL_ALWAYS_ON }, + { "LCPLL 1350", &hsw_ddi_lcpll_funcs, DPLL_ID_LCPLL_1350, INTEL_DPLL_ALWAYS_ON }, + { "LCPLL 2700", &hsw_ddi_lcpll_funcs, DPLL_ID_LCPLL_2700, INTEL_DPLL_ALWAYS_ON }, + { }, }; static const struct intel_dpll_mgr hsw_pll_mgr = { @@ -1924,11 +1936,11 @@ static const struct intel_dpll_mgr hsw_pll_mgr = { }; static const struct dpll_info skl_plls[] = { - { "DPLL 0", DPLL_ID_SKL_DPLL0, &skl_ddi_dpll0_funcs, INTEL_DPLL_ALWAYS_ON }, - { "DPLL 1", DPLL_ID_SKL_DPLL1, &skl_ddi_pll_funcs, 0 }, - { "DPLL 2", DPLL_ID_SKL_DPLL2, &skl_ddi_pll_funcs, 0 }, - { "DPLL 3", DPLL_ID_SKL_DPLL3, &skl_ddi_pll_funcs, 0 }, - { NULL, -1, NULL, }, + { "DPLL 0", &skl_ddi_dpll0_funcs, DPLL_ID_SKL_DPLL0, INTEL_DPLL_ALWAYS_ON }, + { "DPLL 1", &skl_ddi_pll_funcs, DPLL_ID_SKL_DPLL1, 0 }, + { "DPLL 2", &skl_ddi_pll_funcs, DPLL_ID_SKL_DPLL2, 0 }, + { "DPLL 3", &skl_ddi_pll_funcs, DPLL_ID_SKL_DPLL3, 0 }, + { }, }; static const struct intel_dpll_mgr skl_pll_mgr = { @@ -1938,10 +1950,10 @@ static const struct intel_dpll_mgr skl_pll_mgr = { }; static const struct dpll_info bxt_plls[] = { - { "PORT PLL A", DPLL_ID_SKL_DPLL0, &bxt_ddi_pll_funcs, 0 }, - { "PORT PLL B", DPLL_ID_SKL_DPLL1, &bxt_ddi_pll_funcs, 0 }, - { "PORT PLL C", DPLL_ID_SKL_DPLL2, &bxt_ddi_pll_funcs, 0 }, - { NULL, -1, NULL, }, + { "PORT PLL A", &bxt_ddi_pll_funcs, DPLL_ID_SKL_DPLL0, 0 }, + { "PORT PLL B", &bxt_ddi_pll_funcs, DPLL_ID_SKL_DPLL1, 0 }, + { "PORT PLL C", &bxt_ddi_pll_funcs, DPLL_ID_SKL_DPLL2, 0 }, + { }, }; static const struct intel_dpll_mgr bxt_pll_mgr = { @@ -1953,38 +1965,39 @@ static const struct intel_dpll_mgr bxt_pll_mgr = { static void cnl_ddi_pll_enable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { + const enum intel_dpll_id id = pll->info->id; uint32_t val; /* 1. Enable DPLL power in DPLL_ENABLE. */ - val = I915_READ(CNL_DPLL_ENABLE(pll->id)); + val = I915_READ(CNL_DPLL_ENABLE(id)); val |= PLL_POWER_ENABLE; - I915_WRITE(CNL_DPLL_ENABLE(pll->id), val); + I915_WRITE(CNL_DPLL_ENABLE(id), val); /* 2. Wait for DPLL power state enabled in DPLL_ENABLE. */ if (intel_wait_for_register(dev_priv, - CNL_DPLL_ENABLE(pll->id), + CNL_DPLL_ENABLE(id), PLL_POWER_STATE, PLL_POWER_STATE, 5)) - DRM_ERROR("PLL %d Power not enabled\n", pll->id); + DRM_ERROR("PLL %d Power not enabled\n", id); /* * 3. Configure DPLL_CFGCR0 to set SSC enable/disable, * select DP mode, and set DP link rate. */ val = pll->state.hw_state.cfgcr0; - I915_WRITE(CNL_DPLL_CFGCR0(pll->id), val); + I915_WRITE(CNL_DPLL_CFGCR0(id), val); /* 4. Reab back to ensure writes completed */ - POSTING_READ(CNL_DPLL_CFGCR0(pll->id)); + POSTING_READ(CNL_DPLL_CFGCR0(id)); /* 3. Configure DPLL_CFGCR0 */ /* Avoid touch CFGCR1 if HDMI mode is not enabled */ if (pll->state.hw_state.cfgcr0 & DPLL_CFGCR0_HDMI_MODE) { val = pll->state.hw_state.cfgcr1; - I915_WRITE(CNL_DPLL_CFGCR1(pll->id), val); + I915_WRITE(CNL_DPLL_CFGCR1(id), val); /* 4. Reab back to ensure writes completed */ - POSTING_READ(CNL_DPLL_CFGCR1(pll->id)); + POSTING_READ(CNL_DPLL_CFGCR1(id)); } /* @@ -1997,17 +2010,17 @@ static void cnl_ddi_pll_enable(struct drm_i915_private *dev_priv, */ /* 6. Enable DPLL in DPLL_ENABLE. */ - val = I915_READ(CNL_DPLL_ENABLE(pll->id)); + val = I915_READ(CNL_DPLL_ENABLE(id)); val |= PLL_ENABLE; - I915_WRITE(CNL_DPLL_ENABLE(pll->id), val); + I915_WRITE(CNL_DPLL_ENABLE(id), val); /* 7. Wait for PLL lock status in DPLL_ENABLE. */ if (intel_wait_for_register(dev_priv, - CNL_DPLL_ENABLE(pll->id), + CNL_DPLL_ENABLE(id), PLL_LOCK, PLL_LOCK, 5)) - DRM_ERROR("PLL %d not locked\n", pll->id); + DRM_ERROR("PLL %d not locked\n", id); /* * 8. If the frequency will result in a change to the voltage @@ -2027,6 +2040,7 @@ static void cnl_ddi_pll_enable(struct drm_i915_private *dev_priv, static void cnl_ddi_pll_disable(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll) { + const enum intel_dpll_id id = pll->info->id; uint32_t val; /* @@ -2044,17 +2058,17 @@ static void cnl_ddi_pll_disable(struct drm_i915_private *dev_priv, */ /* 3. Disable DPLL through DPLL_ENABLE. */ - val = I915_READ(CNL_DPLL_ENABLE(pll->id)); + val = I915_READ(CNL_DPLL_ENABLE(id)); val &= ~PLL_ENABLE; - I915_WRITE(CNL_DPLL_ENABLE(pll->id), val); + I915_WRITE(CNL_DPLL_ENABLE(id), val); /* 4. Wait for PLL not locked status in DPLL_ENABLE. */ if (intel_wait_for_register(dev_priv, - CNL_DPLL_ENABLE(pll->id), + CNL_DPLL_ENABLE(id), PLL_LOCK, 0, 5)) - DRM_ERROR("PLL %d locked\n", pll->id); + DRM_ERROR("PLL %d locked\n", id); /* * 5. If the frequency will result in a change to the voltage @@ -2066,23 +2080,24 @@ static void cnl_ddi_pll_disable(struct drm_i915_private *dev_priv, */ /* 6. Disable DPLL power in DPLL_ENABLE. */ - val = I915_READ(CNL_DPLL_ENABLE(pll->id)); + val = I915_READ(CNL_DPLL_ENABLE(id)); val &= ~PLL_POWER_ENABLE; - I915_WRITE(CNL_DPLL_ENABLE(pll->id), val); + I915_WRITE(CNL_DPLL_ENABLE(id), val); /* 7. Wait for DPLL power state disabled in DPLL_ENABLE. */ if (intel_wait_for_register(dev_priv, - CNL_DPLL_ENABLE(pll->id), + CNL_DPLL_ENABLE(id), PLL_POWER_STATE, 0, 5)) - DRM_ERROR("PLL %d Power not disabled\n", pll->id); + DRM_ERROR("PLL %d Power not disabled\n", id); } static bool cnl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv, struct intel_shared_dpll *pll, struct intel_dpll_hw_state *hw_state) { + const enum intel_dpll_id id = pll->info->id; uint32_t val; bool ret; @@ -2091,16 +2106,16 @@ static bool cnl_ddi_pll_get_hw_state(struct drm_i915_private *dev_priv, ret = false; - val = I915_READ(CNL_DPLL_ENABLE(pll->id)); + val = I915_READ(CNL_DPLL_ENABLE(id)); if (!(val & PLL_ENABLE)) goto out; - val = I915_READ(CNL_DPLL_CFGCR0(pll->id)); + val = I915_READ(CNL_DPLL_CFGCR0(id)); hw_state->cfgcr0 = val; /* avoid reading back stale values if HDMI mode is not enabled */ if (val & DPLL_CFGCR0_HDMI_MODE) { - hw_state->cfgcr1 = I915_READ(CNL_DPLL_CFGCR1(pll->id)); + hw_state->cfgcr1 = I915_READ(CNL_DPLL_CFGCR1(id)); } ret = true; @@ -2203,6 +2218,7 @@ cnl_ddi_calculate_wrpll(int clock, struct skl_wrpll_params *wrpll_params) { u32 afe_clock = clock * 5; + uint32_t ref_clock; u32 dco_min = 7998000; u32 dco_max = 10000000; u32 dco_mid = (dco_min + dco_max) / 2; @@ -2235,8 +2251,17 @@ cnl_ddi_calculate_wrpll(int clock, cnl_wrpll_get_multipliers(best_div, &pdiv, &qdiv, &kdiv); - cnl_wrpll_params_populate(wrpll_params, best_dco, - dev_priv->cdclk.hw.ref, pdiv, qdiv, kdiv); + ref_clock = dev_priv->cdclk.hw.ref; + + /* + * For ICL, the spec states: if reference frequency is 38.4, use 19.2 + * because the DPLL automatically divides that by 2. + */ + if (IS_ICELAKE(dev_priv) && ref_clock == 38400) + ref_clock = 19200; + + cnl_wrpll_params_populate(wrpll_params, best_dco, ref_clock, pdiv, qdiv, + kdiv); return true; } @@ -2372,10 +2397,10 @@ static const struct intel_shared_dpll_funcs cnl_ddi_pll_funcs = { }; static const struct dpll_info cnl_plls[] = { - { "DPLL 0", DPLL_ID_SKL_DPLL0, &cnl_ddi_pll_funcs, 0 }, - { "DPLL 1", DPLL_ID_SKL_DPLL1, &cnl_ddi_pll_funcs, 0 }, - { "DPLL 2", DPLL_ID_SKL_DPLL2, &cnl_ddi_pll_funcs, 0 }, - { NULL, -1, NULL, }, + { "DPLL 0", &cnl_ddi_pll_funcs, DPLL_ID_SKL_DPLL0, 0 }, + { "DPLL 1", &cnl_ddi_pll_funcs, DPLL_ID_SKL_DPLL1, 0 }, + { "DPLL 2", &cnl_ddi_pll_funcs, DPLL_ID_SKL_DPLL2, 0 }, + { }, }; static const struct intel_dpll_mgr cnl_pll_mgr = { @@ -2384,6 +2409,644 @@ static const struct intel_dpll_mgr cnl_pll_mgr = { .dump_hw_state = cnl_dump_hw_state, }; +/* + * These values alrea already adjusted: they're the bits we write to the + * registers, not the logical values. + */ +static const struct skl_wrpll_params icl_dp_combo_pll_24MHz_values[] = { + { .dco_integer = 0x151, .dco_fraction = 0x4000, /* [0]: 5.4 */ + .pdiv = 0x2 /* 3 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x151, .dco_fraction = 0x4000, /* [1]: 2.7 */ + .pdiv = 0x2 /* 3 */, .kdiv = 2, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x151, .dco_fraction = 0x4000, /* [2]: 1.62 */ + .pdiv = 0x4 /* 5 */, .kdiv = 2, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x151, .dco_fraction = 0x4000, /* [3]: 3.24 */ + .pdiv = 0x4 /* 5 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x168, .dco_fraction = 0x0000, /* [4]: 2.16 */ + .pdiv = 0x1 /* 2 */, .kdiv = 2, .qdiv_mode = 1, .qdiv_ratio = 2}, + { .dco_integer = 0x168, .dco_fraction = 0x0000, /* [5]: 4.32 */ + .pdiv = 0x1 /* 2 */, .kdiv = 2, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x195, .dco_fraction = 0x0000, /* [6]: 6.48 */ + .pdiv = 0x2 /* 3 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x151, .dco_fraction = 0x4000, /* [7]: 8.1 */ + .pdiv = 0x1 /* 2 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0}, +}; + +/* Also used for 38.4 MHz values. */ +static const struct skl_wrpll_params icl_dp_combo_pll_19_2MHz_values[] = { + { .dco_integer = 0x1A5, .dco_fraction = 0x7000, /* [0]: 5.4 */ + .pdiv = 0x2 /* 3 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x1A5, .dco_fraction = 0x7000, /* [1]: 2.7 */ + .pdiv = 0x2 /* 3 */, .kdiv = 2, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x1A5, .dco_fraction = 0x7000, /* [2]: 1.62 */ + .pdiv = 0x4 /* 5 */, .kdiv = 2, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x1A5, .dco_fraction = 0x7000, /* [3]: 3.24 */ + .pdiv = 0x4 /* 5 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x1C2, .dco_fraction = 0x0000, /* [4]: 2.16 */ + .pdiv = 0x1 /* 2 */, .kdiv = 2, .qdiv_mode = 1, .qdiv_ratio = 2}, + { .dco_integer = 0x1C2, .dco_fraction = 0x0000, /* [5]: 4.32 */ + .pdiv = 0x1 /* 2 */, .kdiv = 2, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x1FA, .dco_fraction = 0x2000, /* [6]: 6.48 */ + .pdiv = 0x2 /* 3 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0}, + { .dco_integer = 0x1A5, .dco_fraction = 0x7000, /* [7]: 8.1 */ + .pdiv = 0x1 /* 2 */, .kdiv = 1, .qdiv_mode = 0, .qdiv_ratio = 0}, +}; + +static bool icl_calc_dp_combo_pll(struct drm_i915_private *dev_priv, int clock, + struct skl_wrpll_params *pll_params) +{ + const struct skl_wrpll_params *params; + + params = dev_priv->cdclk.hw.ref == 24000 ? + icl_dp_combo_pll_24MHz_values : + icl_dp_combo_pll_19_2MHz_values; + + switch (clock) { + case 540000: + *pll_params = params[0]; + break; + case 270000: + *pll_params = params[1]; + break; + case 162000: + *pll_params = params[2]; + break; + case 324000: + *pll_params = params[3]; + break; + case 216000: + *pll_params = params[4]; + break; + case 432000: + *pll_params = params[5]; + break; + case 648000: + *pll_params = params[6]; + break; + case 810000: + *pll_params = params[7]; + break; + default: + MISSING_CASE(clock); + return false; + } + + return true; +} + +static bool icl_calc_dpll_state(struct intel_crtc_state *crtc_state, + struct intel_encoder *encoder, int clock, + struct intel_dpll_hw_state *pll_state) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + uint32_t cfgcr0, cfgcr1; + struct skl_wrpll_params pll_params = { 0 }; + bool ret; + + if (intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI)) + ret = cnl_ddi_calculate_wrpll(clock, dev_priv, &pll_params); + else + ret = icl_calc_dp_combo_pll(dev_priv, clock, &pll_params); + + if (!ret) + return false; + + cfgcr0 = DPLL_CFGCR0_DCO_FRACTION(pll_params.dco_fraction) | + pll_params.dco_integer; + + cfgcr1 = DPLL_CFGCR1_QDIV_RATIO(pll_params.qdiv_ratio) | + DPLL_CFGCR1_QDIV_MODE(pll_params.qdiv_mode) | + DPLL_CFGCR1_KDIV(pll_params.kdiv) | + DPLL_CFGCR1_PDIV(pll_params.pdiv) | + DPLL_CFGCR1_CENTRAL_FREQ_8400; + + pll_state->cfgcr0 = cfgcr0; + pll_state->cfgcr1 = cfgcr1; + return true; +} + +static enum port icl_mg_pll_id_to_port(enum intel_dpll_id id) +{ + return id - DPLL_ID_ICL_MGPLL1 + PORT_C; +} + +static enum intel_dpll_id icl_port_to_mg_pll_id(enum port port) +{ + return port - PORT_C + DPLL_ID_ICL_MGPLL1; +} + +static bool icl_mg_pll_find_divisors(int clock_khz, bool is_dp, bool use_ssc, + uint32_t *target_dco_khz, + struct intel_dpll_hw_state *state) +{ + uint32_t dco_min_freq, dco_max_freq; + int div1_vals[] = {7, 5, 3, 2}; + unsigned int i; + int div2; + + dco_min_freq = is_dp ? 8100000 : use_ssc ? 8000000 : 7992000; + dco_max_freq = is_dp ? 8100000 : 10000000; + + for (i = 0; i < ARRAY_SIZE(div1_vals); i++) { + int div1 = div1_vals[i]; + + for (div2 = 10; div2 > 0; div2--) { + int dco = div1 * div2 * clock_khz * 5; + int a_divratio, tlinedrv, inputsel, hsdiv; + + if (dco < dco_min_freq || dco > dco_max_freq) + continue; + + if (div2 >= 2) { + a_divratio = is_dp ? 10 : 5; + tlinedrv = 2; + } else { + a_divratio = 5; + tlinedrv = 0; + } + inputsel = is_dp ? 0 : 1; + + switch (div1) { + default: + MISSING_CASE(div1); + case 2: + hsdiv = 0; + break; + case 3: + hsdiv = 1; + break; + case 5: + hsdiv = 2; + break; + case 7: + hsdiv = 3; + break; + } + + *target_dco_khz = dco; + + state->mg_refclkin_ctl = MG_REFCLKIN_CTL_OD_2_MUX(1); + + state->mg_clktop2_coreclkctl1 = + MG_CLKTOP2_CORECLKCTL1_A_DIVRATIO(a_divratio); + + state->mg_clktop2_hsclkctl = + MG_CLKTOP2_HSCLKCTL_TLINEDRV_CLKSEL(tlinedrv) | + MG_CLKTOP2_HSCLKCTL_CORE_INPUTSEL(inputsel) | + MG_CLKTOP2_HSCLKCTL_HSDIV_RATIO(hsdiv) | + MG_CLKTOP2_HSCLKCTL_DSDIV_RATIO(div2); + + return true; + } + } + + return false; +} + +/* + * The specification for this function uses real numbers, so the math had to be + * adapted to integer-only calculation, that's why it looks so different. + */ +static bool icl_calc_mg_pll_state(struct intel_crtc_state *crtc_state, + struct intel_encoder *encoder, int clock, + struct intel_dpll_hw_state *pll_state) +{ + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); + int refclk_khz = dev_priv->cdclk.hw.ref; + uint32_t dco_khz, m1div, m2div_int, m2div_rem, m2div_frac; + uint32_t iref_ndiv, iref_trim, iref_pulse_w; + uint32_t prop_coeff, int_coeff; + uint32_t tdc_targetcnt, feedfwgain; + uint64_t ssc_stepsize, ssc_steplen, ssc_steplog; + uint64_t tmp; + bool use_ssc = false; + bool is_dp = !intel_crtc_has_type(crtc_state, INTEL_OUTPUT_HDMI); + + if (!icl_mg_pll_find_divisors(clock, is_dp, use_ssc, &dco_khz, + pll_state)) { + DRM_DEBUG_KMS("Failed to find divisors for clock %d\n", clock); + return false; + } + + m1div = 2; + m2div_int = dco_khz / (refclk_khz * m1div); + if (m2div_int > 255) { + m1div = 4; + m2div_int = dco_khz / (refclk_khz * m1div); + if (m2div_int > 255) { + DRM_DEBUG_KMS("Failed to find mdiv for clock %d\n", + clock); + return false; + } + } + m2div_rem = dco_khz % (refclk_khz * m1div); + + tmp = (uint64_t)m2div_rem * (1 << 22); + do_div(tmp, refclk_khz * m1div); + m2div_frac = tmp; + + switch (refclk_khz) { + case 19200: + iref_ndiv = 1; + iref_trim = 28; + iref_pulse_w = 1; + break; + case 24000: + iref_ndiv = 1; + iref_trim = 25; + iref_pulse_w = 2; + break; + case 38400: + iref_ndiv = 2; + iref_trim = 28; + iref_pulse_w = 1; + break; + default: + MISSING_CASE(refclk_khz); + return false; + } + + /* + * tdc_res = 0.000003 + * tdc_targetcnt = int(2 / (tdc_res * 8 * 50 * 1.1) / refclk_mhz + 0.5) + * + * The multiplication by 1000 is due to refclk MHz to KHz conversion. It + * was supposed to be a division, but we rearranged the operations of + * the formula to avoid early divisions so we don't multiply the + * rounding errors. + * + * 0.000003 * 8 * 50 * 1.1 = 0.00132, also known as 132 / 100000, which + * we also rearrange to work with integers. + * + * The 0.5 transformed to 5 results in a multiplication by 10 and the + * last division by 10. + */ + tdc_targetcnt = (2 * 1000 * 100000 * 10 / (132 * refclk_khz) + 5) / 10; + + /* + * Here we divide dco_khz by 10 in order to allow the dividend to fit in + * 32 bits. That's not a problem since we round the division down + * anyway. + */ + feedfwgain = (use_ssc || m2div_rem > 0) ? + m1div * 1000000 * 100 / (dco_khz * 3 / 10) : 0; + + if (dco_khz >= 9000000) { + prop_coeff = 5; + int_coeff = 10; + } else { + prop_coeff = 4; + int_coeff = 8; + } + + if (use_ssc) { + tmp = (uint64_t)dco_khz * 47 * 32; + do_div(tmp, refclk_khz * m1div * 10000); + ssc_stepsize = tmp; + + tmp = (uint64_t)dco_khz * 1000; + ssc_steplen = DIV_ROUND_UP_ULL(tmp, 32 * 2 * 32); + } else { + ssc_stepsize = 0; + ssc_steplen = 0; + } + ssc_steplog = 4; + + pll_state->mg_pll_div0 = (m2div_rem > 0 ? MG_PLL_DIV0_FRACNEN_H : 0) | + MG_PLL_DIV0_FBDIV_FRAC(m2div_frac) | + MG_PLL_DIV0_FBDIV_INT(m2div_int); + + pll_state->mg_pll_div1 = MG_PLL_DIV1_IREF_NDIVRATIO(iref_ndiv) | + MG_PLL_DIV1_DITHER_DIV_2 | + MG_PLL_DIV1_NDIVRATIO(1) | + MG_PLL_DIV1_FBPREDIV(m1div); + + pll_state->mg_pll_lf = MG_PLL_LF_TDCTARGETCNT(tdc_targetcnt) | + MG_PLL_LF_AFCCNTSEL_512 | + MG_PLL_LF_GAINCTRL(1) | + MG_PLL_LF_INT_COEFF(int_coeff) | + MG_PLL_LF_PROP_COEFF(prop_coeff); + + pll_state->mg_pll_frac_lock = MG_PLL_FRAC_LOCK_TRUELOCK_CRIT_32 | + MG_PLL_FRAC_LOCK_EARLYLOCK_CRIT_32 | + MG_PLL_FRAC_LOCK_LOCKTHRESH(10) | + MG_PLL_FRAC_LOCK_DCODITHEREN | + MG_PLL_FRAC_LOCK_FEEDFWRDGAIN(feedfwgain); + if (use_ssc || m2div_rem > 0) + pll_state->mg_pll_frac_lock |= MG_PLL_FRAC_LOCK_FEEDFWRDCAL_EN; + + pll_state->mg_pll_ssc = (use_ssc ? MG_PLL_SSC_EN : 0) | + MG_PLL_SSC_TYPE(2) | + MG_PLL_SSC_STEPLENGTH(ssc_steplen) | + MG_PLL_SSC_STEPNUM(ssc_steplog) | + MG_PLL_SSC_FLLEN | + MG_PLL_SSC_STEPSIZE(ssc_stepsize); + + pll_state->mg_pll_tdc_coldst_bias = MG_PLL_TDC_COLDST_COLDSTART; + + if (refclk_khz != 38400) { + pll_state->mg_pll_tdc_coldst_bias |= + MG_PLL_TDC_COLDST_IREFINT_EN | + MG_PLL_TDC_COLDST_REFBIAS_START_PULSE_W(iref_pulse_w) | + MG_PLL_TDC_COLDST_COLDSTART | + MG_PLL_TDC_TDCOVCCORR_EN | + MG_PLL_TDC_TDCSEL(3); + + pll_state->mg_pll_bias = MG_PLL_BIAS_BIAS_GB_SEL(3) | + MG_PLL_BIAS_INIT_DCOAMP(0x3F) | + MG_PLL_BIAS_BIAS_BONUS(10) | + MG_PLL_BIAS_BIASCAL_EN | + MG_PLL_BIAS_CTRIM(12) | + MG_PLL_BIAS_VREF_RDAC(4) | + MG_PLL_BIAS_IREFTRIM(iref_trim); + } + + return true; +} + +static struct intel_shared_dpll * +icl_get_dpll(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, + struct intel_encoder *encoder) +{ + struct intel_shared_dpll *pll; + struct intel_dpll_hw_state pll_state = {}; + enum port port = encoder->port; + enum intel_dpll_id min, max; + int clock = crtc_state->port_clock; + bool ret; + + switch (port) { + case PORT_A: + case PORT_B: + min = DPLL_ID_ICL_DPLL0; + max = DPLL_ID_ICL_DPLL1; + ret = icl_calc_dpll_state(crtc_state, encoder, clock, + &pll_state); + break; + case PORT_C: + case PORT_D: + case PORT_E: + case PORT_F: + min = icl_port_to_mg_pll_id(port); + max = min; + ret = icl_calc_mg_pll_state(crtc_state, encoder, clock, + &pll_state); + break; + default: + MISSING_CASE(port); + return NULL; + } + + if (!ret) { + DRM_DEBUG_KMS("Could not calculate PLL state.\n"); + return NULL; + } + + crtc_state->dpll_hw_state = pll_state; + + pll = intel_find_shared_dpll(crtc, crtc_state, min, max); + if (!pll) { + DRM_DEBUG_KMS("No PLL selected\n"); + return NULL; + } + + intel_reference_shared_dpll(pll, crtc_state); + + return pll; +} + +static i915_reg_t icl_pll_id_to_enable_reg(enum intel_dpll_id id) +{ + switch (id) { + default: + MISSING_CASE(id); + case DPLL_ID_ICL_DPLL0: + case DPLL_ID_ICL_DPLL1: + return CNL_DPLL_ENABLE(id); + case DPLL_ID_ICL_MGPLL1: + case DPLL_ID_ICL_MGPLL2: + case DPLL_ID_ICL_MGPLL3: + case DPLL_ID_ICL_MGPLL4: + return MG_PLL_ENABLE(icl_mg_pll_id_to_port(id)); + } +} + +static bool icl_pll_get_hw_state(struct drm_i915_private *dev_priv, + struct intel_shared_dpll *pll, + struct intel_dpll_hw_state *hw_state) +{ + const enum intel_dpll_id id = pll->info->id; + uint32_t val; + enum port port; + bool ret = false; + + if (!intel_display_power_get_if_enabled(dev_priv, POWER_DOMAIN_PLLS)) + return false; + + val = I915_READ(icl_pll_id_to_enable_reg(id)); + if (!(val & PLL_ENABLE)) + goto out; + + switch (id) { + case DPLL_ID_ICL_DPLL0: + case DPLL_ID_ICL_DPLL1: + hw_state->cfgcr0 = I915_READ(ICL_DPLL_CFGCR0(id)); + hw_state->cfgcr1 = I915_READ(ICL_DPLL_CFGCR1(id)); + break; + case DPLL_ID_ICL_MGPLL1: + case DPLL_ID_ICL_MGPLL2: + case DPLL_ID_ICL_MGPLL3: + case DPLL_ID_ICL_MGPLL4: + port = icl_mg_pll_id_to_port(id); + hw_state->mg_refclkin_ctl = I915_READ(MG_REFCLKIN_CTL(port)); + hw_state->mg_clktop2_coreclkctl1 = + I915_READ(MG_CLKTOP2_CORECLKCTL1(port)); + hw_state->mg_clktop2_hsclkctl = + I915_READ(MG_CLKTOP2_HSCLKCTL(port)); + hw_state->mg_pll_div0 = I915_READ(MG_PLL_DIV0(port)); + hw_state->mg_pll_div1 = I915_READ(MG_PLL_DIV1(port)); + hw_state->mg_pll_lf = I915_READ(MG_PLL_LF(port)); + hw_state->mg_pll_frac_lock = I915_READ(MG_PLL_FRAC_LOCK(port)); + hw_state->mg_pll_ssc = I915_READ(MG_PLL_SSC(port)); + hw_state->mg_pll_bias = I915_READ(MG_PLL_BIAS(port)); + hw_state->mg_pll_tdc_coldst_bias = + I915_READ(MG_PLL_TDC_COLDST_BIAS(port)); + break; + default: + MISSING_CASE(id); + } + + ret = true; +out: + intel_display_power_put(dev_priv, POWER_DOMAIN_PLLS); + return ret; +} + +static void icl_dpll_write(struct drm_i915_private *dev_priv, + struct intel_shared_dpll *pll) +{ + struct intel_dpll_hw_state *hw_state = &pll->state.hw_state; + const enum intel_dpll_id id = pll->info->id; + + I915_WRITE(ICL_DPLL_CFGCR0(id), hw_state->cfgcr0); + I915_WRITE(ICL_DPLL_CFGCR1(id), hw_state->cfgcr1); + POSTING_READ(ICL_DPLL_CFGCR1(id)); +} + +static void icl_mg_pll_write(struct drm_i915_private *dev_priv, + struct intel_shared_dpll *pll) +{ + struct intel_dpll_hw_state *hw_state = &pll->state.hw_state; + enum port port = icl_mg_pll_id_to_port(pll->info->id); + + I915_WRITE(MG_REFCLKIN_CTL(port), hw_state->mg_refclkin_ctl); + I915_WRITE(MG_CLKTOP2_CORECLKCTL1(port), + hw_state->mg_clktop2_coreclkctl1); + I915_WRITE(MG_CLKTOP2_HSCLKCTL(port), hw_state->mg_clktop2_hsclkctl); + I915_WRITE(MG_PLL_DIV0(port), hw_state->mg_pll_div0); + I915_WRITE(MG_PLL_DIV1(port), hw_state->mg_pll_div1); + I915_WRITE(MG_PLL_LF(port), hw_state->mg_pll_lf); + I915_WRITE(MG_PLL_FRAC_LOCK(port), hw_state->mg_pll_frac_lock); + I915_WRITE(MG_PLL_SSC(port), hw_state->mg_pll_ssc); + I915_WRITE(MG_PLL_BIAS(port), hw_state->mg_pll_bias); + I915_WRITE(MG_PLL_TDC_COLDST_BIAS(port), + hw_state->mg_pll_tdc_coldst_bias); + POSTING_READ(MG_PLL_TDC_COLDST_BIAS(port)); +} + +static void icl_pll_enable(struct drm_i915_private *dev_priv, + struct intel_shared_dpll *pll) +{ + const enum intel_dpll_id id = pll->info->id; + i915_reg_t enable_reg = icl_pll_id_to_enable_reg(id); + uint32_t val; + + val = I915_READ(enable_reg); + val |= PLL_POWER_ENABLE; + I915_WRITE(enable_reg, val); + + /* + * The spec says we need to "wait" but it also says it should be + * immediate. + */ + if (intel_wait_for_register(dev_priv, enable_reg, PLL_POWER_STATE, + PLL_POWER_STATE, 1)) + DRM_ERROR("PLL %d Power not enabled\n", id); + + switch (id) { + case DPLL_ID_ICL_DPLL0: + case DPLL_ID_ICL_DPLL1: + icl_dpll_write(dev_priv, pll); + break; + case DPLL_ID_ICL_MGPLL1: + case DPLL_ID_ICL_MGPLL2: + case DPLL_ID_ICL_MGPLL3: + case DPLL_ID_ICL_MGPLL4: + icl_mg_pll_write(dev_priv, pll); + break; + default: + MISSING_CASE(id); + } + + /* + * DVFS pre sequence would be here, but in our driver the cdclk code + * paths should already be setting the appropriate voltage, hence we do + * nothign here. + */ + + val = I915_READ(enable_reg); + val |= PLL_ENABLE; + I915_WRITE(enable_reg, val); + + if (intel_wait_for_register(dev_priv, enable_reg, PLL_LOCK, PLL_LOCK, + 1)) /* 600us actually. */ + DRM_ERROR("PLL %d not locked\n", id); + + /* DVFS post sequence would be here. See the comment above. */ +} + +static void icl_pll_disable(struct drm_i915_private *dev_priv, + struct intel_shared_dpll *pll) +{ + const enum intel_dpll_id id = pll->info->id; + i915_reg_t enable_reg = icl_pll_id_to_enable_reg(id); + uint32_t val; + + /* The first steps are done by intel_ddi_post_disable(). */ + + /* + * DVFS pre sequence would be here, but in our driver the cdclk code + * paths should already be setting the appropriate voltage, hence we do + * nothign here. + */ + + val = I915_READ(enable_reg); + val &= ~PLL_ENABLE; + I915_WRITE(enable_reg, val); + + /* Timeout is actually 1us. */ + if (intel_wait_for_register(dev_priv, enable_reg, PLL_LOCK, 0, 1)) + DRM_ERROR("PLL %d locked\n", id); + + /* DVFS post sequence would be here. See the comment above. */ + + val = I915_READ(enable_reg); + val &= ~PLL_POWER_ENABLE; + I915_WRITE(enable_reg, val); + + /* + * The spec says we need to "wait" but it also says it should be + * immediate. + */ + if (intel_wait_for_register(dev_priv, enable_reg, PLL_POWER_STATE, 0, + 1)) + DRM_ERROR("PLL %d Power not disabled\n", id); +} + +static void icl_dump_hw_state(struct drm_i915_private *dev_priv, + struct intel_dpll_hw_state *hw_state) +{ + DRM_DEBUG_KMS("dpll_hw_state: cfgcr0: 0x%x, cfgcr1: 0x%x, " + "mg_refclkin_ctl: 0x%x, hg_clktop2_coreclkctl1: 0x%x, " + "mg_clktop2_hsclkctl: 0x%x, mg_pll_div0: 0x%x, " + "mg_pll_div2: 0x%x, mg_pll_lf: 0x%x, " + "mg_pll_frac_lock: 0x%x, mg_pll_ssc: 0x%x, " + "mg_pll_bias: 0x%x, mg_pll_tdc_coldst_bias: 0x%x\n", + hw_state->cfgcr0, hw_state->cfgcr1, + hw_state->mg_refclkin_ctl, + hw_state->mg_clktop2_coreclkctl1, + hw_state->mg_clktop2_hsclkctl, + hw_state->mg_pll_div0, + hw_state->mg_pll_div1, + hw_state->mg_pll_lf, + hw_state->mg_pll_frac_lock, + hw_state->mg_pll_ssc, + hw_state->mg_pll_bias, + hw_state->mg_pll_tdc_coldst_bias); +} + +static const struct intel_shared_dpll_funcs icl_pll_funcs = { + .enable = icl_pll_enable, + .disable = icl_pll_disable, + .get_hw_state = icl_pll_get_hw_state, +}; + +static const struct dpll_info icl_plls[] = { + { "DPLL 0", &icl_pll_funcs, DPLL_ID_ICL_DPLL0, 0 }, + { "DPLL 1", &icl_pll_funcs, DPLL_ID_ICL_DPLL1, 0 }, + { "MG PLL 1", &icl_pll_funcs, DPLL_ID_ICL_MGPLL1, 0 }, + { "MG PLL 2", &icl_pll_funcs, DPLL_ID_ICL_MGPLL2, 0 }, + { "MG PLL 3", &icl_pll_funcs, DPLL_ID_ICL_MGPLL3, 0 }, + { "MG PLL 4", &icl_pll_funcs, DPLL_ID_ICL_MGPLL4, 0 }, + { }, +}; + +static const struct intel_dpll_mgr icl_pll_mgr = { + .dpll_info = icl_plls, + .get_dpll = icl_get_dpll, + .dump_hw_state = icl_dump_hw_state, +}; + /** * intel_shared_dpll_init - Initialize shared DPLLs * @dev: drm device @@ -2397,7 +3060,9 @@ void intel_shared_dpll_init(struct drm_device *dev) const struct dpll_info *dpll_info; int i; - if (IS_CANNONLAKE(dev_priv)) + if (IS_ICELAKE(dev_priv)) + dpll_mgr = &icl_pll_mgr; + else if (IS_CANNONLAKE(dev_priv)) dpll_mgr = &cnl_pll_mgr; else if (IS_GEN9_BC(dev_priv)) dpll_mgr = &skl_pll_mgr; @@ -2415,13 +3080,9 @@ void intel_shared_dpll_init(struct drm_device *dev) dpll_info = dpll_mgr->dpll_info; - for (i = 0; dpll_info[i].id >= 0; i++) { + for (i = 0; dpll_info[i].name; i++) { WARN_ON(i != dpll_info[i].id); - - dev_priv->shared_dplls[i].id = dpll_info[i].id; - dev_priv->shared_dplls[i].name = dpll_info[i].name; - dev_priv->shared_dplls[i].funcs = *dpll_info[i].funcs; - dev_priv->shared_dplls[i].flags = dpll_info[i].flags; + dev_priv->shared_dplls[i].info = &dpll_info[i]; } dev_priv->dpll_mgr = dpll_mgr; @@ -2481,7 +3142,7 @@ void intel_release_shared_dpll(struct intel_shared_dpll *dpll, struct intel_shared_dpll_state *shared_dpll_state; shared_dpll_state = intel_atomic_get_shared_dpll_state(state); - shared_dpll_state[dpll->id].crtc_mask &= ~(1 << crtc->pipe); + shared_dpll_state[dpll->info->id].crtc_mask &= ~(1 << crtc->pipe); } /** diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.h b/drivers/gpu/drm/i915/intel_dpll_mgr.h index f24ccf4..7a0cd56 100644 --- a/drivers/gpu/drm/i915/intel_dpll_mgr.h +++ b/drivers/gpu/drm/i915/intel_dpll_mgr.h @@ -103,6 +103,32 @@ enum intel_dpll_id { * @DPLL_ID_SKL_DPLL3: SKL and later DPLL3 */ DPLL_ID_SKL_DPLL3 = 3, + + + /** + * @DPLL_ID_ICL_DPLL0: ICL combo PHY DPLL0 + */ + DPLL_ID_ICL_DPLL0 = 0, + /** + * @DPLL_ID_ICL_DPLL1: ICL combo PHY DPLL1 + */ + DPLL_ID_ICL_DPLL1 = 1, + /** + * @DPLL_ID_ICL_MGPLL1: ICL MG PLL 1 port 1 (C) + */ + DPLL_ID_ICL_MGPLL1 = 2, + /** + * @DPLL_ID_ICL_MGPLL2: ICL MG PLL 1 port 2 (D) + */ + DPLL_ID_ICL_MGPLL2 = 3, + /** + * @DPLL_ID_ICL_MGPLL3: ICL MG PLL 1 port 3 (E) + */ + DPLL_ID_ICL_MGPLL3 = 4, + /** + * @DPLL_ID_ICL_MGPLL4: ICL MG PLL 1 port 4 (F) + */ + DPLL_ID_ICL_MGPLL4 = 5, }; #define I915_NUM_PLLS 6 @@ -135,6 +161,21 @@ struct intel_dpll_hw_state { /* bxt */ uint32_t ebb0, ebb4, pll0, pll1, pll2, pll3, pll6, pll8, pll9, pll10, pcsdw12; + + /* + * ICL uses the following, already defined: + * uint32_t cfgcr0, cfgcr1; + */ + uint32_t mg_refclkin_ctl; + uint32_t mg_clktop2_coreclkctl1; + uint32_t mg_clktop2_hsclkctl; + uint32_t mg_pll_div0; + uint32_t mg_pll_div1; + uint32_t mg_pll_lf; + uint32_t mg_pll_frac_lock; + uint32_t mg_pll_ssc; + uint32_t mg_pll_bias; + uint32_t mg_pll_tdc_coldst_bias; }; /** @@ -206,6 +247,37 @@ struct intel_shared_dpll_funcs { }; /** + * struct dpll_info - display PLL platform specific info + */ +struct dpll_info { + /** + * @name: DPLL name; used for logging + */ + const char *name; + + /** + * @funcs: platform specific hooks + */ + const struct intel_shared_dpll_funcs *funcs; + + /** + * @id: unique indentifier for this DPLL; should match the index in the + * dev_priv->shared_dplls array + */ + enum intel_dpll_id id; + +#define INTEL_DPLL_ALWAYS_ON (1 << 0) + /** + * @flags: + * + * INTEL_DPLL_ALWAYS_ON + * Inform the state checker that the DPLL is kept enabled even if + * not in use by any CRTC. + */ + uint32_t flags; +}; + +/** * struct intel_shared_dpll - display PLL with tracked state and users */ struct intel_shared_dpll { @@ -228,30 +300,9 @@ struct intel_shared_dpll { bool on; /** - * @name: DPLL name; used for logging + * @info: platform specific info */ - const char *name; - - /** - * @id: unique indentifier for this DPLL; should match the index in the - * dev_priv->shared_dplls array - */ - enum intel_dpll_id id; - - /** - * @funcs: platform specific hooks - */ - struct intel_shared_dpll_funcs funcs; - -#define INTEL_DPLL_ALWAYS_ON (1 << 0) - /** - * @flags: - * - * INTEL_DPLL_ALWAYS_ON - * Inform the state checker that the DPLL is kept enabled even if - * not in use by any CRTC. - */ - uint32_t flags; + const struct dpll_info *info; }; #define SKL_DPLL0 0 diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index d436858..d7dbca1 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -49,13 +49,15 @@ * check the condition before the timeout. */ #define __wait_for(OP, COND, US, Wmin, Wmax) ({ \ - unsigned long timeout__ = jiffies + usecs_to_jiffies(US) + 1; \ + const ktime_t end__ = ktime_add_ns(ktime_get_raw(), 1000ll * (US)); \ long wait__ = (Wmin); /* recommended min for usleep is 10 us */ \ int ret__; \ might_sleep(); \ for (;;) { \ - bool expired__ = time_after(jiffies, timeout__); \ + const bool expired__ = ktime_after(ktime_get_raw(), end__); \ OP; \ + /* Guarantee COND check prior to timeout */ \ + barrier(); \ if (COND) { \ ret__ = 0; \ break; \ @@ -96,6 +98,8 @@ u64 now = local_clock(); \ if (!(ATOMIC)) \ preempt_enable(); \ + /* Guarantee COND check prior to timeout */ \ + barrier(); \ if (COND) { \ ret = 0; \ break; \ @@ -140,6 +144,10 @@ #define KHz(x) (1000 * (x)) #define MHz(x) KHz(1000 * (x)) +#define KBps(x) (1000 * (x)) +#define MBps(x) KBps(1000 * (x)) +#define GBps(x) ((u64)1000 * MBps((x))) + /* * Display related stuff */ @@ -482,7 +490,7 @@ struct intel_atomic_state { bool skip_intermediate_wm; /* Gen9+ only */ - struct skl_wm_values wm_results; + struct skl_ddb_values wm_results; struct i915_sw_fence commit_ready; @@ -548,6 +556,12 @@ struct intel_initial_plane_config { #define SKL_MAX_DST_W 4096 #define SKL_MIN_DST_H 8 #define SKL_MAX_DST_H 4096 +#define ICL_MAX_SRC_W 5120 +#define ICL_MAX_SRC_H 4096 +#define ICL_MAX_DST_W 5120 +#define ICL_MAX_DST_H 4096 +#define SKL_MIN_YUV_420_SRC_W 16 +#define SKL_MIN_YUV_420_SRC_H 16 struct intel_scaler { int in_use; @@ -598,7 +612,9 @@ struct intel_pipe_wm { struct skl_plane_wm { struct skl_wm_level wm[8]; + struct skl_wm_level uv_wm[8]; struct skl_wm_level trans_wm; + bool is_planar; }; struct skl_pipe_wm { @@ -874,6 +890,7 @@ struct intel_crtc_state { /* bitmask of visible planes (enum plane_id) */ u8 active_planes; + u8 nv12_planes; /* HDMI scrambling status */ bool hdmi_scrambling; @@ -1321,10 +1338,14 @@ void intel_check_cpu_fifo_underruns(struct drm_i915_private *dev_priv); void intel_check_pch_fifo_underruns(struct drm_i915_private *dev_priv); /* i915_irq.c */ +bool gen11_reset_one_iir(struct drm_i915_private * const i915, + const unsigned int bank, + const unsigned int bit); void gen5_enable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask); void gen5_disable_gt_irq(struct drm_i915_private *dev_priv, uint32_t mask); void gen6_mask_pm_irq(struct drm_i915_private *dev_priv, u32 mask); void gen6_unmask_pm_irq(struct drm_i915_private *dev_priv, u32 mask); +void gen11_reset_rps_interrupts(struct drm_i915_private *dev_priv); void gen6_reset_rps_interrupts(struct drm_i915_private *dev_priv); void gen6_enable_rps_interrupts(struct drm_i915_private *dev_priv); void gen6_disable_rps_interrupts(struct drm_i915_private *dev_priv); @@ -1389,6 +1410,12 @@ uint32_t ddi_signal_levels(struct intel_dp *intel_dp); u8 intel_ddi_dp_voltage_max(struct intel_encoder *encoder); int intel_ddi_toggle_hdcp_signalling(struct intel_encoder *intel_encoder, bool enable); +void icl_map_plls_to_ports(struct drm_crtc *crtc, + struct intel_crtc_state *crtc_state, + struct drm_atomic_state *old_state); +void icl_unmap_plls_to_ports(struct drm_crtc *crtc, + struct intel_crtc_state *crtc_state, + struct drm_atomic_state *old_state); unsigned int intel_fb_align_height(const struct drm_framebuffer *fb, int plane, unsigned int height); @@ -1571,8 +1598,6 @@ void bxt_enable_dc9(struct drm_i915_private *dev_priv); void bxt_disable_dc9(struct drm_i915_private *dev_priv); void gen9_enable_dc5(struct drm_i915_private *dev_priv); unsigned int skl_cdclk_get_vco(unsigned int freq); -void skl_enable_dc6(struct drm_i915_private *dev_priv); -void skl_disable_dc6(struct drm_i915_private *dev_priv); void intel_dp_get_m_n(struct intel_crtc *crtc, struct intel_crtc_state *pipe_config); void intel_dp_set_m_n(struct intel_crtc *crtc, enum link_m_n_set m_n); @@ -1588,9 +1613,12 @@ void hsw_disable_ips(const struct intel_crtc_state *crtc_state); enum intel_display_power_domain intel_port_to_power_domain(enum port port); void intel_mode_from_pipe_config(struct drm_display_mode *mode, struct intel_crtc_state *pipe_config); +void intel_crtc_arm_fifo_underrun(struct intel_crtc *crtc, + struct intel_crtc_state *crtc_state); int skl_update_scaler_crtc(struct intel_crtc_state *crtc_state); -int skl_max_scale(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state); +int skl_max_scale(struct intel_crtc *crtc, struct intel_crtc_state *crtc_state, + uint32_t pixel_format); static inline u32 intel_plane_ggtt_offset(const struct intel_plane_state *state) { @@ -1607,6 +1635,7 @@ u32 skl_plane_stride(const struct drm_framebuffer *fb, int plane, int skl_check_plane_surface(const struct intel_crtc_state *crtc_state, struct intel_plane_state *plane_state); int i9xx_check_plane_surface(struct intel_plane_state *plane_state); +int skl_format_to_fourcc(int format, bool rgb_order, bool alpha); /* intel_csr.c */ void intel_csr_ucode_init(struct drm_i915_private *); @@ -1773,6 +1802,7 @@ void intel_fbc_flush(struct drm_i915_private *dev_priv, unsigned int frontbuffer_bits, enum fb_op_origin origin); void intel_fbc_cleanup_cfb(struct drm_i915_private *dev_priv); void intel_fbc_handle_fifo_underrun_irq(struct drm_i915_private *dev_priv); +int intel_fbc_reset_underrun(struct drm_i915_private *dev_priv); /* intel_hdmi.c */ void intel_hdmi_init(struct drm_i915_private *dev_priv, i915_reg_t hdmi_reg, @@ -1783,7 +1813,7 @@ struct intel_hdmi *enc_to_intel_hdmi(struct drm_encoder *encoder); bool intel_hdmi_compute_config(struct intel_encoder *encoder, struct intel_crtc_state *pipe_config, struct drm_connector_state *conn_state); -void intel_hdmi_handle_sink_scrambling(struct intel_encoder *intel_encoder, +bool intel_hdmi_handle_sink_scrambling(struct intel_encoder *encoder, struct drm_connector *connector, bool high_tmds_clock_ratio, bool scrambling); @@ -1877,7 +1907,8 @@ void intel_psr_enable(struct intel_dp *intel_dp, void intel_psr_disable(struct intel_dp *intel_dp, const struct intel_crtc_state *old_crtc_state); void intel_psr_invalidate(struct drm_i915_private *dev_priv, - unsigned frontbuffer_bits); + unsigned frontbuffer_bits, + enum fb_op_origin origin); void intel_psr_flush(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits, enum fb_op_origin origin); @@ -1886,6 +1917,8 @@ void intel_psr_single_frame_update(struct drm_i915_private *dev_priv, unsigned frontbuffer_bits); void intel_psr_compute_config(struct intel_dp *intel_dp, struct intel_crtc_state *crtc_state); +void intel_psr_irq_control(struct drm_i915_private *dev_priv, bool debug); +void intel_psr_irq_handler(struct drm_i915_private *dev_priv, u32 psr_iir); /* intel_runtime_pm.c */ int intel_power_domains_init(struct drm_i915_private *); @@ -1909,6 +1942,8 @@ bool intel_display_power_get_if_enabled(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain); void intel_display_power_put(struct drm_i915_private *dev_priv, enum intel_display_power_domain domain); +void icl_dbuf_slices_update(struct drm_i915_private *dev_priv, + u8 req_slices); static inline void assert_rpm_device_not_suspended(struct drm_i915_private *dev_priv) @@ -2046,6 +2081,9 @@ void skl_disable_plane(struct intel_plane *plane, struct intel_crtc *crtc); bool skl_plane_get_hw_state(struct intel_plane *plane); bool skl_plane_has_ccs(struct drm_i915_private *dev_priv, enum pipe pipe, enum plane_id plane_id); +bool intel_format_is_yuv(uint32_t format); +bool skl_plane_has_planar(struct drm_i915_private *dev_priv, + enum pipe pipe, enum plane_id plane_id); /* intel_tv.c */ void intel_tv_init(struct drm_i915_private *dev_priv); @@ -2082,31 +2120,6 @@ intel_atomic_get_crtc_state(struct drm_atomic_state *state, return to_intel_crtc_state(crtc_state); } -static inline struct intel_crtc_state * -intel_atomic_get_existing_crtc_state(struct drm_atomic_state *state, - struct intel_crtc *crtc) -{ - struct drm_crtc_state *crtc_state; - - crtc_state = drm_atomic_get_existing_crtc_state(state, &crtc->base); - - if (crtc_state) - return to_intel_crtc_state(crtc_state); - else - return NULL; -} - -static inline struct intel_plane_state * -intel_atomic_get_existing_plane_state(struct drm_atomic_state *state, - struct intel_plane *plane) -{ - struct drm_plane_state *plane_state; - - plane_state = drm_atomic_get_existing_plane_state(state, &plane->base); - - return to_intel_plane_state(plane_state); -} - int intel_atomic_setup_scalers(struct drm_i915_private *dev_priv, struct intel_crtc *intel_crtc, struct intel_crtc_state *crtc_state); @@ -2138,8 +2151,17 @@ int intel_pipe_crc_create(struct drm_minor *minor); #ifdef CONFIG_DEBUG_FS int intel_crtc_set_crc_source(struct drm_crtc *crtc, const char *source_name, size_t *values_cnt); +void intel_crtc_disable_pipe_crc(struct intel_crtc *crtc); +void intel_crtc_enable_pipe_crc(struct intel_crtc *crtc); #else #define intel_crtc_set_crc_source NULL +static inline void intel_crtc_disable_pipe_crc(struct intel_crtc *crtc) +{ +} + +static inline void intel_crtc_enable_pipe_crc(struct intel_crtc *crtc) +{ +} #endif extern const struct file_operations i915_display_crc_ctl_fops; #endif /* __INTEL_DRV_H__ */ diff --git a/drivers/gpu/drm/i915/intel_dsi_vbt.c b/drivers/gpu/drm/i915/intel_dsi_vbt.c index 91c07b0..4d6ffa7 100644 --- a/drivers/gpu/drm/i915/intel_dsi_vbt.c +++ b/drivers/gpu/drm/i915/intel_dsi_vbt.c @@ -647,6 +647,11 @@ bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id) /* prepare count */ prepare_cnt = DIV_ROUND_UP(ths_prepare_ns * ui_den, ui_num * mul); + if (prepare_cnt > PREPARE_CNT_MAX) { + DRM_DEBUG_KMS("prepare count too high %u\n", prepare_cnt); + prepare_cnt = PREPARE_CNT_MAX; + } + /* exit zero count */ exit_zero_cnt = DIV_ROUND_UP( (ths_prepare_hszero - ths_prepare_ns) * ui_den, @@ -662,32 +667,29 @@ bool intel_dsi_vbt_init(struct intel_dsi *intel_dsi, u16 panel_id) if (exit_zero_cnt < (55 * ui_den / ui_num) && (55 * ui_den) % ui_num) exit_zero_cnt += 1; + if (exit_zero_cnt > EXIT_ZERO_CNT_MAX) { + DRM_DEBUG_KMS("exit zero count too high %u\n", exit_zero_cnt); + exit_zero_cnt = EXIT_ZERO_CNT_MAX; + } + /* clk zero count */ clk_zero_cnt = DIV_ROUND_UP( (tclk_prepare_clkzero - ths_prepare_ns) * ui_den, ui_num * mul); + if (clk_zero_cnt > CLK_ZERO_CNT_MAX) { + DRM_DEBUG_KMS("clock zero count too high %u\n", clk_zero_cnt); + clk_zero_cnt = CLK_ZERO_CNT_MAX; + } + /* trail count */ tclk_trail_ns = max(mipi_config->tclk_trail, mipi_config->ths_trail); trail_cnt = DIV_ROUND_UP(tclk_trail_ns * ui_den, ui_num * mul); - if (prepare_cnt > PREPARE_CNT_MAX || - exit_zero_cnt > EXIT_ZERO_CNT_MAX || - clk_zero_cnt > CLK_ZERO_CNT_MAX || - trail_cnt > TRAIL_CNT_MAX) - DRM_DEBUG_DRIVER("Values crossing maximum limits, restricting to max values\n"); - - if (prepare_cnt > PREPARE_CNT_MAX) - prepare_cnt = PREPARE_CNT_MAX; - - if (exit_zero_cnt > EXIT_ZERO_CNT_MAX) - exit_zero_cnt = EXIT_ZERO_CNT_MAX; - - if (clk_zero_cnt > CLK_ZERO_CNT_MAX) - clk_zero_cnt = CLK_ZERO_CNT_MAX; - - if (trail_cnt > TRAIL_CNT_MAX) + if (trail_cnt > TRAIL_CNT_MAX) { + DRM_DEBUG_KMS("trail count too high %u\n", trail_cnt); trail_cnt = TRAIL_CNT_MAX; + } /* B080 */ intel_dsi->dphy_reg = exit_zero_cnt << 24 | trail_cnt << 16 | diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 4ba139c..6bfd7e3 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -81,13 +81,17 @@ static const struct engine_class_info intel_engine_classes[] = { }, }; +#define MAX_MMIO_BASES 3 struct engine_info { unsigned int hw_id; unsigned int uabi_id; u8 class; u8 instance; - u32 mmio_base; - unsigned irq_shift; + /* mmio bases table *must* be sorted in reverse gen order */ + struct engine_mmio_base { + u32 gen : 8; + u32 base : 24; + } mmio_bases[MAX_MMIO_BASES]; }; static const struct engine_info intel_engines[] = { @@ -96,64 +100,76 @@ static const struct engine_info intel_engines[] = { .uabi_id = I915_EXEC_RENDER, .class = RENDER_CLASS, .instance = 0, - .mmio_base = RENDER_RING_BASE, - .irq_shift = GEN8_RCS_IRQ_SHIFT, + .mmio_bases = { + { .gen = 1, .base = RENDER_RING_BASE } + }, }, [BCS] = { .hw_id = BCS_HW, .uabi_id = I915_EXEC_BLT, .class = COPY_ENGINE_CLASS, .instance = 0, - .mmio_base = BLT_RING_BASE, - .irq_shift = GEN8_BCS_IRQ_SHIFT, + .mmio_bases = { + { .gen = 6, .base = BLT_RING_BASE } + }, }, [VCS] = { .hw_id = VCS_HW, .uabi_id = I915_EXEC_BSD, .class = VIDEO_DECODE_CLASS, .instance = 0, - .mmio_base = GEN6_BSD_RING_BASE, - .irq_shift = GEN8_VCS1_IRQ_SHIFT, + .mmio_bases = { + { .gen = 11, .base = GEN11_BSD_RING_BASE }, + { .gen = 6, .base = GEN6_BSD_RING_BASE }, + { .gen = 4, .base = BSD_RING_BASE } + }, }, [VCS2] = { .hw_id = VCS2_HW, .uabi_id = I915_EXEC_BSD, .class = VIDEO_DECODE_CLASS, .instance = 1, - .mmio_base = GEN8_BSD2_RING_BASE, - .irq_shift = GEN8_VCS2_IRQ_SHIFT, + .mmio_bases = { + { .gen = 11, .base = GEN11_BSD2_RING_BASE }, + { .gen = 8, .base = GEN8_BSD2_RING_BASE } + }, }, [VCS3] = { .hw_id = VCS3_HW, .uabi_id = I915_EXEC_BSD, .class = VIDEO_DECODE_CLASS, .instance = 2, - .mmio_base = GEN11_BSD3_RING_BASE, - .irq_shift = 0, /* not used */ + .mmio_bases = { + { .gen = 11, .base = GEN11_BSD3_RING_BASE } + }, }, [VCS4] = { .hw_id = VCS4_HW, .uabi_id = I915_EXEC_BSD, .class = VIDEO_DECODE_CLASS, .instance = 3, - .mmio_base = GEN11_BSD4_RING_BASE, - .irq_shift = 0, /* not used */ + .mmio_bases = { + { .gen = 11, .base = GEN11_BSD4_RING_BASE } + }, }, [VECS] = { .hw_id = VECS_HW, .uabi_id = I915_EXEC_VEBOX, .class = VIDEO_ENHANCEMENT_CLASS, .instance = 0, - .mmio_base = VEBOX_RING_BASE, - .irq_shift = GEN8_VECS_IRQ_SHIFT, + .mmio_bases = { + { .gen = 11, .base = GEN11_VEBOX_RING_BASE }, + { .gen = 7, .base = VEBOX_RING_BASE } + }, }, [VECS2] = { .hw_id = VECS2_HW, .uabi_id = I915_EXEC_VEBOX, .class = VIDEO_ENHANCEMENT_CLASS, .instance = 1, - .mmio_base = GEN11_VEBOX2_RING_BASE, - .irq_shift = 0, /* not used */ + .mmio_bases = { + { .gen = 11, .base = GEN11_VEBOX2_RING_BASE } + }, }, }; @@ -223,16 +239,36 @@ __intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class) } } +static u32 __engine_mmio_base(struct drm_i915_private *i915, + const struct engine_mmio_base *bases) +{ + int i; + + for (i = 0; i < MAX_MMIO_BASES; i++) + if (INTEL_GEN(i915) >= bases[i].gen) + break; + + GEM_BUG_ON(i == MAX_MMIO_BASES); + GEM_BUG_ON(!bases[i].base); + + return bases[i].base; +} + +static void __sprint_engine_name(char *name, const struct engine_info *info) +{ + WARN_ON(snprintf(name, INTEL_ENGINE_CS_MAX_NAME, "%s%u", + intel_engine_classes[info->class].name, + info->instance) >= INTEL_ENGINE_CS_MAX_NAME); +} + static int intel_engine_setup(struct drm_i915_private *dev_priv, enum intel_engine_id id) { const struct engine_info *info = &intel_engines[id]; - const struct engine_class_info *class_info; struct intel_engine_cs *engine; GEM_BUG_ON(info->class >= ARRAY_SIZE(intel_engine_classes)); - class_info = &intel_engine_classes[info->class]; BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH)); BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH)); @@ -253,35 +289,14 @@ intel_engine_setup(struct drm_i915_private *dev_priv, engine->id = id; engine->i915 = dev_priv; - WARN_ON(snprintf(engine->name, sizeof(engine->name), "%s%u", - class_info->name, info->instance) >= - sizeof(engine->name)); + __sprint_engine_name(engine->name, info); engine->hw_id = engine->guc_id = info->hw_id; - if (INTEL_GEN(dev_priv) >= 11) { - switch (engine->id) { - case VCS: - engine->mmio_base = GEN11_BSD_RING_BASE; - break; - case VCS2: - engine->mmio_base = GEN11_BSD2_RING_BASE; - break; - case VECS: - engine->mmio_base = GEN11_VEBOX_RING_BASE; - break; - default: - /* take the original value for all other engines */ - engine->mmio_base = info->mmio_base; - break; - } - } else { - engine->mmio_base = info->mmio_base; - } - engine->irq_shift = info->irq_shift; + engine->mmio_base = __engine_mmio_base(dev_priv, info->mmio_bases); engine->class = info->class; engine->instance = info->instance; engine->uabi_id = info->uabi_id; - engine->uabi_class = class_info->uabi_class; + engine->uabi_class = intel_engine_classes[info->class].uabi_class; engine->context_size = __intel_engine_context_size(dev_priv, engine->class); @@ -291,7 +306,7 @@ intel_engine_setup(struct drm_i915_private *dev_priv, /* Nothing to do here, execute in order of dependencies */ engine->schedule = NULL; - spin_lock_init(&engine->stats.lock); + seqlock_init(&engine->stats.lock); ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier); @@ -436,21 +451,13 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno) GEM_BUG_ON(intel_engine_get_seqno(engine) != seqno); } -static void intel_engine_init_timeline(struct intel_engine_cs *engine) +static void intel_engine_init_batch_pool(struct intel_engine_cs *engine) { - engine->timeline = &engine->i915->gt.global_timeline.engine[engine->id]; + i915_gem_batch_pool_init(&engine->batch_pool, engine); } static bool csb_force_mmio(struct drm_i915_private *i915) { - /* - * IOMMU adds unpredictable latency causing the CSB write (from the - * GPU into the HWSP) to only be visible some time after the interrupt - * (missed breadcrumb syndrome). - */ - if (intel_vtd_active()) - return true; - /* Older GVT emulation depends upon intercepting CSB mmio */ if (intel_vgpu_active(i915) && !intel_vgpu_has_hwsp_emulation(i915)) return true; @@ -484,12 +491,11 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine) */ void intel_engine_setup_common(struct intel_engine_cs *engine) { - intel_engine_init_execlist(engine); + i915_timeline_init(engine->i915, &engine->timeline, engine->name); - intel_engine_init_timeline(engine); + intel_engine_init_execlist(engine); intel_engine_init_hangcheck(engine); - i915_gem_batch_pool_init(engine, &engine->batch_pool); - + intel_engine_init_batch_pool(engine); intel_engine_init_cmd_parser(engine); } @@ -520,8 +526,6 @@ int intel_engine_create_scratch(struct intel_engine_cs *engine, int size) goto err_unref; engine->scratch = vma; - DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n", - engine->name, i915_ggtt_offset(vma)); return 0; err_unref: @@ -615,9 +619,6 @@ static int init_status_page(struct intel_engine_cs *engine) engine->status_page.vma = vma; engine->status_page.ggtt_offset = i915_ggtt_offset(vma); engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE); - - DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n", - engine->name, i915_ggtt_offset(vma)); return 0; err_unpin: @@ -669,7 +670,7 @@ int intel_engine_init_common(struct intel_engine_cs *engine) * be available. To avoid this we always pin the default * context. */ - ring = engine->context_pin(engine, engine->i915->kernel_context); + ring = intel_context_pin(engine->i915->kernel_context, engine); if (IS_ERR(ring)) return PTR_ERR(ring); @@ -678,8 +679,7 @@ int intel_engine_init_common(struct intel_engine_cs *engine) * we can interrupt the engine at any time. */ if (engine->i915->preempt_context) { - ring = engine->context_pin(engine, - engine->i915->preempt_context); + ring = intel_context_pin(engine->i915->preempt_context, engine); if (IS_ERR(ring)) { ret = PTR_ERR(ring); goto err_unpin_kernel; @@ -703,9 +703,9 @@ err_breadcrumbs: intel_engine_fini_breadcrumbs(engine); err_unpin_preempt: if (engine->i915->preempt_context) - engine->context_unpin(engine, engine->i915->preempt_context); + intel_context_unpin(engine->i915->preempt_context, engine); err_unpin_kernel: - engine->context_unpin(engine, engine->i915->kernel_context); + intel_context_unpin(engine->i915->kernel_context, engine); return ret; } @@ -733,8 +733,10 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) i915_gem_object_put(engine->default_state); if (engine->i915->preempt_context) - engine->context_unpin(engine, engine->i915->preempt_context); - engine->context_unpin(engine, engine->i915->kernel_context); + intel_context_unpin(engine->i915->preempt_context, engine); + intel_context_unpin(engine->i915->kernel_context, engine); + + i915_timeline_fini(&engine->timeline); } u64 intel_engine_get_active_head(const struct intel_engine_cs *engine) @@ -782,10 +784,24 @@ static inline uint32_t read_subslice_reg(struct drm_i915_private *dev_priv, int slice, int subslice, i915_reg_t reg) { + uint32_t mcr_slice_subslice_mask; + uint32_t mcr_slice_subslice_select; uint32_t mcr; uint32_t ret; enum forcewake_domains fw_domains; + if (INTEL_GEN(dev_priv) >= 11) { + mcr_slice_subslice_mask = GEN11_MCR_SLICE_MASK | + GEN11_MCR_SUBSLICE_MASK; + mcr_slice_subslice_select = GEN11_MCR_SLICE(slice) | + GEN11_MCR_SUBSLICE(subslice); + } else { + mcr_slice_subslice_mask = GEN8_MCR_SLICE_MASK | + GEN8_MCR_SUBSLICE_MASK; + mcr_slice_subslice_select = GEN8_MCR_SLICE(slice) | + GEN8_MCR_SUBSLICE(subslice); + } + fw_domains = intel_uncore_forcewake_for_reg(dev_priv, reg, FW_REG_READ); fw_domains |= intel_uncore_forcewake_for_reg(dev_priv, @@ -800,14 +816,14 @@ read_subslice_reg(struct drm_i915_private *dev_priv, int slice, * The HW expects the slice and sublice selectors to be reset to 0 * after reading out the registers. */ - WARN_ON_ONCE(mcr & (GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK)); - mcr &= ~(GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK); - mcr |= GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice); + WARN_ON_ONCE(mcr & mcr_slice_subslice_mask); + mcr &= ~mcr_slice_subslice_mask; + mcr |= mcr_slice_subslice_select; I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr); ret = I915_READ_FW(reg); - mcr &= ~(GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK); + mcr &= ~mcr_slice_subslice_mask; I915_WRITE_FW(GEN8_MCR_SELECTOR, mcr); intel_uncore_forcewake_put__locked(dev_priv, fw_domains); @@ -871,640 +887,6 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine, } } -static int wa_add(struct drm_i915_private *dev_priv, - i915_reg_t addr, - const u32 mask, const u32 val) -{ - const u32 idx = dev_priv->workarounds.count; - - if (WARN_ON(idx >= I915_MAX_WA_REGS)) - return -ENOSPC; - - dev_priv->workarounds.reg[idx].addr = addr; - dev_priv->workarounds.reg[idx].value = val; - dev_priv->workarounds.reg[idx].mask = mask; - - dev_priv->workarounds.count++; - - return 0; -} - -#define WA_REG(addr, mask, val) do { \ - const int r = wa_add(dev_priv, (addr), (mask), (val)); \ - if (r) \ - return r; \ - } while (0) - -#define WA_SET_BIT_MASKED(addr, mask) \ - WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask)) - -#define WA_CLR_BIT_MASKED(addr, mask) \ - WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask)) - -#define WA_SET_FIELD_MASKED(addr, mask, value) \ - WA_REG(addr, mask, _MASKED_FIELD(mask, value)) - -static int wa_ring_whitelist_reg(struct intel_engine_cs *engine, - i915_reg_t reg) -{ - struct drm_i915_private *dev_priv = engine->i915; - struct i915_workarounds *wa = &dev_priv->workarounds; - const uint32_t index = wa->hw_whitelist_count[engine->id]; - - if (WARN_ON(index >= RING_MAX_NONPRIV_SLOTS)) - return -EINVAL; - - I915_WRITE(RING_FORCE_TO_NONPRIV(engine->mmio_base, index), - i915_mmio_reg_offset(reg)); - wa->hw_whitelist_count[engine->id]++; - - return 0; -} - -static int gen8_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING); - - /* WaDisableAsyncFlipPerfMode:bdw,chv */ - WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE); - - /* WaDisablePartialInstShootdown:bdw,chv */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, - PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); - - /* Use Force Non-Coherent whenever executing a 3D context. This is a - * workaround for for a possible hang in the unlikely event a TLB - * invalidation occurs during a PSD flush. - */ - /* WaForceEnableNonCoherent:bdw,chv */ - /* WaHdcDisableFetchWhenMasked:bdw,chv */ - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_DONOT_FETCH_MEM_WHEN_MASKED | - HDC_FORCE_NON_COHERENT); - - /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0: - * "The Hierarchical Z RAW Stall Optimization allows non-overlapping - * polygons in the same 8x4 pixel/sample area to be processed without - * stalling waiting for the earlier ones to write to Hierarchical Z - * buffer." - * - * This optimization is off by default for BDW and CHV; turn it on. - */ - WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE); - - /* Wa4x4STCOptimizationDisable:bdw,chv */ - WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE); - - /* - * BSpec recommends 8x4 when MSAA is used, - * however in practice 16x4 seems fastest. - * - * Note that PS/WM thread counts depend on the WIZ hashing - * disable bit, which we don't touch here, but it's good - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). - */ - WA_SET_FIELD_MASKED(GEN7_GT_MODE, - GEN6_WIZ_HASHING_MASK, - GEN6_WIZ_HASHING_16x4); - - return 0; -} - -static int bdw_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen8_init_workarounds(engine); - if (ret) - return ret; - - /* WaDisableThreadStallDopClockGating:bdw (pre-production) */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); - - /* WaDisableDopClockGating:bdw - * - * Also see the related UCGTCL1 write in broadwell_init_clock_gating() - * to disable EUTC clock gating. - */ - WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, - DOP_CLOCK_GATING_DISABLE); - - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, - GEN8_SAMPLER_POWER_BYPASS_DIS); - - WA_SET_BIT_MASKED(HDC_CHICKEN0, - /* WaForceContextSaveRestoreNonCoherent:bdw */ - HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | - /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */ - (IS_BDW_GT3(dev_priv) ? HDC_FENCE_DEST_SLM_DISABLE : 0)); - - return 0; -} - -static int chv_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen8_init_workarounds(engine); - if (ret) - return ret; - - /* WaDisableThreadStallDopClockGating:chv */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); - - /* Improve HiZ throughput on CHV. */ - WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X); - - return 0; -} - -static int gen9_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - /* WaConextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */ - I915_WRITE(GEN9_CSFE_CHICKEN1_RCS, _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE)); - - /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */ - I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) | - GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE); - - /* WaDisableKillLogic:bxt,skl,kbl */ - if (!IS_COFFEELAKE(dev_priv)) - I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | - ECOCHK_DIS_TLB); - - if (HAS_LLC(dev_priv)) { - /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl - * - * Must match Display Engine. See - * WaCompressedResourceDisplayNewHashMode. - */ - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN9_PBE_COMPRESSED_HASH_SELECTION); - WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, - GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR); - - I915_WRITE(MMCD_MISC_CTRL, - I915_READ(MMCD_MISC_CTRL) | - MMCD_PCLA | - MMCD_HOTSPOT_EN); - } - - /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */ - /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, - FLOW_CONTROL_ENABLE | - PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); - - /* Syncing dependencies between camera and graphics:skl,bxt,kbl */ - if (!IS_COFFEELAKE(dev_priv)) - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, - GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC); - - /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */ - /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */ - WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, - GEN9_ENABLE_YV12_BUGFIX | - GEN9_ENABLE_GPGPU_PREEMPTION); - - /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */ - /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */ - WA_SET_BIT_MASKED(CACHE_MODE_1, (GEN8_4x4_STC_OPTIMIZATION_DISABLE | - GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE)); - - /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */ - WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, - GEN9_CCS_TLB_PREFETCH_ENABLE); - - /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */ - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | - HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE); - - /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are - * both tied to WaForceContextSaveRestoreNonCoherent - * in some hsds for skl. We keep the tie for all gen9. The - * documentation is a bit hazy and so we want to get common behaviour, - * even though there is no clear evidence we would need both on kbl/bxt. - * This area has been source of system hangs so we play it safe - * and mimic the skl regardless of what bspec says. - * - * Use Force Non-Coherent whenever executing a 3D context. This - * is a workaround for a possible hang in the unlikely event - * a TLB invalidation occurs during a PSD flush. - */ - - /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */ - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_FORCE_NON_COHERENT); - - /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */ - I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | - BDW_DISABLE_HDC_INVALIDATION); - - /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */ - if (IS_SKYLAKE(dev_priv) || - IS_KABYLAKE(dev_priv) || - IS_COFFEELAKE(dev_priv)) - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, - GEN8_SAMPLER_POWER_BYPASS_DIS); - - /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */ - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); - - /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */ - if (IS_GEN9_LP(dev_priv)) { - u32 val = I915_READ(GEN8_L3SQCREG1); - - val &= ~L3_PRIO_CREDITS_MASK; - val |= L3_GENERAL_PRIO_CREDITS(62) | L3_HIGH_PRIO_CREDITS(2); - I915_WRITE(GEN8_L3SQCREG1, val); - } - - /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */ - I915_WRITE(GEN8_L3SQCREG4, (I915_READ(GEN8_L3SQCREG4) | - GEN8_LQSC_FLUSH_COHERENT_LINES)); - - /* - * Supporting preemption with fine-granularity requires changes in the - * batch buffer programming. Since we can't break old userspace, we - * need to set our default preemption level to safe value. Userspace is - * still able to use more fine-grained preemption levels, since in - * WaEnablePreemptionGranularityControlByUMD we're whitelisting the - * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are - * not real HW workarounds, but merely a way to start using preemption - * while maintaining old contract with userspace. - */ - - /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */ - WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL); - - /* WaDisableGPGPUMidCmdPreemption:skl,bxt,blk,cfl,[cnl] */ - WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK, - GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); - - /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */ - ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG); - if (ret) - return ret; - - /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */ - I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1, - _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL)); - ret = wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1); - if (ret) - return ret; - - /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */ - ret = wa_ring_whitelist_reg(engine, GEN8_HDC_CHICKEN1); - if (ret) - return ret; - - return 0; -} - -static int skl_tune_iz_hashing(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - u8 vals[3] = { 0, 0, 0 }; - unsigned int i; - - for (i = 0; i < 3; i++) { - u8 ss; - - /* - * Only consider slices where one, and only one, subslice has 7 - * EUs - */ - if (!is_power_of_2(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i])) - continue; - - /* - * subslice_7eu[i] != 0 (because of the check above) and - * ss_max == 4 (maximum number of subslices possible per slice) - * - * -> 0 <= ss <= 3; - */ - ss = ffs(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]) - 1; - vals[i] = 3 - ss; - } - - if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0) - return 0; - - /* Tune IZ hashing. See intel_device_info_runtime_init() */ - WA_SET_FIELD_MASKED(GEN7_GT_MODE, - GEN9_IZ_HASHING_MASK(2) | - GEN9_IZ_HASHING_MASK(1) | - GEN9_IZ_HASHING_MASK(0), - GEN9_IZ_HASHING(2, vals[2]) | - GEN9_IZ_HASHING(1, vals[1]) | - GEN9_IZ_HASHING(0, vals[0])); - - return 0; -} - -static int skl_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen9_init_workarounds(engine); - if (ret) - return ret; - - /* WaEnableGapsTsvCreditFix:skl */ - I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) | - GEN9_GAPS_TSV_CREDIT_DISABLE)); - - /* WaDisableGafsUnitClkGating:skl */ - I915_WRITE(GEN7_UCGCTL4, (I915_READ(GEN7_UCGCTL4) | - GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE)); - - /* WaInPlaceDecompressionHang:skl */ - if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER)) - I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, - (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) | - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS)); - - /* WaDisableLSQCROPERFforOCL:skl */ - ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); - if (ret) - return ret; - - return skl_tune_iz_hashing(engine); -} - -static int bxt_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen9_init_workarounds(engine); - if (ret) - return ret; - - /* WaDisableThreadStallDopClockGating:bxt */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, - STALL_DOP_GATING_DISABLE); - - /* WaDisablePooledEuLoadBalancingFix:bxt */ - I915_WRITE(FF_SLICE_CS_CHICKEN2, - _MASKED_BIT_ENABLE(GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE)); - - /* WaToEnableHwFixForPushConstHWBug:bxt */ - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); - - /* WaInPlaceDecompressionHang:bxt */ - I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, - (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) | - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS)); - - return 0; -} - -static int cnl_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */ - if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0)) - I915_WRITE(GAMT_CHKN_BIT_REG, - (I915_READ(GAMT_CHKN_BIT_REG) | - GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT)); - - /* WaForceContextSaveRestoreNonCoherent:cnl */ - WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0, - HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT); - - /* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */ - if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0)) - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5); - - /* WaDisableReplayBufferBankArbitrationOptimization:cnl */ - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); - - /* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */ - if (IS_CNL_REVID(dev_priv, 0, CNL_REVID_B0)) - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE); - - /* WaInPlaceDecompressionHang:cnl */ - I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, - (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) | - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS)); - - /* WaPushConstantDereferenceHoldDisable:cnl */ - WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE); - - /* FtrEnableFastAnisoL1BankingFix: cnl */ - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX); - - /* WaDisable3DMidCmdPreemption:cnl */ - WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL); - - /* WaDisableGPGPUMidCmdPreemption:cnl */ - WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK, - GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); - - /* WaEnablePreemptionGranularityControlByUMD:cnl */ - I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1, - _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL)); - ret= wa_ring_whitelist_reg(engine, GEN8_CS_CHICKEN1); - if (ret) - return ret; - - /* WaDisableEarlyEOT:cnl */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT); - - return 0; -} - -static int kbl_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen9_init_workarounds(engine); - if (ret) - return ret; - - /* WaEnableGapsTsvCreditFix:kbl */ - I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) | - GEN9_GAPS_TSV_CREDIT_DISABLE)); - - /* WaDisableDynamicCreditSharing:kbl */ - if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0)) - I915_WRITE(GAMT_CHKN_BIT_REG, - (I915_READ(GAMT_CHKN_BIT_REG) | - GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING)); - - /* WaDisableFenceDestinationToSLM:kbl (pre-prod) */ - if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0)) - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_FENCE_DEST_SLM_DISABLE); - - /* WaToEnableHwFixForPushConstHWBug:kbl */ - if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER)) - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); - - /* WaDisableGafsUnitClkGating:kbl */ - I915_WRITE(GEN7_UCGCTL4, (I915_READ(GEN7_UCGCTL4) | - GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE)); - - /* WaDisableSbeCacheDispatchPortSharing:kbl */ - WA_SET_BIT_MASKED( - GEN7_HALF_SLICE_CHICKEN1, - GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); - - /* WaInPlaceDecompressionHang:kbl */ - I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, - (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) | - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS)); - - /* WaDisableLSQCROPERFforOCL:kbl */ - ret = wa_ring_whitelist_reg(engine, GEN8_L3SQCREG4); - if (ret) - return ret; - - return 0; -} - -static int glk_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen9_init_workarounds(engine); - if (ret) - return ret; - - /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */ - ret = wa_ring_whitelist_reg(engine, GEN9_SLICE_COMMON_ECO_CHICKEN1); - if (ret) - return ret; - - /* WaToEnableHwFixForPushConstHWBug:glk */ - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); - - return 0; -} - -static int cfl_init_workarounds(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int ret; - - ret = gen9_init_workarounds(engine); - if (ret) - return ret; - - /* WaEnableGapsTsvCreditFix:cfl */ - I915_WRITE(GEN8_GARBCNTL, (I915_READ(GEN8_GARBCNTL) | - GEN9_GAPS_TSV_CREDIT_DISABLE)); - - /* WaToEnableHwFixForPushConstHWBug:cfl */ - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); - - /* WaDisableGafsUnitClkGating:cfl */ - I915_WRITE(GEN7_UCGCTL4, (I915_READ(GEN7_UCGCTL4) | - GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE)); - - /* WaDisableSbeCacheDispatchPortSharing:cfl */ - WA_SET_BIT_MASKED( - GEN7_HALF_SLICE_CHICKEN1, - GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); - - /* WaInPlaceDecompressionHang:cfl */ - I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, - (I915_READ(GEN9_GAMT_ECO_REG_RW_IA) | - GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS)); - - return 0; -} - -int init_workarounds_ring(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int err; - - if (GEM_WARN_ON(engine->id != RCS)) - return -EINVAL; - - dev_priv->workarounds.count = 0; - dev_priv->workarounds.hw_whitelist_count[engine->id] = 0; - - if (IS_BROADWELL(dev_priv)) - err = bdw_init_workarounds(engine); - else if (IS_CHERRYVIEW(dev_priv)) - err = chv_init_workarounds(engine); - else if (IS_SKYLAKE(dev_priv)) - err = skl_init_workarounds(engine); - else if (IS_BROXTON(dev_priv)) - err = bxt_init_workarounds(engine); - else if (IS_KABYLAKE(dev_priv)) - err = kbl_init_workarounds(engine); - else if (IS_GEMINILAKE(dev_priv)) - err = glk_init_workarounds(engine); - else if (IS_COFFEELAKE(dev_priv)) - err = cfl_init_workarounds(engine); - else if (IS_CANNONLAKE(dev_priv)) - err = cnl_init_workarounds(engine); - else - err = 0; - if (err) - return err; - - DRM_DEBUG_DRIVER("%s: Number of context specific w/a: %d\n", - engine->name, dev_priv->workarounds.count); - return 0; -} - -int intel_ring_workarounds_emit(struct i915_request *rq) -{ - struct i915_workarounds *w = &rq->i915->workarounds; - u32 *cs; - int ret, i; - - if (w->count == 0) - return 0; - - ret = rq->engine->emit_flush(rq, EMIT_BARRIER); - if (ret) - return ret; - - cs = intel_ring_begin(rq, w->count * 2 + 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_LOAD_REGISTER_IMM(w->count); - for (i = 0; i < w->count; i++) { - *cs++ = i915_mmio_reg_offset(w->reg[i].addr); - *cs++ = w->reg[i].value; - } - *cs++ = MI_NOOP; - - intel_ring_advance(rq, cs); - - ret = rq->engine->emit_flush(rq, EMIT_BARRIER); - if (ret) - return ret; - - return 0; -} - static bool ring_is_idle(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; @@ -1607,7 +989,7 @@ bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine) * the last request that remains in the timeline. When idle, it is * the last executed context as tracked by retirement. */ - rq = __i915_gem_active_peek(&engine->timeline->last_request); + rq = __i915_gem_active_peek(&engine->timeline.last_request); if (rq) return rq->ctx == kernel_context; else @@ -1655,6 +1037,9 @@ void intel_engines_park(struct drm_i915_private *i915) intel_engine_dump(engine, &p, NULL); } + /* Must be reset upon idling, or we may miss the busy wakeup. */ + GEM_BUG_ON(engine->execlists.queue_priority != INT_MIN); + if (engine->park) engine->park(engine); @@ -1677,6 +1062,8 @@ void intel_engines_unpark(struct drm_i915_private *i915) for_each_engine(engine, i915, id) { if (engine->unpark) engine->unpark(engine); + + intel_engine_init_hangcheck(engine); } } @@ -1709,17 +1096,37 @@ unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915) return which; } +static int print_sched_attr(struct drm_i915_private *i915, + const struct i915_sched_attr *attr, + char *buf, int x, int len) +{ + if (attr->priority == I915_PRIORITY_INVALID) + return x; + + x += snprintf(buf + x, len - x, + " prio=%d", attr->priority); + + return x; +} + static void print_request(struct drm_printer *m, struct i915_request *rq, const char *prefix) { - drm_printf(m, "%s%x%s [%llx:%x] prio=%d @ %dms: %s\n", prefix, + const char *name = rq->fence.ops->get_timeline_name(&rq->fence); + char buf[80]; + int x = 0; + + x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf)); + + drm_printf(m, "%s%x%s [%llx:%x]%s @ %dms: %s\n", + prefix, rq->global_seqno, i915_request_completed(rq) ? "!" : "", rq->fence.context, rq->fence.seqno, - rq->priotree.priority, + buf, jiffies_to_msecs(jiffies - rq->emitted_jiffies), - rq->timeline->common->name); + name); } static void hexdump(struct drm_printer *m, const void *buf, size_t len) @@ -1825,12 +1232,15 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine, ptr = I915_READ(RING_CONTEXT_STATUS_PTR(engine)); read = GEN8_CSB_READ_PTR(ptr); write = GEN8_CSB_WRITE_PTR(ptr); - drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? %s\n", + drm_printf(m, "\tExeclist CSB read %d [%d cached], write %d [%d from hws], interrupt posted? %s, tasklet queued? %s (%s)\n", read, execlists->csb_head, write, intel_read_status_page(engine, intel_hws_csb_write_index(engine->i915)), yesno(test_bit(ENGINE_IRQ_EXECLIST, - &engine->irq_posted))); + &engine->irq_posted)), + yesno(test_bit(TASKLET_STATE_SCHED, + &engine->execlists.tasklet.state)), + enableddisabled(!atomic_read(&engine->execlists.tasklet.count))); if (read >= GEN8_CSB_ENTRIES) read = 0; if (write >= GEN8_CSB_ENTRIES) @@ -1857,8 +1267,9 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine, char hdr[80]; snprintf(hdr, sizeof(hdr), - "\t\tELSP[%d] count=%d, rq: ", - idx, count); + "\t\tELSP[%d] count=%d, ring->start=%08x, rq: ", + idx, count, + i915_ggtt_offset(rq->ring->vma)); print_request(m, rq, hdr); } else { drm_printf(m, "\t\tELSP[%d] idle\n", idx); @@ -1880,11 +1291,13 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m, const char *header, ...) { + const int MAX_REQUESTS_TO_SHOW = 8; struct intel_breadcrumbs * const b = &engine->breadcrumbs; const struct intel_engine_execlists * const execlists = &engine->execlists; struct i915_gpu_error * const error = &engine->i915->gpu_error; - struct i915_request *rq; + struct i915_request *rq, *last; struct rb_node *rb; + int count; if (header) { va_list ap; @@ -1897,12 +1310,11 @@ void intel_engine_dump(struct intel_engine_cs *engine, if (i915_terminally_wedged(&engine->i915->gpu_error)) drm_printf(m, "*** WEDGED ***\n"); - drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms], inflight %d\n", + drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms]\n", intel_engine_get_seqno(engine), intel_engine_last_submit(engine), engine->hangcheck.seqno, - jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp), - engine->timeline->inflight_seqnos); + jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp)); drm_printf(m, "\tReset count: %d (global %d)\n", i915_reset_engine_count(error, engine), i915_reset_count(error)); @@ -1911,14 +1323,14 @@ void intel_engine_dump(struct intel_engine_cs *engine, drm_printf(m, "\tRequests:\n"); - rq = list_first_entry(&engine->timeline->requests, + rq = list_first_entry(&engine->timeline.requests, struct i915_request, link); - if (&rq->link != &engine->timeline->requests) + if (&rq->link != &engine->timeline.requests) print_request(m, rq, "\t\tfirst "); - rq = list_last_entry(&engine->timeline->requests, + rq = list_last_entry(&engine->timeline.requests, struct i915_request, link); - if (&rq->link != &engine->timeline->requests) + if (&rq->link != &engine->timeline.requests) print_request(m, rq, "\t\tlast "); rq = i915_gem_find_active_request(engine); @@ -1929,12 +1341,16 @@ void intel_engine_dump(struct intel_engine_cs *engine, rq->head, rq->postfix, rq->tail, rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u, rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u); - drm_printf(m, "\t\tring->start: 0x%08x\n", + drm_printf(m, "\t\tring->start: 0x%08x\n", i915_ggtt_offset(rq->ring->vma)); - drm_printf(m, "\t\tring->head: 0x%08x\n", + drm_printf(m, "\t\tring->head: 0x%08x\n", rq->ring->head); - drm_printf(m, "\t\tring->tail: 0x%08x\n", + drm_printf(m, "\t\tring->tail: 0x%08x\n", rq->ring->tail); + drm_printf(m, "\t\tring->emit: 0x%08x\n", + rq->ring->emit); + drm_printf(m, "\t\tring->space: 0x%08x\n", + rq->ring->space); } rcu_read_unlock(); @@ -1946,18 +1362,49 @@ void intel_engine_dump(struct intel_engine_cs *engine, drm_printf(m, "\tDevice is asleep; skipping register dump\n"); } - spin_lock_irq(&engine->timeline->lock); - list_for_each_entry(rq, &engine->timeline->requests, link) - print_request(m, rq, "\t\tE "); + spin_lock_irq(&engine->timeline.lock); + + last = NULL; + count = 0; + list_for_each_entry(rq, &engine->timeline.requests, link) { + if (count++ < MAX_REQUESTS_TO_SHOW - 1) + print_request(m, rq, "\t\tE "); + else + last = rq; + } + if (last) { + if (count > MAX_REQUESTS_TO_SHOW) { + drm_printf(m, + "\t\t...skipping %d executing requests...\n", + count - MAX_REQUESTS_TO_SHOW); + } + print_request(m, last, "\t\tE "); + } + + last = NULL; + count = 0; drm_printf(m, "\t\tQueue priority: %d\n", execlists->queue_priority); for (rb = execlists->first; rb; rb = rb_next(rb)) { struct i915_priolist *p = rb_entry(rb, typeof(*p), node); - list_for_each_entry(rq, &p->requests, priotree.link) - print_request(m, rq, "\t\tQ "); + list_for_each_entry(rq, &p->requests, sched.link) { + if (count++ < MAX_REQUESTS_TO_SHOW - 1) + print_request(m, rq, "\t\tQ "); + else + last = rq; + } + } + if (last) { + if (count > MAX_REQUESTS_TO_SHOW) { + drm_printf(m, + "\t\t...skipping %d queued requests...\n", + count - MAX_REQUESTS_TO_SHOW); + } + print_request(m, last, "\t\tQ "); } - spin_unlock_irq(&engine->timeline->lock); + + spin_unlock_irq(&engine->timeline.lock); spin_lock_irq(&b->rb_lock); for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { @@ -2022,7 +1469,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) return -ENODEV; tasklet_disable(&execlists->tasklet); - spin_lock_irqsave(&engine->stats.lock, flags); + write_seqlock_irqsave(&engine->stats.lock, flags); if (unlikely(engine->stats.enabled == ~0)) { err = -EBUSY; @@ -2046,7 +1493,7 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine) } unlock: - spin_unlock_irqrestore(&engine->stats.lock, flags); + write_sequnlock_irqrestore(&engine->stats.lock, flags); tasklet_enable(&execlists->tasklet); return err; @@ -2075,12 +1522,13 @@ static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine) */ ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine) { + unsigned int seq; ktime_t total; - unsigned long flags; - spin_lock_irqsave(&engine->stats.lock, flags); - total = __intel_engine_get_busy_time(engine); - spin_unlock_irqrestore(&engine->stats.lock, flags); + do { + seq = read_seqbegin(&engine->stats.lock); + total = __intel_engine_get_busy_time(engine); + } while (read_seqretry(&engine->stats.lock, seq)); return total; } @@ -2098,15 +1546,16 @@ void intel_disable_engine_stats(struct intel_engine_cs *engine) if (!intel_engine_supports_stats(engine)) return; - spin_lock_irqsave(&engine->stats.lock, flags); + write_seqlock_irqsave(&engine->stats.lock, flags); WARN_ON_ONCE(engine->stats.enabled == 0); if (--engine->stats.enabled == 0) { engine->stats.total = __intel_engine_get_busy_time(engine); engine->stats.active = 0; } - spin_unlock_irqrestore(&engine->stats.lock, flags); + write_sequnlock_irqrestore(&engine->stats.lock, flags); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_engine.c" +#include "selftests/intel_engine_cs.c" #endif diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index 707d49c..b431b67 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -1272,6 +1272,34 @@ out: mutex_unlock(&fbc->lock); } +/* + * intel_fbc_reset_underrun - reset FBC fifo underrun status. + * @dev_priv: i915 device instance + * + * See intel_fbc_handle_fifo_underrun_irq(). For automated testing we + * want to re-enable FBC after an underrun to increase test coverage. + */ +int intel_fbc_reset_underrun(struct drm_i915_private *dev_priv) +{ + int ret; + + cancel_work_sync(&dev_priv->fbc.underrun_work); + + ret = mutex_lock_interruptible(&dev_priv->fbc.lock); + if (ret) + return ret; + + if (dev_priv->fbc.underrun_detected) { + DRM_DEBUG_KMS("Re-allowing FBC after fifo underrun\n"); + dev_priv->fbc.no_fbc_reason = "FIFO underrun cleared"; + } + + dev_priv->fbc.underrun_detected = false; + mutex_unlock(&dev_priv->fbc.lock); + + return 0; +} + /** * intel_fbc_handle_fifo_underrun_irq - disable FBC when we get a FIFO underrun * @dev_priv: i915 device instance diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index 6f12adc..e9e02b5 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -221,6 +221,9 @@ static int intelfb_create(struct drm_fb_helper *helper, goto out_unlock; } + fb = &ifbdev->fb->base; + intel_fb_obj_flush(intel_fb_obj(fb), ORIGIN_DIRTYFB); + info = drm_fb_helper_alloc_fbi(helper); if (IS_ERR(info)) { DRM_ERROR("Failed to allocate fb_info\n"); @@ -230,8 +233,6 @@ static int intelfb_create(struct drm_fb_helper *helper, info->par = helper; - fb = &ifbdev->fb->base; - ifbdev->helper.fb = fb; strcpy(info->fix.id, "inteldrmfb"); @@ -640,7 +641,7 @@ static bool intel_fbdev_init_bios(struct drm_device *dev, if (!crtc->state->active) continue; - WARN(!crtc->primary->fb, + WARN(!crtc->primary->state->fb, "re-used BIOS config but lost an fb on crtc %d\n", crtc->base.id); } @@ -806,7 +807,7 @@ void intel_fbdev_output_poll_changed(struct drm_device *dev) return; intel_fbdev_sync(ifbdev); - if (ifbdev->vma) + if (ifbdev->vma || ifbdev->helper.deferred_setup) drm_fb_helper_hotplug_event(&ifbdev->helper); } diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c index 3a8d3d0..7fff0a0 100644 --- a/drivers/gpu/drm/i915/intel_frontbuffer.c +++ b/drivers/gpu/drm/i915/intel_frontbuffer.c @@ -80,7 +80,7 @@ void __intel_fb_obj_invalidate(struct drm_i915_gem_object *obj, } might_sleep(); - intel_psr_invalidate(dev_priv, frontbuffer_bits); + intel_psr_invalidate(dev_priv, frontbuffer_bits, origin); intel_edp_drrs_invalidate(dev_priv, frontbuffer_bits); intel_fbc_invalidate(dev_priv, frontbuffer_bits, origin); } diff --git a/drivers/gpu/drm/i915/intel_gpu_commands.h b/drivers/gpu/drm/i915/intel_gpu_commands.h new file mode 100644 index 0000000..105e2a9 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_gpu_commands.h @@ -0,0 +1,274 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright � 2003-2018 Intel Corporation + */ + +#ifndef _INTEL_GPU_COMMANDS_H_ +#define _INTEL_GPU_COMMANDS_H_ + +/* + * Instruction field definitions used by the command parser + */ +#define INSTR_CLIENT_SHIFT 29 +#define INSTR_MI_CLIENT 0x0 +#define INSTR_BC_CLIENT 0x2 +#define INSTR_RC_CLIENT 0x3 +#define INSTR_SUBCLIENT_SHIFT 27 +#define INSTR_SUBCLIENT_MASK 0x18000000 +#define INSTR_MEDIA_SUBCLIENT 0x2 +#define INSTR_26_TO_24_MASK 0x7000000 +#define INSTR_26_TO_24_SHIFT 24 + +/* + * Memory interface instructions used by the kernel + */ +#define MI_INSTR(opcode, flags) (((opcode) << 23) | (flags)) +/* Many MI commands use bit 22 of the header dword for GGTT vs PPGTT */ +#define MI_GLOBAL_GTT (1<<22) + +#define MI_NOOP MI_INSTR(0, 0) +#define MI_USER_INTERRUPT MI_INSTR(0x02, 0) +#define MI_WAIT_FOR_EVENT MI_INSTR(0x03, 0) +#define MI_WAIT_FOR_OVERLAY_FLIP (1<<16) +#define MI_WAIT_FOR_PLANE_B_FLIP (1<<6) +#define MI_WAIT_FOR_PLANE_A_FLIP (1<<2) +#define MI_WAIT_FOR_PLANE_A_SCANLINES (1<<1) +#define MI_FLUSH MI_INSTR(0x04, 0) +#define MI_READ_FLUSH (1 << 0) +#define MI_EXE_FLUSH (1 << 1) +#define MI_NO_WRITE_FLUSH (1 << 2) +#define MI_SCENE_COUNT (1 << 3) /* just increment scene count */ +#define MI_END_SCENE (1 << 4) /* flush binner and incr scene count */ +#define MI_INVALIDATE_ISP (1 << 5) /* invalidate indirect state pointers */ +#define MI_REPORT_HEAD MI_INSTR(0x07, 0) +#define MI_ARB_ON_OFF MI_INSTR(0x08, 0) +#define MI_ARB_ENABLE (1<<0) +#define MI_ARB_DISABLE (0<<0) +#define MI_BATCH_BUFFER_END MI_INSTR(0x0a, 0) +#define MI_SUSPEND_FLUSH MI_INSTR(0x0b, 0) +#define MI_SUSPEND_FLUSH_EN (1<<0) +#define MI_SET_APPID MI_INSTR(0x0e, 0) +#define MI_OVERLAY_FLIP MI_INSTR(0x11, 0) +#define MI_OVERLAY_CONTINUE (0x0<<21) +#define MI_OVERLAY_ON (0x1<<21) +#define MI_OVERLAY_OFF (0x2<<21) +#define MI_LOAD_SCAN_LINES_INCL MI_INSTR(0x12, 0) +#define MI_DISPLAY_FLIP MI_INSTR(0x14, 2) +#define MI_DISPLAY_FLIP_I915 MI_INSTR(0x14, 1) +#define MI_DISPLAY_FLIP_PLANE(n) ((n) << 20) +/* IVB has funny definitions for which plane to flip. */ +#define MI_DISPLAY_FLIP_IVB_PLANE_A (0 << 19) +#define MI_DISPLAY_FLIP_IVB_PLANE_B (1 << 19) +#define MI_DISPLAY_FLIP_IVB_SPRITE_A (2 << 19) +#define MI_DISPLAY_FLIP_IVB_SPRITE_B (3 << 19) +#define MI_DISPLAY_FLIP_IVB_PLANE_C (4 << 19) +#define MI_DISPLAY_FLIP_IVB_SPRITE_C (5 << 19) +/* SKL ones */ +#define MI_DISPLAY_FLIP_SKL_PLANE_1_A (0 << 8) +#define MI_DISPLAY_FLIP_SKL_PLANE_1_B (1 << 8) +#define MI_DISPLAY_FLIP_SKL_PLANE_1_C (2 << 8) +#define MI_DISPLAY_FLIP_SKL_PLANE_2_A (4 << 8) +#define MI_DISPLAY_FLIP_SKL_PLANE_2_B (5 << 8) +#define MI_DISPLAY_FLIP_SKL_PLANE_2_C (6 << 8) +#define MI_DISPLAY_FLIP_SKL_PLANE_3_A (7 << 8) +#define MI_DISPLAY_FLIP_SKL_PLANE_3_B (8 << 8) +#define MI_DISPLAY_FLIP_SKL_PLANE_3_C (9 << 8) +#define MI_SEMAPHORE_MBOX MI_INSTR(0x16, 1) /* gen6, gen7 */ +#define MI_SEMAPHORE_GLOBAL_GTT (1<<22) +#define MI_SEMAPHORE_UPDATE (1<<21) +#define MI_SEMAPHORE_COMPARE (1<<20) +#define MI_SEMAPHORE_REGISTER (1<<18) +#define MI_SEMAPHORE_SYNC_VR (0<<16) /* RCS wait for VCS (RVSYNC) */ +#define MI_SEMAPHORE_SYNC_VER (1<<16) /* RCS wait for VECS (RVESYNC) */ +#define MI_SEMAPHORE_SYNC_BR (2<<16) /* RCS wait for BCS (RBSYNC) */ +#define MI_SEMAPHORE_SYNC_BV (0<<16) /* VCS wait for BCS (VBSYNC) */ +#define MI_SEMAPHORE_SYNC_VEV (1<<16) /* VCS wait for VECS (VVESYNC) */ +#define MI_SEMAPHORE_SYNC_RV (2<<16) /* VCS wait for RCS (VRSYNC) */ +#define MI_SEMAPHORE_SYNC_RB (0<<16) /* BCS wait for RCS (BRSYNC) */ +#define MI_SEMAPHORE_SYNC_VEB (1<<16) /* BCS wait for VECS (BVESYNC) */ +#define MI_SEMAPHORE_SYNC_VB (2<<16) /* BCS wait for VCS (BVSYNC) */ +#define MI_SEMAPHORE_SYNC_BVE (0<<16) /* VECS wait for BCS (VEBSYNC) */ +#define MI_SEMAPHORE_SYNC_VVE (1<<16) /* VECS wait for VCS (VEVSYNC) */ +#define MI_SEMAPHORE_SYNC_RVE (2<<16) /* VECS wait for RCS (VERSYNC) */ +#define MI_SEMAPHORE_SYNC_INVALID (3<<16) +#define MI_SEMAPHORE_SYNC_MASK (3<<16) +#define MI_SET_CONTEXT MI_INSTR(0x18, 0) +#define MI_MM_SPACE_GTT (1<<8) +#define MI_MM_SPACE_PHYSICAL (0<<8) +#define MI_SAVE_EXT_STATE_EN (1<<3) +#define MI_RESTORE_EXT_STATE_EN (1<<2) +#define MI_FORCE_RESTORE (1<<1) +#define MI_RESTORE_INHIBIT (1<<0) +#define HSW_MI_RS_SAVE_STATE_EN (1<<3) +#define HSW_MI_RS_RESTORE_STATE_EN (1<<2) +#define MI_SEMAPHORE_SIGNAL MI_INSTR(0x1b, 0) /* GEN8+ */ +#define MI_SEMAPHORE_TARGET(engine) ((engine)<<15) +#define MI_SEMAPHORE_WAIT MI_INSTR(0x1c, 2) /* GEN8+ */ +#define MI_SEMAPHORE_POLL (1<<15) +#define MI_SEMAPHORE_SAD_GTE_SDD (1<<12) +#define MI_STORE_DWORD_IMM MI_INSTR(0x20, 1) +#define MI_STORE_DWORD_IMM_GEN4 MI_INSTR(0x20, 2) +#define MI_MEM_VIRTUAL (1 << 22) /* 945,g33,965 */ +#define MI_USE_GGTT (1 << 22) /* g4x+ */ +#define MI_STORE_DWORD_INDEX MI_INSTR(0x21, 1) +#define MI_STORE_DWORD_INDEX_SHIFT 2 +/* + * Official intel docs are somewhat sloppy concerning MI_LOAD_REGISTER_IMM: + * - Always issue a MI_NOOP _before_ the MI_LOAD_REGISTER_IMM - otherwise hw + * simply ignores the register load under certain conditions. + * - One can actually load arbitrary many arbitrary registers: Simply issue x + * address/value pairs. Don't overdue it, though, x <= 2^4 must hold! + */ +#define MI_LOAD_REGISTER_IMM(x) MI_INSTR(0x22, 2*(x)-1) +#define MI_LRI_FORCE_POSTED (1<<12) +#define MI_STORE_REGISTER_MEM MI_INSTR(0x24, 1) +#define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2) +#define MI_SRM_LRM_GLOBAL_GTT (1<<22) +#define MI_FLUSH_DW MI_INSTR(0x26, 1) /* for GEN6 */ +#define MI_FLUSH_DW_STORE_INDEX (1<<21) +#define MI_INVALIDATE_TLB (1<<18) +#define MI_FLUSH_DW_OP_STOREDW (1<<14) +#define MI_FLUSH_DW_OP_MASK (3<<14) +#define MI_FLUSH_DW_NOTIFY (1<<8) +#define MI_INVALIDATE_BSD (1<<7) +#define MI_FLUSH_DW_USE_GTT (1<<2) +#define MI_FLUSH_DW_USE_PPGTT (0<<2) +#define MI_LOAD_REGISTER_MEM MI_INSTR(0x29, 1) +#define MI_LOAD_REGISTER_MEM_GEN8 MI_INSTR(0x29, 2) +#define MI_BATCH_BUFFER MI_INSTR(0x30, 1) +#define MI_BATCH_NON_SECURE (1) +/* for snb/ivb/vlv this also means "batch in ppgtt" when ppgtt is enabled. */ +#define MI_BATCH_NON_SECURE_I965 (1<<8) +#define MI_BATCH_PPGTT_HSW (1<<8) +#define MI_BATCH_NON_SECURE_HSW (1<<13) +#define MI_BATCH_BUFFER_START MI_INSTR(0x31, 0) +#define MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */ +#define MI_BATCH_BUFFER_START_GEN8 MI_INSTR(0x31, 1) +#define MI_BATCH_RESOURCE_STREAMER (1<<10) + +/* + * 3D instructions used by the kernel + */ +#define GFX_INSTR(opcode, flags) ((0x3 << 29) | ((opcode) << 24) | (flags)) + +#define GEN9_MEDIA_POOL_STATE ((0x3 << 29) | (0x2 << 27) | (0x5 << 16) | 4) +#define GEN9_MEDIA_POOL_ENABLE (1 << 31) +#define GFX_OP_RASTER_RULES ((0x3<<29)|(0x7<<24)) +#define GFX_OP_SCISSOR ((0x3<<29)|(0x1c<<24)|(0x10<<19)) +#define SC_UPDATE_SCISSOR (0x1<<1) +#define SC_ENABLE_MASK (0x1<<0) +#define SC_ENABLE (0x1<<0) +#define GFX_OP_LOAD_INDIRECT ((0x3<<29)|(0x1d<<24)|(0x7<<16)) +#define GFX_OP_SCISSOR_INFO ((0x3<<29)|(0x1d<<24)|(0x81<<16)|(0x1)) +#define SCI_YMIN_MASK (0xffff<<16) +#define SCI_XMIN_MASK (0xffff<<0) +#define SCI_YMAX_MASK (0xffff<<16) +#define SCI_XMAX_MASK (0xffff<<0) +#define GFX_OP_SCISSOR_ENABLE ((0x3<<29)|(0x1c<<24)|(0x10<<19)) +#define GFX_OP_SCISSOR_RECT ((0x3<<29)|(0x1d<<24)|(0x81<<16)|1) +#define GFX_OP_COLOR_FACTOR ((0x3<<29)|(0x1d<<24)|(0x1<<16)|0x0) +#define GFX_OP_STIPPLE ((0x3<<29)|(0x1d<<24)|(0x83<<16)) +#define GFX_OP_MAP_INFO ((0x3<<29)|(0x1d<<24)|0x4) +#define GFX_OP_DESTBUFFER_VARS ((0x3<<29)|(0x1d<<24)|(0x85<<16)|0x0) +#define GFX_OP_DESTBUFFER_INFO ((0x3<<29)|(0x1d<<24)|(0x8e<<16)|1) +#define GFX_OP_DRAWRECT_INFO ((0x3<<29)|(0x1d<<24)|(0x80<<16)|(0x3)) +#define GFX_OP_DRAWRECT_INFO_I965 ((0x7900<<16)|0x2) + +#define COLOR_BLT_CMD (2<<29 | 0x40<<22 | (5-2)) +#define SRC_COPY_BLT_CMD ((2<<29)|(0x43<<22)|4) +#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6) +#define XY_MONO_SRC_COPY_IMM_BLT ((2<<29)|(0x71<<22)|5) +#define BLT_WRITE_A (2<<20) +#define BLT_WRITE_RGB (1<<20) +#define BLT_WRITE_RGBA (BLT_WRITE_RGB | BLT_WRITE_A) +#define BLT_DEPTH_8 (0<<24) +#define BLT_DEPTH_16_565 (1<<24) +#define BLT_DEPTH_16_1555 (2<<24) +#define BLT_DEPTH_32 (3<<24) +#define BLT_ROP_SRC_COPY (0xcc<<16) +#define BLT_ROP_COLOR_COPY (0xf0<<16) +#define XY_SRC_COPY_BLT_SRC_TILED (1<<15) /* 965+ only */ +#define XY_SRC_COPY_BLT_DST_TILED (1<<11) /* 965+ only */ +#define CMD_OP_DISPLAYBUFFER_INFO ((0x0<<29)|(0x14<<23)|2) +#define ASYNC_FLIP (1<<22) +#define DISPLAY_PLANE_A (0<<20) +#define DISPLAY_PLANE_B (1<<20) +#define GFX_OP_PIPE_CONTROL(len) ((0x3<<29)|(0x3<<27)|(0x2<<24)|((len)-2)) +#define PIPE_CONTROL_FLUSH_L3 (1<<27) +#define PIPE_CONTROL_GLOBAL_GTT_IVB (1<<24) /* gen7+ */ +#define PIPE_CONTROL_MMIO_WRITE (1<<23) +#define PIPE_CONTROL_STORE_DATA_INDEX (1<<21) +#define PIPE_CONTROL_CS_STALL (1<<20) +#define PIPE_CONTROL_TLB_INVALIDATE (1<<18) +#define PIPE_CONTROL_MEDIA_STATE_CLEAR (1<<16) +#define PIPE_CONTROL_QW_WRITE (1<<14) +#define PIPE_CONTROL_POST_SYNC_OP_MASK (3<<14) +#define PIPE_CONTROL_DEPTH_STALL (1<<13) +#define PIPE_CONTROL_WRITE_FLUSH (1<<12) +#define PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH (1<<12) /* gen6+ */ +#define PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE (1<<11) /* MBZ on ILK */ +#define PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE (1<<10) /* GM45+ only */ +#define PIPE_CONTROL_INDIRECT_STATE_DISABLE (1<<9) +#define PIPE_CONTROL_NOTIFY (1<<8) +#define PIPE_CONTROL_FLUSH_ENABLE (1<<7) /* gen7+ */ +#define PIPE_CONTROL_DC_FLUSH_ENABLE (1<<5) +#define PIPE_CONTROL_VF_CACHE_INVALIDATE (1<<4) +#define PIPE_CONTROL_CONST_CACHE_INVALIDATE (1<<3) +#define PIPE_CONTROL_STATE_CACHE_INVALIDATE (1<<2) +#define PIPE_CONTROL_STALL_AT_SCOREBOARD (1<<1) +#define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1<<0) +#define PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */ + +/* + * Commands used only by the command parser + */ +#define MI_SET_PREDICATE MI_INSTR(0x01, 0) +#define MI_ARB_CHECK MI_INSTR(0x05, 0) +#define MI_RS_CONTROL MI_INSTR(0x06, 0) +#define MI_URB_ATOMIC_ALLOC MI_INSTR(0x09, 0) +#define MI_PREDICATE MI_INSTR(0x0C, 0) +#define MI_RS_CONTEXT MI_INSTR(0x0F, 0) +#define MI_TOPOLOGY_FILTER MI_INSTR(0x0D, 0) +#define MI_LOAD_SCAN_LINES_EXCL MI_INSTR(0x13, 0) +#define MI_URB_CLEAR MI_INSTR(0x19, 0) +#define MI_UPDATE_GTT MI_INSTR(0x23, 0) +#define MI_CLFLUSH MI_INSTR(0x27, 0) +#define MI_REPORT_PERF_COUNT MI_INSTR(0x28, 0) +#define MI_REPORT_PERF_COUNT_GGTT (1<<0) +#define MI_LOAD_REGISTER_REG MI_INSTR(0x2A, 0) +#define MI_RS_STORE_DATA_IMM MI_INSTR(0x2B, 0) +#define MI_LOAD_URB_MEM MI_INSTR(0x2C, 0) +#define MI_STORE_URB_MEM MI_INSTR(0x2D, 0) +#define MI_CONDITIONAL_BATCH_BUFFER_END MI_INSTR(0x36, 0) + +#define PIPELINE_SELECT ((0x3<<29)|(0x1<<27)|(0x1<<24)|(0x4<<16)) +#define GFX_OP_3DSTATE_VF_STATISTICS ((0x3<<29)|(0x1<<27)|(0x0<<24)|(0xB<<16)) +#define MEDIA_VFE_STATE ((0x3<<29)|(0x2<<27)|(0x0<<24)|(0x0<<16)) +#define MEDIA_VFE_STATE_MMIO_ACCESS_MASK (0x18) +#define GPGPU_OBJECT ((0x3<<29)|(0x2<<27)|(0x1<<24)|(0x4<<16)) +#define GPGPU_WALKER ((0x3<<29)|(0x2<<27)|(0x1<<24)|(0x5<<16)) +#define GFX_OP_3DSTATE_DX9_CONSTANTF_VS \ + ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x39<<16)) +#define GFX_OP_3DSTATE_DX9_CONSTANTF_PS \ + ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x3A<<16)) +#define GFX_OP_3DSTATE_SO_DECL_LIST \ + ((0x3<<29)|(0x3<<27)|(0x1<<24)|(0x17<<16)) + +#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_VS \ + ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x43<<16)) +#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_GS \ + ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x44<<16)) +#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_HS \ + ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x45<<16)) +#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_DS \ + ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x46<<16)) +#define GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS \ + ((0x3<<29)|(0x3<<27)|(0x0<<24)|(0x47<<16)) + +#define MFX_WAIT ((0x3<<29)|(0x1<<27)|(0x0<<16)) + +#define COLOR_BLT ((0x2<<29)|(0x40<<22)) +#define SRC_COPY_BLT ((0x2<<29)|(0x43<<22)) + +#endif /* _INTEL_GPU_COMMANDS_H_ */ diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c index ff08ea0..116f4cc 100644 --- a/drivers/gpu/drm/i915/intel_guc.c +++ b/drivers/gpu/drm/i915/intel_guc.c @@ -64,10 +64,12 @@ void intel_guc_init_early(struct intel_guc *guc) { intel_guc_fw_init_early(guc); intel_guc_ct_init_early(&guc->ct); - intel_guc_log_init_early(guc); + intel_guc_log_init_early(&guc->log); mutex_init(&guc->send_mutex); + spin_lock_init(&guc->irq_lock); guc->send = intel_guc_send_nop; + guc->handler = intel_guc_to_host_event_handler_nop; guc->notify = gen8_guc_raise_irq; } @@ -86,9 +88,10 @@ int intel_guc_init_wq(struct intel_guc *guc) * or scheduled later on resume. This way the handling of work * item can be kept same between system suspend & rpm suspend. */ - guc->log.runtime.flush_wq = alloc_ordered_workqueue("i915-guc_log", - WQ_HIGHPRI | WQ_FREEZABLE); - if (!guc->log.runtime.flush_wq) { + guc->log.relay.flush_wq = + alloc_ordered_workqueue("i915-guc_log", + WQ_HIGHPRI | WQ_FREEZABLE); + if (!guc->log.relay.flush_wq) { DRM_ERROR("Couldn't allocate workqueue for GuC log\n"); return -ENOMEM; } @@ -111,7 +114,7 @@ int intel_guc_init_wq(struct intel_guc *guc) guc->preempt_wq = alloc_ordered_workqueue("i915-guc_preempt", WQ_HIGHPRI); if (!guc->preempt_wq) { - destroy_workqueue(guc->log.runtime.flush_wq); + destroy_workqueue(guc->log.relay.flush_wq); DRM_ERROR("Couldn't allocate workqueue for GuC " "preemption\n"); return -ENOMEM; @@ -129,7 +132,7 @@ void intel_guc_fini_wq(struct intel_guc *guc) USES_GUC_SUBMISSION(dev_priv)) destroy_workqueue(guc->preempt_wq); - destroy_workqueue(guc->log.runtime.flush_wq); + destroy_workqueue(guc->log.relay.flush_wq); } static int guc_shared_data_create(struct intel_guc *guc) @@ -169,7 +172,7 @@ int intel_guc_init(struct intel_guc *guc) return ret; GEM_BUG_ON(!guc->shared_data); - ret = intel_guc_log_create(guc); + ret = intel_guc_log_create(&guc->log); if (ret) goto err_shared; @@ -184,7 +187,7 @@ int intel_guc_init(struct intel_guc *guc) return 0; err_log: - intel_guc_log_destroy(guc); + intel_guc_log_destroy(&guc->log); err_shared: guc_shared_data_destroy(guc); return ret; @@ -196,41 +199,27 @@ void intel_guc_fini(struct intel_guc *guc) i915_ggtt_disable_guc(dev_priv); intel_guc_ads_destroy(guc); - intel_guc_log_destroy(guc); + intel_guc_log_destroy(&guc->log); guc_shared_data_destroy(guc); } -static u32 get_gt_type(struct drm_i915_private *dev_priv) +static u32 get_log_control_flags(void) { - /* XXX: GT type based on PCI device ID? field seems unused by fw */ - return 0; -} - -static u32 get_core_family(struct drm_i915_private *dev_priv) -{ - u32 gen = INTEL_GEN(dev_priv); + u32 level = i915_modparams.guc_log_level; + u32 flags = 0; - switch (gen) { - case 9: - return GUC_CORE_FAMILY_GEN9; - - default: - MISSING_CASE(gen); - return GUC_CORE_FAMILY_UNKNOWN; - } -} + GEM_BUG_ON(level < 0); -static u32 get_log_verbosity_flags(void) -{ - if (i915_modparams.guc_log_level > 0) { - u32 verbosity = i915_modparams.guc_log_level - 1; + if (!GUC_LOG_LEVEL_IS_ENABLED(level)) + flags |= GUC_LOG_DEFAULT_DISABLED; - GEM_BUG_ON(verbosity > GUC_LOG_VERBOSITY_MAX); - return verbosity << GUC_LOG_VERBOSITY_SHIFT; - } + if (!GUC_LOG_LEVEL_IS_VERBOSE(level)) + flags |= GUC_LOG_DISABLED; + else + flags |= GUC_LOG_LEVEL_TO_VERBOSITY(level) << + GUC_LOG_VERBOSITY_SHIFT; - GEM_BUG_ON(i915_modparams.enable_guc < 0); - return GUC_LOG_DISABLED; + return flags; } /* @@ -246,10 +235,6 @@ void intel_guc_init_params(struct intel_guc *guc) memset(params, 0, sizeof(params)); - params[GUC_CTL_DEVICE_INFO] |= - (get_gt_type(dev_priv) << GUC_CTL_GT_TYPE_SHIFT) | - (get_core_family(dev_priv) << GUC_CTL_CORE_FAMILY_SHIFT); - /* * GuC ARAT increment is 10 ns. GuC default scheduler quantum is one * second. This ARAR is calculated by: @@ -265,12 +250,13 @@ void intel_guc_init_params(struct intel_guc *guc) params[GUC_CTL_LOG_PARAMS] = guc->log.flags; - params[GUC_CTL_DEBUG] = get_log_verbosity_flags(); + params[GUC_CTL_DEBUG] = get_log_control_flags(); /* If GuC submission is enabled, set up additional parameters here */ if (USES_GUC_SUBMISSION(dev_priv)) { - u32 ads = guc_ggtt_offset(guc->ads_vma) >> PAGE_SHIFT; - u32 pgs = guc_ggtt_offset(dev_priv->guc.stage_desc_pool); + u32 ads = intel_guc_ggtt_offset(guc, + guc->ads_vma) >> PAGE_SHIFT; + u32 pgs = intel_guc_ggtt_offset(guc, guc->stage_desc_pool); u32 ctx_in_16 = GUC_MAX_STAGE_DESCRIPTORS / 16; params[GUC_CTL_DEBUG] |= ads << GUC_ADS_ADDR_SHIFT; @@ -301,16 +287,23 @@ void intel_guc_init_params(struct intel_guc *guc) intel_uncore_forcewake_put(dev_priv, FORCEWAKE_BLITTER); } -int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len) +int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len, + u32 *response_buf, u32 response_buf_size) { WARN(1, "Unexpected send: action=%#x\n", *action); return -ENODEV; } +void intel_guc_to_host_event_handler_nop(struct intel_guc *guc) +{ + WARN(1, "Unexpected event: no suitable handler\n"); +} + /* * This function implements the MMIO based host to GuC interface. */ -int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len) +int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len, + u32 *response_buf, u32 response_buf_size) { struct drm_i915_private *dev_priv = guc_to_i915(guc); u32 status; @@ -320,6 +313,9 @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len) GEM_BUG_ON(!len); GEM_BUG_ON(len > guc->send_regs.count); + /* We expect only action code */ + GEM_BUG_ON(*action & ~INTEL_GUC_MSG_CODE_MASK); + /* If CT is available, we expect to use MMIO only during init/fini */ GEM_BUG_ON(HAS_GUC_CT(dev_priv) && *action != INTEL_GUC_ACTION_REGISTER_COMMAND_TRANSPORT_BUFFER && @@ -341,29 +337,74 @@ int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len) */ ret = __intel_wait_for_register_fw(dev_priv, guc_send_reg(guc, 0), - INTEL_GUC_RECV_MASK, - INTEL_GUC_RECV_MASK, + INTEL_GUC_MSG_TYPE_MASK, + INTEL_GUC_MSG_TYPE_RESPONSE << + INTEL_GUC_MSG_TYPE_SHIFT, 10, 10, &status); - if (status != INTEL_GUC_STATUS_SUCCESS) { - /* - * Either the GuC explicitly returned an error (which - * we convert to -EIO here) or no response at all was - * received within the timeout limit (-ETIMEDOUT) - */ - if (ret != -ETIMEDOUT) - ret = -EIO; - - DRM_WARN("INTEL_GUC_SEND: Action 0x%X failed;" - " ret=%d status=0x%08X response=0x%08X\n", - action[0], ret, status, I915_READ(SOFT_SCRATCH(15))); + /* If GuC explicitly returned an error, convert it to -EIO */ + if (!ret && !INTEL_GUC_MSG_IS_RESPONSE_SUCCESS(status)) + ret = -EIO; + + if (ret) { + DRM_DEBUG_DRIVER("INTEL_GUC_SEND: Action 0x%X failed;" + " ret=%d status=0x%08X response=0x%08X\n", + action[0], ret, status, + I915_READ(SOFT_SCRATCH(15))); + goto out; } + if (response_buf) { + int count = min(response_buf_size, guc->send_regs.count - 1); + + for (i = 0; i < count; i++) + response_buf[i] = I915_READ(guc_send_reg(guc, i + 1)); + } + + /* Use data from the GuC response as our return value */ + ret = INTEL_GUC_MSG_TO_DATA(status); + +out: intel_uncore_forcewake_put(dev_priv, guc->send_regs.fw_domains); mutex_unlock(&guc->send_mutex); return ret; } +void intel_guc_to_host_event_handler_mmio(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + u32 msg, val; + + /* + * Sample the log buffer flush related bits & clear them out now + * itself from the message identity register to minimize the + * probability of losing a flush interrupt, when there are back + * to back flush interrupts. + * There can be a new flush interrupt, for different log buffer + * type (like for ISR), whilst Host is handling one (for DPC). + * Since same bit is used in message register for ISR & DPC, it + * could happen that GuC sets the bit for 2nd interrupt but Host + * clears out the bit on handling the 1st interrupt. + */ + spin_lock(&guc->irq_lock); + val = I915_READ(SOFT_SCRATCH(15)); + msg = val & guc->msg_enabled_mask; + I915_WRITE(SOFT_SCRATCH(15), val & ~msg); + spin_unlock(&guc->irq_lock); + + intel_guc_to_host_process_recv_msg(guc, msg); +} + +void intel_guc_to_host_process_recv_msg(struct intel_guc *guc, u32 msg) +{ + /* Make sure to handle only enabled messages */ + msg &= guc->msg_enabled_mask; + + if (msg & (INTEL_GUC_RECV_MSG_FLUSH_LOG_BUFFER | + INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED)) + intel_guc_log_handle_flush_event(&guc->log); +} + int intel_guc_sample_forcewake(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); @@ -410,7 +451,7 @@ int intel_guc_suspend(struct intel_guc *guc) u32 data[] = { INTEL_GUC_ACTION_ENTER_S_STATE, GUC_POWER_D1, /* any value greater than GUC_POWER_D0 */ - guc_ggtt_offset(guc->shared_data) + intel_guc_ggtt_offset(guc, guc->shared_data) }; return intel_guc_send(guc, data, ARRAY_SIZE(data)); @@ -434,7 +475,7 @@ int intel_guc_reset_engine(struct intel_guc *guc, data[3] = 0; data[4] = 0; data[5] = guc->execbuf_client->stage_id; - data[6] = guc_ggtt_offset(guc->shared_data); + data[6] = intel_guc_ggtt_offset(guc, guc->shared_data); return intel_guc_send(guc, data, ARRAY_SIZE(data)); } @@ -448,13 +489,66 @@ int intel_guc_resume(struct intel_guc *guc) u32 data[] = { INTEL_GUC_ACTION_EXIT_S_STATE, GUC_POWER_D0, - guc_ggtt_offset(guc->shared_data) + intel_guc_ggtt_offset(guc, guc->shared_data) }; return intel_guc_send(guc, data, ARRAY_SIZE(data)); } /** + * DOC: GuC Address Space + * + * The layout of GuC address space is shown below: + * + * :: + * + * +==============> +====================+ <== GUC_GGTT_TOP + * ^ | | + * | | | + * | | DRAM | + * | | Memory | + * | | | + * GuC | | + * Address +========> +====================+ <== WOPCM Top + * Space ^ | HW contexts RSVD | + * | | | WOPCM | + * | | +==> +--------------------+ <== GuC WOPCM Top + * | GuC ^ | | + * | GGTT | | | + * | Pin GuC | GuC | + * | Bias WOPCM | WOPCM | + * | | Size | | + * | | | | | + * v v v | | + * +=====+=====+==> +====================+ <== GuC WOPCM Base + * | Non-GuC WOPCM | + * | (HuC/Reserved) | + * +====================+ <== WOPCM Base + * + * The lower part of GuC Address Space [0, ggtt_pin_bias) is mapped to WOPCM + * while upper part of GuC Address Space [ggtt_pin_bias, GUC_GGTT_TOP) is mapped + * to DRAM. The value of the GuC ggtt_pin_bias is determined by WOPCM size and + * actual GuC WOPCM size. + */ + +/** + * intel_guc_init_ggtt_pin_bias() - Initialize the GuC ggtt_pin_bias value. + * @guc: intel_guc structure. + * + * This function will calculate and initialize the ggtt_pin_bias value based on + * overall WOPCM size and GuC WOPCM size. + */ +void intel_guc_init_ggtt_pin_bias(struct intel_guc *guc) +{ + struct drm_i915_private *i915 = guc_to_i915(guc); + + GEM_BUG_ON(!i915->wopcm.size); + GEM_BUG_ON(i915->wopcm.size < i915->wopcm.guc.base); + + guc->ggtt_pin_bias = i915->wopcm.size - i915->wopcm.guc.base; +} + +/** * intel_guc_allocate_vma() - Allocate a GGTT VMA for GuC usage * @guc: the guc * @size: size of area to allocate (both virtual space and memory) @@ -462,7 +556,7 @@ int intel_guc_resume(struct intel_guc *guc) * This is a wrapper to create an object for use with the GuC. In order to * use it inside the GuC, an object needs to be pinned lifetime, so we allocate * both some backing storage and a range inside the Global GTT. We must pin - * it in the GGTT somewhere other than than [0, GUC_WOPCM_TOP) because that + * it in the GGTT somewhere other than than [0, GUC ggtt_pin_bias) because that * range is reserved inside GuC. * * Return: A i915_vma if successful, otherwise an ERR_PTR. @@ -483,7 +577,7 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size) goto err; ret = i915_vma_pin(vma, 0, PAGE_SIZE, - PIN_GLOBAL | PIN_OFFSET_BIAS | GUC_WOPCM_TOP); + PIN_GLOBAL | PIN_OFFSET_BIAS | guc->ggtt_pin_bias); if (ret) { vma = ERR_PTR(ret); goto err; @@ -495,14 +589,3 @@ err: i915_gem_object_put(obj); return vma; } - -u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv) -{ - u32 wopcm_size = GUC_WOPCM_TOP; - - /* On BXT, the top of WOPCM is reserved for RC6 context */ - if (IS_GEN9_LP(dev_priv)) - wopcm_size -= BXT_GUC_WOPCM_RC6_RESERVED; - - return wopcm_size; -} diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index b9424ac..f1265e1 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -49,11 +49,16 @@ struct intel_guc { struct intel_guc_log log; struct intel_guc_ct ct; + /* Offset where Non-WOPCM memory starts. */ + u32 ggtt_pin_bias; + /* Log snapshot if GuC errors during load */ struct drm_i915_gem_object *load_err_log; /* intel_guc_recv interrupt related state */ + spinlock_t irq_lock; bool interrupts_enabled; + unsigned int msg_enabled_mask; struct i915_vma *ads_vma; struct i915_vma *stage_desc_pool; @@ -83,7 +88,11 @@ struct intel_guc { struct mutex send_mutex; /* GuC's FW specific send function */ - int (*send)(struct intel_guc *guc, const u32 *data, u32 len); + int (*send)(struct intel_guc *guc, const u32 *data, u32 len, + u32 *response_buf, u32 response_buf_size); + + /* GuC's FW specific event handler function */ + void (*handler)(struct intel_guc *guc); /* GuC's FW specific notify function */ void (*notify)(struct intel_guc *guc); @@ -92,7 +101,14 @@ struct intel_guc { static inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len) { - return guc->send(guc, action, len); + return guc->send(guc, action, len, NULL, 0); +} + +static inline int +intel_guc_send_and_receive(struct intel_guc *guc, const u32 *action, u32 len, + u32 *response_buf, u32 response_buf_size) +{ + return guc->send(guc, action, len, response_buf, response_buf_size); } static inline void intel_guc_notify(struct intel_guc *guc) @@ -100,17 +116,33 @@ static inline void intel_guc_notify(struct intel_guc *guc) guc->notify(guc); } -/* - * GuC does not allow any gfx GGTT address that falls into range [0, WOPCM_TOP), - * which is reserved for Boot ROM, SRAM and WOPCM. Currently this top address is - * 512K. In order to exclude 0-512K address space from GGTT, all gfx objects - * used by GuC is pinned with PIN_OFFSET_BIAS along with size of WOPCM. +static inline void intel_guc_to_host_event_handler(struct intel_guc *guc) +{ + guc->handler(guc); +} + +/* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */ +#define GUC_GGTT_TOP 0xFEE00000 + +/** + * intel_guc_ggtt_offset() - Get and validate the GGTT offset of @vma + * @guc: intel_guc structure. + * @vma: i915 graphics virtual memory area. + * + * GuC does not allow any gfx GGTT address that falls into range + * [0, GuC ggtt_pin_bias), which is reserved for Boot ROM, SRAM and WOPCM. + * Currently, in order to exclude [0, GuC ggtt_pin_bias) address space from + * GGTT, all gfx objects used by GuC are allocated with intel_guc_allocate_vma() + * and pinned with PIN_OFFSET_BIAS along with the value of GuC ggtt_pin_bias. + * + * Return: GGTT offset of the @vma. */ -static inline u32 guc_ggtt_offset(struct i915_vma *vma) +static inline u32 intel_guc_ggtt_offset(struct intel_guc *guc, + struct i915_vma *vma) { u32 offset = i915_ggtt_offset(vma); - GEM_BUG_ON(offset < GUC_WOPCM_TOP); + GEM_BUG_ON(offset < guc->ggtt_pin_bias); GEM_BUG_ON(range_overflows_t(u64, offset, vma->size, GUC_GGTT_TOP)); return offset; @@ -119,17 +151,43 @@ static inline u32 guc_ggtt_offset(struct i915_vma *vma) void intel_guc_init_early(struct intel_guc *guc); void intel_guc_init_send_regs(struct intel_guc *guc); void intel_guc_init_params(struct intel_guc *guc); +void intel_guc_init_ggtt_pin_bias(struct intel_guc *guc); int intel_guc_init_wq(struct intel_guc *guc); void intel_guc_fini_wq(struct intel_guc *guc); int intel_guc_init(struct intel_guc *guc); void intel_guc_fini(struct intel_guc *guc); -int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len); -int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len); +int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len, + u32 *response_buf, u32 response_buf_size); +int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len, + u32 *response_buf, u32 response_buf_size); +void intel_guc_to_host_event_handler(struct intel_guc *guc); +void intel_guc_to_host_event_handler_nop(struct intel_guc *guc); +void intel_guc_to_host_event_handler_mmio(struct intel_guc *guc); +void intel_guc_to_host_process_recv_msg(struct intel_guc *guc, u32 msg); int intel_guc_sample_forcewake(struct intel_guc *guc); int intel_guc_auth_huc(struct intel_guc *guc, u32 rsa_offset); int intel_guc_suspend(struct intel_guc *guc); int intel_guc_resume(struct intel_guc *guc); struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size); -u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv); + +static inline int intel_guc_sanitize(struct intel_guc *guc) +{ + intel_uc_fw_sanitize(&guc->fw); + return 0; +} + +static inline void intel_guc_enable_msg(struct intel_guc *guc, u32 mask) +{ + spin_lock_irq(&guc->irq_lock); + guc->msg_enabled_mask |= mask; + spin_unlock_irq(&guc->irq_lock); +} + +static inline void intel_guc_disable_msg(struct intel_guc *guc, u32 mask) +{ + spin_lock_irq(&guc->irq_lock); + guc->msg_enabled_mask &= ~mask; + spin_unlock_irq(&guc->irq_lock); +} #endif diff --git a/drivers/gpu/drm/i915/intel_guc_ads.c b/drivers/gpu/drm/i915/intel_guc_ads.c index ac62753..dcaa3fb7 100644 --- a/drivers/gpu/drm/i915/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/intel_guc_ads.c @@ -75,7 +75,7 @@ static void guc_policies_init(struct guc_policies *policies) int intel_guc_ads_create(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); - struct i915_vma *vma; + struct i915_vma *vma, *kernel_ctx_vma; struct page *page; /* The ads obj includes the struct itself and buffers passed to GuC */ struct { @@ -121,9 +121,10 @@ int intel_guc_ads_create(struct intel_guc *guc) * to find it. Note that we have to skip our header (1 page), * because our GuC shared data is there. */ + kernel_ctx_vma = to_intel_context(dev_priv->kernel_context, + dev_priv->engine[RCS])->state; blob->ads.golden_context_lrca = - guc_ggtt_offset(dev_priv->kernel_context->engine[RCS].state) + - skipped_offset; + intel_guc_ggtt_offset(guc, kernel_ctx_vma) + skipped_offset; /* * The GuC expects us to exclude the portion of the context image that @@ -135,7 +136,7 @@ int intel_guc_ads_create(struct intel_guc *guc) blob->ads.eng_state_size[engine->guc_id] = engine->context_size - skipped_size; - base = guc_ggtt_offset(vma); + base = intel_guc_ggtt_offset(guc, vma); blob->ads.scheduler_policies = base + ptr_offset(blob, policies); blob->ads.reg_state_buffer = base + ptr_offset(blob, reg_state_buffer); blob->ads.reg_state_addr = base + ptr_offset(blob, reg_state); diff --git a/drivers/gpu/drm/i915/intel_guc_ct.c b/drivers/gpu/drm/i915/intel_guc_ct.c index 24ad557..371b600 100644 --- a/drivers/gpu/drm/i915/intel_guc_ct.c +++ b/drivers/gpu/drm/i915/intel_guc_ct.c @@ -24,14 +24,49 @@ #include "i915_drv.h" #include "intel_guc_ct.h" +#ifdef CONFIG_DRM_I915_DEBUG_GUC +#define CT_DEBUG_DRIVER(...) DRM_DEBUG_DRIVER(__VA_ARGS__) +#else +#define CT_DEBUG_DRIVER(...) do { } while (0) +#endif + +struct ct_request { + struct list_head link; + u32 fence; + u32 status; + u32 response_len; + u32 *response_buf; +}; + +struct ct_incoming_request { + struct list_head link; + u32 msg[]; +}; + enum { CTB_SEND = 0, CTB_RECV = 1 }; enum { CTB_OWNER_HOST = 0 }; +static void ct_incoming_request_worker_func(struct work_struct *w); + +/** + * intel_guc_ct_init_early - Initialize CT state without requiring device access + * @ct: pointer to CT struct + */ void intel_guc_ct_init_early(struct intel_guc_ct *ct) { /* we're using static channel owners */ ct->host_channel.owner = CTB_OWNER_HOST; + + spin_lock_init(&ct->lock); + INIT_LIST_HEAD(&ct->pending_requests); + INIT_LIST_HEAD(&ct->incoming_requests); + INIT_WORK(&ct->worker, ct_incoming_request_worker_func); +} + +static inline struct intel_guc *ct_to_guc(struct intel_guc_ct *ct) +{ + return container_of(ct, struct intel_guc, ct); } static inline const char *guc_ct_buffer_type_to_str(u32 type) @@ -49,8 +84,8 @@ static inline const char *guc_ct_buffer_type_to_str(u32 type) static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc, u32 cmds_addr, u32 size, u32 owner) { - DRM_DEBUG_DRIVER("CT: desc %p init addr=%#x size=%u owner=%u\n", - desc, cmds_addr, size, owner); + CT_DEBUG_DRIVER("CT: desc %p init addr=%#x size=%u owner=%u\n", + desc, cmds_addr, size, owner); memset(desc, 0, sizeof(*desc)); desc->addr = cmds_addr; desc->size = size; @@ -59,8 +94,8 @@ static void guc_ct_buffer_desc_init(struct guc_ct_buffer_desc *desc, static void guc_ct_buffer_desc_reset(struct guc_ct_buffer_desc *desc) { - DRM_DEBUG_DRIVER("CT: desc %p reset head=%u tail=%u\n", - desc, desc->head, desc->tail); + CT_DEBUG_DRIVER("CT: desc %p reset head=%u tail=%u\n", + desc, desc->head, desc->tail); desc->head = 0; desc->tail = 0; desc->is_in_error = 0; @@ -79,7 +114,7 @@ static int guc_action_register_ct_buffer(struct intel_guc *guc, int err; /* Can't use generic send(), CT registration must go over MMIO */ - err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action)); + err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0); if (err) DRM_ERROR("CT: register %s buffer failed; err=%d\n", guc_ct_buffer_type_to_str(type), err); @@ -98,7 +133,7 @@ static int guc_action_deregister_ct_buffer(struct intel_guc *guc, int err; /* Can't use generic send(), CT deregistration must go over MMIO */ - err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action)); + err = intel_guc_send_mmio(guc, action, ARRAY_SIZE(action), NULL, 0); if (err) DRM_ERROR("CT: deregister %s buffer failed; owner=%d err=%d\n", guc_ct_buffer_type_to_str(type), owner, err); @@ -156,7 +191,8 @@ static int ctch_init(struct intel_guc *guc, err = PTR_ERR(blob); goto err_vma; } - DRM_DEBUG_DRIVER("CT: vma base=%#x\n", guc_ggtt_offset(ctch->vma)); + CT_DEBUG_DRIVER("CT: vma base=%#x\n", + intel_guc_ggtt_offset(guc, ctch->vma)); /* store pointers to desc and cmds */ for (i = 0; i < ARRAY_SIZE(ctch->ctbs); i++) { @@ -170,8 +206,8 @@ static int ctch_init(struct intel_guc *guc, err_vma: i915_vma_unpin_and_release(&ctch->vma); err_out: - DRM_DEBUG_DRIVER("CT: channel %d initialization failed; err=%d\n", - ctch->owner, err); + CT_DEBUG_DRIVER("CT: channel %d initialization failed; err=%d\n", + ctch->owner, err); return err; } @@ -191,8 +227,8 @@ static int ctch_open(struct intel_guc *guc, int err; int i; - DRM_DEBUG_DRIVER("CT: channel %d reopen=%s\n", - ctch->owner, yesno(ctch_is_open(ctch))); + CT_DEBUG_DRIVER("CT: channel %d reopen=%s\n", + ctch->owner, yesno(ctch_is_open(ctch))); if (!ctch->vma) { err = ctch_init(guc, ctch); @@ -202,7 +238,7 @@ static int ctch_open(struct intel_guc *guc, } /* vma should be already allocated and map'ed */ - base = guc_ggtt_offset(ctch->vma); + base = intel_guc_ggtt_offset(guc, ctch->vma); /* (re)initialize descriptors * cmds buffers are in the second half of the blob page @@ -263,10 +299,29 @@ static u32 ctch_get_next_fence(struct intel_guc_ct_channel *ctch) return ++ctch->next_fence; } +/** + * DOC: CTB Host to GuC request + * + * Format of the CTB Host to GuC request message is as follows:: + * + * +------------+---------+---------+---------+---------+ + * | msg[0] | [1] | [2] | ... | [n-1] | + * +------------+---------+---------+---------+---------+ + * | MESSAGE | MESSAGE PAYLOAD | + * + HEADER +---------+---------+---------+---------+ + * | | 0 | 1 | ... | n | + * +============+=========+=========+=========+=========+ + * | len >= 1 | FENCE | request specific data | + * +------+-----+---------+---------+---------+---------+ + * + * ^-----------------len-------------------^ + */ + static int ctb_write(struct intel_guc_ct_buffer *ctb, const u32 *action, u32 len /* in dwords */, - u32 fence) + u32 fence, + bool want_response) { struct guc_ct_buffer_desc *desc = ctb->desc; u32 head = desc->head / 4; /* in dwords */ @@ -295,15 +350,21 @@ static int ctb_write(struct intel_guc_ct_buffer *ctb, if (unlikely(used + len + 1 >= size)) return -ENOSPC; - /* Write the message. The format is the following: + /* + * Write the message. The format is the following: * DW0: header (including action code) * DW1: fence * DW2+: action data */ header = (len << GUC_CT_MSG_LEN_SHIFT) | (GUC_CT_MSG_WRITE_FENCE_TO_DESC) | + (want_response ? GUC_CT_MSG_SEND_STATUS : 0) | (action[0] << GUC_CT_MSG_ACTION_SHIFT); + CT_DEBUG_DRIVER("CT: writing %*ph %*ph %*ph\n", + 4, &header, 4, &fence, + 4 * (len - 1), &action[1]); + cmds[tail] = header; tail = (tail + 1) % size; @@ -322,16 +383,25 @@ static int ctb_write(struct intel_guc_ct_buffer *ctb, return 0; } -/* Wait for the response from the GuC. +/** + * wait_for_ctb_desc_update - Wait for the CT buffer descriptor update. + * @desc: buffer descriptor * @fence: response fence * @status: placeholder for status - * return: 0 response received (status is valid) - * -ETIMEDOUT no response within hardcoded timeout - * -EPROTO no response, ct buffer was in error + * + * Guc will update CT buffer descriptor with new fence and status + * after processing the command identified by the fence. Wait for + * specified fence and then read from the descriptor status of the + * command. + * + * Return: + * * 0 response received (status is valid) + * * -ETIMEDOUT no response within hardcoded timeout + * * -EPROTO no response, CT buffer is in error */ -static int wait_for_response(struct guc_ct_buffer_desc *desc, - u32 fence, - u32 *status) +static int wait_for_ctb_desc_update(struct guc_ct_buffer_desc *desc, + u32 fence, + u32 *status) { int err; @@ -363,71 +433,440 @@ static int wait_for_response(struct guc_ct_buffer_desc *desc, return err; } -static int ctch_send(struct intel_guc *guc, +/** + * wait_for_ct_request_update - Wait for CT request state update. + * @req: pointer to pending request + * @status: placeholder for status + * + * For each sent request, Guc shall send bac CT response message. + * Our message handler will update status of tracked request once + * response message with given fence is received. Wait here and + * check for valid response status value. + * + * Return: + * * 0 response received (status is valid) + * * -ETIMEDOUT no response within hardcoded timeout + */ +static int wait_for_ct_request_update(struct ct_request *req, u32 *status) +{ + int err; + + /* + * Fast commands should complete in less than 10us, so sample quickly + * up to that length of time, then switch to a slower sleep-wait loop. + * No GuC command should ever take longer than 10ms. + */ +#define done INTEL_GUC_MSG_IS_RESPONSE(READ_ONCE(req->status)) + err = wait_for_us(done, 10); + if (err) + err = wait_for(done, 10); +#undef done + + if (unlikely(err)) + DRM_ERROR("CT: fence %u err %d\n", req->fence, err); + + *status = req->status; + return err; +} + +static int ctch_send(struct intel_guc_ct *ct, struct intel_guc_ct_channel *ctch, const u32 *action, u32 len, + u32 *response_buf, + u32 response_buf_size, u32 *status) { struct intel_guc_ct_buffer *ctb = &ctch->ctbs[CTB_SEND]; struct guc_ct_buffer_desc *desc = ctb->desc; + struct ct_request request; + unsigned long flags; u32 fence; int err; GEM_BUG_ON(!ctch_is_open(ctch)); GEM_BUG_ON(!len); GEM_BUG_ON(len & ~GUC_CT_MSG_LEN_MASK); + GEM_BUG_ON(!response_buf && response_buf_size); fence = ctch_get_next_fence(ctch); - err = ctb_write(ctb, action, len, fence); + request.fence = fence; + request.status = 0; + request.response_len = response_buf_size; + request.response_buf = response_buf; + + spin_lock_irqsave(&ct->lock, flags); + list_add_tail(&request.link, &ct->pending_requests); + spin_unlock_irqrestore(&ct->lock, flags); + + err = ctb_write(ctb, action, len, fence, !!response_buf); if (unlikely(err)) - return err; + goto unlink; - intel_guc_notify(guc); + intel_guc_notify(ct_to_guc(ct)); - err = wait_for_response(desc, fence, status); + if (response_buf) + err = wait_for_ct_request_update(&request, status); + else + err = wait_for_ctb_desc_update(desc, fence, status); if (unlikely(err)) - return err; - if (*status != INTEL_GUC_STATUS_SUCCESS) - return -EIO; - return 0; + goto unlink; + + if (!INTEL_GUC_MSG_IS_RESPONSE_SUCCESS(*status)) { + err = -EIO; + goto unlink; + } + + if (response_buf) { + /* There shall be no data in the status */ + WARN_ON(INTEL_GUC_MSG_TO_DATA(request.status)); + /* Return actual response len */ + err = request.response_len; + } else { + /* There shall be no response payload */ + WARN_ON(request.response_len); + /* Return data decoded from the status dword */ + err = INTEL_GUC_MSG_TO_DATA(*status); + } + +unlink: + spin_lock_irqsave(&ct->lock, flags); + list_del(&request.link); + spin_unlock_irqrestore(&ct->lock, flags); + + return err; } /* * Command Transport (CT) buffer based GuC send function. */ -static int intel_guc_send_ct(struct intel_guc *guc, const u32 *action, u32 len) +static int intel_guc_send_ct(struct intel_guc *guc, const u32 *action, u32 len, + u32 *response_buf, u32 response_buf_size) { - struct intel_guc_ct_channel *ctch = &guc->ct.host_channel; + struct intel_guc_ct *ct = &guc->ct; + struct intel_guc_ct_channel *ctch = &ct->host_channel; u32 status = ~0; /* undefined */ - int err; + int ret; mutex_lock(&guc->send_mutex); - err = ctch_send(guc, ctch, action, len, &status); - if (unlikely(err)) { + ret = ctch_send(ct, ctch, action, len, response_buf, response_buf_size, + &status); + if (unlikely(ret < 0)) { DRM_ERROR("CT: send action %#X failed; err=%d status=%#X\n", - action[0], err, status); + action[0], ret, status); + } else if (unlikely(ret)) { + CT_DEBUG_DRIVER("CT: send action %#x returned %d (%#x)\n", + action[0], ret, ret); } mutex_unlock(&guc->send_mutex); - return err; + return ret; +} + +static inline unsigned int ct_header_get_len(u32 header) +{ + return (header >> GUC_CT_MSG_LEN_SHIFT) & GUC_CT_MSG_LEN_MASK; +} + +static inline unsigned int ct_header_get_action(u32 header) +{ + return (header >> GUC_CT_MSG_ACTION_SHIFT) & GUC_CT_MSG_ACTION_MASK; +} + +static inline bool ct_header_is_response(u32 header) +{ + return ct_header_get_action(header) == INTEL_GUC_ACTION_DEFAULT; +} + +static int ctb_read(struct intel_guc_ct_buffer *ctb, u32 *data) +{ + struct guc_ct_buffer_desc *desc = ctb->desc; + u32 head = desc->head / 4; /* in dwords */ + u32 tail = desc->tail / 4; /* in dwords */ + u32 size = desc->size / 4; /* in dwords */ + u32 *cmds = ctb->cmds; + s32 available; /* in dwords */ + unsigned int len; + unsigned int i; + + GEM_BUG_ON(desc->size % 4); + GEM_BUG_ON(desc->head % 4); + GEM_BUG_ON(desc->tail % 4); + GEM_BUG_ON(tail >= size); + GEM_BUG_ON(head >= size); + + /* tail == head condition indicates empty */ + available = tail - head; + if (unlikely(available == 0)) + return -ENODATA; + + /* beware of buffer wrap case */ + if (unlikely(available < 0)) + available += size; + CT_DEBUG_DRIVER("CT: available %d (%u:%u)\n", available, head, tail); + GEM_BUG_ON(available < 0); + + data[0] = cmds[head]; + head = (head + 1) % size; + + /* message len with header */ + len = ct_header_get_len(data[0]) + 1; + if (unlikely(len > (u32)available)) { + DRM_ERROR("CT: incomplete message %*ph %*ph %*ph\n", + 4, data, + 4 * (head + available - 1 > size ? + size - head : available - 1), &cmds[head], + 4 * (head + available - 1 > size ? + available - 1 - size + head : 0), &cmds[0]); + return -EPROTO; + } + + for (i = 1; i < len; i++) { + data[i] = cmds[head]; + head = (head + 1) % size; + } + CT_DEBUG_DRIVER("CT: received %*ph\n", 4 * len, data); + + desc->head = head * 4; + return 0; } /** - * Enable buffer based command transport + * DOC: CTB GuC to Host response + * + * Format of the CTB GuC to Host response message is as follows:: + * + * +------------+---------+---------+---------+---------+---------+ + * | msg[0] | [1] | [2] | [3] | ... | [n-1] | + * +------------+---------+---------+---------+---------+---------+ + * | MESSAGE | MESSAGE PAYLOAD | + * + HEADER +---------+---------+---------+---------+---------+ + * | | 0 | 1 | 2 | ... | n | + * +============+=========+=========+=========+=========+=========+ + * | len >= 2 | FENCE | STATUS | response specific data | + * +------+-----+---------+---------+---------+---------+---------+ + * + * ^-----------------------len-----------------------^ + */ + +static int ct_handle_response(struct intel_guc_ct *ct, const u32 *msg) +{ + u32 header = msg[0]; + u32 len = ct_header_get_len(header); + u32 msglen = len + 1; /* total message length including header */ + u32 fence; + u32 status; + u32 datalen; + struct ct_request *req; + bool found = false; + + GEM_BUG_ON(!ct_header_is_response(header)); + GEM_BUG_ON(!in_irq()); + + /* Response payload shall at least include fence and status */ + if (unlikely(len < 2)) { + DRM_ERROR("CT: corrupted response %*ph\n", 4 * msglen, msg); + return -EPROTO; + } + + fence = msg[1]; + status = msg[2]; + datalen = len - 2; + + /* Format of the status follows RESPONSE message */ + if (unlikely(!INTEL_GUC_MSG_IS_RESPONSE(status))) { + DRM_ERROR("CT: corrupted response %*ph\n", 4 * msglen, msg); + return -EPROTO; + } + + CT_DEBUG_DRIVER("CT: response fence %u status %#x\n", fence, status); + + spin_lock(&ct->lock); + list_for_each_entry(req, &ct->pending_requests, link) { + if (unlikely(fence != req->fence)) { + CT_DEBUG_DRIVER("CT: request %u awaits response\n", + req->fence); + continue; + } + if (unlikely(datalen > req->response_len)) { + DRM_ERROR("CT: response %u too long %*ph\n", + req->fence, 4 * msglen, msg); + datalen = 0; + } + if (datalen) + memcpy(req->response_buf, msg + 3, 4 * datalen); + req->response_len = datalen; + WRITE_ONCE(req->status, status); + found = true; + break; + } + spin_unlock(&ct->lock); + + if (!found) + DRM_ERROR("CT: unsolicited response %*ph\n", 4 * msglen, msg); + return 0; +} + +static void ct_process_request(struct intel_guc_ct *ct, + u32 action, u32 len, const u32 *payload) +{ + struct intel_guc *guc = ct_to_guc(ct); + + CT_DEBUG_DRIVER("CT: request %x %*ph\n", action, 4 * len, payload); + + switch (action) { + case INTEL_GUC_ACTION_DEFAULT: + if (unlikely(len < 1)) + goto fail_unexpected; + intel_guc_to_host_process_recv_msg(guc, *payload); + break; + + default: +fail_unexpected: + DRM_ERROR("CT: unexpected request %x %*ph\n", + action, 4 * len, payload); + break; + } +} + +static bool ct_process_incoming_requests(struct intel_guc_ct *ct) +{ + unsigned long flags; + struct ct_incoming_request *request; + u32 header; + u32 *payload; + bool done; + + spin_lock_irqsave(&ct->lock, flags); + request = list_first_entry_or_null(&ct->incoming_requests, + struct ct_incoming_request, link); + if (request) + list_del(&request->link); + done = !!list_empty(&ct->incoming_requests); + spin_unlock_irqrestore(&ct->lock, flags); + + if (!request) + return true; + + header = request->msg[0]; + payload = &request->msg[1]; + ct_process_request(ct, + ct_header_get_action(header), + ct_header_get_len(header), + payload); + + kfree(request); + return done; +} + +static void ct_incoming_request_worker_func(struct work_struct *w) +{ + struct intel_guc_ct *ct = container_of(w, struct intel_guc_ct, worker); + bool done; + + done = ct_process_incoming_requests(ct); + if (!done) + queue_work(system_unbound_wq, &ct->worker); +} + +/** + * DOC: CTB GuC to Host request + * + * Format of the CTB GuC to Host request message is as follows:: + * + * +------------+---------+---------+---------+---------+---------+ + * | msg[0] | [1] | [2] | [3] | ... | [n-1] | + * +------------+---------+---------+---------+---------+---------+ + * | MESSAGE | MESSAGE PAYLOAD | + * + HEADER +---------+---------+---------+---------+---------+ + * | | 0 | 1 | 2 | ... | n | + * +============+=========+=========+=========+=========+=========+ + * | len | request specific data | + * +------+-----+---------+---------+---------+---------+---------+ + * + * ^-----------------------len-----------------------^ + */ + +static int ct_handle_request(struct intel_guc_ct *ct, const u32 *msg) +{ + u32 header = msg[0]; + u32 len = ct_header_get_len(header); + u32 msglen = len + 1; /* total message length including header */ + struct ct_incoming_request *request; + unsigned long flags; + + GEM_BUG_ON(ct_header_is_response(header)); + + request = kmalloc(sizeof(*request) + 4 * msglen, GFP_ATOMIC); + if (unlikely(!request)) { + DRM_ERROR("CT: dropping request %*ph\n", 4 * msglen, msg); + return 0; /* XXX: -ENOMEM ? */ + } + memcpy(request->msg, msg, 4 * msglen); + + spin_lock_irqsave(&ct->lock, flags); + list_add_tail(&request->link, &ct->incoming_requests); + spin_unlock_irqrestore(&ct->lock, flags); + + queue_work(system_unbound_wq, &ct->worker); + return 0; +} + +static void ct_process_host_channel(struct intel_guc_ct *ct) +{ + struct intel_guc_ct_channel *ctch = &ct->host_channel; + struct intel_guc_ct_buffer *ctb = &ctch->ctbs[CTB_RECV]; + u32 msg[GUC_CT_MSG_LEN_MASK + 1]; /* one extra dw for the header */ + int err = 0; + + if (!ctch_is_open(ctch)) + return; + + do { + err = ctb_read(ctb, msg); + if (err) + break; + + if (ct_header_is_response(msg[0])) + err = ct_handle_response(ct, msg); + else + err = ct_handle_request(ct, msg); + } while (!err); + + if (GEM_WARN_ON(err == -EPROTO)) { + DRM_ERROR("CT: corrupted message detected!\n"); + ctb->desc->is_in_error = 1; + } +} + +/* + * When we're communicating with the GuC over CT, GuC uses events + * to notify us about new messages being posted on the RECV buffer. + */ +static void intel_guc_to_host_event_handler_ct(struct intel_guc *guc) +{ + struct intel_guc_ct *ct = &guc->ct; + + ct_process_host_channel(ct); +} + +/** + * intel_guc_ct_enable - Enable buffer based command transport. + * @ct: pointer to CT struct + * * Shall only be called for platforms with HAS_GUC_CT. - * @guc: the guc - * return: 0 on success - * non-zero on failure + * + * Return: 0 on success, a negative errno code on failure. */ -int intel_guc_enable_ct(struct intel_guc *guc) +int intel_guc_ct_enable(struct intel_guc_ct *ct) { - struct drm_i915_private *dev_priv = guc_to_i915(guc); - struct intel_guc_ct_channel *ctch = &guc->ct.host_channel; + struct intel_guc *guc = ct_to_guc(ct); + struct drm_i915_private *i915 = guc_to_i915(guc); + struct intel_guc_ct_channel *ctch = &ct->host_channel; int err; - GEM_BUG_ON(!HAS_GUC_CT(dev_priv)); + GEM_BUG_ON(!HAS_GUC_CT(i915)); err = ctch_open(guc, ctch); if (unlikely(err)) @@ -435,21 +874,24 @@ int intel_guc_enable_ct(struct intel_guc *guc) /* Switch into cmd transport buffer based send() */ guc->send = intel_guc_send_ct; + guc->handler = intel_guc_to_host_event_handler_ct; DRM_INFO("CT: %s\n", enableddisabled(true)); return 0; } /** - * Disable buffer based command transport. + * intel_guc_ct_disable - Disable buffer based command transport. + * @ct: pointer to CT struct + * * Shall only be called for platforms with HAS_GUC_CT. - * @guc: the guc */ -void intel_guc_disable_ct(struct intel_guc *guc) +void intel_guc_ct_disable(struct intel_guc_ct *ct) { - struct drm_i915_private *dev_priv = guc_to_i915(guc); - struct intel_guc_ct_channel *ctch = &guc->ct.host_channel; + struct intel_guc *guc = ct_to_guc(ct); + struct drm_i915_private *i915 = guc_to_i915(guc); + struct intel_guc_ct_channel *ctch = &ct->host_channel; - GEM_BUG_ON(!HAS_GUC_CT(dev_priv)); + GEM_BUG_ON(!HAS_GUC_CT(i915)); if (!ctch_is_open(ctch)) return; @@ -458,5 +900,6 @@ void intel_guc_disable_ct(struct intel_guc *guc) /* Disable send */ guc->send = intel_guc_send_nop; + guc->handler = intel_guc_to_host_event_handler_nop; DRM_INFO("CT: %s\n", enableddisabled(false)); } diff --git a/drivers/gpu/drm/i915/intel_guc_ct.h b/drivers/gpu/drm/i915/intel_guc_ct.h index 6d97f36..d774895a 100644 --- a/drivers/gpu/drm/i915/intel_guc_ct.h +++ b/drivers/gpu/drm/i915/intel_guc_ct.h @@ -75,12 +75,22 @@ struct intel_guc_ct_channel { struct intel_guc_ct { struct intel_guc_ct_channel host_channel; /* other channels are tbd */ + + /** @lock: protects pending requests list */ + spinlock_t lock; + + /** @pending_requests: list of requests waiting for response */ + struct list_head pending_requests; + + /** @incoming_requests: list of incoming requests */ + struct list_head incoming_requests; + + /** @worker: worker for handling incoming requests */ + struct work_struct worker; }; void intel_guc_ct_init_early(struct intel_guc_ct *ct); - -/* XXX: move to intel_uc.h ? don't fit there either */ -int intel_guc_enable_ct(struct intel_guc *guc); -void intel_guc_disable_ct(struct intel_guc *guc); +int intel_guc_ct_enable(struct intel_guc_ct *ct); +void intel_guc_ct_disable(struct intel_guc_ct *ct); #endif /* _INTEL_GUC_CT_H_ */ diff --git a/drivers/gpu/drm/i915/intel_guc_fw.c b/drivers/gpu/drm/i915/intel_guc_fw.c index d07f2b9..a9e6fcc 100644 --- a/drivers/gpu/drm/i915/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/intel_guc_fw.c @@ -165,7 +165,7 @@ static int guc_xfer_ucode(struct intel_guc *guc, struct i915_vma *vma) I915_WRITE(DMA_COPY_SIZE, guc_fw->header_size + guc_fw->ucode_size); /* Set the source address for the new blob */ - offset = guc_ggtt_offset(vma) + guc_fw->header_offset; + offset = intel_guc_ggtt_offset(guc, vma) + guc_fw->header_offset; I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset)); I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF); @@ -275,9 +275,8 @@ static int guc_fw_xfer(struct intel_uc_fw *guc_fw, struct i915_vma *vma) * Called from intel_uc_init_hw() during driver load, resume from sleep and * after a GPU reset. * - * The firmware image should have already been fetched into memory by the - * earlier call to intel_uc_init_fw(), so here we need to only check that - * fetch succeeded, and then transfer the image to the h/w. + * The firmware image should have already been fetched into memory, so only + * check that fetch succeeded, and then transfer the image to the h/w. * * Return: non-zero code on error */ diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h index 6a10aa6..0867ba7 100644 --- a/drivers/gpu/drm/i915/intel_guc_fwif.h +++ b/drivers/gpu/drm/i915/intel_guc_fwif.h @@ -23,9 +23,6 @@ #ifndef _INTEL_GUC_FWIF_H #define _INTEL_GUC_FWIF_H -#define GUC_CORE_FAMILY_GEN9 12 -#define GUC_CORE_FAMILY_UNKNOWN 0x7fffffff - #define GUC_CLIENT_PRIORITY_KMD_HIGH 0 #define GUC_CLIENT_PRIORITY_HIGH 1 #define GUC_CLIENT_PRIORITY_KMD_NORMAL 2 @@ -82,8 +79,6 @@ #define GUC_CTL_ARAT_LOW 2 #define GUC_CTL_DEVICE_INFO 3 -#define GUC_CTL_GT_TYPE_SHIFT 0 -#define GUC_CTL_CORE_FAMILY_SHIFT 7 #define GUC_CTL_LOG_PARAMS 4 #define GUC_LOG_VALID (1 << 0) @@ -127,7 +122,7 @@ #define GUC_PROFILE_ENABLED (1 << 7) #define GUC_WQ_TRACK_ENABLED (1 << 8) #define GUC_ADS_ENABLED (1 << 9) -#define GUC_DEBUG_RESERVED (1 << 10) +#define GUC_LOG_DEFAULT_DISABLED (1 << 10) #define GUC_ADS_ADDR_SHIFT 11 #define GUC_ADS_ADDR_MASK 0xfffff800 @@ -327,6 +322,58 @@ struct guc_stage_desc { u64 desc_private; } __packed; +/** + * DOC: CTB based communication + * + * The CTB (command transport buffer) communication between Host and GuC + * is based on u32 data stream written to the shared buffer. One buffer can + * be used to transmit data only in one direction (one-directional channel). + * + * Current status of the each buffer is stored in the buffer descriptor. + * Buffer descriptor holds tail and head fields that represents active data + * stream. The tail field is updated by the data producer (sender), and head + * field is updated by the data consumer (receiver):: + * + * +------------+ + * | DESCRIPTOR | +=================+============+========+ + * +============+ | | MESSAGE(s) | | + * | address |--------->+=================+============+========+ + * +------------+ + * | head | ^-----head--------^ + * +------------+ + * | tail | ^---------tail-----------------^ + * +------------+ + * | size | ^---------------size--------------------^ + * +------------+ + * + * Each message in data stream starts with the single u32 treated as a header, + * followed by optional set of u32 data that makes message specific payload:: + * + * +------------+---------+---------+---------+ + * | MESSAGE | + * +------------+---------+---------+---------+ + * | msg[0] | [1] | ... | [n-1] | + * +------------+---------+---------+---------+ + * | MESSAGE | MESSAGE PAYLOAD | + * + HEADER +---------+---------+---------+ + * | | 0 | ... | n | + * +======+=====+=========+=========+=========+ + * | 31:16| code| | | | + * +------+-----+ | | | + * | 15:5|flags| | | | + * +------+-----+ | | | + * | 4:0| len| | | | + * +------+-----+---------+---------+---------+ + * + * ^-------------len-------------^ + * + * The message header consists of: + * + * - **len**, indicates length of the message payload (in u32) + * - **code**, indicates message code + * - **flags**, holds various bits to control message handling + */ + /* * Describes single command transport buffer. * Used by both guc-master and clients. @@ -534,16 +581,6 @@ struct guc_log_buffer_state { u32 version; } __packed; -union guc_log_control { - struct { - u32 logging_enabled:1; - u32 reserved1:3; - u32 verbosity:4; - u32 reserved2:24; - }; - u32 value; -} __packed; - struct guc_ctx_report { u32 report_return_status; u32 reserved1[64]; @@ -570,7 +607,68 @@ struct guc_shared_ctx_data { struct guc_ctx_report preempt_ctx_report[GUC_MAX_ENGINES_NUM]; } __packed; -/* This Action will be programmed in C180 - SOFT_SCRATCH_O_REG */ +/** + * DOC: MMIO based communication + * + * The MMIO based communication between Host and GuC uses software scratch + * registers, where first register holds data treated as message header, + * and other registers are used to hold message payload. + * + * For Gen9+, GuC uses software scratch registers 0xC180-0xC1B8 + * + * +-----------+---------+---------+---------+ + * | MMIO[0] | MMIO[1] | ... | MMIO[n] | + * +-----------+---------+---------+---------+ + * | header | optional payload | + * +======+====+=========+=========+=========+ + * | 31:28|type| | | | + * +------+----+ | | | + * | 27:16|data| | | | + * +------+----+ | | | + * | 15:0|code| | | | + * +------+----+---------+---------+---------+ + * + * The message header consists of: + * + * - **type**, indicates message type + * - **code**, indicates message code, is specific for **type** + * - **data**, indicates message data, optional, depends on **code** + * + * The following message **types** are supported: + * + * - **REQUEST**, indicates Host-to-GuC request, requested GuC action code + * must be priovided in **code** field. Optional action specific parameters + * can be provided in remaining payload registers or **data** field. + * + * - **RESPONSE**, indicates GuC-to-Host response from earlier GuC request, + * action response status will be provided in **code** field. Optional + * response data can be returned in remaining payload registers or **data** + * field. + */ + +#define INTEL_GUC_MSG_TYPE_SHIFT 28 +#define INTEL_GUC_MSG_TYPE_MASK (0xF << INTEL_GUC_MSG_TYPE_SHIFT) +#define INTEL_GUC_MSG_DATA_SHIFT 16 +#define INTEL_GUC_MSG_DATA_MASK (0xFFF << INTEL_GUC_MSG_DATA_SHIFT) +#define INTEL_GUC_MSG_CODE_SHIFT 0 +#define INTEL_GUC_MSG_CODE_MASK (0xFFFF << INTEL_GUC_MSG_CODE_SHIFT) + +#define __INTEL_GUC_MSG_GET(T, m) \ + (((m) & INTEL_GUC_MSG_ ## T ## _MASK) >> INTEL_GUC_MSG_ ## T ## _SHIFT) +#define INTEL_GUC_MSG_TO_TYPE(m) __INTEL_GUC_MSG_GET(TYPE, m) +#define INTEL_GUC_MSG_TO_DATA(m) __INTEL_GUC_MSG_GET(DATA, m) +#define INTEL_GUC_MSG_TO_CODE(m) __INTEL_GUC_MSG_GET(CODE, m) + +enum intel_guc_msg_type { + INTEL_GUC_MSG_TYPE_REQUEST = 0x0, + INTEL_GUC_MSG_TYPE_RESPONSE = 0xF, +}; + +#define __INTEL_GUC_MSG_TYPE_IS(T, m) \ + (INTEL_GUC_MSG_TO_TYPE(m) == INTEL_GUC_MSG_TYPE_ ## T) +#define INTEL_GUC_MSG_IS_REQUEST(m) __INTEL_GUC_MSG_TYPE_IS(REQUEST, m) +#define INTEL_GUC_MSG_IS_RESPONSE(m) __INTEL_GUC_MSG_TYPE_IS(RESPONSE, m) + enum intel_guc_action { INTEL_GUC_ACTION_DEFAULT = 0x0, INTEL_GUC_ACTION_REQUEST_PREEMPTION = 0x2, @@ -602,24 +700,22 @@ enum intel_guc_report_status { INTEL_GUC_REPORT_STATUS_COMPLETE = 0x4, }; -/* - * The GuC sends its response to a command by overwriting the - * command in SS0. The response is distinguishable from a command - * by the fact that all the MASK bits are set. The remaining bits - * give more detail. - */ -#define INTEL_GUC_RECV_MASK ((u32)0xF0000000) -#define INTEL_GUC_RECV_IS_RESPONSE(x) ((u32)(x) >= INTEL_GUC_RECV_MASK) -#define INTEL_GUC_RECV_STATUS(x) (INTEL_GUC_RECV_MASK | (x)) - -/* GUC will return status back to SOFT_SCRATCH_O_REG */ -enum intel_guc_status { - INTEL_GUC_STATUS_SUCCESS = INTEL_GUC_RECV_STATUS(0x0), - INTEL_GUC_STATUS_ALLOCATE_DOORBELL_FAIL = INTEL_GUC_RECV_STATUS(0x10), - INTEL_GUC_STATUS_DEALLOCATE_DOORBELL_FAIL = INTEL_GUC_RECV_STATUS(0x20), - INTEL_GUC_STATUS_GENERIC_FAIL = INTEL_GUC_RECV_STATUS(0x0000F000) +#define GUC_LOG_CONTROL_LOGGING_ENABLED (1 << 0) +#define GUC_LOG_CONTROL_VERBOSITY_SHIFT 4 +#define GUC_LOG_CONTROL_VERBOSITY_MASK (0xF << GUC_LOG_CONTROL_VERBOSITY_SHIFT) +#define GUC_LOG_CONTROL_DEFAULT_LOGGING (1 << 8) + +enum intel_guc_response_status { + INTEL_GUC_RESPONSE_STATUS_SUCCESS = 0x0, + INTEL_GUC_RESPONSE_STATUS_GENERIC_FAIL = 0xF000, }; +#define INTEL_GUC_MSG_IS_RESPONSE_SUCCESS(m) \ + (typecheck(u32, (m)) && \ + ((m) & (INTEL_GUC_MSG_TYPE_MASK | INTEL_GUC_MSG_CODE_MASK)) == \ + ((INTEL_GUC_MSG_TYPE_RESPONSE << INTEL_GUC_MSG_TYPE_SHIFT) | \ + (INTEL_GUC_RESPONSE_STATUS_SUCCESS << INTEL_GUC_MSG_CODE_SHIFT))) + /* This action will be programmed in C1BC - SOFT_SCRATCH_15_REG */ enum intel_guc_recv_message { INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED = BIT(1), diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c index c0c2e7d..401e170 100644 --- a/drivers/gpu/drm/i915/intel_guc_log.c +++ b/drivers/gpu/drm/i915/intel_guc_log.c @@ -23,12 +23,11 @@ */ #include <linux/debugfs.h> -#include <linux/relay.h> #include "intel_guc_log.h" #include "i915_drv.h" -static void guc_log_capture_logs(struct intel_guc *guc); +static void guc_log_capture_logs(struct intel_guc_log *log); /** * DOC: GuC firmware log @@ -39,7 +38,7 @@ static void guc_log_capture_logs(struct intel_guc *guc); * registers value. */ -static int guc_log_flush_complete(struct intel_guc *guc) +static int guc_action_flush_log_complete(struct intel_guc *guc) { u32 action[] = { INTEL_GUC_ACTION_LOG_BUFFER_FILE_FLUSH_COMPLETE @@ -48,7 +47,7 @@ static int guc_log_flush_complete(struct intel_guc *guc) return intel_guc_send(guc, action, ARRAY_SIZE(action)); } -static int guc_log_flush(struct intel_guc *guc) +static int guc_action_flush_log(struct intel_guc *guc) { u32 action[] = { INTEL_GUC_ACTION_FORCE_LOG_BUFFER_FLUSH, @@ -58,22 +57,40 @@ static int guc_log_flush(struct intel_guc *guc) return intel_guc_send(guc, action, ARRAY_SIZE(action)); } -static int guc_log_control(struct intel_guc *guc, bool enable, u32 verbosity) +static int guc_action_control_log(struct intel_guc *guc, bool enable, + bool default_logging, u32 verbosity) { - union guc_log_control control_val = { - { - .logging_enabled = enable, - .verbosity = verbosity, - }, - }; u32 action[] = { INTEL_GUC_ACTION_UK_LOG_ENABLE_LOGGING, - control_val.value + (enable ? GUC_LOG_CONTROL_LOGGING_ENABLED : 0) | + (verbosity << GUC_LOG_CONTROL_VERBOSITY_SHIFT) | + (default_logging ? GUC_LOG_CONTROL_DEFAULT_LOGGING : 0) }; + GEM_BUG_ON(verbosity > GUC_LOG_VERBOSITY_MAX); + return intel_guc_send(guc, action, ARRAY_SIZE(action)); } +static inline struct intel_guc *log_to_guc(struct intel_guc_log *log) +{ + return container_of(log, struct intel_guc, log); +} + +static void guc_log_enable_flush_events(struct intel_guc_log *log) +{ + intel_guc_enable_msg(log_to_guc(log), + INTEL_GUC_RECV_MSG_FLUSH_LOG_BUFFER | + INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED); +} + +static void guc_log_disable_flush_events(struct intel_guc_log *log) +{ + intel_guc_disable_msg(log_to_guc(log), + INTEL_GUC_RECV_MSG_FLUSH_LOG_BUFFER | + INTEL_GUC_RECV_MSG_CRASH_DUMP_POSTED); +} + /* * Sub buffer switch callback. Called whenever relay has to switch to a new * sub buffer, relay stays on the same sub buffer if 0 is returned. @@ -121,14 +138,7 @@ static struct dentry *create_buf_file_callback(const char *filename, if (!parent) return NULL; - /* - * Not using the channel filename passed as an argument, since for each - * channel relay appends the corresponding CPU number to the filename - * passed in relay_open(). This should be fine as relay just needs a - * dentry of the file associated with the channel buffer and that file's - * name need not be same as the filename passed as an argument. - */ - buf_file = debugfs_create_file("guc_log", mode, + buf_file = debugfs_create_file(filename, mode, parent, buf, &relay_file_operations); return buf_file; } @@ -149,59 +159,7 @@ static struct rchan_callbacks relay_callbacks = { .remove_buf_file = remove_buf_file_callback, }; -static int guc_log_relay_file_create(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - struct dentry *log_dir; - int ret; - - if (!i915_modparams.guc_log_level) - return 0; - - mutex_lock(&guc->log.runtime.relay_lock); - - /* For now create the log file in /sys/kernel/debug/dri/0 dir */ - log_dir = dev_priv->drm.primary->debugfs_root; - - /* - * If /sys/kernel/debug/dri/0 location do not exist, then debugfs is - * not mounted and so can't create the relay file. - * The relay API seems to fit well with debugfs only, for availing relay - * there are 3 requirements which can be met for debugfs file only in a - * straightforward/clean manner :- - * i) Need the associated dentry pointer of the file, while opening the - * relay channel. - * ii) Should be able to use 'relay_file_operations' fops for the file. - * iii) Set the 'i_private' field of file's inode to the pointer of - * relay channel buffer. - */ - if (!log_dir) { - DRM_ERROR("Debugfs dir not available yet for GuC log file\n"); - ret = -ENODEV; - goto out_unlock; - } - - ret = relay_late_setup_files(guc->log.runtime.relay_chan, "guc_log", log_dir); - if (ret < 0 && ret != -EEXIST) { - DRM_ERROR("Couldn't associate relay chan with file %d\n", ret); - goto out_unlock; - } - - ret = 0; - -out_unlock: - mutex_unlock(&guc->log.runtime.relay_lock); - return ret; -} - -static bool guc_log_has_relay(struct intel_guc *guc) -{ - lockdep_assert_held(&guc->log.runtime.relay_lock); - - return guc->log.runtime.relay_chan != NULL; -} - -static void guc_move_to_next_buf(struct intel_guc *guc) +static void guc_move_to_next_buf(struct intel_guc_log *log) { /* * Make sure the updates made in the sub buffer are visible when @@ -209,21 +167,15 @@ static void guc_move_to_next_buf(struct intel_guc *guc) */ smp_wmb(); - if (!guc_log_has_relay(guc)) - return; - /* All data has been written, so now move the offset of sub buffer. */ - relay_reserve(guc->log.runtime.relay_chan, guc->log.vma->obj->base.size); + relay_reserve(log->relay.channel, log->vma->obj->base.size); /* Switch to the next sub buffer */ - relay_flush(guc->log.runtime.relay_chan); + relay_flush(log->relay.channel); } -static void *guc_get_write_buffer(struct intel_guc *guc) +static void *guc_get_write_buffer(struct intel_guc_log *log) { - if (!guc_log_has_relay(guc)) - return NULL; - /* * Just get the base address of a new sub buffer and copy data into it * ourselves. NULL will be returned in no-overwrite mode, if all sub @@ -233,25 +185,25 @@ static void *guc_get_write_buffer(struct intel_guc *guc) * done without using relay_reserve() along with relay_write(). So its * better to use relay_reserve() alone. */ - return relay_reserve(guc->log.runtime.relay_chan, 0); + return relay_reserve(log->relay.channel, 0); } -static bool guc_check_log_buf_overflow(struct intel_guc *guc, +static bool guc_check_log_buf_overflow(struct intel_guc_log *log, enum guc_log_buffer_type type, unsigned int full_cnt) { - unsigned int prev_full_cnt = guc->log.prev_overflow_count[type]; + unsigned int prev_full_cnt = log->stats[type].sampled_overflow; bool overflow = false; if (full_cnt != prev_full_cnt) { overflow = true; - guc->log.prev_overflow_count[type] = full_cnt; - guc->log.total_overflow_count[type] += full_cnt - prev_full_cnt; + log->stats[type].overflow = full_cnt; + log->stats[type].sampled_overflow += full_cnt - prev_full_cnt; if (full_cnt < prev_full_cnt) { /* buffer_full_cnt is a 4 bit counter */ - guc->log.total_overflow_count[type] += 16; + log->stats[type].sampled_overflow += 16; } DRM_ERROR_RATELIMITED("GuC log buffer overflow\n"); } @@ -275,7 +227,7 @@ static unsigned int guc_get_log_buffer_size(enum guc_log_buffer_type type) return 0; } -static void guc_read_update_log_buffer(struct intel_guc *guc) +static void guc_read_update_log_buffer(struct intel_guc_log *log) { unsigned int buffer_size, read_offset, write_offset, bytes_to_copy, full_cnt; struct guc_log_buffer_state *log_buf_state, *log_buf_snapshot_state; @@ -284,16 +236,16 @@ static void guc_read_update_log_buffer(struct intel_guc *guc) void *src_data, *dst_data; bool new_overflow; - if (WARN_ON(!guc->log.runtime.buf_addr)) - return; + mutex_lock(&log->relay.lock); - /* Get the pointer to shared GuC log buffer */ - log_buf_state = src_data = guc->log.runtime.buf_addr; + if (WARN_ON(!intel_guc_log_relay_enabled(log))) + goto out_unlock; - mutex_lock(&guc->log.runtime.relay_lock); + /* Get the pointer to shared GuC log buffer */ + log_buf_state = src_data = log->relay.buf_addr; /* Get the pointer to local buffer to store the logs */ - log_buf_snapshot_state = dst_data = guc_get_write_buffer(guc); + log_buf_snapshot_state = dst_data = guc_get_write_buffer(log); if (unlikely(!log_buf_snapshot_state)) { /* @@ -301,10 +253,9 @@ static void guc_read_update_log_buffer(struct intel_guc *guc) * getting consumed by User at a slow rate. */ DRM_ERROR_RATELIMITED("no sub-buffer to capture logs\n"); - guc->log.capture_miss_count++; - mutex_unlock(&guc->log.runtime.relay_lock); + log->relay.full_count++; - return; + goto out_unlock; } /* Actual logs are present from the 2nd page */ @@ -325,8 +276,8 @@ static void guc_read_update_log_buffer(struct intel_guc *guc) full_cnt = log_buf_state_local.buffer_full_cnt; /* Bookkeeping stuff */ - guc->log.flush_count[type] += log_buf_state_local.flush_to_file; - new_overflow = guc_check_log_buf_overflow(guc, type, full_cnt); + log->stats[type].flush += log_buf_state_local.flush_to_file; + new_overflow = guc_check_log_buf_overflow(log, type, full_cnt); /* Update the state of shared log buffer */ log_buf_state->read_ptr = write_offset; @@ -373,38 +324,35 @@ static void guc_read_update_log_buffer(struct intel_guc *guc) dst_data += buffer_size; } - guc_move_to_next_buf(guc); + guc_move_to_next_buf(log); - mutex_unlock(&guc->log.runtime.relay_lock); +out_unlock: + mutex_unlock(&log->relay.lock); } static void capture_logs_work(struct work_struct *work) { - struct intel_guc *guc = - container_of(work, struct intel_guc, log.runtime.flush_work); - - guc_log_capture_logs(guc); -} + struct intel_guc_log *log = + container_of(work, struct intel_guc_log, relay.flush_work); -static bool guc_log_has_runtime(struct intel_guc *guc) -{ - return guc->log.runtime.buf_addr != NULL; + guc_log_capture_logs(log); } -static int guc_log_runtime_create(struct intel_guc *guc) +static int guc_log_map(struct intel_guc_log *log) { + struct intel_guc *guc = log_to_guc(log); struct drm_i915_private *dev_priv = guc_to_i915(guc); void *vaddr; int ret; - lockdep_assert_held(&dev_priv->drm.struct_mutex); + lockdep_assert_held(&log->relay.lock); - if (!guc->log.vma) + if (!log->vma) return -ENODEV; - GEM_BUG_ON(guc_log_has_runtime(guc)); - - ret = i915_gem_object_set_to_wc_domain(guc->log.vma->obj, true); + mutex_lock(&dev_priv->drm.struct_mutex); + ret = i915_gem_object_set_to_wc_domain(log->vma->obj, true); + mutex_unlock(&dev_priv->drm.struct_mutex); if (ret) return ret; @@ -413,49 +361,40 @@ static int guc_log_runtime_create(struct intel_guc *guc) * buffer pages, so that we can directly get the data * (up-to-date) from memory. */ - vaddr = i915_gem_object_pin_map(guc->log.vma->obj, I915_MAP_WC); + vaddr = i915_gem_object_pin_map(log->vma->obj, I915_MAP_WC); if (IS_ERR(vaddr)) { DRM_ERROR("Couldn't map log buffer pages %d\n", ret); return PTR_ERR(vaddr); } - guc->log.runtime.buf_addr = vaddr; + log->relay.buf_addr = vaddr; return 0; } -static void guc_log_runtime_destroy(struct intel_guc *guc) +static void guc_log_unmap(struct intel_guc_log *log) { - /* - * It's possible that the runtime stuff was never allocated because - * GuC log was disabled at the boot time. - */ - if (!guc_log_has_runtime(guc)) - return; + lockdep_assert_held(&log->relay.lock); - i915_gem_object_unpin_map(guc->log.vma->obj); - guc->log.runtime.buf_addr = NULL; + i915_gem_object_unpin_map(log->vma->obj); + log->relay.buf_addr = NULL; } -void intel_guc_log_init_early(struct intel_guc *guc) +void intel_guc_log_init_early(struct intel_guc_log *log) { - mutex_init(&guc->log.runtime.relay_lock); - INIT_WORK(&guc->log.runtime.flush_work, capture_logs_work); + mutex_init(&log->relay.lock); + INIT_WORK(&log->relay.flush_work, capture_logs_work); } -int intel_guc_log_relay_create(struct intel_guc *guc) +static int guc_log_relay_create(struct intel_guc_log *log) { + struct intel_guc *guc = log_to_guc(log); struct drm_i915_private *dev_priv = guc_to_i915(guc); struct rchan *guc_log_relay_chan; size_t n_subbufs, subbuf_size; int ret; - if (!i915_modparams.guc_log_level) - return 0; - - mutex_lock(&guc->log.runtime.relay_lock); - - GEM_BUG_ON(guc_log_has_relay(guc)); + lockdep_assert_held(&log->relay.lock); /* Keep the size of sub buffers same as shared log buffer */ subbuf_size = GUC_LOG_SIZE; @@ -468,157 +407,56 @@ int intel_guc_log_relay_create(struct intel_guc *guc) */ n_subbufs = 8; - /* - * Create a relay channel, so that we have buffers for storing - * the GuC firmware logs, the channel will be linked with a file - * later on when debugfs is registered. - */ - guc_log_relay_chan = relay_open(NULL, NULL, subbuf_size, - n_subbufs, &relay_callbacks, dev_priv); + guc_log_relay_chan = relay_open("guc_log", + dev_priv->drm.primary->debugfs_root, + subbuf_size, n_subbufs, + &relay_callbacks, dev_priv); if (!guc_log_relay_chan) { DRM_ERROR("Couldn't create relay chan for GuC logging\n"); ret = -ENOMEM; - goto err; + return ret; } GEM_BUG_ON(guc_log_relay_chan->subbuf_size < subbuf_size); - guc->log.runtime.relay_chan = guc_log_relay_chan; - - mutex_unlock(&guc->log.runtime.relay_lock); + log->relay.channel = guc_log_relay_chan; return 0; - -err: - mutex_unlock(&guc->log.runtime.relay_lock); - /* logging will be off */ - i915_modparams.guc_log_level = 0; - return ret; -} - -void intel_guc_log_relay_destroy(struct intel_guc *guc) -{ - mutex_lock(&guc->log.runtime.relay_lock); - - /* - * It's possible that the relay was never allocated because - * GuC log was disabled at the boot time. - */ - if (!guc_log_has_relay(guc)) - goto out_unlock; - - relay_close(guc->log.runtime.relay_chan); - guc->log.runtime.relay_chan = NULL; - -out_unlock: - mutex_unlock(&guc->log.runtime.relay_lock); } -static int guc_log_late_setup(struct intel_guc *guc) +static void guc_log_relay_destroy(struct intel_guc_log *log) { - struct drm_i915_private *dev_priv = guc_to_i915(guc); - int ret; - - if (!guc_log_has_runtime(guc)) { - /* - * If log was disabled at boot time, then setup needed to handle - * log buffer flush interrupts would not have been done yet, so - * do that now. - */ - ret = intel_guc_log_relay_create(guc); - if (ret) - goto err; - - mutex_lock(&dev_priv->drm.struct_mutex); - intel_runtime_pm_get(dev_priv); - ret = guc_log_runtime_create(guc); - intel_runtime_pm_put(dev_priv); - mutex_unlock(&dev_priv->drm.struct_mutex); - - if (ret) - goto err_relay; - } - - ret = guc_log_relay_file_create(guc); - if (ret) - goto err_runtime; - - return 0; + lockdep_assert_held(&log->relay.lock); -err_runtime: - mutex_lock(&dev_priv->drm.struct_mutex); - guc_log_runtime_destroy(guc); - mutex_unlock(&dev_priv->drm.struct_mutex); -err_relay: - intel_guc_log_relay_destroy(guc); -err: - /* logging will remain off */ - i915_modparams.guc_log_level = 0; - return ret; + relay_close(log->relay.channel); + log->relay.channel = NULL; } -static void guc_log_capture_logs(struct intel_guc *guc) +static void guc_log_capture_logs(struct intel_guc_log *log) { + struct intel_guc *guc = log_to_guc(log); struct drm_i915_private *dev_priv = guc_to_i915(guc); - guc_read_update_log_buffer(guc); + guc_read_update_log_buffer(log); /* * Generally device is expected to be active only at this * time, so get/put should be really quick. */ intel_runtime_pm_get(dev_priv); - guc_log_flush_complete(guc); - intel_runtime_pm_put(dev_priv); -} - -static void guc_flush_logs(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - - if (!USES_GUC_SUBMISSION(dev_priv) || !i915_modparams.guc_log_level) - return; - - /* First disable the interrupts, will be renabled afterwards */ - mutex_lock(&dev_priv->drm.struct_mutex); - intel_runtime_pm_get(dev_priv); - gen9_disable_guc_interrupts(dev_priv); - intel_runtime_pm_put(dev_priv); - mutex_unlock(&dev_priv->drm.struct_mutex); - - /* - * Before initiating the forceful flush, wait for any pending/ongoing - * flush to complete otherwise forceful flush may not actually happen. - */ - flush_work(&guc->log.runtime.flush_work); - - /* Ask GuC to update the log buffer state */ - intel_runtime_pm_get(dev_priv); - guc_log_flush(guc); + guc_action_flush_log_complete(guc); intel_runtime_pm_put(dev_priv); - - /* GuC would have updated log buffer by now, so capture it */ - guc_log_capture_logs(guc); } -int intel_guc_log_create(struct intel_guc *guc) +int intel_guc_log_create(struct intel_guc_log *log) { + struct intel_guc *guc = log_to_guc(log); struct i915_vma *vma; unsigned long offset; u32 flags; int ret; - GEM_BUG_ON(guc->log.vma); - - /* - * We require SSE 4.1 for fast reads from the GuC log buffer and - * it should be present on the chipsets supporting GuC based - * submisssions. - */ - if (WARN_ON(!i915_has_memcpy_from_wc())) { - ret = -EINVAL; - goto err; - } + GEM_BUG_ON(log->vma); vma = intel_guc_allocate_vma(guc, GUC_LOG_SIZE); if (IS_ERR(vma)) { @@ -626,13 +464,7 @@ int intel_guc_log_create(struct intel_guc *guc) goto err; } - guc->log.vma = vma; - - if (i915_modparams.guc_log_level) { - ret = guc_log_runtime_create(guc); - if (ret < 0) - goto err_vma; - } + log->vma = vma; /* each allocated unit is a page */ flags = GUC_LOG_VALID | GUC_LOG_NOTIFY_ON_HALF_FULL | @@ -640,117 +472,159 @@ int intel_guc_log_create(struct intel_guc *guc) (GUC_LOG_ISR_PAGES << GUC_LOG_ISR_SHIFT) | (GUC_LOG_CRASH_PAGES << GUC_LOG_CRASH_SHIFT); - offset = guc_ggtt_offset(vma) >> PAGE_SHIFT; /* in pages */ - guc->log.flags = (offset << GUC_LOG_BUF_ADDR_SHIFT) | flags; + offset = intel_guc_ggtt_offset(guc, vma) >> PAGE_SHIFT; + log->flags = (offset << GUC_LOG_BUF_ADDR_SHIFT) | flags; return 0; -err_vma: - i915_vma_unpin_and_release(&guc->log.vma); err: /* logging will be off */ i915_modparams.guc_log_level = 0; return ret; } -void intel_guc_log_destroy(struct intel_guc *guc) +void intel_guc_log_destroy(struct intel_guc_log *log) +{ + i915_vma_unpin_and_release(&log->vma); +} + +int intel_guc_log_level_get(struct intel_guc_log *log) { - guc_log_runtime_destroy(guc); - i915_vma_unpin_and_release(&guc->log.vma); + GEM_BUG_ON(!log->vma); + GEM_BUG_ON(i915_modparams.guc_log_level < 0); + + return i915_modparams.guc_log_level; } -int intel_guc_log_control(struct intel_guc *guc, u64 control_val) +int intel_guc_log_level_set(struct intel_guc_log *log, u64 val) { + struct intel_guc *guc = log_to_guc(log); struct drm_i915_private *dev_priv = guc_to_i915(guc); - bool enable_logging = control_val > 0; - u32 verbosity; int ret; - if (!guc->log.vma) - return -ENODEV; + BUILD_BUG_ON(GUC_LOG_VERBOSITY_MIN != 0); + GEM_BUG_ON(!log->vma); + GEM_BUG_ON(i915_modparams.guc_log_level < 0); - BUILD_BUG_ON(GUC_LOG_VERBOSITY_MIN); - if (control_val > 1 + GUC_LOG_VERBOSITY_MAX) + /* + * GuC is recognizing log levels starting from 0 to max, we're using 0 + * as indication that logging should be disabled. + */ + if (val < GUC_LOG_LEVEL_DISABLED || val > GUC_LOG_LEVEL_MAX) return -EINVAL; - /* This combination doesn't make sense & won't have any effect */ - if (!enable_logging && !i915_modparams.guc_log_level) - return 0; + mutex_lock(&dev_priv->drm.struct_mutex); - verbosity = enable_logging ? control_val - 1 : 0; + if (i915_modparams.guc_log_level == val) { + ret = 0; + goto out_unlock; + } - ret = mutex_lock_interruptible(&dev_priv->drm.struct_mutex); - if (ret) - return ret; intel_runtime_pm_get(dev_priv); - ret = guc_log_control(guc, enable_logging, verbosity); + ret = guc_action_control_log(guc, GUC_LOG_LEVEL_IS_VERBOSE(val), + GUC_LOG_LEVEL_IS_ENABLED(val), + GUC_LOG_LEVEL_TO_VERBOSITY(val)); intel_runtime_pm_put(dev_priv); + if (ret) { + DRM_DEBUG_DRIVER("guc_log_control action failed %d\n", ret); + goto out_unlock; + } + + i915_modparams.guc_log_level = val; + +out_unlock: mutex_unlock(&dev_priv->drm.struct_mutex); - if (ret < 0) { - DRM_DEBUG_DRIVER("guc_logging_control action failed %d\n", ret); - return ret; - } + return ret; +} - if (enable_logging) { - i915_modparams.guc_log_level = 1 + verbosity; +bool intel_guc_log_relay_enabled(const struct intel_guc_log *log) +{ + return log->relay.buf_addr; +} - /* - * If log was disabled at boot time, then the relay channel file - * wouldn't have been created by now and interrupts also would - * not have been enabled. Try again now, just in case. - */ - ret = guc_log_late_setup(guc); - if (ret < 0) { - DRM_DEBUG_DRIVER("GuC log late setup failed %d\n", ret); - return ret; - } +int intel_guc_log_relay_open(struct intel_guc_log *log) +{ + int ret; - /* GuC logging is currently the only user of Guc2Host interrupts */ - mutex_lock(&dev_priv->drm.struct_mutex); - intel_runtime_pm_get(dev_priv); - gen9_enable_guc_interrupts(dev_priv); - intel_runtime_pm_put(dev_priv); - mutex_unlock(&dev_priv->drm.struct_mutex); - } else { - /* - * Once logging is disabled, GuC won't generate logs & send an - * interrupt. But there could be some data in the log buffer - * which is yet to be captured. So request GuC to update the log - * buffer state and then collect the left over logs. - */ - guc_flush_logs(guc); + mutex_lock(&log->relay.lock); - /* As logging is disabled, update log level to reflect that */ - i915_modparams.guc_log_level = 0; + if (intel_guc_log_relay_enabled(log)) { + ret = -EEXIST; + goto out_unlock; } - return ret; -} + /* + * We require SSE 4.1 for fast reads from the GuC log buffer and + * it should be present on the chipsets supporting GuC based + * submisssions. + */ + if (!i915_has_memcpy_from_wc()) { + ret = -ENXIO; + goto out_unlock; + } -void i915_guc_log_register(struct drm_i915_private *dev_priv) -{ - if (!USES_GUC_SUBMISSION(dev_priv) || !i915_modparams.guc_log_level) - return; + ret = guc_log_relay_create(log); + if (ret) + goto out_unlock; + + ret = guc_log_map(log); + if (ret) + goto out_relay; - guc_log_late_setup(&dev_priv->guc); + mutex_unlock(&log->relay.lock); + + guc_log_enable_flush_events(log); + + /* + * When GuC is logging without us relaying to userspace, we're ignoring + * the flush notification. This means that we need to unconditionally + * flush on relay enabling, since GuC only notifies us once. + */ + queue_work(log->relay.flush_wq, &log->relay.flush_work); + + return 0; + +out_relay: + guc_log_relay_destroy(log); +out_unlock: + mutex_unlock(&log->relay.lock); + + return ret; } -void i915_guc_log_unregister(struct drm_i915_private *dev_priv) +void intel_guc_log_relay_flush(struct intel_guc_log *log) { - struct intel_guc *guc = &dev_priv->guc; + struct intel_guc *guc = log_to_guc(log); + struct drm_i915_private *i915 = guc_to_i915(guc); + + /* + * Before initiating the forceful flush, wait for any pending/ongoing + * flush to complete otherwise forceful flush may not actually happen. + */ + flush_work(&log->relay.flush_work); - if (!USES_GUC_SUBMISSION(dev_priv)) - return; + intel_runtime_pm_get(i915); + guc_action_flush_log(guc); + intel_runtime_pm_put(i915); - mutex_lock(&dev_priv->drm.struct_mutex); - /* GuC logging is currently the only user of Guc2Host interrupts */ - intel_runtime_pm_get(dev_priv); - gen9_disable_guc_interrupts(dev_priv); - intel_runtime_pm_put(dev_priv); + /* GuC would have updated log buffer by now, so capture it */ + guc_log_capture_logs(log); +} - guc_log_runtime_destroy(guc); - mutex_unlock(&dev_priv->drm.struct_mutex); +void intel_guc_log_relay_close(struct intel_guc_log *log) +{ + guc_log_disable_flush_events(log); + flush_work(&log->relay.flush_work); + + mutex_lock(&log->relay.lock); + GEM_BUG_ON(!intel_guc_log_relay_enabled(log)); + guc_log_unmap(log); + guc_log_relay_destroy(log); + mutex_unlock(&log->relay.lock); +} - intel_guc_log_relay_destroy(guc); +void intel_guc_log_handle_flush_event(struct intel_guc_log *log) +{ + queue_work(log->relay.flush_wq, &log->relay.flush_work); } diff --git a/drivers/gpu/drm/i915/intel_guc_log.h b/drivers/gpu/drm/i915/intel_guc_log.h index dab0e94..fa80535 100644 --- a/drivers/gpu/drm/i915/intel_guc_log.h +++ b/drivers/gpu/drm/i915/intel_guc_log.h @@ -25,11 +25,12 @@ #ifndef _INTEL_GUC_LOG_H_ #define _INTEL_GUC_LOG_H_ +#include <linux/mutex.h> +#include <linux/relay.h> #include <linux/workqueue.h> #include "intel_guc_fwif.h" -struct drm_i915_private; struct intel_guc; /* @@ -39,33 +40,53 @@ struct intel_guc; #define GUC_LOG_SIZE ((1 + GUC_LOG_DPC_PAGES + 1 + GUC_LOG_ISR_PAGES + \ 1 + GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT) +/* + * While we're using plain log level in i915, GuC controls are much more... + * "elaborate"? We have a couple of bits for verbosity, separate bit for actual + * log enabling, and separate bit for default logging - which "conveniently" + * ignores the enable bit. + */ +#define GUC_LOG_LEVEL_DISABLED 0 +#define GUC_LOG_LEVEL_NON_VERBOSE 1 +#define GUC_LOG_LEVEL_IS_ENABLED(x) ((x) > GUC_LOG_LEVEL_DISABLED) +#define GUC_LOG_LEVEL_IS_VERBOSE(x) ((x) > GUC_LOG_LEVEL_NON_VERBOSE) +#define GUC_LOG_LEVEL_TO_VERBOSITY(x) ({ \ + typeof(x) _x = (x); \ + GUC_LOG_LEVEL_IS_VERBOSE(_x) ? _x - 2 : 0; \ +}) +#define GUC_VERBOSITY_TO_LOG_LEVEL(x) ((x) + 2) +#define GUC_LOG_LEVEL_MAX GUC_VERBOSITY_TO_LOG_LEVEL(GUC_LOG_VERBOSITY_MAX) + struct intel_guc_log { u32 flags; struct i915_vma *vma; - /* The runtime stuff gets created only when GuC logging gets enabled */ struct { void *buf_addr; struct workqueue_struct *flush_wq; struct work_struct flush_work; - struct rchan *relay_chan; - /* To serialize the access to relay_chan */ - struct mutex relay_lock; - } runtime; + struct rchan *channel; + struct mutex lock; + u32 full_count; + } relay; /* logging related stats */ - u32 capture_miss_count; - u32 flush_interrupt_count; - u32 prev_overflow_count[GUC_MAX_LOG_BUFFER]; - u32 total_overflow_count[GUC_MAX_LOG_BUFFER]; - u32 flush_count[GUC_MAX_LOG_BUFFER]; + struct { + u32 sampled_overflow; + u32 overflow; + u32 flush; + } stats[GUC_MAX_LOG_BUFFER]; }; -int intel_guc_log_create(struct intel_guc *guc); -void intel_guc_log_destroy(struct intel_guc *guc); -void intel_guc_log_init_early(struct intel_guc *guc); -int intel_guc_log_relay_create(struct intel_guc *guc); -void intel_guc_log_relay_destroy(struct intel_guc *guc); -int intel_guc_log_control(struct intel_guc *guc, u64 control_val); -void i915_guc_log_register(struct drm_i915_private *dev_priv); -void i915_guc_log_unregister(struct drm_i915_private *dev_priv); +void intel_guc_log_init_early(struct intel_guc_log *log); +int intel_guc_log_create(struct intel_guc_log *log); +void intel_guc_log_destroy(struct intel_guc_log *log); + +int intel_guc_log_level_get(struct intel_guc_log *log); +int intel_guc_log_level_set(struct intel_guc_log *log, u64 control_val); +bool intel_guc_log_relay_enabled(const struct intel_guc_log *log); +int intel_guc_log_relay_open(struct intel_guc_log *log); +void intel_guc_log_relay_flush(struct intel_guc_log *log); +void intel_guc_log_relay_close(struct intel_guc_log *log); + +void intel_guc_log_handle_flush_event(struct intel_guc_log *log); #endif diff --git a/drivers/gpu/drm/i915/intel_guc_reg.h b/drivers/gpu/drm/i915/intel_guc_reg.h index 19a9247..d860847 100644 --- a/drivers/gpu/drm/i915/intel_guc_reg.h +++ b/drivers/gpu/drm/i915/intel_guc_reg.h @@ -66,22 +66,20 @@ #define UOS_MOVE (1<<4) #define START_DMA (1<<0) #define DMA_GUC_WOPCM_OFFSET _MMIO(0xc340) +#define GUC_WOPCM_OFFSET_VALID (1<<0) #define HUC_LOADING_AGENT_VCR (0<<1) #define HUC_LOADING_AGENT_GUC (1<<1) -#define GUC_WOPCM_OFFSET_VALUE 0x80000 /* 512KB */ +#define GUC_WOPCM_OFFSET_SHIFT 14 +#define GUC_WOPCM_OFFSET_MASK (0x3ffff << GUC_WOPCM_OFFSET_SHIFT) #define GUC_MAX_IDLE_COUNT _MMIO(0xC3E4) #define HUC_STATUS2 _MMIO(0xD3B0) #define HUC_FW_VERIFIED (1<<7) -/* Defines WOPCM space available to GuC firmware */ #define GUC_WOPCM_SIZE _MMIO(0xc050) -/* GuC addresses below GUC_WOPCM_TOP don't map through the GTT */ -#define GUC_WOPCM_TOP (0x80 << 12) /* 512KB */ -#define BXT_GUC_WOPCM_RC6_RESERVED (0x10 << 12) /* 64KB */ - -/* GuC addresses above GUC_GGTT_TOP also don't map through the GTT */ -#define GUC_GGTT_TOP 0xFEE00000 +#define GUC_WOPCM_SIZE_LOCKED (1<<0) +#define GUC_WOPCM_SIZE_SHIFT 12 +#define GUC_WOPCM_SIZE_MASK (0xfffff << GUC_WOPCM_SIZE_SHIFT) #define GEN8_GT_PM_CONFIG _MMIO(0x138140) #define GEN9LP_GT_PM_CONFIG _MMIO(0x138140) diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 8a8ad2f..2feb650 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -124,9 +124,17 @@ static int reserve_doorbell(struct intel_guc_client *client) return 0; } +static bool has_doorbell(struct intel_guc_client *client) +{ + if (client->doorbell_id == GUC_DOORBELL_INVALID) + return false; + + return test_bit(client->doorbell_id, client->guc->doorbell_bitmap); +} + static void unreserve_doorbell(struct intel_guc_client *client) { - GEM_BUG_ON(client->doorbell_id == GUC_DOORBELL_INVALID); + GEM_BUG_ON(!has_doorbell(client)); __clear_bit(client->doorbell_id, client->guc->doorbell_bitmap); client->doorbell_id = GUC_DOORBELL_INVALID; @@ -184,14 +192,6 @@ static struct guc_doorbell_info *__get_doorbell(struct intel_guc_client *client) return client->vaddr + client->doorbell_offset; } -static bool has_doorbell(struct intel_guc_client *client) -{ - if (client->doorbell_id == GUC_DOORBELL_INVALID) - return false; - - return test_bit(client->doorbell_id, client->guc->doorbell_bitmap); -} - static void __create_doorbell(struct intel_guc_client *client) { struct guc_doorbell_info *doorbell; @@ -207,7 +207,6 @@ static void __destroy_doorbell(struct intel_guc_client *client) struct guc_doorbell_info *doorbell; u16 db_id = client->doorbell_id; - doorbell = __get_doorbell(client); doorbell->db_status = GUC_DOORBELL_DISABLED; doorbell->cookie = 0; @@ -224,6 +223,9 @@ static int create_doorbell(struct intel_guc_client *client) { int ret; + if (WARN_ON(!has_doorbell(client))) + return -ENODEV; /* internal setup error, should never happen */ + __update_doorbell_desc(client, client->doorbell_id); __create_doorbell(client); @@ -231,8 +233,8 @@ static int create_doorbell(struct intel_guc_client *client) if (ret) { __destroy_doorbell(client); __update_doorbell_desc(client, GUC_DOORBELL_INVALID); - DRM_ERROR("Couldn't create client %u doorbell: %d\n", - client->stage_id, ret); + DRM_DEBUG_DRIVER("Couldn't create client %u doorbell: %d\n", + client->stage_id, ret); return ret; } @@ -362,7 +364,7 @@ static void guc_stage_desc_init(struct intel_guc *guc, desc->db_id = client->doorbell_id; for_each_engine_masked(engine, dev_priv, client->engines, tmp) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(ctx, engine); u32 guc_engine_id = engine->guc_id; struct guc_execlist_context *lrc = &desc->lrc[guc_engine_id]; @@ -386,8 +388,8 @@ static void guc_stage_desc_init(struct intel_guc *guc, lrc->context_desc = lower_32_bits(ce->lrc_desc); /* The state page is after PPHWSP */ - lrc->ring_lrca = - guc_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE; + lrc->ring_lrca = intel_guc_ggtt_offset(guc, ce->state) + + LRC_STATE_PN * PAGE_SIZE; /* XXX: In direct submission, the GuC wants the HW context id * here. In proxy submission, it wants the stage id @@ -395,7 +397,7 @@ static void guc_stage_desc_init(struct intel_guc *guc, lrc->context_id = (client->stage_id << GUC_ELC_CTXID_OFFSET) | (guc_engine_id << GUC_ELC_ENGINE_OFFSET); - lrc->ring_begin = guc_ggtt_offset(ce->ring->vma); + lrc->ring_begin = intel_guc_ggtt_offset(guc, ce->ring->vma); lrc->ring_end = lrc->ring_begin + ce->ring->size - 1; lrc->ring_next_free_location = lrc->ring_begin; lrc->ring_current_tail_pointer_value = 0; @@ -411,7 +413,7 @@ static void guc_stage_desc_init(struct intel_guc *guc, * The doorbell, process descriptor, and workqueue are all parts * of the client object, which the GuC will reference via the GGTT */ - gfx_addr = guc_ggtt_offset(client->vma); + gfx_addr = intel_guc_ggtt_offset(guc, client->vma); desc->db_trigger_phy = sg_dma_address(client->vma->pages->sgl) + client->doorbell_offset; desc->db_trigger_cpu = ptr_to_u64(__get_doorbell(client)); @@ -584,7 +586,7 @@ static void inject_preempt_context(struct work_struct *work) data[3] = engine->guc_id; data[4] = guc->execbuf_client->priority; data[5] = guc->execbuf_client->stage_id; - data[6] = guc_ggtt_offset(guc->shared_data); + data[6] = intel_guc_ggtt_offset(guc, guc->shared_data); if (WARN_ON(intel_guc_send(guc, data, ARRAY_SIZE(data)))) { execlists_clear_active(&engine->execlists, @@ -657,7 +659,17 @@ static void port_assign(struct execlist_port *port, struct i915_request *rq) port_set(port, i915_request_get(rq)); } -static void guc_dequeue(struct intel_engine_cs *engine) +static inline int rq_prio(const struct i915_request *rq) +{ + return rq->sched.attr.priority; +} + +static inline int port_prio(const struct execlist_port *port) +{ + return rq_prio(port_request(port)); +} + +static bool __guc_dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; struct execlist_port *port = execlists->port; @@ -667,28 +679,29 @@ static void guc_dequeue(struct intel_engine_cs *engine) bool submit = false; struct rb_node *rb; - spin_lock_irq(&engine->timeline->lock); + lockdep_assert_held(&engine->timeline.lock); + rb = execlists->first; GEM_BUG_ON(rb_first(&execlists->queue) != rb); if (port_isset(port)) { - if (engine->i915->preempt_context) { + if (intel_engine_has_preemption(engine)) { struct guc_preempt_work *preempt_work = &engine->i915->guc.preempt_work[engine->id]; + int prio = execlists->queue_priority; - if (execlists->queue_priority > - max(port_request(port)->priotree.priority, 0)) { + if (__execlists_need_preempt(prio, port_prio(port))) { execlists_set_active(execlists, EXECLISTS_ACTIVE_PREEMPT); queue_work(engine->i915->guc.preempt_wq, &preempt_work->work); - goto unlock; + return false; } } port++; if (port_isset(port)) - goto unlock; + return false; } GEM_BUG_ON(port_isset(port)); @@ -696,11 +709,11 @@ static void guc_dequeue(struct intel_engine_cs *engine) struct i915_priolist *p = to_priolist(rb); struct i915_request *rq, *rn; - list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { + list_for_each_entry_safe(rq, rn, &p->requests, sched.link) { if (last && rq->ctx != last->ctx) { if (port == last_port) { __list_del_many(&p->requests, - &rq->priotree.link); + &rq->sched.link); goto done; } @@ -709,7 +722,7 @@ static void guc_dequeue(struct intel_engine_cs *engine) port++; } - INIT_LIST_HEAD(&rq->priotree.link); + INIT_LIST_HEAD(&rq->sched.link); __i915_request_submit(rq); trace_i915_request_in(rq, port_index(port, execlists)); @@ -726,19 +739,34 @@ static void guc_dequeue(struct intel_engine_cs *engine) done: execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN; execlists->first = rb; - if (submit) { + if (submit) port_assign(port, last); - execlists_set_active(execlists, EXECLISTS_ACTIVE_USER); - guc_submit(engine); - } + if (last) + execlists_user_begin(execlists, execlists->port); /* We must always keep the beast fed if we have work piled up */ GEM_BUG_ON(port_isset(execlists->port) && !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER)); GEM_BUG_ON(execlists->first && !port_isset(execlists->port)); -unlock: - spin_unlock_irq(&engine->timeline->lock); + return submit; +} + +static void guc_dequeue(struct intel_engine_cs *engine) +{ + unsigned long flags; + bool submit; + + local_irq_save(flags); + + spin_lock(&engine->timeline.lock); + submit = __guc_dequeue(engine); + spin_unlock(&engine->timeline.lock); + + if (submit) + guc_submit(engine); + + local_irq_restore(flags); } static void guc_submission_tasklet(unsigned long data) @@ -748,17 +776,20 @@ static void guc_submission_tasklet(unsigned long data) struct execlist_port *port = execlists->port; struct i915_request *rq; - rq = port_request(&port[0]); + rq = port_request(port); while (rq && i915_request_completed(rq)) { trace_i915_request_out(rq); i915_request_put(rq); - execlists_port_complete(execlists, port); - - rq = port_request(&port[0]); + port = execlists_port_complete(execlists, port); + if (port_isset(port)) { + execlists_user_begin(execlists, port); + rq = port_request(port); + } else { + execlists_user_end(execlists); + rq = NULL; + } } - if (!rq) - execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER); if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT) && intel_read_status_page(engine, I915_GEM_HWS_PREEMPT_INDEX) == @@ -977,7 +1008,8 @@ static void guc_fill_preempt_context(struct intel_guc *guc) enum intel_engine_id id; for_each_engine(engine, dev_priv, id) { - struct intel_context *ce = &client->owner->engine[id]; + struct intel_context *ce = + to_intel_context(client->owner, engine); u32 addr = intel_hws_preempt_done_address(engine); u32 *cs; diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index c8ea510..d47e346 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -246,9 +246,8 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd) */ tmp = I915_READ_CTL(engine); if (tmp & RING_WAIT) { - i915_handle_error(dev_priv, BIT(engine->id), - "Kicking stuck wait on %s", - engine->name); + i915_handle_error(dev_priv, BIT(engine->id), 0, + "stuck wait on %s", engine->name); I915_WRITE_CTL(engine, tmp); return ENGINE_WAIT_KICK; } @@ -258,8 +257,8 @@ engine_stuck(struct intel_engine_cs *engine, u64 acthd) default: return ENGINE_DEAD; case 1: - i915_handle_error(dev_priv, ALL_ENGINES, - "Kicking stuck semaphore on %s", + i915_handle_error(dev_priv, ALL_ENGINES, 0, + "stuck semaphore on %s", engine->name); I915_WRITE_CTL(engine, tmp); return ENGINE_WAIT_KICK; @@ -357,7 +356,7 @@ static void hangcheck_accumulate_sample(struct intel_engine_cs *engine, break; case ENGINE_DEAD: - if (drm_debug & DRM_UT_DRIVER) { + if (GEM_SHOW_DEBUG()) { struct drm_printer p = drm_debug_printer("hangcheck"); intel_engine_dump(engine, &p, "%s\n", engine->name); } @@ -386,13 +385,13 @@ static void hangcheck_declare_hang(struct drm_i915_private *i915, if (stuck != hung) hung &= ~stuck; len = scnprintf(msg, sizeof(msg), - "%s on ", stuck == hung ? "No progress" : "Hang"); + "%s on ", stuck == hung ? "no progress" : "hang"); for_each_engine_masked(engine, i915, hung, tmp) len += scnprintf(msg + len, sizeof(msg) - len, "%s, ", engine->name); msg[len-2] = '\0'; - return i915_handle_error(i915, hung, "%s", msg); + return i915_handle_error(i915, hung, I915_ERROR_CAPTURE, "%s", msg); } /* @@ -453,6 +452,7 @@ static void i915_hangcheck_elapsed(struct work_struct *work) void intel_engine_init_hangcheck(struct intel_engine_cs *engine) { memset(&engine->hangcheck, 0, sizeof(engine->hangcheck)); + engine->hangcheck.action_timestamp = jiffies; } void intel_hangcheck_init(struct drm_i915_private *i915) diff --git a/drivers/gpu/drm/i915/intel_hdcp.c b/drivers/gpu/drm/i915/intel_hdcp.c index 14ca5d3..2db5da5 100644 --- a/drivers/gpu/drm/i915/intel_hdcp.c +++ b/drivers/gpu/drm/i915/intel_hdcp.c @@ -37,6 +37,43 @@ static int intel_hdcp_poll_ksv_fifo(struct intel_digital_port *intel_dig_port, return 0; } +static bool hdcp_key_loadable(struct drm_i915_private *dev_priv) +{ + struct i915_power_domains *power_domains = &dev_priv->power_domains; + struct i915_power_well *power_well; + enum i915_power_well_id id; + bool enabled = false; + + /* + * On HSW and BDW, Display HW loads the Key as soon as Display resumes. + * On all BXT+, SW can load the keys only when the PW#1 is turned on. + */ + if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) + id = HSW_DISP_PW_GLOBAL; + else + id = SKL_DISP_PW_1; + + mutex_lock(&power_domains->lock); + + /* PG1 (power well #1) needs to be enabled */ + for_each_power_well(dev_priv, power_well) { + if (power_well->id == id) { + enabled = power_well->ops->is_enabled(dev_priv, + power_well); + break; + } + } + mutex_unlock(&power_domains->lock); + + /* + * Another req for hdcp key loadability is enabled state of pll for + * cdclk. Without active crtc we wont land here. So we are assuming that + * cdclk is already on. + */ + + return enabled; +} + static void intel_hdcp_clear_keys(struct drm_i915_private *dev_priv) { I915_WRITE(HDCP_KEY_CONF, HDCP_CLEAR_KEYS_TRIGGER); @@ -142,53 +179,17 @@ bool intel_hdcp_is_ksv_valid(u8 *ksv) return true; } -/* Implements Part 2 of the HDCP authorization procedure */ static -int intel_hdcp_auth_downstream(struct intel_digital_port *intel_dig_port, - const struct intel_hdcp_shim *shim) +int intel_hdcp_validate_v_prime(struct intel_digital_port *intel_dig_port, + const struct intel_hdcp_shim *shim, + u8 *ksv_fifo, u8 num_downstream, u8 *bstatus) { struct drm_i915_private *dev_priv; u32 vprime, sha_text, sha_leftovers, rep_ctl; - u8 bstatus[2], num_downstream, *ksv_fifo; int ret, i, j, sha_idx; dev_priv = intel_dig_port->base.base.dev->dev_private; - ret = intel_hdcp_poll_ksv_fifo(intel_dig_port, shim); - if (ret) { - DRM_ERROR("KSV list failed to become ready (%d)\n", ret); - return ret; - } - - ret = shim->read_bstatus(intel_dig_port, bstatus); - if (ret) - return ret; - - if (DRM_HDCP_MAX_DEVICE_EXCEEDED(bstatus[0]) || - DRM_HDCP_MAX_CASCADE_EXCEEDED(bstatus[1])) { - DRM_ERROR("Max Topology Limit Exceeded\n"); - return -EPERM; - } - - /* - * When repeater reports 0 device count, HDCP1.4 spec allows disabling - * the HDCP encryption. That implies that repeater can't have its own - * display. As there is no consumption of encrypted content in the - * repeater with 0 downstream devices, we are failing the - * authentication. - */ - num_downstream = DRM_HDCP_NUM_DOWNSTREAM(bstatus[0]); - if (num_downstream == 0) - return -EINVAL; - - ksv_fifo = kzalloc(num_downstream * DRM_HDCP_KSV_LEN, GFP_KERNEL); - if (!ksv_fifo) - return -ENOMEM; - - ret = shim->read_ksv_fifo(intel_dig_port, num_downstream, ksv_fifo); - if (ret) - return ret; - /* Process V' values from the receiver */ for (i = 0; i < DRM_HDCP_V_PRIME_NUM_PARTS; i++) { ret = shim->read_v_prime_part(intel_dig_port, i, &vprime); @@ -353,7 +354,8 @@ int intel_hdcp_auth_downstream(struct intel_digital_port *intel_dig_port, return ret; sha_idx += sizeof(sha_text); } else { - DRM_ERROR("Invalid number of leftovers %d\n", sha_leftovers); + DRM_DEBUG_KMS("Invalid number of leftovers %d\n", + sha_leftovers); return -EINVAL; } @@ -381,17 +383,83 @@ int intel_hdcp_auth_downstream(struct intel_digital_port *intel_dig_port, if (intel_wait_for_register(dev_priv, HDCP_REP_CTL, HDCP_SHA1_COMPLETE, HDCP_SHA1_COMPLETE, 1)) { - DRM_ERROR("Timed out waiting for SHA1 complete\n"); + DRM_DEBUG_KMS("Timed out waiting for SHA1 complete\n"); return -ETIMEDOUT; } if (!(I915_READ(HDCP_REP_CTL) & HDCP_SHA1_V_MATCH)) { - DRM_ERROR("SHA-1 mismatch, HDCP failed\n"); + DRM_DEBUG_KMS("SHA-1 mismatch, HDCP failed\n"); return -ENXIO; } + return 0; +} + +/* Implements Part 2 of the HDCP authorization procedure */ +static +int intel_hdcp_auth_downstream(struct intel_digital_port *intel_dig_port, + const struct intel_hdcp_shim *shim) +{ + u8 bstatus[2], num_downstream, *ksv_fifo; + int ret, i, tries = 3; + + ret = intel_hdcp_poll_ksv_fifo(intel_dig_port, shim); + if (ret) { + DRM_ERROR("KSV list failed to become ready (%d)\n", ret); + return ret; + } + + ret = shim->read_bstatus(intel_dig_port, bstatus); + if (ret) + return ret; + + if (DRM_HDCP_MAX_DEVICE_EXCEEDED(bstatus[0]) || + DRM_HDCP_MAX_CASCADE_EXCEEDED(bstatus[1])) { + DRM_ERROR("Max Topology Limit Exceeded\n"); + return -EPERM; + } + + /* + * When repeater reports 0 device count, HDCP1.4 spec allows disabling + * the HDCP encryption. That implies that repeater can't have its own + * display. As there is no consumption of encrypted content in the + * repeater with 0 downstream devices, we are failing the + * authentication. + */ + num_downstream = DRM_HDCP_NUM_DOWNSTREAM(bstatus[0]); + if (num_downstream == 0) + return -EINVAL; + + ksv_fifo = kzalloc(num_downstream * DRM_HDCP_KSV_LEN, GFP_KERNEL); + if (!ksv_fifo) + return -ENOMEM; + + ret = shim->read_ksv_fifo(intel_dig_port, num_downstream, ksv_fifo); + if (ret) + goto err; + + /* + * When V prime mismatches, DP Spec mandates re-read of + * V prime atleast twice. + */ + for (i = 0; i < tries; i++) { + ret = intel_hdcp_validate_v_prime(intel_dig_port, shim, + ksv_fifo, num_downstream, + bstatus); + if (!ret) + break; + } + + if (i == tries) { + DRM_ERROR("V Prime validation failed.(%d)\n", ret); + goto err; + } + DRM_DEBUG_KMS("HDCP is enabled (%d downstream devices)\n", num_downstream); - return 0; + ret = 0; +err: + kfree(ksv_fifo); + return ret; } /* Implements Part 1 of the HDCP authorization procedure */ @@ -506,15 +574,26 @@ static int intel_hdcp_auth(struct intel_digital_port *intel_dig_port, */ wait_remaining_ms_from_jiffies(r0_prime_gen_start, 300); - ri.reg = 0; - ret = shim->read_ri_prime(intel_dig_port, ri.shim); - if (ret) - return ret; - I915_WRITE(PORT_HDCP_RPRIME(port), ri.reg); + tries = 3; - /* Wait for Ri prime match */ - if (wait_for(I915_READ(PORT_HDCP_STATUS(port)) & - (HDCP_STATUS_RI_MATCH | HDCP_STATUS_ENC), 1)) { + /* + * DP HDCP Spec mandates the two more reattempt to read R0, incase + * of R0 mismatch. + */ + for (i = 0; i < tries; i++) { + ri.reg = 0; + ret = shim->read_ri_prime(intel_dig_port, ri.shim); + if (ret) + return ret; + I915_WRITE(PORT_HDCP_RPRIME(port), ri.reg); + + /* Wait for Ri prime match */ + if (!wait_for(I915_READ(PORT_HDCP_STATUS(port)) & + (HDCP_STATUS_RI_MATCH | HDCP_STATUS_ENC), 1)) + break; + } + + if (i == tries) { DRM_ERROR("Timed out waiting for Ri prime match (%x)\n", I915_READ(PORT_HDCP_STATUS(port))); return -ETIMEDOUT; @@ -580,8 +659,8 @@ static int _intel_hdcp_enable(struct intel_connector *connector) DRM_DEBUG_KMS("[%s:%d] HDCP is being enabled...\n", connector->base.name, connector->base.base.id); - if (!(I915_READ(SKL_FUSE_STATUS) & SKL_FUSE_PG_DIST_STATUS(1))) { - DRM_ERROR("PG1 is disabled, cannot load keys\n"); + if (!hdcp_key_loadable(dev_priv)) { + DRM_ERROR("HDCP key Load is not possible\n"); return -ENXIO; } diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 1baef4a..ee929f3 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -2082,41 +2082,33 @@ intel_hdmi_add_properties(struct intel_hdmi *intel_hdmi, struct drm_connector *c * it enables scrambling. This should be called before enabling the HDMI * 2.0 port, as the sink can choose to disable the scrambling if it doesn't * detect a scrambled clock within 100 ms. + * + * Returns: + * True on success, false on failure. */ -void intel_hdmi_handle_sink_scrambling(struct intel_encoder *encoder, +bool intel_hdmi_handle_sink_scrambling(struct intel_encoder *encoder, struct drm_connector *connector, bool high_tmds_clock_ratio, bool scrambling) { + struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base); - struct drm_i915_private *dev_priv = connector->dev->dev_private; struct drm_scrambling *sink_scrambling = - &connector->display_info.hdmi.scdc.scrambling; - struct i2c_adapter *adptr = intel_gmbus_get_adapter(dev_priv, - intel_hdmi->ddc_bus); - bool ret; + &connector->display_info.hdmi.scdc.scrambling; + struct i2c_adapter *adapter = + intel_gmbus_get_adapter(dev_priv, intel_hdmi->ddc_bus); if (!sink_scrambling->supported) - return; - - DRM_DEBUG_KMS("Setting sink scrambling for enc:%s connector:%s\n", - encoder->base.name, connector->name); + return true; - /* Set TMDS bit clock ratio to 1/40 or 1/10 */ - ret = drm_scdc_set_high_tmds_clock_ratio(adptr, high_tmds_clock_ratio); - if (!ret) { - DRM_ERROR("Set TMDS ratio failed\n"); - return; - } - - /* Enable/disable sink scrambling */ - ret = drm_scdc_set_scrambling(adptr, scrambling); - if (!ret) { - DRM_ERROR("Set sink scrambling failed\n"); - return; - } + DRM_DEBUG_KMS("[CONNECTOR:%d:%s] scrambling=%s, TMDS bit clock ratio=1/%d\n", + connector->base.id, connector->name, + yesno(scrambling), high_tmds_clock_ratio ? 40 : 10); - DRM_DEBUG_KMS("sink scrambling handled\n"); + /* Set TMDS bit clock ratio to 1/40 or 1/10, and enable/disable scrambling */ + return drm_scdc_set_high_tmds_clock_ratio(adapter, + high_tmds_clock_ratio) && + drm_scdc_set_scrambling(adapter, scrambling); } static u8 chv_port_to_ddc_pin(struct drm_i915_private *dev_priv, enum port port) diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c index 0e3d3e8..43aa92b 100644 --- a/drivers/gpu/drm/i915/intel_hotplug.c +++ b/drivers/gpu/drm/i915/intel_hotplug.c @@ -100,6 +100,8 @@ enum port intel_hpd_pin_to_port(struct drm_i915_private *dev_priv, if (IS_CNL_WITH_PORT_F(dev_priv)) return PORT_F; return PORT_E; + case HPD_PORT_F: + return PORT_F; default: return PORT_NONE; /* no port for this pin */ } @@ -132,6 +134,7 @@ enum hpd_pin intel_hpd_pin_default(struct drm_i915_private *dev_priv, case PORT_F: if (IS_CNL_WITH_PORT_F(dev_priv)) return HPD_PORT_E; + return HPD_PORT_F; default: MISSING_CASE(port); return HPD_NONE; diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index 65e2afb..2912852 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -55,7 +55,7 @@ int intel_huc_auth(struct intel_huc *huc) return -ENOEXEC; vma = i915_gem_object_ggtt_pin(huc->fw.obj, NULL, 0, 0, - PIN_OFFSET_BIAS | GUC_WOPCM_TOP); + PIN_OFFSET_BIAS | guc->ggtt_pin_bias); if (IS_ERR(vma)) { ret = PTR_ERR(vma); DRM_ERROR("HuC: Failed to pin huc fw object %d\n", ret); @@ -63,7 +63,8 @@ int intel_huc_auth(struct intel_huc *huc) } ret = intel_guc_auth_huc(guc, - guc_ggtt_offset(vma) + huc->fw.rsa_offset); + intel_guc_ggtt_offset(guc, vma) + + huc->fw.rsa_offset); if (ret) { DRM_ERROR("HuC: GuC did not ack Auth request %d\n", ret); goto fail_unpin; @@ -91,3 +92,28 @@ fail: DRM_ERROR("HuC: Authentication failed %d\n", ret); return ret; } + +/** + * intel_huc_check_status() - check HuC status + * @huc: intel_huc structure + * + * This function reads status register to verify if HuC + * firmware was successfully loaded. + * + * Returns positive value if HuC firmware is loaded and verified + * and -ENODEV if HuC is not present. + */ +int intel_huc_check_status(struct intel_huc *huc) +{ + struct drm_i915_private *dev_priv = huc_to_i915(huc); + u32 status; + + if (!HAS_HUC(dev_priv)) + return -ENODEV; + + intel_runtime_pm_get(dev_priv); + status = I915_READ(HUC_STATUS2) & HUC_FW_VERIFIED; + intel_runtime_pm_put(dev_priv); + + return status; +} diff --git a/drivers/gpu/drm/i915/intel_huc.h b/drivers/gpu/drm/i915/intel_huc.h index 5d6e804..aa85490 100644 --- a/drivers/gpu/drm/i915/intel_huc.h +++ b/drivers/gpu/drm/i915/intel_huc.h @@ -37,5 +37,12 @@ struct intel_huc { void intel_huc_init_early(struct intel_huc *huc); int intel_huc_auth(struct intel_huc *huc); +int intel_huc_check_status(struct intel_huc *huc); + +static inline int intel_huc_sanitize(struct intel_huc *huc) +{ + intel_uc_fw_sanitize(&huc->fw); + return 0; +} #endif diff --git a/drivers/gpu/drm/i915/intel_huc_fw.c b/drivers/gpu/drm/i915/intel_huc_fw.c index c66afa9..f93d238 100644 --- a/drivers/gpu/drm/i915/intel_huc_fw.c +++ b/drivers/gpu/drm/i915/intel_huc_fw.c @@ -118,7 +118,8 @@ static int huc_fw_xfer(struct intel_uc_fw *huc_fw, struct i915_vma *vma) intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); /* Set the source address for the uCode */ - offset = guc_ggtt_offset(vma) + huc_fw->header_offset; + offset = intel_guc_ggtt_offset(&dev_priv->guc, vma) + + huc_fw->header_offset; I915_WRITE(DMA_ADDR_0_LOW, lower_32_bits(offset)); I915_WRITE(DMA_ADDR_0_HIGH, upper_32_bits(offset) & 0xFFFF); @@ -154,9 +155,8 @@ static int huc_fw_xfer(struct intel_uc_fw *huc_fw, struct i915_vma *vma) * Called from intel_uc_init_hw() during driver load, resume from sleep and * after a GPU reset. Note that HuC must be loaded before GuC. * - * The firmware image should have already been fetched into memory by the - * earlier call to intel_uc_init_fw(), so here we need to only check that - * fetch succeeded, and then transfer the image to the h/w. + * The firmware image should have already been fetched into memory, so only + * check that fetch succeeded, and then transfer the image to the h/w. * * Return: non-zero code on error */ diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 697af5a..15434ca 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -139,6 +139,7 @@ #include "i915_gem_render_state.h" #include "intel_lrc_reg.h" #include "intel_mocs.h" +#include "intel_workarounds.h" #define RING_EXECLIST_QFULL (1 << 0x2) #define RING_EXECLIST1_VALID (1 << 0x3) @@ -176,14 +177,16 @@ static inline struct i915_priolist *to_priolist(struct rb_node *rb) static inline int rq_prio(const struct i915_request *rq) { - return rq->priotree.priority; + return rq->sched.attr.priority; } static inline bool need_preempt(const struct intel_engine_cs *engine, const struct i915_request *last, int prio) { - return engine->i915->preempt_context && prio > max(rq_prio(last), 0); + return (intel_engine_has_preemption(engine) && + __execlists_need_preempt(prio, rq_prio(last)) && + !i915_request_completed(last)); } /** @@ -221,7 +224,7 @@ static void intel_lr_context_descriptor_update(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(ctx, engine); u64 desc; BUILD_BUG_ON(MAX_CONTEXT_HW_ID > (BIT(GEN8_CTX_ID_WIDTH))); @@ -255,9 +258,7 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx, } static struct i915_priolist * -lookup_priolist(struct intel_engine_cs *engine, - struct i915_priotree *pt, - int prio) +lookup_priolist(struct intel_engine_cs *engine, int prio) { struct intel_engine_execlists * const execlists = &engine->execlists; struct i915_priolist *p; @@ -328,10 +329,10 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine) struct i915_priolist *uninitialized_var(p); int last_prio = I915_PRIORITY_INVALID; - lockdep_assert_held(&engine->timeline->lock); + lockdep_assert_held(&engine->timeline.lock); list_for_each_entry_safe_reverse(rq, rn, - &engine->timeline->requests, + &engine->timeline.requests, link) { if (i915_request_completed(rq)) return; @@ -342,10 +343,11 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine) GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); if (rq_prio(rq) != last_prio) { last_prio = rq_prio(rq); - p = lookup_priolist(engine, &rq->priotree, last_prio); + p = lookup_priolist(engine, last_prio); } - list_add(&rq->priotree.link, &p->requests); + GEM_BUG_ON(p->priority != rq_prio(rq)); + list_add(&rq->sched.link, &p->requests); } } @@ -354,10 +356,13 @@ execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists) { struct intel_engine_cs *engine = container_of(execlists, typeof(*engine), execlists); + unsigned long flags; + + spin_lock_irqsave(&engine->timeline.lock, flags); - spin_lock_irq(&engine->timeline->lock); __unwind_incomplete_requests(engine); - spin_unlock_irq(&engine->timeline->lock); + + spin_unlock_irqrestore(&engine->timeline.lock, flags); } static inline void @@ -374,6 +379,19 @@ execlists_context_status_change(struct i915_request *rq, unsigned long status) status, rq); } +inline void +execlists_user_begin(struct intel_engine_execlists *execlists, + const struct execlist_port *port) +{ + execlists_set_active_once(execlists, EXECLISTS_ACTIVE_USER); +} + +inline void +execlists_user_end(struct intel_engine_execlists *execlists) +{ + execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER); +} + static inline void execlists_context_schedule_in(struct i915_request *rq) { @@ -382,10 +400,11 @@ execlists_context_schedule_in(struct i915_request *rq) } static inline void -execlists_context_schedule_out(struct i915_request *rq) +execlists_context_schedule_out(struct i915_request *rq, unsigned long status) { intel_engine_context_out(rq->engine); - execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); + execlists_context_status_change(rq, status); + trace_i915_request_out(rq); } static void @@ -399,7 +418,7 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state) static u64 execlists_update_context(struct i915_request *rq) { - struct intel_context *ce = &rq->ctx->engine[rq->engine->id]; + struct intel_context *ce = to_intel_context(rq->ctx, rq->engine); struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt; u32 *reg_state = ce->lrc_reg_state; @@ -454,10 +473,12 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) desc = execlists_update_context(rq); GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc)); - GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%x, prio=%d\n", + GEM_TRACE("%s in[%d]: ctx=%d.%d, global=%d (fence %llx:%d) (current %d), prio=%d\n", engine->name, n, port[n].context_id, count, rq->global_seqno, + rq->fence.context, rq->fence.seqno, + intel_engine_get_seqno(engine), rq_prio(rq)); } else { GEM_BUG_ON(!n); @@ -506,7 +527,7 @@ static void inject_preempt_context(struct intel_engine_cs *engine) { struct intel_engine_execlists *execlists = &engine->execlists; struct intel_context *ce = - &engine->i915->preempt_context->engine[engine->id]; + to_intel_context(engine->i915->preempt_context, engine); unsigned int n; GEM_BUG_ON(execlists->preempt_complete_status != @@ -535,7 +556,7 @@ static void inject_preempt_context(struct intel_engine_cs *engine) execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT); } -static void execlists_dequeue(struct intel_engine_cs *engine) +static bool __execlists_dequeue(struct intel_engine_cs *engine) { struct intel_engine_execlists * const execlists = &engine->execlists; struct execlist_port *port = execlists->port; @@ -545,6 +566,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine) struct rb_node *rb; bool submit = false; + lockdep_assert_held(&engine->timeline.lock); + /* Hardware submission is through 2 ports. Conceptually each port * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is * static for a context, and unique to each, so we only execute @@ -566,7 +589,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * and context switches) submission. */ - spin_lock_irq(&engine->timeline->lock); rb = execlists->first; GEM_BUG_ON(rb_first(&execlists->queue) != rb); @@ -577,9 +599,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * know the next preemption status we see corresponds * to this ELSP update. */ + GEM_BUG_ON(!execlists_is_active(execlists, + EXECLISTS_ACTIVE_USER)); GEM_BUG_ON(!port_count(&port[0])); if (port_count(&port[0]) > 1) - goto unlock; + return false; /* * If we write to ELSP a second time before the HW has had @@ -589,11 +613,11 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * the HW to indicate that it has had a chance to respond. */ if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_HWACK)) - goto unlock; + return false; if (need_preempt(engine, last, execlists->queue_priority)) { inject_preempt_context(engine); - goto unlock; + return false; } /* @@ -618,7 +642,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) * priorities of the ports haven't been switch. */ if (port_count(&port[1])) - goto unlock; + return false; /* * WaIdleLiteRestore:bdw,skl @@ -635,7 +659,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) struct i915_priolist *p = to_priolist(rb); struct i915_request *rq, *rn; - list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { + list_for_each_entry_safe(rq, rn, &p->requests, sched.link) { /* * Can we combine this request with the current port? * It has to be the same context/ringbuffer and not @@ -655,7 +679,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) */ if (port == last_port) { __list_del_many(&p->requests, - &rq->priotree.link); + &rq->sched.link); goto done; } @@ -669,7 +693,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (ctx_single_port_submission(last->ctx) || ctx_single_port_submission(rq->ctx)) { __list_del_many(&p->requests, - &rq->priotree.link); + &rq->sched.link); goto done; } @@ -682,7 +706,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine) GEM_BUG_ON(port_isset(port)); } - INIT_LIST_HEAD(&rq->priotree.link); + INIT_LIST_HEAD(&rq->sched.link); __i915_request_submit(rq); trace_i915_request_in(rq, port_index(port, execlists)); last = rq; @@ -695,8 +719,27 @@ static void execlists_dequeue(struct intel_engine_cs *engine) if (p->priority != I915_PRIORITY_NORMAL) kmem_cache_free(engine->i915->priorities, p); } + done: - execlists->queue_priority = rb ? to_priolist(rb)->priority : INT_MIN; + /* + * Here be a bit of magic! Or sleight-of-hand, whichever you prefer. + * + * We choose queue_priority such that if we add a request of greater + * priority than this, we kick the submission tasklet to decide on + * the right order of submitting the requests to hardware. We must + * also be prepared to reorder requests as they are in-flight on the + * HW. We derive the queue_priority then as the first "hole" in + * the HW submission ports and if there are no available slots, + * the priority of the lowest executing request, i.e. last. + * + * When we do receive a higher priority request ready to run from the + * user, see queue_request(), the queue_priority is bumped to that + * request triggering preemption on the next dequeue (or subsequent + * interrupt for secondary ports). + */ + execlists->queue_priority = + port != execlists->port ? rq_prio(last) : INT_MIN; + execlists->first = rb; if (submit) port_assign(port, last); @@ -704,13 +747,25 @@ done: /* We must always keep the beast fed if we have work piled up */ GEM_BUG_ON(execlists->first && !port_isset(execlists->port)); -unlock: - spin_unlock_irq(&engine->timeline->lock); + /* Re-evaluate the executing context setup after each preemptive kick */ + if (last) + execlists_user_begin(execlists, execlists->port); + + return submit; +} - if (submit) { - execlists_set_active(execlists, EXECLISTS_ACTIVE_USER); +static void execlists_dequeue(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + unsigned long flags; + bool submit; + + spin_lock_irqsave(&engine->timeline.lock, flags); + submit = __execlists_dequeue(engine); + spin_unlock_irqrestore(&engine->timeline.lock, flags); + + if (submit) execlists_submit_ports(engine); - } GEM_BUG_ON(port_isset(execlists->port) && !execlists_is_active(execlists, EXECLISTS_ACTIVE_USER)); @@ -725,19 +780,101 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists) while (num_ports-- && port_isset(port)) { struct i915_request *rq = port_request(port); - GEM_BUG_ON(!execlists->active); - intel_engine_context_out(rq->engine); + GEM_TRACE("%s:port%u global=%d (fence %llx:%d), (current %d)\n", + rq->engine->name, + (unsigned int)(port - execlists->port), + rq->global_seqno, + rq->fence.context, rq->fence.seqno, + intel_engine_get_seqno(rq->engine)); - execlists_context_status_change(rq, - i915_request_completed(rq) ? - INTEL_CONTEXT_SCHEDULE_OUT : - INTEL_CONTEXT_SCHEDULE_PREEMPTED); + GEM_BUG_ON(!execlists->active); + execlists_context_schedule_out(rq, + i915_request_completed(rq) ? + INTEL_CONTEXT_SCHEDULE_OUT : + INTEL_CONTEXT_SCHEDULE_PREEMPTED); i915_request_put(rq); memset(port, 0, sizeof(*port)); port++; } + + execlists_user_end(execlists); +} + +static void clear_gtiir(struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + int i; + + /* + * Clear any pending interrupt state. + * + * We do it twice out of paranoia that some of the IIR are + * double buffered, and so if we only reset it once there may + * still be an interrupt pending. + */ + if (INTEL_GEN(dev_priv) >= 11) { + static const struct { + u8 bank; + u8 bit; + } gen11_gtiir[] = { + [RCS] = {0, GEN11_RCS0}, + [BCS] = {0, GEN11_BCS}, + [_VCS(0)] = {1, GEN11_VCS(0)}, + [_VCS(1)] = {1, GEN11_VCS(1)}, + [_VCS(2)] = {1, GEN11_VCS(2)}, + [_VCS(3)] = {1, GEN11_VCS(3)}, + [_VECS(0)] = {1, GEN11_VECS(0)}, + [_VECS(1)] = {1, GEN11_VECS(1)}, + }; + unsigned long irqflags; + + GEM_BUG_ON(engine->id >= ARRAY_SIZE(gen11_gtiir)); + + spin_lock_irqsave(&dev_priv->irq_lock, irqflags); + for (i = 0; i < 2; i++) { + gen11_reset_one_iir(dev_priv, + gen11_gtiir[engine->id].bank, + gen11_gtiir[engine->id].bit); + } + spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags); + } else { + static const u8 gtiir[] = { + [RCS] = 0, + [BCS] = 0, + [VCS] = 1, + [VCS2] = 1, + [VECS] = 3, + }; + + GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir)); + + for (i = 0; i < 2; i++) { + I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]), + engine->irq_keep_mask); + POSTING_READ(GEN8_GT_IIR(gtiir[engine->id])); + } + GEM_BUG_ON(I915_READ(GEN8_GT_IIR(gtiir[engine->id])) & + engine->irq_keep_mask); + } +} + +static void reset_irq(struct intel_engine_cs *engine) +{ + /* Mark all CS interrupts as complete */ + smp_store_mb(engine->execlists.active, 0); + synchronize_hardirq(engine->i915->drm.irq); + + clear_gtiir(engine); + + /* + * The port is checked prior to scheduling a tasklet, but + * just in case we have suspended the tasklet to do the + * wedging make sure that when it wakes, it decides there + * is no work to do by clearing the irq_posted bit. + */ + clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); } static void execlists_cancel_requests(struct intel_engine_cs *engine) @@ -747,7 +884,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) struct rb_node *rb; unsigned long flags; - GEM_TRACE("%s\n", engine->name); + GEM_TRACE("%s current %d\n", + engine->name, intel_engine_get_seqno(engine)); /* * Before we call engine->cancel_requests(), we should have exclusive @@ -767,11 +905,12 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) /* Cancel the requests on the HW and clear the ELSP tracker. */ execlists_cancel_port_requests(execlists); + reset_irq(engine); - spin_lock(&engine->timeline->lock); + spin_lock(&engine->timeline.lock); /* Mark all executing requests as skipped. */ - list_for_each_entry(rq, &engine->timeline->requests, link) { + list_for_each_entry(rq, &engine->timeline.requests, link) { GEM_BUG_ON(!rq->global_seqno); if (!i915_request_completed(rq)) dma_fence_set_error(&rq->fence, -EIO); @@ -782,8 +921,8 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) while (rb) { struct i915_priolist *p = to_priolist(rb); - list_for_each_entry_safe(rq, rn, &p->requests, priotree.link) { - INIT_LIST_HEAD(&rq->priotree.link); + list_for_each_entry_safe(rq, rn, &p->requests, sched.link) { + INIT_LIST_HEAD(&rq->sched.link); dma_fence_set_error(&rq->fence, -EIO); __i915_request_submit(rq); @@ -803,18 +942,7 @@ static void execlists_cancel_requests(struct intel_engine_cs *engine) execlists->first = NULL; GEM_BUG_ON(port_isset(execlists->port)); - spin_unlock(&engine->timeline->lock); - - /* - * The port is checked prior to scheduling a tasklet, but - * just in case we have suspended the tasklet to do the - * wedging make sure that when it wakes, it decides there - * is no work to do by clearing the irq_posted bit. - */ - clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); - - /* Mark all CS interrupts as complete */ - execlists->active = 0; + spin_unlock(&engine->timeline.lock); local_irq_restore(flags); } @@ -827,7 +955,7 @@ static void execlists_submission_tasklet(unsigned long data) { struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; struct intel_engine_execlists * const execlists = &engine->execlists; - struct execlist_port * const port = execlists->port; + struct execlist_port *port = execlists->port; struct drm_i915_private *dev_priv = engine->i915; bool fw = false; @@ -880,6 +1008,7 @@ static void execlists_submission_tasklet(unsigned long data) head = execlists->csb_head; tail = READ_ONCE(buf[write_idx]); + rmb(); /* Hopefully paired with a wmb() in HW */ } GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n", engine->name, @@ -954,10 +1083,13 @@ static void execlists_submission_tasklet(unsigned long data) EXECLISTS_ACTIVE_USER)); rq = port_unpack(port, &count); - GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x, prio=%d\n", + GEM_TRACE("%s out[0]: ctx=%d.%d, global=%d (fence %llx:%d) (current %d), prio=%d\n", engine->name, port->context_id, count, rq ? rq->global_seqno : 0, + rq ? rq->fence.context : 0, + rq ? rq->fence.seqno : 0, + intel_engine_get_seqno(engine), rq ? rq_prio(rq) : 0); /* Check the context/desc id for this event matches */ @@ -965,28 +1097,43 @@ static void execlists_submission_tasklet(unsigned long data) GEM_BUG_ON(count == 0); if (--count == 0) { + /* + * On the final event corresponding to the + * submission of this context, we expect either + * an element-switch event or a completion + * event (and on completion, the active-idle + * marker). No more preemptions, lite-restore + * or otherwise. + */ GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); GEM_BUG_ON(port_isset(&port[1]) && !(status & GEN8_CTX_STATUS_ELEMENT_SWITCH)); + GEM_BUG_ON(!port_isset(&port[1]) && + !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); + + /* + * We rely on the hardware being strongly + * ordered, that the breadcrumb write is + * coherent (visible from the CPU) before the + * user interrupt and CSB is processed. + */ GEM_BUG_ON(!i915_request_completed(rq)); - execlists_context_schedule_out(rq); - trace_i915_request_out(rq); + + execlists_context_schedule_out(rq, + INTEL_CONTEXT_SCHEDULE_OUT); i915_request_put(rq); GEM_TRACE("%s completed ctx=%d\n", engine->name, port->context_id); - execlists_port_complete(execlists, port); + port = execlists_port_complete(execlists, port); + if (port_isset(port)) + execlists_user_begin(execlists, port); + else + execlists_user_end(execlists); } else { port_set(port, port_pack(rq, count)); } - - /* After the final element, the hw should be idle */ - GEM_BUG_ON(port_count(port) == 0 && - !(status & GEN8_CTX_STATUS_ACTIVE_IDLE)); - if (port_count(port) == 0) - execlists_clear_active(execlists, - EXECLISTS_ACTIVE_USER); } if (head != execlists->csb_head) { @@ -1001,21 +1148,31 @@ static void execlists_submission_tasklet(unsigned long data) if (fw) intel_uncore_forcewake_put(dev_priv, execlists->fw_domains); + + /* If the engine is now idle, so should be the flag; and vice versa. */ + GEM_BUG_ON(execlists_is_active(&engine->execlists, + EXECLISTS_ACTIVE_USER) == + !port_isset(engine->execlists.port)); } static void queue_request(struct intel_engine_cs *engine, - struct i915_priotree *pt, + struct i915_sched_node *node, int prio) { - list_add_tail(&pt->link, &lookup_priolist(engine, pt, prio)->requests); + list_add_tail(&node->link, + &lookup_priolist(engine, prio)->requests); +} + +static void __submit_queue(struct intel_engine_cs *engine, int prio) +{ + engine->execlists.queue_priority = prio; + tasklet_hi_schedule(&engine->execlists.tasklet); } static void submit_queue(struct intel_engine_cs *engine, int prio) { - if (prio > engine->execlists.queue_priority) { - engine->execlists.queue_priority = prio; - tasklet_hi_schedule(&engine->execlists.tasklet); - } + if (prio > engine->execlists.queue_priority) + __submit_queue(engine, prio); } static void execlists_submit_request(struct i915_request *request) @@ -1024,42 +1181,45 @@ static void execlists_submit_request(struct i915_request *request) unsigned long flags; /* Will be called from irq-context when using foreign fences. */ - spin_lock_irqsave(&engine->timeline->lock, flags); + spin_lock_irqsave(&engine->timeline.lock, flags); - queue_request(engine, &request->priotree, rq_prio(request)); + queue_request(engine, &request->sched, rq_prio(request)); submit_queue(engine, rq_prio(request)); GEM_BUG_ON(!engine->execlists.first); - GEM_BUG_ON(list_empty(&request->priotree.link)); + GEM_BUG_ON(list_empty(&request->sched.link)); - spin_unlock_irqrestore(&engine->timeline->lock, flags); + spin_unlock_irqrestore(&engine->timeline.lock, flags); } -static struct i915_request *pt_to_request(struct i915_priotree *pt) +static struct i915_request *sched_to_request(struct i915_sched_node *node) { - return container_of(pt, struct i915_request, priotree); + return container_of(node, struct i915_request, sched); } static struct intel_engine_cs * -pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked) +sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked) { - struct intel_engine_cs *engine = pt_to_request(pt)->engine; + struct intel_engine_cs *engine = sched_to_request(node)->engine; GEM_BUG_ON(!locked); if (engine != locked) { - spin_unlock(&locked->timeline->lock); - spin_lock(&engine->timeline->lock); + spin_unlock(&locked->timeline.lock); + spin_lock(&engine->timeline.lock); } return engine; } -static void execlists_schedule(struct i915_request *request, int prio) +static void execlists_schedule(struct i915_request *request, + const struct i915_sched_attr *attr) { - struct intel_engine_cs *engine; + struct i915_priolist *uninitialized_var(pl); + struct intel_engine_cs *engine, *last; struct i915_dependency *dep, *p; struct i915_dependency stack; + const int prio = attr->priority; LIST_HEAD(dfs); GEM_BUG_ON(prio == I915_PRIORITY_INVALID); @@ -1067,23 +1227,23 @@ static void execlists_schedule(struct i915_request *request, int prio) if (i915_request_completed(request)) return; - if (prio <= READ_ONCE(request->priotree.priority)) + if (prio <= READ_ONCE(request->sched.attr.priority)) return; /* Need BKL in order to use the temporary link inside i915_dependency */ lockdep_assert_held(&request->i915->drm.struct_mutex); - stack.signaler = &request->priotree; + stack.signaler = &request->sched; list_add(&stack.dfs_link, &dfs); /* * Recursively bump all dependent priorities to match the new request. * * A naive approach would be to use recursion: - * static void update_priorities(struct i915_priotree *pt, prio) { - * list_for_each_entry(dep, &pt->signalers_list, signal_link) + * static void update_priorities(struct i915_sched_node *node, prio) { + * list_for_each_entry(dep, &node->signalers_list, signal_link) * update_priorities(dep->signal, prio) - * queue_request(pt); + * queue_request(node); * } * but that may have unlimited recursion depth and so runs a very * real risk of overunning the kernel stack. Instead, we build @@ -1095,7 +1255,7 @@ static void execlists_schedule(struct i915_request *request, int prio) * last element in the list is the request we must execute first. */ list_for_each_entry(dep, &dfs, dfs_link) { - struct i915_priotree *pt = dep->signaler; + struct i915_sched_node *node = dep->signaler; /* * Within an engine, there can be no cycle, but we may @@ -1103,14 +1263,14 @@ static void execlists_schedule(struct i915_request *request, int prio) * (redundant dependencies are not eliminated) and across * engines. */ - list_for_each_entry(p, &pt->signalers_list, signal_link) { + list_for_each_entry(p, &node->signalers_list, signal_link) { GEM_BUG_ON(p == dep); /* no cycles! */ - if (i915_priotree_signaled(p->signaler)) + if (i915_sched_node_signaled(p->signaler)) continue; - GEM_BUG_ON(p->signaler->priority < pt->priority); - if (prio > READ_ONCE(p->signaler->priority)) + GEM_BUG_ON(p->signaler->attr.priority < node->attr.priority); + if (prio > READ_ONCE(p->signaler->attr.priority)) list_move_tail(&p->dfs_link, &dfs); } } @@ -1121,37 +1281,45 @@ static void execlists_schedule(struct i915_request *request, int prio) * execlists_submit_request()), we can set our own priority and skip * acquiring the engine locks. */ - if (request->priotree.priority == I915_PRIORITY_INVALID) { - GEM_BUG_ON(!list_empty(&request->priotree.link)); - request->priotree.priority = prio; + if (request->sched.attr.priority == I915_PRIORITY_INVALID) { + GEM_BUG_ON(!list_empty(&request->sched.link)); + request->sched.attr = *attr; if (stack.dfs_link.next == stack.dfs_link.prev) return; __list_del_entry(&stack.dfs_link); } + last = NULL; engine = request->engine; - spin_lock_irq(&engine->timeline->lock); + spin_lock_irq(&engine->timeline.lock); /* Fifo and depth-first replacement ensure our deps execute before us */ list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) { - struct i915_priotree *pt = dep->signaler; + struct i915_sched_node *node = dep->signaler; INIT_LIST_HEAD(&dep->dfs_link); - engine = pt_lock_engine(pt, engine); + engine = sched_lock_engine(node, engine); - if (prio <= pt->priority) + if (prio <= node->attr.priority) continue; - pt->priority = prio; - if (!list_empty(&pt->link)) { - __list_del_entry(&pt->link); - queue_request(engine, pt, prio); + node->attr.priority = prio; + if (!list_empty(&node->link)) { + if (last != engine) { + pl = lookup_priolist(engine, prio); + last = engine; + } + GEM_BUG_ON(pl->priority != prio); + list_move_tail(&node->link, &pl->requests); } - submit_queue(engine, prio); + + if (prio > engine->execlists.queue_priority && + i915_sw_fence_done(&sched_to_request(node)->submit)) + __submit_queue(engine, prio); } - spin_unlock_irq(&engine->timeline->lock); + spin_unlock_irq(&engine->timeline.lock); } static int __context_pin(struct i915_gem_context *ctx, struct i915_vma *vma) @@ -1181,7 +1349,7 @@ static struct intel_ring * execlists_context_pin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(ctx, engine); void *vaddr; int ret; @@ -1215,6 +1383,7 @@ execlists_context_pin(struct intel_engine_cs *engine, ce->lrc_reg_state = vaddr + LRC_STATE_PN * PAGE_SIZE; ce->lrc_reg_state[CTX_RING_BUFFER_START+1] = i915_ggtt_offset(ce->ring->vma); + ce->lrc_reg_state[CTX_RING_HEAD+1] = ce->ring->head; ce->state->obj->pin_global++; i915_gem_context_get(ctx); @@ -1233,7 +1402,7 @@ err: static void execlists_context_unpin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(ctx, engine); lockdep_assert_held(&ctx->i915->drm.struct_mutex); GEM_BUG_ON(ce->pin_count == 0); @@ -1252,8 +1421,8 @@ static void execlists_context_unpin(struct intel_engine_cs *engine, static int execlists_request_alloc(struct i915_request *request) { - struct intel_engine_cs *engine = request->engine; - struct intel_context *ce = &request->ctx->engine[engine->id]; + struct intel_context *ce = + to_intel_context(request->ctx, request->engine); int ret; GEM_BUG_ON(!ce->pin_count); @@ -1513,6 +1682,8 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) return -EINVAL; switch (INTEL_GEN(engine->i915)) { + case 11: + return 0; case 10: wa_bb_fn[0] = gen10_init_indirectctx_bb; wa_bb_fn[1] = NULL; @@ -1565,14 +1736,6 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) return ret; } -static u8 gtiir[] = { - [RCS] = 0, - [BCS] = 0, - [VCS] = 1, - [VCS2] = 1, - [VECS] = 3, -}; - static void enable_execlists(struct intel_engine_cs *engine) { struct drm_i915_private *dev_priv = engine->i915; @@ -1632,6 +1795,8 @@ static int gen8_init_render_ring(struct intel_engine_cs *engine) if (ret) return ret; + intel_whitelist_workarounds_apply(engine); + /* We need to disable the AsyncFlip performance optimisations in order * to use MI_WAIT_FOR_EVENT within the CS. It should already be * programmed to '1' on all products. @@ -1642,7 +1807,7 @@ static int gen8_init_render_ring(struct intel_engine_cs *engine) I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); - return init_workarounds_ring(engine); + return 0; } static int gen9_init_render_ring(struct intel_engine_cs *engine) @@ -1653,49 +1818,25 @@ static int gen9_init_render_ring(struct intel_engine_cs *engine) if (ret) return ret; - return init_workarounds_ring(engine); -} - -static void reset_irq(struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - int i; - - GEM_BUG_ON(engine->id >= ARRAY_SIZE(gtiir)); - - /* - * Clear any pending interrupt state. - * - * We do it twice out of paranoia that some of the IIR are double - * buffered, and if we only reset it once there may still be - * an interrupt pending. - */ - for (i = 0; i < 2; i++) { - I915_WRITE(GEN8_GT_IIR(gtiir[engine->id]), - GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift); - POSTING_READ(GEN8_GT_IIR(gtiir[engine->id])); - } - GEM_BUG_ON(I915_READ(GEN8_GT_IIR(gtiir[engine->id])) & - (GT_CONTEXT_SWITCH_INTERRUPT << engine->irq_shift)); + intel_whitelist_workarounds_apply(engine); - clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted); + return 0; } static void reset_common_ring(struct intel_engine_cs *engine, struct i915_request *request) { struct intel_engine_execlists * const execlists = &engine->execlists; - struct intel_context *ce; unsigned long flags; + u32 *regs; - GEM_TRACE("%s seqno=%x\n", - engine->name, request ? request->global_seqno : 0); + GEM_TRACE("%s request global=%x, current=%d\n", + engine->name, request ? request->global_seqno : 0, + intel_engine_get_seqno(engine)); /* See execlists_cancel_requests() for the irq/spinlock split. */ local_irq_save(flags); - reset_irq(engine); - /* * Catch up with any missed context-switch interrupts. * @@ -1706,14 +1847,12 @@ static void reset_common_ring(struct intel_engine_cs *engine, * requests were completed. */ execlists_cancel_port_requests(execlists); + reset_irq(engine); /* Push back any incomplete requests for replay after the reset. */ - spin_lock(&engine->timeline->lock); + spin_lock(&engine->timeline.lock); __unwind_incomplete_requests(engine); - spin_unlock(&engine->timeline->lock); - - /* Mark all CS interrupts as complete */ - execlists->active = 0; + spin_unlock(&engine->timeline.lock); local_irq_restore(flags); @@ -1739,14 +1878,24 @@ static void reset_common_ring(struct intel_engine_cs *engine, * future request will be after userspace has had the opportunity * to recreate its own state. */ - ce = &request->ctx->engine[engine->id]; - execlists_init_reg_state(ce->lrc_reg_state, - request->ctx, engine, ce->ring); + regs = to_intel_context(request->ctx, engine)->lrc_reg_state; + if (engine->default_state) { + void *defaults; + + defaults = i915_gem_object_pin_map(engine->default_state, + I915_MAP_WB); + if (!IS_ERR(defaults)) { + memcpy(regs, /* skip restoring the vanilla PPHWSP */ + defaults + LRC_STATE_PN * PAGE_SIZE, + engine->context_size - PAGE_SIZE); + i915_gem_object_unpin_map(engine->default_state); + } + } + execlists_init_reg_state(regs, request->ctx, engine, request->ring); /* Move the RING_HEAD onto the breadcrumb, past the hanging batch */ - ce->lrc_reg_state[CTX_RING_BUFFER_START+1] = - i915_ggtt_offset(ce->ring->vma); - ce->lrc_reg_state[CTX_RING_HEAD+1] = request->postfix; + regs[CTX_RING_BUFFER_START + 1] = i915_ggtt_offset(request->ring->vma); + regs[CTX_RING_HEAD + 1] = request->postfix; request->ring->head = request->postfix; intel_ring_update_space(request->ring); @@ -1807,7 +1956,7 @@ static int gen8_emit_bb_start(struct i915_request *rq, rq->ctx->ppgtt->pd_dirty_rings &= ~intel_engine_flag(rq->engine); } - cs = intel_ring_begin(rq, 4); + cs = intel_ring_begin(rq, 6); if (IS_ERR(cs)) return PTR_ERR(cs); @@ -1836,6 +1985,9 @@ static int gen8_emit_bb_start(struct i915_request *rq, (flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0); *cs++ = lower_32_bits(offset); *cs++ = upper_32_bits(offset); + + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + *cs++ = MI_NOOP; intel_ring_advance(rq, cs); return 0; @@ -1978,7 +2130,7 @@ static void gen8_emit_breadcrumb(struct i915_request *request, u32 *cs) cs = gen8_emit_ggtt_write(cs, request->global_seqno, intel_hws_seqno_address(request->engine)); *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; request->tail = intel_ring_offset(request, cs); assert_ring_tail_valid(request->ring, request->tail); @@ -1994,7 +2146,7 @@ static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs) cs = gen8_emit_ggtt_write_rcs(cs, request->global_seqno, intel_hws_seqno_address(request->engine)); *cs++ = MI_USER_INTERRUPT; - *cs++ = MI_NOOP; + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; request->tail = intel_ring_offset(request, cs); assert_ring_tail_valid(request->ring, request->tail); @@ -2006,7 +2158,7 @@ static int gen8_init_rcs_context(struct i915_request *rq) { int ret; - ret = intel_ring_workarounds_emit(rq); + ret = intel_ctx_workarounds_emit(rq); if (ret) return ret; @@ -2066,11 +2218,13 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine) engine->unpark = NULL; engine->flags |= I915_ENGINE_SUPPORTS_STATS; + if (engine->i915->preempt_context) + engine->flags |= I915_ENGINE_HAS_PREEMPTION; engine->i915->caps.scheduler = I915_SCHEDULER_CAP_ENABLED | I915_SCHEDULER_CAP_PRIORITY; - if (engine->i915->preempt_context) + if (intel_engine_has_preemption(engine)) engine->i915->caps.scheduler |= I915_SCHEDULER_CAP_PREEMPTION; } @@ -2109,7 +2263,20 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine) static inline void logical_ring_default_irqs(struct intel_engine_cs *engine) { - unsigned shift = engine->irq_shift; + unsigned int shift = 0; + + if (INTEL_GEN(engine->i915) < 11) { + const u8 irq_shifts[] = { + [RCS] = GEN8_RCS_IRQ_SHIFT, + [BCS] = GEN8_BCS_IRQ_SHIFT, + [VCS] = GEN8_VCS1_IRQ_SHIFT, + [VCS2] = GEN8_VCS2_IRQ_SHIFT, + [VECS] = GEN8_VECS_IRQ_SHIFT, + }; + + shift = irq_shifts[engine->id]; + } + engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift; engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift; } @@ -2165,9 +2332,13 @@ static int logical_ring_init(struct intel_engine_cs *engine) } engine->execlists.preempt_complete_status = ~0u; - if (engine->i915->preempt_context) + if (engine->i915->preempt_context) { + struct intel_context *ce = + to_intel_context(engine->i915->preempt_context, engine); + engine->execlists.preempt_complete_status = - upper_32_bits(engine->i915->preempt_context->engine[engine->id].lrc_desc); + upper_32_bits(ce->lrc_desc); + } return 0; @@ -2421,8 +2592,10 @@ populate_lr_context(struct i915_gem_context *ctx, defaults = i915_gem_object_pin_map(engine->default_state, I915_MAP_WB); - if (IS_ERR(defaults)) - return PTR_ERR(defaults); + if (IS_ERR(defaults)) { + ret = PTR_ERR(defaults); + goto err_unpin_ctx; + } memcpy(vaddr + start, defaults + start, engine->context_size); i915_gem_object_unpin_map(engine->default_state); @@ -2440,19 +2613,20 @@ populate_lr_context(struct i915_gem_context *ctx, _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT); +err_unpin_ctx: i915_gem_object_unpin_map(ctx_obj); - - return 0; + return ret; } static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { struct drm_i915_gem_object *ctx_obj; - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(ctx, engine); struct i915_vma *vma; uint32_t context_size; struct intel_ring *ring; + struct i915_timeline *timeline; int ret; if (ce->state) @@ -2468,8 +2642,8 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, ctx_obj = i915_gem_object_create(ctx->i915, context_size); if (IS_ERR(ctx_obj)) { - DRM_DEBUG_DRIVER("Alloc LRC backing obj failed.\n"); - return PTR_ERR(ctx_obj); + ret = PTR_ERR(ctx_obj); + goto error_deref_obj; } vma = i915_vma_instance(ctx_obj, &ctx->i915->ggtt.base, NULL); @@ -2478,7 +2652,14 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx, goto error_deref_obj; } - ring = intel_engine_create_ring(engine, ctx->ring_size); + timeline = i915_timeline_create(ctx->i915, ctx->name); + if (IS_ERR(timeline)) { + ret = PTR_ERR(timeline); + goto error_deref_obj; + } + + ring = intel_engine_create_ring(engine, timeline, ctx->ring_size); + i915_timeline_put(timeline); if (IS_ERR(ring)) { ret = PTR_ERR(ring); goto error_deref_obj; @@ -2520,7 +2701,8 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv) */ list_for_each_entry(ctx, &dev_priv->contexts.list, link) { for_each_engine(engine, dev_priv, id) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = + to_intel_context(ctx, engine); u32 *reg; if (!ce->state) @@ -2542,3 +2724,7 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv) } } } + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/intel_lrc.c" +#endif diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index 59d7b86..4ec7d8d 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -108,7 +108,7 @@ static inline uint64_t intel_lr_context_descriptor(struct i915_gem_context *ctx, struct intel_engine_cs *engine) { - return ctx->engine[engine->id].lrc_desc; + return to_intel_context(ctx, engine)->lrc_desc; } #endif /* _INTEL_LRC_H_ */ diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index d35d2d5..8691c86 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -326,7 +326,8 @@ static void intel_enable_lvds(struct intel_encoder *encoder, I915_WRITE(PP_CONTROL(0), I915_READ(PP_CONTROL(0)) | PANEL_POWER_ON); POSTING_READ(lvds_encoder->reg); - if (intel_wait_for_register(dev_priv, PP_STATUS(0), PP_ON, PP_ON, 1000)) + + if (intel_wait_for_register(dev_priv, PP_STATUS(0), PP_ON, PP_ON, 5000)) DRM_ERROR("timed out waiting for panel to power on\n"); intel_panel_enable_backlight(pipe_config, conn_state); diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c index c0b34b7..9f0bd6a4 100644 --- a/drivers/gpu/drm/i915/intel_mocs.c +++ b/drivers/gpu/drm/i915/intel_mocs.c @@ -178,7 +178,8 @@ static bool get_mocs_settings(struct drm_i915_private *dev_priv, { bool result = false; - if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv) || + IS_ICELAKE(dev_priv)) { table->size = ARRAY_SIZE(skylake_mocs_table); table->table = skylake_mocs_table; result = true; @@ -217,6 +218,8 @@ static i915_reg_t mocs_register(enum intel_engine_id engine_id, int index) return GEN9_VEBOX_MOCS(index); case VCS2: return GEN9_MFX1_MOCS(index); + case VCS3: + return GEN11_MFX2_MOCS(index); default: MISSING_CASE(engine_id); return INVALID_MMIO_REG; diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 36671a9..c2f10d8 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -807,6 +807,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay, ret = PTR_ERR(vma); goto out_pin_section; } + intel_fb_obj_flush(new_bo, ORIGIN_DIRTYFB); ret = i915_vma_put_fence(vma); if (ret) diff --git a/drivers/gpu/drm/i915/intel_pipe_crc.c b/drivers/gpu/drm/i915/intel_pipe_crc.c index 1f5cd57..39a4e4e 100644 --- a/drivers/gpu/drm/i915/intel_pipe_crc.c +++ b/drivers/gpu/drm/i915/intel_pipe_crc.c @@ -569,7 +569,8 @@ unlock: static int ivb_pipe_crc_ctl_reg(struct drm_i915_private *dev_priv, enum pipe pipe, enum intel_pipe_crc_source *source, - uint32_t *val) + uint32_t *val, + bool set_wa) { if (*source == INTEL_PIPE_CRC_SOURCE_AUTO) *source = INTEL_PIPE_CRC_SOURCE_PF; @@ -582,7 +583,7 @@ static int ivb_pipe_crc_ctl_reg(struct drm_i915_private *dev_priv, *val = PIPE_CRC_ENABLE | PIPE_CRC_SOURCE_SPRITE_IVB; break; case INTEL_PIPE_CRC_SOURCE_PF: - if ((IS_HASWELL(dev_priv) || + if (set_wa && (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) && pipe == PIPE_A) hsw_pipe_A_crc_wa(dev_priv, true); @@ -600,7 +601,8 @@ static int ivb_pipe_crc_ctl_reg(struct drm_i915_private *dev_priv, static int get_new_crc_ctl_reg(struct drm_i915_private *dev_priv, enum pipe pipe, - enum intel_pipe_crc_source *source, u32 *val) + enum intel_pipe_crc_source *source, u32 *val, + bool set_wa) { if (IS_GEN2(dev_priv)) return i8xx_pipe_crc_ctl_reg(source, val); @@ -611,7 +613,7 @@ static int get_new_crc_ctl_reg(struct drm_i915_private *dev_priv, else if (IS_GEN5(dev_priv) || IS_GEN6(dev_priv)) return ilk_pipe_crc_ctl_reg(source, val); else - return ivb_pipe_crc_ctl_reg(dev_priv, pipe, source, val); + return ivb_pipe_crc_ctl_reg(dev_priv, pipe, source, val, set_wa); } static int pipe_crc_set_source(struct drm_i915_private *dev_priv, @@ -636,7 +638,7 @@ static int pipe_crc_set_source(struct drm_i915_private *dev_priv, return -EIO; } - ret = get_new_crc_ctl_reg(dev_priv, pipe, &source, &val); + ret = get_new_crc_ctl_reg(dev_priv, pipe, &source, &val, true); if (ret != 0) goto out; @@ -764,13 +766,12 @@ display_crc_ctl_parse_object(const char *buf, enum intel_pipe_crc_object *o) { int i; - for (i = 0; i < ARRAY_SIZE(pipe_crc_objects); i++) - if (!strcmp(buf, pipe_crc_objects[i])) { - *o = i; - return 0; - } + i = match_string(pipe_crc_objects, ARRAY_SIZE(pipe_crc_objects), buf); + if (i < 0) + return i; - return -EINVAL; + *o = i; + return 0; } static int display_crc_ctl_parse_pipe(struct drm_i915_private *dev_priv, @@ -796,13 +797,12 @@ display_crc_ctl_parse_source(const char *buf, enum intel_pipe_crc_source *s) return 0; } - for (i = 0; i < ARRAY_SIZE(pipe_crc_sources); i++) - if (!strcmp(buf, pipe_crc_sources[i])) { - *s = i; - return 0; - } + i = match_string(pipe_crc_sources, ARRAY_SIZE(pipe_crc_sources), buf); + if (i < 0) + return i; - return -EINVAL; + *s = i; + return 0; } static int display_crc_ctl_parse(struct drm_i915_private *dev_priv, @@ -916,7 +916,7 @@ int intel_pipe_crc_create(struct drm_minor *minor) int intel_crtc_set_crc_source(struct drm_crtc *crtc, const char *source_name, size_t *values_cnt) { - struct drm_i915_private *dev_priv = crtc->dev->dev_private; + struct drm_i915_private *dev_priv = to_i915(crtc->dev); struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[crtc->index]; enum intel_display_power_domain power_domain; enum intel_pipe_crc_source source; @@ -934,10 +934,11 @@ int intel_crtc_set_crc_source(struct drm_crtc *crtc, const char *source_name, return -EIO; } - ret = get_new_crc_ctl_reg(dev_priv, crtc->index, &source, &val); + ret = get_new_crc_ctl_reg(dev_priv, crtc->index, &source, &val, true); if (ret != 0) goto out; + pipe_crc->source = source; I915_WRITE(PIPE_CRC_CTL(crtc->index), val); POSTING_READ(PIPE_CRC_CTL(crtc->index)); @@ -959,3 +960,39 @@ out: return ret; } + +void intel_crtc_enable_pipe_crc(struct intel_crtc *intel_crtc) +{ + struct drm_crtc *crtc = &intel_crtc->base; + struct drm_i915_private *dev_priv = to_i915(crtc->dev); + struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[crtc->index]; + u32 val = 0; + + if (!crtc->crc.opened) + return; + + if (get_new_crc_ctl_reg(dev_priv, crtc->index, &pipe_crc->source, &val, false) < 0) + return; + + /* Don't need pipe_crc->lock here, IRQs are not generated. */ + pipe_crc->skipped = 0; + + I915_WRITE(PIPE_CRC_CTL(crtc->index), val); + POSTING_READ(PIPE_CRC_CTL(crtc->index)); +} + +void intel_crtc_disable_pipe_crc(struct intel_crtc *intel_crtc) +{ + struct drm_crtc *crtc = &intel_crtc->base; + struct drm_i915_private *dev_priv = to_i915(crtc->dev); + struct intel_pipe_crc *pipe_crc = &dev_priv->pipe_crc[crtc->index]; + + /* Swallow crc's until we stop generating them. */ + spin_lock_irq(&pipe_crc->lock); + pipe_crc->skipped = INT_MIN; + spin_unlock_irq(&pipe_crc->lock); + + I915_WRITE(PIPE_CRC_CTL(crtc->index), 0); + POSTING_READ(PIPE_CRC_CTL(crtc->index)); + synchronize_irq(dev_priv->drm.irq); +} diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index b8da4dc..b85229e 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3567,6 +3567,23 @@ bool ilk_disable_lp_wm(struct drm_device *dev) return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL); } +static u8 intel_enabled_dbuf_slices_num(struct drm_i915_private *dev_priv) +{ + u8 enabled_slices; + + /* Slice 1 will always be enabled */ + enabled_slices = 1; + + /* Gen prior to GEN11 have only one DBuf slice */ + if (INTEL_GEN(dev_priv) < 11) + return enabled_slices; + + if (I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE) + enabled_slices++; + + return enabled_slices; +} + /* * FIXME: We still don't have the proper code detect if we need to apply the WA, * so assume we'll always need it in order to avoid underruns. @@ -3754,9 +3771,42 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state) return true; } +static unsigned int intel_get_ddb_size(struct drm_i915_private *dev_priv, + const struct intel_crtc_state *cstate, + const unsigned int total_data_rate, + const int num_active, + struct skl_ddb_allocation *ddb) +{ + const struct drm_display_mode *adjusted_mode; + u64 total_data_bw; + u16 ddb_size = INTEL_INFO(dev_priv)->ddb_size; + + WARN_ON(ddb_size == 0); + + if (INTEL_GEN(dev_priv) < 11) + return ddb_size - 4; /* 4 blocks for bypass path allocation */ + + adjusted_mode = &cstate->base.adjusted_mode; + total_data_bw = (u64)total_data_rate * drm_mode_vrefresh(adjusted_mode); + + /* + * 12GB/s is maximum BW supported by single DBuf slice. + */ + if (total_data_bw >= GBps(12) || num_active > 1) { + ddb->enabled_slices = 2; + } else { + ddb->enabled_slices = 1; + ddb_size /= 2; + } + + return ddb_size; +} + static void skl_ddb_get_pipe_allocation_limits(struct drm_device *dev, const struct intel_crtc_state *cstate, + const unsigned int total_data_rate, + struct skl_ddb_allocation *ddb, struct skl_ddb_entry *alloc, /* out */ int *num_active /* out */) { @@ -3779,11 +3829,8 @@ skl_ddb_get_pipe_allocation_limits(struct drm_device *dev, else *num_active = hweight32(dev_priv->active_crtcs); - ddb_size = INTEL_INFO(dev_priv)->ddb_size; - WARN_ON(ddb_size == 0); - - if (INTEL_GEN(dev_priv) < 11) - ddb_size -= 4; /* 4 blocks for bypass path allocation */ + ddb_size = intel_get_ddb_size(dev_priv, cstate, total_data_rate, + *num_active, ddb); /* * If the state doesn't change the active CRTC's, then there's @@ -3817,14 +3864,64 @@ static unsigned int skl_cursor_allocation(int num_active) return 8; } -static void skl_ddb_entry_init_from_hw(struct skl_ddb_entry *entry, u32 reg) +static void skl_ddb_entry_init_from_hw(struct drm_i915_private *dev_priv, + struct skl_ddb_entry *entry, u32 reg) { - entry->start = reg & 0x3ff; - entry->end = (reg >> 16) & 0x3ff; + u16 mask; + + if (INTEL_GEN(dev_priv) >= 11) + mask = ICL_DDB_ENTRY_MASK; + else + mask = SKL_DDB_ENTRY_MASK; + entry->start = reg & mask; + entry->end = (reg >> DDB_ENTRY_END_SHIFT) & mask; + if (entry->end) entry->end += 1; } +static void +skl_ddb_get_hw_plane_state(struct drm_i915_private *dev_priv, + const enum pipe pipe, + const enum plane_id plane_id, + struct skl_ddb_allocation *ddb /* out */) +{ + u32 val, val2 = 0; + int fourcc, pixel_format; + + /* Cursor doesn't support NV12/planar, so no extra calculation needed */ + if (plane_id == PLANE_CURSOR) { + val = I915_READ(CUR_BUF_CFG(pipe)); + skl_ddb_entry_init_from_hw(dev_priv, + &ddb->plane[pipe][plane_id], val); + return; + } + + val = I915_READ(PLANE_CTL(pipe, plane_id)); + + /* No DDB allocated for disabled planes */ + if (!(val & PLANE_CTL_ENABLE)) + return; + + pixel_format = val & PLANE_CTL_FORMAT_MASK; + fourcc = skl_format_to_fourcc(pixel_format, + val & PLANE_CTL_ORDER_RGBX, + val & PLANE_CTL_ALPHA_MASK); + + val = I915_READ(PLANE_BUF_CFG(pipe, plane_id)); + val2 = I915_READ(PLANE_NV12_BUF_CFG(pipe, plane_id)); + + if (fourcc == DRM_FORMAT_NV12) { + skl_ddb_entry_init_from_hw(dev_priv, + &ddb->plane[pipe][plane_id], val2); + skl_ddb_entry_init_from_hw(dev_priv, + &ddb->uv_plane[pipe][plane_id], val); + } else { + skl_ddb_entry_init_from_hw(dev_priv, + &ddb->plane[pipe][plane_id], val); + } +} + void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv, struct skl_ddb_allocation *ddb /* out */) { @@ -3832,6 +3929,8 @@ void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv, memset(ddb, 0, sizeof(*ddb)); + ddb->enabled_slices = intel_enabled_dbuf_slices_num(dev_priv); + for_each_intel_crtc(&dev_priv->drm, crtc) { enum intel_display_power_domain power_domain; enum plane_id plane_id; @@ -3841,16 +3940,9 @@ void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv, if (!intel_display_power_get_if_enabled(dev_priv, power_domain)) continue; - for_each_plane_id_on_crtc(crtc, plane_id) { - u32 val; - - if (plane_id != PLANE_CURSOR) - val = I915_READ(PLANE_BUF_CFG(pipe, plane_id)); - else - val = I915_READ(CUR_BUF_CFG(pipe)); - - skl_ddb_entry_init_from_hw(&ddb->plane[pipe][plane_id], val); - } + for_each_plane_id_on_crtc(crtc, plane_id) + skl_ddb_get_hw_plane_state(dev_priv, pipe, + plane_id, ddb); intel_display_power_put(dev_priv, power_domain); } @@ -4009,9 +4101,9 @@ int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc, static unsigned int skl_plane_relative_data_rate(const struct intel_crtc_state *cstate, const struct drm_plane_state *pstate, - int y) + const int plane) { - struct intel_plane *plane = to_intel_plane(pstate->plane); + struct intel_plane *intel_plane = to_intel_plane(pstate->plane); struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate); uint32_t data_rate; uint32_t width = 0, height = 0; @@ -4025,9 +4117,9 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate, fb = pstate->fb; format = fb->format->format; - if (plane->id == PLANE_CURSOR) + if (intel_plane->id == PLANE_CURSOR) return 0; - if (y && format != DRM_FORMAT_NV12) + if (plane == 1 && format != DRM_FORMAT_NV12) return 0; /* @@ -4038,19 +4130,14 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate, width = drm_rect_width(&intel_pstate->base.src) >> 16; height = drm_rect_height(&intel_pstate->base.src) >> 16; - /* for planar format */ - if (format == DRM_FORMAT_NV12) { - if (y) /* y-plane data rate */ - data_rate = width * height * - fb->format->cpp[0]; - else /* uv-plane data rate */ - data_rate = (width / 2) * (height / 2) * - fb->format->cpp[1]; - } else { - /* for packed formats */ - data_rate = width * height * fb->format->cpp[0]; + /* UV plane does 1/2 pixel sub-sampling */ + if (plane == 1 && format == DRM_FORMAT_NV12) { + width /= 2; + height /= 2; } + data_rate = width * height * fb->format->cpp[plane]; + down_scale_amount = skl_plane_downscale_amount(cstate, intel_pstate); return mul_round_up_u32_fixed16(data_rate, down_scale_amount); @@ -4063,8 +4150,8 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate, */ static unsigned int skl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate, - unsigned *plane_data_rate, - unsigned *plane_y_data_rate) + unsigned int *plane_data_rate, + unsigned int *uv_plane_data_rate) { struct drm_crtc_state *cstate = &intel_cstate->base; struct drm_atomic_state *state = cstate->state; @@ -4080,17 +4167,17 @@ skl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate, enum plane_id plane_id = to_intel_plane(plane)->id; unsigned int rate; - /* packed/uv */ + /* packed/y */ rate = skl_plane_relative_data_rate(intel_cstate, pstate, 0); plane_data_rate[plane_id] = rate; total_data_rate += rate; - /* y-plane */ + /* uv-plane */ rate = skl_plane_relative_data_rate(intel_cstate, pstate, 1); - plane_y_data_rate[plane_id] = rate; + uv_plane_data_rate[plane_id] = rate; total_data_rate += rate; } @@ -4099,8 +4186,7 @@ skl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate, } static uint16_t -skl_ddb_min_alloc(const struct drm_plane_state *pstate, - const int y) +skl_ddb_min_alloc(const struct drm_plane_state *pstate, const int plane) { struct drm_framebuffer *fb = pstate->fb; struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate); @@ -4111,8 +4197,8 @@ skl_ddb_min_alloc(const struct drm_plane_state *pstate, if (WARN_ON(!fb)) return 0; - /* For packed formats, no y-plane, return 0 */ - if (y && fb->format->format != DRM_FORMAT_NV12) + /* For packed formats, and uv-plane, return 0 */ + if (plane == 1 && fb->format->format != DRM_FORMAT_NV12) return 0; /* For Non Y-tile return 8-blocks */ @@ -4131,15 +4217,12 @@ skl_ddb_min_alloc(const struct drm_plane_state *pstate, src_h = drm_rect_height(&intel_pstate->base.src) >> 16; /* Halve UV plane width and height for NV12 */ - if (fb->format->format == DRM_FORMAT_NV12 && !y) { + if (plane == 1) { src_w /= 2; src_h /= 2; } - if (fb->format->format == DRM_FORMAT_NV12 && !y) - plane_bpp = fb->format->cpp[1]; - else - plane_bpp = fb->format->cpp[0]; + plane_bpp = fb->format->cpp[plane]; if (drm_rotation_90_or_270(pstate->rotation)) { switch (plane_bpp) { @@ -4167,7 +4250,7 @@ skl_ddb_min_alloc(const struct drm_plane_state *pstate, static void skl_ddb_calc_min(const struct intel_crtc_state *cstate, int num_active, - uint16_t *minimum, uint16_t *y_minimum) + uint16_t *minimum, uint16_t *uv_minimum) { const struct drm_plane_state *pstate; struct drm_plane *plane; @@ -4182,7 +4265,7 @@ skl_ddb_calc_min(const struct intel_crtc_state *cstate, int num_active, continue; minimum[plane_id] = skl_ddb_min_alloc(pstate, 0); - y_minimum[plane_id] = skl_ddb_min_alloc(pstate, 1); + uv_minimum[plane_id] = skl_ddb_min_alloc(pstate, 1); } minimum[PLANE_CURSOR] = skl_cursor_allocation(num_active); @@ -4200,17 +4283,17 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, struct skl_ddb_entry *alloc = &cstate->wm.skl.ddb; uint16_t alloc_size, start; uint16_t minimum[I915_MAX_PLANES] = {}; - uint16_t y_minimum[I915_MAX_PLANES] = {}; + uint16_t uv_minimum[I915_MAX_PLANES] = {}; unsigned int total_data_rate; enum plane_id plane_id; int num_active; - unsigned plane_data_rate[I915_MAX_PLANES] = {}; - unsigned plane_y_data_rate[I915_MAX_PLANES] = {}; + unsigned int plane_data_rate[I915_MAX_PLANES] = {}; + unsigned int uv_plane_data_rate[I915_MAX_PLANES] = {}; uint16_t total_min_blocks = 0; /* Clear the partitioning for disabled planes. */ memset(ddb->plane[pipe], 0, sizeof(ddb->plane[pipe])); - memset(ddb->y_plane[pipe], 0, sizeof(ddb->y_plane[pipe])); + memset(ddb->uv_plane[pipe], 0, sizeof(ddb->uv_plane[pipe])); if (WARN_ON(!state)) return 0; @@ -4220,12 +4303,16 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, return 0; } - skl_ddb_get_pipe_allocation_limits(dev, cstate, alloc, &num_active); + total_data_rate = skl_get_total_relative_data_rate(cstate, + plane_data_rate, + uv_plane_data_rate); + skl_ddb_get_pipe_allocation_limits(dev, cstate, total_data_rate, ddb, + alloc, &num_active); alloc_size = skl_ddb_entry_size(alloc); if (alloc_size == 0) return 0; - skl_ddb_calc_min(cstate, num_active, minimum, y_minimum); + skl_ddb_calc_min(cstate, num_active, minimum, uv_minimum); /* * 1. Allocate the mininum required blocks for each active plane @@ -4235,7 +4322,7 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, for_each_plane_id_on_crtc(intel_crtc, plane_id) { total_min_blocks += minimum[plane_id]; - total_min_blocks += y_minimum[plane_id]; + total_min_blocks += uv_minimum[plane_id]; } if (total_min_blocks > alloc_size) { @@ -4255,16 +4342,13 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, * * FIXME: we may not allocate every single block here. */ - total_data_rate = skl_get_total_relative_data_rate(cstate, - plane_data_rate, - plane_y_data_rate); if (total_data_rate == 0) return 0; start = alloc->start; for_each_plane_id_on_crtc(intel_crtc, plane_id) { - unsigned int data_rate, y_data_rate; - uint16_t plane_blocks, y_plane_blocks = 0; + unsigned int data_rate, uv_data_rate; + uint16_t plane_blocks, uv_plane_blocks; if (plane_id == PLANE_CURSOR) continue; @@ -4288,21 +4372,20 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, start += plane_blocks; - /* - * allocation for y_plane part of planar format: - */ - y_data_rate = plane_y_data_rate[plane_id]; + /* Allocate DDB for UV plane for planar format/NV12 */ + uv_data_rate = uv_plane_data_rate[plane_id]; - y_plane_blocks = y_minimum[plane_id]; - y_plane_blocks += div_u64((uint64_t)alloc_size * y_data_rate, - total_data_rate); + uv_plane_blocks = uv_minimum[plane_id]; + uv_plane_blocks += div_u64((uint64_t)alloc_size * uv_data_rate, + total_data_rate); - if (y_data_rate) { - ddb->y_plane[pipe][plane_id].start = start; - ddb->y_plane[pipe][plane_id].end = start + y_plane_blocks; + if (uv_data_rate) { + ddb->uv_plane[pipe][plane_id].start = start; + ddb->uv_plane[pipe][plane_id].end = + start + uv_plane_blocks; } - start += y_plane_blocks; + start += uv_plane_blocks; } return 0; @@ -4398,7 +4481,7 @@ static int skl_compute_plane_wm_params(const struct drm_i915_private *dev_priv, struct intel_crtc_state *cstate, const struct intel_plane_state *intel_pstate, - struct skl_wm_params *wp) + struct skl_wm_params *wp, int plane_id) { struct intel_plane *plane = to_intel_plane(intel_pstate->base.plane); const struct drm_plane_state *pstate = &intel_pstate->base; @@ -4411,6 +4494,12 @@ skl_compute_plane_wm_params(const struct drm_i915_private *dev_priv, if (!intel_wm_plane_visible(cstate, intel_pstate)) return 0; + /* only NV12 format has two planes */ + if (plane_id == 1 && fb->format->format != DRM_FORMAT_NV12) { + DRM_DEBUG_KMS("Non NV12 format have single plane\n"); + return -EINVAL; + } + wp->y_tiled = fb->modifier == I915_FORMAT_MOD_Y_TILED || fb->modifier == I915_FORMAT_MOD_Yf_TILED || fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS || @@ -4418,6 +4507,7 @@ skl_compute_plane_wm_params(const struct drm_i915_private *dev_priv, wp->x_tiled = fb->modifier == I915_FORMAT_MOD_X_TILED; wp->rc_surface = fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS || fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS; + wp->is_planar = fb->format->format == DRM_FORMAT_NV12; if (plane->id == PLANE_CURSOR) { wp->width = intel_pstate->base.crtc_w; @@ -4430,8 +4520,10 @@ skl_compute_plane_wm_params(const struct drm_i915_private *dev_priv, wp->width = drm_rect_width(&intel_pstate->base.src) >> 16; } - wp->cpp = (fb->format->format == DRM_FORMAT_NV12) ? fb->format->cpp[1] : - fb->format->cpp[0]; + if (plane_id == 1 && wp->is_planar) + wp->width /= 2; + + wp->cpp = fb->format->cpp[plane_id]; wp->plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate, intel_pstate); @@ -4499,9 +4591,8 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, uint16_t ddb_allocation, int level, const struct skl_wm_params *wp, - uint16_t *out_blocks, /* out */ - uint8_t *out_lines, /* out */ - bool *enabled /* out */) + const struct skl_wm_level *result_prev, + struct skl_wm_level *result /* out */) { const struct drm_plane_state *pstate = &intel_pstate->base; uint32_t latency = dev_priv->wm.skl_latency[level]; @@ -4515,7 +4606,7 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, if (latency == 0 || !intel_wm_plane_visible(cstate, intel_pstate)) { - *enabled = false; + result->plane_en = false; return 0; } @@ -4568,6 +4659,15 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, } else { res_blocks++; } + + /* + * Make sure result blocks for higher latency levels are atleast + * as high as level below the current level. + * Assumption in DDB algorithm optimization for special cases. + * Also covers Display WA #1125 for RC. + */ + if (result_prev->plane_res_b > res_blocks) + res_blocks = result_prev->plane_res_b; } if (INTEL_GEN(dev_priv) >= 11) { @@ -4596,7 +4696,7 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, if ((level > 0 && res_lines > 31) || res_blocks >= ddb_allocation || min_disp_buf_needed >= ddb_allocation) { - *enabled = false; + result->plane_en = false; /* * If there are no valid level 0 watermarks, then we can't @@ -4615,10 +4715,21 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, } } + /* + * Display WA #826 (SKL:ALL, BXT:ALL) & #1059 (CNL:A) + * disable wm level 1-7 on NV12 planes + */ + if (wp->is_planar && level >= 1 && + (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv) || + IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_A0))) { + result->plane_en = false; + return 0; + } + /* The number of lines are ignored for the level 0 watermark. */ - *out_lines = level ? res_lines : 0; - *out_blocks = res_blocks; - *enabled = true; + result->plane_res_b = res_blocks; + result->plane_res_l = res_lines; + result->plane_en = true; return 0; } @@ -4629,7 +4740,8 @@ skl_compute_wm_levels(const struct drm_i915_private *dev_priv, struct intel_crtc_state *cstate, const struct intel_plane_state *intel_pstate, const struct skl_wm_params *wm_params, - struct skl_plane_wm *wm) + struct skl_plane_wm *wm, + int plane_id) { struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); struct drm_plane *plane = intel_pstate->base.plane; @@ -4637,15 +4749,26 @@ skl_compute_wm_levels(const struct drm_i915_private *dev_priv, uint16_t ddb_blocks; enum pipe pipe = intel_crtc->pipe; int level, max_level = ilk_wm_max_level(dev_priv); + enum plane_id intel_plane_id = intel_plane->id; int ret; if (WARN_ON(!intel_pstate->base.fb)) return -EINVAL; - ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][intel_plane->id]); + ddb_blocks = plane_id ? + skl_ddb_entry_size(&ddb->uv_plane[pipe][intel_plane_id]) : + skl_ddb_entry_size(&ddb->plane[pipe][intel_plane_id]); for (level = 0; level <= max_level; level++) { - struct skl_wm_level *result = &wm->wm[level]; + struct skl_wm_level *result = plane_id ? &wm->uv_wm[level] : + &wm->wm[level]; + struct skl_wm_level *result_prev; + + if (level) + result_prev = plane_id ? &wm->uv_wm[level - 1] : + &wm->wm[level - 1]; + else + result_prev = plane_id ? &wm->uv_wm[0] : &wm->wm[0]; ret = skl_compute_plane_wm(dev_priv, cstate, @@ -4653,13 +4776,15 @@ skl_compute_wm_levels(const struct drm_i915_private *dev_priv, ddb_blocks, level, wm_params, - &result->plane_res_b, - &result->plane_res_l, - &result->plane_en); + result_prev, + result); if (ret) return ret; } + if (intel_pstate->base.fb->format->format == DRM_FORMAT_NV12) + wm->is_planar = true; + return 0; } @@ -4769,20 +4894,39 @@ static int skl_build_pipe_wm(struct intel_crtc_state *cstate, wm = &pipe_wm->planes[plane_id]; ddb_blocks = skl_ddb_entry_size(&ddb->plane[pipe][plane_id]); - memset(&wm_params, 0, sizeof(struct skl_wm_params)); ret = skl_compute_plane_wm_params(dev_priv, cstate, - intel_pstate, &wm_params); + intel_pstate, &wm_params, 0); if (ret) return ret; ret = skl_compute_wm_levels(dev_priv, ddb, cstate, - intel_pstate, &wm_params, wm); + intel_pstate, &wm_params, wm, 0); if (ret) return ret; + skl_compute_transition_wm(cstate, &wm_params, &wm->wm[0], ddb_blocks, &wm->trans_wm); + + /* uv plane watermarks must also be validated for NV12/Planar */ + if (wm_params.is_planar) { + memset(&wm_params, 0, sizeof(struct skl_wm_params)); + wm->is_planar = true; + + ret = skl_compute_plane_wm_params(dev_priv, cstate, + intel_pstate, + &wm_params, 1); + if (ret) + return ret; + + ret = skl_compute_wm_levels(dev_priv, ddb, cstate, + intel_pstate, &wm_params, + wm, 1); + if (ret) + return ret; + } } + pipe_wm->linetime = skl_compute_linetime_wm(cstate); return 0; @@ -4833,10 +4977,21 @@ static void skl_write_plane_wm(struct intel_crtc *intel_crtc, skl_ddb_entry_write(dev_priv, PLANE_BUF_CFG(pipe, plane_id), &ddb->plane[pipe][plane_id]); - if (INTEL_GEN(dev_priv) < 11) + if (INTEL_GEN(dev_priv) >= 11) + return skl_ddb_entry_write(dev_priv, + PLANE_BUF_CFG(pipe, plane_id), + &ddb->plane[pipe][plane_id]); + if (wm->is_planar) { + skl_ddb_entry_write(dev_priv, PLANE_BUF_CFG(pipe, plane_id), + &ddb->uv_plane[pipe][plane_id]); skl_ddb_entry_write(dev_priv, PLANE_NV12_BUF_CFG(pipe, plane_id), - &ddb->y_plane[pipe][plane_id]); + &ddb->plane[pipe][plane_id]); + } else { + skl_ddb_entry_write(dev_priv, PLANE_BUF_CFG(pipe, plane_id), + &ddb->plane[pipe][plane_id]); + I915_WRITE(PLANE_NV12_BUF_CFG(pipe, plane_id), 0x0); + } } static void skl_write_cursor_wm(struct intel_crtc *intel_crtc, @@ -4944,15 +5099,13 @@ skl_ddb_add_affected_planes(struct intel_crtc_state *cstate) struct drm_plane *plane; enum pipe pipe = intel_crtc->pipe; - WARN_ON(!drm_atomic_get_existing_crtc_state(state, crtc)); - drm_for_each_plane_mask(plane, dev, cstate->base.plane_mask) { enum plane_id plane_id = to_intel_plane(plane)->id; if (skl_ddb_entry_equal(&cur_ddb->plane[pipe][plane_id], &new_ddb->plane[pipe][plane_id]) && - skl_ddb_entry_equal(&cur_ddb->y_plane[pipe][plane_id], - &new_ddb->y_plane[pipe][plane_id])) + skl_ddb_entry_equal(&cur_ddb->uv_plane[pipe][plane_id], + &new_ddb->uv_plane[pipe][plane_id])) continue; plane_state = drm_atomic_get_plane_state(state, plane); @@ -4966,69 +5119,16 @@ skl_ddb_add_affected_planes(struct intel_crtc_state *cstate) static int skl_compute_ddb(struct drm_atomic_state *state) { - struct drm_device *dev = state->dev; - struct drm_i915_private *dev_priv = to_i915(dev); + const struct drm_i915_private *dev_priv = to_i915(state->dev); struct intel_atomic_state *intel_state = to_intel_atomic_state(state); - struct intel_crtc *intel_crtc; struct skl_ddb_allocation *ddb = &intel_state->wm_results.ddb; - uint32_t realloc_pipes = pipes_modified(state); - int ret; - - /* - * If this is our first atomic update following hardware readout, - * we can't trust the DDB that the BIOS programmed for us. Let's - * pretend that all pipes switched active status so that we'll - * ensure a full DDB recompute. - */ - if (dev_priv->wm.distrust_bios_wm) { - ret = drm_modeset_lock(&dev->mode_config.connection_mutex, - state->acquire_ctx); - if (ret) - return ret; - - intel_state->active_pipe_changes = ~0; - - /* - * We usually only initialize intel_state->active_crtcs if we - * we're doing a modeset; make sure this field is always - * initialized during the sanitization process that happens - * on the first commit too. - */ - if (!intel_state->modeset) - intel_state->active_crtcs = dev_priv->active_crtcs; - } - - /* - * If the modeset changes which CRTC's are active, we need to - * recompute the DDB allocation for *all* active pipes, even - * those that weren't otherwise being modified in any way by this - * atomic commit. Due to the shrinking of the per-pipe allocations - * when new active CRTC's are added, it's possible for a pipe that - * we were already using and aren't changing at all here to suddenly - * become invalid if its DDB needs exceeds its new allocation. - * - * Note that if we wind up doing a full DDB recompute, we can't let - * any other display updates race with this transaction, so we need - * to grab the lock on *all* CRTC's. - */ - if (intel_state->active_pipe_changes) { - realloc_pipes = ~0; - intel_state->wm_results.dirty_pipes = ~0; - } + struct intel_crtc *crtc; + struct intel_crtc_state *cstate; + int ret, i; - /* - * We're not recomputing for the pipes not included in the commit, so - * make sure we start with the current state. - */ memcpy(ddb, &dev_priv->wm.skl_hw.ddb, sizeof(*ddb)); - for_each_intel_crtc_mask(dev, intel_crtc, realloc_pipes) { - struct intel_crtc_state *cstate; - - cstate = intel_atomic_get_crtc_state(state, intel_crtc); - if (IS_ERR(cstate)) - return PTR_ERR(cstate); - + for_each_new_intel_crtc_in_state(intel_state, crtc, cstate, i) { ret = skl_allocate_pipe_ddb(cstate, ddb); if (ret) return ret; @@ -5042,14 +5142,15 @@ skl_compute_ddb(struct drm_atomic_state *state) } static void -skl_copy_wm_for_pipe(struct skl_wm_values *dst, - struct skl_wm_values *src, - enum pipe pipe) +skl_copy_ddb_for_pipe(struct skl_ddb_values *dst, + struct skl_ddb_values *src, + enum pipe pipe) { - memcpy(dst->ddb.y_plane[pipe], src->ddb.y_plane[pipe], - sizeof(dst->ddb.y_plane[pipe])); + memcpy(dst->ddb.uv_plane[pipe], src->ddb.uv_plane[pipe], + sizeof(dst->ddb.uv_plane[pipe])); memcpy(dst->ddb.plane[pipe], src->ddb.plane[pipe], sizeof(dst->ddb.plane[pipe])); + dst->ddb.enabled_slices = src->ddb.enabled_slices; } static void @@ -5090,23 +5191,23 @@ skl_print_wm_changes(const struct drm_atomic_state *state) } static int -skl_compute_wm(struct drm_atomic_state *state) +skl_ddb_add_affected_pipes(struct drm_atomic_state *state, bool *changed) { - struct drm_crtc *crtc; - struct drm_crtc_state *cstate; - struct intel_atomic_state *intel_state = to_intel_atomic_state(state); - struct skl_wm_values *results = &intel_state->wm_results; struct drm_device *dev = state->dev; - struct skl_pipe_wm *pipe_wm; - bool changed = false; + const struct drm_i915_private *dev_priv = to_i915(dev); + const struct drm_crtc *crtc; + const struct drm_crtc_state *cstate; + struct intel_crtc *intel_crtc; + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + uint32_t realloc_pipes = pipes_modified(state); int ret, i; /* * When we distrust bios wm we always need to recompute to set the * expected DDB allocations for each CRTC. */ - if (to_i915(dev)->wm.distrust_bios_wm) - changed = true; + if (dev_priv->wm.distrust_bios_wm) + (*changed) = true; /* * If this transaction isn't actually touching any CRTC's, don't @@ -5117,14 +5218,86 @@ skl_compute_wm(struct drm_atomic_state *state) * hold _all_ CRTC state mutexes. */ for_each_new_crtc_in_state(state, crtc, cstate, i) - changed = true; + (*changed) = true; - if (!changed) + if (!*changed) return 0; + /* + * If this is our first atomic update following hardware readout, + * we can't trust the DDB that the BIOS programmed for us. Let's + * pretend that all pipes switched active status so that we'll + * ensure a full DDB recompute. + */ + if (dev_priv->wm.distrust_bios_wm) { + ret = drm_modeset_lock(&dev->mode_config.connection_mutex, + state->acquire_ctx); + if (ret) + return ret; + + intel_state->active_pipe_changes = ~0; + + /* + * We usually only initialize intel_state->active_crtcs if we + * we're doing a modeset; make sure this field is always + * initialized during the sanitization process that happens + * on the first commit too. + */ + if (!intel_state->modeset) + intel_state->active_crtcs = dev_priv->active_crtcs; + } + + /* + * If the modeset changes which CRTC's are active, we need to + * recompute the DDB allocation for *all* active pipes, even + * those that weren't otherwise being modified in any way by this + * atomic commit. Due to the shrinking of the per-pipe allocations + * when new active CRTC's are added, it's possible for a pipe that + * we were already using and aren't changing at all here to suddenly + * become invalid if its DDB needs exceeds its new allocation. + * + * Note that if we wind up doing a full DDB recompute, we can't let + * any other display updates race with this transaction, so we need + * to grab the lock on *all* CRTC's. + */ + if (intel_state->active_pipe_changes) { + realloc_pipes = ~0; + intel_state->wm_results.dirty_pipes = ~0; + } + + /* + * We're not recomputing for the pipes not included in the commit, so + * make sure we start with the current state. + */ + for_each_intel_crtc_mask(dev, intel_crtc, realloc_pipes) { + struct intel_crtc_state *cstate; + + cstate = intel_atomic_get_crtc_state(state, intel_crtc); + if (IS_ERR(cstate)) + return PTR_ERR(cstate); + } + + return 0; +} + +static int +skl_compute_wm(struct drm_atomic_state *state) +{ + struct drm_crtc *crtc; + struct drm_crtc_state *cstate; + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + struct skl_ddb_values *results = &intel_state->wm_results; + struct skl_pipe_wm *pipe_wm; + bool changed = false; + int ret, i; + /* Clear all dirty flags */ results->dirty_pipes = 0; + ret = skl_ddb_add_affected_pipes(state, &changed); + if (ret || !changed) + return ret; + ret = skl_compute_ddb(state); if (ret) return ret; @@ -5197,8 +5370,8 @@ static void skl_initial_wm(struct intel_atomic_state *state, struct intel_crtc *intel_crtc = to_intel_crtc(cstate->base.crtc); struct drm_device *dev = intel_crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct skl_wm_values *results = &state->wm_results; - struct skl_wm_values *hw_vals = &dev_priv->wm.skl_hw; + struct skl_ddb_values *results = &state->wm_results; + struct skl_ddb_values *hw_vals = &dev_priv->wm.skl_hw; enum pipe pipe = intel_crtc->pipe; if ((results->dirty_pipes & drm_crtc_mask(&intel_crtc->base)) == 0) @@ -5209,7 +5382,7 @@ static void skl_initial_wm(struct intel_atomic_state *state, if (cstate->base.active_changed) skl_atomic_update_crtc_wm(state, cstate); - skl_copy_wm_for_pipe(hw_vals, results, pipe); + skl_copy_ddb_for_pipe(hw_vals, results, pipe); mutex_unlock(&dev_priv->wm.wm_mutex); } @@ -5341,7 +5514,7 @@ void skl_pipe_wm_get_hw_state(struct drm_crtc *crtc, void skl_wm_get_hw_state(struct drm_device *dev) { struct drm_i915_private *dev_priv = to_i915(dev); - struct skl_wm_values *hw = &dev_priv->wm.skl_hw; + struct skl_ddb_values *hw = &dev_priv->wm.skl_hw; struct skl_ddb_allocation *ddb = &dev_priv->wm.skl_hw.ddb; struct drm_crtc *crtc; struct intel_crtc *intel_crtc; @@ -5362,8 +5535,12 @@ void skl_wm_get_hw_state(struct drm_device *dev) /* Fully recompute DDB on first atomic commit */ dev_priv->wm.distrust_bios_wm = true; } else { - /* Easy/common case; just sanitize DDB now if everything off */ - memset(ddb, 0, sizeof(*ddb)); + /* + * Easy/common case; just sanitize DDB now if everything off + * Keep dbuf slice info intact + */ + memset(ddb->plane, 0, sizeof(ddb->plane)); + memset(ddb->uv_plane, 0, sizeof(ddb->uv_plane)); } } @@ -6572,7 +6749,7 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) rps->efficient_freq = rps->rp1_freq; if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) || - IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { + IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) { u32 ddcc_status = 0; if (sandybridge_pcode_read(dev_priv, @@ -6585,7 +6762,7 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) rps->max_freq); } - if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) { /* Store the frequency values in 16.66 MHZ units, which is * the natural hardware unit for SKL */ @@ -6890,15 +7067,18 @@ static void gen6_enable_rps(struct drm_i915_private *dev_priv) static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) { struct intel_rps *rps = &dev_priv->gt_pm.rps; - int min_freq = 15; + const int min_freq = 15; + const int scaling_factor = 180; unsigned int gpu_freq; unsigned int max_ia_freq, min_ring_freq; unsigned int max_gpu_freq, min_gpu_freq; - int scaling_factor = 180; struct cpufreq_policy *policy; WARN_ON(!mutex_is_locked(&dev_priv->pcu_lock)); + if (rps->max_freq <= rps->min_freq) + return; + policy = cpufreq_cpu_get(0); if (policy) { max_ia_freq = policy->cpuinfo.max_freq; @@ -6918,13 +7098,12 @@ static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) /* convert DDR frequency from units of 266.6MHz to bandwidth */ min_ring_freq = mult_frac(min_ring_freq, 8, 3); - if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { + min_gpu_freq = rps->min_freq; + max_gpu_freq = rps->max_freq; + if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) { /* Convert GT frequency to 50 HZ units */ - min_gpu_freq = rps->min_freq / GEN9_FREQ_SCALER; - max_gpu_freq = rps->max_freq / GEN9_FREQ_SCALER; - } else { - min_gpu_freq = rps->min_freq; - max_gpu_freq = rps->max_freq; + min_gpu_freq /= GEN9_FREQ_SCALER; + max_gpu_freq /= GEN9_FREQ_SCALER; } /* @@ -6933,10 +7112,10 @@ static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) * the PCU should use as a reference to determine the ring frequency. */ for (gpu_freq = max_gpu_freq; gpu_freq >= min_gpu_freq; gpu_freq--) { - int diff = max_gpu_freq - gpu_freq; + const int diff = max_gpu_freq - gpu_freq; unsigned int ia_freq = 0, ring_freq = 0; - if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { + if (IS_GEN9_BC(dev_priv) || INTEL_GEN(dev_priv) >= 10) { /* * ring_freq = 2 * GT. ring_freq is in 100MHz units * No floor required for ring frequency on SKL. @@ -8026,10 +8205,10 @@ void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv) dev_priv->gt_pm.rc6.enabled = true; /* force RC6 disabling */ intel_disable_gt_powersave(dev_priv); - if (INTEL_GEN(dev_priv) < 11) - gen6_reset_rps_interrupts(dev_priv); + if (INTEL_GEN(dev_priv) >= 11) + gen11_reset_rps_interrupts(dev_priv); else - WARN_ON_ONCE(1); + gen6_reset_rps_interrupts(dev_priv); } static inline void intel_disable_llc_pstate(struct drm_i915_private *i915) @@ -8142,8 +8321,6 @@ static void intel_enable_rps(struct drm_i915_private *dev_priv) cherryview_enable_rps(dev_priv); } else if (IS_VALLEYVIEW(dev_priv)) { valleyview_enable_rps(dev_priv); - } else if (WARN_ON_ONCE(INTEL_GEN(dev_priv) >= 11)) { - /* TODO */ } else if (INTEL_GEN(dev_priv) >= 9) { gen9_enable_rps(dev_priv); } else if (IS_BROADWELL(dev_priv)) { @@ -8487,6 +8664,13 @@ static void gen8_set_l3sqc_credits(struct drm_i915_private *dev_priv, I915_WRITE(GEN7_MISCCPCTL, misccpctl); } +static void icl_init_clock_gating(struct drm_i915_private *dev_priv) +{ + /* This is not an Wa. Enable to reduce Sampler power */ + I915_WRITE(GEN10_DFR_RATIO_EN_AND_CHICKEN, + I915_READ(GEN10_DFR_RATIO_EN_AND_CHICKEN) & ~DFR_DISABLE); +} + static void cnp_init_clock_gating(struct drm_i915_private *dev_priv) { if (!HAS_PCH_CNP(dev_priv)) @@ -9013,7 +9197,9 @@ static void nop_init_clock_gating(struct drm_i915_private *dev_priv) */ void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv) { - if (IS_CANNONLAKE(dev_priv)) + if (IS_ICELAKE(dev_priv)) + dev_priv->display.init_clock_gating = icl_init_clock_gating; + else if (IS_CANNONLAKE(dev_priv)) dev_priv->display.init_clock_gating = cnl_init_clock_gating; else if (IS_COFFEELAKE(dev_priv)) dev_priv->display.init_clock_gating = cfl_init_clock_gating; diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index 23175c5..db27f2f 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -93,7 +93,115 @@ static void psr_aux_io_power_put(struct intel_dp *intel_dp) intel_display_power_put(dev_priv, psr_aux_domain(intel_dp)); } -static bool intel_dp_get_y_cord_status(struct intel_dp *intel_dp) +void intel_psr_irq_control(struct drm_i915_private *dev_priv, bool debug) +{ + u32 debug_mask, mask; + + /* No PSR interrupts on VLV/CHV */ + if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) + return; + + mask = EDP_PSR_ERROR(TRANSCODER_EDP); + debug_mask = EDP_PSR_POST_EXIT(TRANSCODER_EDP) | + EDP_PSR_PRE_ENTRY(TRANSCODER_EDP); + + if (INTEL_GEN(dev_priv) >= 8) { + mask |= EDP_PSR_ERROR(TRANSCODER_A) | + EDP_PSR_ERROR(TRANSCODER_B) | + EDP_PSR_ERROR(TRANSCODER_C); + + debug_mask |= EDP_PSR_POST_EXIT(TRANSCODER_A) | + EDP_PSR_PRE_ENTRY(TRANSCODER_A) | + EDP_PSR_POST_EXIT(TRANSCODER_B) | + EDP_PSR_PRE_ENTRY(TRANSCODER_B) | + EDP_PSR_POST_EXIT(TRANSCODER_C) | + EDP_PSR_PRE_ENTRY(TRANSCODER_C); + } + + if (debug) + mask |= debug_mask; + + WRITE_ONCE(dev_priv->psr.debug, debug); + I915_WRITE(EDP_PSR_IMR, ~mask); +} + +static void psr_event_print(u32 val, bool psr2_enabled) +{ + DRM_DEBUG_KMS("PSR exit events: 0x%x\n", val); + if (val & PSR_EVENT_PSR2_WD_TIMER_EXPIRE) + DRM_DEBUG_KMS("\tPSR2 watchdog timer expired\n"); + if ((val & PSR_EVENT_PSR2_DISABLED) && psr2_enabled) + DRM_DEBUG_KMS("\tPSR2 disabled\n"); + if (val & PSR_EVENT_SU_DIRTY_FIFO_UNDERRUN) + DRM_DEBUG_KMS("\tSU dirty FIFO underrun\n"); + if (val & PSR_EVENT_SU_CRC_FIFO_UNDERRUN) + DRM_DEBUG_KMS("\tSU CRC FIFO underrun\n"); + if (val & PSR_EVENT_GRAPHICS_RESET) + DRM_DEBUG_KMS("\tGraphics reset\n"); + if (val & PSR_EVENT_PCH_INTERRUPT) + DRM_DEBUG_KMS("\tPCH interrupt\n"); + if (val & PSR_EVENT_MEMORY_UP) + DRM_DEBUG_KMS("\tMemory up\n"); + if (val & PSR_EVENT_FRONT_BUFFER_MODIFY) + DRM_DEBUG_KMS("\tFront buffer modification\n"); + if (val & PSR_EVENT_WD_TIMER_EXPIRE) + DRM_DEBUG_KMS("\tPSR watchdog timer expired\n"); + if (val & PSR_EVENT_PIPE_REGISTERS_UPDATE) + DRM_DEBUG_KMS("\tPIPE registers updated\n"); + if (val & PSR_EVENT_REGISTER_UPDATE) + DRM_DEBUG_KMS("\tRegister updated\n"); + if (val & PSR_EVENT_HDCP_ENABLE) + DRM_DEBUG_KMS("\tHDCP enabled\n"); + if (val & PSR_EVENT_KVMR_SESSION_ENABLE) + DRM_DEBUG_KMS("\tKVMR session enabled\n"); + if (val & PSR_EVENT_VBI_ENABLE) + DRM_DEBUG_KMS("\tVBI enabled\n"); + if (val & PSR_EVENT_LPSP_MODE_EXIT) + DRM_DEBUG_KMS("\tLPSP mode exited\n"); + if ((val & PSR_EVENT_PSR_DISABLE) && !psr2_enabled) + DRM_DEBUG_KMS("\tPSR disabled\n"); +} + +void intel_psr_irq_handler(struct drm_i915_private *dev_priv, u32 psr_iir) +{ + u32 transcoders = BIT(TRANSCODER_EDP); + enum transcoder cpu_transcoder; + ktime_t time_ns = ktime_get(); + + if (INTEL_GEN(dev_priv) >= 8) + transcoders |= BIT(TRANSCODER_A) | + BIT(TRANSCODER_B) | + BIT(TRANSCODER_C); + + for_each_cpu_transcoder_masked(dev_priv, cpu_transcoder, transcoders) { + /* FIXME: Exit PSR and link train manually when this happens. */ + if (psr_iir & EDP_PSR_ERROR(cpu_transcoder)) + DRM_DEBUG_KMS("[transcoder %s] PSR aux error\n", + transcoder_name(cpu_transcoder)); + + if (psr_iir & EDP_PSR_PRE_ENTRY(cpu_transcoder)) { + dev_priv->psr.last_entry_attempt = time_ns; + DRM_DEBUG_KMS("[transcoder %s] PSR entry attempt in 2 vblanks\n", + transcoder_name(cpu_transcoder)); + } + + if (psr_iir & EDP_PSR_POST_EXIT(cpu_transcoder)) { + dev_priv->psr.last_exit = time_ns; + DRM_DEBUG_KMS("[transcoder %s] PSR exit completed\n", + transcoder_name(cpu_transcoder)); + + if (INTEL_GEN(dev_priv) >= 9) { + u32 val = I915_READ(PSR_EVENT(cpu_transcoder)); + bool psr2_enabled = dev_priv->psr.psr2_enabled; + + I915_WRITE(PSR_EVENT(cpu_transcoder), val); + psr_event_print(val, psr2_enabled); + } + } + } +} + +static bool intel_dp_get_y_coord_required(struct intel_dp *intel_dp) { uint8_t psr_caps = 0; @@ -122,6 +230,18 @@ static bool intel_dp_get_alpm_status(struct intel_dp *intel_dp) return alpm_caps & DP_ALPM_CAP; } +static u8 intel_dp_get_sink_sync_latency(struct intel_dp *intel_dp) +{ + u8 val = 0; + + if (drm_dp_dpcd_readb(&intel_dp->aux, + DP_SYNCHRONIZATION_LATENCY_IN_SINK, &val) == 1) + val &= DP_MAX_RESYNC_FRAME_COUNT_MASK; + else + DRM_ERROR("Unable to get sink synchronization latency\n"); + return val; +} + void intel_psr_init_dpcd(struct intel_dp *intel_dp) { struct drm_i915_private *dev_priv = @@ -130,33 +250,36 @@ void intel_psr_init_dpcd(struct intel_dp *intel_dp) drm_dp_dpcd_read(&intel_dp->aux, DP_PSR_SUPPORT, intel_dp->psr_dpcd, sizeof(intel_dp->psr_dpcd)); - if (intel_dp->psr_dpcd[0] & DP_PSR_IS_SUPPORTED) { + if (intel_dp->psr_dpcd[0]) { dev_priv->psr.sink_support = true; DRM_DEBUG_KMS("Detected EDP PSR Panel.\n"); } if (INTEL_GEN(dev_priv) >= 9 && - (intel_dp->psr_dpcd[0] & DP_PSR2_IS_SUPPORTED)) { - uint8_t frame_sync_cap; + (intel_dp->psr_dpcd[0] == DP_PSR2_WITH_Y_COORD_IS_SUPPORTED)) { + /* + * All panels that supports PSR version 03h (PSR2 + + * Y-coordinate) can handle Y-coordinates in VSC but we are + * only sure that it is going to be used when required by the + * panel. This way panel is capable to do selective update + * without a aux frame sync. + * + * To support PSR version 02h and PSR version 03h without + * Y-coordinate requirement panels we would need to enable + * GTC first. + */ + dev_priv->psr.sink_psr2_support = + intel_dp_get_y_coord_required(intel_dp); + DRM_DEBUG_KMS("PSR2 %s on sink", dev_priv->psr.sink_psr2_support + ? "supported" : "not supported"); - dev_priv->psr.sink_support = true; - if (drm_dp_dpcd_readb(&intel_dp->aux, - DP_SINK_DEVICE_AUX_FRAME_SYNC_CAP, - &frame_sync_cap) != 1) - frame_sync_cap = 0; - dev_priv->psr.aux_frame_sync = frame_sync_cap & DP_AUX_FRAME_SYNC_CAP; - /* PSR2 needs frame sync as well */ - dev_priv->psr.psr2_support = dev_priv->psr.aux_frame_sync; - DRM_DEBUG_KMS("PSR2 %s on sink", - dev_priv->psr.psr2_support ? "supported" : "not supported"); - - if (dev_priv->psr.psr2_support) { - dev_priv->psr.y_cord_support = - intel_dp_get_y_cord_status(intel_dp); + if (dev_priv->psr.sink_psr2_support) { dev_priv->psr.colorimetry_support = intel_dp_get_colorimetry_status(intel_dp); dev_priv->psr.alpm = intel_dp_get_alpm_status(intel_dp); + dev_priv->psr.sink_sync_latency = + intel_dp_get_sink_sync_latency(intel_dp); } } } @@ -193,21 +316,17 @@ static void hsw_psr_setup_vsc(struct intel_dp *intel_dp, struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); struct edp_vsc_psr psr_vsc; - if (dev_priv->psr.psr2_support) { + if (dev_priv->psr.psr2_enabled) { /* Prepare VSC Header for SU as per EDP 1.4 spec, Table 6.11 */ memset(&psr_vsc, 0, sizeof(psr_vsc)); psr_vsc.sdp_header.HB0 = 0; psr_vsc.sdp_header.HB1 = 0x7; - if (dev_priv->psr.colorimetry_support && - dev_priv->psr.y_cord_support) { + if (dev_priv->psr.colorimetry_support) { psr_vsc.sdp_header.HB2 = 0x5; psr_vsc.sdp_header.HB3 = 0x13; - } else if (dev_priv->psr.y_cord_support) { + } else { psr_vsc.sdp_header.HB2 = 0x4; psr_vsc.sdp_header.HB3 = 0xe; - } else { - psr_vsc.sdp_header.HB2 = 0x3; - psr_vsc.sdp_header.HB3 = 0xc; } } else { /* Prepare VSC packet as per EDP 1.3 spec, Table 3.10 */ @@ -228,31 +347,12 @@ static void vlv_psr_enable_sink(struct intel_dp *intel_dp) DP_PSR_ENABLE | DP_PSR_MAIN_LINK_ACTIVE); } -static i915_reg_t psr_aux_ctl_reg(struct drm_i915_private *dev_priv, - enum port port) -{ - if (INTEL_GEN(dev_priv) >= 9) - return DP_AUX_CH_CTL(port); - else - return EDP_PSR_AUX_CTL; -} - -static i915_reg_t psr_aux_data_reg(struct drm_i915_private *dev_priv, - enum port port, int index) -{ - if (INTEL_GEN(dev_priv) >= 9) - return DP_AUX_CH_DATA(port, index); - else - return EDP_PSR_AUX_DATA(index); -} - -static void hsw_psr_enable_sink(struct intel_dp *intel_dp) +static void hsw_psr_setup_aux(struct intel_dp *intel_dp) { struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); - struct drm_device *dev = dig_port->base.base.dev; - struct drm_i915_private *dev_priv = to_i915(dev); - uint32_t aux_clock_divider; - i915_reg_t aux_ctl_reg; + struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev); + u32 aux_clock_divider, aux_ctl; + int i; static const uint8_t aux_msg[] = { [0] = DP_AUX_NATIVE_WRITE << 4, [1] = DP_SET_POWER >> 8, @@ -260,41 +360,47 @@ static void hsw_psr_enable_sink(struct intel_dp *intel_dp) [3] = 1 - 1, [4] = DP_SET_POWER_D0, }; - enum port port = dig_port->base.port; - u32 aux_ctl; - int i; + u32 psr_aux_mask = EDP_PSR_AUX_CTL_TIME_OUT_MASK | + EDP_PSR_AUX_CTL_MESSAGE_SIZE_MASK | + EDP_PSR_AUX_CTL_PRECHARGE_2US_MASK | + EDP_PSR_AUX_CTL_BIT_CLOCK_2X_MASK; BUILD_BUG_ON(sizeof(aux_msg) > 20); + for (i = 0; i < sizeof(aux_msg); i += 4) + I915_WRITE(EDP_PSR_AUX_DATA(i >> 2), + intel_dp_pack_aux(&aux_msg[i], sizeof(aux_msg) - i)); aux_clock_divider = intel_dp->get_aux_clock_divider(intel_dp, 0); - /* Enable AUX frame sync at sink */ - if (dev_priv->psr.aux_frame_sync) - drm_dp_dpcd_writeb(&intel_dp->aux, - DP_SINK_DEVICE_AUX_FRAME_SYNC_CONF, - DP_AUX_FRAME_SYNC_ENABLE); + /* Start with bits set for DDI_AUX_CTL register */ + aux_ctl = intel_dp->get_aux_send_ctl(intel_dp, 0, sizeof(aux_msg), + aux_clock_divider); + + /* Select only valid bits for SRD_AUX_CTL */ + aux_ctl &= psr_aux_mask; + I915_WRITE(EDP_PSR_AUX_CTL, aux_ctl); +} + +static void hsw_psr_enable_sink(struct intel_dp *intel_dp) +{ + struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp); + struct drm_device *dev = dig_port->base.base.dev; + struct drm_i915_private *dev_priv = to_i915(dev); + u8 dpcd_val = DP_PSR_ENABLE; + /* Enable ALPM at sink for psr2 */ - if (dev_priv->psr.psr2_support && dev_priv->psr.alpm) + if (dev_priv->psr.psr2_enabled && dev_priv->psr.alpm) drm_dp_dpcd_writeb(&intel_dp->aux, DP_RECEIVER_ALPM_CONFIG, DP_ALPM_ENABLE); - if (dev_priv->psr.link_standby) - drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG, - DP_PSR_ENABLE | DP_PSR_MAIN_LINK_ACTIVE); - else - drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG, - DP_PSR_ENABLE); - - aux_ctl_reg = psr_aux_ctl_reg(dev_priv, port); - /* Setup AUX registers */ - for (i = 0; i < sizeof(aux_msg); i += 4) - I915_WRITE(psr_aux_data_reg(dev_priv, port, i >> 2), - intel_dp_pack_aux(&aux_msg[i], sizeof(aux_msg) - i)); + if (dev_priv->psr.psr2_enabled) + dpcd_val |= DP_PSR_ENABLE_PSR2; + if (dev_priv->psr.link_standby) + dpcd_val |= DP_PSR_MAIN_LINK_ACTIVE; + drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG, dpcd_val); - aux_ctl = intel_dp->get_aux_send_ctl(intel_dp, 0, sizeof(aux_msg), - aux_clock_divider); - I915_WRITE(aux_ctl_reg, aux_ctl); + drm_dp_dpcd_writeb(&intel_dp->aux, DP_SET_POWER, DP_SET_POWER_D0); } static void vlv_psr_enable_source(struct intel_dp *intel_dp, @@ -396,25 +502,16 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp) * with the 5 or 6 idle patterns. */ uint32_t idle_frames = max(6, dev_priv->vbt.psr.idle_frames); - uint32_t val; - uint8_t sink_latency; - - val = idle_frames << EDP_PSR_IDLE_FRAME_SHIFT; + u32 val = idle_frames << EDP_PSR2_IDLE_FRAME_SHIFT; /* FIXME: selective update is probably totally broken because it doesn't * mesh at all with our frontbuffer tracking. And the hw alone isn't * good enough. */ - val |= EDP_PSR2_ENABLE | - EDP_SU_TRACK_ENABLE; + val |= EDP_PSR2_ENABLE | EDP_SU_TRACK_ENABLE; + if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) + val |= EDP_Y_COORDINATE_ENABLE; - if (drm_dp_dpcd_readb(&intel_dp->aux, - DP_SYNCHRONIZATION_LATENCY_IN_SINK, - &sink_latency) == 1) { - sink_latency &= DP_MAX_RESYNC_FRAME_COUNT_MASK; - } else { - sink_latency = 0; - } - val |= EDP_PSR2_FRAME_BEFORE_SU(sink_latency + 1); + val |= EDP_PSR2_FRAME_BEFORE_SU(dev_priv->psr.sink_sync_latency + 1); if (dev_priv->vbt.psr.tp2_tp3_wakeup_time > 5) val |= EDP_PSR2_TP2_TIME_2500; @@ -440,7 +537,7 @@ static void hsw_psr_activate(struct intel_dp *intel_dp) */ /* psr1 and psr2 are mutually exclusive.*/ - if (dev_priv->psr.psr2_support) + if (dev_priv->psr.psr2_enabled) hsw_activate_psr2(intel_dp); else hsw_activate_psr1(intel_dp); @@ -460,7 +557,7 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, * dynamically during PSR enable, and extracted from sink * caps during eDP detection. */ - if (!dev_priv->psr.psr2_support) + if (!dev_priv->psr.sink_psr2_support) return false; if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) { @@ -478,15 +575,6 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp, return false; } - /* - * FIXME:enable psr2 only for y-cordinate psr2 panels - * After gtc implementation , remove this restriction. - */ - if (!dev_priv->psr.y_cord_support) { - DRM_DEBUG_KMS("PSR2 not enabled, panel does not support Y coordinate\n"); - return false; - } - return true; } @@ -568,7 +656,7 @@ static void intel_psr_activate(struct intel_dp *intel_dp) struct drm_device *dev = intel_dig_port->base.base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - if (dev_priv->psr.psr2_support) + if (dev_priv->psr.psr2_enabled) WARN_ON(I915_READ(EDP_PSR2_CTL) & EDP_PSR2_ENABLE); else WARN_ON(I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE); @@ -586,14 +674,24 @@ static void hsw_psr_enable_source(struct intel_dp *intel_dp, struct drm_device *dev = dig_port->base.base.dev; struct drm_i915_private *dev_priv = to_i915(dev); enum transcoder cpu_transcoder = crtc_state->cpu_transcoder; - u32 chicken; psr_aux_io_power_get(intel_dp); - if (dev_priv->psr.psr2_support) { - chicken = PSR2_VSC_ENABLE_PROG_HEADER; - if (dev_priv->psr.y_cord_support) - chicken |= PSR2_ADD_VERTICAL_LINE_COUNT; + /* Only HSW and BDW have PSR AUX registers that need to be setup. SKL+ + * use hardcoded values PSR AUX transactions + */ + if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) + hsw_psr_setup_aux(intel_dp); + + if (dev_priv->psr.psr2_enabled) { + u32 chicken = I915_READ(CHICKEN_TRANS(cpu_transcoder)); + + if (INTEL_GEN(dev_priv) == 9 && !IS_GEMINILAKE(dev_priv)) + chicken |= (PSR2_VSC_ENABLE_PROG_HEADER + | PSR2_ADD_VERTICAL_LINE_COUNT); + + else + chicken &= ~VSC_DATA_SEL_SOFTWARE_CONTROL; I915_WRITE(CHICKEN_TRANS(cpu_transcoder), chicken); I915_WRITE(EDP_PSR_DEBUG, @@ -613,7 +711,8 @@ static void hsw_psr_enable_source(struct intel_dp *intel_dp, I915_WRITE(EDP_PSR_DEBUG, EDP_PSR_DEBUG_MASK_MEMUP | EDP_PSR_DEBUG_MASK_HPD | - EDP_PSR_DEBUG_MASK_LPSP); + EDP_PSR_DEBUG_MASK_LPSP | + EDP_PSR_DEBUG_MASK_DISP_REG_WRITE); } } @@ -644,7 +743,7 @@ void intel_psr_enable(struct intel_dp *intel_dp, goto unlock; } - dev_priv->psr.psr2_support = crtc_state->has_psr2; + dev_priv->psr.psr2_enabled = crtc_state->has_psr2; dev_priv->psr.busy_frontbuffer_bits = 0; dev_priv->psr.setup_vsc(intel_dp, crtc_state); @@ -714,12 +813,7 @@ static void hsw_psr_disable(struct intel_dp *intel_dp, i915_reg_t psr_status; u32 psr_status_mask; - if (dev_priv->psr.aux_frame_sync) - drm_dp_dpcd_writeb(&intel_dp->aux, - DP_SINK_DEVICE_AUX_FRAME_SYNC_CONF, - 0); - - if (dev_priv->psr.psr2_support) { + if (dev_priv->psr.psr2_enabled) { psr_status = EDP_PSR2_STATUS; psr_status_mask = EDP_PSR2_STATUS_STATE_MASK; @@ -743,7 +837,7 @@ static void hsw_psr_disable(struct intel_dp *intel_dp, dev_priv->psr.active = false; } else { - if (dev_priv->psr.psr2_support) + if (dev_priv->psr.psr2_enabled) WARN_ON(I915_READ(EDP_PSR2_CTL) & EDP_PSR2_ENABLE); else WARN_ON(I915_READ(EDP_PSR_CTL) & EDP_PSR_ENABLE); @@ -789,53 +883,59 @@ void intel_psr_disable(struct intel_dp *intel_dp, cancel_delayed_work_sync(&dev_priv->psr.work); } -static void intel_psr_work(struct work_struct *work) +static bool psr_wait_for_idle(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = - container_of(work, typeof(*dev_priv), psr.work.work); - struct intel_dp *intel_dp = dev_priv->psr.enabled; - struct drm_crtc *crtc = dp_to_dig_port(intel_dp)->base.base.crtc; - enum pipe pipe = to_intel_crtc(crtc)->pipe; + struct intel_dp *intel_dp; + i915_reg_t reg; + u32 mask; + int err; + + intel_dp = dev_priv->psr.enabled; + if (!intel_dp) + return false; - /* We have to make sure PSR is ready for re-enable - * otherwise it keeps disabled until next full enable/disable cycle. - * PSR might take some time to get fully disabled - * and be ready for re-enable. - */ if (HAS_DDI(dev_priv)) { - if (dev_priv->psr.psr2_support) { - if (intel_wait_for_register(dev_priv, - EDP_PSR2_STATUS, - EDP_PSR2_STATUS_STATE_MASK, - 0, - 50)) { - DRM_ERROR("Timed out waiting for PSR2 Idle for re-enable\n"); - return; - } + if (dev_priv->psr.psr2_enabled) { + reg = EDP_PSR2_STATUS; + mask = EDP_PSR2_STATUS_STATE_MASK; } else { - if (intel_wait_for_register(dev_priv, - EDP_PSR_STATUS, - EDP_PSR_STATUS_STATE_MASK, - 0, - 50)) { - DRM_ERROR("Timed out waiting for PSR Idle for re-enable\n"); - return; - } + reg = EDP_PSR_STATUS; + mask = EDP_PSR_STATUS_STATE_MASK; } } else { - if (intel_wait_for_register(dev_priv, - VLV_PSRSTAT(pipe), - VLV_EDP_PSR_IN_TRANS, - 0, - 1)) { - DRM_ERROR("Timed out waiting for PSR Idle for re-enable\n"); - return; - } + struct drm_crtc *crtc = + dp_to_dig_port(intel_dp)->base.base.crtc; + enum pipe pipe = to_intel_crtc(crtc)->pipe; + + reg = VLV_PSRSTAT(pipe); + mask = VLV_EDP_PSR_IN_TRANS; } + + mutex_unlock(&dev_priv->psr.lock); + + err = intel_wait_for_register(dev_priv, reg, mask, 0, 50); + if (err) + DRM_ERROR("Timed out waiting for PSR Idle for re-enable\n"); + + /* After the unlocked wait, verify that PSR is still wanted! */ mutex_lock(&dev_priv->psr.lock); - intel_dp = dev_priv->psr.enabled; + return err == 0 && dev_priv->psr.enabled; +} - if (!intel_dp) +static void intel_psr_work(struct work_struct *work) +{ + struct drm_i915_private *dev_priv = + container_of(work, typeof(*dev_priv), psr.work.work); + + mutex_lock(&dev_priv->psr.lock); + + /* + * We have to make sure PSR is ready for re-enable + * otherwise it keeps disabled until next full enable/disable cycle. + * PSR might take some time to get fully disabled + * and be ready for re-enable. + */ + if (!psr_wait_for_idle(dev_priv)) goto unlock; /* @@ -846,7 +946,7 @@ static void intel_psr_work(struct work_struct *work) if (dev_priv->psr.busy_frontbuffer_bits) goto unlock; - intel_psr_activate(intel_dp); + intel_psr_activate(dev_priv->psr.enabled); unlock: mutex_unlock(&dev_priv->psr.lock); } @@ -862,11 +962,7 @@ static void intel_psr_exit(struct drm_i915_private *dev_priv) return; if (HAS_DDI(dev_priv)) { - if (dev_priv->psr.aux_frame_sync) - drm_dp_dpcd_writeb(&intel_dp->aux, - DP_SINK_DEVICE_AUX_FRAME_SYNC_CONF, - 0); - if (dev_priv->psr.psr2_support) { + if (dev_priv->psr.psr2_enabled) { val = I915_READ(EDP_PSR2_CTL); WARN_ON(!(val & EDP_PSR2_ENABLE)); I915_WRITE(EDP_PSR2_CTL, val & ~EDP_PSR2_ENABLE); @@ -957,6 +1053,7 @@ void intel_psr_single_frame_update(struct drm_i915_private *dev_priv, * intel_psr_invalidate - Invalidade PSR * @dev_priv: i915 device * @frontbuffer_bits: frontbuffer plane tracking bits + * @origin: which operation caused the invalidate * * Since the hardware frontbuffer tracking has gaps we need to integrate * with the software frontbuffer tracking. This function gets called every @@ -966,7 +1063,7 @@ void intel_psr_single_frame_update(struct drm_i915_private *dev_priv, * Dirty frontbuffers relevant to PSR are tracked in busy_frontbuffer_bits." */ void intel_psr_invalidate(struct drm_i915_private *dev_priv, - unsigned frontbuffer_bits) + unsigned frontbuffer_bits, enum fb_op_origin origin) { struct drm_crtc *crtc; enum pipe pipe; @@ -974,6 +1071,9 @@ void intel_psr_invalidate(struct drm_i915_private *dev_priv, if (!CAN_PSR(dev_priv)) return; + if (dev_priv->psr.has_hw_tracking && origin == ORIGIN_FLIP) + return; + mutex_lock(&dev_priv->psr.lock); if (!dev_priv->psr.enabled) { mutex_unlock(&dev_priv->psr.lock); @@ -1014,6 +1114,9 @@ void intel_psr_flush(struct drm_i915_private *dev_priv, if (!CAN_PSR(dev_priv)) return; + if (dev_priv->psr.has_hw_tracking && origin == ORIGIN_FLIP) + return; + mutex_lock(&dev_priv->psr.lock); if (!dev_priv->psr.enabled) { mutex_unlock(&dev_priv->psr.lock); @@ -1027,8 +1130,23 @@ void intel_psr_flush(struct drm_i915_private *dev_priv, dev_priv->psr.busy_frontbuffer_bits &= ~frontbuffer_bits; /* By definition flush = invalidate + flush */ - if (frontbuffer_bits) - intel_psr_exit(dev_priv); + if (frontbuffer_bits) { + if (dev_priv->psr.psr2_enabled || + IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) { + intel_psr_exit(dev_priv); + } else { + /* + * Display WA #0884: all + * This documented WA for bxt can be safely applied + * broadly so we can force HW tracking to exit PSR + * instead of disabling and re-enabling. + * Workaround tells us to write 0 to CUR_SURFLIVE_A, + * but it makes more sense write to the current active + * pipe. + */ + I915_WRITE(CURSURFLIVE(pipe), 0); + } + } if (!dev_priv->psr.active && !dev_priv->psr.busy_frontbuffer_bits) if (!work_busy(&dev_priv->psr.work.work)) @@ -1055,9 +1173,12 @@ void intel_psr_init(struct drm_i915_private *dev_priv) if (!dev_priv->psr.sink_support) return; - /* Per platform default: all disabled. */ - if (i915_modparams.enable_psr == -1) + if (i915_modparams.enable_psr == -1) { + i915_modparams.enable_psr = dev_priv->vbt.psr.enable; + + /* Per platform default: all disabled. */ i915_modparams.enable_psr = 0; + } /* Set link_standby x link_off defaults */ if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) @@ -1090,6 +1211,7 @@ void intel_psr_init(struct drm_i915_private *dev_priv) dev_priv->psr.activate = vlv_psr_activate; dev_priv->psr.setup_vsc = vlv_psr_setup_vsc; } else { + dev_priv->psr.has_hw_tracking = true; dev_priv->psr.enable_source = hsw_psr_enable_source; dev_priv->psr.disable_source = hsw_psr_disable; dev_priv->psr.enable_sink = hsw_psr_enable_sink; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 1d59952..8f19349 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -36,6 +36,7 @@ #include "i915_gem_render_state.h" #include "i915_trace.h" #include "intel_drv.h" +#include "intel_workarounds.h" /* Rough estimate of the typical request size, performing a flush, * set-context and then emitting the batch. @@ -557,7 +558,8 @@ static void reset_ring_common(struct intel_engine_cs *engine, */ if (request) { struct drm_i915_private *dev_priv = request->i915; - struct intel_context *ce = &request->ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(request->ctx, + engine); struct i915_hw_ppgtt *ppgtt; if (ce->state) { @@ -599,7 +601,7 @@ static int intel_rcs_ctx_init(struct i915_request *rq) { int ret; - ret = intel_ring_workarounds_emit(rq); + ret = intel_ctx_workarounds_emit(rq); if (ret != 0) return ret; @@ -617,6 +619,8 @@ static int init_render_ring(struct intel_engine_cs *engine) if (ret) return ret; + intel_whitelist_workarounds_apply(engine); + /* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */ if (IS_GEN(dev_priv, 4, 6)) I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH)); @@ -658,7 +662,7 @@ static int init_render_ring(struct intel_engine_cs *engine) if (INTEL_GEN(dev_priv) >= 6) I915_WRITE_IMR(engine, ~engine->irq_keep_mask); - return init_workarounds_ring(engine); + return 0; } static u32 *gen6_signal(struct i915_request *rq, u32 *cs) @@ -693,17 +697,17 @@ static void cancel_requests(struct intel_engine_cs *engine) struct i915_request *request; unsigned long flags; - spin_lock_irqsave(&engine->timeline->lock, flags); + spin_lock_irqsave(&engine->timeline.lock, flags); /* Mark all submitted requests as skipped. */ - list_for_each_entry(request, &engine->timeline->requests, link) { + list_for_each_entry(request, &engine->timeline.requests, link) { GEM_BUG_ON(!request->global_seqno); if (!i915_request_completed(request)) dma_fence_set_error(&request->fence, -EIO); } /* Remaining _unready_ requests will be nop'ed when submitted */ - spin_unlock_irqrestore(&engine->timeline->lock, flags); + spin_unlock_irqrestore(&engine->timeline.lock, flags); } static void i9xx_submit_request(struct i915_request *request) @@ -1062,7 +1066,6 @@ err: void intel_ring_reset(struct intel_ring *ring, u32 tail) { - GEM_BUG_ON(!list_empty(&ring->request_list)); ring->tail = tail; ring->head = tail; ring->emit = tail; @@ -1114,19 +1117,24 @@ err: } struct intel_ring * -intel_engine_create_ring(struct intel_engine_cs *engine, int size) +intel_engine_create_ring(struct intel_engine_cs *engine, + struct i915_timeline *timeline, + int size) { struct intel_ring *ring; struct i915_vma *vma; GEM_BUG_ON(!is_power_of_2(size)); GEM_BUG_ON(RING_CTL_SIZE(size) & ~RING_NR_PAGES); + GEM_BUG_ON(timeline == &engine->timeline); + lockdep_assert_held(&engine->i915->drm.struct_mutex); ring = kzalloc(sizeof(*ring), GFP_KERNEL); if (!ring) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&ring->request_list); + ring->timeline = i915_timeline_get(timeline); ring->size = size; /* Workaround an erratum on the i830 which causes a hang if @@ -1157,12 +1165,13 @@ intel_ring_free(struct intel_ring *ring) i915_vma_close(ring->vma); __i915_gem_object_release_unless_active(obj); + i915_timeline_put(ring->timeline); kfree(ring); } -static int context_pin(struct i915_gem_context *ctx) +static int context_pin(struct intel_context *ce) { - struct i915_vma *vma = ctx->engine[RCS].state; + struct i915_vma *vma = ce->state; int ret; /* @@ -1253,7 +1262,7 @@ static struct intel_ring * intel_ring_context_pin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(ctx, engine); int ret; lockdep_assert_held(&ctx->i915->drm.struct_mutex); @@ -1275,7 +1284,7 @@ intel_ring_context_pin(struct intel_engine_cs *engine, } if (ce->state) { - ret = context_pin(ctx); + ret = context_pin(ce); if (ret) goto err; @@ -1296,7 +1305,7 @@ err: static void intel_ring_context_unpin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { - struct intel_context *ce = &ctx->engine[engine->id]; + struct intel_context *ce = to_intel_context(ctx, engine); lockdep_assert_held(&ctx->i915->drm.struct_mutex); GEM_BUG_ON(ce->pin_count == 0); @@ -1315,6 +1324,7 @@ static void intel_ring_context_unpin(struct intel_engine_cs *engine, static int intel_init_ring_buffer(struct intel_engine_cs *engine) { struct intel_ring *ring; + struct i915_timeline *timeline; int err; intel_engine_setup_common(engine); @@ -1323,7 +1333,14 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine) if (err) goto err; - ring = intel_engine_create_ring(engine, 32 * PAGE_SIZE); + timeline = i915_timeline_create(engine->i915, engine->name); + if (IS_ERR(timeline)) { + err = PTR_ERR(timeline); + goto err; + } + + ring = intel_engine_create_ring(engine, timeline, 32 * PAGE_SIZE); + i915_timeline_put(timeline); if (IS_ERR(ring)) { err = PTR_ERR(ring); goto err; @@ -1424,7 +1441,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags) *cs++ = MI_NOOP; *cs++ = MI_SET_CONTEXT; - *cs++ = i915_ggtt_offset(rq->ctx->engine[RCS].state) | flags; + *cs++ = i915_ggtt_offset(to_intel_context(rq->ctx, engine)->state) | flags; /* * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP * WaMiSetContext_Hang:snb,ivb,vlv @@ -1515,7 +1532,7 @@ static int switch_context(struct i915_request *rq) hw_flags = MI_FORCE_RESTORE; } - if (to_ctx->engine[engine->id].state && + if (to_intel_context(to_ctx, engine)->state && (to_ctx != from_ctx || hw_flags & MI_FORCE_RESTORE)) { GEM_BUG_ON(engine->id != RCS); @@ -1563,7 +1580,7 @@ static int ring_request_alloc(struct i915_request *request) { int ret; - GEM_BUG_ON(!request->ctx->engine[request->engine->id].pin_count); + GEM_BUG_ON(!to_intel_context(request->ctx, request->engine)->pin_count); /* Flush enough space to reduce the likelihood of waiting after * we start building the request - in which case we will just @@ -1593,6 +1610,7 @@ static noinline int wait_for_space(struct intel_ring *ring, unsigned int bytes) if (intel_ring_update_space(ring) >= bytes) return 0; + GEM_BUG_ON(list_empty(&ring->request_list)); list_for_each_entry(target, &ring->request_list, ring_link) { /* Would completion of this request free enough space? */ if (bytes <= __intel_ring_space(target->postfix, @@ -1692,17 +1710,18 @@ u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords) need_wrap &= ~1; GEM_BUG_ON(need_wrap > ring->space); GEM_BUG_ON(ring->emit + need_wrap > ring->size); + GEM_BUG_ON(!IS_ALIGNED(need_wrap, sizeof(u64))); /* Fill the tail with MI_NOOP */ - memset(ring->vaddr + ring->emit, 0, need_wrap); - ring->emit = 0; + memset64(ring->vaddr + ring->emit, 0, need_wrap / sizeof(u64)); ring->space -= need_wrap; + ring->emit = 0; } GEM_BUG_ON(ring->emit > ring->size - bytes); GEM_BUG_ON(ring->space < bytes); cs = ring->vaddr + ring->emit; - GEM_DEBUG_EXEC(memset(cs, POISON_INUSE, bytes)); + GEM_DEBUG_EXEC(memset32(cs, POISON_INUSE, bytes / sizeof(*cs))); ring->emit += bytes; ring->space -= bytes; @@ -1712,22 +1731,24 @@ u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords) /* Align the ring tail to a cacheline boundary */ int intel_ring_cacheline_align(struct i915_request *rq) { - int num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32); - u32 *cs; + int num_dwords; + void *cs; + num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32); if (num_dwords == 0) return 0; - num_dwords = CACHELINE_BYTES / sizeof(u32) - num_dwords; + num_dwords = CACHELINE_DWORDS - num_dwords; + GEM_BUG_ON(num_dwords & 1); + cs = intel_ring_begin(rq, num_dwords); if (IS_ERR(cs)) return PTR_ERR(cs); - while (num_dwords--) - *cs++ = MI_NOOP; - + memset64(cs, (u64)MI_NOOP << 32 | MI_NOOP, num_dwords / 2); intel_ring_advance(rq, cs); + GEM_BUG_ON(rq->ring->emit & (CACHELINE_BYTES - 1)); return 0; } @@ -1943,8 +1964,6 @@ static void intel_ring_init_semaphores(struct drm_i915_private *dev_priv, static void intel_ring_init_irq(struct drm_i915_private *dev_priv, struct intel_engine_cs *engine) { - engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << engine->irq_shift; - if (INTEL_GEN(dev_priv) >= 6) { engine->irq_enable = gen6_irq_enable; engine->irq_disable = gen6_irq_disable; @@ -2029,6 +2048,8 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine) if (HAS_L3_DPF(dev_priv)) engine->irq_keep_mask = GT_RENDER_L3_PARITY_ERROR_INTERRUPT; + engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT; + if (INTEL_GEN(dev_priv) >= 6) { engine->init_context = intel_rcs_ctx_init; engine->emit_flush = gen7_render_ring_flush; @@ -2079,7 +2100,6 @@ int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine) engine->emit_flush = gen6_bsd_ring_flush; engine->irq_enable_mask = GT_BSD_USER_INTERRUPT; } else { - engine->mmio_base = BSD_RING_BASE; engine->emit_flush = bsd_ring_flush; if (IS_GEN5(dev_priv)) engine->irq_enable_mask = ILK_BSD_USER_INTERRUPT; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 0320c2c..010750e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -3,15 +3,19 @@ #define _INTEL_RINGBUFFER_H_ #include <linux/hashtable.h> +#include <linux/seqlock.h> #include "i915_gem_batch_pool.h" -#include "i915_gem_timeline.h" +#include "i915_reg.h" #include "i915_pmu.h" #include "i915_request.h" #include "i915_selftest.h" +#include "i915_timeline.h" +#include "intel_gpu_commands.h" struct drm_printer; +struct i915_sched_attr; #define I915_CMD_HASH_ORDER 9 @@ -84,7 +88,7 @@ hangcheck_action_to_str(const enum intel_engine_hangcheck_action a) } #define I915_MAX_SLICES 3 -#define I915_MAX_SUBSLICES 3 +#define I915_MAX_SUBSLICES 8 #define instdone_slice_mask(dev_priv__) \ (INTEL_GEN(dev_priv__) == 7 ? \ @@ -125,7 +129,9 @@ struct intel_ring { struct i915_vma *vma; void *vaddr; + struct i915_timeline *timeline; struct list_head request_list; + struct list_head active_link; u32 head; u32 tail; @@ -330,10 +336,10 @@ struct intel_engine_cs { u8 instance; u32 context_size; u32 mmio_base; - unsigned int irq_shift; struct intel_ring *buffer; - struct intel_timeline *timeline; + + struct i915_timeline timeline; struct drm_i915_gem_object *default_state; @@ -459,7 +465,8 @@ struct intel_engine_cs { * * Called under the struct_mutex. */ - void (*schedule)(struct i915_request *request, int priority); + void (*schedule)(struct i915_request *request, + const struct i915_sched_attr *attr); /* * Cancel all requests on the hardware, or queued for execution. @@ -561,6 +568,7 @@ struct intel_engine_cs { #define I915_ENGINE_NEEDS_CMD_PARSER BIT(0) #define I915_ENGINE_SUPPORTS_STATS BIT(1) +#define I915_ENGINE_HAS_PREEMPTION BIT(2) unsigned int flags; /* @@ -591,7 +599,7 @@ struct intel_engine_cs { /** * @lock: Lock protecting the below fields. */ - spinlock_t lock; + seqlock_t lock; /** * @enabled: Reference count indicating number of listeners. */ @@ -620,16 +628,29 @@ struct intel_engine_cs { } stats; }; -static inline bool intel_engine_needs_cmd_parser(struct intel_engine_cs *engine) +static inline bool +intel_engine_needs_cmd_parser(const struct intel_engine_cs *engine) { return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER; } -static inline bool intel_engine_supports_stats(struct intel_engine_cs *engine) +static inline bool +intel_engine_supports_stats(const struct intel_engine_cs *engine) { return engine->flags & I915_ENGINE_SUPPORTS_STATS; } +static inline bool +intel_engine_has_preemption(const struct intel_engine_cs *engine) +{ + return engine->flags & I915_ENGINE_HAS_PREEMPTION; +} + +static inline bool __execlists_need_preempt(int prio, int last) +{ + return prio > max(0, last); +} + static inline void execlists_set_active(struct intel_engine_execlists *execlists, unsigned int bit) @@ -637,6 +658,13 @@ execlists_set_active(struct intel_engine_execlists *execlists, __set_bit(bit, (unsigned long *)&execlists->active); } +static inline bool +execlists_set_active_once(struct intel_engine_execlists *execlists, + unsigned int bit) +{ + return !__test_and_set_bit(bit, (unsigned long *)&execlists->active); +} + static inline void execlists_clear_active(struct intel_engine_execlists *execlists, unsigned int bit) @@ -651,6 +679,10 @@ execlists_is_active(const struct intel_engine_execlists *execlists, return test_bit(bit, (unsigned long *)&execlists->active); } +void execlists_user_begin(struct intel_engine_execlists *execlists, + const struct execlist_port *port); +void execlists_user_end(struct intel_engine_execlists *execlists); + void execlists_cancel_port_requests(struct intel_engine_execlists * const execlists); @@ -663,7 +695,7 @@ execlists_num_ports(const struct intel_engine_execlists * const execlists) return execlists->port_mask + 1; } -static inline void +static inline struct execlist_port * execlists_port_complete(struct intel_engine_execlists * const execlists, struct execlist_port * const port) { @@ -674,6 +706,8 @@ execlists_port_complete(struct intel_engine_execlists * const execlists, memmove(port, port + 1, m * sizeof(struct execlist_port)); memset(port + m, 0, sizeof(struct execlist_port)); + + return port; } static inline unsigned int @@ -736,7 +770,9 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) #define CNL_HWS_CSB_WRITE_INDEX 0x2f struct intel_ring * -intel_engine_create_ring(struct intel_engine_cs *engine, int size); +intel_engine_create_ring(struct intel_engine_cs *engine, + struct i915_timeline *timeline, + int size); int intel_ring_pin(struct intel_ring *ring, struct drm_i915_private *i915, unsigned int offset_bias); @@ -854,12 +890,9 @@ static inline u32 intel_engine_last_submit(struct intel_engine_cs *engine) * wtih serialising this hint with anything, so document it as * a hint and nothing more. */ - return READ_ONCE(engine->timeline->seqno); + return READ_ONCE(engine->timeline.seqno); } -int init_workarounds_ring(struct intel_engine_cs *engine); -int intel_ring_workarounds_emit(struct i915_request *rq); - void intel_engine_get_instdone(struct intel_engine_cs *engine, struct intel_instdone *instdone); @@ -939,7 +972,7 @@ bool intel_engine_add_wait(struct intel_engine_cs *engine, struct intel_wait *wait); void intel_engine_remove_wait(struct intel_engine_cs *engine, struct intel_wait *wait); -void intel_engine_enable_signaling(struct i915_request *request, bool wakeup); +bool intel_engine_enable_signaling(struct i915_request *request, bool wakeup); void intel_engine_cancel_signaling(struct i915_request *request); static inline bool intel_engine_has_waiter(const struct intel_engine_cs *engine) @@ -1037,7 +1070,7 @@ static inline void intel_engine_context_in(struct intel_engine_cs *engine) if (READ_ONCE(engine->stats.enabled) == 0) return; - spin_lock_irqsave(&engine->stats.lock, flags); + write_seqlock_irqsave(&engine->stats.lock, flags); if (engine->stats.enabled > 0) { if (engine->stats.active++ == 0) @@ -1045,7 +1078,7 @@ static inline void intel_engine_context_in(struct intel_engine_cs *engine) GEM_BUG_ON(engine->stats.active == 0); } - spin_unlock_irqrestore(&engine->stats.lock, flags); + write_sequnlock_irqrestore(&engine->stats.lock, flags); } static inline void intel_engine_context_out(struct intel_engine_cs *engine) @@ -1055,7 +1088,7 @@ static inline void intel_engine_context_out(struct intel_engine_cs *engine) if (READ_ONCE(engine->stats.enabled) == 0) return; - spin_lock_irqsave(&engine->stats.lock, flags); + write_seqlock_irqsave(&engine->stats.lock, flags); if (engine->stats.enabled > 0) { ktime_t last; @@ -1082,7 +1115,7 @@ static inline void intel_engine_context_out(struct intel_engine_cs *engine) } } - spin_unlock_irqrestore(&engine->stats.lock, flags); + write_sequnlock_irqrestore(&engine->stats.lock, flags); } int intel_enable_engine_stats(struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 53ea564..53a6eaa 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -542,6 +542,29 @@ void gen9_sanitize_dc_state(struct drm_i915_private *dev_priv) dev_priv->csr.dc_state = val; } +/** + * gen9_set_dc_state - set target display C power state + * @dev_priv: i915 device instance + * @state: target DC power state + * - DC_STATE_DISABLE + * - DC_STATE_EN_UPTO_DC5 + * - DC_STATE_EN_UPTO_DC6 + * - DC_STATE_EN_DC9 + * + * Signal to DMC firmware/HW the target DC power state passed in @state. + * DMC/HW can turn off individual display clocks and power rails when entering + * a deeper DC power state (higher in number) and turns these back when exiting + * that state to a shallower power state (lower in number). The HW will decide + * when to actually enter a given state on an on-demand basis, for instance + * depending on the active state of display pipes. The state of display + * registers backed by affected power rails are saved/restored as needed. + * + * Based on the above enabling a deeper DC power state is asynchronous wrt. + * enabling it. Disabling a deeper power state is synchronous: for instance + * setting %DC_STATE_DISABLE won't complete until all HW resources are turned + * back on and register state is restored. This is guaranteed by the MMIO write + * to DC_STATE_EN blocking until the state is restored. + */ static void gen9_set_dc_state(struct drm_i915_private *dev_priv, uint32_t state) { uint32_t val; @@ -635,26 +658,18 @@ static void assert_can_enable_dc6(struct drm_i915_private *dev_priv) assert_csr_loaded(dev_priv); } -void skl_enable_dc6(struct drm_i915_private *dev_priv) +static void skl_enable_dc6(struct drm_i915_private *dev_priv) { assert_can_enable_dc6(dev_priv); DRM_DEBUG_KMS("Enabling DC6\n"); - gen9_set_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6); - -} - -void skl_disable_dc6(struct drm_i915_private *dev_priv) -{ - DRM_DEBUG_KMS("Disabling DC6\n"); - /* Wa Display #1183: skl,kbl,cfl */ if (IS_GEN9_BC(dev_priv)) I915_WRITE(GEN8_CHICKEN_DCPR_1, I915_READ(GEN8_CHICKEN_DCPR_1) | SKL_SELECT_ALTERNATE_DC_EXIT); - gen9_set_dc_state(dev_priv, DC_STATE_DISABLE); + gen9_set_dc_state(dev_priv, DC_STATE_EN_UPTO_DC6); } static void hsw_power_well_sync_hw(struct drm_i915_private *dev_priv, @@ -2627,32 +2642,69 @@ static void intel_power_domains_sync_hw(struct drm_i915_private *dev_priv) mutex_unlock(&power_domains->lock); } -static void gen9_dbuf_enable(struct drm_i915_private *dev_priv) +static inline +bool intel_dbuf_slice_set(struct drm_i915_private *dev_priv, + i915_reg_t reg, bool enable) { - I915_WRITE(DBUF_CTL, I915_READ(DBUF_CTL) | DBUF_POWER_REQUEST); - POSTING_READ(DBUF_CTL); + u32 val, status; + val = I915_READ(reg); + val = enable ? (val | DBUF_POWER_REQUEST) : (val & ~DBUF_POWER_REQUEST); + I915_WRITE(reg, val); + POSTING_READ(reg); udelay(10); - if (!(I915_READ(DBUF_CTL) & DBUF_POWER_STATE)) - DRM_ERROR("DBuf power enable timeout\n"); + status = I915_READ(reg) & DBUF_POWER_STATE; + if ((enable && !status) || (!enable && status)) { + DRM_ERROR("DBus power %s timeout!\n", + enable ? "enable" : "disable"); + return false; + } + return true; +} + +static void gen9_dbuf_enable(struct drm_i915_private *dev_priv) +{ + intel_dbuf_slice_set(dev_priv, DBUF_CTL, true); } static void gen9_dbuf_disable(struct drm_i915_private *dev_priv) { - I915_WRITE(DBUF_CTL, I915_READ(DBUF_CTL) & ~DBUF_POWER_REQUEST); - POSTING_READ(DBUF_CTL); + intel_dbuf_slice_set(dev_priv, DBUF_CTL, false); +} - udelay(10); +static u8 intel_dbuf_max_slices(struct drm_i915_private *dev_priv) +{ + if (INTEL_GEN(dev_priv) < 11) + return 1; + return 2; +} - if (I915_READ(DBUF_CTL) & DBUF_POWER_STATE) - DRM_ERROR("DBuf power disable timeout!\n"); +void icl_dbuf_slices_update(struct drm_i915_private *dev_priv, + u8 req_slices) +{ + u8 hw_enabled_slices = dev_priv->wm.skl_hw.ddb.enabled_slices; + u32 val; + bool ret; + + if (req_slices > intel_dbuf_max_slices(dev_priv)) { + DRM_ERROR("Invalid number of dbuf slices requested\n"); + return; + } + + if (req_slices == hw_enabled_slices || req_slices == 0) + return; + + val = I915_READ(DBUF_CTL_S2); + if (req_slices > hw_enabled_slices) + ret = intel_dbuf_slice_set(dev_priv, DBUF_CTL_S2, true); + else + ret = intel_dbuf_slice_set(dev_priv, DBUF_CTL_S2, false); + + if (ret) + dev_priv->wm.skl_hw.ddb.enabled_slices = req_slices; } -/* - * TODO: we shouldn't always enable DBUF_CTL_S2, we should only enable it when - * needed and keep it disabled as much as possible. - */ static void icl_dbuf_enable(struct drm_i915_private *dev_priv) { I915_WRITE(DBUF_CTL_S1, I915_READ(DBUF_CTL_S1) | DBUF_POWER_REQUEST); @@ -2664,6 +2716,8 @@ static void icl_dbuf_enable(struct drm_i915_private *dev_priv) if (!(I915_READ(DBUF_CTL_S1) & DBUF_POWER_STATE) || !(I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE)) DRM_ERROR("DBuf power enable timeout\n"); + else + dev_priv->wm.skl_hw.ddb.enabled_slices = 2; } static void icl_dbuf_disable(struct drm_i915_private *dev_priv) @@ -2677,6 +2731,8 @@ static void icl_dbuf_disable(struct drm_i915_private *dev_priv) if ((I915_READ(DBUF_CTL_S1) & DBUF_POWER_STATE) || (I915_READ(DBUF_CTL_S2) & DBUF_POWER_STATE)) DRM_ERROR("DBuf power disable timeout!\n"); + else + dev_priv->wm.skl_hw.ddb.enabled_slices = 0; } static void icl_mbus_init(struct drm_i915_private *dev_priv) diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index 96e213e..2500502 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -2779,9 +2779,8 @@ static bool intel_sdvo_tv_create_property(struct intel_sdvo *intel_sdvo, return false; for (i = 0; i < intel_sdvo_connector->format_supported_num; i++) - drm_property_add_enum( - intel_sdvo_connector->tv_format, i, - i, tv_format_names[intel_sdvo_connector->tv_format_supported[i]]); + drm_property_add_enum(intel_sdvo_connector->tv_format, i, + tv_format_names[intel_sdvo_connector->tv_format_supported[i]]); intel_sdvo_connector->base.base.state->tv.mode = intel_sdvo_connector->tv_format_supported[0]; drm_object_attach_property(&intel_sdvo_connector->base.base.base, diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c index dbdcf85..ee23613f 100644 --- a/drivers/gpu/drm/i915/intel_sprite.c +++ b/drivers/gpu/drm/i915/intel_sprite.c @@ -48,6 +48,7 @@ bool intel_format_is_yuv(u32 format) case DRM_FORMAT_UYVY: case DRM_FORMAT_VYUY: case DRM_FORMAT_YVYU: + case DRM_FORMAT_NV12: return true; default: return false; @@ -130,7 +131,7 @@ void intel_pipe_update_start(const struct intel_crtc_state *new_crtc_state) if (scanline < min || scanline > max) break; - if (timeout <= 0) { + if (!timeout) { DRM_ERROR("Potential atomic update failure on pipe %c\n", pipe_name(crtc->pipe)); break; @@ -935,20 +936,11 @@ intel_check_sprite_plane(struct intel_plane *plane, struct drm_i915_private *dev_priv = to_i915(plane->base.dev); struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc); struct drm_framebuffer *fb = state->base.fb; - int crtc_x, crtc_y; - unsigned int crtc_w, crtc_h; - uint32_t src_x, src_y, src_w, src_h; - struct drm_rect *src = &state->base.src; - struct drm_rect *dst = &state->base.dst; - struct drm_rect clip = {}; int max_stride = INTEL_GEN(dev_priv) >= 9 ? 32768 : 16384; - int hscale, vscale; int max_scale, min_scale; bool can_scale; int ret; - - *src = drm_plane_state_src(&state->base); - *dst = drm_plane_state_dest(&state->base); + uint32_t pixel_format = 0; if (!fb) { state->base.visible = false; @@ -969,11 +961,14 @@ intel_check_sprite_plane(struct intel_plane *plane, /* setup can_scale, min_scale, max_scale */ if (INTEL_GEN(dev_priv) >= 9) { + if (state->base.fb) + pixel_format = state->base.fb->format->format; /* use scaler when colorkey is not required */ if (!state->ckey.flags) { can_scale = 1; min_scale = 1; - max_scale = skl_max_scale(crtc, crtc_state); + max_scale = + skl_max_scale(crtc, crtc_state, pixel_format); } else { can_scale = 0; min_scale = DRM_PLANE_HELPER_NO_SCALING; @@ -985,64 +980,19 @@ intel_check_sprite_plane(struct intel_plane *plane, min_scale = plane->can_scale ? 1 : (1 << 16); } - /* - * FIXME the following code does a bunch of fuzzy adjustments to the - * coordinates and sizes. We probably need some way to decide whether - * more strict checking should be done instead. - */ - drm_rect_rotate(src, fb->width << 16, fb->height << 16, - state->base.rotation); - - hscale = drm_rect_calc_hscale_relaxed(src, dst, min_scale, max_scale); - BUG_ON(hscale < 0); - - vscale = drm_rect_calc_vscale_relaxed(src, dst, min_scale, max_scale); - BUG_ON(vscale < 0); - - if (crtc_state->base.enable) - drm_mode_get_hv_timing(&crtc_state->base.mode, - &clip.x2, &clip.y2); - - state->base.visible = drm_rect_clip_scaled(src, dst, &clip, hscale, vscale); - - crtc_x = dst->x1; - crtc_y = dst->y1; - crtc_w = drm_rect_width(dst); - crtc_h = drm_rect_height(dst); + ret = drm_atomic_helper_check_plane_state(&state->base, + &crtc_state->base, + min_scale, max_scale, + true, true); + if (ret) + return ret; if (state->base.visible) { - /* check again in case clipping clamped the results */ - hscale = drm_rect_calc_hscale(src, dst, min_scale, max_scale); - if (hscale < 0) { - DRM_DEBUG_KMS("Horizontal scaling factor out of limits\n"); - drm_rect_debug_print("src: ", src, true); - drm_rect_debug_print("dst: ", dst, false); - - return hscale; - } - - vscale = drm_rect_calc_vscale(src, dst, min_scale, max_scale); - if (vscale < 0) { - DRM_DEBUG_KMS("Vertical scaling factor out of limits\n"); - drm_rect_debug_print("src: ", src, true); - drm_rect_debug_print("dst: ", dst, false); - - return vscale; - } - - /* Make the source viewport size an exact multiple of the scaling factors. */ - drm_rect_adjust_size(src, - drm_rect_width(dst) * hscale - drm_rect_width(src), - drm_rect_height(dst) * vscale - drm_rect_height(src)); - - drm_rect_rotate_inv(src, fb->width << 16, fb->height << 16, - state->base.rotation); - - /* sanity check to make sure the src viewport wasn't enlarged */ - WARN_ON(src->x1 < (int) state->base.src_x || - src->y1 < (int) state->base.src_y || - src->x2 > (int) state->base.src_x + state->base.src_w || - src->y2 > (int) state->base.src_y + state->base.src_h); + struct drm_rect *src = &state->base.src; + struct drm_rect *dst = &state->base.dst; + unsigned int crtc_w = drm_rect_width(dst); + unsigned int crtc_h = drm_rect_height(dst); + uint32_t src_x, src_y, src_w, src_h; /* * Hardware doesn't handle subpixel coordinates. @@ -1055,58 +1005,40 @@ intel_check_sprite_plane(struct intel_plane *plane, src_y = src->y1 >> 16; src_h = drm_rect_height(src) >> 16; - if (intel_format_is_yuv(fb->format->format)) { - src_x &= ~1; - src_w &= ~1; - - /* - * Must keep src and dst the - * same if we can't scale. - */ - if (!can_scale) - crtc_w &= ~1; + src->x1 = src_x << 16; + src->x2 = (src_x + src_w) << 16; + src->y1 = src_y << 16; + src->y2 = (src_y + src_h) << 16; - if (crtc_w == 0) - state->base.visible = false; + if (intel_format_is_yuv(fb->format->format) && + fb->format->format != DRM_FORMAT_NV12 && + (src_x % 2 || src_w % 2)) { + DRM_DEBUG_KMS("src x/w (%u, %u) must be a multiple of 2 for YUV planes\n", + src_x, src_w); + return -EINVAL; } - } - - /* Check size restrictions when scaling */ - if (state->base.visible && (src_w != crtc_w || src_h != crtc_h)) { - unsigned int width_bytes; - int cpp = fb->format->cpp[0]; - WARN_ON(!can_scale); + /* Check size restrictions when scaling */ + if (src_w != crtc_w || src_h != crtc_h) { + unsigned int width_bytes; + int cpp = fb->format->cpp[0]; - /* FIXME interlacing min height is 6 */ + WARN_ON(!can_scale); - if (crtc_w < 3 || crtc_h < 3) - state->base.visible = false; + width_bytes = ((src_x * cpp) & 63) + src_w * cpp; - if (src_w < 3 || src_h < 3) - state->base.visible = false; - - width_bytes = ((src_x * cpp) & 63) + src_w * cpp; - - if (INTEL_GEN(dev_priv) < 9 && (src_w > 2048 || src_h > 2048 || - width_bytes > 4096 || fb->pitches[0] > 4096)) { - DRM_DEBUG_KMS("Source dimensions exceed hardware limits\n"); - return -EINVAL; + /* FIXME interlacing min height is 6 */ + if (INTEL_GEN(dev_priv) < 9 && ( + src_w < 3 || src_h < 3 || + src_w > 2048 || src_h > 2048 || + crtc_w < 3 || crtc_h < 3 || + width_bytes > 4096 || fb->pitches[0] > 4096)) { + DRM_DEBUG_KMS("Source dimensions exceed hardware limits\n"); + return -EINVAL; + } } } - if (state->base.visible) { - src->x1 = src_x << 16; - src->x2 = (src_x + src_w) << 16; - src->y1 = src_y << 16; - src->y2 = (src_y + src_h) << 16; - } - - dst->x1 = crtc_x; - dst->x2 = crtc_x + crtc_w; - dst->y1 = crtc_y; - dst->y2 = crtc_y + crtc_h; - if (INTEL_GEN(dev_priv) >= 9) { ret = skl_check_plane_surface(crtc_state, state); if (ret) @@ -1248,6 +1180,19 @@ static uint32_t skl_plane_formats[] = { DRM_FORMAT_VYUY, }; +static uint32_t skl_planar_formats[] = { + DRM_FORMAT_RGB565, + DRM_FORMAT_ABGR8888, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_XBGR8888, + DRM_FORMAT_XRGB8888, + DRM_FORMAT_YUYV, + DRM_FORMAT_YVYU, + DRM_FORMAT_UYVY, + DRM_FORMAT_VYUY, + DRM_FORMAT_NV12, +}; + static const uint64_t skl_plane_format_modifiers_noccs[] = { I915_FORMAT_MOD_Yf_TILED, I915_FORMAT_MOD_Y_TILED, @@ -1342,6 +1287,7 @@ static bool skl_mod_supported(uint32_t format, uint64_t modifier) case DRM_FORMAT_YVYU: case DRM_FORMAT_UYVY: case DRM_FORMAT_VYUY: + case DRM_FORMAT_NV12: if (modifier == I915_FORMAT_MOD_Yf_TILED) return true; /* fall through */ @@ -1441,8 +1387,14 @@ intel_sprite_plane_create(struct drm_i915_private *dev_priv, intel_plane->disable_plane = skl_disable_plane; intel_plane->get_hw_state = skl_plane_get_hw_state; - plane_formats = skl_plane_formats; - num_plane_formats = ARRAY_SIZE(skl_plane_formats); + if (skl_plane_has_planar(dev_priv, pipe, + PLANE_SPRITE0 + plane)) { + plane_formats = skl_planar_formats; + num_plane_formats = ARRAY_SIZE(skl_planar_formats); + } else { + plane_formats = skl_plane_formats; + num_plane_formats = ARRAY_SIZE(skl_plane_formats); + } if (skl_plane_has_ccs(dev_priv, pipe, PLANE_SPRITE0 + plane)) modifiers = skl_plane_format_modifiers_ccs; diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index e5bf0d3..1cffaf7 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -69,13 +69,15 @@ static int __get_platform_enable_guc(struct drm_i915_private *dev_priv) static int __get_default_guc_log_level(struct drm_i915_private *dev_priv) { - int guc_log_level = 0; /* disabled */ + int guc_log_level; - /* Enable if we're running on platform with GuC and debug config */ - if (HAS_GUC(dev_priv) && intel_uc_is_using_guc() && - (IS_ENABLED(CONFIG_DRM_I915_DEBUG) || - IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))) - guc_log_level = 1 + GUC_LOG_VERBOSITY_MAX; + if (!HAS_GUC(dev_priv) || !intel_uc_is_using_guc()) + guc_log_level = GUC_LOG_LEVEL_DISABLED; + else if (IS_ENABLED(CONFIG_DRM_I915_DEBUG) || + IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + guc_log_level = GUC_LOG_LEVEL_MAX; + else + guc_log_level = GUC_LOG_LEVEL_NON_VERBOSE; /* Any platform specific fine-tuning can be done here */ @@ -83,7 +85,7 @@ static int __get_default_guc_log_level(struct drm_i915_private *dev_priv) } /** - * intel_uc_sanitize_options - sanitize uC related modparam options + * sanitize_options_early - sanitize uC related modparam options * @dev_priv: device private * * In case of "enable_guc" option this function will attempt to modify @@ -99,7 +101,7 @@ static int __get_default_guc_log_level(struct drm_i915_private *dev_priv) * unless GuC is enabled on given platform and the driver is compiled with * debug config when this modparam will default to "enable(1..4)". */ -void intel_uc_sanitize_options(struct drm_i915_private *dev_priv) +static void sanitize_options_early(struct drm_i915_private *dev_priv) { struct intel_uc_fw *guc_fw = &dev_priv->guc.fw; struct intel_uc_fw *huc_fw = &dev_priv->huc.fw; @@ -142,51 +144,53 @@ void intel_uc_sanitize_options(struct drm_i915_private *dev_priv) i915_modparams.guc_log_level = 0; } - if (i915_modparams.guc_log_level > 1 + GUC_LOG_VERBOSITY_MAX) { + if (i915_modparams.guc_log_level > GUC_LOG_LEVEL_MAX) { DRM_WARN("Incompatible option detected: %s=%d, %s!\n", "guc_log_level", i915_modparams.guc_log_level, "verbosity too high"); - i915_modparams.guc_log_level = 1 + GUC_LOG_VERBOSITY_MAX; + i915_modparams.guc_log_level = GUC_LOG_LEVEL_MAX; } - DRM_DEBUG_DRIVER("guc_log_level=%d (enabled:%s verbosity:%d)\n", + DRM_DEBUG_DRIVER("guc_log_level=%d (enabled:%s, verbose:%s, verbosity:%d)\n", i915_modparams.guc_log_level, yesno(i915_modparams.guc_log_level), - i915_modparams.guc_log_level - 1); + yesno(GUC_LOG_LEVEL_IS_VERBOSE(i915_modparams.guc_log_level)), + GUC_LOG_LEVEL_TO_VERBOSITY(i915_modparams.guc_log_level)); /* Make sure that sanitization was done */ GEM_BUG_ON(i915_modparams.enable_guc < 0); GEM_BUG_ON(i915_modparams.guc_log_level < 0); } -void intel_uc_init_early(struct drm_i915_private *dev_priv) +void intel_uc_init_early(struct drm_i915_private *i915) { - intel_guc_init_early(&dev_priv->guc); - intel_huc_init_early(&dev_priv->huc); -} + struct intel_guc *guc = &i915->guc; + struct intel_huc *huc = &i915->huc; -void intel_uc_init_fw(struct drm_i915_private *dev_priv) -{ - if (!USES_GUC(dev_priv)) - return; + intel_guc_init_early(guc); + intel_huc_init_early(huc); - if (USES_HUC(dev_priv)) - intel_uc_fw_fetch(dev_priv, &dev_priv->huc.fw); + sanitize_options_early(i915); - intel_uc_fw_fetch(dev_priv, &dev_priv->guc.fw); + if (USES_GUC(i915)) + intel_uc_fw_fetch(i915, &guc->fw); + + if (USES_HUC(i915)) + intel_uc_fw_fetch(i915, &huc->fw); } -void intel_uc_fini_fw(struct drm_i915_private *dev_priv) +void intel_uc_cleanup_early(struct drm_i915_private *i915) { - if (!USES_GUC(dev_priv)) - return; + struct intel_guc *guc = &i915->guc; + struct intel_huc *huc = &i915->huc; - intel_uc_fw_fini(&dev_priv->guc.fw); + if (USES_HUC(i915)) + intel_uc_fw_fini(&huc->fw); - if (USES_HUC(dev_priv)) - intel_uc_fw_fini(&dev_priv->huc.fw); + if (USES_GUC(i915)) + intel_uc_fw_fini(&guc->fw); - guc_free_load_err_log(&dev_priv->guc); + guc_free_load_err_log(guc); } /** @@ -223,10 +227,13 @@ static int guc_enable_communication(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); + gen9_enable_guc_interrupts(dev_priv); + if (HAS_GUC_CT(dev_priv)) - return intel_guc_enable_ct(guc); + return intel_guc_ct_enable(&guc->ct); guc->send = intel_guc_send_mmio; + guc->handler = intel_guc_to_host_event_handler_mmio; return 0; } @@ -235,9 +242,12 @@ static void guc_disable_communication(struct intel_guc *guc) struct drm_i915_private *dev_priv = guc_to_i915(guc); if (HAS_GUC_CT(dev_priv)) - intel_guc_disable_ct(guc); + intel_guc_ct_disable(&guc->ct); + + gen9_disable_guc_interrupts(dev_priv); guc->send = intel_guc_send_nop; + guc->handler = intel_guc_to_host_event_handler_nop; } int intel_uc_init_misc(struct drm_i915_private *dev_priv) @@ -248,24 +258,13 @@ int intel_uc_init_misc(struct drm_i915_private *dev_priv) if (!USES_GUC(dev_priv)) return 0; - ret = intel_guc_init_wq(guc); - if (ret) { - DRM_ERROR("Couldn't allocate workqueues for GuC\n"); - goto err; - } + intel_guc_init_ggtt_pin_bias(guc); - ret = intel_guc_log_relay_create(guc); - if (ret) { - DRM_ERROR("Couldn't allocate relay for GuC log\n"); - goto err_relay; - } + ret = intel_guc_init_wq(guc); + if (ret) + return ret; return 0; - -err_relay: - intel_guc_fini_wq(guc); -err: - return ret; } void intel_uc_fini_misc(struct drm_i915_private *dev_priv) @@ -276,8 +275,6 @@ void intel_uc_fini_misc(struct drm_i915_private *dev_priv) return; intel_guc_fini_wq(guc); - - intel_guc_log_relay_destroy(guc); } int intel_uc_init(struct drm_i915_private *dev_priv) @@ -325,6 +322,24 @@ void intel_uc_fini(struct drm_i915_private *dev_priv) intel_guc_fini(guc); } +void intel_uc_sanitize(struct drm_i915_private *i915) +{ + struct intel_guc *guc = &i915->guc; + struct intel_huc *huc = &i915->huc; + + if (!USES_GUC(i915)) + return; + + GEM_BUG_ON(!HAS_GUC(i915)); + + guc_disable_communication(guc); + + intel_huc_sanitize(huc); + intel_guc_sanitize(guc); + + __intel_uc_reset_hw(i915); +} + int intel_uc_init_hw(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; @@ -336,14 +351,8 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) GEM_BUG_ON(!HAS_GUC(dev_priv)); - guc_disable_communication(guc); gen9_reset_guc_interrupts(dev_priv); - /* init WOPCM */ - I915_WRITE(GUC_WOPCM_SIZE, intel_guc_wopcm_size(dev_priv)); - I915_WRITE(DMA_GUC_WOPCM_OFFSET, - GUC_WOPCM_OFFSET_VALUE | HUC_LOADING_AGENT_GUC); - /* WaEnableuKernelHeaderValidFix:skl */ /* WaEnableGuCBootHashCheckNotSet:skl,bxt,kbl */ if (IS_GEN9(dev_priv)) @@ -390,12 +399,9 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) } if (USES_GUC_SUBMISSION(dev_priv)) { - if (i915_modparams.guc_log_level) - gen9_enable_guc_interrupts(dev_priv); - ret = intel_guc_submission_enable(guc); if (ret) - goto err_interrupts; + goto err_communication; } dev_info(dev_priv->drm.dev, "GuC firmware version %u.%u\n", @@ -410,8 +416,6 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) /* * We've failed to load the firmware :( */ -err_interrupts: - gen9_disable_guc_interrupts(dev_priv); err_communication: guc_disable_communication(guc); err_log_capture: @@ -441,9 +445,6 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv) intel_guc_submission_disable(guc); guc_disable_communication(guc); - - if (USES_GUC_SUBMISSION(dev_priv)) - gen9_disable_guc_interrupts(dev_priv); } int intel_uc_suspend(struct drm_i915_private *i915) @@ -479,8 +480,7 @@ int intel_uc_resume(struct drm_i915_private *i915) if (guc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) return 0; - if (i915_modparams.guc_log_level) - gen9_enable_guc_interrupts(i915); + gen9_enable_guc_interrupts(i915); err = intel_guc_resume(guc); if (err) { diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index f76d51d..25d73ad 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -28,13 +28,12 @@ #include "intel_huc.h" #include "i915_params.h" -void intel_uc_sanitize_options(struct drm_i915_private *dev_priv); void intel_uc_init_early(struct drm_i915_private *dev_priv); +void intel_uc_cleanup_early(struct drm_i915_private *dev_priv); void intel_uc_init_mmio(struct drm_i915_private *dev_priv); -void intel_uc_init_fw(struct drm_i915_private *dev_priv); -void intel_uc_fini_fw(struct drm_i915_private *dev_priv); int intel_uc_init_misc(struct drm_i915_private *dev_priv); void intel_uc_fini_misc(struct drm_i915_private *dev_priv); +void intel_uc_sanitize(struct drm_i915_private *dev_priv); int intel_uc_init_hw(struct drm_i915_private *dev_priv); void intel_uc_fini_hw(struct drm_i915_private *dev_priv); int intel_uc_init(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_uc_fw.c b/drivers/gpu/drm/i915/intel_uc_fw.c index 3ec0ce5..6e8e0b5 100644 --- a/drivers/gpu/drm/i915/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/intel_uc_fw.c @@ -95,15 +95,6 @@ void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, uc_fw->ucode_offset = uc_fw->header_offset + uc_fw->header_size; uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32); - /* Header and uCode will be loaded to WOPCM */ - size = uc_fw->header_size + uc_fw->ucode_size; - if (size > intel_guc_wopcm_size(dev_priv)) { - DRM_WARN("%s: Firmware is too large to fit in WOPCM\n", - intel_uc_fw_type_repr(uc_fw->type)); - err = -E2BIG; - goto fail; - } - /* now RSA */ if (css->key_size_dw != UOS_RSA_SCRATCH_COUNT) { DRM_WARN("%s: Mismatched firmware RSA key size (%u)\n", @@ -209,6 +200,7 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, struct i915_vma *vma)) { struct i915_vma *vma; + u32 ggtt_pin_bias; int err; DRM_DEBUG_DRIVER("%s fw load %s\n", @@ -230,8 +222,9 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, goto fail; } + ggtt_pin_bias = to_i915(uc_fw->obj->base.dev)->guc.ggtt_pin_bias; vma = i915_gem_object_ggtt_pin(uc_fw->obj, NULL, 0, 0, - PIN_OFFSET_BIAS | GUC_WOPCM_TOP); + PIN_OFFSET_BIAS | ggtt_pin_bias); if (IS_ERR(vma)) { err = PTR_ERR(vma); DRM_DEBUG_DRIVER("%s fw ggtt-pin err=%d\n", diff --git a/drivers/gpu/drm/i915/intel_uc_fw.h b/drivers/gpu/drm/i915/intel_uc_fw.h index d5fd460..87910aa 100644 --- a/drivers/gpu/drm/i915/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/intel_uc_fw.h @@ -30,7 +30,7 @@ struct drm_i915_private; struct i915_vma; /* Home of GuC, HuC and DMC firmwares */ -#define INTEL_UC_FIRMWARE_URL "https://01.org/linuxgraphics/downloads/firmware" +#define INTEL_UC_FIRMWARE_URL "https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git/tree/i915" enum intel_uc_fw_status { INTEL_UC_FIRMWARE_FAIL = -1, @@ -115,6 +115,28 @@ static inline bool intel_uc_fw_is_selected(struct intel_uc_fw *uc_fw) return uc_fw->path != NULL; } +static inline void intel_uc_fw_sanitize(struct intel_uc_fw *uc_fw) +{ + if (uc_fw->load_status == INTEL_UC_FIRMWARE_SUCCESS) + uc_fw->load_status = INTEL_UC_FIRMWARE_PENDING; +} + +/** + * intel_uc_fw_get_upload_size() - Get size of firmware needed to be uploaded. + * @uc_fw: uC firmware. + * + * Get the size of the firmware and header that will be uploaded to WOPCM. + * + * Return: Upload firmware size, or zero on firmware fetch failure. + */ +static inline u32 intel_uc_fw_get_upload_size(struct intel_uc_fw *uc_fw) +{ + if (uc_fw->fetch_status != INTEL_UC_FIRMWARE_SUCCESS) + return 0; + + return uc_fw->header_size + uc_fw->ucode_size; +} + void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, struct intel_uc_fw *uc_fw); int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index 4df7c2ef..448293e 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -62,6 +62,11 @@ static inline void fw_domain_reset(struct drm_i915_private *i915, const struct intel_uncore_forcewake_domain *d) { + /* + * We don't really know if the powerwell for the forcewake domain we are + * trying to reset here does exist at this point (engines could be fused + * off in ICL+), so no waiting for acks + */ __raw_i915_write32(i915, d->reg_set, i915->uncore.fw_reset); } @@ -134,7 +139,9 @@ fw_domain_wait_ack_with_fallback(const struct drm_i915_private *i915, * in the hope that the original ack will be delivered along with * the fallback ack. * - * This workaround is described in HSDES #1604254524 + * This workaround is described in HSDES #1604254524 and it's known as: + * WaRsForcewakeAddDelayForAck:skl,bxt,kbl,glk,cfl,cnl,icl + * although the name is a bit misleading. */ pass = 1; @@ -1353,6 +1360,23 @@ static void fw_domain_init(struct drm_i915_private *dev_priv, fw_domain_reset(dev_priv, d); } +static void fw_domain_fini(struct drm_i915_private *dev_priv, + enum forcewake_domain_id domain_id) +{ + struct intel_uncore_forcewake_domain *d; + + if (WARN_ON(domain_id >= FW_DOMAIN_ID_COUNT)) + return; + + d = &dev_priv->uncore.fw_domain[domain_id]; + + WARN_ON(d->wake_count); + WARN_ON(hrtimer_cancel(&d->timer)); + memset(d, 0, sizeof(*d)); + + dev_priv->uncore.fw_domains &= ~BIT(domain_id); +} + static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) { if (INTEL_GEN(dev_priv) <= 5 || intel_vgpu_active(dev_priv)) @@ -1372,7 +1396,8 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv) if (INTEL_GEN(dev_priv) >= 11) { int i; - dev_priv->uncore.funcs.force_wake_get = fw_domains_get; + dev_priv->uncore.funcs.force_wake_get = + fw_domains_get_with_fallback; dev_priv->uncore.funcs.force_wake_put = fw_domains_put; fw_domain_init(dev_priv, FW_DOMAIN_ID_RENDER, FORCEWAKE_RENDER_GEN9, @@ -1565,6 +1590,40 @@ void intel_uncore_init(struct drm_i915_private *dev_priv) &dev_priv->uncore.pmic_bus_access_nb); } +/* + * We might have detected that some engines are fused off after we initialized + * the forcewake domains. Prune them, to make sure they only reference existing + * engines. + */ +void intel_uncore_prune(struct drm_i915_private *dev_priv) +{ + if (INTEL_GEN(dev_priv) >= 11) { + enum forcewake_domains fw_domains = dev_priv->uncore.fw_domains; + enum forcewake_domain_id domain_id; + int i; + + for (i = 0; i < I915_MAX_VCS; i++) { + domain_id = FW_DOMAIN_ID_MEDIA_VDBOX0 + i; + + if (HAS_ENGINE(dev_priv, _VCS(i))) + continue; + + if (fw_domains & BIT(domain_id)) + fw_domain_fini(dev_priv, domain_id); + } + + for (i = 0; i < I915_MAX_VECS; i++) { + domain_id = FW_DOMAIN_ID_MEDIA_VEBOX0 + i; + + if (HAS_ENGINE(dev_priv, _VECS(i))) + continue; + + if (fw_domains & BIT(domain_id)) + fw_domain_fini(dev_priv, domain_id); + } + } +} + void intel_uncore_fini(struct drm_i915_private *dev_priv) { /* Paranoia: make sure we have disabled everything before we exit. */ @@ -1646,11 +1705,10 @@ static void gen3_stop_engine(struct intel_engine_cs *engine) const i915_reg_t mode = RING_MI_MODE(base); I915_WRITE_FW(mode, _MASKED_BIT_ENABLE(STOP_RING)); - if (intel_wait_for_register_fw(dev_priv, - mode, - MODE_IDLE, - MODE_IDLE, - 500)) + if (__intel_wait_for_register_fw(dev_priv, + mode, MODE_IDLE, MODE_IDLE, + 500, 0, + NULL)) DRM_DEBUG_DRIVER("%s: timed out on STOP_RING\n", engine->name); @@ -1804,9 +1862,10 @@ static int gen6_hw_domain_reset(struct drm_i915_private *dev_priv, __raw_i915_write32(dev_priv, GEN6_GDRST, hw_domain_mask); /* Wait for the device to ack the reset requests */ - err = intel_wait_for_register_fw(dev_priv, - GEN6_GDRST, hw_domain_mask, 0, - 500); + err = __intel_wait_for_register_fw(dev_priv, + GEN6_GDRST, hw_domain_mask, 0, + 500, 0, + NULL); if (err) DRM_DEBUG_DRIVER("Wait for 0x%08x engines reset failed\n", hw_domain_mask); @@ -1854,6 +1913,50 @@ static int gen6_reset_engines(struct drm_i915_private *dev_priv, } /** + * gen11_reset_engines - reset individual engines + * @dev_priv: i915 device + * @engine_mask: mask of intel_ring_flag() engines or ALL_ENGINES for full reset + * + * This function will reset the individual engines that are set in engine_mask. + * If you provide ALL_ENGINES as mask, full global domain reset will be issued. + * + * Note: It is responsibility of the caller to handle the difference between + * asking full domain reset versus reset for all available individual engines. + * + * Returns 0 on success, nonzero on error. + */ +static int gen11_reset_engines(struct drm_i915_private *dev_priv, + unsigned engine_mask) +{ + struct intel_engine_cs *engine; + const u32 hw_engine_mask[I915_NUM_ENGINES] = { + [RCS] = GEN11_GRDOM_RENDER, + [BCS] = GEN11_GRDOM_BLT, + [VCS] = GEN11_GRDOM_MEDIA, + [VCS2] = GEN11_GRDOM_MEDIA2, + [VCS3] = GEN11_GRDOM_MEDIA3, + [VCS4] = GEN11_GRDOM_MEDIA4, + [VECS] = GEN11_GRDOM_VECS, + [VECS2] = GEN11_GRDOM_VECS2, + }; + u32 hw_mask; + + BUILD_BUG_ON(VECS2 + 1 != I915_NUM_ENGINES); + + if (engine_mask == ALL_ENGINES) { + hw_mask = GEN11_GRDOM_FULL; + } else { + unsigned int tmp; + + hw_mask = 0; + for_each_engine_masked(engine, dev_priv, engine_mask, tmp) + hw_mask |= hw_engine_mask[engine->id]; + } + + return gen6_hw_domain_reset(dev_priv, hw_mask); +} + +/** * __intel_wait_for_register_fw - wait until register matches expected state * @dev_priv: the i915 device * @reg: the register to read @@ -1940,7 +2043,7 @@ int __intel_wait_for_register(struct drm_i915_private *dev_priv, u32 reg_value; int ret; - might_sleep(); + might_sleep_if(slow_timeout_ms); spin_lock_irq(&dev_priv->uncore.lock); intel_uncore_forcewake_get__locked(dev_priv, fw); @@ -1952,7 +2055,7 @@ int __intel_wait_for_register(struct drm_i915_private *dev_priv, intel_uncore_forcewake_put__locked(dev_priv, fw); spin_unlock_irq(&dev_priv->uncore.lock); - if (ret) + if (ret && slow_timeout_ms) ret = __wait_for(reg_value = I915_READ_NOTRACE(reg), (reg_value & mask) == value, slow_timeout_ms * 1000, 10, 1000); @@ -1971,11 +2074,12 @@ static int gen8_reset_engine_start(struct intel_engine_cs *engine) I915_WRITE_FW(RING_RESET_CTL(engine->mmio_base), _MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET)); - ret = intel_wait_for_register_fw(dev_priv, - RING_RESET_CTL(engine->mmio_base), - RESET_CTL_READY_TO_RESET, - RESET_CTL_READY_TO_RESET, - 700); + ret = __intel_wait_for_register_fw(dev_priv, + RING_RESET_CTL(engine->mmio_base), + RESET_CTL_READY_TO_RESET, + RESET_CTL_READY_TO_RESET, + 700, 0, + NULL); if (ret) DRM_ERROR("%s: reset request timeout\n", engine->name); @@ -2000,7 +2104,10 @@ static int gen8_reset_engines(struct drm_i915_private *dev_priv, if (gen8_reset_engine_start(engine)) goto not_ready; - return gen6_reset_engines(dev_priv, engine_mask); + if (INTEL_GEN(dev_priv) >= 11) + return gen11_reset_engines(dev_priv, engine_mask); + else + return gen6_reset_engines(dev_priv, engine_mask); not_ready: for_each_engine_masked(engine, dev_priv, engine_mask, tmp) @@ -2038,15 +2145,31 @@ int intel_gpu_reset(struct drm_i915_private *dev_priv, unsigned engine_mask) int retry; int ret; - might_sleep(); + /* + * We want to perform per-engine reset from atomic context (e.g. + * softirq), which imposes the constraint that we cannot sleep. + * However, experience suggests that spending a bit of time waiting + * for a reset helps in various cases, so for a full-device reset + * we apply the opposite rule and wait if we want to. As we should + * always follow up a failed per-engine reset with a full device reset, + * being a little faster, stricter and more error prone for the + * atomic case seems an acceptable compromise. + * + * Unfortunately this leads to a bimodal routine, when the goal was + * to have a single reset function that worked for resetting any + * number of engines simultaneously. + */ + might_sleep_if(engine_mask == ALL_ENGINES); - /* If the power well sleeps during the reset, the reset + /* + * If the power well sleeps during the reset, the reset * request may be dropped and never completes (causing -EIO). */ intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); for (retry = 0; retry < 3; retry++) { - /* We stop engines, otherwise we might get failed reset and a + /* + * We stop engines, otherwise we might get failed reset and a * dead gpu (on elk). Also as modern gpu as kbl can suffer * from system hang if batchbuffer is progressing when * the reset is issued, regardless of READY_TO_RESET ack. @@ -2060,9 +2183,11 @@ int intel_gpu_reset(struct drm_i915_private *dev_priv, unsigned engine_mask) i915_stop_engines(dev_priv, engine_mask); ret = -ENODEV; - if (reset) + if (reset) { + GEM_TRACE("engine_mask=%x\n", engine_mask); ret = reset(dev_priv, engine_mask); - if (ret != -ETIMEDOUT) + } + if (ret != -ETIMEDOUT || engine_mask != ALL_ENGINES) break; cond_resched(); @@ -2085,12 +2210,14 @@ bool intel_has_reset_engine(struct drm_i915_private *dev_priv) int intel_reset_guc(struct drm_i915_private *dev_priv) { + u32 guc_domain = INTEL_GEN(dev_priv) >= 11 ? GEN11_GRDOM_GUC : + GEN9_GRDOM_GUC; int ret; GEM_BUG_ON(!HAS_GUC(dev_priv)); intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - ret = gen6_hw_domain_reset(dev_priv, GEN9_GRDOM_GUC); + ret = gen6_hw_domain_reset(dev_priv, guc_domain); intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); return ret; diff --git a/drivers/gpu/drm/i915/intel_uncore.h b/drivers/gpu/drm/i915/intel_uncore.h index dfdf444..47478d6 100644 --- a/drivers/gpu/drm/i915/intel_uncore.h +++ b/drivers/gpu/drm/i915/intel_uncore.h @@ -140,6 +140,7 @@ struct intel_uncore { void intel_uncore_sanitize(struct drm_i915_private *dev_priv); void intel_uncore_init(struct drm_i915_private *dev_priv); +void intel_uncore_prune(struct drm_i915_private *dev_priv); bool intel_uncore_unclaimed_mmio(struct drm_i915_private *dev_priv); bool intel_uncore_arm_unclaimed_mmio_detection(struct drm_i915_private *dev_priv); void intel_uncore_fini(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/intel_wopcm.c b/drivers/gpu/drm/i915/intel_wopcm.c new file mode 100644 index 0000000..74bf76f --- /dev/null +++ b/drivers/gpu/drm/i915/intel_wopcm.c @@ -0,0 +1,275 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2017-2018 Intel Corporation + */ + +#include "intel_wopcm.h" +#include "i915_drv.h" + +/** + * DOC: WOPCM Layout + * + * The layout of the WOPCM will be fixed after writing to GuC WOPCM size and + * offset registers whose values are calculated and determined by HuC/GuC + * firmware size and set of hardware requirements/restrictions as shown below: + * + * :: + * + * +=========> +====================+ <== WOPCM Top + * ^ | HW contexts RSVD | + * | +===> +====================+ <== GuC WOPCM Top + * | ^ | | + * | | | | + * | | | | + * | GuC | | + * | WOPCM | | + * | Size +--------------------+ + * WOPCM | | GuC FW RSVD | + * | | +--------------------+ + * | | | GuC Stack RSVD | + * | | +------------------- + + * | v | GuC WOPCM RSVD | + * | +===> +====================+ <== GuC WOPCM base + * | | WOPCM RSVD | + * | +------------------- + <== HuC Firmware Top + * v | HuC FW | + * +=========> +====================+ <== WOPCM Base + * + * GuC accessible WOPCM starts at GuC WOPCM base and ends at GuC WOPCM top. + * The top part of the WOPCM is reserved for hardware contexts (e.g. RC6 + * context). + */ + +/* Default WOPCM size 1MB. */ +#define GEN9_WOPCM_SIZE (1024 * 1024) +/* 16KB WOPCM (RSVD WOPCM) is reserved from HuC firmware top. */ +#define WOPCM_RESERVED_SIZE (16 * 1024) + +/* 16KB reserved at the beginning of GuC WOPCM. */ +#define GUC_WOPCM_RESERVED (16 * 1024) +/* 8KB from GUC_WOPCM_RESERVED is reserved for GuC stack. */ +#define GUC_WOPCM_STACK_RESERVED (8 * 1024) + +/* GuC WOPCM Offset value needs to be aligned to 16KB. */ +#define GUC_WOPCM_OFFSET_ALIGNMENT (1UL << GUC_WOPCM_OFFSET_SHIFT) + +/* 24KB at the end of WOPCM is reserved for RC6 CTX on BXT. */ +#define BXT_WOPCM_RC6_CTX_RESERVED (24 * 1024) +/* 36KB WOPCM reserved at the end of WOPCM on CNL. */ +#define CNL_WOPCM_HW_CTX_RESERVED (36 * 1024) + +/* 128KB from GUC_WOPCM_RESERVED is reserved for FW on Gen9. */ +#define GEN9_GUC_FW_RESERVED (128 * 1024) +#define GEN9_GUC_WOPCM_OFFSET (GUC_WOPCM_RESERVED + GEN9_GUC_FW_RESERVED) + +/** + * intel_wopcm_init_early() - Early initialization of the WOPCM. + * @wopcm: pointer to intel_wopcm. + * + * Setup the size of WOPCM which will be used by later on WOPCM partitioning. + */ +void intel_wopcm_init_early(struct intel_wopcm *wopcm) +{ + wopcm->size = GEN9_WOPCM_SIZE; + + DRM_DEBUG_DRIVER("WOPCM size: %uKiB\n", wopcm->size / 1024); +} + +static inline u32 context_reserved_size(struct drm_i915_private *i915) +{ + if (IS_GEN9_LP(i915)) + return BXT_WOPCM_RC6_CTX_RESERVED; + else if (INTEL_GEN(i915) >= 10) + return CNL_WOPCM_HW_CTX_RESERVED; + else + return 0; +} + +static inline int gen9_check_dword_gap(u32 guc_wopcm_base, u32 guc_wopcm_size) +{ + u32 offset; + + /* + * GuC WOPCM size shall be at least a dword larger than the offset from + * WOPCM base (GuC WOPCM offset from WOPCM base + GEN9_GUC_WOPCM_OFFSET) + * due to hardware limitation on Gen9. + */ + offset = guc_wopcm_base + GEN9_GUC_WOPCM_OFFSET; + if (offset > guc_wopcm_size || + (guc_wopcm_size - offset) < sizeof(u32)) { + DRM_ERROR("GuC WOPCM size %uKiB is too small. %uKiB needed.\n", + guc_wopcm_size / 1024, + (u32)(offset + sizeof(u32)) / 1024); + return -E2BIG; + } + + return 0; +} + +static inline int gen9_check_huc_fw_fits(u32 guc_wopcm_size, u32 huc_fw_size) +{ + /* + * On Gen9 & CNL A0, hardware requires the total available GuC WOPCM + * size to be larger than or equal to HuC firmware size. Otherwise, + * firmware uploading would fail. + */ + if (huc_fw_size > guc_wopcm_size - GUC_WOPCM_RESERVED) { + DRM_ERROR("HuC FW (%uKiB) won't fit in GuC WOPCM (%uKiB).\n", + huc_fw_size / 1024, + (guc_wopcm_size - GUC_WOPCM_RESERVED) / 1024); + return -E2BIG; + } + + return 0; +} + +static inline int check_hw_restriction(struct drm_i915_private *i915, + u32 guc_wopcm_base, u32 guc_wopcm_size, + u32 huc_fw_size) +{ + int err = 0; + + if (IS_GEN9(i915)) + err = gen9_check_dword_gap(guc_wopcm_base, guc_wopcm_size); + + if (!err && + (IS_GEN9(i915) || IS_CNL_REVID(i915, CNL_REVID_A0, CNL_REVID_A0))) + err = gen9_check_huc_fw_fits(guc_wopcm_size, huc_fw_size); + + return err; +} + +/** + * intel_wopcm_init() - Initialize the WOPCM structure. + * @wopcm: pointer to intel_wopcm. + * + * This function will partition WOPCM space based on GuC and HuC firmware sizes + * and will allocate max remaining for use by GuC. This function will also + * enforce platform dependent hardware restrictions on GuC WOPCM offset and + * size. It will fail the WOPCM init if any of these checks were failed, so that + * the following GuC firmware uploading would be aborted. + * + * Return: 0 on success, non-zero error code on failure. + */ +int intel_wopcm_init(struct intel_wopcm *wopcm) +{ + struct drm_i915_private *i915 = wopcm_to_i915(wopcm); + u32 guc_fw_size = intel_uc_fw_get_upload_size(&i915->guc.fw); + u32 huc_fw_size = intel_uc_fw_get_upload_size(&i915->huc.fw); + u32 ctx_rsvd = context_reserved_size(i915); + u32 guc_wopcm_base; + u32 guc_wopcm_size; + u32 guc_wopcm_rsvd; + int err; + + GEM_BUG_ON(!wopcm->size); + + if (guc_fw_size >= wopcm->size) { + DRM_ERROR("GuC FW (%uKiB) is too big to fit in WOPCM.", + guc_fw_size / 1024); + return -E2BIG; + } + + if (huc_fw_size >= wopcm->size) { + DRM_ERROR("HuC FW (%uKiB) is too big to fit in WOPCM.", + huc_fw_size / 1024); + return -E2BIG; + } + + guc_wopcm_base = ALIGN(huc_fw_size + WOPCM_RESERVED_SIZE, + GUC_WOPCM_OFFSET_ALIGNMENT); + if ((guc_wopcm_base + ctx_rsvd) >= wopcm->size) { + DRM_ERROR("GuC WOPCM base (%uKiB) is too big.\n", + guc_wopcm_base / 1024); + return -E2BIG; + } + + guc_wopcm_size = wopcm->size - guc_wopcm_base - ctx_rsvd; + guc_wopcm_size &= GUC_WOPCM_SIZE_MASK; + + DRM_DEBUG_DRIVER("Calculated GuC WOPCM Region: [%uKiB, %uKiB)\n", + guc_wopcm_base / 1024, guc_wopcm_size / 1024); + + guc_wopcm_rsvd = GUC_WOPCM_RESERVED + GUC_WOPCM_STACK_RESERVED; + if ((guc_fw_size + guc_wopcm_rsvd) > guc_wopcm_size) { + DRM_ERROR("Need %uKiB WOPCM for GuC, %uKiB available.\n", + (guc_fw_size + guc_wopcm_rsvd) / 1024, + guc_wopcm_size / 1024); + return -E2BIG; + } + + err = check_hw_restriction(i915, guc_wopcm_base, guc_wopcm_size, + huc_fw_size); + if (err) + return err; + + wopcm->guc.base = guc_wopcm_base; + wopcm->guc.size = guc_wopcm_size; + + return 0; +} + +static inline int write_and_verify(struct drm_i915_private *dev_priv, + i915_reg_t reg, u32 val, u32 mask, + u32 locked_bit) +{ + u32 reg_val; + + GEM_BUG_ON(val & ~mask); + + I915_WRITE(reg, val); + + reg_val = I915_READ(reg); + + return (reg_val & mask) != (val | locked_bit) ? -EIO : 0; +} + +/** + * intel_wopcm_init_hw() - Setup GuC WOPCM registers. + * @wopcm: pointer to intel_wopcm. + * + * Setup the GuC WOPCM size and offset registers with the calculated values. It + * will verify the register values to make sure the registers are locked with + * correct values. + * + * Return: 0 on success. -EIO if registers were locked with incorrect values. + */ +int intel_wopcm_init_hw(struct intel_wopcm *wopcm) +{ + struct drm_i915_private *dev_priv = wopcm_to_i915(wopcm); + u32 huc_agent; + u32 mask; + int err; + + if (!USES_GUC(dev_priv)) + return 0; + + GEM_BUG_ON(!HAS_GUC(dev_priv)); + GEM_BUG_ON(!wopcm->guc.size); + GEM_BUG_ON(!wopcm->guc.base); + + err = write_and_verify(dev_priv, GUC_WOPCM_SIZE, wopcm->guc.size, + GUC_WOPCM_SIZE_MASK | GUC_WOPCM_SIZE_LOCKED, + GUC_WOPCM_SIZE_LOCKED); + if (err) + goto err_out; + + huc_agent = USES_HUC(dev_priv) ? HUC_LOADING_AGENT_GUC : 0; + mask = GUC_WOPCM_OFFSET_MASK | GUC_WOPCM_OFFSET_VALID | huc_agent; + err = write_and_verify(dev_priv, DMA_GUC_WOPCM_OFFSET, + wopcm->guc.base | huc_agent, mask, + GUC_WOPCM_OFFSET_VALID); + if (err) + goto err_out; + + return 0; + +err_out: + DRM_ERROR("Failed to init WOPCM registers:\n"); + DRM_ERROR("DMA_GUC_WOPCM_OFFSET=%#x\n", + I915_READ(DMA_GUC_WOPCM_OFFSET)); + DRM_ERROR("GUC_WOPCM_SIZE=%#x\n", I915_READ(GUC_WOPCM_SIZE)); + + return err; +} diff --git a/drivers/gpu/drm/i915/intel_wopcm.h b/drivers/gpu/drm/i915/intel_wopcm.h new file mode 100644 index 0000000..6298910 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_wopcm.h @@ -0,0 +1,31 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2017-2018 Intel Corporation + */ + +#ifndef _INTEL_WOPCM_H_ +#define _INTEL_WOPCM_H_ + +#include <linux/types.h> + +/** + * struct intel_wopcm - Overall WOPCM info and WOPCM regions. + * @size: Size of overall WOPCM. + * @guc: GuC WOPCM Region info. + * @guc.base: GuC WOPCM base which is offset from WOPCM base. + * @guc.size: Size of the GuC WOPCM region. + */ +struct intel_wopcm { + u32 size; + struct { + u32 base; + u32 size; + } guc; +}; + +void intel_wopcm_init_early(struct intel_wopcm *wopcm); +int intel_wopcm_init(struct intel_wopcm *wopcm); +int intel_wopcm_init_hw(struct intel_wopcm *wopcm); + +#endif diff --git a/drivers/gpu/drm/i915/intel_workarounds.c b/drivers/gpu/drm/i915/intel_workarounds.c new file mode 100644 index 0000000..2df3538 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_workarounds.c @@ -0,0 +1,949 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2014-2018 Intel Corporation + */ + +#include "i915_drv.h" +#include "intel_workarounds.h" + +/** + * DOC: Hardware workarounds + * + * This file is intended as a central place to implement most [1]_ of the + * required workarounds for hardware to work as originally intended. They fall + * in five basic categories depending on how/when they are applied: + * + * - Workarounds that touch registers that are saved/restored to/from the HW + * context image. The list is emitted (via Load Register Immediate commands) + * everytime a new context is created. + * - GT workarounds. The list of these WAs is applied whenever these registers + * revert to default values (on GPU reset, suspend/resume [2]_, etc..). + * - Display workarounds. The list is applied during display clock-gating + * initialization. + * - Workarounds that whitelist a privileged register, so that UMDs can manage + * them directly. This is just a special case of a MMMIO workaround (as we + * write the list of these to/be-whitelisted registers to some special HW + * registers). + * - Workaround batchbuffers, that get executed automatically by the hardware + * on every HW context restore. + * + * .. [1] Please notice that there are other WAs that, due to their nature, + * cannot be applied from a central place. Those are peppered around the rest + * of the code, as needed. + * + * .. [2] Technically, some registers are powercontext saved & restored, so they + * survive a suspend/resume. In practice, writing them again is not too + * costly and simplifies things. We can revisit this in the future. + * + * Layout + * '''''' + * + * Keep things in this file ordered by WA type, as per the above (context, GT, + * display, register whitelist, batchbuffer). Then, inside each type, keep the + * following order: + * + * - Infrastructure functions and macros + * - WAs per platform in standard gen/chrono order + * - Public functions to init or apply the given workaround type. + */ + +static int wa_add(struct drm_i915_private *dev_priv, + i915_reg_t addr, + const u32 mask, const u32 val) +{ + const unsigned int idx = dev_priv->workarounds.count; + + if (WARN_ON(idx >= I915_MAX_WA_REGS)) + return -ENOSPC; + + dev_priv->workarounds.reg[idx].addr = addr; + dev_priv->workarounds.reg[idx].value = val; + dev_priv->workarounds.reg[idx].mask = mask; + + dev_priv->workarounds.count++; + + return 0; +} + +#define WA_REG(addr, mask, val) do { \ + const int r = wa_add(dev_priv, (addr), (mask), (val)); \ + if (r) \ + return r; \ + } while (0) + +#define WA_SET_BIT_MASKED(addr, mask) \ + WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask)) + +#define WA_CLR_BIT_MASKED(addr, mask) \ + WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask)) + +#define WA_SET_FIELD_MASKED(addr, mask, value) \ + WA_REG(addr, (mask), _MASKED_FIELD(mask, value)) + +static int gen8_ctx_workarounds_init(struct drm_i915_private *dev_priv) +{ + WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING); + + /* WaDisableAsyncFlipPerfMode:bdw,chv */ + WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE); + + /* WaDisablePartialInstShootdown:bdw,chv */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, + PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); + + /* Use Force Non-Coherent whenever executing a 3D context. This is a + * workaround for for a possible hang in the unlikely event a TLB + * invalidation occurs during a PSD flush. + */ + /* WaForceEnableNonCoherent:bdw,chv */ + /* WaHdcDisableFetchWhenMasked:bdw,chv */ + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_DONOT_FETCH_MEM_WHEN_MASKED | + HDC_FORCE_NON_COHERENT); + + /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0: + * "The Hierarchical Z RAW Stall Optimization allows non-overlapping + * polygons in the same 8x4 pixel/sample area to be processed without + * stalling waiting for the earlier ones to write to Hierarchical Z + * buffer." + * + * This optimization is off by default for BDW and CHV; turn it on. + */ + WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE); + + /* Wa4x4STCOptimizationDisable:bdw,chv */ + WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE); + + /* + * BSpec recommends 8x4 when MSAA is used, + * however in practice 16x4 seems fastest. + * + * Note that PS/WM thread counts depend on the WIZ hashing + * disable bit, which we don't touch here, but it's good + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). + */ + WA_SET_FIELD_MASKED(GEN7_GT_MODE, + GEN6_WIZ_HASHING_MASK, + GEN6_WIZ_HASHING_16x4); + + return 0; +} + +static int bdw_ctx_workarounds_init(struct drm_i915_private *dev_priv) +{ + int ret; + + ret = gen8_ctx_workarounds_init(dev_priv); + if (ret) + return ret; + + /* WaDisableThreadStallDopClockGating:bdw (pre-production) */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); + + /* WaDisableDopClockGating:bdw + * + * Also see the related UCGTCL1 write in broadwell_init_clock_gating() + * to disable EUTC clock gating. + */ + WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, + DOP_CLOCK_GATING_DISABLE); + + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, + GEN8_SAMPLER_POWER_BYPASS_DIS); + + WA_SET_BIT_MASKED(HDC_CHICKEN0, + /* WaForceContextSaveRestoreNonCoherent:bdw */ + HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | + /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */ + (IS_BDW_GT3(dev_priv) ? HDC_FENCE_DEST_SLM_DISABLE : 0)); + + return 0; +} + +static int chv_ctx_workarounds_init(struct drm_i915_private *dev_priv) +{ + int ret; + + ret = gen8_ctx_workarounds_init(dev_priv); + if (ret) + return ret; + + /* WaDisableThreadStallDopClockGating:chv */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); + + /* Improve HiZ throughput on CHV. */ + WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X); + + return 0; +} + +static int gen9_ctx_workarounds_init(struct drm_i915_private *dev_priv) +{ + if (HAS_LLC(dev_priv)) { + /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl + * + * Must match Display Engine. See + * WaCompressedResourceDisplayNewHashMode. + */ + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN9_PBE_COMPRESSED_HASH_SELECTION); + WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, + GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR); + } + + /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */ + /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, + FLOW_CONTROL_ENABLE | + PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); + + /* Syncing dependencies between camera and graphics:skl,bxt,kbl */ + if (!IS_COFFEELAKE(dev_priv)) + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, + GEN9_DISABLE_OCL_OOB_SUPPRESS_LOGIC); + + /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */ + /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */ + WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, + GEN9_ENABLE_YV12_BUGFIX | + GEN9_ENABLE_GPGPU_PREEMPTION); + + /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */ + /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */ + WA_SET_BIT_MASKED(CACHE_MODE_1, + GEN8_4x4_STC_OPTIMIZATION_DISABLE | + GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE); + + /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */ + WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, + GEN9_CCS_TLB_PREFETCH_ENABLE); + + /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */ + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | + HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE); + + /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are + * both tied to WaForceContextSaveRestoreNonCoherent + * in some hsds for skl. We keep the tie for all gen9. The + * documentation is a bit hazy and so we want to get common behaviour, + * even though there is no clear evidence we would need both on kbl/bxt. + * This area has been source of system hangs so we play it safe + * and mimic the skl regardless of what bspec says. + * + * Use Force Non-Coherent whenever executing a 3D context. This + * is a workaround for a possible hang in the unlikely event + * a TLB invalidation occurs during a PSD flush. + */ + + /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */ + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_FORCE_NON_COHERENT); + + /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */ + if (IS_SKYLAKE(dev_priv) || + IS_KABYLAKE(dev_priv) || + IS_COFFEELAKE(dev_priv)) + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, + GEN8_SAMPLER_POWER_BYPASS_DIS); + + /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */ + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); + + /* + * Supporting preemption with fine-granularity requires changes in the + * batch buffer programming. Since we can't break old userspace, we + * need to set our default preemption level to safe value. Userspace is + * still able to use more fine-grained preemption levels, since in + * WaEnablePreemptionGranularityControlByUMD we're whitelisting the + * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are + * not real HW workarounds, but merely a way to start using preemption + * while maintaining old contract with userspace. + */ + + /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */ + WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL); + + /* WaDisableGPGPUMidCmdPreemption:skl,bxt,blk,cfl,[cnl] */ + WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, + GEN9_PREEMPT_GPGPU_LEVEL_MASK, + GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); + + /* WaClearHIZ_WM_CHICKEN3:bxt,glk */ + if (IS_GEN9_LP(dev_priv)) + WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ); + + return 0; +} + +static int skl_tune_iz_hashing(struct drm_i915_private *dev_priv) +{ + u8 vals[3] = { 0, 0, 0 }; + unsigned int i; + + for (i = 0; i < 3; i++) { + u8 ss; + + /* + * Only consider slices where one, and only one, subslice has 7 + * EUs + */ + if (!is_power_of_2(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i])) + continue; + + /* + * subslice_7eu[i] != 0 (because of the check above) and + * ss_max == 4 (maximum number of subslices possible per slice) + * + * -> 0 <= ss <= 3; + */ + ss = ffs(INTEL_INFO(dev_priv)->sseu.subslice_7eu[i]) - 1; + vals[i] = 3 - ss; + } + + if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0) + return 0; + + /* Tune IZ hashing. See intel_device_info_runtime_init() */ + WA_SET_FIELD_MASKED(GEN7_GT_MODE, + GEN9_IZ_HASHING_MASK(2) | + GEN9_IZ_HASHING_MASK(1) | + GEN9_IZ_HASHING_MASK(0), + GEN9_IZ_HASHING(2, vals[2]) | + GEN9_IZ_HASHING(1, vals[1]) | + GEN9_IZ_HASHING(0, vals[0])); + + return 0; +} + +static int skl_ctx_workarounds_init(struct drm_i915_private *dev_priv) +{ + int ret; + + ret = gen9_ctx_workarounds_init(dev_priv); + if (ret) + return ret; + + return skl_tune_iz_hashing(dev_priv); +} + +static int bxt_ctx_workarounds_init(struct drm_i915_private *dev_priv) +{ + int ret; + + ret = gen9_ctx_workarounds_init(dev_priv); + if (ret) + return ret; + + /* WaDisableThreadStallDopClockGating:bxt */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, + STALL_DOP_GATING_DISABLE); + + /* WaToEnableHwFixForPushConstHWBug:bxt */ + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); + + return 0; +} + +static int kbl_ctx_workarounds_init(struct drm_i915_private *dev_priv) +{ + int ret; + + ret = gen9_ctx_workarounds_init(dev_priv); + if (ret) + return ret; + + /* WaDisableFenceDestinationToSLM:kbl (pre-prod) */ + if (IS_KBL_REVID(dev_priv, KBL_REVID_A0, KBL_REVID_A0)) + WA_SET_BIT_MASKED(HDC_CHICKEN0, + HDC_FENCE_DEST_SLM_DISABLE); + + /* WaToEnableHwFixForPushConstHWBug:kbl */ + if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER)) + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); + + /* WaDisableSbeCacheDispatchPortSharing:kbl */ + WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1, + GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); + + return 0; +} + +static int glk_ctx_workarounds_init(struct drm_i915_private *dev_priv) +{ + int ret; + + ret = gen9_ctx_workarounds_init(dev_priv); + if (ret) + return ret; + + /* WaToEnableHwFixForPushConstHWBug:glk */ + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); + + return 0; +} + +static int cfl_ctx_workarounds_init(struct drm_i915_private *dev_priv) +{ + int ret; + + ret = gen9_ctx_workarounds_init(dev_priv); + if (ret) + return ret; + + /* WaToEnableHwFixForPushConstHWBug:cfl */ + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); + + /* WaDisableSbeCacheDispatchPortSharing:cfl */ + WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1, + GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); + + return 0; +} + +static int cnl_ctx_workarounds_init(struct drm_i915_private *dev_priv) +{ + /* WaForceContextSaveRestoreNonCoherent:cnl */ + WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0, + HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT); + + /* WaThrottleEUPerfToAvoidTDBackPressure:cnl(pre-prod) */ + if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0)) + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, THROTTLE_12_5); + + /* WaDisableReplayBufferBankArbitrationOptimization:cnl */ + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); + + /* WaDisableEnhancedSBEVertexCaching:cnl (pre-prod) */ + if (IS_CNL_REVID(dev_priv, 0, CNL_REVID_B0)) + WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, + GEN8_CSC2_SBE_VUE_CACHE_CONSERVATIVE); + + /* WaPushConstantDereferenceHoldDisable:cnl */ + WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE); + + /* FtrEnableFastAnisoL1BankingFix:cnl */ + WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX); + + /* WaDisable3DMidCmdPreemption:cnl */ + WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL); + + /* WaDisableGPGPUMidCmdPreemption:cnl */ + WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, + GEN9_PREEMPT_GPGPU_LEVEL_MASK, + GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); + + /* WaDisableEarlyEOT:cnl */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT); + + return 0; +} + +static int icl_ctx_workarounds_init(struct drm_i915_private *dev_priv) +{ + /* Wa_1604370585:icl (pre-prod) + * Formerly known as WaPushConstantDereferenceHoldDisable + */ + if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_B0)) + WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, + PUSH_CONSTANT_DEREF_DISABLE); + + /* WaForceEnableNonCoherent:icl + * This is not the same workaround as in early Gen9 platforms, where + * lacking this could cause system hangs, but coherency performance + * overhead is high and only a few compute workloads really need it + * (the register is whitelisted in hardware now, so UMDs can opt in + * for coherency if they have a good reason). + */ + WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT); + + return 0; +} + +int intel_ctx_workarounds_init(struct drm_i915_private *dev_priv) +{ + int err = 0; + + dev_priv->workarounds.count = 0; + + if (INTEL_GEN(dev_priv) < 8) + err = 0; + else if (IS_BROADWELL(dev_priv)) + err = bdw_ctx_workarounds_init(dev_priv); + else if (IS_CHERRYVIEW(dev_priv)) + err = chv_ctx_workarounds_init(dev_priv); + else if (IS_SKYLAKE(dev_priv)) + err = skl_ctx_workarounds_init(dev_priv); + else if (IS_BROXTON(dev_priv)) + err = bxt_ctx_workarounds_init(dev_priv); + else if (IS_KABYLAKE(dev_priv)) + err = kbl_ctx_workarounds_init(dev_priv); + else if (IS_GEMINILAKE(dev_priv)) + err = glk_ctx_workarounds_init(dev_priv); + else if (IS_COFFEELAKE(dev_priv)) + err = cfl_ctx_workarounds_init(dev_priv); + else if (IS_CANNONLAKE(dev_priv)) + err = cnl_ctx_workarounds_init(dev_priv); + else if (IS_ICELAKE(dev_priv)) + err = icl_ctx_workarounds_init(dev_priv); + else + MISSING_CASE(INTEL_GEN(dev_priv)); + if (err) + return err; + + DRM_DEBUG_DRIVER("Number of context specific w/a: %d\n", + dev_priv->workarounds.count); + return 0; +} + +int intel_ctx_workarounds_emit(struct i915_request *rq) +{ + struct i915_workarounds *w = &rq->i915->workarounds; + u32 *cs; + int ret, i; + + if (w->count == 0) + return 0; + + ret = rq->engine->emit_flush(rq, EMIT_BARRIER); + if (ret) + return ret; + + cs = intel_ring_begin(rq, (w->count * 2 + 2)); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_LOAD_REGISTER_IMM(w->count); + for (i = 0; i < w->count; i++) { + *cs++ = i915_mmio_reg_offset(w->reg[i].addr); + *cs++ = w->reg[i].value; + } + *cs++ = MI_NOOP; + + intel_ring_advance(rq, cs); + + ret = rq->engine->emit_flush(rq, EMIT_BARRIER); + if (ret) + return ret; + + return 0; +} + +static void bdw_gt_workarounds_apply(struct drm_i915_private *dev_priv) +{ +} + +static void chv_gt_workarounds_apply(struct drm_i915_private *dev_priv) +{ +} + +static void gen9_gt_workarounds_apply(struct drm_i915_private *dev_priv) +{ + /* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */ + I915_WRITE(GEN9_CSFE_CHICKEN1_RCS, + _MASKED_BIT_ENABLE(GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE)); + + /* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */ + I915_WRITE(BDW_SCRATCH1, I915_READ(BDW_SCRATCH1) | + GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE); + + /* WaDisableKillLogic:bxt,skl,kbl */ + if (!IS_COFFEELAKE(dev_priv)) + I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | + ECOCHK_DIS_TLB); + + if (HAS_LLC(dev_priv)) { + /* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl + * + * Must match Display Engine. See + * WaCompressedResourceDisplayNewHashMode. + */ + I915_WRITE(MMCD_MISC_CTRL, + I915_READ(MMCD_MISC_CTRL) | + MMCD_PCLA | + MMCD_HOTSPOT_EN); + } + + /* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */ + I915_WRITE(GAM_ECOCHK, I915_READ(GAM_ECOCHK) | + BDW_DISABLE_HDC_INVALIDATION); + + /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */ + if (IS_GEN9_LP(dev_priv)) { + u32 val = I915_READ(GEN8_L3SQCREG1); + + val &= ~L3_PRIO_CREDITS_MASK; + val |= L3_GENERAL_PRIO_CREDITS(62) | L3_HIGH_PRIO_CREDITS(2); + I915_WRITE(GEN8_L3SQCREG1, val); + } + + /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */ + I915_WRITE(GEN8_L3SQCREG4, + I915_READ(GEN8_L3SQCREG4) | GEN8_LQSC_FLUSH_COHERENT_LINES); + + /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */ + I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1, + _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL)); +} + +static void skl_gt_workarounds_apply(struct drm_i915_private *dev_priv) +{ + gen9_gt_workarounds_apply(dev_priv); + + /* WaEnableGapsTsvCreditFix:skl */ + I915_WRITE(GEN8_GARBCNTL, + I915_READ(GEN8_GARBCNTL) | GEN9_GAPS_TSV_CREDIT_DISABLE); + + /* WaDisableGafsUnitClkGating:skl */ + I915_WRITE(GEN7_UCGCTL4, + I915_READ(GEN7_UCGCTL4) | GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); + + /* WaInPlaceDecompressionHang:skl */ + if (IS_SKL_REVID(dev_priv, SKL_REVID_H0, REVID_FOREVER)) + I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, + I915_READ(GEN9_GAMT_ECO_REG_RW_IA) | + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); +} + +static void bxt_gt_workarounds_apply(struct drm_i915_private *dev_priv) +{ + gen9_gt_workarounds_apply(dev_priv); + + /* WaDisablePooledEuLoadBalancingFix:bxt */ + I915_WRITE(FF_SLICE_CS_CHICKEN2, + _MASKED_BIT_ENABLE(GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE)); + + /* WaInPlaceDecompressionHang:bxt */ + I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, + I915_READ(GEN9_GAMT_ECO_REG_RW_IA) | + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); +} + +static void kbl_gt_workarounds_apply(struct drm_i915_private *dev_priv) +{ + gen9_gt_workarounds_apply(dev_priv); + + /* WaEnableGapsTsvCreditFix:kbl */ + I915_WRITE(GEN8_GARBCNTL, + I915_READ(GEN8_GARBCNTL) | GEN9_GAPS_TSV_CREDIT_DISABLE); + + /* WaDisableDynamicCreditSharing:kbl */ + if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0)) + I915_WRITE(GAMT_CHKN_BIT_REG, + I915_READ(GAMT_CHKN_BIT_REG) | + GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING); + + /* WaDisableGafsUnitClkGating:kbl */ + I915_WRITE(GEN7_UCGCTL4, + I915_READ(GEN7_UCGCTL4) | GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); + + /* WaInPlaceDecompressionHang:kbl */ + I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, + I915_READ(GEN9_GAMT_ECO_REG_RW_IA) | + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); +} + +static void glk_gt_workarounds_apply(struct drm_i915_private *dev_priv) +{ + gen9_gt_workarounds_apply(dev_priv); +} + +static void cfl_gt_workarounds_apply(struct drm_i915_private *dev_priv) +{ + gen9_gt_workarounds_apply(dev_priv); + + /* WaEnableGapsTsvCreditFix:cfl */ + I915_WRITE(GEN8_GARBCNTL, + I915_READ(GEN8_GARBCNTL) | GEN9_GAPS_TSV_CREDIT_DISABLE); + + /* WaDisableGafsUnitClkGating:cfl */ + I915_WRITE(GEN7_UCGCTL4, + I915_READ(GEN7_UCGCTL4) | GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE); + + /* WaInPlaceDecompressionHang:cfl */ + I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, + I915_READ(GEN9_GAMT_ECO_REG_RW_IA) | + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); +} + +static void cnl_gt_workarounds_apply(struct drm_i915_private *dev_priv) +{ + /* WaDisableI2mCycleOnWRPort:cnl (pre-prod) */ + if (IS_CNL_REVID(dev_priv, CNL_REVID_B0, CNL_REVID_B0)) + I915_WRITE(GAMT_CHKN_BIT_REG, + I915_READ(GAMT_CHKN_BIT_REG) | + GAMT_CHKN_DISABLE_I2M_CYCLE_ON_WR_PORT); + + /* WaInPlaceDecompressionHang:cnl */ + I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, + I915_READ(GEN9_GAMT_ECO_REG_RW_IA) | + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); + + /* WaEnablePreemptionGranularityControlByUMD:cnl */ + I915_WRITE(GEN7_FF_SLICE_CS_CHICKEN1, + _MASKED_BIT_ENABLE(GEN9_FFSC_PERCTX_PREEMPT_CTRL)); +} + +static void icl_gt_workarounds_apply(struct drm_i915_private *dev_priv) +{ + /* This is not an Wa. Enable for better image quality */ + I915_WRITE(_3D_CHICKEN3, + _MASKED_BIT_ENABLE(_3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE)); + + /* WaInPlaceDecompressionHang:icl */ + I915_WRITE(GEN9_GAMT_ECO_REG_RW_IA, I915_READ(GEN9_GAMT_ECO_REG_RW_IA) | + GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); + + /* WaPipelineFlushCoherentLines:icl */ + I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) | + GEN8_LQSC_FLUSH_COHERENT_LINES); + + /* Wa_1405543622:icl + * Formerly known as WaGAPZPriorityScheme + */ + I915_WRITE(GEN8_GARBCNTL, I915_READ(GEN8_GARBCNTL) | + GEN11_ARBITRATION_PRIO_ORDER_MASK); + + /* Wa_1604223664:icl + * Formerly known as WaL3BankAddressHashing + */ + I915_WRITE(GEN8_GARBCNTL, + (I915_READ(GEN8_GARBCNTL) & ~GEN11_HASH_CTRL_EXCL_MASK) | + GEN11_HASH_CTRL_EXCL_BIT0); + I915_WRITE(GEN11_GLBLINVL, + (I915_READ(GEN11_GLBLINVL) & ~GEN11_BANK_HASH_ADDR_EXCL_MASK) | + GEN11_BANK_HASH_ADDR_EXCL_BIT0); + + /* WaModifyGamTlbPartitioning:icl */ + I915_WRITE(GEN11_GACB_PERF_CTRL, + (I915_READ(GEN11_GACB_PERF_CTRL) & ~GEN11_HASH_CTRL_MASK) | + GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4); + + /* Wa_1405733216:icl + * Formerly known as WaDisableCleanEvicts + */ + I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) | + GEN11_LQSC_CLEAN_EVICT_DISABLE); + + /* Wa_1405766107:icl + * Formerly known as WaCL2SFHalfMaxAlloc + */ + I915_WRITE(GEN11_LSN_UNSLCVC, I915_READ(GEN11_LSN_UNSLCVC) | + GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC | + GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC); + + /* Wa_220166154:icl + * Formerly known as WaDisCtxReload + */ + I915_WRITE(GAMW_ECO_DEV_RW_IA_REG, I915_READ(GAMW_ECO_DEV_RW_IA_REG) | + GAMW_ECO_DEV_CTX_RELOAD_DISABLE); + + /* Wa_1405779004:icl (pre-prod) */ + if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_A0)) + I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE, + I915_READ(SLICE_UNIT_LEVEL_CLKGATE) | + MSCUNIT_CLKGATE_DIS); + + /* Wa_1406680159:icl */ + I915_WRITE(SUBSLICE_UNIT_LEVEL_CLKGATE, + I915_READ(SUBSLICE_UNIT_LEVEL_CLKGATE) | + GWUNIT_CLKGATE_DIS); + + /* Wa_1604302699:icl */ + I915_WRITE(GEN10_L3_CHICKEN_MODE_REGISTER, + I915_READ(GEN10_L3_CHICKEN_MODE_REGISTER) | + GEN11_I2M_WRITE_DISABLE); + + /* Wa_1406838659:icl (pre-prod) */ + if (IS_ICL_REVID(dev_priv, ICL_REVID_A0, ICL_REVID_B0)) + I915_WRITE(INF_UNIT_LEVEL_CLKGATE, + I915_READ(INF_UNIT_LEVEL_CLKGATE) | + CGPSF_CLKGATE_DIS); + + /* WaForwardProgressSoftReset:icl */ + I915_WRITE(GEN10_SCRATCH_LNCF2, + I915_READ(GEN10_SCRATCH_LNCF2) | + PMFLUSHDONE_LNICRSDROP | + PMFLUSH_GAPL3UNBLOCK | + PMFLUSHDONE_LNEBLK); +} + +void intel_gt_workarounds_apply(struct drm_i915_private *dev_priv) +{ + if (INTEL_GEN(dev_priv) < 8) + return; + else if (IS_BROADWELL(dev_priv)) + bdw_gt_workarounds_apply(dev_priv); + else if (IS_CHERRYVIEW(dev_priv)) + chv_gt_workarounds_apply(dev_priv); + else if (IS_SKYLAKE(dev_priv)) + skl_gt_workarounds_apply(dev_priv); + else if (IS_BROXTON(dev_priv)) + bxt_gt_workarounds_apply(dev_priv); + else if (IS_KABYLAKE(dev_priv)) + kbl_gt_workarounds_apply(dev_priv); + else if (IS_GEMINILAKE(dev_priv)) + glk_gt_workarounds_apply(dev_priv); + else if (IS_COFFEELAKE(dev_priv)) + cfl_gt_workarounds_apply(dev_priv); + else if (IS_CANNONLAKE(dev_priv)) + cnl_gt_workarounds_apply(dev_priv); + else if (IS_ICELAKE(dev_priv)) + icl_gt_workarounds_apply(dev_priv); + else + MISSING_CASE(INTEL_GEN(dev_priv)); +} + +struct whitelist { + i915_reg_t reg[RING_MAX_NONPRIV_SLOTS]; + unsigned int count; + u32 nopid; +}; + +static void whitelist_reg(struct whitelist *w, i915_reg_t reg) +{ + if (GEM_WARN_ON(w->count >= RING_MAX_NONPRIV_SLOTS)) + return; + + w->reg[w->count++] = reg; +} + +static void bdw_whitelist_build(struct whitelist *w) +{ +} + +static void chv_whitelist_build(struct whitelist *w) +{ +} + +static void gen9_whitelist_build(struct whitelist *w) +{ + /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */ + whitelist_reg(w, GEN9_CTX_PREEMPT_REG); + + /* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */ + whitelist_reg(w, GEN8_CS_CHICKEN1); + + /* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */ + whitelist_reg(w, GEN8_HDC_CHICKEN1); +} + +static void skl_whitelist_build(struct whitelist *w) +{ + gen9_whitelist_build(w); + + /* WaDisableLSQCROPERFforOCL:skl */ + whitelist_reg(w, GEN8_L3SQCREG4); +} + +static void bxt_whitelist_build(struct whitelist *w) +{ + gen9_whitelist_build(w); +} + +static void kbl_whitelist_build(struct whitelist *w) +{ + gen9_whitelist_build(w); + + /* WaDisableLSQCROPERFforOCL:kbl */ + whitelist_reg(w, GEN8_L3SQCREG4); +} + +static void glk_whitelist_build(struct whitelist *w) +{ + gen9_whitelist_build(w); + + /* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */ + whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1); +} + +static void cfl_whitelist_build(struct whitelist *w) +{ + gen9_whitelist_build(w); +} + +static void cnl_whitelist_build(struct whitelist *w) +{ + /* WaEnablePreemptionGranularityControlByUMD:cnl */ + whitelist_reg(w, GEN8_CS_CHICKEN1); +} + +static void icl_whitelist_build(struct whitelist *w) +{ +} + +static struct whitelist *whitelist_build(struct intel_engine_cs *engine, + struct whitelist *w) +{ + struct drm_i915_private *i915 = engine->i915; + + GEM_BUG_ON(engine->id != RCS); + + w->count = 0; + w->nopid = i915_mmio_reg_offset(RING_NOPID(engine->mmio_base)); + + if (INTEL_GEN(i915) < 8) + return NULL; + else if (IS_BROADWELL(i915)) + bdw_whitelist_build(w); + else if (IS_CHERRYVIEW(i915)) + chv_whitelist_build(w); + else if (IS_SKYLAKE(i915)) + skl_whitelist_build(w); + else if (IS_BROXTON(i915)) + bxt_whitelist_build(w); + else if (IS_KABYLAKE(i915)) + kbl_whitelist_build(w); + else if (IS_GEMINILAKE(i915)) + glk_whitelist_build(w); + else if (IS_COFFEELAKE(i915)) + cfl_whitelist_build(w); + else if (IS_CANNONLAKE(i915)) + cnl_whitelist_build(w); + else if (IS_ICELAKE(i915)) + icl_whitelist_build(w); + else + MISSING_CASE(INTEL_GEN(i915)); + + return w; +} + +static void whitelist_apply(struct intel_engine_cs *engine, + const struct whitelist *w) +{ + struct drm_i915_private *dev_priv = engine->i915; + const u32 base = engine->mmio_base; + unsigned int i; + + if (!w) + return; + + intel_uncore_forcewake_get(engine->i915, FORCEWAKE_ALL); + + for (i = 0; i < w->count; i++) + I915_WRITE_FW(RING_FORCE_TO_NONPRIV(base, i), + i915_mmio_reg_offset(w->reg[i])); + + /* And clear the rest just in case of garbage */ + for (; i < RING_MAX_NONPRIV_SLOTS; i++) + I915_WRITE_FW(RING_FORCE_TO_NONPRIV(base, i), w->nopid); + + intel_uncore_forcewake_put(engine->i915, FORCEWAKE_ALL); +} + +void intel_whitelist_workarounds_apply(struct intel_engine_cs *engine) +{ + struct whitelist w; + + whitelist_apply(engine, whitelist_build(engine, &w)); +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftests/intel_workarounds.c" +#endif diff --git a/drivers/gpu/drm/i915/intel_workarounds.h b/drivers/gpu/drm/i915/intel_workarounds.h new file mode 100644 index 0000000..b11d0623 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_workarounds.h @@ -0,0 +1,17 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2014-2018 Intel Corporation + */ + +#ifndef _I915_WORKAROUNDS_H_ +#define _I915_WORKAROUNDS_H_ + +int intel_ctx_workarounds_init(struct drm_i915_private *dev_priv); +int intel_ctx_workarounds_emit(struct i915_request *rq); + +void intel_gt_workarounds_apply(struct drm_i915_private *dev_priv); + +void intel_whitelist_workarounds_apply(struct intel_engine_cs *engine); + +#endif diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c index 05bbef3..91c7291 100644 --- a/drivers/gpu/drm/i915/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c @@ -1091,7 +1091,7 @@ static int __igt_write_huge(struct i915_gem_context *ctx, out_vma_unpin: i915_vma_unpin(vma); out_vma_close: - i915_vma_close(vma); + i915_vma_destroy(vma); return err; } @@ -1757,6 +1757,9 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *dev_priv) goto out_unlock; } + if (ctx->ppgtt) + ctx->ppgtt->base.scrub_64K = true; + err = i915_subtests(tests, ctx); out_unlock: diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 7ecaed5..ddb03f0 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -23,6 +23,7 @@ */ #include "../i915_selftest.h" +#include "igt_flush_test.h" #include "mock_drm.h" #include "huge_gem_object.h" @@ -411,6 +412,8 @@ static int igt_ctx_exec(void *arg) } out_unlock: + if (igt_flush_test(i915, I915_WAIT_LOCKED)) + err = -EIO; mutex_unlock(&i915->drm.struct_mutex); mock_file_free(i915, file); diff --git a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h index 9c76f03..a00e2bd 100644 --- a/drivers/gpu/drm/i915/selftests/i915_live_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_live_selftests.h @@ -11,6 +11,7 @@ */ selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */ selftest(uncore, intel_uncore_live_selftests) +selftest(workarounds, intel_workarounds_live_selftests) selftest(requests, i915_request_live_selftests) selftest(objects, i915_gem_object_live_selftests) selftest(dmabuf, i915_gem_dmabuf_live_selftests) @@ -20,4 +21,5 @@ selftest(evict, i915_gem_evict_live_selftests) selftest(hugepages, i915_gem_huge_page_live_selftests) selftest(contexts, i915_gem_context_live_selftests) selftest(hangcheck, intel_hangcheck_live_selftests) +selftest(execlists, intel_execlists_live_selftests) selftest(guc, intel_guc_live_selftest) diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h index 9a48aa44..d16d741 100644 --- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h +++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h @@ -14,6 +14,7 @@ selftest(fence, i915_sw_fence_mock_selftests) selftest(scatterlist, scatterlist_mock_selftests) selftest(syncmap, i915_syncmap_mock_selftests) selftest(uncore, intel_uncore_mock_selftests) +selftest(engine, intel_engine_cs_mock_selftests) selftest(breadcrumbs, intel_breadcrumbs_mock_selftests) selftest(timelines, i915_gem_timeline_mock_selftests) selftest(requests, i915_request_mock_selftests) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c b/drivers/gpu/drm/i915/selftests/i915_timeline.c index 3000e6a..19f1c6a 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_timeline.c +++ b/drivers/gpu/drm/i915/selftests/i915_timeline.c @@ -1,25 +1,7 @@ /* - * Copyright © 2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. + * SPDX-License-Identifier: MIT * + * Copyright © 2017-2018 Intel Corporation */ #include "../i915_selftest.h" @@ -35,21 +17,21 @@ struct __igt_sync { bool set; }; -static int __igt_sync(struct intel_timeline *tl, +static int __igt_sync(struct i915_timeline *tl, u64 ctx, const struct __igt_sync *p, const char *name) { int ret; - if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) { + if (__i915_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) { pr_err("%s: %s(ctx=%llu, seqno=%u) expected passed %s but failed\n", name, p->name, ctx, p->seqno, yesno(p->expected)); return -EINVAL; } if (p->set) { - ret = __intel_timeline_sync_set(tl, ctx, p->seqno); + ret = __i915_timeline_sync_set(tl, ctx, p->seqno); if (ret) return ret; } @@ -77,37 +59,31 @@ static int igt_sync(void *arg) { "unwrap", UINT_MAX, true, false }, {}, }, *p; - struct intel_timeline *tl; + struct i915_timeline tl; int order, offset; int ret = -ENODEV; - tl = mock_timeline(0); - if (!tl) - return -ENOMEM; - + mock_timeline_init(&tl, 0); for (p = pass; p->name; p++) { for (order = 1; order < 64; order++) { for (offset = -1; offset <= (order > 1); offset++) { u64 ctx = BIT_ULL(order) + offset; - ret = __igt_sync(tl, ctx, p, "1"); + ret = __igt_sync(&tl, ctx, p, "1"); if (ret) goto out; } } } - mock_timeline_destroy(tl); - - tl = mock_timeline(0); - if (!tl) - return -ENOMEM; + mock_timeline_fini(&tl); + mock_timeline_init(&tl, 0); for (order = 1; order < 64; order++) { for (offset = -1; offset <= (order > 1); offset++) { u64 ctx = BIT_ULL(order) + offset; for (p = pass; p->name; p++) { - ret = __igt_sync(tl, ctx, p, "2"); + ret = __igt_sync(&tl, ctx, p, "2"); if (ret) goto out; } @@ -115,7 +91,7 @@ static int igt_sync(void *arg) } out: - mock_timeline_destroy(tl); + mock_timeline_fini(&tl); return ret; } @@ -127,15 +103,13 @@ static unsigned int random_engine(struct rnd_state *rnd) static int bench_sync(void *arg) { struct rnd_state prng; - struct intel_timeline *tl; + struct i915_timeline tl; unsigned long end_time, count; u64 prng32_1M; ktime_t kt; int order, last_order; - tl = mock_timeline(0); - if (!tl) - return -ENOMEM; + mock_timeline_init(&tl, 0); /* Lookups from cache are very fast and so the random number generation * and the loop itself becomes a significant factor in the per-iteration @@ -167,7 +141,7 @@ static int bench_sync(void *arg) do { u64 id = i915_prandom_u64_state(&prng); - __intel_timeline_sync_set(tl, id, 0); + __i915_timeline_sync_set(&tl, id, 0); count++; } while (!time_after(jiffies, end_time)); kt = ktime_sub(ktime_get(), kt); @@ -182,8 +156,8 @@ static int bench_sync(void *arg) while (end_time--) { u64 id = i915_prandom_u64_state(&prng); - if (!__intel_timeline_sync_is_later(tl, id, 0)) { - mock_timeline_destroy(tl); + if (!__i915_timeline_sync_is_later(&tl, id, 0)) { + mock_timeline_fini(&tl); pr_err("Lookup of %llu failed\n", id); return -EINVAL; } @@ -193,19 +167,17 @@ static int bench_sync(void *arg) pr_info("%s: %lu random lookups, %lluns/lookup\n", __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); - mock_timeline_destroy(tl); + mock_timeline_fini(&tl); cond_resched(); - tl = mock_timeline(0); - if (!tl) - return -ENOMEM; + mock_timeline_init(&tl, 0); /* Benchmark setting the first N (in order) contexts */ count = 0; kt = ktime_get(); end_time = jiffies + HZ/10; do { - __intel_timeline_sync_set(tl, count++, 0); + __i915_timeline_sync_set(&tl, count++, 0); } while (!time_after(jiffies, end_time)); kt = ktime_sub(ktime_get(), kt); pr_info("%s: %lu in-order insertions, %lluns/insert\n", @@ -215,9 +187,9 @@ static int bench_sync(void *arg) end_time = count; kt = ktime_get(); while (end_time--) { - if (!__intel_timeline_sync_is_later(tl, end_time, 0)) { + if (!__i915_timeline_sync_is_later(&tl, end_time, 0)) { pr_err("Lookup of %lu failed\n", end_time); - mock_timeline_destroy(tl); + mock_timeline_fini(&tl); return -EINVAL; } } @@ -225,12 +197,10 @@ static int bench_sync(void *arg) pr_info("%s: %lu in-order lookups, %lluns/lookup\n", __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); - mock_timeline_destroy(tl); + mock_timeline_fini(&tl); cond_resched(); - tl = mock_timeline(0); - if (!tl) - return -ENOMEM; + mock_timeline_init(&tl, 0); /* Benchmark searching for a random context id and maybe changing it */ prandom_seed_state(&prng, i915_selftest.random_seed); @@ -241,8 +211,8 @@ static int bench_sync(void *arg) u32 id = random_engine(&prng); u32 seqno = prandom_u32_state(&prng); - if (!__intel_timeline_sync_is_later(tl, id, seqno)) - __intel_timeline_sync_set(tl, id, seqno); + if (!__i915_timeline_sync_is_later(&tl, id, seqno)) + __i915_timeline_sync_set(&tl, id, seqno); count++; } while (!time_after(jiffies, end_time)); @@ -250,7 +220,7 @@ static int bench_sync(void *arg) kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20); pr_info("%s: %lu repeated insert/lookups, %lluns/op\n", __func__, count, (long long)div64_ul(ktime_to_ns(kt), count)); - mock_timeline_destroy(tl); + mock_timeline_fini(&tl); cond_resched(); /* Benchmark searching for a known context id and changing the seqno */ @@ -258,9 +228,7 @@ static int bench_sync(void *arg) ({ int tmp = last_order; last_order = order; order += tmp; })) { unsigned int mask = BIT(order) - 1; - tl = mock_timeline(0); - if (!tl) - return -ENOMEM; + mock_timeline_init(&tl, 0); count = 0; kt = ktime_get(); @@ -272,8 +240,8 @@ static int bench_sync(void *arg) */ u64 id = (u64)(count & mask) << order; - __intel_timeline_sync_is_later(tl, id, 0); - __intel_timeline_sync_set(tl, id, 0); + __i915_timeline_sync_is_later(&tl, id, 0); + __i915_timeline_sync_set(&tl, id, 0); count++; } while (!time_after(jiffies, end_time)); @@ -281,7 +249,7 @@ static int bench_sync(void *arg) pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n", __func__, count, order, (long long)div64_ul(ktime_to_ns(kt), count)); - mock_timeline_destroy(tl); + mock_timeline_fini(&tl); cond_resched(); } diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index eb89e30..e90f972 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -81,7 +81,7 @@ checked_vma_instance(struct drm_i915_gem_object *obj, } if (i915_vma_compare(vma, vm, view)) { - pr_err("i915_vma_compare failed with create parmaters!\n"); + pr_err("i915_vma_compare failed with create parameters!\n"); return ERR_PTR(-EINVAL); } diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.c b/drivers/gpu/drm/i915/selftests/igt_flush_test.c new file mode 100644 index 0000000..0d06f55 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.c @@ -0,0 +1,70 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#include "../i915_drv.h" + +#include "../i915_selftest.h" +#include "igt_flush_test.h" + +struct wedge_me { + struct delayed_work work; + struct drm_i915_private *i915; + const void *symbol; +}; + +static void wedge_me(struct work_struct *work) +{ + struct wedge_me *w = container_of(work, typeof(*w), work.work); + + pr_err("%pS timed out, cancelling all further testing.\n", w->symbol); + + GEM_TRACE("%pS timed out.\n", w->symbol); + GEM_TRACE_DUMP(); + + i915_gem_set_wedged(w->i915); +} + +static void __init_wedge(struct wedge_me *w, + struct drm_i915_private *i915, + long timeout, + const void *symbol) +{ + w->i915 = i915; + w->symbol = symbol; + + INIT_DELAYED_WORK_ONSTACK(&w->work, wedge_me); + schedule_delayed_work(&w->work, timeout); +} + +static void __fini_wedge(struct wedge_me *w) +{ + cancel_delayed_work_sync(&w->work); + destroy_delayed_work_on_stack(&w->work); + w->i915 = NULL; +} + +#define wedge_on_timeout(W, DEV, TIMEOUT) \ + for (__init_wedge((W), (DEV), (TIMEOUT), __builtin_return_address(0)); \ + (W)->i915; \ + __fini_wedge((W))) + +int igt_flush_test(struct drm_i915_private *i915, unsigned int flags) +{ + struct wedge_me w; + + cond_resched(); + + if (flags & I915_WAIT_LOCKED && + i915_gem_switch_to_kernel_context(i915)) { + pr_err("Failed to switch back to kernel context; declaring wedged\n"); + i915_gem_set_wedged(i915); + } + + wedge_on_timeout(&w, i915, HZ) + i915_gem_wait_for_idle(i915, flags); + + return i915_terminally_wedged(&i915->gpu_error) ? -EIO : 0; +} diff --git a/drivers/gpu/drm/i915/selftests/igt_flush_test.h b/drivers/gpu/drm/i915/selftests/igt_flush_test.h new file mode 100644 index 0000000..63e0099 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/igt_flush_test.h @@ -0,0 +1,14 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#ifndef IGT_FLUSH_TEST_H +#define IGT_FLUSH_TEST_H + +struct drm_i915_private; + +int igt_flush_test(struct drm_i915_private *i915, unsigned int flags); + +#endif /* IGT_FLUSH_TEST_H */ diff --git a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c index 54fc571..d6926e7 100644 --- a/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/selftests/intel_breadcrumbs.c @@ -271,18 +271,13 @@ struct igt_wakeup { u32 seqno; }; -static int wait_atomic_timeout(atomic_t *p, unsigned int mode) -{ - return schedule_timeout(10 * HZ) ? 0 : -ETIMEDOUT; -} - static bool wait_for_ready(struct igt_wakeup *w) { DEFINE_WAIT(ready); set_bit(IDLE, &w->flags); if (atomic_dec_and_test(w->done)) - wake_up_atomic_t(w->done); + wake_up_var(w->done); if (test_bit(STOP, &w->flags)) goto out; @@ -299,7 +294,7 @@ static bool wait_for_ready(struct igt_wakeup *w) out: clear_bit(IDLE, &w->flags); if (atomic_dec_and_test(w->set)) - wake_up_atomic_t(w->set); + wake_up_var(w->set); return !test_bit(STOP, &w->flags); } @@ -342,7 +337,7 @@ static void igt_wake_all_sync(atomic_t *ready, atomic_set(ready, 0); wake_up_all(wq); - wait_on_atomic_t(set, atomic_t_wait, TASK_UNINTERRUPTIBLE); + wait_var_event(set, !atomic_read(set)); atomic_set(ready, count); atomic_set(done, count); } @@ -350,7 +345,6 @@ static void igt_wake_all_sync(atomic_t *ready, static int igt_wakeup(void *arg) { I915_RND_STATE(prng); - const int state = TASK_UNINTERRUPTIBLE; struct intel_engine_cs *engine = arg; struct igt_wakeup *waiters; DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); @@ -418,10 +412,11 @@ static int igt_wakeup(void *arg) * that they are ready for the next test. We wait until all * threads are complete and waiting for us (i.e. not a seqno). */ - err = wait_on_atomic_t(&done, wait_atomic_timeout, state); - if (err) { + if (!wait_var_event_timeout(&done, + !atomic_read(&done), 10 * HZ)) { pr_err("Timed out waiting for %d remaining waiters\n", atomic_read(&done)); + err = -ETIMEDOUT; break; } diff --git a/drivers/gpu/drm/i915/selftests/intel_engine_cs.c b/drivers/gpu/drm/i915/selftests/intel_engine_cs.c new file mode 100644 index 0000000..cfaa6b29 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/intel_engine_cs.c @@ -0,0 +1,58 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright © 2018 Intel Corporation + */ + +#include "../i915_selftest.h" + +static int intel_mmio_bases_check(void *arg) +{ + int i, j; + + for (i = 0; i < ARRAY_SIZE(intel_engines); i++) { + const struct engine_info *info = &intel_engines[i]; + char name[INTEL_ENGINE_CS_MAX_NAME]; + u8 prev = U8_MAX; + + __sprint_engine_name(name, info); + + for (j = 0; j < MAX_MMIO_BASES; j++) { + u8 gen = info->mmio_bases[j].gen; + u32 base = info->mmio_bases[j].base; + + if (gen >= prev) { + pr_err("%s: %s: mmio base for gen %x " + "is before the one for gen %x\n", + __func__, name, prev, gen); + return -EINVAL; + } + + if (gen == 0) + break; + + if (!base) { + pr_err("%s: %s: invalid mmio base (%x) " + "for gen %x at entry %u\n", + __func__, name, base, gen, j); + return -EINVAL; + } + + prev = gen; + } + + pr_info("%s: min gen supported for %s = %d\n", + __func__, name, prev); + } + + return 0; +} + +int intel_engine_cs_mock_selftests(void) +{ + static const struct i915_subtest tests[] = { + SUBTEST(intel_mmio_bases_check), + }; + + return i915_subtests(tests, NULL); +} diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index df7898c..438e0b0 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -25,10 +25,14 @@ #include <linux/kthread.h> #include "../i915_selftest.h" +#include "i915_random.h" +#include "igt_flush_test.h" #include "mock_context.h" #include "mock_drm.h" +#define IGT_IDLE_TIMEOUT 50 /* ms; time to wait after flushing between tests */ + struct hang { struct drm_i915_private *i915; struct drm_i915_gem_object *hws; @@ -250,58 +254,6 @@ static u32 hws_seqno(const struct hang *h, const struct i915_request *rq) return READ_ONCE(h->seqno[rq->fence.context % (PAGE_SIZE/sizeof(u32))]); } -struct wedge_me { - struct delayed_work work; - struct drm_i915_private *i915; - const void *symbol; -}; - -static void wedge_me(struct work_struct *work) -{ - struct wedge_me *w = container_of(work, typeof(*w), work.work); - - pr_err("%pS timed out, cancelling all further testing.\n", - w->symbol); - i915_gem_set_wedged(w->i915); -} - -static void __init_wedge(struct wedge_me *w, - struct drm_i915_private *i915, - long timeout, - const void *symbol) -{ - w->i915 = i915; - w->symbol = symbol; - - INIT_DELAYED_WORK_ONSTACK(&w->work, wedge_me); - schedule_delayed_work(&w->work, timeout); -} - -static void __fini_wedge(struct wedge_me *w) -{ - cancel_delayed_work_sync(&w->work); - destroy_delayed_work_on_stack(&w->work); - w->i915 = NULL; -} - -#define wedge_on_timeout(W, DEV, TIMEOUT) \ - for (__init_wedge((W), (DEV), (TIMEOUT), __builtin_return_address(0)); \ - (W)->i915; \ - __fini_wedge((W))) - -static noinline int -flush_test(struct drm_i915_private *i915, unsigned int flags) -{ - struct wedge_me w; - - cond_resched(); - - wedge_on_timeout(&w, i915, HZ) - i915_gem_wait_for_idle(i915, flags); - - return i915_terminally_wedged(&i915->gpu_error) ? -EIO : 0; -} - static void hang_fini(struct hang *h) { *h->batch = MI_BATCH_BUFFER_END; @@ -315,10 +267,10 @@ static void hang_fini(struct hang *h) kernel_context_close(h->ctx); - flush_test(h->i915, I915_WAIT_LOCKED); + igt_flush_test(h->i915, I915_WAIT_LOCKED); } -static bool wait_for_hang(struct hang *h, struct i915_request *rq) +static bool wait_until_running(struct hang *h, struct i915_request *rq) { return !(wait_for_us(i915_seqno_passed(hws_seqno(h, rq), rq->fence.seqno), @@ -433,7 +385,7 @@ static int igt_global_reset(void *arg) mutex_lock(&i915->drm.struct_mutex); reset_count = i915_reset_count(&i915->gpu_error); - i915_reset(i915, I915_RESET_QUIET); + i915_reset(i915, ALL_ENGINES, NULL); if (i915_reset_count(&i915->gpu_error) == reset_count) { pr_err("No GPU reset recorded!\n"); @@ -450,6 +402,11 @@ static int igt_global_reset(void *arg) return err; } +static bool wait_for_idle(struct intel_engine_cs *engine) +{ + return wait_for(intel_engine_is_idle(engine), IGT_IDLE_TIMEOUT) == 0; +} + static int __igt_reset_engine(struct drm_i915_private *i915, bool active) { struct intel_engine_cs *engine; @@ -477,12 +434,21 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) if (active && !intel_engine_can_store_dword(engine)) continue; + if (!wait_for_idle(engine)) { + pr_err("%s failed to idle before reset\n", + engine->name); + err = -EIO; + break; + } + reset_count = i915_reset_count(&i915->gpu_error); reset_engine_count = i915_reset_engine_count(&i915->gpu_error, engine); set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); do { + u32 seqno = intel_engine_get_seqno(engine); + if (active) { struct i915_request *rq; @@ -498,7 +464,7 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) __i915_request_add(rq, true); mutex_unlock(&i915->drm.struct_mutex); - if (!wait_for_hang(&h, rq)) { + if (!wait_until_running(&h, rq)) { struct drm_printer p = drm_info_printer(i915->drm.dev); pr_err("%s: Failed to start request %x, at %x\n", @@ -511,14 +477,12 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) break; } + GEM_BUG_ON(!rq->global_seqno); + seqno = rq->global_seqno - 1; i915_request_put(rq); } - engine->hangcheck.stalled = true; - engine->hangcheck.seqno = - intel_engine_get_seqno(engine); - - err = i915_reset_engine(engine, I915_RESET_QUIET); + err = i915_reset_engine(engine, NULL); if (err) { pr_err("i915_reset_engine failed\n"); break; @@ -539,14 +503,25 @@ static int __igt_reset_engine(struct drm_i915_private *i915, bool active) break; } - engine->hangcheck.stalled = false; + if (!wait_for_idle(engine)) { + struct drm_printer p = + drm_info_printer(i915->drm.dev); + + pr_err("%s failed to idle after reset\n", + engine->name); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + + err = -EIO; + break; + } } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); if (err) break; - err = flush_test(i915, 0); + err = igt_flush_test(i915, 0); if (err) break; } @@ -573,11 +548,25 @@ static int igt_reset_active_engine(void *arg) return __igt_reset_engine(arg, true); } +struct active_engine { + struct task_struct *task; + struct intel_engine_cs *engine; + unsigned long resets; + unsigned int flags; +}; + +#define TEST_ACTIVE BIT(0) +#define TEST_OTHERS BIT(1) +#define TEST_SELF BIT(2) +#define TEST_PRIORITY BIT(3) + static int active_engine(void *data) { - struct intel_engine_cs *engine = data; - struct i915_request *rq[2] = {}; - struct i915_gem_context *ctx[2]; + I915_RND_STATE(prng); + struct active_engine *arg = data; + struct intel_engine_cs *engine = arg->engine; + struct i915_request *rq[8] = {}; + struct i915_gem_context *ctx[ARRAY_SIZE(rq)]; struct drm_file *file; unsigned long count = 0; int err = 0; @@ -586,25 +575,20 @@ static int active_engine(void *data) if (IS_ERR(file)) return PTR_ERR(file); - mutex_lock(&engine->i915->drm.struct_mutex); - ctx[0] = live_context(engine->i915, file); - mutex_unlock(&engine->i915->drm.struct_mutex); - if (IS_ERR(ctx[0])) { - err = PTR_ERR(ctx[0]); - goto err_file; - } - - mutex_lock(&engine->i915->drm.struct_mutex); - ctx[1] = live_context(engine->i915, file); - mutex_unlock(&engine->i915->drm.struct_mutex); - if (IS_ERR(ctx[1])) { - err = PTR_ERR(ctx[1]); - i915_gem_context_put(ctx[0]); - goto err_file; + for (count = 0; count < ARRAY_SIZE(ctx); count++) { + mutex_lock(&engine->i915->drm.struct_mutex); + ctx[count] = live_context(engine->i915, file); + mutex_unlock(&engine->i915->drm.struct_mutex); + if (IS_ERR(ctx[count])) { + err = PTR_ERR(ctx[count]); + while (--count) + i915_gem_context_put(ctx[count]); + goto err_file; + } } while (!kthread_should_stop()) { - unsigned int idx = count++ & 1; + unsigned int idx = count++ & (ARRAY_SIZE(rq) - 1); struct i915_request *old = rq[idx]; struct i915_request *new; @@ -616,14 +600,28 @@ static int active_engine(void *data) break; } + if (arg->flags & TEST_PRIORITY) + ctx[idx]->sched.priority = + i915_prandom_u32_max_state(512, &prng); + rq[idx] = i915_request_get(new); i915_request_add(new); mutex_unlock(&engine->i915->drm.struct_mutex); if (old) { - i915_request_wait(old, 0, MAX_SCHEDULE_TIMEOUT); + if (i915_request_wait(old, 0, HZ) < 0) { + GEM_TRACE("%s timed out.\n", engine->name); + GEM_TRACE_DUMP(); + + i915_gem_set_wedged(engine->i915); + i915_request_put(old); + err = -EIO; + break; + } i915_request_put(old); } + + cond_resched(); } for (count = 0; count < ARRAY_SIZE(rq); count++) @@ -634,8 +632,9 @@ err_file: return err; } -static int __igt_reset_engine_others(struct drm_i915_private *i915, - bool active) +static int __igt_reset_engines(struct drm_i915_private *i915, + const char *test_name, + unsigned int flags) { struct intel_engine_cs *engine, *other; enum intel_engine_id id, tmp; @@ -649,50 +648,68 @@ static int __igt_reset_engine_others(struct drm_i915_private *i915, if (!intel_has_reset_engine(i915)) return 0; - if (active) { + if (flags & TEST_ACTIVE) { mutex_lock(&i915->drm.struct_mutex); err = hang_init(&h, i915); mutex_unlock(&i915->drm.struct_mutex); if (err) return err; + + if (flags & TEST_PRIORITY) + h.ctx->sched.priority = 1024; } for_each_engine(engine, i915, id) { - struct task_struct *threads[I915_NUM_ENGINES] = {}; - unsigned long resets[I915_NUM_ENGINES]; + struct active_engine threads[I915_NUM_ENGINES] = {}; unsigned long global = i915_reset_count(&i915->gpu_error); - unsigned long count = 0; + unsigned long count = 0, reported; IGT_TIMEOUT(end_time); - if (active && !intel_engine_can_store_dword(engine)) + if (flags & TEST_ACTIVE && + !intel_engine_can_store_dword(engine)) continue; + if (!wait_for_idle(engine)) { + pr_err("i915_reset_engine(%s:%s): failed to idle before reset\n", + engine->name, test_name); + err = -EIO; + break; + } + memset(threads, 0, sizeof(threads)); for_each_engine(other, i915, tmp) { struct task_struct *tsk; - resets[tmp] = i915_reset_engine_count(&i915->gpu_error, - other); + threads[tmp].resets = + i915_reset_engine_count(&i915->gpu_error, + other); + + if (!(flags & TEST_OTHERS)) + continue; - if (other == engine) + if (other == engine && !(flags & TEST_SELF)) continue; - tsk = kthread_run(active_engine, other, + threads[tmp].engine = other; + threads[tmp].flags = flags; + + tsk = kthread_run(active_engine, &threads[tmp], "igt/%s", other->name); if (IS_ERR(tsk)) { err = PTR_ERR(tsk); goto unwind; } - threads[tmp] = tsk; + threads[tmp].task = tsk; get_task_struct(tsk); } set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); do { - if (active) { - struct i915_request *rq; + u32 seqno = intel_engine_get_seqno(engine); + struct i915_request *rq = NULL; + if (flags & TEST_ACTIVE) { mutex_lock(&i915->drm.struct_mutex); rq = hang_create_request(&h, engine); if (IS_ERR(rq)) { @@ -705,7 +722,7 @@ static int __igt_reset_engine_others(struct drm_i915_private *i915, __i915_request_add(rq, true); mutex_unlock(&i915->drm.struct_mutex); - if (!wait_for_hang(&h, rq)) { + if (!wait_until_running(&h, rq)) { struct drm_printer p = drm_info_printer(i915->drm.dev); pr_err("%s: Failed to start request %x, at %x\n", @@ -718,33 +735,48 @@ static int __igt_reset_engine_others(struct drm_i915_private *i915, break; } - i915_request_put(rq); + GEM_BUG_ON(!rq->global_seqno); + seqno = rq->global_seqno - 1; } - engine->hangcheck.stalled = true; - engine->hangcheck.seqno = - intel_engine_get_seqno(engine); - - err = i915_reset_engine(engine, I915_RESET_QUIET); + err = i915_reset_engine(engine, NULL); if (err) { - pr_err("i915_reset_engine(%s:%s) failed, err=%d\n", - engine->name, active ? "active" : "idle", err); + pr_err("i915_reset_engine(%s:%s): failed, err=%d\n", + engine->name, test_name, err); break; } - engine->hangcheck.stalled = false; count++; + + if (rq) { + i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); + i915_request_put(rq); + } + + if (!(flags & TEST_SELF) && !wait_for_idle(engine)) { + struct drm_printer p = + drm_info_printer(i915->drm.dev); + + pr_err("i915_reset_engine(%s:%s):" + " failed to idle after reset\n", + engine->name, test_name); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + + err = -EIO; + break; + } } while (time_before(jiffies, end_time)); clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); pr_info("i915_reset_engine(%s:%s): %lu resets\n", - engine->name, active ? "active" : "idle", count); - - if (i915_reset_engine_count(&i915->gpu_error, engine) - - resets[engine->id] != (active ? count : 0)) { - pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu\n", - engine->name, active ? "active" : "idle", count, - i915_reset_engine_count(&i915->gpu_error, - engine) - resets[engine->id]); + engine->name, test_name, count); + + reported = i915_reset_engine_count(&i915->gpu_error, engine); + reported -= threads[engine->id].resets; + if (reported != (flags & TEST_ACTIVE ? count : 0)) { + pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu, expected %lu reported\n", + engine->name, test_name, count, reported, + (flags & TEST_ACTIVE ? count : 0)); if (!err) err = -EINVAL; } @@ -753,24 +785,26 @@ unwind: for_each_engine(other, i915, tmp) { int ret; - if (!threads[tmp]) + if (!threads[tmp].task) continue; - ret = kthread_stop(threads[tmp]); + ret = kthread_stop(threads[tmp].task); if (ret) { pr_err("kthread for other engine %s failed, err=%d\n", other->name, ret); if (!err) err = ret; } - put_task_struct(threads[tmp]); + put_task_struct(threads[tmp].task); - if (resets[tmp] != i915_reset_engine_count(&i915->gpu_error, - other)) { + if (other != engine && + threads[tmp].resets != + i915_reset_engine_count(&i915->gpu_error, other)) { pr_err("Innocent engine %s was reset (count=%ld)\n", other->name, i915_reset_engine_count(&i915->gpu_error, - other) - resets[tmp]); + other) - + threads[tmp].resets); if (!err) err = -EINVAL; } @@ -786,7 +820,7 @@ unwind: if (err) break; - err = flush_test(i915, 0); + err = igt_flush_test(i915, 0); if (err) break; } @@ -794,7 +828,7 @@ unwind: if (i915_terminally_wedged(&i915->gpu_error)) err = -EIO; - if (active) { + if (flags & TEST_ACTIVE) { mutex_lock(&i915->drm.struct_mutex); hang_fini(&h); mutex_unlock(&i915->drm.struct_mutex); @@ -803,27 +837,56 @@ unwind: return err; } -static int igt_reset_idle_engine_others(void *arg) +static int igt_reset_engines(void *arg) { - return __igt_reset_engine_others(arg, false); -} + static const struct { + const char *name; + unsigned int flags; + } phases[] = { + { "idle", 0 }, + { "active", TEST_ACTIVE }, + { "others-idle", TEST_OTHERS }, + { "others-active", TEST_OTHERS | TEST_ACTIVE }, + { + "others-priority", + TEST_OTHERS | TEST_ACTIVE | TEST_PRIORITY + }, + { + "self-priority", + TEST_OTHERS | TEST_ACTIVE | TEST_PRIORITY | TEST_SELF, + }, + { } + }; + struct drm_i915_private *i915 = arg; + typeof(*phases) *p; + int err; -static int igt_reset_active_engine_others(void *arg) -{ - return __igt_reset_engine_others(arg, true); + for (p = phases; p->name; p++) { + if (p->flags & TEST_PRIORITY) { + if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY)) + continue; + } + + err = __igt_reset_engines(arg, p->name, p->flags); + if (err) + return err; + } + + return 0; } -static u32 fake_hangcheck(struct i915_request *rq) +static u32 fake_hangcheck(struct i915_request *rq, u32 mask) { - u32 reset_count; + struct i915_gpu_error *error = &rq->i915->gpu_error; + u32 reset_count = i915_reset_count(error); - rq->engine->hangcheck.stalled = true; - rq->engine->hangcheck.seqno = intel_engine_get_seqno(rq->engine); + error->stalled_mask = mask; - reset_count = i915_reset_count(&rq->i915->gpu_error); + /* set_bit() must be after we have setup the backchannel (mask) */ + smp_mb__before_atomic(); + set_bit(I915_RESET_HANDOFF, &error->flags); - set_bit(I915_RESET_HANDOFF, &rq->i915->gpu_error.flags); - wake_up_all(&rq->i915->gpu_error.wait_queue); + wake_up_all(&error->wait_queue); return reset_count; } @@ -858,21 +921,20 @@ static int igt_wait_reset(void *arg) i915_request_get(rq); __i915_request_add(rq, true); - if (!wait_for_hang(&h, rq)) { + if (!wait_until_running(&h, rq)) { struct drm_printer p = drm_info_printer(i915->drm.dev); pr_err("%s: Failed to start request %x, at %x\n", __func__, rq->fence.seqno, hws_seqno(&h, rq)); intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); - i915_reset(i915, 0); i915_gem_set_wedged(i915); err = -EIO; goto out_rq; } - reset_count = fake_hangcheck(rq); + reset_count = fake_hangcheck(rq, ALL_ENGINES); timeout = i915_request_wait(rq, I915_WAIT_LOCKED, 10); if (timeout < 0) { @@ -903,6 +965,23 @@ unlock: return err; } +static int wait_for_others(struct drm_i915_private *i915, + struct intel_engine_cs *exclude) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, i915, id) { + if (engine == exclude) + continue; + + if (!wait_for_idle(engine)) + return -EIO; + } + + return 0; +} + static int igt_reset_queue(void *arg) { struct drm_i915_private *i915 = arg; @@ -951,27 +1030,49 @@ static int igt_reset_queue(void *arg) i915_request_get(rq); __i915_request_add(rq, true); - if (!wait_for_hang(&h, prev)) { + /* + * XXX We don't handle resetting the kernel context + * very well. If we trigger a device reset twice in + * quick succession while the kernel context is + * executing, we may end up skipping the breadcrumb. + * This is really only a problem for the selftest as + * normally there is a large interlude between resets + * (hangcheck), or we focus on resetting just one + * engine and so avoid repeatedly resetting innocents. + */ + err = wait_for_others(i915, engine); + if (err) { + pr_err("%s(%s): Failed to idle other inactive engines after device reset\n", + __func__, engine->name); + i915_request_put(rq); + i915_request_put(prev); + + GEM_TRACE_DUMP(); + i915_gem_set_wedged(i915); + goto fini; + } + + if (!wait_until_running(&h, prev)) { struct drm_printer p = drm_info_printer(i915->drm.dev); - pr_err("%s: Failed to start request %x, at %x\n", - __func__, prev->fence.seqno, hws_seqno(&h, prev)); - intel_engine_dump(prev->engine, &p, - "%s\n", prev->engine->name); + pr_err("%s(%s): Failed to start request %x, at %x\n", + __func__, engine->name, + prev->fence.seqno, hws_seqno(&h, prev)); + intel_engine_dump(engine, &p, + "%s\n", engine->name); i915_request_put(rq); i915_request_put(prev); - i915_reset(i915, 0); i915_gem_set_wedged(i915); err = -EIO; goto fini; } - reset_count = fake_hangcheck(prev); + reset_count = fake_hangcheck(prev, ENGINE_MASK(id)); - i915_reset(i915, I915_RESET_QUIET); + i915_reset(i915, ENGINE_MASK(id), NULL); GEM_BUG_ON(test_bit(I915_RESET_HANDOFF, &i915->gpu_error.flags)); @@ -1013,7 +1114,7 @@ static int igt_reset_queue(void *arg) i915_request_put(prev); - err = flush_test(i915, I915_WAIT_LOCKED); + err = igt_flush_test(i915, I915_WAIT_LOCKED); if (err) break; } @@ -1044,7 +1145,7 @@ static int igt_handle_error(void *arg) if (!intel_has_reset_engine(i915)) return 0; - if (!intel_engine_can_store_dword(i915->engine[RCS])) + if (!engine || !intel_engine_can_store_dword(engine)) return 0; mutex_lock(&i915->drm.struct_mutex); @@ -1062,14 +1163,13 @@ static int igt_handle_error(void *arg) i915_request_get(rq); __i915_request_add(rq, true); - if (!wait_for_hang(&h, rq)) { + if (!wait_until_running(&h, rq)) { struct drm_printer p = drm_info_printer(i915->drm.dev); pr_err("%s: Failed to start request %x, at %x\n", __func__, rq->fence.seqno, hws_seqno(&h, rq)); intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); - i915_reset(i915, 0); i915_gem_set_wedged(i915); err = -EIO; @@ -1081,10 +1181,7 @@ static int igt_handle_error(void *arg) /* Temporarily disable error capture */ error = xchg(&i915->gpu_error.first_error, (void *)-1); - engine->hangcheck.stalled = true; - engine->hangcheck.seqno = intel_engine_get_seqno(engine); - - i915_handle_error(i915, intel_engine_flag(engine), "%s", __func__); + i915_handle_error(i915, ENGINE_MASK(engine->id), 0, NULL); xchg(&i915->gpu_error.first_error, error); @@ -1112,8 +1209,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_hang_sanitycheck), SUBTEST(igt_reset_idle_engine), SUBTEST(igt_reset_active_engine), - SUBTEST(igt_reset_idle_engine_others), - SUBTEST(igt_reset_active_engine_others), + SUBTEST(igt_reset_engines), SUBTEST(igt_wait_reset), SUBTEST(igt_reset_queue), SUBTEST(igt_handle_error), @@ -1129,6 +1225,10 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) err = i915_subtests(tests, i915); + mutex_lock(&i915->drm.struct_mutex); + igt_flush_test(i915, I915_WAIT_LOCKED); + mutex_unlock(&i915->drm.struct_mutex); + i915_modparams.enable_hangcheck = saved_hangcheck; intel_runtime_pm_put(i915); diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c b/drivers/gpu/drm/i915/selftests/intel_lrc.c new file mode 100644 index 0000000..1b8a071 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c @@ -0,0 +1,459 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#include "../i915_selftest.h" +#include "igt_flush_test.h" + +#include "mock_context.h" + +struct spinner { + struct drm_i915_private *i915; + struct drm_i915_gem_object *hws; + struct drm_i915_gem_object *obj; + u32 *batch; + void *seqno; +}; + +static int spinner_init(struct spinner *spin, struct drm_i915_private *i915) +{ + unsigned int mode; + void *vaddr; + int err; + + GEM_BUG_ON(INTEL_GEN(i915) < 8); + + memset(spin, 0, sizeof(*spin)); + spin->i915 = i915; + + spin->hws = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(spin->hws)) { + err = PTR_ERR(spin->hws); + goto err; + } + + spin->obj = i915_gem_object_create_internal(i915, PAGE_SIZE); + if (IS_ERR(spin->obj)) { + err = PTR_ERR(spin->obj); + goto err_hws; + } + + i915_gem_object_set_cache_level(spin->hws, I915_CACHE_LLC); + vaddr = i915_gem_object_pin_map(spin->hws, I915_MAP_WB); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto err_obj; + } + spin->seqno = memset(vaddr, 0xff, PAGE_SIZE); + + mode = HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC; + vaddr = i915_gem_object_pin_map(spin->obj, mode); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto err_unpin_hws; + } + spin->batch = vaddr; + + return 0; + +err_unpin_hws: + i915_gem_object_unpin_map(spin->hws); +err_obj: + i915_gem_object_put(spin->obj); +err_hws: + i915_gem_object_put(spin->hws); +err: + return err; +} + +static unsigned int seqno_offset(u64 fence) +{ + return offset_in_page(sizeof(u32) * fence); +} + +static u64 hws_address(const struct i915_vma *hws, + const struct i915_request *rq) +{ + return hws->node.start + seqno_offset(rq->fence.context); +} + +static int emit_recurse_batch(struct spinner *spin, + struct i915_request *rq, + u32 arbitration_command) +{ + struct i915_address_space *vm = &rq->ctx->ppgtt->base; + struct i915_vma *hws, *vma; + u32 *batch; + int err; + + vma = i915_vma_instance(spin->obj, vm, NULL); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + hws = i915_vma_instance(spin->hws, vm, NULL); + if (IS_ERR(hws)) + return PTR_ERR(hws); + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + return err; + + err = i915_vma_pin(hws, 0, 0, PIN_USER); + if (err) + goto unpin_vma; + + i915_vma_move_to_active(vma, rq, 0); + if (!i915_gem_object_has_active_reference(vma->obj)) { + i915_gem_object_get(vma->obj); + i915_gem_object_set_active_reference(vma->obj); + } + + i915_vma_move_to_active(hws, rq, 0); + if (!i915_gem_object_has_active_reference(hws->obj)) { + i915_gem_object_get(hws->obj); + i915_gem_object_set_active_reference(hws->obj); + } + + batch = spin->batch; + + *batch++ = MI_STORE_DWORD_IMM_GEN4; + *batch++ = lower_32_bits(hws_address(hws, rq)); + *batch++ = upper_32_bits(hws_address(hws, rq)); + *batch++ = rq->fence.seqno; + + *batch++ = arbitration_command; + + *batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; + *batch++ = lower_32_bits(vma->node.start); + *batch++ = upper_32_bits(vma->node.start); + *batch++ = MI_BATCH_BUFFER_END; /* not reached */ + + i915_gem_chipset_flush(spin->i915); + + err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, 0); + + i915_vma_unpin(hws); +unpin_vma: + i915_vma_unpin(vma); + return err; +} + +static struct i915_request * +spinner_create_request(struct spinner *spin, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + u32 arbitration_command) +{ + struct i915_request *rq; + int err; + + rq = i915_request_alloc(engine, ctx); + if (IS_ERR(rq)) + return rq; + + err = emit_recurse_batch(spin, rq, arbitration_command); + if (err) { + __i915_request_add(rq, false); + return ERR_PTR(err); + } + + return rq; +} + +static u32 hws_seqno(const struct spinner *spin, const struct i915_request *rq) +{ + u32 *seqno = spin->seqno + seqno_offset(rq->fence.context); + + return READ_ONCE(*seqno); +} + +static void spinner_end(struct spinner *spin) +{ + *spin->batch = MI_BATCH_BUFFER_END; + i915_gem_chipset_flush(spin->i915); +} + +static void spinner_fini(struct spinner *spin) +{ + spinner_end(spin); + + i915_gem_object_unpin_map(spin->obj); + i915_gem_object_put(spin->obj); + + i915_gem_object_unpin_map(spin->hws); + i915_gem_object_put(spin->hws); +} + +static bool wait_for_spinner(struct spinner *spin, struct i915_request *rq) +{ + if (!wait_event_timeout(rq->execute, + READ_ONCE(rq->global_seqno), + msecs_to_jiffies(10))) + return false; + + return !(wait_for_us(i915_seqno_passed(hws_seqno(spin, rq), + rq->fence.seqno), + 10) && + wait_for(i915_seqno_passed(hws_seqno(spin, rq), + rq->fence.seqno), + 1000)); +} + +static int live_sanitycheck(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine; + struct i915_gem_context *ctx; + enum intel_engine_id id; + struct spinner spin; + int err = -ENOMEM; + + if (!HAS_LOGICAL_RING_CONTEXTS(i915)) + return 0; + + mutex_lock(&i915->drm.struct_mutex); + + if (spinner_init(&spin, i915)) + goto err_unlock; + + ctx = kernel_context(i915); + if (!ctx) + goto err_spin; + + for_each_engine(engine, i915, id) { + struct i915_request *rq; + + rq = spinner_create_request(&spin, ctx, engine, MI_NOOP); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_ctx; + } + + i915_request_add(rq); + if (!wait_for_spinner(&spin, rq)) { + GEM_TRACE("spinner failed to start\n"); + GEM_TRACE_DUMP(); + i915_gem_set_wedged(i915); + err = -EIO; + goto err_ctx; + } + + spinner_end(&spin); + if (igt_flush_test(i915, I915_WAIT_LOCKED)) { + err = -EIO; + goto err_ctx; + } + } + + err = 0; +err_ctx: + kernel_context_close(ctx); +err_spin: + spinner_fini(&spin); +err_unlock: + igt_flush_test(i915, I915_WAIT_LOCKED); + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int live_preempt(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_gem_context *ctx_hi, *ctx_lo; + struct spinner spin_hi, spin_lo; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = -ENOMEM; + + if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + return 0; + + mutex_lock(&i915->drm.struct_mutex); + + if (spinner_init(&spin_hi, i915)) + goto err_unlock; + + if (spinner_init(&spin_lo, i915)) + goto err_spin_hi; + + ctx_hi = kernel_context(i915); + if (!ctx_hi) + goto err_spin_lo; + ctx_hi->sched.priority = I915_CONTEXT_MAX_USER_PRIORITY; + + ctx_lo = kernel_context(i915); + if (!ctx_lo) + goto err_ctx_hi; + ctx_lo->sched.priority = I915_CONTEXT_MIN_USER_PRIORITY; + + for_each_engine(engine, i915, id) { + struct i915_request *rq; + + rq = spinner_create_request(&spin_lo, ctx_lo, engine, + MI_ARB_CHECK); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_ctx_lo; + } + + i915_request_add(rq); + if (!wait_for_spinner(&spin_lo, rq)) { + GEM_TRACE("lo spinner failed to start\n"); + GEM_TRACE_DUMP(); + i915_gem_set_wedged(i915); + err = -EIO; + goto err_ctx_lo; + } + + rq = spinner_create_request(&spin_hi, ctx_hi, engine, + MI_ARB_CHECK); + if (IS_ERR(rq)) { + spinner_end(&spin_lo); + err = PTR_ERR(rq); + goto err_ctx_lo; + } + + i915_request_add(rq); + if (!wait_for_spinner(&spin_hi, rq)) { + GEM_TRACE("hi spinner failed to start\n"); + GEM_TRACE_DUMP(); + i915_gem_set_wedged(i915); + err = -EIO; + goto err_ctx_lo; + } + + spinner_end(&spin_hi); + spinner_end(&spin_lo); + if (igt_flush_test(i915, I915_WAIT_LOCKED)) { + err = -EIO; + goto err_ctx_lo; + } + } + + err = 0; +err_ctx_lo: + kernel_context_close(ctx_lo); +err_ctx_hi: + kernel_context_close(ctx_hi); +err_spin_lo: + spinner_fini(&spin_lo); +err_spin_hi: + spinner_fini(&spin_hi); +err_unlock: + igt_flush_test(i915, I915_WAIT_LOCKED); + mutex_unlock(&i915->drm.struct_mutex); + return err; +} + +static int live_late_preempt(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct i915_gem_context *ctx_hi, *ctx_lo; + struct spinner spin_hi, spin_lo; + struct intel_engine_cs *engine; + struct i915_sched_attr attr = {}; + enum intel_engine_id id; + int err = -ENOMEM; + + if (!HAS_LOGICAL_RING_PREEMPTION(i915)) + return 0; + + mutex_lock(&i915->drm.struct_mutex); + + if (spinner_init(&spin_hi, i915)) + goto err_unlock; + + if (spinner_init(&spin_lo, i915)) + goto err_spin_hi; + + ctx_hi = kernel_context(i915); + if (!ctx_hi) + goto err_spin_lo; + + ctx_lo = kernel_context(i915); + if (!ctx_lo) + goto err_ctx_hi; + + for_each_engine(engine, i915, id) { + struct i915_request *rq; + + rq = spinner_create_request(&spin_lo, ctx_lo, engine, + MI_ARB_CHECK); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_ctx_lo; + } + + i915_request_add(rq); + if (!wait_for_spinner(&spin_lo, rq)) { + pr_err("First context failed to start\n"); + goto err_wedged; + } + + rq = spinner_create_request(&spin_hi, ctx_hi, engine, MI_NOOP); + if (IS_ERR(rq)) { + spinner_end(&spin_lo); + err = PTR_ERR(rq); + goto err_ctx_lo; + } + + i915_request_add(rq); + if (wait_for_spinner(&spin_hi, rq)) { + pr_err("Second context overtook first?\n"); + goto err_wedged; + } + + attr.priority = I915_PRIORITY_MAX; + engine->schedule(rq, &attr); + + if (!wait_for_spinner(&spin_hi, rq)) { + pr_err("High priority context failed to preempt the low priority context\n"); + GEM_TRACE_DUMP(); + goto err_wedged; + } + + spinner_end(&spin_hi); + spinner_end(&spin_lo); + if (igt_flush_test(i915, I915_WAIT_LOCKED)) { + err = -EIO; + goto err_ctx_lo; + } + } + + err = 0; +err_ctx_lo: + kernel_context_close(ctx_lo); +err_ctx_hi: + kernel_context_close(ctx_hi); +err_spin_lo: + spinner_fini(&spin_lo); +err_spin_hi: + spinner_fini(&spin_hi); +err_unlock: + igt_flush_test(i915, I915_WAIT_LOCKED); + mutex_unlock(&i915->drm.struct_mutex); + return err; + +err_wedged: + spinner_end(&spin_hi); + spinner_end(&spin_lo); + i915_gem_set_wedged(i915); + err = -EIO; + goto err_ctx_lo; +} + +int intel_execlists_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_sanitycheck), + SUBTEST(live_preempt), + SUBTEST(live_late_preempt), + }; + + if (!HAS_EXECLISTS(i915)) + return 0; + + return i915_subtests(tests, i915); +} diff --git a/drivers/gpu/drm/i915/selftests/intel_workarounds.c b/drivers/gpu/drm/i915/selftests/intel_workarounds.c new file mode 100644 index 0000000..17444a3 --- /dev/null +++ b/drivers/gpu/drm/i915/selftests/intel_workarounds.c @@ -0,0 +1,291 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#include "../i915_selftest.h" + +#include "mock_context.h" + +static struct drm_i915_gem_object * +read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine) +{ + struct drm_i915_gem_object *result; + struct i915_request *rq; + struct i915_vma *vma; + const u32 base = engine->mmio_base; + u32 srm, *cs; + int err; + int i; + + result = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); + if (IS_ERR(result)) + return result; + + i915_gem_object_set_cache_level(result, I915_CACHE_LLC); + + cs = i915_gem_object_pin_map(result, I915_MAP_WB); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_obj; + } + memset(cs, 0xc5, PAGE_SIZE); + i915_gem_object_unpin_map(result); + + vma = i915_vma_instance(result, &engine->i915->ggtt.base, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) + goto err_obj; + + rq = i915_request_alloc(engine, ctx); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_pin; + } + + srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; + if (INTEL_GEN(ctx->i915) >= 8) + srm++; + + cs = intel_ring_begin(rq, 4 * RING_MAX_NONPRIV_SLOTS); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_req; + } + + for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) { + *cs++ = srm; + *cs++ = i915_mmio_reg_offset(RING_FORCE_TO_NONPRIV(base, i)); + *cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i; + *cs++ = 0; + } + intel_ring_advance(rq, cs); + + i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); + reservation_object_lock(vma->resv, NULL); + reservation_object_add_excl_fence(vma->resv, &rq->fence); + reservation_object_unlock(vma->resv); + + i915_gem_object_get(result); + i915_gem_object_set_active_reference(result); + + __i915_request_add(rq, true); + i915_vma_unpin(vma); + + return result; + +err_req: + i915_request_add(rq); +err_pin: + i915_vma_unpin(vma); +err_obj: + i915_gem_object_put(result); + return ERR_PTR(err); +} + +static u32 get_whitelist_reg(const struct whitelist *w, unsigned int i) +{ + return i < w->count ? i915_mmio_reg_offset(w->reg[i]) : w->nopid; +} + +static void print_results(const struct whitelist *w, const u32 *results) +{ + unsigned int i; + + for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) { + u32 expected = get_whitelist_reg(w, i); + u32 actual = results[i]; + + pr_info("RING_NONPRIV[%d]: expected 0x%08x, found 0x%08x\n", + i, expected, actual); + } +} + +static int check_whitelist(const struct whitelist *w, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine) +{ + struct drm_i915_gem_object *results; + u32 *vaddr; + int err; + int i; + + results = read_nonprivs(ctx, engine); + if (IS_ERR(results)) + return PTR_ERR(results); + + err = i915_gem_object_set_to_cpu_domain(results, false); + if (err) + goto out_put; + + vaddr = i915_gem_object_pin_map(results, I915_MAP_WB); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto out_put; + } + + for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++) { + u32 expected = get_whitelist_reg(w, i); + u32 actual = vaddr[i]; + + if (expected != actual) { + print_results(w, vaddr); + pr_err("Invalid RING_NONPRIV[%d], expected 0x%08x, found 0x%08x\n", + i, expected, actual); + + err = -EINVAL; + break; + } + } + + i915_gem_object_unpin_map(results); +out_put: + i915_gem_object_put(results); + return err; +} + +static int do_device_reset(struct intel_engine_cs *engine) +{ + i915_reset(engine->i915, ENGINE_MASK(engine->id), NULL); + return 0; +} + +static int do_engine_reset(struct intel_engine_cs *engine) +{ + return i915_reset_engine(engine, NULL); +} + +static int switch_to_scratch_context(struct intel_engine_cs *engine) +{ + struct i915_gem_context *ctx; + struct i915_request *rq; + + ctx = kernel_context(engine->i915); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + rq = i915_request_alloc(engine, ctx); + kernel_context_close(ctx); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + i915_request_add(rq); + + return 0; +} + +static int check_whitelist_across_reset(struct intel_engine_cs *engine, + int (*reset)(struct intel_engine_cs *), + const struct whitelist *w, + const char *name) +{ + struct i915_gem_context *ctx; + int err; + + ctx = kernel_context(engine->i915); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + err = check_whitelist(w, ctx, engine); + if (err) { + pr_err("Invalid whitelist *before* %s reset!\n", name); + goto out; + } + + err = switch_to_scratch_context(engine); + if (err) + goto out; + + err = reset(engine); + if (err) { + pr_err("%s reset failed\n", name); + goto out; + } + + err = check_whitelist(w, ctx, engine); + if (err) { + pr_err("Whitelist not preserved in context across %s reset!\n", + name); + goto out; + } + + kernel_context_close(ctx); + + ctx = kernel_context(engine->i915); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + err = check_whitelist(w, ctx, engine); + if (err) { + pr_err("Invalid whitelist *after* %s reset in fresh context!\n", + name); + goto out; + } + +out: + kernel_context_close(ctx); + return err; +} + +static int live_reset_whitelist(void *arg) +{ + struct drm_i915_private *i915 = arg; + struct intel_engine_cs *engine = i915->engine[RCS]; + struct i915_gpu_error *error = &i915->gpu_error; + struct whitelist w; + int err = 0; + + /* If we reset the gpu, we should not lose the RING_NONPRIV */ + + if (!engine) + return 0; + + if (!whitelist_build(engine, &w)) + return 0; + + pr_info("Checking %d whitelisted registers (RING_NONPRIV)\n", w.count); + + set_bit(I915_RESET_BACKOFF, &error->flags); + set_bit(I915_RESET_ENGINE + engine->id, &error->flags); + + if (intel_has_reset_engine(i915)) { + err = check_whitelist_across_reset(engine, + do_engine_reset, &w, + "engine"); + if (err) + goto out; + } + + if (intel_has_gpu_reset(i915)) { + err = check_whitelist_across_reset(engine, + do_device_reset, &w, + "device"); + if (err) + goto out; + } + +out: + clear_bit(I915_RESET_ENGINE + engine->id, &error->flags); + clear_bit(I915_RESET_BACKOFF, &error->flags); + return err; +} + +int intel_workarounds_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_reset_whitelist), + }; + int err; + + mutex_lock(&i915->drm.struct_mutex); + err = i915_subtests(tests, i915); + mutex_unlock(&i915->drm.struct_mutex); + + return err; +} diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c index 78a89ef..26bf29d 100644 --- a/drivers/gpu/drm/i915/selftests/mock_engine.c +++ b/drivers/gpu/drm/i915/selftests/mock_engine.c @@ -25,6 +25,11 @@ #include "mock_engine.h" #include "mock_request.h" +struct mock_ring { + struct intel_ring base; + struct i915_timeline timeline; +}; + static struct mock_request *first_request(struct mock_engine *engine) { return list_first_entry_or_null(&engine->hw_queue, @@ -71,14 +76,21 @@ static struct intel_ring * mock_context_pin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { - i915_gem_context_get(ctx); + struct intel_context *ce = to_intel_context(ctx, engine); + + if (!ce->pin_count++) + i915_gem_context_get(ctx); + return engine->buffer; } static void mock_context_unpin(struct intel_engine_cs *engine, struct i915_gem_context *ctx) { - i915_gem_context_put(ctx); + struct intel_context *ce = to_intel_context(ctx, engine); + + if (!--ce->pin_count) + i915_gem_context_put(ctx); } static int mock_request_alloc(struct i915_request *request) @@ -125,7 +137,7 @@ static void mock_submit_request(struct i915_request *request) static struct intel_ring *mock_ring(struct intel_engine_cs *engine) { const unsigned long sz = PAGE_SIZE / 2; - struct intel_ring *ring; + struct mock_ring *ring; BUILD_BUG_ON(MIN_SPACE_FOR_ADD_REQUEST > sz); @@ -133,14 +145,25 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine) if (!ring) return NULL; - ring->size = sz; - ring->effective_size = sz; - ring->vaddr = (void *)(ring + 1); + i915_timeline_init(engine->i915, &ring->timeline, engine->name); + + ring->base.size = sz; + ring->base.effective_size = sz; + ring->base.vaddr = (void *)(ring + 1); + ring->base.timeline = &ring->timeline; - INIT_LIST_HEAD(&ring->request_list); - intel_ring_update_space(ring); + INIT_LIST_HEAD(&ring->base.request_list); + intel_ring_update_space(&ring->base); - return ring; + return &ring->base; +} + +static void mock_ring_free(struct intel_ring *base) +{ + struct mock_ring *ring = container_of(base, typeof(*ring), base); + + i915_timeline_fini(&ring->timeline); + kfree(ring); } struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, @@ -155,12 +178,6 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, if (!engine) return NULL; - engine->base.buffer = mock_ring(&engine->base); - if (!engine->base.buffer) { - kfree(engine); - return NULL; - } - /* minimal engine setup for requests */ engine->base.i915 = i915; snprintf(engine->base.name, sizeof(engine->base.name), "%s", name); @@ -174,9 +191,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, engine->base.emit_breadcrumb = mock_emit_breadcrumb; engine->base.submit_request = mock_submit_request; - engine->base.timeline = - &i915->gt.global_timeline.engine[engine->base.id]; - + i915_timeline_init(i915, &engine->base.timeline, engine->base.name); intel_engine_init_breadcrumbs(&engine->base); engine->base.breadcrumbs.mock = true; /* prevent touching HW for irqs */ @@ -185,7 +200,17 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915, timer_setup(&engine->hw_delay, hw_delay_complete, 0); INIT_LIST_HEAD(&engine->hw_queue); + engine->base.buffer = mock_ring(&engine->base); + if (!engine->base.buffer) + goto err_breadcrumbs; + return &engine->base; + +err_breadcrumbs: + intel_engine_fini_breadcrumbs(&engine->base); + i915_timeline_fini(&engine->base.timeline); + kfree(engine); + return NULL; } void mock_engine_flush(struct intel_engine_cs *engine) @@ -217,10 +242,12 @@ void mock_engine_free(struct intel_engine_cs *engine) GEM_BUG_ON(timer_pending(&mock->hw_delay)); if (engine->last_retired_context) - engine->context_unpin(engine, engine->last_retired_context); + intel_context_unpin(engine->last_retired_context, engine); + + mock_ring_free(engine->buffer); intel_engine_fini_breadcrumbs(engine); + i915_timeline_fini(&engine->timeline); - kfree(engine->buffer); kfree(engine); } diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index e6d4b88..94baedf 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -44,6 +44,7 @@ void mock_device_flush(struct drm_i915_private *i915) mock_engine_flush(engine); i915_retire_requests(i915); + GEM_BUG_ON(i915->gt.active_requests); } static void mock_device_release(struct drm_device *dev) @@ -72,8 +73,8 @@ static void mock_device_release(struct drm_device *dev) mutex_lock(&i915->drm.struct_mutex); mock_fini_ggtt(i915); - i915_gem_timeline_fini(&i915->gt.global_timeline); mutex_unlock(&i915->drm.struct_mutex); + WARN_ON(!list_empty(&i915->gt.timelines)); destroy_workqueue(i915->wq); @@ -223,26 +224,25 @@ struct drm_i915_private *mock_gem_device(void) if (!i915->priorities) goto err_dependencies; - mutex_lock(&i915->drm.struct_mutex); INIT_LIST_HEAD(&i915->gt.timelines); - err = i915_gem_timeline_init__global(i915); - if (err) { - mutex_unlock(&i915->drm.struct_mutex); - goto err_priorities; - } + INIT_LIST_HEAD(&i915->gt.active_rings); + INIT_LIST_HEAD(&i915->gt.closed_vma); + + mutex_lock(&i915->drm.struct_mutex); mock_init_ggtt(i915); - mutex_unlock(&i915->drm.struct_mutex); mkwrite_device_info(i915)->ring_mask = BIT(0); i915->engine[RCS] = mock_engine(i915, "mock", RCS); if (!i915->engine[RCS]) - goto err_priorities; + goto err_unlock; i915->kernel_context = mock_context(i915, NULL); if (!i915->kernel_context) goto err_engine; + mutex_unlock(&i915->drm.struct_mutex); + WARN_ON(i915_gemfs_init(i915)); return i915; @@ -250,7 +250,8 @@ struct drm_i915_private *mock_gem_device(void) err_engine: for_each_engine(engine, i915, id) mock_engine_free(engine); -err_priorities: +err_unlock: + mutex_unlock(&i915->drm.struct_mutex); kmem_cache_destroy(i915->priorities); err_dependencies: kmem_cache_destroy(i915->dependencies); diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c index e96873f..36c1120 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c @@ -76,7 +76,6 @@ mock_ppgtt(struct drm_i915_private *i915, INIT_LIST_HEAD(&ppgtt->base.global_link); drm_mm_init(&ppgtt->base.mm, 0, ppgtt->base.total); - i915_gem_timeline_init(i915, &ppgtt->base.timeline, name); ppgtt->base.clear_range = nop_clear_range; ppgtt->base.insert_page = mock_insert_page; diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.c b/drivers/gpu/drm/i915/selftests/mock_timeline.c index 47b1f47..dcf3b16 100644 --- a/drivers/gpu/drm/i915/selftests/mock_timeline.c +++ b/drivers/gpu/drm/i915/selftests/mock_timeline.c @@ -1,45 +1,28 @@ /* - * Copyright © 2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. + * SPDX-License-Identifier: MIT * + * Copyright © 2017-2018 Intel Corporation */ +#include "../i915_timeline.h" + #include "mock_timeline.h" -struct intel_timeline *mock_timeline(u64 context) +void mock_timeline_init(struct i915_timeline *timeline, u64 context) { - static struct lock_class_key class; - struct intel_timeline *tl; + timeline->fence_context = context; + + spin_lock_init(&timeline->lock); - tl = kzalloc(sizeof(*tl), GFP_KERNEL); - if (!tl) - return NULL; + init_request_active(&timeline->last_request, NULL); + INIT_LIST_HEAD(&timeline->requests); - __intel_timeline_init(tl, NULL, context, &class, "mock"); + i915_syncmap_init(&timeline->sync); - return tl; + INIT_LIST_HEAD(&timeline->link); } -void mock_timeline_destroy(struct intel_timeline *tl) +void mock_timeline_fini(struct i915_timeline *timeline) { - __intel_timeline_fini(tl); - kfree(tl); + i915_timeline_fini(timeline); } diff --git a/drivers/gpu/drm/i915/selftests/mock_timeline.h b/drivers/gpu/drm/i915/selftests/mock_timeline.h index c27ff46..b6deaa6 100644 --- a/drivers/gpu/drm/i915/selftests/mock_timeline.h +++ b/drivers/gpu/drm/i915/selftests/mock_timeline.h @@ -1,33 +1,15 @@ /* - * Copyright © 2017 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. + * SPDX-License-Identifier: MIT * + * Copyright © 2017-2018 Intel Corporation */ #ifndef __MOCK_TIMELINE__ #define __MOCK_TIMELINE__ -#include "../i915_gem_timeline.h" +struct i915_timeline; -struct intel_timeline *mock_timeline(u64 context); -void mock_timeline_destroy(struct intel_timeline *tl); +void mock_timeline_init(struct i915_timeline *timeline, u64 context); +void mock_timeline_fini(struct i915_timeline *timeline); #endif /* !__MOCK_TIMELINE__ */ diff --git a/drivers/gpu/drm/mediatek/Kconfig b/drivers/gpu/drm/mediatek/Kconfig index 294de45..119ec0a 100644 --- a/drivers/gpu/drm/mediatek/Kconfig +++ b/drivers/gpu/drm/mediatek/Kconfig @@ -11,6 +11,7 @@ config DRM_MEDIATEK select DRM_PANEL select MEMORY select MTK_SMI + select VIDEOMODE_HELPERS help Choose this option if you have a Mediatek SoCs. The module will be called mediatek-drm diff --git a/drivers/gpu/drm/mediatek/mtk_dpi.c b/drivers/gpu/drm/mediatek/mtk_dpi.c index e80a603..6c0ea39 100644 --- a/drivers/gpu/drm/mediatek/mtk_dpi.c +++ b/drivers/gpu/drm/mediatek/mtk_dpi.c @@ -22,6 +22,7 @@ #include <linux/interrupt.h> #include <linux/types.h> #include <linux/clk.h> +#include <video/videomode.h> #include "mtk_dpi_regs.h" #include "mtk_drm_ddp_comp.h" @@ -429,34 +430,35 @@ static int mtk_dpi_set_display_mode(struct mtk_dpi *dpi, struct mtk_dpi_sync_param vsync_leven = { 0 }; struct mtk_dpi_sync_param vsync_rodd = { 0 }; struct mtk_dpi_sync_param vsync_reven = { 0 }; - unsigned long pix_rate; + struct videomode vm = { 0 }; unsigned long pll_rate; unsigned int factor; /* let pll_rate can fix the valid range of tvdpll (1G~2GHz) */ - pix_rate = 1000UL * mode->clock; + if (mode->clock <= 27000) - factor = 16 * 3; + factor = 3 << 4; else if (mode->clock <= 84000) - factor = 8 * 3; + factor = 3 << 3; else if (mode->clock <= 167000) - factor = 4 * 3; + factor = 3 << 2; else - factor = 2 * 3; - pll_rate = pix_rate * factor; + factor = 3 << 1; + drm_display_mode_to_videomode(mode, &vm); + pll_rate = vm.pixelclock * factor; dev_dbg(dpi->dev, "Want PLL %lu Hz, pixel clock %lu Hz\n", - pll_rate, pix_rate); + pll_rate, vm.pixelclock); clk_set_rate(dpi->tvd_clk, pll_rate); pll_rate = clk_get_rate(dpi->tvd_clk); - pix_rate = pll_rate / factor; - clk_set_rate(dpi->pixel_clk, pix_rate); - pix_rate = clk_get_rate(dpi->pixel_clk); + vm.pixelclock = pll_rate / factor; + clk_set_rate(dpi->pixel_clk, vm.pixelclock); + vm.pixelclock = clk_get_rate(dpi->pixel_clk); dev_dbg(dpi->dev, "Got PLL %lu Hz, pixel clock %lu Hz\n", - pll_rate, pix_rate); + pll_rate, vm.pixelclock); limit.c_bottom = 0x0010; limit.c_top = 0x0FE0; @@ -465,33 +467,31 @@ static int mtk_dpi_set_display_mode(struct mtk_dpi *dpi, dpi_pol.ck_pol = MTK_DPI_POLARITY_FALLING; dpi_pol.de_pol = MTK_DPI_POLARITY_RISING; - dpi_pol.hsync_pol = mode->flags & DRM_MODE_FLAG_PHSYNC ? + dpi_pol.hsync_pol = vm.flags & DISPLAY_FLAGS_HSYNC_HIGH ? MTK_DPI_POLARITY_FALLING : MTK_DPI_POLARITY_RISING; - dpi_pol.vsync_pol = mode->flags & DRM_MODE_FLAG_PVSYNC ? + dpi_pol.vsync_pol = vm.flags & DISPLAY_FLAGS_VSYNC_HIGH ? MTK_DPI_POLARITY_FALLING : MTK_DPI_POLARITY_RISING; - - hsync.sync_width = mode->hsync_end - mode->hsync_start; - hsync.back_porch = mode->htotal - mode->hsync_end; - hsync.front_porch = mode->hsync_start - mode->hdisplay; + hsync.sync_width = vm.hsync_len; + hsync.back_porch = vm.hback_porch; + hsync.front_porch = vm.hfront_porch; hsync.shift_half_line = false; - - vsync_lodd.sync_width = mode->vsync_end - mode->vsync_start; - vsync_lodd.back_porch = mode->vtotal - mode->vsync_end; - vsync_lodd.front_porch = mode->vsync_start - mode->vdisplay; + vsync_lodd.sync_width = vm.vsync_len; + vsync_lodd.back_porch = vm.vback_porch; + vsync_lodd.front_porch = vm.vfront_porch; vsync_lodd.shift_half_line = false; - if (mode->flags & DRM_MODE_FLAG_INTERLACE && + if (vm.flags & DISPLAY_FLAGS_INTERLACED && mode->flags & DRM_MODE_FLAG_3D_MASK) { vsync_leven = vsync_lodd; vsync_rodd = vsync_lodd; vsync_reven = vsync_lodd; vsync_leven.shift_half_line = true; vsync_reven.shift_half_line = true; - } else if (mode->flags & DRM_MODE_FLAG_INTERLACE && + } else if (vm.flags & DISPLAY_FLAGS_INTERLACED && !(mode->flags & DRM_MODE_FLAG_3D_MASK)) { vsync_leven = vsync_lodd; vsync_leven.shift_half_line = true; - } else if (!(mode->flags & DRM_MODE_FLAG_INTERLACE) && + } else if (!(vm.flags & DISPLAY_FLAGS_INTERLACED) && mode->flags & DRM_MODE_FLAG_3D_MASK) { vsync_rodd = vsync_lodd; } @@ -505,12 +505,12 @@ static int mtk_dpi_set_display_mode(struct mtk_dpi *dpi, mtk_dpi_config_vsync_reven(dpi, &vsync_reven); mtk_dpi_config_3d(dpi, !!(mode->flags & DRM_MODE_FLAG_3D_MASK)); - mtk_dpi_config_interface(dpi, !!(mode->flags & - DRM_MODE_FLAG_INTERLACE)); - if (mode->flags & DRM_MODE_FLAG_INTERLACE) - mtk_dpi_config_fb_size(dpi, mode->hdisplay, mode->vdisplay / 2); + mtk_dpi_config_interface(dpi, !!(vm.flags & + DISPLAY_FLAGS_INTERLACED)); + if (vm.flags & DISPLAY_FLAGS_INTERLACED) + mtk_dpi_config_fb_size(dpi, vm.hactive, vm.vactive >> 1); else - mtk_dpi_config_fb_size(dpi, mode->hdisplay, mode->vdisplay); + mtk_dpi_config_fb_size(dpi, vm.hactive, vm.vactive); mtk_dpi_config_channel_limit(dpi, &limit); mtk_dpi_config_bit_num(dpi, dpi->bit_num); diff --git a/drivers/gpu/drm/mediatek/mtk_drm_gem.c b/drivers/gpu/drm/mediatek/mtk_drm_gem.c index f595ac8..259b7b0 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_gem.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_gem.c @@ -220,7 +220,7 @@ struct drm_gem_object *mtk_gem_prime_import_sg_table(struct drm_device *dev, mtk_gem = mtk_drm_gem_init(dev, attach->dmabuf->size); if (IS_ERR(mtk_gem)) - return ERR_PTR(PTR_ERR(mtk_gem)); + return ERR_CAST(mtk_gem); expected = sg_dma_address(sg->sgl); for_each_sg(sg->sgl, s, sg->nents, i) { diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c index 7e5e24c..aa0943e 100644 --- a/drivers/gpu/drm/mediatek/mtk_dsi.c +++ b/drivers/gpu/drm/mediatek/mtk_dsi.c @@ -551,13 +551,12 @@ static int mtk_dsi_poweron(struct mtk_dsi *dsi) } /** - * vm.pixelclock is in kHz, pixel_clock unit is Hz, so multiply by 1000 * htotal_time = htotal * byte_per_pixel / num_lanes * overhead_time = lpx + hs_prepare + hs_zero + hs_trail + hs_exit * mipi_ratio = (htotal_time + overhead_time) / htotal_time * data_rate = pixel_clock * bit_per_pixel * mipi_ratio / num_lanes; */ - pixel_clock = dsi->vm.pixelclock * 1000; + pixel_clock = dsi->vm.pixelclock; htotal = dsi->vm.hactive + dsi->vm.hback_porch + dsi->vm.hfront_porch + dsi->vm.hsync_len; htotal_bits = htotal * bit_per_pixel; @@ -725,16 +724,7 @@ static void mtk_dsi_encoder_mode_set(struct drm_encoder *encoder, { struct mtk_dsi *dsi = encoder_to_dsi(encoder); - dsi->vm.pixelclock = adjusted->clock; - dsi->vm.hactive = adjusted->hdisplay; - dsi->vm.hback_porch = adjusted->htotal - adjusted->hsync_end; - dsi->vm.hfront_porch = adjusted->hsync_start - adjusted->hdisplay; - dsi->vm.hsync_len = adjusted->hsync_end - adjusted->hsync_start; - - dsi->vm.vactive = adjusted->vdisplay; - dsi->vm.vback_porch = adjusted->vtotal - adjusted->vsync_end; - dsi->vm.vfront_porch = adjusted->vsync_start - adjusted->vdisplay; - dsi->vm.vsync_len = adjusted->vsync_end - adjusted->vsync_start; + drm_display_mode_to_videomode(adjusted, &dsi->vm); } static void mtk_dsi_encoder_disable(struct drm_encoder *encoder) diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c index fb50a9d..8918539 100644 --- a/drivers/gpu/drm/mgag200/mgag200_mode.c +++ b/drivers/gpu/drm/mgag200/mgag200_mode.c @@ -1586,7 +1586,7 @@ static uint32_t mga_vga_calculate_mode_bandwidth(struct drm_display_mode *mode, #define MODE_BANDWIDTH MODE_BAD -static int mga_vga_mode_valid(struct drm_connector *connector, +static enum drm_mode_status mga_vga_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct drm_device *dev = connector->dev; diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index a4f68af..d39400e 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -89,14 +89,14 @@ static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname) */ if (to_adreno_gpu(gpu)->fwloc == FW_LOCATION_LEGACY) { ret = qcom_mdt_load(dev, fw, fwname, GPU_PAS_ID, - mem_region, mem_phys, mem_size); + mem_region, mem_phys, mem_size, NULL); } else { char newname[strlen("qcom/") + strlen(fwname) + 1]; sprintf(newname, "qcom/%s", fwname); ret = qcom_mdt_load(dev, fw, newname, GPU_PAS_ID, - mem_region, mem_phys, mem_size); + mem_region, mem_phys, mem_size, NULL); } if (ret) goto out; diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c index 8e0cb16..0ae5ace 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_device.c +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c @@ -168,7 +168,6 @@ struct msm_gpu *adreno_load_gpu(struct drm_device *dev) if (gpu->funcs->debugfs_init) { gpu->funcs->debugfs_init(gpu, dev->primary); gpu->funcs->debugfs_init(gpu, dev->render); - gpu->funcs->debugfs_init(gpu, dev->control); } #endif diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c index 6e5e1aa..b001699 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c @@ -351,6 +351,7 @@ static void mdp4_crtc_atomic_flush(struct drm_crtc *crtc, spin_lock_irqsave(&dev->event_lock, flags); mdp4_crtc->event = crtc->state->event; + crtc->state->event = NULL; spin_unlock_irqrestore(&dev->event_lock, flags); blend_setup(crtc); diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c index 9893e43..76b9608 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c @@ -708,6 +708,7 @@ static void mdp5_crtc_atomic_flush(struct drm_crtc *crtc, spin_lock_irqsave(&dev->event_lock, flags); mdp5_crtc->event = crtc->state->event; + crtc->state->event = NULL; spin_unlock_irqrestore(&dev->event_lock, flags); /* diff --git a/drivers/gpu/drm/msm/disp/mdp_format.c b/drivers/gpu/drm/msm/disp/mdp_format.c index b4a8aa4..005760b 100644 --- a/drivers/gpu/drm/msm/disp/mdp_format.c +++ b/drivers/gpu/drm/msm/disp/mdp_format.c @@ -171,7 +171,8 @@ uint32_t mdp_get_formats(uint32_t *pixel_formats, uint32_t max_formats, return i; } -const struct msm_format *mdp_get_format(struct msm_kms *kms, uint32_t format) +const struct msm_format *mdp_get_format(struct msm_kms *kms, uint32_t format, + uint64_t modifier) { int i; for (i = 0; i < ARRAY_SIZE(formats); i++) { diff --git a/drivers/gpu/drm/msm/disp/mdp_kms.h b/drivers/gpu/drm/msm/disp/mdp_kms.h index 1185487..4fa8dbe 100644 --- a/drivers/gpu/drm/msm/disp/mdp_kms.h +++ b/drivers/gpu/drm/msm/disp/mdp_kms.h @@ -98,7 +98,7 @@ struct mdp_format { #define MDP_FORMAT_IS_YUV(mdp_format) ((mdp_format)->is_yuv) uint32_t mdp_get_formats(uint32_t *formats, uint32_t max_formats, bool rgb_only); -const struct msm_format *mdp_get_format(struct msm_kms *kms, uint32_t format); +const struct msm_format *mdp_get_format(struct msm_kms *kms, uint32_t format, uint64_t modifier); /* MDP capabilities */ #define MDP_CAP_SMP BIT(0) /* Shared Memory Pool */ diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index 7a03a94..8baba30 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -173,6 +173,7 @@ struct msm_dsi_host { bool registered; bool power_on; + bool enabled; int irq; }; @@ -775,7 +776,7 @@ static inline enum dsi_cmd_dst_format dsi_get_cmd_fmt( switch (mipi_fmt) { case MIPI_DSI_FMT_RGB888: return CMD_DST_FORMAT_RGB888; case MIPI_DSI_FMT_RGB666_PACKED: - case MIPI_DSI_FMT_RGB666: return VID_DST_FORMAT_RGB666; + case MIPI_DSI_FMT_RGB666: return CMD_DST_FORMAT_RGB666; case MIPI_DSI_FMT_RGB565: return CMD_DST_FORMAT_RGB565; default: return CMD_DST_FORMAT_RGB888; } @@ -986,13 +987,19 @@ static void dsi_set_tx_power_mode(int mode, struct msm_dsi_host *msm_host) static void dsi_wait4video_done(struct msm_dsi_host *msm_host) { + u32 ret = 0; + struct device *dev = &msm_host->pdev->dev; + dsi_intr_ctrl(msm_host, DSI_IRQ_MASK_VIDEO_DONE, 1); reinit_completion(&msm_host->video_comp); - wait_for_completion_timeout(&msm_host->video_comp, + ret = wait_for_completion_timeout(&msm_host->video_comp, msecs_to_jiffies(70)); + if (ret <= 0) + dev_err(dev, "wait for video done timed out\n"); + dsi_intr_ctrl(msm_host, DSI_IRQ_MASK_VIDEO_DONE, 0); } @@ -1001,7 +1008,7 @@ static void dsi_wait4video_eng_busy(struct msm_dsi_host *msm_host) if (!(msm_host->mode_flags & MIPI_DSI_MODE_VIDEO)) return; - if (msm_host->power_on) { + if (msm_host->power_on && msm_host->enabled) { dsi_wait4video_done(msm_host); /* delay 4 ms to skip BLLP */ usleep_range(2000, 4000); @@ -2203,7 +2210,7 @@ int msm_dsi_host_enable(struct mipi_dsi_host *host) * pm_runtime_put_autosuspend(&msm_host->pdev->dev); * } */ - + msm_host->enabled = true; return 0; } @@ -2211,6 +2218,7 @@ int msm_dsi_host_disable(struct mipi_dsi_host *host) { struct msm_dsi_host *msm_host = to_msm_dsi_host(host); + msm_host->enabled = false; dsi_op_mode_config(msm_host, !!(msm_host->mode_flags & MIPI_DSI_MODE_VIDEO), false); diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c index 8e9d5c2..9a9fa0c 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c @@ -265,6 +265,115 @@ int msm_dsi_dphy_timing_calc_v2(struct msm_dsi_dphy_timing *timing, return 0; } +int msm_dsi_dphy_timing_calc_v3(struct msm_dsi_dphy_timing *timing, + struct msm_dsi_phy_clk_request *clk_req) +{ + const unsigned long bit_rate = clk_req->bitclk_rate; + const unsigned long esc_rate = clk_req->escclk_rate; + s32 ui, ui_x8, lpx; + s32 tmax, tmin; + s32 pcnt0 = 50; + s32 pcnt1 = 50; + s32 pcnt2 = 10; + s32 pcnt3 = 30; + s32 pcnt4 = 10; + s32 pcnt5 = 2; + s32 coeff = 1000; /* Precision, should avoid overflow */ + s32 hb_en, hb_en_ckln; + s32 temp; + + if (!bit_rate || !esc_rate) + return -EINVAL; + + timing->hs_halfbyte_en = 0; + hb_en = 0; + timing->hs_halfbyte_en_ckln = 0; + hb_en_ckln = 0; + + ui = mult_frac(NSEC_PER_MSEC, coeff, bit_rate / 1000); + ui_x8 = ui << 3; + lpx = mult_frac(NSEC_PER_MSEC, coeff, esc_rate / 1000); + + temp = S_DIV_ROUND_UP(38 * coeff, ui_x8); + tmin = max_t(s32, temp, 0); + temp = (95 * coeff) / ui_x8; + tmax = max_t(s32, temp, 0); + timing->clk_prepare = linear_inter(tmax, tmin, pcnt0, 0, false); + + temp = 300 * coeff - (timing->clk_prepare << 3) * ui; + tmin = S_DIV_ROUND_UP(temp, ui_x8) - 1; + tmax = (tmin > 255) ? 511 : 255; + timing->clk_zero = linear_inter(tmax, tmin, pcnt5, 0, false); + + tmin = DIV_ROUND_UP(60 * coeff + 3 * ui, ui_x8); + temp = 105 * coeff + 12 * ui - 20 * coeff; + tmax = (temp + 3 * ui) / ui_x8; + timing->clk_trail = linear_inter(tmax, tmin, pcnt3, 0, false); + + temp = S_DIV_ROUND_UP(40 * coeff + 4 * ui, ui_x8); + tmin = max_t(s32, temp, 0); + temp = (85 * coeff + 6 * ui) / ui_x8; + tmax = max_t(s32, temp, 0); + timing->hs_prepare = linear_inter(tmax, tmin, pcnt1, 0, false); + + temp = 145 * coeff + 10 * ui - (timing->hs_prepare << 3) * ui; + tmin = S_DIV_ROUND_UP(temp, ui_x8) - 1; + tmax = 255; + timing->hs_zero = linear_inter(tmax, tmin, pcnt4, 0, false); + + tmin = DIV_ROUND_UP(60 * coeff + 4 * ui, ui_x8) - 1; + temp = 105 * coeff + 12 * ui - 20 * coeff; + tmax = (temp / ui_x8) - 1; + timing->hs_trail = linear_inter(tmax, tmin, pcnt3, 0, false); + + temp = 50 * coeff + ((hb_en << 2) - 8) * ui; + timing->hs_rqst = S_DIV_ROUND_UP(temp, ui_x8); + + tmin = DIV_ROUND_UP(100 * coeff, ui_x8) - 1; + tmax = 255; + timing->hs_exit = linear_inter(tmax, tmin, pcnt2, 0, false); + + temp = 50 * coeff + ((hb_en_ckln << 2) - 8) * ui; + timing->hs_rqst_ckln = S_DIV_ROUND_UP(temp, ui_x8); + + temp = 60 * coeff + 52 * ui - 43 * ui; + tmin = DIV_ROUND_UP(temp, ui_x8) - 1; + tmax = 63; + timing->shared_timings.clk_post = + linear_inter(tmax, tmin, pcnt2, 0, false); + + temp = 8 * ui + (timing->clk_prepare << 3) * ui; + temp += (((timing->clk_zero + 3) << 3) + 11) * ui; + temp += hb_en_ckln ? (((timing->hs_rqst_ckln << 3) + 4) * ui) : + (((timing->hs_rqst_ckln << 3) + 8) * ui); + tmin = S_DIV_ROUND_UP(temp, ui_x8) - 1; + tmax = 63; + if (tmin > tmax) { + temp = linear_inter(tmax << 1, tmin, pcnt2, 0, false); + timing->shared_timings.clk_pre = temp >> 1; + timing->shared_timings.clk_pre_inc_by_2 = 1; + } else { + timing->shared_timings.clk_pre = + linear_inter(tmax, tmin, pcnt2, 0, false); + timing->shared_timings.clk_pre_inc_by_2 = 0; + } + + timing->ta_go = 3; + timing->ta_sure = 0; + timing->ta_get = 4; + + DBG("%d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d", + timing->shared_timings.clk_pre, timing->shared_timings.clk_post, + timing->shared_timings.clk_pre_inc_by_2, timing->clk_zero, + timing->clk_trail, timing->clk_prepare, timing->hs_exit, + timing->hs_zero, timing->hs_prepare, timing->hs_trail, + timing->hs_rqst, timing->hs_rqst_ckln, timing->hs_halfbyte_en, + timing->hs_halfbyte_en_ckln, timing->hs_prep_dly, + timing->hs_prep_dly_ckln); + + return 0; +} + void msm_dsi_phy_set_src_pll(struct msm_dsi_phy *phy, int pll_id, u32 reg, u32 bit_mask) { diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h index c56268c..a24ab80 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h @@ -101,6 +101,8 @@ int msm_dsi_dphy_timing_calc(struct msm_dsi_dphy_timing *timing, struct msm_dsi_phy_clk_request *clk_req); int msm_dsi_dphy_timing_calc_v2(struct msm_dsi_dphy_timing *timing, struct msm_dsi_phy_clk_request *clk_req); +int msm_dsi_dphy_timing_calc_v3(struct msm_dsi_dphy_timing *timing, + struct msm_dsi_phy_clk_request *clk_req); void msm_dsi_phy_set_src_pll(struct msm_dsi_phy *phy, int pll_id, u32 reg, u32 bit_mask); int msm_dsi_phy_init_common(struct msm_dsi_phy *phy); diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c index 0af951a..b3fffc8 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_10nm.c @@ -79,34 +79,6 @@ static void dsi_phy_hw_v3_0_lane_settings(struct msm_dsi_phy *phy) dsi_phy_write(lane_base + REG_DSI_10nm_PHY_LN_TX_DCTRL(3), 0x04); } -static int msm_dsi_dphy_timing_calc_v3(struct msm_dsi_dphy_timing *timing, - struct msm_dsi_phy_clk_request *clk_req) -{ - /* - * TODO: These params need to be computed, they're currently hardcoded - * for a 1440x2560@60Hz panel with a byteclk of 100.618 Mhz, and a - * default escape clock of 19.2 Mhz. - */ - - timing->hs_halfbyte_en = 0; - timing->clk_zero = 0x1c; - timing->clk_prepare = 0x07; - timing->clk_trail = 0x07; - timing->hs_exit = 0x23; - timing->hs_zero = 0x21; - timing->hs_prepare = 0x07; - timing->hs_trail = 0x07; - timing->hs_rqst = 0x05; - timing->ta_sure = 0x00; - timing->ta_go = 0x03; - timing->ta_get = 0x04; - - timing->shared_timings.clk_pre = 0x2d; - timing->shared_timings.clk_post = 0x0d; - - return 0; -} - static int dsi_10nm_phy_enable(struct msm_dsi_phy *phy, int src_pll_id, struct msm_dsi_phy_clk_request *clk_req) { diff --git a/drivers/gpu/drm/msm/msm_debugfs.c b/drivers/gpu/drm/msm/msm_debugfs.c index ba74cb4..1ff3fda 100644 --- a/drivers/gpu/drm/msm/msm_debugfs.c +++ b/drivers/gpu/drm/msm/msm_debugfs.c @@ -140,9 +140,6 @@ int msm_debugfs_late_init(struct drm_device *dev) if (ret) return ret; ret = late_init_minor(dev->render); - if (ret) - return ret; - ret = late_init_minor(dev->control); return ret; } diff --git a/drivers/gpu/drm/msm/msm_fb.c b/drivers/gpu/drm/msm/msm_fb.c index 0e0c872..7a16242 100644 --- a/drivers/gpu/drm/msm/msm_fb.c +++ b/drivers/gpu/drm/msm/msm_fb.c @@ -183,7 +183,8 @@ static struct drm_framebuffer *msm_framebuffer_init(struct drm_device *dev, hsub = drm_format_horz_chroma_subsampling(mode_cmd->pixel_format); vsub = drm_format_vert_chroma_subsampling(mode_cmd->pixel_format); - format = kms->funcs->get_format(kms, mode_cmd->pixel_format); + format = kms->funcs->get_format(kms, mode_cmd->pixel_format, + mode_cmd->modifier[0]); if (!format) { dev_err(dev->dev, "unsupported pixel format: %4.4s\n", (char *)&mode_cmd->pixel_format); diff --git a/drivers/gpu/drm/msm/msm_fbdev.c b/drivers/gpu/drm/msm/msm_fbdev.c index c178563..456622b 100644 --- a/drivers/gpu/drm/msm/msm_fbdev.c +++ b/drivers/gpu/drm/msm/msm_fbdev.c @@ -92,8 +92,7 @@ static int msm_fbdev_create(struct drm_fb_helper *helper, if (IS_ERR(fb)) { dev_err(dev->dev, "failed to allocate fb\n"); - ret = PTR_ERR(fb); - goto fail; + return PTR_ERR(fb); } bo = msm_framebuffer_bo(fb, 0); @@ -151,13 +150,7 @@ static int msm_fbdev_create(struct drm_fb_helper *helper, fail_unlock: mutex_unlock(&dev->struct_mutex); -fail: - - if (ret) { - if (fb) - drm_framebuffer_remove(fb); - } - + drm_framebuffer_remove(fb); return ret; } diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 9519647..f583bb4 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -132,17 +132,19 @@ static void put_pages(struct drm_gem_object *obj) struct msm_gem_object *msm_obj = to_msm_bo(obj); if (msm_obj->pages) { - /* For non-cached buffers, ensure the new pages are clean - * because display controller, GPU, etc. are not coherent: - */ - if (msm_obj->flags & (MSM_BO_WC|MSM_BO_UNCACHED)) - dma_unmap_sg(obj->dev->dev, msm_obj->sgt->sgl, - msm_obj->sgt->nents, DMA_BIDIRECTIONAL); + if (msm_obj->sgt) { + /* For non-cached buffers, ensure the new + * pages are clean because display controller, + * GPU, etc. are not coherent: + */ + if (msm_obj->flags & (MSM_BO_WC|MSM_BO_UNCACHED)) + dma_unmap_sg(obj->dev->dev, msm_obj->sgt->sgl, + msm_obj->sgt->nents, + DMA_BIDIRECTIONAL); - if (msm_obj->sgt) sg_free_table(msm_obj->sgt); - - kfree(msm_obj->sgt); + kfree(msm_obj->sgt); + } if (use_pages(obj)) drm_gem_put_pages(obj, msm_obj->pages, true, false); diff --git a/drivers/gpu/drm/msm/msm_kms.h b/drivers/gpu/drm/msm/msm_kms.h index 17d5824..aaa329d 100644 --- a/drivers/gpu/drm/msm/msm_kms.h +++ b/drivers/gpu/drm/msm/msm_kms.h @@ -48,8 +48,11 @@ struct msm_kms_funcs { /* functions to wait for atomic commit completed on each CRTC */ void (*wait_for_crtc_commit_done)(struct msm_kms *kms, struct drm_crtc *crtc); + /* get msm_format w/ optional format modifiers from drm_mode_fb_cmd2 */ + const struct msm_format *(*get_format)(struct msm_kms *kms, + const uint32_t format, + const uint64_t modifiers); /* misc: */ - const struct msm_format *(*get_format)(struct msm_kms *kms, uint32_t format); long (*round_pixclk)(struct msm_kms *kms, unsigned long rate, struct drm_encoder *encoder); int (*set_split_display)(struct msm_kms *kms, diff --git a/drivers/gpu/drm/mxsfb/mxsfb_drv.c b/drivers/gpu/drm/mxsfb/mxsfb_drv.c index 5cae8db..ffe5137 100644 --- a/drivers/gpu/drm/mxsfb/mxsfb_drv.c +++ b/drivers/gpu/drm/mxsfb/mxsfb_drv.c @@ -99,7 +99,8 @@ static const struct drm_mode_config_funcs mxsfb_mode_config_funcs = { }; static void mxsfb_pipe_enable(struct drm_simple_display_pipe *pipe, - struct drm_crtc_state *crtc_state) + struct drm_crtc_state *crtc_state, + struct drm_plane_state *plane_state) { struct mxsfb_drm_private *mxsfb = drm_pipe_to_mxsfb_drm_private(pipe); @@ -125,12 +126,6 @@ static void mxsfb_pipe_update(struct drm_simple_display_pipe *pipe, mxsfb_plane_atomic_update(mxsfb, plane_state); } -static int mxsfb_pipe_prepare_fb(struct drm_simple_display_pipe *pipe, - struct drm_plane_state *plane_state) -{ - return drm_gem_fb_prepare_fb(&pipe->plane, plane_state); -} - static int mxsfb_pipe_enable_vblank(struct drm_simple_display_pipe *pipe) { struct mxsfb_drm_private *mxsfb = drm_pipe_to_mxsfb_drm_private(pipe); @@ -159,7 +154,7 @@ static struct drm_simple_display_pipe_funcs mxsfb_funcs = { .enable = mxsfb_pipe_enable, .disable = mxsfb_pipe_disable, .update = mxsfb_pipe_update, - .prepare_fb = mxsfb_pipe_prepare_fb, + .prepare_fb = drm_gem_fb_simple_display_pipe_prepare_fb, .enable_vblank = mxsfb_pipe_enable_vblank, .disable_vblank = mxsfb_pipe_disable_vblank, }; diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index 0097134..7d0bec8d 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -338,11 +338,9 @@ static struct nouveau_drm_prop_enum_list dither_depth[] = { if (c) { \ p = drm_property_create(dev, DRM_MODE_PROP_ENUM, n, c); \ l = (list); \ - c = 0; \ while (p && l->gen_mask) { \ if (l->gen_mask & (1 << (gen))) { \ - drm_property_add_enum(p, c, l->type, l->name); \ - c++; \ + drm_property_add_enum(p, l->type, l->name); \ } \ l++; \ } \ diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/base.c index e4c8d31..81c3567 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/base.c @@ -134,7 +134,7 @@ nvkm_cstate_find_best(struct nvkm_clk *clk, struct nvkm_pstate *pstate, nvkm_volt_map(volt, volt->max2_id, clk->temp)); for (cstate = start; &cstate->head != &pstate->list; - cstate = list_entry(cstate->head.prev, typeof(*cstate), head)) { + cstate = list_prev_entry(cstate, head)) { if (nvkm_cstate_valid(clk, cstate, max_volt, clk->temp)) break; } diff --git a/drivers/gpu/drm/omapdrm/dss/dpi.c b/drivers/gpu/drm/omapdrm/dss/dpi.c index fb1c27f..3d662e6 100644 --- a/drivers/gpu/drm/omapdrm/dss/dpi.c +++ b/drivers/gpu/drm/omapdrm/dss/dpi.c @@ -142,7 +142,7 @@ static enum dss_clk_source dpi_get_clk_src(struct dpi_data *dpi) } struct dpi_clk_calc_ctx { - struct dss_pll *pll; + struct dpi_data *dpi; unsigned int clkout_idx; /* inputs */ @@ -191,7 +191,7 @@ static bool dpi_calc_hsdiv_cb(int m_dispc, unsigned long dispc, ctx->pll_cinfo.mX[ctx->clkout_idx] = m_dispc; ctx->pll_cinfo.clkout[ctx->clkout_idx] = dispc; - return dispc_div_calc(ctx->pll->dss->dispc, dispc, + return dispc_div_calc(ctx->dpi->dss->dispc, dispc, ctx->pck_min, ctx->pck_max, dpi_calc_dispc_cb, ctx); } @@ -208,8 +208,8 @@ static bool dpi_calc_pll_cb(int n, int m, unsigned long fint, ctx->pll_cinfo.fint = fint; ctx->pll_cinfo.clkdco = clkdco; - return dss_pll_hsdiv_calc_a(ctx->pll, clkdco, - ctx->pck_min, dss_get_max_fck_rate(ctx->pll->dss), + return dss_pll_hsdiv_calc_a(ctx->dpi->pll, clkdco, + ctx->pck_min, dss_get_max_fck_rate(ctx->dpi->dss), dpi_calc_hsdiv_cb, ctx); } @@ -219,7 +219,7 @@ static bool dpi_calc_dss_cb(unsigned long fck, void *data) ctx->fck = fck; - return dispc_div_calc(ctx->pll->dss->dispc, fck, + return dispc_div_calc(ctx->dpi->dss->dispc, fck, ctx->pck_min, ctx->pck_max, dpi_calc_dispc_cb, ctx); } @@ -230,7 +230,7 @@ static bool dpi_pll_clk_calc(struct dpi_data *dpi, unsigned long pck, unsigned long clkin; memset(ctx, 0, sizeof(*ctx)); - ctx->pll = dpi->pll; + ctx->dpi = dpi; ctx->clkout_idx = dss_pll_get_clkout_idx_for_src(dpi->clk_src); clkin = clk_get_rate(dpi->pll->clkin); @@ -244,7 +244,7 @@ static bool dpi_pll_clk_calc(struct dpi_data *dpi, unsigned long pck, pll_min = 0; pll_max = 0; - return dss_pll_calc_a(ctx->pll, clkin, + return dss_pll_calc_a(ctx->dpi->pll, clkin, pll_min, pll_max, dpi_calc_pll_cb, ctx); } else { /* DSS_PLL_TYPE_B */ @@ -275,6 +275,7 @@ static bool dpi_dss_clk_calc(struct dpi_data *dpi, unsigned long pck, bool ok; memset(ctx, 0, sizeof(*ctx)); + ctx->dpi = dpi; if (pck > 1000 * i * i * i) ctx->pck_min = max(pck - 1000 * i * i * i, 0lu); else diff --git a/drivers/gpu/drm/omapdrm/omap_drv.c b/drivers/gpu/drm/omapdrm/omap_drv.c index 3632854..ef3b0e3 100644 --- a/drivers/gpu/drm/omapdrm/omap_drv.c +++ b/drivers/gpu/drm/omapdrm/omap_drv.c @@ -319,6 +319,9 @@ static int omap_modeset_init(struct drm_device *dev) dev->mode_config.max_width = 8192; dev->mode_config.max_height = 8192; + /* We want the zpos to be normalized */ + dev->mode_config.normalize_zpos = true; + dev->mode_config.funcs = &omap_mode_config_funcs; dev->mode_config.helper_private = &omap_mode_config_helper_funcs; diff --git a/drivers/gpu/drm/omapdrm/omap_plane.c b/drivers/gpu/drm/omapdrm/omap_plane.c index 2899435..161233c 100644 --- a/drivers/gpu/drm/omapdrm/omap_plane.c +++ b/drivers/gpu/drm/omapdrm/omap_plane.c @@ -65,7 +65,7 @@ static void omap_plane_atomic_update(struct drm_plane *plane, info.rotation_type = OMAP_DSS_ROT_NONE; info.rotation = DRM_MODE_ROTATE_0; info.global_alpha = 0xff; - info.zorder = state->zpos; + info.zorder = state->normalized_zpos; /* update scanout: */ omap_framebuffer_update_scanout(state->fb, state, &info); diff --git a/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c b/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c index d964d45..2c9c972 100644 --- a/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c +++ b/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c @@ -238,12 +238,6 @@ static void rpi_touchscreen_i2c_write(struct rpi_touchscreen *ts, static int rpi_touchscreen_write(struct rpi_touchscreen *ts, u16 reg, u32 val) { -#if 0 - /* The firmware uses LP DSI transactions like this to bring up - * the hardware, which should be faster than using I2C to then - * pass to the Toshiba. However, I was unable to get it to - * work. - */ u8 msg[] = { reg, reg >> 8, @@ -253,13 +247,7 @@ static int rpi_touchscreen_write(struct rpi_touchscreen *ts, u16 reg, u32 val) val >> 24, }; - mipi_dsi_dcs_write_buffer(ts->dsi, msg, sizeof(msg)); -#else - rpi_touchscreen_i2c_write(ts, REG_WR_ADDRH, reg >> 8); - rpi_touchscreen_i2c_write(ts, REG_WR_ADDRL, reg); - rpi_touchscreen_i2c_write(ts, REG_WRITEH, val >> 8); - rpi_touchscreen_i2c_write(ts, REG_WRITEL, val); -#endif + mipi_dsi_generic_write(ts->dsi, msg, sizeof(msg)); return 0; } diff --git a/drivers/gpu/drm/pl111/Makefile b/drivers/gpu/drm/pl111/Makefile index 9c5e8db..19a8189 100644 --- a/drivers/gpu/drm/pl111/Makefile +++ b/drivers/gpu/drm/pl111/Makefile @@ -3,6 +3,7 @@ pl111_drm-y += pl111_display.o \ pl111_versatile.o \ pl111_drv.o +pl111_drm-$(CONFIG_ARCH_VEXPRESS) += pl111_vexpress.o pl111_drm-$(CONFIG_DEBUG_FS) += pl111_debugfs.o obj-$(CONFIG_DRM_PL111) += pl111_drm.o diff --git a/drivers/gpu/drm/pl111/pl111_display.c b/drivers/gpu/drm/pl111/pl111_display.c index 3106464..19b0d00 100644 --- a/drivers/gpu/drm/pl111/pl111_display.c +++ b/drivers/gpu/drm/pl111/pl111_display.c @@ -120,7 +120,8 @@ static int pl111_display_check(struct drm_simple_display_pipe *pipe, } static void pl111_display_enable(struct drm_simple_display_pipe *pipe, - struct drm_crtc_state *cstate) + struct drm_crtc_state *cstate, + struct drm_plane_state *plane_state) { struct drm_crtc *crtc = &pipe->crtc; struct drm_plane *plane = &pipe->plane; @@ -376,19 +377,13 @@ static void pl111_display_disable_vblank(struct drm_simple_display_pipe *pipe) writel(0, priv->regs + priv->ienb); } -static int pl111_display_prepare_fb(struct drm_simple_display_pipe *pipe, - struct drm_plane_state *plane_state) -{ - return drm_gem_fb_prepare_fb(&pipe->plane, plane_state); -} - static struct drm_simple_display_pipe_funcs pl111_display_funcs = { .mode_valid = pl111_mode_valid, .check = pl111_display_check, .enable = pl111_display_enable, .disable = pl111_display_disable, .update = pl111_display_update, - .prepare_fb = pl111_display_prepare_fb, + .prepare_fb = drm_gem_fb_simple_display_pipe_prepare_fb, }; static int pl111_clk_div_choose_div(struct clk_hw *hw, unsigned long rate, diff --git a/drivers/gpu/drm/pl111/pl111_drm.h b/drivers/gpu/drm/pl111/pl111_drm.h index 8639b2d..ce4501d 100644 --- a/drivers/gpu/drm/pl111/pl111_drm.h +++ b/drivers/gpu/drm/pl111/pl111_drm.h @@ -79,6 +79,7 @@ struct pl111_drm_dev_private { const struct pl111_variant_data *variant; void (*variant_display_enable) (struct drm_device *drm, u32 format); void (*variant_display_disable) (struct drm_device *drm); + bool use_device_memory; }; int pl111_display_init(struct drm_device *dev); diff --git a/drivers/gpu/drm/pl111/pl111_drv.c b/drivers/gpu/drm/pl111/pl111_drv.c index 4621259..454ff08 100644 --- a/drivers/gpu/drm/pl111/pl111_drv.c +++ b/drivers/gpu/drm/pl111/pl111_drv.c @@ -60,6 +60,7 @@ #include <linux/slab.h> #include <linux/of.h> #include <linux/of_graph.h> +#include <linux/of_reserved_mem.h> #include <drm/drmP.h> #include <drm/drm_atomic_helper.h> @@ -207,6 +208,24 @@ finish: return ret; } +static struct drm_gem_object * +pl111_gem_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *sgt) +{ + struct pl111_drm_dev_private *priv = dev->dev_private; + + /* + * When using device-specific reserved memory we can't import + * DMA buffers: those are passed by reference in any global + * memory and we can only handle a specific range of memory. + */ + if (priv->use_device_memory) + return ERR_PTR(-EINVAL); + + return drm_gem_cma_prime_import_sg_table(dev, attach, sgt); +} + DEFINE_DRM_GEM_CMA_FOPS(drm_fops); static struct drm_driver pl111_drm_driver = { @@ -227,7 +246,7 @@ static struct drm_driver pl111_drm_driver = { .prime_handle_to_fd = drm_gem_prime_handle_to_fd, .prime_fd_to_handle = drm_gem_prime_fd_to_handle, .gem_prime_import = drm_gem_prime_import, - .gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table, + .gem_prime_import_sg_table = pl111_gem_import_sg_table, .gem_prime_export = drm_gem_prime_export, .gem_prime_get_sg_table = drm_gem_cma_prime_get_sg_table, @@ -257,6 +276,12 @@ static int pl111_amba_probe(struct amba_device *amba_dev, drm->dev_private = priv; priv->variant = variant; + ret = of_reserved_mem_device_init(dev); + if (!ret) { + dev_info(dev, "using device-specific reserved memory\n"); + priv->use_device_memory = true; + } + if (of_property_read_u32(dev->of_node, "max-memory-bandwidth", &priv->memory_bw)) { dev_info(dev, "no max memory bandwidth specified, assume unlimited\n"); @@ -275,7 +300,8 @@ static int pl111_amba_probe(struct amba_device *amba_dev, priv->regs = devm_ioremap_resource(dev, &amba_dev->res); if (IS_ERR(priv->regs)) { dev_err(dev, "%s failed mmio\n", __func__); - return PTR_ERR(priv->regs); + ret = PTR_ERR(priv->regs); + goto dev_unref; } /* This may override some variant settings */ @@ -305,11 +331,14 @@ static int pl111_amba_probe(struct amba_device *amba_dev, dev_unref: drm_dev_unref(drm); + of_reserved_mem_device_release(dev); + return ret; } static int pl111_amba_remove(struct amba_device *amba_dev) { + struct device *dev = &amba_dev->dev; struct drm_device *drm = amba_get_drvdata(amba_dev); struct pl111_drm_dev_private *priv = drm->dev_private; @@ -319,6 +348,7 @@ static int pl111_amba_remove(struct amba_device *amba_dev) drm_panel_bridge_remove(priv->bridge); drm_mode_config_cleanup(drm); drm_dev_unref(drm); + of_reserved_mem_device_release(dev); return 0; } diff --git a/drivers/gpu/drm/pl111/pl111_versatile.c b/drivers/gpu/drm/pl111/pl111_versatile.c index 9302f51..b9baefd 100644 --- a/drivers/gpu/drm/pl111/pl111_versatile.c +++ b/drivers/gpu/drm/pl111/pl111_versatile.c @@ -1,12 +1,14 @@ #include <linux/amba/clcd-regs.h> #include <linux/device.h> #include <linux/of.h> +#include <linux/of_platform.h> #include <linux/regmap.h> #include <linux/mfd/syscon.h> #include <linux/bitops.h> #include <linux/module.h> #include <drm/drmP.h> #include "pl111_versatile.h" +#include "pl111_vexpress.h" #include "pl111_drm.h" static struct regmap *versatile_syscon_map; @@ -22,6 +24,7 @@ enum versatile_clcd { REALVIEW_CLCD_PB11MP, REALVIEW_CLCD_PBA8, REALVIEW_CLCD_PBX, + VEXPRESS_CLCD_V2M, }; static const struct of_device_id versatile_clcd_of_match[] = { @@ -53,6 +56,10 @@ static const struct of_device_id versatile_clcd_of_match[] = { .compatible = "arm,realview-pbx-syscon", .data = (void *)REALVIEW_CLCD_PBX, }, + { + .compatible = "arm,vexpress-muxfpga", + .data = (void *)VEXPRESS_CLCD_V2M, + }, {}, }; @@ -286,12 +293,26 @@ static const struct pl111_variant_data pl111_realview = { .fb_bpp = 16, }; +/* + * Versatile Express PL111 variant, again we just push the maximum + * BPP to 16 to be able to get 1024x768 without saturating the memory + * bus. The clockdivider also seems broken on the Versatile Express. + */ +static const struct pl111_variant_data pl111_vexpress = { + .name = "PL111 Versatile Express", + .formats = pl111_realview_pixel_formats, + .nformats = ARRAY_SIZE(pl111_realview_pixel_formats), + .fb_bpp = 16, + .broken_clockdivider = true, +}; + int pl111_versatile_init(struct device *dev, struct pl111_drm_dev_private *priv) { const struct of_device_id *clcd_id; enum versatile_clcd versatile_clcd_type; struct device_node *np; struct regmap *map; + int ret; np = of_find_matching_node_and_match(NULL, versatile_clcd_of_match, &clcd_id); @@ -301,7 +322,33 @@ int pl111_versatile_init(struct device *dev, struct pl111_drm_dev_private *priv) } versatile_clcd_type = (enum versatile_clcd)clcd_id->data; - map = syscon_node_to_regmap(np); + /* Versatile Express special handling */ + if (versatile_clcd_type == VEXPRESS_CLCD_V2M) { + struct platform_device *pdev; + + /* Registers a driver for the muxfpga */ + ret = vexpress_muxfpga_init(); + if (ret) { + dev_err(dev, "unable to initialize muxfpga driver\n"); + return ret; + } + + /* Call into deep Vexpress configuration API */ + pdev = of_find_device_by_node(np); + if (!pdev) { + dev_err(dev, "can't find the sysreg device, deferring\n"); + return -EPROBE_DEFER; + } + map = dev_get_drvdata(&pdev->dev); + if (!map) { + dev_err(dev, "sysreg has not yet probed\n"); + platform_device_put(pdev); + return -EPROBE_DEFER; + } + } else { + map = syscon_node_to_regmap(np); + } + if (IS_ERR(map)) { dev_err(dev, "no Versatile syscon regmap\n"); return PTR_ERR(map); @@ -340,6 +387,13 @@ int pl111_versatile_init(struct device *dev, struct pl111_drm_dev_private *priv) priv->variant_display_disable = pl111_realview_clcd_disable; dev_info(dev, "set up callbacks for RealView PL111\n"); break; + case VEXPRESS_CLCD_V2M: + priv->variant = &pl111_vexpress; + dev_info(dev, "initializing Versatile Express PL111\n"); + ret = pl111_vexpress_clcd_init(dev, priv, map); + if (ret) + return ret; + break; default: dev_info(dev, "unknown Versatile system controller\n"); break; diff --git a/drivers/gpu/drm/pl111/pl111_vexpress.c b/drivers/gpu/drm/pl111/pl111_vexpress.c new file mode 100644 index 0000000..a534b22 --- /dev/null +++ b/drivers/gpu/drm/pl111/pl111_vexpress.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Versatile Express PL111 handling + * Copyright (C) 2018 Linus Walleij + * + * This module binds to the "arm,vexpress-muxfpga" device on the + * Versatile Express configuration bus and sets up which CLCD instance + * gets muxed out on the DVI bridge. + */ +#include <linux/device.h> +#include <linux/module.h> +#include <linux/regmap.h> +#include <linux/vexpress.h> +#include <linux/platform_device.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/of_platform.h> +#include "pl111_drm.h" +#include "pl111_vexpress.h" + +#define VEXPRESS_FPGAMUX_MOTHERBOARD 0x00 +#define VEXPRESS_FPGAMUX_DAUGHTERBOARD_1 0x01 +#define VEXPRESS_FPGAMUX_DAUGHTERBOARD_2 0x02 + +int pl111_vexpress_clcd_init(struct device *dev, + struct pl111_drm_dev_private *priv, + struct regmap *map) +{ + struct device_node *root; + struct device_node *child; + struct device_node *ct_clcd = NULL; + bool has_coretile_clcd = false; + bool has_coretile_hdlcd = false; + bool mux_motherboard = true; + u32 val; + int ret; + + /* + * Check if we have a CLCD or HDLCD on the core tile by checking if a + * CLCD or HDLCD is available in the root of the device tree. + */ + root = of_find_node_by_path("/"); + if (!root) + return -EINVAL; + + for_each_available_child_of_node(root, child) { + if (of_device_is_compatible(child, "arm,pl111")) { + has_coretile_clcd = true; + ct_clcd = child; + break; + } + if (of_device_is_compatible(child, "arm,hdlcd")) { + has_coretile_hdlcd = true; + break; + } + } + + /* + * If there is a coretile HDLCD and it has a driver, + * do not mux the CLCD on the motherboard to the DVI. + */ + if (has_coretile_hdlcd && IS_ENABLED(CONFIG_DRM_HDLCD)) + mux_motherboard = false; + + /* + * On the Vexpress CA9 we let the CLCD on the coretile + * take precedence, so also in this case do not mux the + * motherboard to the DVI. + */ + if (has_coretile_clcd) + mux_motherboard = false; + + if (mux_motherboard) { + dev_info(dev, "DVI muxed to motherboard CLCD\n"); + val = VEXPRESS_FPGAMUX_MOTHERBOARD; + } else if (ct_clcd == dev->of_node) { + dev_info(dev, + "DVI muxed to daughterboard 1 (core tile) CLCD\n"); + val = VEXPRESS_FPGAMUX_DAUGHTERBOARD_1; + } else { + dev_info(dev, "core tile graphics present\n"); + dev_info(dev, "this device will be deactivated\n"); + return -ENODEV; + } + + ret = regmap_write(map, 0, val); + if (ret) { + dev_err(dev, "error setting DVI muxmode\n"); + return -ENODEV; + } + + return 0; +} + +/* + * This sets up the regmap pointer that will then be retrieved by + * the detection code in pl111_versatile.c and passed in to the + * pl111_vexpress_clcd_init() function above. + */ +static int vexpress_muxfpga_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct regmap *map; + + map = devm_regmap_init_vexpress_config(&pdev->dev); + if (IS_ERR(map)) + return PTR_ERR(map); + dev_set_drvdata(dev, map); + + return 0; +} + +static const struct of_device_id vexpress_muxfpga_match[] = { + { .compatible = "arm,vexpress-muxfpga", } +}; + +static struct platform_driver vexpress_muxfpga_driver = { + .driver = { + .name = "vexpress-muxfpga", + .of_match_table = of_match_ptr(vexpress_muxfpga_match), + }, + .probe = vexpress_muxfpga_probe, +}; + +int vexpress_muxfpga_init(void) +{ + int ret; + + ret = platform_driver_register(&vexpress_muxfpga_driver); + /* -EBUSY just means this driver is already registered */ + if (ret == -EBUSY) + ret = 0; + return ret; +} diff --git a/drivers/gpu/drm/pl111/pl111_vexpress.h b/drivers/gpu/drm/pl111/pl111_vexpress.h new file mode 100644 index 0000000..5d3681b --- /dev/null +++ b/drivers/gpu/drm/pl111/pl111_vexpress.h @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0 + +struct device; +struct pl111_drm_dev_private; +struct regmap; + +#ifdef CONFIG_ARCH_VEXPRESS + +int pl111_vexpress_clcd_init(struct device *dev, + struct pl111_drm_dev_private *priv, + struct regmap *map); + +int vexpress_muxfpga_init(void); + +#else + +static inline int pl111_vexpress_clcd_init(struct device *dev, + struct pl111_drm_dev_private *priv, + struct regmap *map) +{ + return -ENODEV; +} + +static inline int vexpress_muxfpga_init(void) +{ + return 0; +} + +#endif diff --git a/drivers/gpu/drm/qxl/qxl_cmd.c b/drivers/gpu/drm/qxl/qxl_cmd.c index c0fb52c..208af9f 100644 --- a/drivers/gpu/drm/qxl/qxl_cmd.c +++ b/drivers/gpu/drm/qxl/qxl_cmd.c @@ -179,10 +179,9 @@ qxl_push_command_ring_release(struct qxl_device *qdev, struct qxl_release *relea uint32_t type, bool interruptible) { struct qxl_command cmd; - struct qxl_bo_list *entry = list_first_entry(&release->bos, struct qxl_bo_list, tv.head); cmd.type = type; - cmd.data = qxl_bo_physical_address(qdev, to_qxl_bo(entry->tv.bo), release->release_offset); + cmd.data = qxl_bo_physical_address(qdev, release->release_bo, release->release_offset); return qxl_ring_push(qdev->command_ring, &cmd, interruptible); } @@ -192,10 +191,9 @@ qxl_push_cursor_ring_release(struct qxl_device *qdev, struct qxl_release *releas uint32_t type, bool interruptible) { struct qxl_command cmd; - struct qxl_bo_list *entry = list_first_entry(&release->bos, struct qxl_bo_list, tv.head); cmd.type = type; - cmd.data = qxl_bo_physical_address(qdev, to_qxl_bo(entry->tv.bo), release->release_offset); + cmd.data = qxl_bo_physical_address(qdev, release->release_bo, release->release_offset); return qxl_ring_push(qdev->cursor_ring, &cmd, interruptible); } @@ -341,12 +339,9 @@ int qxl_io_update_area(struct qxl_device *qdev, struct qxl_bo *surf, surface_height = surf->surf.height; if (area->left < 0 || area->top < 0 || - area->right > surface_width || area->bottom > surface_height) { - qxl_io_log(qdev, "%s: not doing area update for " - "%d, (%d,%d,%d,%d) (%d,%d)\n", __func__, surface_id, area->left, - area->top, area->right, area->bottom, surface_width, surface_height); + area->right > surface_width || area->bottom > surface_height) return -EINVAL; - } + mutex_lock(&qdev->update_area_mutex); qdev->ram_header->update_area = *area; qdev->ram_header->update_surface = surface_id; @@ -374,6 +369,7 @@ void qxl_io_flush_surfaces(struct qxl_device *qdev) void qxl_io_destroy_primary(struct qxl_device *qdev) { wait_for_io_cmd(qdev, 0, QXL_IO_DESTROY_PRIMARY_ASYNC); + qdev->primary_created = false; } void qxl_io_create_primary(struct qxl_device *qdev, @@ -399,6 +395,7 @@ void qxl_io_create_primary(struct qxl_device *qdev, create->type = QXL_SURF_TYPE_PRIMARY; wait_for_io_cmd(qdev, 0, QXL_IO_CREATE_PRIMARY_ASYNC); + qdev->primary_created = true; } void qxl_io_memslot_add(struct qxl_device *qdev, uint8_t id) @@ -407,20 +404,6 @@ void qxl_io_memslot_add(struct qxl_device *qdev, uint8_t id) wait_for_io_cmd(qdev, id, QXL_IO_MEMSLOT_ADD_ASYNC); } -void qxl_io_log(struct qxl_device *qdev, const char *fmt, ...) -{ - va_list args; - - va_start(args, fmt); - vsnprintf(qdev->ram_header->log_buf, QXL_LOG_BUF_SIZE, fmt, args); - va_end(args); - /* - * DO not do a DRM output here - this will call printk, which will - * call back into qxl for rendering (qxl_fb) - */ - outb(0, qdev->io_base + QXL_IO_LOG); -} - void qxl_io_reset(struct qxl_device *qdev) { outb(0, qdev->io_base + QXL_IO_RESET); @@ -428,19 +411,6 @@ void qxl_io_reset(struct qxl_device *qdev) void qxl_io_monitors_config(struct qxl_device *qdev) { - qxl_io_log(qdev, "%s: %d [%dx%d+%d+%d]\n", __func__, - qdev->monitors_config ? - qdev->monitors_config->count : -1, - qdev->monitors_config && qdev->monitors_config->count ? - qdev->monitors_config->heads[0].width : -1, - qdev->monitors_config && qdev->monitors_config->count ? - qdev->monitors_config->heads[0].height : -1, - qdev->monitors_config && qdev->monitors_config->count ? - qdev->monitors_config->heads[0].x : -1, - qdev->monitors_config && qdev->monitors_config->count ? - qdev->monitors_config->heads[0].y : -1 - ); - wait_for_io_cmd(qdev, 0, QXL_IO_MONITORS_CONFIG_ASYNC); } diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c index ecb35ed..b8cda94 100644 --- a/drivers/gpu/drm/qxl/qxl_display.c +++ b/drivers/gpu/drm/qxl/qxl_display.c @@ -48,12 +48,8 @@ static void qxl_alloc_client_monitors_config(struct qxl_device *qdev, unsigned c qdev->client_monitors_config = kzalloc( sizeof(struct qxl_monitors_config) + sizeof(struct qxl_head) * count, GFP_KERNEL); - if (!qdev->client_monitors_config) { - qxl_io_log(qdev, - "%s: allocation failure for %u heads\n", - __func__, count); + if (!qdev->client_monitors_config) return; - } } qdev->client_monitors_config->count = count; } @@ -74,12 +70,8 @@ static int qxl_display_copy_rom_client_monitors_config(struct qxl_device *qdev) num_monitors = qdev->rom->client_monitors_config.count; crc = crc32(0, (const uint8_t *)&qdev->rom->client_monitors_config, sizeof(qdev->rom->client_monitors_config)); - if (crc != qdev->rom->client_monitors_config_crc) { - qxl_io_log(qdev, "crc mismatch: have %X (%zd) != %X\n", crc, - sizeof(qdev->rom->client_monitors_config), - qdev->rom->client_monitors_config_crc); + if (crc != qdev->rom->client_monitors_config_crc) return MONITORS_CONFIG_BAD_CRC; - } if (!num_monitors) { DRM_DEBUG_KMS("no client monitors configured\n"); return status; @@ -170,12 +162,10 @@ void qxl_display_read_client_monitors_config(struct qxl_device *qdev) udelay(5); } if (status == MONITORS_CONFIG_BAD_CRC) { - qxl_io_log(qdev, "config: bad crc\n"); DRM_DEBUG_KMS("ignoring client monitors config: bad crc"); return; } if (status == MONITORS_CONFIG_UNCHANGED) { - qxl_io_log(qdev, "config: unchanged\n"); DRM_DEBUG_KMS("ignoring client monitors config: unchanged"); return; } @@ -268,6 +258,89 @@ static int qxl_add_common_modes(struct drm_connector *connector, return i - 1; } +static void qxl_send_monitors_config(struct qxl_device *qdev) +{ + int i; + + BUG_ON(!qdev->ram_header->monitors_config); + + if (qdev->monitors_config->count == 0) + return; + + for (i = 0 ; i < qdev->monitors_config->count ; ++i) { + struct qxl_head *head = &qdev->monitors_config->heads[i]; + + if (head->y > 8192 || head->x > 8192 || + head->width > 8192 || head->height > 8192) { + DRM_ERROR("head %d wrong: %dx%d+%d+%d\n", + i, head->width, head->height, + head->x, head->y); + return; + } + } + qxl_io_monitors_config(qdev); +} + +static void qxl_crtc_update_monitors_config(struct drm_crtc *crtc, + const char *reason) +{ + struct drm_device *dev = crtc->dev; + struct qxl_device *qdev = dev->dev_private; + struct qxl_crtc *qcrtc = to_qxl_crtc(crtc); + struct qxl_head head; + int oldcount, i = qcrtc->index; + + if (!qdev->primary_created) { + DRM_DEBUG_KMS("no primary surface, skip (%s)\n", reason); + return; + } + + if (!qdev->monitors_config || + qdev->monitors_config->max_allowed <= i) + return; + + head.id = i; + head.flags = 0; + oldcount = qdev->monitors_config->count; + if (crtc->state->active) { + struct drm_display_mode *mode = &crtc->mode; + head.width = mode->hdisplay; + head.height = mode->vdisplay; + head.x = crtc->x; + head.y = crtc->y; + if (qdev->monitors_config->count < i + 1) + qdev->monitors_config->count = i + 1; + } else if (i > 0) { + head.width = 0; + head.height = 0; + head.x = 0; + head.y = 0; + if (qdev->monitors_config->count == i + 1) + qdev->monitors_config->count = i; + } else { + DRM_DEBUG_KMS("inactive head 0, skip (%s)\n", reason); + return; + } + + if (head.width == qdev->monitors_config->heads[i].width && + head.height == qdev->monitors_config->heads[i].height && + head.x == qdev->monitors_config->heads[i].x && + head.y == qdev->monitors_config->heads[i].y && + oldcount == qdev->monitors_config->count) + return; + + DRM_DEBUG_KMS("head %d, %dx%d, at +%d+%d, %s (%s)\n", + i, head.width, head.height, head.x, head.y, + crtc->state->active ? "on" : "off", reason); + if (oldcount != qdev->monitors_config->count) + DRM_DEBUG_KMS("active heads %d -> %d (%d total)\n", + oldcount, qdev->monitors_config->count, + qdev->monitors_config->max_allowed); + + qdev->monitors_config->heads[i] = head; + qxl_send_monitors_config(qdev); +} + static void qxl_crtc_atomic_flush(struct drm_crtc *crtc, struct drm_crtc_state *old_crtc_state) { @@ -283,6 +356,8 @@ static void qxl_crtc_atomic_flush(struct drm_crtc *crtc, drm_crtc_send_vblank_event(crtc, event); spin_unlock_irqrestore(&dev->event_lock, flags); } + + qxl_crtc_update_monitors_config(crtc, "flush"); } static void qxl_crtc_destroy(struct drm_crtc *crtc) @@ -381,95 +456,19 @@ qxl_framebuffer_init(struct drm_device *dev, return 0; } -static bool qxl_crtc_mode_fixup(struct drm_crtc *crtc, - const struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode) -{ - struct drm_device *dev = crtc->dev; - struct qxl_device *qdev = dev->dev_private; - - qxl_io_log(qdev, "%s: (%d,%d) => (%d,%d)\n", - __func__, - mode->hdisplay, mode->vdisplay, - adjusted_mode->hdisplay, - adjusted_mode->vdisplay); - return true; -} - -static void -qxl_send_monitors_config(struct qxl_device *qdev) -{ - int i; - - BUG_ON(!qdev->ram_header->monitors_config); - - if (qdev->monitors_config->count == 0) { - qxl_io_log(qdev, "%s: 0 monitors??\n", __func__); - return; - } - for (i = 0 ; i < qdev->monitors_config->count ; ++i) { - struct qxl_head *head = &qdev->monitors_config->heads[i]; - - if (head->y > 8192 || head->x > 8192 || - head->width > 8192 || head->height > 8192) { - DRM_ERROR("head %d wrong: %dx%d+%d+%d\n", - i, head->width, head->height, - head->x, head->y); - return; - } - } - qxl_io_monitors_config(qdev); -} - -static void qxl_monitors_config_set(struct qxl_device *qdev, - int index, - unsigned x, unsigned y, - unsigned width, unsigned height, - unsigned surf_id) -{ - DRM_DEBUG_KMS("%d:%dx%d+%d+%d\n", index, width, height, x, y); - qdev->monitors_config->heads[index].x = x; - qdev->monitors_config->heads[index].y = y; - qdev->monitors_config->heads[index].width = width; - qdev->monitors_config->heads[index].height = height; - qdev->monitors_config->heads[index].surface_id = surf_id; - -} - -static void qxl_mode_set_nofb(struct drm_crtc *crtc) -{ - struct qxl_device *qdev = crtc->dev->dev_private; - struct qxl_crtc *qcrtc = to_qxl_crtc(crtc); - struct drm_display_mode *mode = &crtc->mode; - - DRM_DEBUG("Mode set (%d,%d)\n", - mode->hdisplay, mode->vdisplay); - - qxl_monitors_config_set(qdev, qcrtc->index, 0, 0, - mode->hdisplay, mode->vdisplay, 0); - -} - static void qxl_crtc_atomic_enable(struct drm_crtc *crtc, struct drm_crtc_state *old_state) { - DRM_DEBUG("\n"); + qxl_crtc_update_monitors_config(crtc, "enable"); } static void qxl_crtc_atomic_disable(struct drm_crtc *crtc, struct drm_crtc_state *old_state) { - struct qxl_crtc *qcrtc = to_qxl_crtc(crtc); - struct qxl_device *qdev = crtc->dev->dev_private; - - qxl_monitors_config_set(qdev, qcrtc->index, 0, 0, 0, 0, 0); - - qxl_send_monitors_config(qdev); + qxl_crtc_update_monitors_config(crtc, "disable"); } static const struct drm_crtc_helper_funcs qxl_crtc_helper_funcs = { - .mode_fixup = qxl_crtc_mode_fixup, - .mode_set_nofb = qxl_mode_set_nofb, .atomic_flush = qxl_crtc_atomic_flush, .atomic_enable = qxl_crtc_atomic_enable, .atomic_disable = qxl_crtc_atomic_disable, @@ -613,12 +612,6 @@ static void qxl_primary_atomic_disable(struct drm_plane *plane, } } -static int qxl_plane_atomic_check(struct drm_plane *plane, - struct drm_plane_state *state) -{ - return 0; -} - static void qxl_cursor_atomic_update(struct drm_plane *plane, struct drm_plane_state *old_state) { @@ -824,7 +817,6 @@ static const uint32_t qxl_cursor_plane_formats[] = { }; static const struct drm_plane_helper_funcs qxl_cursor_helper_funcs = { - .atomic_check = qxl_plane_atomic_check, .atomic_update = qxl_cursor_atomic_update, .atomic_disable = qxl_cursor_atomic_disable, .prepare_fb = qxl_plane_prepare_fb, @@ -949,81 +941,6 @@ free_mem: return r; } -static void qxl_enc_dpms(struct drm_encoder *encoder, int mode) -{ - DRM_DEBUG("\n"); -} - -static void qxl_enc_prepare(struct drm_encoder *encoder) -{ - DRM_DEBUG("\n"); -} - -static void qxl_write_monitors_config_for_encoder(struct qxl_device *qdev, - struct drm_encoder *encoder) -{ - int i; - struct qxl_output *output = drm_encoder_to_qxl_output(encoder); - struct qxl_head *head; - struct drm_display_mode *mode; - - BUG_ON(!encoder); - /* TODO: ugly, do better */ - i = output->index; - if (!qdev->monitors_config || - qdev->monitors_config->max_allowed <= i) { - DRM_ERROR( - "head number too large or missing monitors config: %p, %d", - qdev->monitors_config, - qdev->monitors_config ? - qdev->monitors_config->max_allowed : -1); - return; - } - if (!encoder->crtc) { - DRM_ERROR("missing crtc on encoder %p\n", encoder); - return; - } - if (i != 0) - DRM_DEBUG("missing for multiple monitors: no head holes\n"); - head = &qdev->monitors_config->heads[i]; - head->id = i; - if (encoder->crtc->enabled) { - mode = &encoder->crtc->mode; - head->width = mode->hdisplay; - head->height = mode->vdisplay; - head->x = encoder->crtc->x; - head->y = encoder->crtc->y; - if (qdev->monitors_config->count < i + 1) - qdev->monitors_config->count = i + 1; - } else { - head->width = 0; - head->height = 0; - head->x = 0; - head->y = 0; - } - DRM_DEBUG_KMS("setting head %d to +%d+%d %dx%d out of %d\n", - i, head->x, head->y, head->width, head->height, qdev->monitors_config->count); - head->flags = 0; - /* TODO - somewhere else to call this for multiple monitors - * (config_commit?) */ - qxl_send_monitors_config(qdev); -} - -static void qxl_enc_commit(struct drm_encoder *encoder) -{ - struct qxl_device *qdev = encoder->dev->dev_private; - - qxl_write_monitors_config_for_encoder(qdev, encoder); - DRM_DEBUG("\n"); -} - -static void qxl_enc_mode_set(struct drm_encoder *encoder, - struct drm_display_mode *mode, - struct drm_display_mode *adjusted_mode) -{ - DRM_DEBUG("\n"); -} - static int qxl_conn_get_modes(struct drm_connector *connector) { unsigned pwidth = 1024; @@ -1037,7 +954,7 @@ static int qxl_conn_get_modes(struct drm_connector *connector) return ret; } -static int qxl_conn_mode_valid(struct drm_connector *connector, +static enum drm_mode_status qxl_conn_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct drm_device *ddev = connector->dev; @@ -1069,10 +986,6 @@ static struct drm_encoder *qxl_best_encoder(struct drm_connector *connector) static const struct drm_encoder_helper_funcs qxl_enc_helper_funcs = { - .dpms = qxl_enc_dpms, - .prepare = qxl_enc_prepare, - .mode_set = qxl_enc_mode_set, - .commit = qxl_enc_commit, }; static const struct drm_connector_helper_funcs qxl_connector_helper_funcs = { @@ -1100,21 +1013,11 @@ static enum drm_connector_status qxl_conn_detect( qxl_head_enabled(&qdev->client_monitors_config->heads[output->index]); DRM_DEBUG("#%d connected: %d\n", output->index, connected); - if (!connected) - qxl_monitors_config_set(qdev, output->index, 0, 0, 0, 0, 0); return connected ? connector_status_connected : connector_status_disconnected; } -static int qxl_conn_set_property(struct drm_connector *connector, - struct drm_property *property, - uint64_t value) -{ - DRM_DEBUG("\n"); - return 0; -} - static void qxl_conn_destroy(struct drm_connector *connector) { struct qxl_output *qxl_output = @@ -1129,7 +1032,6 @@ static const struct drm_connector_funcs qxl_connector_funcs = { .dpms = drm_helper_connector_dpms, .detect = qxl_conn_detect, .fill_modes = drm_helper_probe_single_connector_modes, - .set_property = qxl_conn_set_property, .destroy = qxl_conn_destroy, .reset = drm_atomic_helper_connector_reset, .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h index 00a1a66..01220d3 100644 --- a/drivers/gpu/drm/qxl/qxl_drv.h +++ b/drivers/gpu/drm/qxl/qxl_drv.h @@ -167,6 +167,7 @@ struct qxl_release { int id; int type; + struct qxl_bo *release_bo; uint32_t release_offset; uint32_t surface_release_id; struct ww_acquire_ctx ticket; @@ -298,9 +299,6 @@ struct qxl_device { int monitors_config_height; }; -/* forward declaration for QXL_INFO_IO */ -__printf(2,3) void qxl_io_log(struct qxl_device *qdev, const char *fmt, ...); - extern const struct drm_ioctl_desc qxl_ioctls[]; extern int qxl_max_ioctl; diff --git a/drivers/gpu/drm/qxl/qxl_fb.c b/drivers/gpu/drm/qxl/qxl_fb.c index 3388914..9a67526 100644 --- a/drivers/gpu/drm/qxl/qxl_fb.c +++ b/drivers/gpu/drm/qxl/qxl_fb.c @@ -185,8 +185,6 @@ static int qxlfb_framebuffer_dirty(struct drm_framebuffer *fb, /* * we are using a shadow draw buffer, at qdev->surface0_shadow */ - qxl_io_log(qdev, "dirty x[%d, %d], y[%d, %d]\n", clips->x1, clips->x2, - clips->y1, clips->y2); image->dx = clips->x1; image->dy = clips->y1; image->width = clips->x2 - clips->x1; diff --git a/drivers/gpu/drm/qxl/qxl_ioctl.c b/drivers/gpu/drm/qxl/qxl_ioctl.c index e238a1a..6cc9f33 100644 --- a/drivers/gpu/drm/qxl/qxl_ioctl.c +++ b/drivers/gpu/drm/qxl/qxl_ioctl.c @@ -182,9 +182,9 @@ static int qxl_process_single_command(struct qxl_device *qdev, goto out_free_reloc; /* TODO copy slow path code from i915 */ - fb_cmd = qxl_bo_kmap_atomic_page(qdev, cmd_bo, (release->release_offset & PAGE_SIZE)); + fb_cmd = qxl_bo_kmap_atomic_page(qdev, cmd_bo, (release->release_offset & PAGE_MASK)); unwritten = __copy_from_user_inatomic_nocache - (fb_cmd + sizeof(union qxl_release_info) + (release->release_offset & ~PAGE_SIZE), + (fb_cmd + sizeof(union qxl_release_info) + (release->release_offset & ~PAGE_MASK), u64_to_user_ptr(cmd->command), cmd->command_size); { diff --git a/drivers/gpu/drm/qxl/qxl_irq.c b/drivers/gpu/drm/qxl/qxl_irq.c index 23a4010..3bb31ad 100644 --- a/drivers/gpu/drm/qxl/qxl_irq.c +++ b/drivers/gpu/drm/qxl/qxl_irq.c @@ -57,10 +57,9 @@ irqreturn_t qxl_irq_handler(int irq, void *arg) * to avoid endless loops). */ qdev->irq_received_error++; - qxl_io_log(qdev, "%s: driver is in bug mode.\n", __func__); + DRM_WARN("driver is in bug mode\n"); } if (pending & QXL_INTERRUPT_CLIENT_MONITORS_CONFIG) { - qxl_io_log(qdev, "QXL_INTERRUPT_CLIENT_MONITORS_CONFIG\n"); schedule_work(&qdev->client_monitors_config_work); } qdev->ram_header->int_mask = QXL_INTERRUPT_MASK; diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c index 5d84a66..7cb2145 100644 --- a/drivers/gpu/drm/qxl/qxl_release.c +++ b/drivers/gpu/drm/qxl/qxl_release.c @@ -173,6 +173,7 @@ qxl_release_free_list(struct qxl_release *release) list_del(&entry->tv.head); kfree(entry); } + release->release_bo = NULL; } void @@ -296,7 +297,6 @@ int qxl_alloc_surface_release_reserved(struct qxl_device *qdev, { if (surface_cmd_type == QXL_SURFACE_CMD_DESTROY && create_rel) { int idr_ret; - struct qxl_bo_list *entry = list_first_entry(&create_rel->bos, struct qxl_bo_list, tv.head); struct qxl_bo *bo; union qxl_release_info *info; @@ -304,8 +304,9 @@ int qxl_alloc_surface_release_reserved(struct qxl_device *qdev, idr_ret = qxl_release_alloc(qdev, QXL_RELEASE_SURFACE_CMD, release); if (idr_ret < 0) return idr_ret; - bo = to_qxl_bo(entry->tv.bo); + bo = create_rel->release_bo; + (*release)->release_bo = bo; (*release)->release_offset = create_rel->release_offset + 64; qxl_release_list_add(*release, bo); @@ -365,6 +366,7 @@ int qxl_alloc_release_reserved(struct qxl_device *qdev, unsigned long size, bo = qxl_bo_ref(qdev->current_release_bo[cur_idx]); + (*release)->release_bo = bo; (*release)->release_offset = qdev->current_release_bo_offset[cur_idx] * release_size_per_bo[cur_idx]; qdev->current_release_bo_offset[cur_idx]++; @@ -408,13 +410,12 @@ union qxl_release_info *qxl_release_map(struct qxl_device *qdev, { void *ptr; union qxl_release_info *info; - struct qxl_bo_list *entry = list_first_entry(&release->bos, struct qxl_bo_list, tv.head); - struct qxl_bo *bo = to_qxl_bo(entry->tv.bo); + struct qxl_bo *bo = release->release_bo; - ptr = qxl_bo_kmap_atomic_page(qdev, bo, release->release_offset & PAGE_SIZE); + ptr = qxl_bo_kmap_atomic_page(qdev, bo, release->release_offset & PAGE_MASK); if (!ptr) return NULL; - info = ptr + (release->release_offset & ~PAGE_SIZE); + info = ptr + (release->release_offset & ~PAGE_MASK); return info; } @@ -422,11 +423,10 @@ void qxl_release_unmap(struct qxl_device *qdev, struct qxl_release *release, union qxl_release_info *info) { - struct qxl_bo_list *entry = list_first_entry(&release->bos, struct qxl_bo_list, tv.head); - struct qxl_bo *bo = to_qxl_bo(entry->tv.bo); + struct qxl_bo *bo = release->release_bo; void *ptr; - ptr = ((void *)info) - (release->release_offset & ~PAGE_SIZE); + ptr = ((void *)info) - (release->release_offset & ~PAGE_MASK); qxl_bo_kunmap_atomic_page(qdev, bo, ptr); } diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c index ee2340e3..86a1fb3 100644 --- a/drivers/gpu/drm/qxl/qxl_ttm.c +++ b/drivers/gpu/drm/qxl/qxl_ttm.c @@ -105,16 +105,16 @@ static void qxl_ttm_global_fini(struct qxl_device *qdev) static struct vm_operations_struct qxl_ttm_vm_ops; static const struct vm_operations_struct *ttm_vm_ops; -static int qxl_ttm_fault(struct vm_fault *vmf) +static vm_fault_t qxl_ttm_fault(struct vm_fault *vmf) { struct ttm_buffer_object *bo; - int r; + vm_fault_t ret; bo = (struct ttm_buffer_object *)vmf->vma->vm_private_data; if (bo == NULL) return VM_FAULT_NOPAGE; - r = ttm_vm_ops->fault(vmf); - return r; + ret = ttm_vm_ops->fault(vmf); + return ret; } int qxl_mmap(struct file *filp, struct vm_area_struct *vma) diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c index c442053..f2a0bd1 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c @@ -767,7 +767,8 @@ static irqreturn_t rcar_du_crtc_irq(int irq, void *arg) * Initialization */ -int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index) +int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int swindex, + unsigned int hwindex) { static const unsigned int mmio_offsets[] = { DU0_REG_OFFSET, DU1_REG_OFFSET, DU2_REG_OFFSET, DU3_REG_OFFSET @@ -775,7 +776,7 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index) struct rcar_du_device *rcdu = rgrp->dev; struct platform_device *pdev = to_platform_device(rcdu->dev); - struct rcar_du_crtc *rcrtc = &rcdu->crtcs[index]; + struct rcar_du_crtc *rcrtc = &rcdu->crtcs[swindex]; struct drm_crtc *crtc = &rcrtc->crtc; struct drm_plane *primary; unsigned int irqflags; @@ -787,7 +788,7 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index) /* Get the CRTC clock and the optional external clock. */ if (rcar_du_has(rcdu, RCAR_DU_FEATURE_CRTC_IRQ_CLOCK)) { - sprintf(clk_name, "du.%u", index); + sprintf(clk_name, "du.%u", hwindex); name = clk_name; } else { name = NULL; @@ -795,16 +796,16 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index) rcrtc->clock = devm_clk_get(rcdu->dev, name); if (IS_ERR(rcrtc->clock)) { - dev_err(rcdu->dev, "no clock for CRTC %u\n", index); + dev_err(rcdu->dev, "no clock for DU channel %u\n", hwindex); return PTR_ERR(rcrtc->clock); } - sprintf(clk_name, "dclkin.%u", index); + sprintf(clk_name, "dclkin.%u", hwindex); clk = devm_clk_get(rcdu->dev, clk_name); if (!IS_ERR(clk)) { rcrtc->extclock = clk; } else if (PTR_ERR(rcrtc->clock) == -EPROBE_DEFER) { - dev_info(rcdu->dev, "can't get external clock %u\n", index); + dev_info(rcdu->dev, "can't get external clock %u\n", hwindex); return -EPROBE_DEFER; } @@ -813,13 +814,13 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index) spin_lock_init(&rcrtc->vblank_lock); rcrtc->group = rgrp; - rcrtc->mmio_offset = mmio_offsets[index]; - rcrtc->index = index; + rcrtc->mmio_offset = mmio_offsets[hwindex]; + rcrtc->index = hwindex; if (rcar_du_has(rcdu, RCAR_DU_FEATURE_VSP1_SOURCE)) primary = &rcrtc->vsp->planes[rcrtc->vsp_pipe].plane; else - primary = &rgrp->planes[index % 2].plane; + primary = &rgrp->planes[swindex % 2].plane; ret = drm_crtc_init_with_planes(rcdu->ddev, crtc, primary, NULL, &crtc_funcs, NULL); @@ -833,7 +834,8 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index) /* Register the interrupt handler. */ if (rcar_du_has(rcdu, RCAR_DU_FEATURE_CRTC_IRQ_CLOCK)) { - irq = platform_get_irq(pdev, index); + /* The IRQ's are associated with the CRTC (sw)index. */ + irq = platform_get_irq(pdev, swindex); irqflags = 0; } else { irq = platform_get_irq(pdev, 0); @@ -841,7 +843,7 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index) } if (irq < 0) { - dev_err(rcdu->dev, "no IRQ for CRTC %u\n", index); + dev_err(rcdu->dev, "no IRQ for CRTC %u\n", swindex); return irq; } @@ -849,7 +851,7 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index) dev_name(rcdu->dev), rcrtc); if (ret < 0) { dev_err(rcdu->dev, - "failed to register IRQ for CRTC %u\n", index); + "failed to register IRQ for CRTC %u\n", swindex); return ret; } diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.h b/drivers/gpu/drm/rcar-du/rcar_du_crtc.h index fdc2bf9..84b5e23 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.h +++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.h @@ -80,7 +80,8 @@ enum rcar_du_output { RCAR_DU_OUTPUT_MAX, }; -int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index); +int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int swindex, + unsigned int hwindex); void rcar_du_crtc_suspend(struct rcar_du_crtc *rcrtc); void rcar_du_crtc_resume(struct rcar_du_crtc *rcrtc); diff --git a/drivers/gpu/drm/rcar-du/rcar_du_drv.c b/drivers/gpu/drm/rcar-du/rcar_du_drv.c index 3917d83..02aee6c 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_drv.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_drv.c @@ -40,7 +40,7 @@ static const struct rcar_du_device_info rzg1_du_r8a7743_info = { .gen = 2, .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK | RCAR_DU_FEATURE_EXT_CTRL_REGS, - .num_crtcs = 2, + .channels_mask = BIT(1) | BIT(0), .routes = { /* * R8A7743 has one RGB output and one LVDS output @@ -61,7 +61,7 @@ static const struct rcar_du_device_info rzg1_du_r8a7745_info = { .gen = 2, .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK | RCAR_DU_FEATURE_EXT_CTRL_REGS, - .num_crtcs = 2, + .channels_mask = BIT(1) | BIT(0), .routes = { /* * R8A7745 has two RGB outputs @@ -80,7 +80,7 @@ static const struct rcar_du_device_info rzg1_du_r8a7745_info = { static const struct rcar_du_device_info rcar_du_r8a7779_info = { .gen = 2, .features = 0, - .num_crtcs = 2, + .channels_mask = BIT(1) | BIT(0), .routes = { /* * R8A7779 has two RGB outputs and one (currently unsupported) @@ -102,7 +102,7 @@ static const struct rcar_du_device_info rcar_du_r8a7790_info = { .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK | RCAR_DU_FEATURE_EXT_CTRL_REGS, .quirks = RCAR_DU_QUIRK_ALIGN_128B, - .num_crtcs = 3, + .channels_mask = BIT(2) | BIT(1) | BIT(0), .routes = { /* * R8A7790 has one RGB output, two LVDS outputs and one @@ -129,7 +129,7 @@ static const struct rcar_du_device_info rcar_du_r8a7791_info = { .gen = 2, .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK | RCAR_DU_FEATURE_EXT_CTRL_REGS, - .num_crtcs = 2, + .channels_mask = BIT(1) | BIT(0), .routes = { /* * R8A779[13] has one RGB output, one LVDS output and one @@ -151,7 +151,7 @@ static const struct rcar_du_device_info rcar_du_r8a7792_info = { .gen = 2, .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK | RCAR_DU_FEATURE_EXT_CTRL_REGS, - .num_crtcs = 2, + .channels_mask = BIT(1) | BIT(0), .routes = { /* R8A7792 has two RGB outputs. */ [RCAR_DU_OUTPUT_DPAD0] = { @@ -169,7 +169,7 @@ static const struct rcar_du_device_info rcar_du_r8a7794_info = { .gen = 2, .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK | RCAR_DU_FEATURE_EXT_CTRL_REGS, - .num_crtcs = 2, + .channels_mask = BIT(1) | BIT(0), .routes = { /* * R8A7794 has two RGB outputs and one (currently unsupported) @@ -191,7 +191,7 @@ static const struct rcar_du_device_info rcar_du_r8a7795_info = { .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK | RCAR_DU_FEATURE_EXT_CTRL_REGS | RCAR_DU_FEATURE_VSP1_SOURCE, - .num_crtcs = 4, + .channels_mask = BIT(3) | BIT(2) | BIT(1) | BIT(0), .routes = { /* * R8A7795 has one RGB output, two HDMI outputs and one @@ -215,7 +215,7 @@ static const struct rcar_du_device_info rcar_du_r8a7795_info = { }, }, .num_lvds = 1, - .dpll_ch = BIT(1) | BIT(2), + .dpll_ch = BIT(2) | BIT(1), }; static const struct rcar_du_device_info rcar_du_r8a7796_info = { @@ -223,7 +223,7 @@ static const struct rcar_du_device_info rcar_du_r8a7796_info = { .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK | RCAR_DU_FEATURE_EXT_CTRL_REGS | RCAR_DU_FEATURE_VSP1_SOURCE, - .num_crtcs = 3, + .channels_mask = BIT(2) | BIT(1) | BIT(0), .routes = { /* * R8A7796 has one RGB output, one LVDS output and one HDMI @@ -246,12 +246,40 @@ static const struct rcar_du_device_info rcar_du_r8a7796_info = { .dpll_ch = BIT(1), }; +static const struct rcar_du_device_info rcar_du_r8a77965_info = { + .gen = 3, + .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK + | RCAR_DU_FEATURE_EXT_CTRL_REGS + | RCAR_DU_FEATURE_VSP1_SOURCE, + .channels_mask = BIT(3) | BIT(1) | BIT(0), + .routes = { + /* + * R8A77965 has one RGB output, one LVDS output and one HDMI + * output. + */ + [RCAR_DU_OUTPUT_DPAD0] = { + .possible_crtcs = BIT(2), + .port = 0, + }, + [RCAR_DU_OUTPUT_HDMI0] = { + .possible_crtcs = BIT(1), + .port = 1, + }, + [RCAR_DU_OUTPUT_LVDS0] = { + .possible_crtcs = BIT(0), + .port = 2, + }, + }, + .num_lvds = 1, + .dpll_ch = BIT(1), +}; + static const struct rcar_du_device_info rcar_du_r8a77970_info = { .gen = 3, .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK | RCAR_DU_FEATURE_EXT_CTRL_REGS | RCAR_DU_FEATURE_VSP1_SOURCE, - .num_crtcs = 1, + .channels_mask = BIT(0), .routes = { /* R8A77970 has one RGB output and one LVDS output. */ [RCAR_DU_OUTPUT_DPAD0] = { @@ -277,6 +305,7 @@ static const struct of_device_id rcar_du_of_table[] = { { .compatible = "renesas,du-r8a7794", .data = &rcar_du_r8a7794_info }, { .compatible = "renesas,du-r8a7795", .data = &rcar_du_r8a7795_info }, { .compatible = "renesas,du-r8a7796", .data = &rcar_du_r8a7796_info }, + { .compatible = "renesas,du-r8a77965", .data = &rcar_du_r8a77965_info }, { .compatible = "renesas,du-r8a77970", .data = &rcar_du_r8a77970_info }, { } }; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_drv.h b/drivers/gpu/drm/rcar-du/rcar_du_drv.h index 5c7ec15..b3a25e8 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_drv.h +++ b/drivers/gpu/drm/rcar-du/rcar_du_drv.h @@ -52,7 +52,7 @@ struct rcar_du_output_routing { * @gen: device generation (2 or 3) * @features: device features (RCAR_DU_FEATURE_*) * @quirks: device quirks (RCAR_DU_QUIRK_*) - * @num_crtcs: total number of CRTCs + * @channels_mask: bit mask of available DU channels * @routes: array of CRTC to output routes, indexed by output (RCAR_DU_OUTPUT_*) * @num_lvds: number of internal LVDS encoders */ @@ -60,7 +60,7 @@ struct rcar_du_device_info { unsigned int gen; unsigned int features; unsigned int quirks; - unsigned int num_crtcs; + unsigned int channels_mask; struct rcar_du_output_routing routes[RCAR_DU_OUTPUT_MAX]; unsigned int num_lvds; unsigned int dpll_ch; @@ -87,7 +87,6 @@ struct rcar_du_device { struct rcar_du_vsp vsps[RCAR_DU_MAX_VSPS]; struct { - struct drm_property *alpha; struct drm_property *colorkey; } props; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_group.c b/drivers/gpu/drm/rcar-du/rcar_du_group.c index 2f37ea9..d539cb2 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_group.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_group.c @@ -46,10 +46,13 @@ void rcar_du_group_write(struct rcar_du_group *rgrp, u32 reg, u32 data) static void rcar_du_group_setup_pins(struct rcar_du_group *rgrp) { - u32 defr6 = DEFR6_CODE | DEFR6_ODPM12_DISP; + u32 defr6 = DEFR6_CODE; - if (rgrp->num_crtcs > 1) - defr6 |= DEFR6_ODPM22_DISP; + if (rgrp->channels_mask & BIT(0)) + defr6 |= DEFR6_ODPM02_DISP; + + if (rgrp->channels_mask & BIT(1)) + defr6 |= DEFR6_ODPM12_DISP; rcar_du_group_write(rgrp, DEFR6, defr6); } @@ -80,10 +83,11 @@ static void rcar_du_group_setup_defr8(struct rcar_du_group *rgrp) * On Gen3 VSPD routing can't be configured, but DPAD routing * needs to be set despite having a single option available. */ - u32 crtc = ffs(possible_crtcs) - 1; + unsigned int rgb_crtc = ffs(possible_crtcs) - 1; + struct rcar_du_crtc *crtc = &rcdu->crtcs[rgb_crtc]; - if (crtc / 2 == rgrp->index) - defr8 |= DEFR8_DRGBS_DU(crtc); + if (crtc->index / 2 == rgrp->index) + defr8 |= DEFR8_DRGBS_DU(crtc->index); } rcar_du_group_write(rgrp, DEFR8, defr8); diff --git a/drivers/gpu/drm/rcar-du/rcar_du_group.h b/drivers/gpu/drm/rcar-du/rcar_du_group.h index 5e3adc6..42105ae 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_group.h +++ b/drivers/gpu/drm/rcar-du/rcar_du_group.h @@ -25,6 +25,7 @@ struct rcar_du_device; * @dev: the DU device * @mmio_offset: registers offset in the device memory map * @index: group index + * @channels_mask: bitmask of populated DU channels in this group * @num_crtcs: number of CRTCs in this group (1 or 2) * @use_count: number of users of the group (rcar_du_group_(get|put)) * @used_crtcs: number of CRTCs currently in use @@ -39,6 +40,7 @@ struct rcar_du_group { unsigned int mmio_offset; unsigned int index; + unsigned int channels_mask; unsigned int num_crtcs; unsigned int use_count; unsigned int used_crtcs; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_kms.c b/drivers/gpu/drm/rcar-du/rcar_du_kms.c index 0329b35..f0bc7cc 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_kms.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_kms.c @@ -233,15 +233,7 @@ static int rcar_du_atomic_check(struct drm_device *dev, struct rcar_du_device *rcdu = dev->dev_private; int ret; - ret = drm_atomic_helper_check_modeset(dev, state); - if (ret) - return ret; - - ret = drm_atomic_normalize_zpos(dev, state); - if (ret) - return ret; - - ret = drm_atomic_helper_check_planes(dev, state); + ret = drm_atomic_helper_check(dev, state); if (ret) return ret; @@ -415,11 +407,6 @@ static int rcar_du_encoders_init(struct rcar_du_device *rcdu) static int rcar_du_properties_init(struct rcar_du_device *rcdu) { - rcdu->props.alpha = - drm_property_create_range(rcdu->ddev, 0, "alpha", 0, 255); - if (rcdu->props.alpha == NULL) - return -ENOMEM; - /* * The color key is expressed as an RGB888 triplet stored in a 32-bit * integer in XRGB8888 format. Bit 24 is used as a flag to disable (0) @@ -441,7 +428,7 @@ static int rcar_du_vsps_init(struct rcar_du_device *rcdu) struct { struct device_node *np; unsigned int crtcs_mask; - } vsps[RCAR_DU_MAX_VSPS] = { { 0, }, }; + } vsps[RCAR_DU_MAX_VSPS] = { { NULL, }, }; unsigned int vsps_count = 0; unsigned int cells; unsigned int i; @@ -520,6 +507,8 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu) struct drm_fbdev_cma *fbdev; unsigned int num_encoders; unsigned int num_groups; + unsigned int swindex; + unsigned int hwindex; unsigned int i; int ret; @@ -529,10 +518,11 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu) dev->mode_config.min_height = 0; dev->mode_config.max_width = 4095; dev->mode_config.max_height = 2047; + dev->mode_config.normalize_zpos = true; dev->mode_config.funcs = &rcar_du_mode_config_funcs; dev->mode_config.helper_private = &rcar_du_mode_config_helper; - rcdu->num_crtcs = rcdu->info->num_crtcs; + rcdu->num_crtcs = hweight8(rcdu->info->channels_mask); ret = rcar_du_properties_init(rcdu); if (ret < 0) @@ -542,7 +532,7 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu) * Initialize vertical blanking interrupts handling. Start with vblank * disabled for all CRTCs. */ - ret = drm_vblank_init(dev, (1 << rcdu->info->num_crtcs) - 1); + ret = drm_vblank_init(dev, (1 << rcdu->num_crtcs) - 1); if (ret < 0) return ret; @@ -557,7 +547,10 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu) rgrp->dev = rcdu; rgrp->mmio_offset = mmio_offsets[i]; rgrp->index = i; - rgrp->num_crtcs = min(rcdu->num_crtcs - 2 * i, 2U); + /* Extract the channel mask for this group only. */ + rgrp->channels_mask = (rcdu->info->channels_mask >> (2 * i)) + & GENMASK(1, 0); + rgrp->num_crtcs = hweight8(rgrp->channels_mask); /* * If we have more than one CRTCs in this group pre-associate @@ -584,10 +577,16 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu) } /* Create the CRTCs. */ - for (i = 0; i < rcdu->num_crtcs; ++i) { - struct rcar_du_group *rgrp = &rcdu->groups[i / 2]; + for (swindex = 0, hwindex = 0; swindex < rcdu->num_crtcs; ++hwindex) { + struct rcar_du_group *rgrp; + + /* Skip unpopulated DU channels. */ + if (!(rcdu->info->channels_mask & BIT(hwindex))) + continue; + + rgrp = &rcdu->groups[hwindex / 2]; - ret = rcar_du_crtc_create(rgrp, i); + ret = rcar_du_crtc_create(rgrp, swindex++, hwindex); if (ret < 0) return ret; } diff --git a/drivers/gpu/drm/rcar-du/rcar_du_of.c b/drivers/gpu/drm/rcar-du/rcar_du_of.c index 68a0b82..afef696 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_of.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_of.c @@ -18,6 +18,7 @@ #include "rcar_du_crtc.h" #include "rcar_du_drv.h" +#include "rcar_du_of.h" /* ----------------------------------------------------------------------------- * Generic Overlay Handling diff --git a/drivers/gpu/drm/rcar-du/rcar_du_plane.c b/drivers/gpu/drm/rcar-du/rcar_du_plane.c index 68556bd..c20f7ed 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_plane.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_plane.c @@ -423,7 +423,7 @@ static void rcar_du_plane_setup_mode(struct rcar_du_group *rgrp, rcar_du_plane_write(rgrp, index, PnALPHAR, PnALPHAR_ABIT_0); else rcar_du_plane_write(rgrp, index, PnALPHAR, - PnALPHAR_ABIT_X | state->alpha); + PnALPHAR_ABIT_X | state->state.alpha >> 8); pnmr = PnMR_BM_MD | state->format->pnmr; @@ -692,11 +692,11 @@ static void rcar_du_plane_reset(struct drm_plane *plane) state->hwindex = -1; state->source = RCAR_DU_PLANE_MEMORY; - state->alpha = 255; state->colorkey = RCAR_DU_COLORKEY_NONE; state->state.zpos = plane->type == DRM_PLANE_TYPE_PRIMARY ? 0 : 1; plane->state = &state->state; + plane->state->alpha = DRM_BLEND_ALPHA_OPAQUE; plane->state->plane = plane; } @@ -708,9 +708,7 @@ static int rcar_du_plane_atomic_set_property(struct drm_plane *plane, struct rcar_du_plane_state *rstate = to_rcar_plane_state(state); struct rcar_du_device *rcdu = to_rcar_plane(plane)->group->dev; - if (property == rcdu->props.alpha) - rstate->alpha = val; - else if (property == rcdu->props.colorkey) + if (property == rcdu->props.colorkey) rstate->colorkey = val; else return -EINVAL; @@ -726,9 +724,7 @@ static int rcar_du_plane_atomic_get_property(struct drm_plane *plane, container_of(state, const struct rcar_du_plane_state, state); struct rcar_du_device *rcdu = to_rcar_plane(plane)->group->dev; - if (property == rcdu->props.alpha) - *val = rstate->alpha; - else if (property == rcdu->props.colorkey) + if (property == rcdu->props.colorkey) *val = rstate->colorkey; else return -EINVAL; @@ -797,10 +793,9 @@ int rcar_du_planes_init(struct rcar_du_group *rgrp) continue; drm_object_attach_property(&plane->plane.base, - rcdu->props.alpha, 255); - drm_object_attach_property(&plane->plane.base, rcdu->props.colorkey, RCAR_DU_COLORKEY_NONE); + drm_plane_create_alpha_property(&plane->plane); drm_plane_create_zpos_property(&plane->plane, 1, 1, 7); } diff --git a/drivers/gpu/drm/rcar-du/rcar_du_plane.h b/drivers/gpu/drm/rcar-du/rcar_du_plane.h index 890321b..5c19c69 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_plane.h +++ b/drivers/gpu/drm/rcar-du/rcar_du_plane.h @@ -50,7 +50,6 @@ static inline struct rcar_du_plane *to_rcar_plane(struct drm_plane *plane) * @state: base DRM plane state * @format: information about the pixel format used by the plane * @hwindex: 0-based hardware plane index, -1 means unused - * @alpha: value of the plane alpha property * @colorkey: value of the plane colorkey property */ struct rcar_du_plane_state { @@ -60,7 +59,6 @@ struct rcar_du_plane_state { int hwindex; enum rcar_du_plane_source source; - unsigned int alpha; unsigned int colorkey; }; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_regs.h b/drivers/gpu/drm/rcar-du/rcar_du_regs.h index d5bae99..9dfd220 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_regs.h +++ b/drivers/gpu/drm/rcar-du/rcar_du_regs.h @@ -187,14 +187,14 @@ #define DEFR6 0x000e8 #define DEFR6_CODE (0x7778 << 16) -#define DEFR6_ODPM22_DSMR (0 << 10) -#define DEFR6_ODPM22_DISP (2 << 10) -#define DEFR6_ODPM22_CDE (3 << 10) -#define DEFR6_ODPM22_MASK (3 << 10) -#define DEFR6_ODPM12_DSMR (0 << 8) -#define DEFR6_ODPM12_DISP (2 << 8) -#define DEFR6_ODPM12_CDE (3 << 8) -#define DEFR6_ODPM12_MASK (3 << 8) +#define DEFR6_ODPM12_DSMR (0 << 10) +#define DEFR6_ODPM12_DISP (2 << 10) +#define DEFR6_ODPM12_CDE (3 << 10) +#define DEFR6_ODPM12_MASK (3 << 10) +#define DEFR6_ODPM02_DSMR (0 << 8) +#define DEFR6_ODPM02_DISP (2 << 8) +#define DEFR6_ODPM02_CDE (3 << 8) +#define DEFR6_ODPM02_MASK (3 << 8) #define DEFR6_TCNE1 (1 << 6) #define DEFR6_TCNE0 (1 << 4) #define DEFR6_MLOS1 (1 << 2) diff --git a/drivers/gpu/drm/rcar-du/rcar_du_vsp.c b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c index 2c260c3..27a4408 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_vsp.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c @@ -17,6 +17,7 @@ #include <drm/drm_crtc_helper.h> #include <drm/drm_fb_cma_helper.h> #include <drm/drm_gem_cma_helper.h> +#include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_plane_helper.h> #include <linux/bitops.h> @@ -54,6 +55,7 @@ void rcar_du_vsp_enable(struct rcar_du_crtc *crtc) }; struct rcar_du_plane_state state = { .state = { + .alpha = DRM_BLEND_ALPHA_OPAQUE, .crtc = &crtc->crtc, .dst.x1 = 0, .dst.y1 = 0, @@ -67,7 +69,6 @@ void rcar_du_vsp_enable(struct rcar_du_crtc *crtc) }, .format = rcar_du_format_info(DRM_FORMAT_ARGB8888), .source = RCAR_DU_PLANE_VSPD1, - .alpha = 255, .colorkey = 0, }; @@ -173,7 +174,7 @@ static void rcar_du_vsp_plane_setup(struct rcar_du_vsp_plane *plane) struct vsp1_du_atomic_config cfg = { .pixelformat = 0, .pitch = fb->pitches[0], - .alpha = state->alpha, + .alpha = state->state.alpha >> 8, .zpos = state->state.zpos, }; unsigned int i; @@ -237,6 +238,10 @@ static int rcar_du_vsp_plane_prepare_fb(struct drm_plane *plane, } } + ret = drm_gem_fb_prepare_fb(plane, state); + if (ret) + goto fail; + return 0; fail: @@ -299,18 +304,17 @@ static const struct drm_plane_helper_funcs rcar_du_vsp_plane_helper_funcs = { static struct drm_plane_state * rcar_du_vsp_plane_atomic_duplicate_state(struct drm_plane *plane) { - struct rcar_du_vsp_plane_state *state; struct rcar_du_vsp_plane_state *copy; if (WARN_ON(!plane->state)) return NULL; - state = to_rcar_vsp_plane_state(plane->state); - copy = kmemdup(state, sizeof(*state), GFP_KERNEL); + copy = kzalloc(sizeof(*copy), GFP_KERNEL); if (copy == NULL) return NULL; __drm_atomic_helper_plane_duplicate_state(plane, ©->state); + copy->alpha = to_rcar_vsp_plane_state(plane->state)->alpha; return ©->state; } @@ -335,44 +339,13 @@ static void rcar_du_vsp_plane_reset(struct drm_plane *plane) if (state == NULL) return; - state->alpha = 255; + state->state.alpha = DRM_BLEND_ALPHA_OPAQUE; state->state.zpos = plane->type == DRM_PLANE_TYPE_PRIMARY ? 0 : 1; plane->state = &state->state; plane->state->plane = plane; } -static int rcar_du_vsp_plane_atomic_set_property(struct drm_plane *plane, - struct drm_plane_state *state, struct drm_property *property, - uint64_t val) -{ - struct rcar_du_vsp_plane_state *rstate = to_rcar_vsp_plane_state(state); - struct rcar_du_device *rcdu = to_rcar_vsp_plane(plane)->vsp->dev; - - if (property == rcdu->props.alpha) - rstate->alpha = val; - else - return -EINVAL; - - return 0; -} - -static int rcar_du_vsp_plane_atomic_get_property(struct drm_plane *plane, - const struct drm_plane_state *state, struct drm_property *property, - uint64_t *val) -{ - const struct rcar_du_vsp_plane_state *rstate = - container_of(state, const struct rcar_du_vsp_plane_state, state); - struct rcar_du_device *rcdu = to_rcar_vsp_plane(plane)->vsp->dev; - - if (property == rcdu->props.alpha) - *val = rstate->alpha; - else - return -EINVAL; - - return 0; -} - static const struct drm_plane_funcs rcar_du_vsp_plane_funcs = { .update_plane = drm_atomic_helper_update_plane, .disable_plane = drm_atomic_helper_disable_plane, @@ -380,8 +353,6 @@ static const struct drm_plane_funcs rcar_du_vsp_plane_funcs = { .destroy = drm_plane_cleanup, .atomic_duplicate_state = rcar_du_vsp_plane_atomic_duplicate_state, .atomic_destroy_state = rcar_du_vsp_plane_atomic_destroy_state, - .atomic_set_property = rcar_du_vsp_plane_atomic_set_property, - .atomic_get_property = rcar_du_vsp_plane_atomic_get_property, }; int rcar_du_vsp_init(struct rcar_du_vsp *vsp, struct device_node *np, @@ -438,8 +409,7 @@ int rcar_du_vsp_init(struct rcar_du_vsp *vsp, struct device_node *np, if (type == DRM_PLANE_TYPE_PRIMARY) continue; - drm_object_attach_property(&plane->plane.base, - rcdu->props.alpha, 255); + drm_plane_create_alpha_property(&plane->plane); drm_plane_create_zpos_property(&plane->plane, 1, 1, vsp->num_planes - 1); } diff --git a/drivers/gpu/drm/rcar-du/rcar_du_vsp.h b/drivers/gpu/drm/rcar-du/rcar_du_vsp.h index 4c5d7bb..8a8a25c 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_vsp.h +++ b/drivers/gpu/drm/rcar-du/rcar_du_vsp.h @@ -44,15 +44,12 @@ static inline struct rcar_du_vsp_plane *to_rcar_vsp_plane(struct drm_plane *p) * @state: base DRM plane state * @format: information about the pixel format used by the plane * @sg_tables: scatter-gather tables for the frame buffer memory - * @alpha: value of the plane alpha property */ struct rcar_du_vsp_plane_state { struct drm_plane_state state; const struct rcar_du_format_info *format; struct sg_table sg_tables[3]; - - unsigned int alpha; }; static inline struct rcar_du_vsp_plane_state * diff --git a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c index 3e8bf79..080f053 100644 --- a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c +++ b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c @@ -77,13 +77,13 @@ struct rockchip_dp_device { struct analogix_dp_plat_data plat_data; }; -static void analogix_dp_psr_set(struct drm_encoder *encoder, bool enabled) +static int analogix_dp_psr_set(struct drm_encoder *encoder, bool enabled) { struct rockchip_dp_device *dp = to_dp(encoder); int ret; if (!analogix_dp_psr_enabled(dp->adp)) - return; + return 0; DRM_DEV_DEBUG(dp->dev, "%s PSR...\n", enabled ? "Entry" : "Exit"); @@ -91,13 +91,13 @@ static void analogix_dp_psr_set(struct drm_encoder *encoder, bool enabled) PSR_WAIT_LINE_FLAG_TIMEOUT_MS); if (ret) { DRM_DEV_ERROR(dp->dev, "line flag interrupt did not arrive\n"); - return; + return -ETIMEDOUT; } if (enabled) - analogix_dp_enable_psr(dp->adp); + return analogix_dp_enable_psr(dp->adp); else - analogix_dp_disable_psr(dp->adp); + return analogix_dp_disable_psr(dp->adp); } static int rockchip_dp_pre_init(struct rockchip_dp_device *dp) @@ -109,7 +109,7 @@ static int rockchip_dp_pre_init(struct rockchip_dp_device *dp) return 0; } -static int rockchip_dp_poweron(struct analogix_dp_plat_data *plat_data) +static int rockchip_dp_poweron_start(struct analogix_dp_plat_data *plat_data) { struct rockchip_dp_device *dp = to_dp(plat_data); int ret; @@ -127,7 +127,14 @@ static int rockchip_dp_poweron(struct analogix_dp_plat_data *plat_data) return ret; } - return rockchip_drm_psr_activate(&dp->encoder); + return ret; +} + +static int rockchip_dp_poweron_end(struct analogix_dp_plat_data *plat_data) +{ + struct rockchip_dp_device *dp = to_dp(plat_data); + + return rockchip_drm_psr_inhibit_put(&dp->encoder); } static int rockchip_dp_powerdown(struct analogix_dp_plat_data *plat_data) @@ -135,7 +142,7 @@ static int rockchip_dp_powerdown(struct analogix_dp_plat_data *plat_data) struct rockchip_dp_device *dp = to_dp(plat_data); int ret; - ret = rockchip_drm_psr_deactivate(&dp->encoder); + ret = rockchip_drm_psr_inhibit_get(&dp->encoder); if (ret != 0) return ret; @@ -218,6 +225,7 @@ rockchip_dp_drm_encoder_atomic_check(struct drm_encoder *encoder, struct drm_connector_state *conn_state) { struct rockchip_crtc_state *s = to_rockchip_crtc_state(crtc_state); + struct drm_display_info *di = &conn_state->connector->display_info; /* * The hardware IC designed that VOP must output the RGB10 video @@ -229,6 +237,7 @@ rockchip_dp_drm_encoder_atomic_check(struct drm_encoder *encoder, s->output_mode = ROCKCHIP_OUT_MODE_AAAA; s->output_type = DRM_MODE_CONNECTOR_eDP; + s->output_bpc = di->bpc; return 0; } @@ -328,7 +337,8 @@ static int rockchip_dp_bind(struct device *dev, struct device *master, dp->plat_data.encoder = &dp->encoder; dp->plat_data.dev_type = dp->data->chip_type; - dp->plat_data.power_on = rockchip_dp_poweron; + dp->plat_data.power_on_start = rockchip_dp_poweron_start; + dp->plat_data.power_on_end = rockchip_dp_poweron_end; dp->plat_data.power_off = rockchip_dp_powerdown; dp->plat_data.get_modes = rockchip_dp_get_modes; @@ -358,6 +368,8 @@ static void rockchip_dp_unbind(struct device *dev, struct device *master, analogix_dp_unbind(dp->adp); rockchip_drm_psr_unregister(&dp->encoder); dp->encoder.funcs->destroy(&dp->encoder); + + dp->adp = ERR_PTR(-ENODEV); } static const struct component_ops rockchip_dp_component_ops = { @@ -381,6 +393,7 @@ static int rockchip_dp_probe(struct platform_device *pdev) return -ENOMEM; dp->dev = dev; + dp->adp = ERR_PTR(-ENODEV); dp->plat_data.panel = panel; ret = rockchip_dp_of_probe(dp); @@ -404,6 +417,9 @@ static int rockchip_dp_suspend(struct device *dev) { struct rockchip_dp_device *dp = dev_get_drvdata(dev); + if (IS_ERR(dp->adp)) + return 0; + return analogix_dp_suspend(dp->adp); } @@ -411,6 +427,9 @@ static int rockchip_dp_resume(struct device *dev) { struct rockchip_dp_device *dp = dev_get_drvdata(dev); + if (IS_ERR(dp->adp)) + return 0; + return analogix_dp_resume(dp->adp); } #endif diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h index 9c064a4..3a6ebfc 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h +++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h @@ -36,6 +36,7 @@ struct rockchip_crtc_state { struct drm_crtc_state base; int output_type; int output_mode; + int output_bpc; }; #define to_rockchip_crtc_state(s) \ container_of(s, struct rockchip_crtc_state, base) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c index e266539..d4f4118 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c @@ -167,8 +167,67 @@ err_gem_object_unreference: return ERR_PTR(ret); } +static void +rockchip_drm_psr_inhibit_get_state(struct drm_atomic_state *state) +{ + struct drm_crtc *crtc; + struct drm_crtc_state *crtc_state; + struct drm_encoder *encoder; + u32 encoder_mask = 0; + int i; + + for_each_old_crtc_in_state(state, crtc, crtc_state, i) { + encoder_mask |= crtc_state->encoder_mask; + encoder_mask |= crtc->state->encoder_mask; + } + + drm_for_each_encoder_mask(encoder, state->dev, encoder_mask) + rockchip_drm_psr_inhibit_get(encoder); +} + +static void +rockchip_drm_psr_inhibit_put_state(struct drm_atomic_state *state) +{ + struct drm_crtc *crtc; + struct drm_crtc_state *crtc_state; + struct drm_encoder *encoder; + u32 encoder_mask = 0; + int i; + + for_each_old_crtc_in_state(state, crtc, crtc_state, i) { + encoder_mask |= crtc_state->encoder_mask; + encoder_mask |= crtc->state->encoder_mask; + } + + drm_for_each_encoder_mask(encoder, state->dev, encoder_mask) + rockchip_drm_psr_inhibit_put(encoder); +} + +static void +rockchip_atomic_helper_commit_tail_rpm(struct drm_atomic_state *old_state) +{ + struct drm_device *dev = old_state->dev; + + rockchip_drm_psr_inhibit_get_state(old_state); + + drm_atomic_helper_commit_modeset_disables(dev, old_state); + + drm_atomic_helper_commit_modeset_enables(dev, old_state); + + drm_atomic_helper_commit_planes(dev, old_state, + DRM_PLANE_COMMIT_ACTIVE_ONLY); + + rockchip_drm_psr_inhibit_put_state(old_state); + + drm_atomic_helper_commit_hw_done(old_state); + + drm_atomic_helper_wait_for_vblanks(dev, old_state); + + drm_atomic_helper_cleanup_planes(dev, old_state); +} + static const struct drm_mode_config_helper_funcs rockchip_mode_config_helpers = { - .atomic_commit_tail = drm_atomic_helper_commit_tail_rpm, + .atomic_commit_tail = rockchip_atomic_helper_commit_tail_rpm, }; static const struct drm_mode_config_funcs rockchip_drm_mode_config_funcs = { diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c index 074db7a..a8db758 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c @@ -357,8 +357,8 @@ err_free_rk_obj: } /* - * rockchip_gem_free_object - (struct drm_driver)->gem_free_object callback - * function + * rockchip_gem_free_object - (struct drm_driver)->gem_free_object_unlocked + * callback function */ void rockchip_gem_free_object(struct drm_gem_object *obj) { diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_psr.c b/drivers/gpu/drm/rockchip/rockchip_drm_psr.c index b339ca9..79d00d8 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_psr.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_psr.c @@ -20,42 +20,19 @@ #define PSR_FLUSH_TIMEOUT_MS 100 -enum psr_state { - PSR_FLUSH, - PSR_ENABLE, - PSR_DISABLE, -}; - struct psr_drv { struct list_head list; struct drm_encoder *encoder; struct mutex lock; - bool active; - enum psr_state state; + int inhibit_count; + bool enabled; struct delayed_work flush_work; - void (*set)(struct drm_encoder *encoder, bool enable); + int (*set)(struct drm_encoder *encoder, bool enable); }; -static struct psr_drv *find_psr_by_crtc(struct drm_crtc *crtc) -{ - struct rockchip_drm_private *drm_drv = crtc->dev->dev_private; - struct psr_drv *psr; - - mutex_lock(&drm_drv->psr_list_lock); - list_for_each_entry(psr, &drm_drv->psr_list, list) { - if (psr->encoder->crtc == crtc) - goto out; - } - psr = ERR_PTR(-ENODEV); - -out: - mutex_unlock(&drm_drv->psr_list_lock); - return psr; -} - static struct psr_drv *find_psr_by_encoder(struct drm_encoder *encoder) { struct rockchip_drm_private *drm_drv = encoder->dev->dev_private; @@ -73,46 +50,22 @@ out: return psr; } -static void psr_set_state_locked(struct psr_drv *psr, enum psr_state state) +static int psr_set_state_locked(struct psr_drv *psr, bool enable) { - /* - * Allowed finite state machine: - * - * PSR_ENABLE < = = = = = > PSR_FLUSH - * | ^ | - * | | | - * v | | - * PSR_DISABLE < - - - - - - - - - - */ - if (state == psr->state || !psr->active) - return; - - /* Already disabled in flush, change the state, but not the hardware */ - if (state == PSR_DISABLE && psr->state == PSR_FLUSH) { - psr->state = state; - return; - } + int ret; - psr->state = state; + if (psr->inhibit_count > 0) + return -EINVAL; - /* Actually commit the state change to hardware */ - switch (psr->state) { - case PSR_ENABLE: - psr->set(psr->encoder, true); - break; + if (enable == psr->enabled) + return 0; - case PSR_DISABLE: - case PSR_FLUSH: - psr->set(psr->encoder, false); - break; - } -} + ret = psr->set(psr->encoder, enable); + if (ret) + return ret; -static void psr_set_state(struct psr_drv *psr, enum psr_state state) -{ - mutex_lock(&psr->lock); - psr_set_state_locked(psr, state); - mutex_unlock(&psr->lock); + psr->enabled = enable; + return 0; } static void psr_flush_handler(struct work_struct *work) @@ -120,21 +73,24 @@ static void psr_flush_handler(struct work_struct *work) struct psr_drv *psr = container_of(to_delayed_work(work), struct psr_drv, flush_work); - /* If the state has changed since we initiated the flush, do nothing */ mutex_lock(&psr->lock); - if (psr->state == PSR_FLUSH) - psr_set_state_locked(psr, PSR_ENABLE); + psr_set_state_locked(psr, true); mutex_unlock(&psr->lock); } /** - * rockchip_drm_psr_activate - activate PSR on the given pipe + * rockchip_drm_psr_inhibit_put - release PSR inhibit on given encoder * @encoder: encoder to obtain the PSR encoder * + * Decrements PSR inhibit count on given encoder. Should be called only + * for a PSR inhibit count increment done before. If PSR inhibit counter + * reaches zero, PSR flush work is scheduled to make the hardware enter + * PSR mode in PSR_FLUSH_TIMEOUT_MS. + * * Returns: * Zero on success, negative errno on failure. */ -int rockchip_drm_psr_activate(struct drm_encoder *encoder) +int rockchip_drm_psr_inhibit_put(struct drm_encoder *encoder) { struct psr_drv *psr = find_psr_by_encoder(encoder); @@ -142,21 +98,30 @@ int rockchip_drm_psr_activate(struct drm_encoder *encoder) return PTR_ERR(psr); mutex_lock(&psr->lock); - psr->active = true; + --psr->inhibit_count; + WARN_ON(psr->inhibit_count < 0); + if (!psr->inhibit_count) + mod_delayed_work(system_wq, &psr->flush_work, + PSR_FLUSH_TIMEOUT_MS); mutex_unlock(&psr->lock); return 0; } -EXPORT_SYMBOL(rockchip_drm_psr_activate); +EXPORT_SYMBOL(rockchip_drm_psr_inhibit_put); /** - * rockchip_drm_psr_deactivate - deactivate PSR on the given pipe + * rockchip_drm_psr_inhibit_get - acquire PSR inhibit on given encoder * @encoder: encoder to obtain the PSR encoder * + * Increments PSR inhibit count on given encoder. This function guarantees + * that after it returns PSR is turned off on given encoder and no PSR-related + * hardware state change occurs at least until a matching call to + * rockchip_drm_psr_inhibit_put() is done. + * * Returns: * Zero on success, negative errno on failure. */ -int rockchip_drm_psr_deactivate(struct drm_encoder *encoder) +int rockchip_drm_psr_inhibit_get(struct drm_encoder *encoder) { struct psr_drv *psr = find_psr_by_encoder(encoder); @@ -164,37 +129,25 @@ int rockchip_drm_psr_deactivate(struct drm_encoder *encoder) return PTR_ERR(psr); mutex_lock(&psr->lock); - psr->active = false; + psr_set_state_locked(psr, false); + ++psr->inhibit_count; mutex_unlock(&psr->lock); cancel_delayed_work_sync(&psr->flush_work); return 0; } -EXPORT_SYMBOL(rockchip_drm_psr_deactivate); +EXPORT_SYMBOL(rockchip_drm_psr_inhibit_get); static void rockchip_drm_do_flush(struct psr_drv *psr) { - psr_set_state(psr, PSR_FLUSH); - mod_delayed_work(system_wq, &psr->flush_work, PSR_FLUSH_TIMEOUT_MS); -} - -/** - * rockchip_drm_psr_flush - flush a single pipe - * @crtc: CRTC of the pipe to flush - * - * Returns: - * 0 on success, -errno on fail - */ -int rockchip_drm_psr_flush(struct drm_crtc *crtc) -{ - struct psr_drv *psr = find_psr_by_crtc(crtc); - if (IS_ERR(psr)) - return PTR_ERR(psr); + cancel_delayed_work_sync(&psr->flush_work); - rockchip_drm_do_flush(psr); - return 0; + mutex_lock(&psr->lock); + if (!psr_set_state_locked(psr, false)) + mod_delayed_work(system_wq, &psr->flush_work, + PSR_FLUSH_TIMEOUT_MS); + mutex_unlock(&psr->lock); } -EXPORT_SYMBOL(rockchip_drm_psr_flush); /** * rockchip_drm_psr_flush_all - force to flush all registered PSR encoders @@ -225,11 +178,16 @@ EXPORT_SYMBOL(rockchip_drm_psr_flush_all); * @encoder: encoder that obtain the PSR function * @psr_set: call back to set PSR state * + * The function returns with PSR inhibit counter initialized with one + * and the caller (typically encoder driver) needs to call + * rockchip_drm_psr_inhibit_put() when it becomes ready to accept PSR + * enable request. + * * Returns: * Zero on success, negative errno on failure. */ int rockchip_drm_psr_register(struct drm_encoder *encoder, - void (*psr_set)(struct drm_encoder *, bool enable)) + int (*psr_set)(struct drm_encoder *, bool enable)) { struct rockchip_drm_private *drm_drv = encoder->dev->dev_private; struct psr_drv *psr; @@ -244,8 +202,8 @@ int rockchip_drm_psr_register(struct drm_encoder *encoder, INIT_DELAYED_WORK(&psr->flush_work, psr_flush_handler); mutex_init(&psr->lock); - psr->active = true; - psr->state = PSR_DISABLE; + psr->inhibit_count = 1; + psr->enabled = false; psr->encoder = encoder; psr->set = psr_set; @@ -262,6 +220,11 @@ EXPORT_SYMBOL(rockchip_drm_psr_register); * @encoder: encoder that obtain the PSR function * @psr_set: call back to set PSR state * + * It is expected that the PSR inhibit counter is 1 when this function is + * called, which corresponds to a state when related encoder has been + * disconnected from any CRTCs and its driver called + * rockchip_drm_psr_inhibit_get() to stop the PSR logic. + * * Returns: * Zero on success, negative errno on failure. */ @@ -273,7 +236,12 @@ void rockchip_drm_psr_unregister(struct drm_encoder *encoder) mutex_lock(&drm_drv->psr_list_lock); list_for_each_entry_safe(psr, n, &drm_drv->psr_list, list) { if (psr->encoder == encoder) { - cancel_delayed_work_sync(&psr->flush_work); + /* + * Any other value would mean that the encoder + * is still in use. + */ + WARN_ON(psr->inhibit_count != 1); + list_del(&psr->list); kfree(psr); } diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_psr.h b/drivers/gpu/drm/rockchip/rockchip_drm_psr.h index b1ea015..860c624 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_psr.h +++ b/drivers/gpu/drm/rockchip/rockchip_drm_psr.h @@ -16,13 +16,12 @@ #define __ROCKCHIP_DRM_PSR___ void rockchip_drm_psr_flush_all(struct drm_device *dev); -int rockchip_drm_psr_flush(struct drm_crtc *crtc); -int rockchip_drm_psr_activate(struct drm_encoder *encoder); -int rockchip_drm_psr_deactivate(struct drm_encoder *encoder); +int rockchip_drm_psr_inhibit_put(struct drm_encoder *encoder); +int rockchip_drm_psr_inhibit_get(struct drm_encoder *encoder); int rockchip_drm_psr_register(struct drm_encoder *encoder, - void (*psr_set)(struct drm_encoder *, bool enable)); + int (*psr_set)(struct drm_encoder *, bool enable)); void rockchip_drm_psr_unregister(struct drm_encoder *encoder); #endif /* __ROCKCHIP_DRM_PSR__ */ diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c index 53d4afe..2121345 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c @@ -76,6 +76,9 @@ #define VOP_WIN_GET_YRGBADDR(vop, win) \ vop_readl(vop, win->base + win->phy->yrgb_mst.offset) +#define VOP_WIN_TO_INDEX(vop_win) \ + ((vop_win) - (vop_win)->vop->win) + #define to_vop(x) container_of(x, struct vop, crtc) #define to_vop_win(x) container_of(x, struct vop_win, base) @@ -708,6 +711,7 @@ static void vop_plane_atomic_update(struct drm_plane *plane, dma_addr_t dma_addr; uint32_t val; bool rb_swap; + int win_index = VOP_WIN_TO_INDEX(vop_win); int format; /* @@ -777,7 +781,14 @@ static void vop_plane_atomic_update(struct drm_plane *plane, rb_swap = has_rb_swapped(fb->format->format); VOP_WIN_SET(vop, win, rb_swap, rb_swap); - if (fb->format->has_alpha) { + /* + * Blending win0 with the background color doesn't seem to work + * correctly. We only get the background color, no matter the contents + * of the win0 framebuffer. However, blending pre-multiplied color + * with the default opaque black default background color is a no-op, + * so we can just disable blending to get the correct result. + */ + if (fb->format->has_alpha && win_index > 0) { VOP_WIN_SET(vop, win, dst_alpha_ctl, DST_FACTOR_M0(ALPHA_SRC_INVERSE)); val = SRC_ALPHA_EN(1) | SRC_COLOR_M0(ALPHA_SRC_PRE_MUL) | @@ -925,6 +936,12 @@ static void vop_crtc_atomic_enable(struct drm_crtc *crtc, if (s->output_mode == ROCKCHIP_OUT_MODE_AAAA && !(vop_data->feature & VOP_FEATURE_OUTPUT_RGB10)) s->output_mode = ROCKCHIP_OUT_MODE_P888; + + if (s->output_mode == ROCKCHIP_OUT_MODE_AAAA && s->output_bpc == 8) + VOP_REG_SET(vop, common, pre_dither_down, 1); + else + VOP_REG_SET(vop, common, pre_dither_down, 0); + VOP_REG_SET(vop, common, out_mode, s->output_mode); VOP_REG_SET(vop, modeset, htotal_pw, (htotal << 16) | hsync_len); @@ -1017,22 +1034,15 @@ static void vop_crtc_atomic_flush(struct drm_crtc *crtc, continue; drm_framebuffer_get(old_plane_state->fb); + WARN_ON(drm_crtc_vblank_get(crtc) != 0); drm_flip_work_queue(&vop->fb_unref_work, old_plane_state->fb); set_bit(VOP_PENDING_FB_UNREF, &vop->pending); - WARN_ON(drm_crtc_vblank_get(crtc) != 0); } } -static void vop_crtc_atomic_begin(struct drm_crtc *crtc, - struct drm_crtc_state *old_crtc_state) -{ - rockchip_drm_psr_flush(crtc); -} - static const struct drm_crtc_helper_funcs vop_crtc_helper_funcs = { .mode_fixup = vop_crtc_mode_fixup, .atomic_flush = vop_crtc_atomic_flush, - .atomic_begin = vop_crtc_atomic_begin, .atomic_enable = vop_crtc_atomic_enable, .atomic_disable = vop_crtc_atomic_disable, }; diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.h b/drivers/gpu/drm/rockchip/rockchip_drm_vop.h index 56bbd2e..084acdd 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.h +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.h @@ -67,6 +67,7 @@ struct vop_common { struct vop_reg cfg_done; struct vop_reg dsp_blank; struct vop_reg data_blank; + struct vop_reg pre_dither_down; struct vop_reg dither_down; struct vop_reg dither_up; struct vop_reg gate_en; diff --git a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c b/drivers/gpu/drm/rockchip/rockchip_vop_reg.c index 2e4eea3..08023d3 100644 --- a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c +++ b/drivers/gpu/drm/rockchip/rockchip_vop_reg.c @@ -264,6 +264,7 @@ static const struct vop_common rk3288_common = { .standby = VOP_REG_SYNC(RK3288_SYS_CTRL, 0x1, 22), .gate_en = VOP_REG(RK3288_SYS_CTRL, 0x1, 23), .mmu_en = VOP_REG(RK3288_SYS_CTRL, 0x1, 20), + .pre_dither_down = VOP_REG(RK3288_DSP_CTRL1, 0x1, 1), .dither_down = VOP_REG(RK3288_DSP_CTRL1, 0xf, 1), .dither_up = VOP_REG(RK3288_DSP_CTRL1, 0x1, 6), .data_blank = VOP_REG(RK3288_DSP_CTRL0, 0x1, 19), diff --git a/drivers/gpu/drm/selftests/Makefile b/drivers/gpu/drm/selftests/Makefile index 4aebfc7..9fc349f 100644 --- a/drivers/gpu/drm/selftests/Makefile +++ b/drivers/gpu/drm/selftests/Makefile @@ -1 +1 @@ -obj-$(CONFIG_DRM_DEBUG_MM_SELFTEST) += test-drm_mm.o +obj-$(CONFIG_DRM_DEBUG_SELFTEST) += test-drm_mm.o test-drm-helper.o diff --git a/drivers/gpu/drm/selftests/drm_helper_selftests.h b/drivers/gpu/drm/selftests/drm_helper_selftests.h new file mode 100644 index 0000000..9771290 --- /dev/null +++ b/drivers/gpu/drm/selftests/drm_helper_selftests.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* List each unit test as selftest(name, function) + * + * The name is used as both an enum and expanded as igt__name to create + * a module parameter. It must be unique and legal for a C identifier. + * + * Tests are executed in order by igt/drm_selftests_helper + */ +selftest(check_plane_state, igt_check_plane_state) diff --git a/drivers/gpu/drm/selftests/test-drm-helper.c b/drivers/gpu/drm/selftests/test-drm-helper.c new file mode 100644 index 0000000..a015712 --- /dev/null +++ b/drivers/gpu/drm/selftests/test-drm-helper.c @@ -0,0 +1,247 @@ +/* + * Test cases for the drm_kms_helper functions + */ + +#define pr_fmt(fmt) "drm_kms_helper: " fmt + +#include <linux/module.h> + +#include <drm/drm_atomic_helper.h> +#include <drm/drm_plane_helper.h> +#include <drm/drm_modes.h> + +#define TESTS "drm_helper_selftests.h" +#include "drm_selftest.h" + +#define FAIL(test, msg, ...) \ + do { \ + if (test) { \ + pr_err("%s/%u: " msg, __FUNCTION__, __LINE__, ##__VA_ARGS__); \ + return -EINVAL; \ + } \ + } while (0) + +#define FAIL_ON(x) FAIL((x), "%s", "FAIL_ON(" __stringify(x) ")\n") + +static void set_src(struct drm_plane_state *plane_state, + unsigned src_x, unsigned src_y, + unsigned src_w, unsigned src_h) +{ + plane_state->src_x = src_x; + plane_state->src_y = src_y; + plane_state->src_w = src_w; + plane_state->src_h = src_h; +} + +static bool check_src_eq(struct drm_plane_state *plane_state, + unsigned src_x, unsigned src_y, + unsigned src_w, unsigned src_h) +{ + if (plane_state->src.x1 < 0) { + pr_err("src x coordinate %x should never be below 0.\n", plane_state->src.x1); + drm_rect_debug_print("src: ", &plane_state->src, true); + return false; + } + if (plane_state->src.y1 < 0) { + pr_err("src y coordinate %x should never be below 0.\n", plane_state->src.y1); + drm_rect_debug_print("src: ", &plane_state->src, true); + return false; + } + + if (plane_state->src.x1 != src_x || + plane_state->src.y1 != src_y || + drm_rect_width(&plane_state->src) != src_w || + drm_rect_height(&plane_state->src) != src_h) { + drm_rect_debug_print("src: ", &plane_state->src, true); + return false; + } + + return true; +} + +static void set_crtc(struct drm_plane_state *plane_state, + int crtc_x, int crtc_y, + unsigned crtc_w, unsigned crtc_h) +{ + plane_state->crtc_x = crtc_x; + plane_state->crtc_y = crtc_y; + plane_state->crtc_w = crtc_w; + plane_state->crtc_h = crtc_h; +} + +static bool check_crtc_eq(struct drm_plane_state *plane_state, + int crtc_x, int crtc_y, + unsigned crtc_w, unsigned crtc_h) +{ + if (plane_state->dst.x1 != crtc_x || + plane_state->dst.y1 != crtc_y || + drm_rect_width(&plane_state->dst) != crtc_w || + drm_rect_height(&plane_state->dst) != crtc_h) { + drm_rect_debug_print("dst: ", &plane_state->dst, false); + + return false; + } + + return true; +} + +static int igt_check_plane_state(void *ignored) +{ + int ret; + + const struct drm_crtc_state crtc_state = { + .crtc = ZERO_SIZE_PTR, + .enable = true, + .active = true, + .mode = { + DRM_MODE("1024x768", 0, 65000, 1024, 1048, + 1184, 1344, 0, 768, 771, 777, 806, 0, + DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) + }, + }; + struct drm_framebuffer fb = { + .width = 2048, + .height = 2048 + }; + struct drm_plane_state plane_state = { + .crtc = ZERO_SIZE_PTR, + .fb = &fb, + .rotation = DRM_MODE_ROTATE_0 + }; + + /* Simple clipping, no scaling. */ + set_src(&plane_state, 0, 0, fb.width << 16, fb.height << 16); + set_crtc(&plane_state, 0, 0, fb.width, fb.height); + ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state, + DRM_PLANE_HELPER_NO_SCALING, + DRM_PLANE_HELPER_NO_SCALING, + false, false); + FAIL(ret < 0, "Simple clipping check should pass\n"); + FAIL_ON(!plane_state.visible); + FAIL_ON(!check_src_eq(&plane_state, 0, 0, 1024 << 16, 768 << 16)); + FAIL_ON(!check_crtc_eq(&plane_state, 0, 0, 1024, 768)); + + /* Rotated clipping + reflection, no scaling. */ + plane_state.rotation = DRM_MODE_ROTATE_90 | DRM_MODE_REFLECT_X; + ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state, + DRM_PLANE_HELPER_NO_SCALING, + DRM_PLANE_HELPER_NO_SCALING, + false, false); + FAIL(ret < 0, "Rotated clipping check should pass\n"); + FAIL_ON(!plane_state.visible); + FAIL_ON(!check_src_eq(&plane_state, 0, 0, 768 << 16, 1024 << 16)); + FAIL_ON(!check_crtc_eq(&plane_state, 0, 0, 1024, 768)); + plane_state.rotation = DRM_MODE_ROTATE_0; + + /* Check whether positioning works correctly. */ + set_src(&plane_state, 0, 0, 1023 << 16, 767 << 16); + set_crtc(&plane_state, 0, 0, 1023, 767); + ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state, + DRM_PLANE_HELPER_NO_SCALING, + DRM_PLANE_HELPER_NO_SCALING, + false, false); + FAIL(!ret, "Should not be able to position on the crtc with can_position=false\n"); + + ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state, + DRM_PLANE_HELPER_NO_SCALING, + DRM_PLANE_HELPER_NO_SCALING, + true, false); + FAIL(ret < 0, "Simple positioning should work\n"); + FAIL_ON(!plane_state.visible); + FAIL_ON(!check_src_eq(&plane_state, 0, 0, 1023 << 16, 767 << 16)); + FAIL_ON(!check_crtc_eq(&plane_state, 0, 0, 1023, 767)); + + /* Simple scaling tests. */ + set_src(&plane_state, 0, 0, 512 << 16, 384 << 16); + set_crtc(&plane_state, 0, 0, 1024, 768); + ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state, + 0x8001, + DRM_PLANE_HELPER_NO_SCALING, + false, false); + FAIL(!ret, "Upscaling out of range should fail.\n"); + ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state, + 0x8000, + DRM_PLANE_HELPER_NO_SCALING, + false, false); + FAIL(ret < 0, "Upscaling exactly 2x should work\n"); + FAIL_ON(!plane_state.visible); + FAIL_ON(!check_src_eq(&plane_state, 0, 0, 512 << 16, 384 << 16)); + FAIL_ON(!check_crtc_eq(&plane_state, 0, 0, 1024, 768)); + + set_src(&plane_state, 0, 0, 2048 << 16, 1536 << 16); + ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state, + DRM_PLANE_HELPER_NO_SCALING, + 0x1ffff, false, false); + FAIL(!ret, "Downscaling out of range should fail.\n"); + ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state, + DRM_PLANE_HELPER_NO_SCALING, + 0x20000, false, false); + FAIL(ret < 0, "Should succeed with exact scaling limit\n"); + FAIL_ON(!plane_state.visible); + FAIL_ON(!check_src_eq(&plane_state, 0, 0, 2048 << 16, 1536 << 16)); + FAIL_ON(!check_crtc_eq(&plane_state, 0, 0, 1024, 768)); + + /* Testing rounding errors. */ + set_src(&plane_state, 0, 0, 0x40001, 0x40001); + set_crtc(&plane_state, 1022, 766, 4, 4); + ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state, + DRM_PLANE_HELPER_NO_SCALING, + 0x10001, + true, false); + FAIL(ret < 0, "Should succeed by clipping to exact multiple"); + FAIL_ON(!plane_state.visible); + FAIL_ON(!check_src_eq(&plane_state, 0, 0, 2 << 16, 2 << 16)); + FAIL_ON(!check_crtc_eq(&plane_state, 1022, 766, 2, 2)); + + set_src(&plane_state, 0x20001, 0x20001, 0x4040001, 0x3040001); + set_crtc(&plane_state, -2, -2, 1028, 772); + ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state, + DRM_PLANE_HELPER_NO_SCALING, + 0x10001, + false, false); + FAIL(ret < 0, "Should succeed by clipping to exact multiple"); + FAIL_ON(!plane_state.visible); + FAIL_ON(!check_src_eq(&plane_state, 0x40002, 0x40002, 1024 << 16, 768 << 16)); + FAIL_ON(!check_crtc_eq(&plane_state, 0, 0, 1024, 768)); + + set_src(&plane_state, 0, 0, 0x3ffff, 0x3ffff); + set_crtc(&plane_state, 1022, 766, 4, 4); + ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state, + 0xffff, + DRM_PLANE_HELPER_NO_SCALING, + true, false); + FAIL(ret < 0, "Should succeed by clipping to exact multiple"); + FAIL_ON(!plane_state.visible); + /* Should not be rounded to 0x20001, which would be upscaling. */ + FAIL_ON(!check_src_eq(&plane_state, 0, 0, 2 << 16, 2 << 16)); + FAIL_ON(!check_crtc_eq(&plane_state, 1022, 766, 2, 2)); + + set_src(&plane_state, 0x1ffff, 0x1ffff, 0x403ffff, 0x303ffff); + set_crtc(&plane_state, -2, -2, 1028, 772); + ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state, + 0xffff, + DRM_PLANE_HELPER_NO_SCALING, + false, false); + FAIL(ret < 0, "Should succeed by clipping to exact multiple"); + FAIL_ON(!plane_state.visible); + FAIL_ON(!check_src_eq(&plane_state, 0x3fffe, 0x3fffe, 1024 << 16, 768 << 16)); + FAIL_ON(!check_crtc_eq(&plane_state, 0, 0, 1024, 768)); + + return 0; +} + +#include "drm_selftest.c" + +static int __init test_drm_helper_init(void) +{ + int err; + + err = run_selftests(selftests, ARRAY_SIZE(selftests), NULL); + + return err > 0 ? 0 : err; +} + +module_init(test_drm_helper_init); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/sti/Kconfig b/drivers/gpu/drm/sti/Kconfig index cca4b3c..1963cc1 100644 --- a/drivers/gpu/drm/sti/Kconfig +++ b/drivers/gpu/drm/sti/Kconfig @@ -1,6 +1,6 @@ config DRM_STI tristate "DRM Support for STMicroelectronics SoC stiH4xx Series" - depends on DRM && (ARCH_STI || ARCH_MULTIPLATFORM) + depends on OF && DRM && (ARCH_STI || ARCH_MULTIPLATFORM) select RESET_CONTROLLER select DRM_KMS_HELPER select DRM_GEM_CMA_HELPER @@ -8,6 +8,5 @@ config DRM_STI select DRM_PANEL select FW_LOADER select SND_SOC_HDMI_CODEC if SND_SOC - select OF help Choose this option to enable DRM on STM stiH4xx chipset diff --git a/drivers/gpu/drm/sti/sti_crtc.c b/drivers/gpu/drm/sti/sti_crtc.c index 21e50d7..5824e6a 100644 --- a/drivers/gpu/drm/sti/sti_crtc.c +++ b/drivers/gpu/drm/sti/sti_crtc.c @@ -357,7 +357,7 @@ int sti_crtc_init(struct drm_device *drm_dev, struct sti_mixer *mixer, res = drm_crtc_init_with_planes(drm_dev, crtc, primary, cursor, &sti_crtc_funcs, NULL); if (res) { - DRM_ERROR("Can't initialze CRTC\n"); + DRM_ERROR("Can't initialize CRTC\n"); return -EINVAL; } diff --git a/drivers/gpu/drm/sti/sti_drv.c b/drivers/gpu/drm/sti/sti_drv.c index 55b6967..90c46b4 100644 --- a/drivers/gpu/drm/sti/sti_drv.c +++ b/drivers/gpu/drm/sti/sti_drv.c @@ -119,30 +119,10 @@ err: return ret; } -static int sti_atomic_check(struct drm_device *dev, - struct drm_atomic_state *state) -{ - int ret; - - ret = drm_atomic_helper_check_modeset(dev, state); - if (ret) - return ret; - - ret = drm_atomic_normalize_zpos(dev, state); - if (ret) - return ret; - - ret = drm_atomic_helper_check_planes(dev, state); - if (ret) - return ret; - - return ret; -} - static const struct drm_mode_config_funcs sti_mode_config_funcs = { .fb_create = drm_gem_fb_create, .output_poll_changed = drm_fb_helper_output_poll_changed, - .atomic_check = sti_atomic_check, + .atomic_check = drm_atomic_helper_check, .atomic_commit = drm_atomic_helper_commit, }; @@ -160,6 +140,8 @@ static void sti_mode_config_init(struct drm_device *dev) dev->mode_config.max_height = STI_MAX_FB_HEIGHT; dev->mode_config.funcs = &sti_mode_config_funcs; + + dev->mode_config.normalize_zpos = true; } DEFINE_DRM_GEM_CMA_FOPS(sti_driver_fops); diff --git a/drivers/gpu/drm/sti/sti_plane.c b/drivers/gpu/drm/sti/sti_plane.c index b074609..b48cd86 100644 --- a/drivers/gpu/drm/sti/sti_plane.c +++ b/drivers/gpu/drm/sti/sti_plane.c @@ -40,6 +40,7 @@ void sti_plane_update_fps(struct sti_plane *plane, bool new_frame, bool new_field) { + struct drm_plane_state *state = plane->drm_plane.state; ktime_t now; struct sti_fps_info *fps; int fpks, fipks, ms_since_last, num_frames, num_fields; @@ -66,14 +67,14 @@ void sti_plane_update_fps(struct sti_plane *plane, fps->last_timestamp = now; fps->last_frame_counter = fps->curr_frame_counter; - if (plane->drm_plane.fb) { + if (state->fb) { fpks = (num_frames * 1000000) / ms_since_last; snprintf(plane->fps_info.fps_str, FPS_LENGTH, "%-8s %4dx%-4d %.4s @ %3d.%-3.3d fps (%s)", plane->drm_plane.name, - plane->drm_plane.fb->width, - plane->drm_plane.fb->height, - (char *)&plane->drm_plane.fb->format->format, + state->fb->width, + state->fb->height, + (char *)&state->fb->format->format, fpks / 1000, fpks % 1000, sti_plane_to_str(plane)); } diff --git a/drivers/gpu/drm/stm/drv.c b/drivers/gpu/drm/stm/drv.c index 9ab00a8..8698e08 100644 --- a/drivers/gpu/drm/stm/drv.c +++ b/drivers/gpu/drm/stm/drv.c @@ -72,8 +72,6 @@ static struct drm_driver drv_driver = { .gem_prime_vmap = drm_gem_cma_prime_vmap, .gem_prime_vunmap = drm_gem_cma_prime_vunmap, .gem_prime_mmap = drm_gem_cma_prime_mmap, - .enable_vblank = ltdc_crtc_enable_vblank, - .disable_vblank = ltdc_crtc_disable_vblank, }; static int drv_load(struct drm_device *ddev) diff --git a/drivers/gpu/drm/stm/ltdc.c b/drivers/gpu/drm/stm/ltdc.c index 1a3277e..d997a60 100644 --- a/drivers/gpu/drm/stm/ltdc.c +++ b/drivers/gpu/drm/stm/ltdc.c @@ -392,9 +392,6 @@ static void ltdc_crtc_update_clut(struct drm_crtc *crtc) u32 val; int i; - if (!crtc || !crtc->state) - return; - if (!crtc->state->color_mgmt_changed || !crtc->state->gamma_lut) return; @@ -448,6 +445,43 @@ static void ltdc_crtc_atomic_disable(struct drm_crtc *crtc, reg_set(ldev->regs, LTDC_SRCR, SRCR_IMR); } +#define CLK_TOLERANCE_HZ 50 + +static enum drm_mode_status +ltdc_crtc_mode_valid(struct drm_crtc *crtc, + const struct drm_display_mode *mode) +{ + struct ltdc_device *ldev = crtc_to_ltdc(crtc); + int target = mode->clock * 1000; + int target_min = target - CLK_TOLERANCE_HZ; + int target_max = target + CLK_TOLERANCE_HZ; + int result; + + /* + * Accept all "preferred" modes: + * - this is important for panels because panel clock tolerances are + * bigger than hdmi ones and there is no reason to not accept them + * (the fps may vary a little but it is not a problem). + * - the hdmi preferred mode will be accepted too, but userland will + * be able to use others hdmi "valid" modes if necessary. + */ + if (mode->type & DRM_MODE_TYPE_PREFERRED) + return MODE_OK; + + result = clk_round_rate(ldev->pixel_clk, target); + + DRM_DEBUG_DRIVER("clk rate target %d, available %d\n", target, result); + + /* + * Filter modes according to the clock value, particularly useful for + * hdmi modes that require precise pixel clocks. + */ + if (result < target_min || result > target_max) + return MODE_CLOCK_RANGE; + + return MODE_OK; +} + static bool ltdc_crtc_mode_fixup(struct drm_crtc *crtc, const struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) @@ -562,6 +596,7 @@ static void ltdc_crtc_atomic_flush(struct drm_crtc *crtc, } static const struct drm_crtc_helper_funcs ltdc_crtc_helper_funcs = { + .mode_valid = ltdc_crtc_mode_valid, .mode_fixup = ltdc_crtc_mode_fixup, .mode_set_nofb = ltdc_crtc_mode_set_nofb, .atomic_flush = ltdc_crtc_atomic_flush, @@ -569,9 +604,9 @@ static const struct drm_crtc_helper_funcs ltdc_crtc_helper_funcs = { .atomic_disable = ltdc_crtc_atomic_disable, }; -int ltdc_crtc_enable_vblank(struct drm_device *ddev, unsigned int pipe) +static int ltdc_crtc_enable_vblank(struct drm_crtc *crtc) { - struct ltdc_device *ldev = ddev->dev_private; + struct ltdc_device *ldev = crtc_to_ltdc(crtc); DRM_DEBUG_DRIVER("\n"); reg_set(ldev->regs, LTDC_IER, IER_LIE); @@ -579,9 +614,9 @@ int ltdc_crtc_enable_vblank(struct drm_device *ddev, unsigned int pipe) return 0; } -void ltdc_crtc_disable_vblank(struct drm_device *ddev, unsigned int pipe) +static void ltdc_crtc_disable_vblank(struct drm_crtc *crtc) { - struct ltdc_device *ldev = ddev->dev_private; + struct ltdc_device *ldev = crtc_to_ltdc(crtc); DRM_DEBUG_DRIVER("\n"); reg_clear(ldev->regs, LTDC_IER, IER_LIE); @@ -594,6 +629,8 @@ static const struct drm_crtc_funcs ltdc_crtc_funcs = { .reset = drm_atomic_helper_crtc_reset, .atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state, .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state, + .enable_vblank = ltdc_crtc_enable_vblank, + .disable_vblank = ltdc_crtc_disable_vblank, .gamma_set = drm_atomic_helper_legacy_gamma_set, }; @@ -727,6 +764,8 @@ static void ltdc_plane_atomic_update(struct drm_plane *plane, reg_update_bits(ldev->regs, LTDC_L1CR + lofs, LXCR_LEN | LXCR_CLUTEN, val); + ldev->plane_fpsi[plane->index].counter++; + mutex_lock(&ldev->err_lock); if (ldev->error_status & ISR_FUIF) { DRM_DEBUG_DRIVER("Fifo underrun\n"); @@ -752,6 +791,25 @@ static void ltdc_plane_atomic_disable(struct drm_plane *plane, oldstate->crtc->base.id, plane->base.id); } +static void ltdc_plane_atomic_print_state(struct drm_printer *p, + const struct drm_plane_state *state) +{ + struct drm_plane *plane = state->plane; + struct ltdc_device *ldev = plane_to_ltdc(plane); + struct fps_info *fpsi = &ldev->plane_fpsi[plane->index]; + int ms_since_last; + ktime_t now; + + now = ktime_get(); + ms_since_last = ktime_to_ms(ktime_sub(now, fpsi->last_timestamp)); + + drm_printf(p, "\tuser_updates=%dfps\n", + DIV_ROUND_CLOSEST(fpsi->counter * 1000, ms_since_last)); + + fpsi->last_timestamp = now; + fpsi->counter = 0; +} + static const struct drm_plane_funcs ltdc_plane_funcs = { .update_plane = drm_atomic_helper_update_plane, .disable_plane = drm_atomic_helper_disable_plane, @@ -759,6 +817,7 @@ static const struct drm_plane_funcs ltdc_plane_funcs = { .reset = drm_atomic_helper_plane_reset, .atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state, .atomic_destroy_state = drm_atomic_helper_plane_destroy_state, + .atomic_print_state = ltdc_plane_atomic_print_state, }; static const struct drm_plane_helper_funcs ltdc_plane_helper_funcs = { @@ -801,13 +860,13 @@ static struct drm_plane *ltdc_plane_create(struct drm_device *ddev, plane = devm_kzalloc(dev, sizeof(*plane), GFP_KERNEL); if (!plane) - return 0; + return NULL; ret = drm_universal_plane_init(ddev, plane, possible_crtcs, <dc_plane_funcs, formats, nb_fmt, NULL, type, NULL); if (ret < 0) - return 0; + return NULL; drm_plane_helper_add(plane, <dc_plane_helper_funcs); @@ -966,14 +1025,13 @@ int ltdc_load(struct drm_device *ddev) &bridge[i]); /* - * If at least one endpoint is ready, continue probing, - * else if at least one endpoint is -EPROBE_DEFER and - * there is no previous ready endpoints, defer probing. + * If at least one endpoint is -EPROBE_DEFER, defer probing, + * else if at least one endpoint is ready, continue probing. */ - if (!ret) + if (ret == -EPROBE_DEFER) + return ret; + else if (!ret) endpoint_not_ready = 0; - else if (ret == -EPROBE_DEFER && endpoint_not_ready) - endpoint_not_ready = -EPROBE_DEFER; } if (endpoint_not_ready) diff --git a/drivers/gpu/drm/stm/ltdc.h b/drivers/gpu/drm/stm/ltdc.h index edb2681..1e16d6a 100644 --- a/drivers/gpu/drm/stm/ltdc.h +++ b/drivers/gpu/drm/stm/ltdc.h @@ -20,6 +20,13 @@ struct ltdc_caps { bool non_alpha_only_l1; /* non-native no-alpha formats on layer 1 */ }; +#define LTDC_MAX_LAYER 4 + +struct fps_info { + unsigned int counter; + ktime_t last_timestamp; +}; + struct ltdc_device { void __iomem *regs; struct clk *pixel_clk; /* lcd pixel clock */ @@ -27,10 +34,9 @@ struct ltdc_device { struct ltdc_caps caps; u32 error_status; u32 irq_status; + struct fps_info plane_fpsi[LTDC_MAX_LAYER]; }; -int ltdc_crtc_enable_vblank(struct drm_device *dev, unsigned int pipe); -void ltdc_crtc_disable_vblank(struct drm_device *dev, unsigned int pipe); int ltdc_load(struct drm_device *ddev); void ltdc_unload(struct drm_device *ddev); diff --git a/drivers/gpu/drm/sun4i/Kconfig b/drivers/gpu/drm/sun4i/Kconfig index eee6bc0..156a865 100644 --- a/drivers/gpu/drm/sun4i/Kconfig +++ b/drivers/gpu/drm/sun4i/Kconfig @@ -40,6 +40,16 @@ config DRM_SUN4I_BACKEND do some alpha blending and feed graphics to TCON. If M is selected the module will be called sun4i-backend. +config DRM_SUN6I_DSI + tristate "Allwinner A31 MIPI-DSI Controller Support" + default MACH_SUN8I + select CRC_CCITT + select DRM_MIPI_DSI + help + Choose this option if you want have an Allwinner SoC with + MIPI-DSI support. If M is selected the module will be called + sun6i-dsi + config DRM_SUN8I_DW_HDMI tristate "Support for Allwinner version of DesignWare HDMI" depends on DRM_SUN4I diff --git a/drivers/gpu/drm/sun4i/Makefile b/drivers/gpu/drm/sun4i/Makefile index 330843c..2589f4a 100644 --- a/drivers/gpu/drm/sun4i/Makefile +++ b/drivers/gpu/drm/sun4i/Makefile @@ -24,6 +24,9 @@ sun4i-tcon-y += sun4i_lvds.o sun4i-tcon-y += sun4i_tcon.o sun4i-tcon-y += sun4i_rgb.o +sun6i-dsi-y += sun6i_mipi_dphy.o +sun6i-dsi-y += sun6i_mipi_dsi.o + obj-$(CONFIG_DRM_SUN4I) += sun4i-drm.o obj-$(CONFIG_DRM_SUN4I) += sun4i-tcon.o obj-$(CONFIG_DRM_SUN4I) += sun4i_tv.o @@ -31,5 +34,6 @@ obj-$(CONFIG_DRM_SUN4I) += sun6i_drc.o obj-$(CONFIG_DRM_SUN4I_BACKEND) += sun4i-backend.o sun4i-frontend.o obj-$(CONFIG_DRM_SUN4I_HDMI) += sun4i-drm-hdmi.o +obj-$(CONFIG_DRM_SUN6I_DSI) += sun6i-dsi.o obj-$(CONFIG_DRM_SUN8I_DW_HDMI) += sun8i-drm-hdmi.o obj-$(CONFIG_DRM_SUN8I_MIXER) += sun8i-mixer.o diff --git a/drivers/gpu/drm/sun4i/sun4i_backend.c b/drivers/gpu/drm/sun4i/sun4i_backend.c index 9bad54f..de0a76d 100644 --- a/drivers/gpu/drm/sun4i/sun4i_backend.c +++ b/drivers/gpu/drm/sun4i/sun4i_backend.c @@ -295,6 +295,15 @@ int sun4i_backend_update_layer_formats(struct sun4i_backend *backend, DRM_DEBUG_DRIVER("Switching display backend interlaced mode %s\n", interlaced ? "on" : "off"); + val = SUN4I_BACKEND_ATTCTL_REG0_LAY_GLBALPHA(state->alpha >> 8); + if (state->alpha != DRM_BLEND_ALPHA_OPAQUE) + val |= SUN4I_BACKEND_ATTCTL_REG0_LAY_GLBALPHA_EN; + regmap_update_bits(backend->engine.regs, + SUN4I_BACKEND_ATTCTL_REG0(layer), + SUN4I_BACKEND_ATTCTL_REG0_LAY_GLBALPHA_MASK | + SUN4I_BACKEND_ATTCTL_REG0_LAY_GLBALPHA_EN, + val); + if (sun4i_backend_format_is_yuv(fb->format->format)) return sun4i_backend_update_yuv_format(backend, layer, plane); @@ -490,7 +499,7 @@ static int sun4i_backend_atomic_check(struct sunxi_engine *engine, DRM_DEBUG_DRIVER("Plane FB format is %s\n", drm_get_format_name(fb->format->format, &format_name)); - if (fb->format->has_alpha) + if (fb->format->has_alpha || (plane_state->alpha != DRM_BLEND_ALPHA_OPAQUE)) num_alpha_planes++; if (sun4i_backend_format_is_yuv(fb->format->format)) { @@ -548,7 +557,8 @@ static int sun4i_backend_atomic_check(struct sunxi_engine *engine, } /* We can't have an alpha plane at the lowest position */ - if (plane_states[0]->fb->format->has_alpha) + if (plane_states[0]->fb->format->has_alpha || + (plane_states[0]->alpha != DRM_BLEND_ALPHA_OPAQUE)) return -EINVAL; for (i = 1; i < num_planes; i++) { @@ -560,7 +570,7 @@ static int sun4i_backend_atomic_check(struct sunxi_engine *engine, * The only alpha position is the lowest plane of the * second pipe. */ - if (fb->format->has_alpha) + if (fb->format->has_alpha || (p_state->alpha != DRM_BLEND_ALPHA_OPAQUE)) current_pipe++; s_state->pipe = current_pipe; diff --git a/drivers/gpu/drm/sun4i/sun4i_backend.h b/drivers/gpu/drm/sun4i/sun4i_backend.h index 316f217..4caee03 100644 --- a/drivers/gpu/drm/sun4i/sun4i_backend.h +++ b/drivers/gpu/drm/sun4i/sun4i_backend.h @@ -68,12 +68,15 @@ #define SUN4I_BACKEND_CKMIN_REG 0x884 #define SUN4I_BACKEND_CKCFG_REG 0x888 #define SUN4I_BACKEND_ATTCTL_REG0(l) (0x890 + (0x4 * (l))) +#define SUN4I_BACKEND_ATTCTL_REG0_LAY_GLBALPHA_MASK GENMASK(31, 24) +#define SUN4I_BACKEND_ATTCTL_REG0_LAY_GLBALPHA(x) ((x) << 24) #define SUN4I_BACKEND_ATTCTL_REG0_LAY_PIPESEL_MASK BIT(15) #define SUN4I_BACKEND_ATTCTL_REG0_LAY_PIPESEL(x) ((x) << 15) #define SUN4I_BACKEND_ATTCTL_REG0_LAY_PRISEL_MASK GENMASK(11, 10) #define SUN4I_BACKEND_ATTCTL_REG0_LAY_PRISEL(x) ((x) << 10) #define SUN4I_BACKEND_ATTCTL_REG0_LAY_YUVEN BIT(2) #define SUN4I_BACKEND_ATTCTL_REG0_LAY_VDOEN BIT(1) +#define SUN4I_BACKEND_ATTCTL_REG0_LAY_GLBALPHA_EN BIT(0) #define SUN4I_BACKEND_ATTCTL_REG1(l) (0x8a0 + (0x4 * (l))) #define SUN4I_BACKEND_ATTCTL_REG1_LAY_HSCAFCT GENMASK(15, 14) diff --git a/drivers/gpu/drm/sun4i/sun4i_layer.c b/drivers/gpu/drm/sun4i/sun4i_layer.c index 2949a3c..750ad24 100644 --- a/drivers/gpu/drm/sun4i/sun4i_layer.c +++ b/drivers/gpu/drm/sun4i/sun4i_layer.c @@ -37,6 +37,7 @@ static void sun4i_backend_layer_reset(struct drm_plane *plane) if (state) { plane->state = &state->state; plane->state->plane = plane; + plane->state->alpha = DRM_BLEND_ALPHA_OPAQUE; plane->state->zpos = layer->id; } } @@ -167,6 +168,7 @@ static struct sun4i_layer *sun4i_layer_init_one(struct drm_device *drm, &sun4i_backend_layer_helper_funcs); layer->backend = backend; + drm_plane_create_alpha_property(&layer->plane); drm_plane_create_zpos_property(&layer->plane, 0, 0, SUN4I_BACKEND_NUM_LAYERS - 1); diff --git a/drivers/gpu/drm/sun4i/sun4i_lvds.c b/drivers/gpu/drm/sun4i/sun4i_lvds.c index bffff4c..be3f14d 100644 --- a/drivers/gpu/drm/sun4i/sun4i_lvds.c +++ b/drivers/gpu/drm/sun4i/sun4i_lvds.c @@ -94,64 +94,9 @@ static void sun4i_lvds_encoder_disable(struct drm_encoder *encoder) } } -static enum drm_mode_status sun4i_lvds_encoder_mode_valid(struct drm_encoder *crtc, - const struct drm_display_mode *mode) -{ - struct sun4i_lvds *lvds = drm_encoder_to_sun4i_lvds(crtc); - struct sun4i_tcon *tcon = lvds->tcon; - u32 hsync = mode->hsync_end - mode->hsync_start; - u32 vsync = mode->vsync_end - mode->vsync_start; - unsigned long rate = mode->clock * 1000; - long rounded_rate; - - DRM_DEBUG_DRIVER("Validating modes...\n"); - - if (hsync < 1) - return MODE_HSYNC_NARROW; - - if (hsync > 0x3ff) - return MODE_HSYNC_WIDE; - - if ((mode->hdisplay < 1) || (mode->htotal < 1)) - return MODE_H_ILLEGAL; - - if ((mode->hdisplay > 0x7ff) || (mode->htotal > 0xfff)) - return MODE_BAD_HVALUE; - - DRM_DEBUG_DRIVER("Horizontal parameters OK\n"); - - if (vsync < 1) - return MODE_VSYNC_NARROW; - - if (vsync > 0x3ff) - return MODE_VSYNC_WIDE; - - if ((mode->vdisplay < 1) || (mode->vtotal < 1)) - return MODE_V_ILLEGAL; - - if ((mode->vdisplay > 0x7ff) || (mode->vtotal > 0xfff)) - return MODE_BAD_VVALUE; - - DRM_DEBUG_DRIVER("Vertical parameters OK\n"); - - tcon->dclk_min_div = 7; - tcon->dclk_max_div = 7; - rounded_rate = clk_round_rate(tcon->dclk, rate); - if (rounded_rate < rate) - return MODE_CLOCK_LOW; - - if (rounded_rate > rate) - return MODE_CLOCK_HIGH; - - DRM_DEBUG_DRIVER("Clock rate OK\n"); - - return MODE_OK; -} - static const struct drm_encoder_helper_funcs sun4i_lvds_enc_helper_funcs = { .disable = sun4i_lvds_encoder_disable, .enable = sun4i_lvds_encoder_enable, - .mode_valid = sun4i_lvds_encoder_mode_valid, }; static const struct drm_encoder_funcs sun4i_lvds_enc_funcs = { diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c index c3d92d5..08747fc 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tcon.c +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c @@ -35,6 +35,7 @@ #include "sun4i_lvds.h" #include "sun4i_rgb.h" #include "sun4i_tcon.h" +#include "sun6i_mipi_dsi.h" #include "sunxi_engine.h" static struct drm_connector *sun4i_tcon_get_connector(const struct drm_encoder *encoder) @@ -169,6 +170,7 @@ void sun4i_tcon_set_status(struct sun4i_tcon *tcon, case DRM_MODE_ENCODER_LVDS: is_lvds = true; /* Fallthrough */ + case DRM_MODE_ENCODER_DSI: case DRM_MODE_ENCODER_NONE: channel = 0; break; @@ -201,7 +203,8 @@ void sun4i_tcon_enable_vblank(struct sun4i_tcon *tcon, bool enable) DRM_DEBUG_DRIVER("%sabling VBLANK interrupt\n", enable ? "En" : "Dis"); mask = SUN4I_TCON_GINT0_VBLANK_ENABLE(0) | - SUN4I_TCON_GINT0_VBLANK_ENABLE(1); + SUN4I_TCON_GINT0_VBLANK_ENABLE(1) | + SUN4I_TCON_GINT0_TCON0_TRI_FINISH_ENABLE; if (enable) val = mask; @@ -273,6 +276,71 @@ static void sun4i_tcon0_mode_set_common(struct sun4i_tcon *tcon, SUN4I_TCON0_BASIC0_Y(mode->crtc_vdisplay)); } +static void sun4i_tcon0_mode_set_cpu(struct sun4i_tcon *tcon, + struct mipi_dsi_device *device, + const struct drm_display_mode *mode) +{ + u8 bpp = mipi_dsi_pixel_format_to_bpp(device->format); + u8 lanes = device->lanes; + u32 block_space, start_delay; + u32 tcon_div; + + tcon->dclk_min_div = 4; + tcon->dclk_max_div = 127; + + sun4i_tcon0_mode_set_common(tcon, mode); + + regmap_update_bits(tcon->regs, SUN4I_TCON0_CTL_REG, + SUN4I_TCON0_CTL_IF_MASK, + SUN4I_TCON0_CTL_IF_8080); + + regmap_write(tcon->regs, SUN4I_TCON_ECC_FIFO_REG, + SUN4I_TCON_ECC_FIFO_EN); + + regmap_write(tcon->regs, SUN4I_TCON0_CPU_IF_REG, + SUN4I_TCON0_CPU_IF_MODE_DSI | + SUN4I_TCON0_CPU_IF_TRI_FIFO_FLUSH | + SUN4I_TCON0_CPU_IF_TRI_FIFO_EN | + SUN4I_TCON0_CPU_IF_TRI_EN); + + /* + * This looks suspicious, but it works... + * + * The datasheet says that this should be set higher than 20 * + * pixel cycle, but it's not clear what a pixel cycle is. + */ + regmap_read(tcon->regs, SUN4I_TCON0_DCLK_REG, &tcon_div); + tcon_div &= GENMASK(6, 0); + block_space = mode->htotal * bpp / (tcon_div * lanes); + block_space -= mode->hdisplay + 40; + + regmap_write(tcon->regs, SUN4I_TCON0_CPU_TRI0_REG, + SUN4I_TCON0_CPU_TRI0_BLOCK_SPACE(block_space) | + SUN4I_TCON0_CPU_TRI0_BLOCK_SIZE(mode->hdisplay)); + + regmap_write(tcon->regs, SUN4I_TCON0_CPU_TRI1_REG, + SUN4I_TCON0_CPU_TRI1_BLOCK_NUM(mode->vdisplay)); + + start_delay = (mode->crtc_vtotal - mode->crtc_vdisplay - 10 - 1); + start_delay = start_delay * mode->crtc_htotal * 149; + start_delay = start_delay / (mode->crtc_clock / 1000) / 8; + regmap_write(tcon->regs, SUN4I_TCON0_CPU_TRI2_REG, + SUN4I_TCON0_CPU_TRI2_TRANS_START_SET(10) | + SUN4I_TCON0_CPU_TRI2_START_DELAY(start_delay)); + + /* + * The Allwinner BSP has a comment that the period should be + * the display clock * 15, but uses an hardcoded 3000... + */ + regmap_write(tcon->regs, SUN4I_TCON_SAFE_PERIOD_REG, + SUN4I_TCON_SAFE_PERIOD_NUM(3000) | + SUN4I_TCON_SAFE_PERIOD_MODE(3)); + + /* Enable the output on the pins */ + regmap_write(tcon->regs, SUN4I_TCON0_IO_TRI_REG, + 0xe0000000); +} + static void sun4i_tcon0_mode_set_lvds(struct sun4i_tcon *tcon, const struct drm_encoder *encoder, const struct drm_display_mode *mode) @@ -538,7 +606,17 @@ void sun4i_tcon_mode_set(struct sun4i_tcon *tcon, const struct drm_encoder *encoder, const struct drm_display_mode *mode) { + struct sun6i_dsi *dsi; + switch (encoder->encoder_type) { + case DRM_MODE_ENCODER_DSI: + /* + * This is not really elegant, but it's the "cleaner" + * way I could think of... + */ + dsi = encoder_to_sun6i_dsi(encoder); + sun4i_tcon0_mode_set_cpu(tcon, dsi->device, mode); + break; case DRM_MODE_ENCODER_LVDS: sun4i_tcon0_mode_set_lvds(tcon, encoder, mode); break; @@ -582,7 +660,8 @@ static irqreturn_t sun4i_tcon_handler(int irq, void *private) regmap_read(tcon->regs, SUN4I_TCON_GINT0_REG, &status); if (!(status & (SUN4I_TCON_GINT0_VBLANK_INT(0) | - SUN4I_TCON_GINT0_VBLANK_INT(1)))) + SUN4I_TCON_GINT0_VBLANK_INT(1) | + SUN4I_TCON_GINT0_TCON0_TRI_FINISH_INT))) return IRQ_NONE; drm_crtc_handle_vblank(&scrtc->crtc); @@ -591,7 +670,8 @@ static irqreturn_t sun4i_tcon_handler(int irq, void *private) /* Acknowledge the interrupt */ regmap_update_bits(tcon->regs, SUN4I_TCON_GINT0_REG, SUN4I_TCON_GINT0_VBLANK_INT(0) | - SUN4I_TCON_GINT0_VBLANK_INT(1), + SUN4I_TCON_GINT0_VBLANK_INT(1) | + SUN4I_TCON_GINT0_TCON0_TRI_FINISH_INT, 0); if (engine->ops->vblank_quirk) diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.h b/drivers/gpu/drm/sun4i/sun4i_tcon.h index 161e094..f6a071c 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tcon.h +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.h @@ -28,13 +28,32 @@ #define SUN4I_TCON_GINT0_REG 0x4 #define SUN4I_TCON_GINT0_VBLANK_ENABLE(pipe) BIT(31 - (pipe)) +#define SUN4I_TCON_GINT0_TCON0_TRI_FINISH_ENABLE BIT(27) +#define SUN4I_TCON_GINT0_TCON0_TRI_COUNTER_ENABLE BIT(26) #define SUN4I_TCON_GINT0_VBLANK_INT(pipe) BIT(15 - (pipe)) +#define SUN4I_TCON_GINT0_TCON0_TRI_FINISH_INT BIT(11) +#define SUN4I_TCON_GINT0_TCON0_TRI_COUNTER_INT BIT(10) #define SUN4I_TCON_GINT1_REG 0x8 + #define SUN4I_TCON_FRM_CTL_REG 0x10 +#define SUN4I_TCON_FRM_CTL_EN BIT(31) + +#define SUN4I_TCON_FRM_SEED_PR_REG 0x14 +#define SUN4I_TCON_FRM_SEED_PG_REG 0x18 +#define SUN4I_TCON_FRM_SEED_PB_REG 0x1c +#define SUN4I_TCON_FRM_SEED_LR_REG 0x20 +#define SUN4I_TCON_FRM_SEED_LG_REG 0x24 +#define SUN4I_TCON_FRM_SEED_LB_REG 0x28 +#define SUN4I_TCON_FRM_TBL0_REG 0x2c +#define SUN4I_TCON_FRM_TBL1_REG 0x30 +#define SUN4I_TCON_FRM_TBL2_REG 0x34 +#define SUN4I_TCON_FRM_TBL3_REG 0x38 #define SUN4I_TCON0_CTL_REG 0x40 #define SUN4I_TCON0_CTL_TCON_ENABLE BIT(31) +#define SUN4I_TCON0_CTL_IF_MASK GENMASK(25, 24) +#define SUN4I_TCON0_CTL_IF_8080 (1 << 24) #define SUN4I_TCON0_CTL_CLK_DELAY_MASK GENMASK(8, 4) #define SUN4I_TCON0_CTL_CLK_DELAY(delay) ((delay << 4) & SUN4I_TCON0_CTL_CLK_DELAY_MASK) #define SUN4I_TCON0_CTL_SRC_SEL_MASK GENMASK(2, 0) @@ -61,7 +80,14 @@ #define SUN4I_TCON0_BASIC3_V_SYNC(height) (((height) - 1) & 0x7ff) #define SUN4I_TCON0_HV_IF_REG 0x58 + #define SUN4I_TCON0_CPU_IF_REG 0x60 +#define SUN4I_TCON0_CPU_IF_MODE_MASK GENMASK(31, 28) +#define SUN4I_TCON0_CPU_IF_MODE_DSI (1 << 28) +#define SUN4I_TCON0_CPU_IF_TRI_FIFO_FLUSH BIT(16) +#define SUN4I_TCON0_CPU_IF_TRI_FIFO_EN BIT(2) +#define SUN4I_TCON0_CPU_IF_TRI_EN BIT(0) + #define SUN4I_TCON0_CPU_WR_REG 0x64 #define SUN4I_TCON0_CPU_RD0_REG 0x68 #define SUN4I_TCON0_CPU_RDA_REG 0x6c @@ -128,6 +154,10 @@ #define SUN4I_TCON1_IO_POL_REG 0xf0 #define SUN4I_TCON1_IO_TRI_REG 0xf4 + +#define SUN4I_TCON_ECC_FIFO_REG 0xf8 +#define SUN4I_TCON_ECC_FIFO_EN BIT(3) + #define SUN4I_TCON_CEU_CTL_REG 0x100 #define SUN4I_TCON_CEU_MUL_RR_REG 0x110 #define SUN4I_TCON_CEU_MUL_RG_REG 0x114 @@ -144,6 +174,22 @@ #define SUN4I_TCON_CEU_RANGE_R_REG 0x140 #define SUN4I_TCON_CEU_RANGE_G_REG 0x144 #define SUN4I_TCON_CEU_RANGE_B_REG 0x148 + +#define SUN4I_TCON0_CPU_TRI0_REG 0x160 +#define SUN4I_TCON0_CPU_TRI0_BLOCK_SPACE(space) ((((space) - 1) & 0xfff) << 16) +#define SUN4I_TCON0_CPU_TRI0_BLOCK_SIZE(size) (((size) - 1) & 0xfff) + +#define SUN4I_TCON0_CPU_TRI1_REG 0x164 +#define SUN4I_TCON0_CPU_TRI1_BLOCK_NUM(num) (((num) - 1) & 0xffff) + +#define SUN4I_TCON0_CPU_TRI2_REG 0x168 +#define SUN4I_TCON0_CPU_TRI2_START_DELAY(delay) (((delay) & 0xffff) << 16) +#define SUN4I_TCON0_CPU_TRI2_TRANS_START_SET(set) ((set) & 0xfff) + +#define SUN4I_TCON_SAFE_PERIOD_REG 0x1f0 +#define SUN4I_TCON_SAFE_PERIOD_NUM(num) (((num) & 0xfff) << 16) +#define SUN4I_TCON_SAFE_PERIOD_MODE(mode) ((mode) & 0x3) + #define SUN4I_TCON_MUX_CTRL_REG 0x200 #define SUN4I_TCON0_LVDS_ANA0_REG 0x220 diff --git a/drivers/gpu/drm/sun4i/sun6i_mipi_dphy.c b/drivers/gpu/drm/sun4i/sun6i_mipi_dphy.c new file mode 100644 index 0000000..e4d1943 --- /dev/null +++ b/drivers/gpu/drm/sun4i/sun6i_mipi_dphy.c @@ -0,0 +1,292 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) 2016 Allwinnertech Co., Ltd. + * Copyright (C) 2017-2018 Bootlin + * + * Maxime Ripard <maxime.ripard@free-electrons.com> + */ + +#include <linux/bitops.h> +#include <linux/clk.h> +#include <linux/of_address.h> +#include <linux/regmap.h> +#include <linux/reset.h> + +#include "sun6i_mipi_dsi.h" + +#define SUN6I_DPHY_GCTL_REG 0x00 +#define SUN6I_DPHY_GCTL_LANE_NUM(n) ((((n) - 1) & 3) << 4) +#define SUN6I_DPHY_GCTL_EN BIT(0) + +#define SUN6I_DPHY_TX_CTL_REG 0x04 +#define SUN6I_DPHY_TX_CTL_HS_TX_CLK_CONT BIT(28) + +#define SUN6I_DPHY_TX_TIME0_REG 0x10 +#define SUN6I_DPHY_TX_TIME0_HS_TRAIL(n) (((n) & 0xff) << 24) +#define SUN6I_DPHY_TX_TIME0_HS_PREPARE(n) (((n) & 0xff) << 16) +#define SUN6I_DPHY_TX_TIME0_LP_CLK_DIV(n) ((n) & 0xff) + +#define SUN6I_DPHY_TX_TIME1_REG 0x14 +#define SUN6I_DPHY_TX_TIME1_CLK_POST(n) (((n) & 0xff) << 24) +#define SUN6I_DPHY_TX_TIME1_CLK_PRE(n) (((n) & 0xff) << 16) +#define SUN6I_DPHY_TX_TIME1_CLK_ZERO(n) (((n) & 0xff) << 8) +#define SUN6I_DPHY_TX_TIME1_CLK_PREPARE(n) ((n) & 0xff) + +#define SUN6I_DPHY_TX_TIME2_REG 0x18 +#define SUN6I_DPHY_TX_TIME2_CLK_TRAIL(n) ((n) & 0xff) + +#define SUN6I_DPHY_TX_TIME3_REG 0x1c + +#define SUN6I_DPHY_TX_TIME4_REG 0x20 +#define SUN6I_DPHY_TX_TIME4_HS_TX_ANA1(n) (((n) & 0xff) << 8) +#define SUN6I_DPHY_TX_TIME4_HS_TX_ANA0(n) ((n) & 0xff) + +#define SUN6I_DPHY_ANA0_REG 0x4c +#define SUN6I_DPHY_ANA0_REG_PWS BIT(31) +#define SUN6I_DPHY_ANA0_REG_DMPC BIT(28) +#define SUN6I_DPHY_ANA0_REG_DMPD(n) (((n) & 0xf) << 24) +#define SUN6I_DPHY_ANA0_REG_SLV(n) (((n) & 7) << 12) +#define SUN6I_DPHY_ANA0_REG_DEN(n) (((n) & 0xf) << 8) + +#define SUN6I_DPHY_ANA1_REG 0x50 +#define SUN6I_DPHY_ANA1_REG_VTTMODE BIT(31) +#define SUN6I_DPHY_ANA1_REG_CSMPS(n) (((n) & 3) << 28) +#define SUN6I_DPHY_ANA1_REG_SVTT(n) (((n) & 0xf) << 24) + +#define SUN6I_DPHY_ANA2_REG 0x54 +#define SUN6I_DPHY_ANA2_EN_P2S_CPU(n) (((n) & 0xf) << 24) +#define SUN6I_DPHY_ANA2_EN_P2S_CPU_MASK GENMASK(27, 24) +#define SUN6I_DPHY_ANA2_EN_CK_CPU BIT(4) +#define SUN6I_DPHY_ANA2_REG_ENIB BIT(1) + +#define SUN6I_DPHY_ANA3_REG 0x58 +#define SUN6I_DPHY_ANA3_EN_VTTD(n) (((n) & 0xf) << 28) +#define SUN6I_DPHY_ANA3_EN_VTTD_MASK GENMASK(31, 28) +#define SUN6I_DPHY_ANA3_EN_VTTC BIT(27) +#define SUN6I_DPHY_ANA3_EN_DIV BIT(26) +#define SUN6I_DPHY_ANA3_EN_LDOC BIT(25) +#define SUN6I_DPHY_ANA3_EN_LDOD BIT(24) +#define SUN6I_DPHY_ANA3_EN_LDOR BIT(18) + +#define SUN6I_DPHY_ANA4_REG 0x5c +#define SUN6I_DPHY_ANA4_REG_DMPLVC BIT(24) +#define SUN6I_DPHY_ANA4_REG_DMPLVD(n) (((n) & 0xf) << 20) +#define SUN6I_DPHY_ANA4_REG_CKDV(n) (((n) & 0x1f) << 12) +#define SUN6I_DPHY_ANA4_REG_TMSC(n) (((n) & 3) << 10) +#define SUN6I_DPHY_ANA4_REG_TMSD(n) (((n) & 3) << 8) +#define SUN6I_DPHY_ANA4_REG_TXDNSC(n) (((n) & 3) << 6) +#define SUN6I_DPHY_ANA4_REG_TXDNSD(n) (((n) & 3) << 4) +#define SUN6I_DPHY_ANA4_REG_TXPUSC(n) (((n) & 3) << 2) +#define SUN6I_DPHY_ANA4_REG_TXPUSD(n) ((n) & 3) + +#define SUN6I_DPHY_DBG5_REG 0xf4 + +int sun6i_dphy_init(struct sun6i_dphy *dphy, unsigned int lanes) +{ + reset_control_deassert(dphy->reset); + clk_prepare_enable(dphy->mod_clk); + clk_set_rate_exclusive(dphy->mod_clk, 150000000); + + regmap_write(dphy->regs, SUN6I_DPHY_TX_CTL_REG, + SUN6I_DPHY_TX_CTL_HS_TX_CLK_CONT); + + regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME0_REG, + SUN6I_DPHY_TX_TIME0_LP_CLK_DIV(14) | + SUN6I_DPHY_TX_TIME0_HS_PREPARE(6) | + SUN6I_DPHY_TX_TIME0_HS_TRAIL(10)); + + regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME1_REG, + SUN6I_DPHY_TX_TIME1_CLK_PREPARE(7) | + SUN6I_DPHY_TX_TIME1_CLK_ZERO(50) | + SUN6I_DPHY_TX_TIME1_CLK_PRE(3) | + SUN6I_DPHY_TX_TIME1_CLK_POST(10)); + + regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME2_REG, + SUN6I_DPHY_TX_TIME2_CLK_TRAIL(30)); + + regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME3_REG, 0); + + regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME4_REG, + SUN6I_DPHY_TX_TIME4_HS_TX_ANA0(3) | + SUN6I_DPHY_TX_TIME4_HS_TX_ANA1(3)); + + regmap_write(dphy->regs, SUN6I_DPHY_GCTL_REG, + SUN6I_DPHY_GCTL_LANE_NUM(lanes) | + SUN6I_DPHY_GCTL_EN); + + return 0; +} + +int sun6i_dphy_power_on(struct sun6i_dphy *dphy, unsigned int lanes) +{ + u8 lanes_mask = GENMASK(lanes - 1, 0); + + regmap_write(dphy->regs, SUN6I_DPHY_ANA0_REG, + SUN6I_DPHY_ANA0_REG_PWS | + SUN6I_DPHY_ANA0_REG_DMPC | + SUN6I_DPHY_ANA0_REG_SLV(7) | + SUN6I_DPHY_ANA0_REG_DMPD(lanes_mask) | + SUN6I_DPHY_ANA0_REG_DEN(lanes_mask)); + + regmap_write(dphy->regs, SUN6I_DPHY_ANA1_REG, + SUN6I_DPHY_ANA1_REG_CSMPS(1) | + SUN6I_DPHY_ANA1_REG_SVTT(7)); + + regmap_write(dphy->regs, SUN6I_DPHY_ANA4_REG, + SUN6I_DPHY_ANA4_REG_CKDV(1) | + SUN6I_DPHY_ANA4_REG_TMSC(1) | + SUN6I_DPHY_ANA4_REG_TMSD(1) | + SUN6I_DPHY_ANA4_REG_TXDNSC(1) | + SUN6I_DPHY_ANA4_REG_TXDNSD(1) | + SUN6I_DPHY_ANA4_REG_TXPUSC(1) | + SUN6I_DPHY_ANA4_REG_TXPUSD(1) | + SUN6I_DPHY_ANA4_REG_DMPLVC | + SUN6I_DPHY_ANA4_REG_DMPLVD(lanes_mask)); + + regmap_write(dphy->regs, SUN6I_DPHY_ANA2_REG, + SUN6I_DPHY_ANA2_REG_ENIB); + udelay(5); + + regmap_write(dphy->regs, SUN6I_DPHY_ANA3_REG, + SUN6I_DPHY_ANA3_EN_LDOR | + SUN6I_DPHY_ANA3_EN_LDOC | + SUN6I_DPHY_ANA3_EN_LDOD); + udelay(1); + + regmap_update_bits(dphy->regs, SUN6I_DPHY_ANA3_REG, + SUN6I_DPHY_ANA3_EN_VTTC | + SUN6I_DPHY_ANA3_EN_VTTD_MASK, + SUN6I_DPHY_ANA3_EN_VTTC | + SUN6I_DPHY_ANA3_EN_VTTD(lanes_mask)); + udelay(1); + + regmap_update_bits(dphy->regs, SUN6I_DPHY_ANA3_REG, + SUN6I_DPHY_ANA3_EN_DIV, + SUN6I_DPHY_ANA3_EN_DIV); + udelay(1); + + regmap_update_bits(dphy->regs, SUN6I_DPHY_ANA2_REG, + SUN6I_DPHY_ANA2_EN_CK_CPU, + SUN6I_DPHY_ANA2_EN_CK_CPU); + udelay(1); + + regmap_update_bits(dphy->regs, SUN6I_DPHY_ANA1_REG, + SUN6I_DPHY_ANA1_REG_VTTMODE, + SUN6I_DPHY_ANA1_REG_VTTMODE); + + regmap_update_bits(dphy->regs, SUN6I_DPHY_ANA2_REG, + SUN6I_DPHY_ANA2_EN_P2S_CPU_MASK, + SUN6I_DPHY_ANA2_EN_P2S_CPU(lanes_mask)); + + return 0; +} + +int sun6i_dphy_power_off(struct sun6i_dphy *dphy) +{ + regmap_update_bits(dphy->regs, SUN6I_DPHY_ANA1_REG, + SUN6I_DPHY_ANA1_REG_VTTMODE, 0); + + return 0; +} + +int sun6i_dphy_exit(struct sun6i_dphy *dphy) +{ + clk_rate_exclusive_put(dphy->mod_clk); + clk_disable_unprepare(dphy->mod_clk); + reset_control_assert(dphy->reset); + + return 0; +} + +static struct regmap_config sun6i_dphy_regmap_config = { + .reg_bits = 32, + .val_bits = 32, + .reg_stride = 4, + .max_register = SUN6I_DPHY_DBG5_REG, + .name = "mipi-dphy", +}; + +static const struct of_device_id sun6i_dphy_of_table[] = { + { .compatible = "allwinner,sun6i-a31-mipi-dphy" }, + { } +}; + +int sun6i_dphy_probe(struct sun6i_dsi *dsi, struct device_node *node) +{ + struct sun6i_dphy *dphy; + struct resource res; + void __iomem *regs; + int ret; + + if (!of_match_node(sun6i_dphy_of_table, node)) { + dev_err(dsi->dev, "Incompatible D-PHY\n"); + return -EINVAL; + } + + dphy = devm_kzalloc(dsi->dev, sizeof(*dphy), GFP_KERNEL); + if (!dphy) + return -ENOMEM; + + ret = of_address_to_resource(node, 0, &res); + if (ret) { + dev_err(dsi->dev, "phy: Couldn't get our resources\n"); + return ret; + } + + regs = devm_ioremap_resource(dsi->dev, &res); + if (IS_ERR(regs)) { + dev_err(dsi->dev, "Couldn't map the DPHY encoder registers\n"); + return PTR_ERR(regs); + } + + dphy->regs = devm_regmap_init_mmio(dsi->dev, regs, + &sun6i_dphy_regmap_config); + if (IS_ERR(dphy->regs)) { + dev_err(dsi->dev, "Couldn't create the DPHY encoder regmap\n"); + return PTR_ERR(dphy->regs); + } + + dphy->reset = of_reset_control_get_shared(node, NULL); + if (IS_ERR(dphy->reset)) { + dev_err(dsi->dev, "Couldn't get our reset line\n"); + return PTR_ERR(dphy->reset); + } + + dphy->bus_clk = of_clk_get_by_name(node, "bus"); + if (IS_ERR(dphy->bus_clk)) { + dev_err(dsi->dev, "Couldn't get the DPHY bus clock\n"); + ret = PTR_ERR(dphy->bus_clk); + goto err_free_reset; + } + regmap_mmio_attach_clk(dphy->regs, dphy->bus_clk); + + dphy->mod_clk = of_clk_get_by_name(node, "mod"); + if (IS_ERR(dphy->mod_clk)) { + dev_err(dsi->dev, "Couldn't get the DPHY mod clock\n"); + ret = PTR_ERR(dphy->mod_clk); + goto err_free_bus; + } + + dsi->dphy = dphy; + + return 0; + +err_free_bus: + regmap_mmio_detach_clk(dphy->regs); + clk_put(dphy->bus_clk); +err_free_reset: + reset_control_put(dphy->reset); + return ret; +} + +int sun6i_dphy_remove(struct sun6i_dsi *dsi) +{ + struct sun6i_dphy *dphy = dsi->dphy; + + regmap_mmio_detach_clk(dphy->regs); + clk_put(dphy->mod_clk); + clk_put(dphy->bus_clk); + reset_control_put(dphy->reset); + + return 0; +} diff --git a/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c b/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c new file mode 100644 index 0000000..bfbf761 --- /dev/null +++ b/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c @@ -0,0 +1,1107 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) 2016 Allwinnertech Co., Ltd. + * Copyright (C) 2017-2018 Bootlin + * + * Maxime Ripard <maxime.ripard@bootlin.com> + */ + +#include <linux/clk.h> +#include <linux/component.h> +#include <linux/crc-ccitt.h> +#include <linux/of_address.h> +#include <linux/pm_runtime.h> +#include <linux/regmap.h> +#include <linux/reset.h> + +#include <linux/phy/phy.h> + +#include <drm/drmP.h> +#include <drm/drm_atomic_helper.h> +#include <drm/drm_crtc_helper.h> +#include <drm/drm_mipi_dsi.h> +#include <drm/drm_panel.h> + +#include "sun4i_drv.h" +#include "sun6i_mipi_dsi.h" + +#include <video/mipi_display.h> + +#define SUN6I_DSI_CTL_REG 0x000 +#define SUN6I_DSI_CTL_EN BIT(0) + +#define SUN6I_DSI_BASIC_CTL_REG 0x00c +#define SUN6I_DSI_BASIC_CTL_HBP_DIS BIT(2) +#define SUN6I_DSI_BASIC_CTL_HSA_HSE_DIS BIT(1) +#define SUN6I_DSI_BASIC_CTL_VIDEO_BURST BIT(0) + +#define SUN6I_DSI_BASIC_CTL0_REG 0x010 +#define SUN6I_DSI_BASIC_CTL0_HS_EOTP_EN BIT(18) +#define SUN6I_DSI_BASIC_CTL0_CRC_EN BIT(17) +#define SUN6I_DSI_BASIC_CTL0_ECC_EN BIT(16) +#define SUN6I_DSI_BASIC_CTL0_INST_ST BIT(0) + +#define SUN6I_DSI_BASIC_CTL1_REG 0x014 +#define SUN6I_DSI_BASIC_CTL1_VIDEO_ST_DELAY(n) (((n) & 0x1fff) << 4) +#define SUN6I_DSI_BASIC_CTL1_VIDEO_FILL BIT(2) +#define SUN6I_DSI_BASIC_CTL1_VIDEO_PRECISION BIT(1) +#define SUN6I_DSI_BASIC_CTL1_VIDEO_MODE BIT(0) + +#define SUN6I_DSI_BASIC_SIZE0_REG 0x018 +#define SUN6I_DSI_BASIC_SIZE0_VBP(n) (((n) & 0xfff) << 16) +#define SUN6I_DSI_BASIC_SIZE0_VSA(n) ((n) & 0xfff) + +#define SUN6I_DSI_BASIC_SIZE1_REG 0x01c +#define SUN6I_DSI_BASIC_SIZE1_VT(n) (((n) & 0xfff) << 16) +#define SUN6I_DSI_BASIC_SIZE1_VACT(n) ((n) & 0xfff) + +#define SUN6I_DSI_INST_FUNC_REG(n) (0x020 + (n) * 0x04) +#define SUN6I_DSI_INST_FUNC_INST_MODE(n) (((n) & 0xf) << 28) +#define SUN6I_DSI_INST_FUNC_ESCAPE_ENTRY(n) (((n) & 0xf) << 24) +#define SUN6I_DSI_INST_FUNC_TRANS_PACKET(n) (((n) & 0xf) << 20) +#define SUN6I_DSI_INST_FUNC_LANE_CEN BIT(4) +#define SUN6I_DSI_INST_FUNC_LANE_DEN(n) ((n) & 0xf) + +#define SUN6I_DSI_INST_LOOP_SEL_REG 0x040 + +#define SUN6I_DSI_INST_LOOP_NUM_REG(n) (0x044 + (n) * 0x10) +#define SUN6I_DSI_INST_LOOP_NUM_N1(n) (((n) & 0xfff) << 16) +#define SUN6I_DSI_INST_LOOP_NUM_N0(n) ((n) & 0xfff) + +#define SUN6I_DSI_INST_JUMP_SEL_REG 0x048 + +#define SUN6I_DSI_INST_JUMP_CFG_REG(n) (0x04c + (n) * 0x04) +#define SUN6I_DSI_INST_JUMP_CFG_TO(n) (((n) & 0xf) << 20) +#define SUN6I_DSI_INST_JUMP_CFG_POINT(n) (((n) & 0xf) << 16) +#define SUN6I_DSI_INST_JUMP_CFG_NUM(n) ((n) & 0xffff) + +#define SUN6I_DSI_TRANS_START_REG 0x060 + +#define SUN6I_DSI_TRANS_ZERO_REG 0x078 + +#define SUN6I_DSI_TCON_DRQ_REG 0x07c +#define SUN6I_DSI_TCON_DRQ_ENABLE_MODE BIT(28) +#define SUN6I_DSI_TCON_DRQ_SET(n) ((n) & 0x3ff) + +#define SUN6I_DSI_PIXEL_CTL0_REG 0x080 +#define SUN6I_DSI_PIXEL_CTL0_PD_PLUG_DISABLE BIT(16) +#define SUN6I_DSI_PIXEL_CTL0_FORMAT(n) ((n) & 0xf) + +#define SUN6I_DSI_PIXEL_CTL1_REG 0x084 + +#define SUN6I_DSI_PIXEL_PH_REG 0x090 +#define SUN6I_DSI_PIXEL_PH_ECC(n) (((n) & 0xff) << 24) +#define SUN6I_DSI_PIXEL_PH_WC(n) (((n) & 0xffff) << 8) +#define SUN6I_DSI_PIXEL_PH_VC(n) (((n) & 3) << 6) +#define SUN6I_DSI_PIXEL_PH_DT(n) ((n) & 0x3f) + +#define SUN6I_DSI_PIXEL_PF0_REG 0x098 +#define SUN6I_DSI_PIXEL_PF0_CRC_FORCE(n) ((n) & 0xffff) + +#define SUN6I_DSI_PIXEL_PF1_REG 0x09c +#define SUN6I_DSI_PIXEL_PF1_CRC_INIT_LINEN(n) (((n) & 0xffff) << 16) +#define SUN6I_DSI_PIXEL_PF1_CRC_INIT_LINE0(n) ((n) & 0xffff) + +#define SUN6I_DSI_SYNC_HSS_REG 0x0b0 + +#define SUN6I_DSI_SYNC_HSE_REG 0x0b4 + +#define SUN6I_DSI_SYNC_VSS_REG 0x0b8 + +#define SUN6I_DSI_SYNC_VSE_REG 0x0bc + +#define SUN6I_DSI_BLK_HSA0_REG 0x0c0 + +#define SUN6I_DSI_BLK_HSA1_REG 0x0c4 +#define SUN6I_DSI_BLK_PF(n) (((n) & 0xffff) << 16) +#define SUN6I_DSI_BLK_PD(n) ((n) & 0xff) + +#define SUN6I_DSI_BLK_HBP0_REG 0x0c8 + +#define SUN6I_DSI_BLK_HBP1_REG 0x0cc + +#define SUN6I_DSI_BLK_HFP0_REG 0x0d0 + +#define SUN6I_DSI_BLK_HFP1_REG 0x0d4 + +#define SUN6I_DSI_BLK_HBLK0_REG 0x0e0 + +#define SUN6I_DSI_BLK_HBLK1_REG 0x0e4 + +#define SUN6I_DSI_BLK_VBLK0_REG 0x0e8 + +#define SUN6I_DSI_BLK_VBLK1_REG 0x0ec + +#define SUN6I_DSI_BURST_LINE_REG 0x0f0 +#define SUN6I_DSI_BURST_LINE_SYNC_POINT(n) (((n) & 0xffff) << 16) +#define SUN6I_DSI_BURST_LINE_NUM(n) ((n) & 0xffff) + +#define SUN6I_DSI_BURST_DRQ_REG 0x0f4 +#define SUN6I_DSI_BURST_DRQ_EDGE1(n) (((n) & 0xffff) << 16) +#define SUN6I_DSI_BURST_DRQ_EDGE0(n) ((n) & 0xffff) + +#define SUN6I_DSI_CMD_CTL_REG 0x200 +#define SUN6I_DSI_CMD_CTL_RX_OVERFLOW BIT(26) +#define SUN6I_DSI_CMD_CTL_RX_FLAG BIT(25) +#define SUN6I_DSI_CMD_CTL_TX_FLAG BIT(9) + +#define SUN6I_DSI_CMD_RX_REG(n) (0x240 + (n) * 0x04) + +#define SUN6I_DSI_DEBUG_DATA_REG 0x2f8 + +#define SUN6I_DSI_CMD_TX_REG(n) (0x300 + (n) * 0x04) + +enum sun6i_dsi_start_inst { + DSI_START_LPRX, + DSI_START_LPTX, + DSI_START_HSC, + DSI_START_HSD, +}; + +enum sun6i_dsi_inst_id { + DSI_INST_ID_LP11 = 0, + DSI_INST_ID_TBA, + DSI_INST_ID_HSC, + DSI_INST_ID_HSD, + DSI_INST_ID_LPDT, + DSI_INST_ID_HSCEXIT, + DSI_INST_ID_NOP, + DSI_INST_ID_DLY, + DSI_INST_ID_END = 15, +}; + +enum sun6i_dsi_inst_mode { + DSI_INST_MODE_STOP = 0, + DSI_INST_MODE_TBA, + DSI_INST_MODE_HS, + DSI_INST_MODE_ESCAPE, + DSI_INST_MODE_HSCEXIT, + DSI_INST_MODE_NOP, +}; + +enum sun6i_dsi_inst_escape { + DSI_INST_ESCA_LPDT = 0, + DSI_INST_ESCA_ULPS, + DSI_INST_ESCA_UN1, + DSI_INST_ESCA_UN2, + DSI_INST_ESCA_RESET, + DSI_INST_ESCA_UN3, + DSI_INST_ESCA_UN4, + DSI_INST_ESCA_UN5, +}; + +enum sun6i_dsi_inst_packet { + DSI_INST_PACK_PIXEL = 0, + DSI_INST_PACK_COMMAND, +}; + +static const u32 sun6i_dsi_ecc_array[] = { + [0] = (BIT(0) | BIT(1) | BIT(2) | BIT(4) | BIT(5) | BIT(7) | BIT(10) | + BIT(11) | BIT(13) | BIT(16) | BIT(20) | BIT(21) | BIT(22) | + BIT(23)), + [1] = (BIT(0) | BIT(1) | BIT(3) | BIT(4) | BIT(6) | BIT(8) | BIT(10) | + BIT(12) | BIT(14) | BIT(17) | BIT(20) | BIT(21) | BIT(22) | + BIT(23)), + [2] = (BIT(0) | BIT(2) | BIT(3) | BIT(5) | BIT(6) | BIT(9) | BIT(11) | + BIT(12) | BIT(15) | BIT(18) | BIT(20) | BIT(21) | BIT(22)), + [3] = (BIT(1) | BIT(2) | BIT(3) | BIT(7) | BIT(8) | BIT(9) | BIT(13) | + BIT(14) | BIT(15) | BIT(19) | BIT(20) | BIT(21) | BIT(23)), + [4] = (BIT(4) | BIT(5) | BIT(6) | BIT(7) | BIT(8) | BIT(9) | BIT(16) | + BIT(17) | BIT(18) | BIT(19) | BIT(20) | BIT(22) | BIT(23)), + [5] = (BIT(10) | BIT(11) | BIT(12) | BIT(13) | BIT(14) | BIT(15) | + BIT(16) | BIT(17) | BIT(18) | BIT(19) | BIT(21) | BIT(22) | + BIT(23)), +}; + +static u32 sun6i_dsi_ecc_compute(unsigned int data) +{ + int i; + u8 ecc = 0; + + for (i = 0; i < ARRAY_SIZE(sun6i_dsi_ecc_array); i++) { + u32 field = sun6i_dsi_ecc_array[i]; + bool init = false; + u8 val = 0; + int j; + + for (j = 0; j < 24; j++) { + if (!(BIT(j) & field)) + continue; + + if (!init) { + val = (BIT(j) & data) ? 1 : 0; + init = true; + } else { + val ^= (BIT(j) & data) ? 1 : 0; + } + } + + ecc |= val << i; + } + + return ecc; +} + +static u16 sun6i_dsi_crc_compute(u8 const *buffer, size_t len) +{ + return crc_ccitt(0xffff, buffer, len); +} + +static u16 sun6i_dsi_crc_repeat_compute(u8 pd, size_t len) +{ + u8 buffer[len]; + + memset(buffer, pd, len); + + return sun6i_dsi_crc_compute(buffer, len); +} + +static u32 sun6i_dsi_build_sync_pkt(u8 dt, u8 vc, u8 d0, u8 d1) +{ + u32 val = dt & 0x3f; + + val |= (vc & 3) << 6; + val |= (d0 & 0xff) << 8; + val |= (d1 & 0xff) << 16; + val |= sun6i_dsi_ecc_compute(val) << 24; + + return val; +} + +static u32 sun6i_dsi_build_blk0_pkt(u8 vc, u16 wc) +{ + return sun6i_dsi_build_sync_pkt(MIPI_DSI_BLANKING_PACKET, vc, + wc & 0xff, wc >> 8); +} + +static u32 sun6i_dsi_build_blk1_pkt(u16 pd, size_t len) +{ + u32 val = SUN6I_DSI_BLK_PD(pd); + + return val | SUN6I_DSI_BLK_PF(sun6i_dsi_crc_repeat_compute(pd, len)); +} + +static void sun6i_dsi_inst_abort(struct sun6i_dsi *dsi) +{ + regmap_update_bits(dsi->regs, SUN6I_DSI_BASIC_CTL0_REG, + SUN6I_DSI_BASIC_CTL0_INST_ST, 0); +} + +static void sun6i_dsi_inst_commit(struct sun6i_dsi *dsi) +{ + regmap_update_bits(dsi->regs, SUN6I_DSI_BASIC_CTL0_REG, + SUN6I_DSI_BASIC_CTL0_INST_ST, + SUN6I_DSI_BASIC_CTL0_INST_ST); +} + +static int sun6i_dsi_inst_wait_for_completion(struct sun6i_dsi *dsi) +{ + u32 val; + + return regmap_read_poll_timeout(dsi->regs, SUN6I_DSI_BASIC_CTL0_REG, + val, + !(val & SUN6I_DSI_BASIC_CTL0_INST_ST), + 100, 5000); +} + +static void sun6i_dsi_inst_setup(struct sun6i_dsi *dsi, + enum sun6i_dsi_inst_id id, + enum sun6i_dsi_inst_mode mode, + bool clock, u8 data, + enum sun6i_dsi_inst_packet packet, + enum sun6i_dsi_inst_escape escape) +{ + regmap_write(dsi->regs, SUN6I_DSI_INST_FUNC_REG(id), + SUN6I_DSI_INST_FUNC_INST_MODE(mode) | + SUN6I_DSI_INST_FUNC_ESCAPE_ENTRY(escape) | + SUN6I_DSI_INST_FUNC_TRANS_PACKET(packet) | + (clock ? SUN6I_DSI_INST_FUNC_LANE_CEN : 0) | + SUN6I_DSI_INST_FUNC_LANE_DEN(data)); +} + +static void sun6i_dsi_inst_init(struct sun6i_dsi *dsi, + struct mipi_dsi_device *device) +{ + u8 lanes_mask = GENMASK(device->lanes - 1, 0); + + sun6i_dsi_inst_setup(dsi, DSI_INST_ID_LP11, DSI_INST_MODE_STOP, + true, lanes_mask, 0, 0); + + sun6i_dsi_inst_setup(dsi, DSI_INST_ID_TBA, DSI_INST_MODE_TBA, + false, 1, 0, 0); + + sun6i_dsi_inst_setup(dsi, DSI_INST_ID_HSC, DSI_INST_MODE_HS, + true, 0, DSI_INST_PACK_PIXEL, 0); + + sun6i_dsi_inst_setup(dsi, DSI_INST_ID_HSD, DSI_INST_MODE_HS, + false, lanes_mask, DSI_INST_PACK_PIXEL, 0); + + sun6i_dsi_inst_setup(dsi, DSI_INST_ID_LPDT, DSI_INST_MODE_ESCAPE, + false, 1, DSI_INST_PACK_COMMAND, + DSI_INST_ESCA_LPDT); + + sun6i_dsi_inst_setup(dsi, DSI_INST_ID_HSCEXIT, DSI_INST_MODE_HSCEXIT, + true, 0, 0, 0); + + sun6i_dsi_inst_setup(dsi, DSI_INST_ID_NOP, DSI_INST_MODE_STOP, + false, lanes_mask, 0, 0); + + sun6i_dsi_inst_setup(dsi, DSI_INST_ID_DLY, DSI_INST_MODE_NOP, + true, lanes_mask, 0, 0); + + regmap_write(dsi->regs, SUN6I_DSI_INST_JUMP_CFG_REG(0), + SUN6I_DSI_INST_JUMP_CFG_POINT(DSI_INST_ID_NOP) | + SUN6I_DSI_INST_JUMP_CFG_TO(DSI_INST_ID_HSCEXIT) | + SUN6I_DSI_INST_JUMP_CFG_NUM(1)); +}; + +static u16 sun6i_dsi_get_video_start_delay(struct sun6i_dsi *dsi, + struct drm_display_mode *mode) +{ + return mode->vtotal - (mode->vsync_end - mode->vdisplay) + 1; +} + +static void sun6i_dsi_setup_burst(struct sun6i_dsi *dsi, + struct drm_display_mode *mode) +{ + struct mipi_dsi_device *device = dsi->device; + u32 val = 0; + + if ((mode->hsync_end - mode->hdisplay) > 20) { + /* Maaaaaagic */ + u16 drq = (mode->hsync_end - mode->hdisplay) - 20; + + drq *= mipi_dsi_pixel_format_to_bpp(device->format); + drq /= 32; + + val = (SUN6I_DSI_TCON_DRQ_ENABLE_MODE | + SUN6I_DSI_TCON_DRQ_SET(drq)); + } + + regmap_write(dsi->regs, SUN6I_DSI_TCON_DRQ_REG, val); +} + +static void sun6i_dsi_setup_inst_loop(struct sun6i_dsi *dsi, + struct drm_display_mode *mode) +{ + u16 delay = 50 - 1; + + regmap_write(dsi->regs, SUN6I_DSI_INST_LOOP_NUM_REG(0), + SUN6I_DSI_INST_LOOP_NUM_N0(50 - 1) | + SUN6I_DSI_INST_LOOP_NUM_N1(delay)); + regmap_write(dsi->regs, SUN6I_DSI_INST_LOOP_NUM_REG(1), + SUN6I_DSI_INST_LOOP_NUM_N0(50 - 1) | + SUN6I_DSI_INST_LOOP_NUM_N1(delay)); +} + +static void sun6i_dsi_setup_format(struct sun6i_dsi *dsi, + struct drm_display_mode *mode) +{ + struct mipi_dsi_device *device = dsi->device; + u32 val = SUN6I_DSI_PIXEL_PH_VC(device->channel); + u8 dt, fmt; + u16 wc; + + /* + * TODO: The format defines are only valid in video mode and + * change in command mode. + */ + switch (device->format) { + case MIPI_DSI_FMT_RGB888: + dt = MIPI_DSI_PACKED_PIXEL_STREAM_24; + fmt = 8; + break; + case MIPI_DSI_FMT_RGB666: + dt = MIPI_DSI_PIXEL_STREAM_3BYTE_18; + fmt = 9; + break; + case MIPI_DSI_FMT_RGB666_PACKED: + dt = MIPI_DSI_PACKED_PIXEL_STREAM_18; + fmt = 10; + break; + case MIPI_DSI_FMT_RGB565: + dt = MIPI_DSI_PACKED_PIXEL_STREAM_16; + fmt = 11; + break; + default: + return; + } + val |= SUN6I_DSI_PIXEL_PH_DT(dt); + + wc = mode->hdisplay * mipi_dsi_pixel_format_to_bpp(device->format) / 8; + val |= SUN6I_DSI_PIXEL_PH_WC(wc); + val |= SUN6I_DSI_PIXEL_PH_ECC(sun6i_dsi_ecc_compute(val)); + + regmap_write(dsi->regs, SUN6I_DSI_PIXEL_PH_REG, val); + + regmap_write(dsi->regs, SUN6I_DSI_PIXEL_PF0_REG, + SUN6I_DSI_PIXEL_PF0_CRC_FORCE(0xffff)); + + regmap_write(dsi->regs, SUN6I_DSI_PIXEL_PF1_REG, + SUN6I_DSI_PIXEL_PF1_CRC_INIT_LINE0(0xffff) | + SUN6I_DSI_PIXEL_PF1_CRC_INIT_LINEN(0xffff)); + + regmap_write(dsi->regs, SUN6I_DSI_PIXEL_CTL0_REG, + SUN6I_DSI_PIXEL_CTL0_PD_PLUG_DISABLE | + SUN6I_DSI_PIXEL_CTL0_FORMAT(fmt)); +} + +static void sun6i_dsi_setup_timings(struct sun6i_dsi *dsi, + struct drm_display_mode *mode) +{ + struct mipi_dsi_device *device = dsi->device; + unsigned int Bpp = mipi_dsi_pixel_format_to_bpp(device->format) / 8; + u16 hbp, hfp, hsa, hblk, vblk; + + regmap_write(dsi->regs, SUN6I_DSI_BASIC_CTL_REG, 0); + + regmap_write(dsi->regs, SUN6I_DSI_SYNC_HSS_REG, + sun6i_dsi_build_sync_pkt(MIPI_DSI_H_SYNC_START, + device->channel, + 0, 0)); + + regmap_write(dsi->regs, SUN6I_DSI_SYNC_HSE_REG, + sun6i_dsi_build_sync_pkt(MIPI_DSI_H_SYNC_END, + device->channel, + 0, 0)); + + regmap_write(dsi->regs, SUN6I_DSI_SYNC_VSS_REG, + sun6i_dsi_build_sync_pkt(MIPI_DSI_V_SYNC_START, + device->channel, + 0, 0)); + + regmap_write(dsi->regs, SUN6I_DSI_SYNC_VSE_REG, + sun6i_dsi_build_sync_pkt(MIPI_DSI_V_SYNC_END, + device->channel, + 0, 0)); + + regmap_write(dsi->regs, SUN6I_DSI_BASIC_SIZE0_REG, + SUN6I_DSI_BASIC_SIZE0_VSA(mode->vsync_end - + mode->vsync_start) | + SUN6I_DSI_BASIC_SIZE0_VBP(mode->vsync_start - + mode->vdisplay)); + + regmap_write(dsi->regs, SUN6I_DSI_BASIC_SIZE1_REG, + SUN6I_DSI_BASIC_SIZE1_VACT(mode->vdisplay) | + SUN6I_DSI_BASIC_SIZE1_VT(mode->vtotal)); + + /* + * A sync period is composed of a blanking packet (4 bytes + + * payload + 2 bytes) and a sync event packet (4 bytes). Its + * minimal size is therefore 10 bytes + */ +#define HSA_PACKET_OVERHEAD 10 + hsa = max((unsigned int)HSA_PACKET_OVERHEAD, + (mode->hsync_end - mode->hsync_start) * Bpp - HSA_PACKET_OVERHEAD); + regmap_write(dsi->regs, SUN6I_DSI_BLK_HSA0_REG, + sun6i_dsi_build_blk0_pkt(device->channel, hsa)); + regmap_write(dsi->regs, SUN6I_DSI_BLK_HSA1_REG, + sun6i_dsi_build_blk1_pkt(0, hsa)); + + /* + * The backporch is set using a blanking packet (4 bytes + + * payload + 2 bytes). Its minimal size is therefore 6 bytes + */ +#define HBP_PACKET_OVERHEAD 6 + hbp = max((unsigned int)HBP_PACKET_OVERHEAD, + (mode->hsync_start - mode->hdisplay) * Bpp - HBP_PACKET_OVERHEAD); + regmap_write(dsi->regs, SUN6I_DSI_BLK_HBP0_REG, + sun6i_dsi_build_blk0_pkt(device->channel, hbp)); + regmap_write(dsi->regs, SUN6I_DSI_BLK_HBP1_REG, + sun6i_dsi_build_blk1_pkt(0, hbp)); + + /* + * The frontporch is set using a blanking packet (4 bytes + + * payload + 2 bytes). Its minimal size is therefore 6 bytes + */ +#define HFP_PACKET_OVERHEAD 6 + hfp = max((unsigned int)HFP_PACKET_OVERHEAD, + (mode->htotal - mode->hsync_end) * Bpp - HFP_PACKET_OVERHEAD); + regmap_write(dsi->regs, SUN6I_DSI_BLK_HFP0_REG, + sun6i_dsi_build_blk0_pkt(device->channel, hfp)); + regmap_write(dsi->regs, SUN6I_DSI_BLK_HFP1_REG, + sun6i_dsi_build_blk1_pkt(0, hfp)); + + /* + * hblk seems to be the line + porches length. + */ + hblk = mode->htotal * Bpp - hsa; + regmap_write(dsi->regs, SUN6I_DSI_BLK_HBLK0_REG, + sun6i_dsi_build_blk0_pkt(device->channel, hblk)); + regmap_write(dsi->regs, SUN6I_DSI_BLK_HBLK1_REG, + sun6i_dsi_build_blk1_pkt(0, hblk)); + + /* + * And I'm not entirely sure what vblk is about. The driver in + * Allwinner BSP is using a rather convoluted calculation + * there only for 4 lanes. However, using 0 (the !4 lanes + * case) even with a 4 lanes screen seems to work... + */ + vblk = 0; + regmap_write(dsi->regs, SUN6I_DSI_BLK_VBLK0_REG, + sun6i_dsi_build_blk0_pkt(device->channel, vblk)); + regmap_write(dsi->regs, SUN6I_DSI_BLK_VBLK1_REG, + sun6i_dsi_build_blk1_pkt(0, vblk)); +} + +static int sun6i_dsi_start(struct sun6i_dsi *dsi, + enum sun6i_dsi_start_inst func) +{ + switch (func) { + case DSI_START_LPTX: + regmap_write(dsi->regs, SUN6I_DSI_INST_JUMP_SEL_REG, + DSI_INST_ID_LPDT << (4 * DSI_INST_ID_LP11) | + DSI_INST_ID_END << (4 * DSI_INST_ID_LPDT)); + break; + case DSI_START_LPRX: + regmap_write(dsi->regs, SUN6I_DSI_INST_JUMP_SEL_REG, + DSI_INST_ID_LPDT << (4 * DSI_INST_ID_LP11) | + DSI_INST_ID_DLY << (4 * DSI_INST_ID_LPDT) | + DSI_INST_ID_TBA << (4 * DSI_INST_ID_DLY) | + DSI_INST_ID_END << (4 * DSI_INST_ID_TBA)); + break; + case DSI_START_HSC: + regmap_write(dsi->regs, SUN6I_DSI_INST_JUMP_SEL_REG, + DSI_INST_ID_HSC << (4 * DSI_INST_ID_LP11) | + DSI_INST_ID_END << (4 * DSI_INST_ID_HSC)); + break; + case DSI_START_HSD: + regmap_write(dsi->regs, SUN6I_DSI_INST_JUMP_SEL_REG, + DSI_INST_ID_NOP << (4 * DSI_INST_ID_LP11) | + DSI_INST_ID_HSD << (4 * DSI_INST_ID_NOP) | + DSI_INST_ID_DLY << (4 * DSI_INST_ID_HSD) | + DSI_INST_ID_NOP << (4 * DSI_INST_ID_DLY) | + DSI_INST_ID_END << (4 * DSI_INST_ID_HSCEXIT)); + break; + default: + regmap_write(dsi->regs, SUN6I_DSI_INST_JUMP_SEL_REG, + DSI_INST_ID_END << (4 * DSI_INST_ID_LP11)); + break; + } + + sun6i_dsi_inst_abort(dsi); + sun6i_dsi_inst_commit(dsi); + + if (func == DSI_START_HSC) + regmap_write_bits(dsi->regs, + SUN6I_DSI_INST_FUNC_REG(DSI_INST_ID_LP11), + SUN6I_DSI_INST_FUNC_LANE_CEN, 0); + + return 0; +} + +static void sun6i_dsi_encoder_enable(struct drm_encoder *encoder) +{ + struct drm_display_mode *mode = &encoder->crtc->state->adjusted_mode; + struct sun6i_dsi *dsi = encoder_to_sun6i_dsi(encoder); + struct mipi_dsi_device *device = dsi->device; + u16 delay; + + DRM_DEBUG_DRIVER("Enabling DSI output\n"); + + pm_runtime_get_sync(dsi->dev); + + delay = sun6i_dsi_get_video_start_delay(dsi, mode); + regmap_write(dsi->regs, SUN6I_DSI_BASIC_CTL1_REG, + SUN6I_DSI_BASIC_CTL1_VIDEO_ST_DELAY(delay) | + SUN6I_DSI_BASIC_CTL1_VIDEO_FILL | + SUN6I_DSI_BASIC_CTL1_VIDEO_PRECISION | + SUN6I_DSI_BASIC_CTL1_VIDEO_MODE); + + sun6i_dsi_setup_burst(dsi, mode); + sun6i_dsi_setup_inst_loop(dsi, mode); + sun6i_dsi_setup_format(dsi, mode); + sun6i_dsi_setup_timings(dsi, mode); + + sun6i_dphy_init(dsi->dphy, device->lanes); + sun6i_dphy_power_on(dsi->dphy, device->lanes); + + if (!IS_ERR(dsi->panel)) + drm_panel_prepare(dsi->panel); + + /* + * FIXME: This should be moved after the switch to HS mode. + * + * Unfortunately, once in HS mode, it seems like we're not + * able to send DCS commands anymore, which would prevent any + * panel to send any DCS command as part as their enable + * method, which is quite common. + * + * I haven't seen any artifact due to that sub-optimal + * ordering on the panels I've tested it with, so I guess this + * will do for now, until that IP is better understood. + */ + if (!IS_ERR(dsi->panel)) + drm_panel_enable(dsi->panel); + + sun6i_dsi_start(dsi, DSI_START_HSC); + + udelay(1000); + + sun6i_dsi_start(dsi, DSI_START_HSD); +} + +static void sun6i_dsi_encoder_disable(struct drm_encoder *encoder) +{ + struct sun6i_dsi *dsi = encoder_to_sun6i_dsi(encoder); + + DRM_DEBUG_DRIVER("Disabling DSI output\n"); + + if (!IS_ERR(dsi->panel)) { + drm_panel_disable(dsi->panel); + drm_panel_unprepare(dsi->panel); + } + + sun6i_dphy_power_off(dsi->dphy); + sun6i_dphy_exit(dsi->dphy); + + pm_runtime_put(dsi->dev); +} + +static int sun6i_dsi_get_modes(struct drm_connector *connector) +{ + struct sun6i_dsi *dsi = connector_to_sun6i_dsi(connector); + + return drm_panel_get_modes(dsi->panel); +} + +static struct drm_connector_helper_funcs sun6i_dsi_connector_helper_funcs = { + .get_modes = sun6i_dsi_get_modes, +}; + +static enum drm_connector_status +sun6i_dsi_connector_detect(struct drm_connector *connector, bool force) +{ + return connector_status_connected; +} + +static const struct drm_connector_funcs sun6i_dsi_connector_funcs = { + .detect = sun6i_dsi_connector_detect, + .fill_modes = drm_helper_probe_single_connector_modes, + .destroy = drm_connector_cleanup, + .reset = drm_atomic_helper_connector_reset, + .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, +}; + +static const struct drm_encoder_helper_funcs sun6i_dsi_enc_helper_funcs = { + .disable = sun6i_dsi_encoder_disable, + .enable = sun6i_dsi_encoder_enable, +}; + +static const struct drm_encoder_funcs sun6i_dsi_enc_funcs = { + .destroy = drm_encoder_cleanup, +}; + +static u32 sun6i_dsi_dcs_build_pkt_hdr(struct sun6i_dsi *dsi, + const struct mipi_dsi_msg *msg) +{ + u32 pkt = msg->type; + + if (msg->type == MIPI_DSI_DCS_LONG_WRITE) { + pkt |= ((msg->tx_len + 1) & 0xffff) << 8; + pkt |= (((msg->tx_len + 1) >> 8) & 0xffff) << 16; + } else { + pkt |= (((u8 *)msg->tx_buf)[0] << 8); + if (msg->tx_len > 1) + pkt |= (((u8 *)msg->tx_buf)[1] << 16); + } + + pkt |= sun6i_dsi_ecc_compute(pkt) << 24; + + return pkt; +} + +static int sun6i_dsi_dcs_write_short(struct sun6i_dsi *dsi, + const struct mipi_dsi_msg *msg) +{ + regmap_write(dsi->regs, SUN6I_DSI_CMD_TX_REG(0), + sun6i_dsi_dcs_build_pkt_hdr(dsi, msg)); + regmap_write_bits(dsi->regs, SUN6I_DSI_CMD_CTL_REG, + 0xff, (4 - 1)); + + sun6i_dsi_start(dsi, DSI_START_LPTX); + + return msg->tx_len; +} + +static int sun6i_dsi_dcs_write_long(struct sun6i_dsi *dsi, + const struct mipi_dsi_msg *msg) +{ + int ret, len = 0; + u8 *bounce; + u16 crc; + + regmap_write(dsi->regs, SUN6I_DSI_CMD_TX_REG(0), + sun6i_dsi_dcs_build_pkt_hdr(dsi, msg)); + + bounce = kzalloc(msg->tx_len + sizeof(crc), GFP_KERNEL); + if (!bounce) + return -ENOMEM; + + memcpy(bounce, msg->tx_buf, msg->tx_len); + len += msg->tx_len; + + crc = sun6i_dsi_crc_compute(bounce, msg->tx_len); + memcpy((u8 *)bounce + msg->tx_len, &crc, sizeof(crc)); + len += sizeof(crc); + + regmap_bulk_write(dsi->regs, SUN6I_DSI_CMD_TX_REG(1), bounce, len); + regmap_write(dsi->regs, SUN6I_DSI_CMD_CTL_REG, len + 4 - 1); + kfree(bounce); + + sun6i_dsi_start(dsi, DSI_START_LPTX); + + ret = sun6i_dsi_inst_wait_for_completion(dsi); + if (ret < 0) { + sun6i_dsi_inst_abort(dsi); + return ret; + } + + /* + * TODO: There's some bits (reg 0x200, bits 8/9) that + * apparently can be used to check whether the data have been + * sent, but I couldn't get it to work reliably. + */ + return msg->tx_len; +} + +static int sun6i_dsi_dcs_read(struct sun6i_dsi *dsi, + const struct mipi_dsi_msg *msg) +{ + u32 val; + int ret; + u8 byte0; + + regmap_write(dsi->regs, SUN6I_DSI_CMD_TX_REG(0), + sun6i_dsi_dcs_build_pkt_hdr(dsi, msg)); + regmap_write(dsi->regs, SUN6I_DSI_CMD_CTL_REG, + (4 - 1)); + + sun6i_dsi_start(dsi, DSI_START_LPRX); + + ret = sun6i_dsi_inst_wait_for_completion(dsi); + if (ret < 0) { + sun6i_dsi_inst_abort(dsi); + return ret; + } + + /* + * TODO: There's some bits (reg 0x200, bits 24/25) that + * apparently can be used to check whether the data have been + * received, but I couldn't get it to work reliably. + */ + regmap_read(dsi->regs, SUN6I_DSI_CMD_CTL_REG, &val); + if (val & SUN6I_DSI_CMD_CTL_RX_OVERFLOW) + return -EIO; + + regmap_read(dsi->regs, SUN6I_DSI_CMD_RX_REG(0), &val); + byte0 = val & 0xff; + if (byte0 == MIPI_DSI_RX_ACKNOWLEDGE_AND_ERROR_REPORT) + return -EIO; + + ((u8 *)msg->rx_buf)[0] = (val >> 8); + + return 1; +} + +static int sun6i_dsi_attach(struct mipi_dsi_host *host, + struct mipi_dsi_device *device) +{ + struct sun6i_dsi *dsi = host_to_sun6i_dsi(host); + + dsi->device = device; + dsi->panel = of_drm_find_panel(device->dev.of_node); + if (!dsi->panel) + return -EINVAL; + + dev_info(host->dev, "Attached device %s\n", device->name); + + return 0; +} + +static int sun6i_dsi_detach(struct mipi_dsi_host *host, + struct mipi_dsi_device *device) +{ + struct sun6i_dsi *dsi = host_to_sun6i_dsi(host); + + dsi->panel = NULL; + dsi->device = NULL; + + return 0; +} + +static ssize_t sun6i_dsi_transfer(struct mipi_dsi_host *host, + const struct mipi_dsi_msg *msg) +{ + struct sun6i_dsi *dsi = host_to_sun6i_dsi(host); + int ret; + + ret = sun6i_dsi_inst_wait_for_completion(dsi); + if (ret < 0) + sun6i_dsi_inst_abort(dsi); + + regmap_write(dsi->regs, SUN6I_DSI_CMD_CTL_REG, + SUN6I_DSI_CMD_CTL_RX_OVERFLOW | + SUN6I_DSI_CMD_CTL_RX_FLAG | + SUN6I_DSI_CMD_CTL_TX_FLAG); + + switch (msg->type) { + case MIPI_DSI_DCS_SHORT_WRITE: + case MIPI_DSI_DCS_SHORT_WRITE_PARAM: + ret = sun6i_dsi_dcs_write_short(dsi, msg); + break; + + case MIPI_DSI_DCS_LONG_WRITE: + ret = sun6i_dsi_dcs_write_long(dsi, msg); + break; + + case MIPI_DSI_DCS_READ: + if (msg->rx_len == 1) { + ret = sun6i_dsi_dcs_read(dsi, msg); + break; + } + + default: + ret = -EINVAL; + } + + return ret; +} + +static const struct mipi_dsi_host_ops sun6i_dsi_host_ops = { + .attach = sun6i_dsi_attach, + .detach = sun6i_dsi_detach, + .transfer = sun6i_dsi_transfer, +}; + +static const struct regmap_config sun6i_dsi_regmap_config = { + .reg_bits = 32, + .val_bits = 32, + .reg_stride = 4, + .max_register = SUN6I_DSI_CMD_TX_REG(255), + .name = "mipi-dsi", +}; + +static int sun6i_dsi_bind(struct device *dev, struct device *master, + void *data) +{ + struct drm_device *drm = data; + struct sun4i_drv *drv = drm->dev_private; + struct sun6i_dsi *dsi = dev_get_drvdata(dev); + int ret; + + if (!dsi->panel) + return -EPROBE_DEFER; + + dsi->drv = drv; + + drm_encoder_helper_add(&dsi->encoder, + &sun6i_dsi_enc_helper_funcs); + ret = drm_encoder_init(drm, + &dsi->encoder, + &sun6i_dsi_enc_funcs, + DRM_MODE_ENCODER_DSI, + NULL); + if (ret) { + dev_err(dsi->dev, "Couldn't initialise the DSI encoder\n"); + return ret; + } + dsi->encoder.possible_crtcs = BIT(0); + + drm_connector_helper_add(&dsi->connector, + &sun6i_dsi_connector_helper_funcs); + ret = drm_connector_init(drm, &dsi->connector, + &sun6i_dsi_connector_funcs, + DRM_MODE_CONNECTOR_DSI); + if (ret) { + dev_err(dsi->dev, + "Couldn't initialise the DSI connector\n"); + goto err_cleanup_connector; + } + + drm_mode_connector_attach_encoder(&dsi->connector, &dsi->encoder); + drm_panel_attach(dsi->panel, &dsi->connector); + + return 0; + +err_cleanup_connector: + drm_encoder_cleanup(&dsi->encoder); + return ret; +} + +static void sun6i_dsi_unbind(struct device *dev, struct device *master, + void *data) +{ + struct sun6i_dsi *dsi = dev_get_drvdata(dev); + + drm_panel_detach(dsi->panel); +} + +static const struct component_ops sun6i_dsi_ops = { + .bind = sun6i_dsi_bind, + .unbind = sun6i_dsi_unbind, +}; + +static int sun6i_dsi_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct device_node *dphy_node; + struct sun6i_dsi *dsi; + struct resource *res; + void __iomem *base; + int ret; + + dsi = devm_kzalloc(dev, sizeof(*dsi), GFP_KERNEL); + if (!dsi) + return -ENOMEM; + dev_set_drvdata(dev, dsi); + dsi->dev = dev; + dsi->host.ops = &sun6i_dsi_host_ops; + dsi->host.dev = dev; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + base = devm_ioremap_resource(dev, res); + if (IS_ERR(base)) { + dev_err(dev, "Couldn't map the DSI encoder registers\n"); + return PTR_ERR(base); + } + + dsi->regs = devm_regmap_init_mmio_clk(dev, "bus", base, + &sun6i_dsi_regmap_config); + if (IS_ERR(dsi->regs)) { + dev_err(dev, "Couldn't create the DSI encoder regmap\n"); + return PTR_ERR(dsi->regs); + } + + dsi->reset = devm_reset_control_get_shared(dev, NULL); + if (IS_ERR(dsi->reset)) { + dev_err(dev, "Couldn't get our reset line\n"); + return PTR_ERR(dsi->reset); + } + + dsi->mod_clk = devm_clk_get(dev, "mod"); + if (IS_ERR(dsi->mod_clk)) { + dev_err(dev, "Couldn't get the DSI mod clock\n"); + return PTR_ERR(dsi->mod_clk); + } + + /* + * In order to operate properly, that clock seems to be always + * set to 297MHz. + */ + clk_set_rate_exclusive(dsi->mod_clk, 297000000); + + dphy_node = of_parse_phandle(dev->of_node, "phys", 0); + ret = sun6i_dphy_probe(dsi, dphy_node); + of_node_put(dphy_node); + if (ret) { + dev_err(dev, "Couldn't get the MIPI D-PHY\n"); + goto err_unprotect_clk; + } + + pm_runtime_enable(dev); + + ret = mipi_dsi_host_register(&dsi->host); + if (ret) { + dev_err(dev, "Couldn't register MIPI-DSI host\n"); + goto err_remove_phy; + } + + ret = component_add(&pdev->dev, &sun6i_dsi_ops); + if (ret) { + dev_err(dev, "Couldn't register our component\n"); + goto err_remove_dsi_host; + } + + return 0; + +err_remove_dsi_host: + mipi_dsi_host_unregister(&dsi->host); +err_remove_phy: + pm_runtime_disable(dev); + sun6i_dphy_remove(dsi); +err_unprotect_clk: + clk_rate_exclusive_put(dsi->mod_clk); + return ret; +} + +static int sun6i_dsi_remove(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct sun6i_dsi *dsi = dev_get_drvdata(dev); + + component_del(&pdev->dev, &sun6i_dsi_ops); + mipi_dsi_host_unregister(&dsi->host); + pm_runtime_disable(dev); + sun6i_dphy_remove(dsi); + clk_rate_exclusive_put(dsi->mod_clk); + + return 0; +} + +static int sun6i_dsi_runtime_resume(struct device *dev) +{ + struct sun6i_dsi *dsi = dev_get_drvdata(dev); + + reset_control_deassert(dsi->reset); + clk_prepare_enable(dsi->mod_clk); + + /* + * Enable the DSI block. + * + * Some part of it can only be done once we get a number of + * lanes, see sun6i_dsi_inst_init + */ + regmap_write(dsi->regs, SUN6I_DSI_CTL_REG, SUN6I_DSI_CTL_EN); + + regmap_write(dsi->regs, SUN6I_DSI_BASIC_CTL0_REG, + SUN6I_DSI_BASIC_CTL0_ECC_EN | SUN6I_DSI_BASIC_CTL0_CRC_EN); + + regmap_write(dsi->regs, SUN6I_DSI_TRANS_START_REG, 10); + regmap_write(dsi->regs, SUN6I_DSI_TRANS_ZERO_REG, 0); + + if (dsi->device) + sun6i_dsi_inst_init(dsi, dsi->device); + + regmap_write(dsi->regs, SUN6I_DSI_DEBUG_DATA_REG, 0xff); + + return 0; +} + +static int sun6i_dsi_runtime_suspend(struct device *dev) +{ + struct sun6i_dsi *dsi = dev_get_drvdata(dev); + + clk_disable_unprepare(dsi->mod_clk); + reset_control_assert(dsi->reset); + + return 0; +} + +static const struct dev_pm_ops sun6i_dsi_pm_ops = { + SET_RUNTIME_PM_OPS(sun6i_dsi_runtime_suspend, + sun6i_dsi_runtime_resume, + NULL) +}; + +static const struct of_device_id sun6i_dsi_of_table[] = { + { .compatible = "allwinner,sun6i-a31-mipi-dsi" }, + { } +}; +MODULE_DEVICE_TABLE(of, sun6i_dsi_of_table); + +static struct platform_driver sun6i_dsi_platform_driver = { + .probe = sun6i_dsi_probe, + .remove = sun6i_dsi_remove, + .driver = { + .name = "sun6i-mipi-dsi", + .of_match_table = sun6i_dsi_of_table, + .pm = &sun6i_dsi_pm_ops, + }, +}; +module_platform_driver(sun6i_dsi_platform_driver); + +MODULE_AUTHOR("Maxime Ripard <maxime.ripard@free-electrons.com>"); +MODULE_DESCRIPTION("Allwinner A31 DSI Driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.h b/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.h new file mode 100644 index 0000000..dbbc5b3 --- /dev/null +++ b/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.h @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (c) 2016 Allwinnertech Co., Ltd. + * Copyright (C) 2017-2018 Bootlin + * + * Maxime Ripard <maxime.ripard@bootlin.com> + */ + +#ifndef _SUN6I_MIPI_DSI_H_ +#define _SUN6I_MIPI_DSI_H_ + +#include <drm/drm_connector.h> +#include <drm/drm_encoder.h> +#include <drm/drm_mipi_dsi.h> + +struct sun6i_dphy { + struct clk *bus_clk; + struct clk *mod_clk; + struct regmap *regs; + struct reset_control *reset; +}; + +struct sun6i_dsi { + struct drm_connector connector; + struct drm_encoder encoder; + struct mipi_dsi_host host; + + struct clk *bus_clk; + struct clk *mod_clk; + struct regmap *regs; + struct reset_control *reset; + struct sun6i_dphy *dphy; + + struct device *dev; + struct sun4i_drv *drv; + struct mipi_dsi_device *device; + struct drm_panel *panel; +}; + +static inline struct sun6i_dsi *host_to_sun6i_dsi(struct mipi_dsi_host *host) +{ + return container_of(host, struct sun6i_dsi, host); +}; + +static inline struct sun6i_dsi *connector_to_sun6i_dsi(struct drm_connector *connector) +{ + return container_of(connector, struct sun6i_dsi, connector); +}; + +static inline struct sun6i_dsi *encoder_to_sun6i_dsi(const struct drm_encoder *encoder) +{ + return container_of(encoder, struct sun6i_dsi, encoder); +}; + +int sun6i_dphy_probe(struct sun6i_dsi *dsi, struct device_node *node); +int sun6i_dphy_remove(struct sun6i_dsi *dsi); + +int sun6i_dphy_init(struct sun6i_dphy *dphy, unsigned int lanes); +int sun6i_dphy_power_on(struct sun6i_dphy *dphy, unsigned int lanes); +int sun6i_dphy_power_off(struct sun6i_dphy *dphy); +int sun6i_dphy_exit(struct sun6i_dphy *dphy); + +#endif /* _SUN6I_MIPI_DSI_H_ */ diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index 616c963..9f83a65 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -2025,9 +2025,9 @@ static const struct tegra_dc_soc_info tegra124_dc_soc_info = { .coupled_pm = false, .has_nvdisplay = false, .num_primary_formats = ARRAY_SIZE(tegra124_primary_formats), - .primary_formats = tegra114_primary_formats, + .primary_formats = tegra124_primary_formats, .num_overlay_formats = ARRAY_SIZE(tegra124_overlay_formats), - .overlay_formats = tegra114_overlay_formats, + .overlay_formats = tegra124_overlay_formats, .modifiers = tegra124_modifiers, }; @@ -2178,7 +2178,7 @@ static int tegra_dc_couple(struct tegra_dc *dc) struct device_link *link; struct device *partner; - partner = driver_find_device(dc->dev->driver, NULL, 0, + partner = driver_find_device(dc->dev->driver, NULL, NULL, tegra_dc_match_by_pipe); if (!partner) return -EPROBE_DEFER; diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c index 7afe2f6..a051961 100644 --- a/drivers/gpu/drm/tegra/drm.c +++ b/drivers/gpu/drm/tegra/drm.c @@ -38,26 +38,11 @@ static int tegra_atomic_check(struct drm_device *drm, { int err; - err = drm_atomic_helper_check_modeset(drm, state); + err = drm_atomic_helper_check(drm, state); if (err < 0) return err; - err = tegra_display_hub_atomic_check(drm, state); - if (err < 0) - return err; - - err = drm_atomic_normalize_zpos(drm, state); - if (err < 0) - return err; - - err = drm_atomic_helper_check_planes(drm, state); - if (err < 0) - return err; - - if (state->legacy_cursor_update) - state->async_update = !drm_atomic_helper_async_check(drm, state); - - return 0; + return tegra_display_hub_atomic_check(drm, state); } static const struct drm_mode_config_funcs tegra_drm_mode_config_funcs = { @@ -151,6 +136,8 @@ static int tegra_drm_load(struct drm_device *drm, unsigned long flags) drm->mode_config.allow_fb_modifiers = true; + drm->mode_config.normalize_zpos = true; + drm->mode_config.funcs = &tegra_drm_mode_config_funcs; drm->mode_config.helper_private = &tegra_drm_mode_config_helpers; diff --git a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c index 1b278a2..1067e70 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c +++ b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c @@ -224,7 +224,7 @@ static void tilcdc_crtc_set_clk(struct drm_crtc *crtc) ret = clk_set_rate(priv->clk, req_rate * clkdiv); clk_rate = clk_get_rate(priv->clk); - if (ret < 0) { + if (ret < 0 || tilcdc_pclk_diff(req_rate, clk_rate) > 5) { /* * If we fail to set the clock rate (some architectures don't * use the common clock framework yet and may not implement diff --git a/drivers/gpu/drm/tinydrm/core/tinydrm-core.c b/drivers/gpu/drm/tinydrm/core/tinydrm-core.c index 4c661627..24a33bf 100644 --- a/drivers/gpu/drm/tinydrm/core/tinydrm-core.c +++ b/drivers/gpu/drm/tinydrm/core/tinydrm-core.c @@ -91,7 +91,7 @@ EXPORT_SYMBOL(tinydrm_gem_cma_prime_import_sg_table); * GEM object state and frees the memory used to store the object itself using * drm_gem_cma_free_object(). It also handles PRIME buffers which has the kernel * virtual address set by tinydrm_gem_cma_prime_import_sg_table(). Drivers - * can use this as their &drm_driver->gem_free_object callback. + * can use this as their &drm_driver->gem_free_object_unlocked callback. */ void tinydrm_gem_cma_free_object(struct drm_gem_object *gem_obj) { diff --git a/drivers/gpu/drm/tinydrm/core/tinydrm-helpers.c b/drivers/gpu/drm/tinydrm/core/tinydrm-helpers.c index d1c3ce9..dcd3901 100644 --- a/drivers/gpu/drm/tinydrm/core/tinydrm-helpers.c +++ b/drivers/gpu/drm/tinydrm/core/tinydrm-helpers.c @@ -78,6 +78,36 @@ bool tinydrm_merge_clips(struct drm_clip_rect *dst, } EXPORT_SYMBOL(tinydrm_merge_clips); +int tinydrm_fb_dirty(struct drm_framebuffer *fb, + struct drm_file *file_priv, + unsigned int flags, unsigned int color, + struct drm_clip_rect *clips, + unsigned int num_clips) +{ + struct tinydrm_device *tdev = fb->dev->dev_private; + struct drm_plane *plane = &tdev->pipe.plane; + int ret = 0; + + drm_modeset_lock(&plane->mutex, NULL); + + /* fbdev can flush even when we're not interested */ + if (plane->state->fb == fb) { + mutex_lock(&tdev->dirty_lock); + ret = tdev->fb_dirty(fb, file_priv, flags, + color, clips, num_clips); + mutex_unlock(&tdev->dirty_lock); + } + + drm_modeset_unlock(&plane->mutex); + + if (ret) + dev_err_once(fb->dev->dev, + "Failed to update display %d\n", ret); + + return ret; +} +EXPORT_SYMBOL(tinydrm_fb_dirty); + /** * tinydrm_memcpy - Copy clip buffer * @dst: Destination buffer diff --git a/drivers/gpu/drm/tinydrm/core/tinydrm-pipe.c b/drivers/gpu/drm/tinydrm/core/tinydrm-pipe.c index 11ae950..7e8e24d 100644 --- a/drivers/gpu/drm/tinydrm/core/tinydrm-pipe.c +++ b/drivers/gpu/drm/tinydrm/core/tinydrm-pipe.c @@ -125,9 +125,8 @@ void tinydrm_display_pipe_update(struct drm_simple_display_pipe *pipe, struct drm_crtc *crtc = &tdev->pipe.crtc; if (fb && (fb != old_state->fb)) { - pipe->plane.fb = fb; - if (fb->funcs->dirty) - fb->funcs->dirty(fb, NULL, 0, 0, NULL, 0); + if (tdev->fb_dirty) + tdev->fb_dirty(fb, NULL, 0, 0, NULL, 0); } if (crtc->state->event) { @@ -139,23 +138,6 @@ void tinydrm_display_pipe_update(struct drm_simple_display_pipe *pipe, } EXPORT_SYMBOL(tinydrm_display_pipe_update); -/** - * tinydrm_display_pipe_prepare_fb - Display pipe prepare_fb helper - * @pipe: Simple display pipe - * @plane_state: Plane state - * - * This function uses drm_gem_fb_prepare_fb() to check if the plane FB has an - * dma-buf attached, extracts the exclusive fence and attaches it to plane - * state for the atomic helper to wait on. Drivers can use this as their - * &drm_simple_display_pipe_funcs->prepare_fb callback. - */ -int tinydrm_display_pipe_prepare_fb(struct drm_simple_display_pipe *pipe, - struct drm_plane_state *plane_state) -{ - return drm_gem_fb_prepare_fb(&pipe->plane, plane_state); -} -EXPORT_SYMBOL(tinydrm_display_pipe_prepare_fb); - static int tinydrm_rotate_mode(struct drm_display_mode *mode, unsigned int rotation) { diff --git a/drivers/gpu/drm/tinydrm/ili9225.c b/drivers/gpu/drm/tinydrm/ili9225.c index a075950..841c69a 100644 --- a/drivers/gpu/drm/tinydrm/ili9225.c +++ b/drivers/gpu/drm/tinydrm/ili9225.c @@ -88,14 +88,8 @@ static int ili9225_fb_dirty(struct drm_framebuffer *fb, bool full; void *tr; - mutex_lock(&tdev->dirty_lock); - if (!mipi->enabled) - goto out_unlock; - - /* fbdev can flush even when we're not interested */ - if (tdev->pipe.plane.fb != fb) - goto out_unlock; + return 0; full = tinydrm_merge_clips(&clip, clips, num_clips, flags, fb->width, fb->height); @@ -108,7 +102,7 @@ static int ili9225_fb_dirty(struct drm_framebuffer *fb, tr = mipi->tx_buf; ret = mipi_dbi_buf_copy(mipi->tx_buf, fb, &clip, swap); if (ret) - goto out_unlock; + return ret; } else { tr = cma_obj->vaddr; } @@ -159,24 +153,18 @@ static int ili9225_fb_dirty(struct drm_framebuffer *fb, ret = mipi_dbi_command_buf(mipi, ILI9225_WRITE_DATA_TO_GRAM, tr, (clip.x2 - clip.x1) * (clip.y2 - clip.y1) * 2); -out_unlock: - mutex_unlock(&tdev->dirty_lock); - - if (ret) - dev_err_once(fb->dev->dev, "Failed to update display %d\n", - ret); - return ret; } static const struct drm_framebuffer_funcs ili9225_fb_funcs = { .destroy = drm_gem_fb_destroy, .create_handle = drm_gem_fb_create_handle, - .dirty = ili9225_fb_dirty, + .dirty = tinydrm_fb_dirty, }; static void ili9225_pipe_enable(struct drm_simple_display_pipe *pipe, - struct drm_crtc_state *crtc_state) + struct drm_crtc_state *crtc_state, + struct drm_plane_state *plane_state) { struct tinydrm_device *tdev = pipe_to_tinydrm(pipe); struct mipi_dbi *mipi = mipi_dbi_from_tinydrm(tdev); @@ -268,7 +256,7 @@ static void ili9225_pipe_enable(struct drm_simple_display_pipe *pipe, ili9225_command(mipi, ILI9225_DISPLAY_CONTROL_1, 0x1017); - mipi_dbi_enable_flush(mipi); + mipi_dbi_enable_flush(mipi, crtc_state, plane_state); } static void ili9225_pipe_disable(struct drm_simple_display_pipe *pipe) @@ -341,6 +329,8 @@ static int ili9225_init(struct device *dev, struct mipi_dbi *mipi, if (ret) return ret; + tdev->fb_dirty = ili9225_fb_dirty; + ret = tinydrm_display_pipe_init(tdev, pipe_funcs, DRM_MODE_CONNECTOR_VIRTUAL, ili9225_formats, @@ -364,7 +354,7 @@ static const struct drm_simple_display_pipe_funcs ili9225_pipe_funcs = { .enable = ili9225_pipe_enable, .disable = ili9225_pipe_disable, .update = tinydrm_display_pipe_update, - .prepare_fb = tinydrm_display_pipe_prepare_fb, + .prepare_fb = drm_gem_fb_simple_display_pipe_prepare_fb, }; static const struct drm_display_mode ili9225_mode = { diff --git a/drivers/gpu/drm/tinydrm/mi0283qt.c b/drivers/gpu/drm/tinydrm/mi0283qt.c index d8ed6e6..015d03f 100644 --- a/drivers/gpu/drm/tinydrm/mi0283qt.c +++ b/drivers/gpu/drm/tinydrm/mi0283qt.c @@ -19,6 +19,7 @@ #include <drm/drm_fb_helper.h> #include <drm/drm_modeset_helper.h> +#include <drm/drm_gem_framebuffer_helper.h> #include <drm/tinydrm/mipi-dbi.h> #include <drm/tinydrm/tinydrm-helpers.h> #include <video/mipi_display.h> @@ -49,7 +50,8 @@ #define ILI9341_MADCTL_MY BIT(7) static void mi0283qt_enable(struct drm_simple_display_pipe *pipe, - struct drm_crtc_state *crtc_state) + struct drm_crtc_state *crtc_state, + struct drm_plane_state *plane_state) { struct tinydrm_device *tdev = pipe_to_tinydrm(pipe); struct mipi_dbi *mipi = mipi_dbi_from_tinydrm(tdev); @@ -83,24 +85,6 @@ static void mi0283qt_enable(struct drm_simple_display_pipe *pipe, /* Memory Access Control */ mipi_dbi_command(mipi, MIPI_DCS_SET_PIXEL_FORMAT, MIPI_DCS_PIXEL_FMT_16BIT); - switch (mipi->rotation) { - default: - addr_mode = ILI9341_MADCTL_MV | ILI9341_MADCTL_MY | - ILI9341_MADCTL_MX; - break; - case 90: - addr_mode = ILI9341_MADCTL_MY; - break; - case 180: - addr_mode = ILI9341_MADCTL_MV; - break; - case 270: - addr_mode = ILI9341_MADCTL_MX; - break; - } - addr_mode |= ILI9341_MADCTL_BGR; - mipi_dbi_command(mipi, MIPI_DCS_SET_ADDRESS_MODE, addr_mode); - /* Frame Rate */ mipi_dbi_command(mipi, ILI9341_FRMCTR1, 0x00, 0x1b); @@ -126,14 +110,37 @@ static void mi0283qt_enable(struct drm_simple_display_pipe *pipe, msleep(100); out_enable: - mipi_dbi_enable_flush(mipi); + /* The PiTFT (ili9340) has a hardware reset circuit that + * resets only on power-on and not on each reboot through + * a gpio like the rpi-display does. + * As a result, we need to always apply the rotation value + * regardless of the display "on/off" state. + */ + switch (mipi->rotation) { + default: + addr_mode = ILI9341_MADCTL_MV | ILI9341_MADCTL_MY | + ILI9341_MADCTL_MX; + break; + case 90: + addr_mode = ILI9341_MADCTL_MY; + break; + case 180: + addr_mode = ILI9341_MADCTL_MV; + break; + case 270: + addr_mode = ILI9341_MADCTL_MX; + break; + } + addr_mode |= ILI9341_MADCTL_BGR; + mipi_dbi_command(mipi, MIPI_DCS_SET_ADDRESS_MODE, addr_mode); + mipi_dbi_enable_flush(mipi, crtc_state, plane_state); } static const struct drm_simple_display_pipe_funcs mi0283qt_pipe_funcs = { .enable = mi0283qt_enable, .disable = mipi_dbi_pipe_disable, .update = tinydrm_display_pipe_update, - .prepare_fb = tinydrm_display_pipe_prepare_fb, + .prepare_fb = drm_gem_fb_simple_display_pipe_prepare_fb, }; static const struct drm_display_mode mi0283qt_mode = { diff --git a/drivers/gpu/drm/tinydrm/mipi-dbi.c b/drivers/gpu/drm/tinydrm/mipi-dbi.c index 9e90381..4d1fb31 100644 --- a/drivers/gpu/drm/tinydrm/mipi-dbi.c +++ b/drivers/gpu/drm/tinydrm/mipi-dbi.c @@ -219,14 +219,8 @@ static int mipi_dbi_fb_dirty(struct drm_framebuffer *fb, bool full; void *tr; - mutex_lock(&tdev->dirty_lock); - if (!mipi->enabled) - goto out_unlock; - - /* fbdev can flush even when we're not interested */ - if (tdev->pipe.plane.fb != fb) - goto out_unlock; + return 0; full = tinydrm_merge_clips(&clip, clips, num_clips, flags, fb->width, fb->height); @@ -239,7 +233,7 @@ static int mipi_dbi_fb_dirty(struct drm_framebuffer *fb, tr = mipi->tx_buf; ret = mipi_dbi_buf_copy(mipi->tx_buf, fb, &clip, swap); if (ret) - goto out_unlock; + return ret; } else { tr = cma_obj->vaddr; } @@ -254,20 +248,13 @@ static int mipi_dbi_fb_dirty(struct drm_framebuffer *fb, ret = mipi_dbi_command_buf(mipi, MIPI_DCS_WRITE_MEMORY_START, tr, (clip.x2 - clip.x1) * (clip.y2 - clip.y1) * 2); -out_unlock: - mutex_unlock(&tdev->dirty_lock); - - if (ret) - dev_err_once(fb->dev->dev, "Failed to update display %d\n", - ret); - return ret; } static const struct drm_framebuffer_funcs mipi_dbi_fb_funcs = { .destroy = drm_gem_fb_destroy, .create_handle = drm_gem_fb_create_handle, - .dirty = mipi_dbi_fb_dirty, + .dirty = tinydrm_fb_dirty, }; /** @@ -278,13 +265,16 @@ static const struct drm_framebuffer_funcs mipi_dbi_fb_funcs = { * enables the backlight. Drivers can use this in their * &drm_simple_display_pipe_funcs->enable callback. */ -void mipi_dbi_enable_flush(struct mipi_dbi *mipi) +void mipi_dbi_enable_flush(struct mipi_dbi *mipi, + struct drm_crtc_state *crtc_state, + struct drm_plane_state *plane_state) { - struct drm_framebuffer *fb = mipi->tinydrm.pipe.plane.fb; + struct tinydrm_device *tdev = &mipi->tinydrm; + struct drm_framebuffer *fb = plane_state->fb; mipi->enabled = true; if (fb) - fb->funcs->dirty(fb, NULL, 0, 0, NULL, 0); + tdev->fb_dirty(fb, NULL, 0, 0, NULL, 0); backlight_enable(mipi->backlight); } @@ -381,6 +371,8 @@ int mipi_dbi_init(struct device *dev, struct mipi_dbi *mipi, if (ret) return ret; + tdev->fb_dirty = mipi_dbi_fb_dirty; + /* TODO: Maybe add DRM_MODE_CONNECTOR_SPI */ ret = tinydrm_display_pipe_init(tdev, pipe_funcs, DRM_MODE_CONNECTOR_VIRTUAL, diff --git a/drivers/gpu/drm/tinydrm/repaper.c b/drivers/gpu/drm/tinydrm/repaper.c index 7574063..1ee6855 100644 --- a/drivers/gpu/drm/tinydrm/repaper.c +++ b/drivers/gpu/drm/tinydrm/repaper.c @@ -540,14 +540,8 @@ static int repaper_fb_dirty(struct drm_framebuffer *fb, clip.y1 = 0; clip.y2 = fb->height; - mutex_lock(&tdev->dirty_lock); - if (!epd->enabled) - goto out_unlock; - - /* fbdev can flush even when we're not interested */ - if (tdev->pipe.plane.fb != fb) - goto out_unlock; + return 0; repaper_get_temperature(epd); @@ -555,16 +549,14 @@ static int repaper_fb_dirty(struct drm_framebuffer *fb, epd->factored_stage_time); buf = kmalloc(fb->width * fb->height, GFP_KERNEL); - if (!buf) { - ret = -ENOMEM; - goto out_unlock; - } + if (!buf) + return -ENOMEM; if (import_attach) { ret = dma_buf_begin_cpu_access(import_attach->dmabuf, DMA_FROM_DEVICE); if (ret) - goto out_unlock; + goto out_free; } tinydrm_xrgb8888_to_gray8(buf, cma_obj->vaddr, fb, &clip); @@ -573,7 +565,7 @@ static int repaper_fb_dirty(struct drm_framebuffer *fb, ret = dma_buf_end_cpu_access(import_attach->dmabuf, DMA_FROM_DEVICE); if (ret) - goto out_unlock; + goto out_free; } repaper_gray8_to_mono_reversed(buf, fb->width, fb->height); @@ -625,11 +617,7 @@ static int repaper_fb_dirty(struct drm_framebuffer *fb, } } -out_unlock: - mutex_unlock(&tdev->dirty_lock); - - if (ret) - DRM_DEV_ERROR(fb->dev->dev, "Failed to update display (%d)\n", ret); +out_free: kfree(buf); return ret; @@ -638,7 +626,7 @@ out_unlock: static const struct drm_framebuffer_funcs repaper_fb_funcs = { .destroy = drm_gem_fb_destroy, .create_handle = drm_gem_fb_create_handle, - .dirty = repaper_fb_dirty, + .dirty = tinydrm_fb_dirty, }; static void power_off(struct repaper_epd *epd) @@ -659,7 +647,8 @@ static void power_off(struct repaper_epd *epd) } static void repaper_pipe_enable(struct drm_simple_display_pipe *pipe, - struct drm_crtc_state *crtc_state) + struct drm_crtc_state *crtc_state, + struct drm_plane_state *plane_state) { struct tinydrm_device *tdev = pipe_to_tinydrm(pipe); struct repaper_epd *epd = epd_from_tinydrm(tdev); @@ -852,7 +841,7 @@ static const struct drm_simple_display_pipe_funcs repaper_pipe_funcs = { .enable = repaper_pipe_enable, .disable = repaper_pipe_disable, .update = tinydrm_display_pipe_update, - .prepare_fb = tinydrm_display_pipe_prepare_fb, + .prepare_fb = drm_gem_fb_simple_display_pipe_prepare_fb, }; static const uint32_t repaper_formats[] = { @@ -1069,6 +1058,8 @@ static int repaper_probe(struct spi_device *spi) if (ret) return ret; + tdev->fb_dirty = repaper_fb_dirty; + ret = tinydrm_display_pipe_init(tdev, &repaper_pipe_funcs, DRM_MODE_CONNECTOR_VIRTUAL, repaper_formats, diff --git a/drivers/gpu/drm/tinydrm/st7586.c b/drivers/gpu/drm/tinydrm/st7586.c index a6396ef..5c29e38 100644 --- a/drivers/gpu/drm/tinydrm/st7586.c +++ b/drivers/gpu/drm/tinydrm/st7586.c @@ -120,14 +120,8 @@ static int st7586_fb_dirty(struct drm_framebuffer *fb, int start, end; int ret = 0; - mutex_lock(&tdev->dirty_lock); - if (!mipi->enabled) - goto out_unlock; - - /* fbdev can flush even when we're not interested */ - if (tdev->pipe.plane.fb != fb) - goto out_unlock; + return 0; tinydrm_merge_clips(&clip, clips, num_clips, flags, fb->width, fb->height); @@ -141,7 +135,7 @@ static int st7586_fb_dirty(struct drm_framebuffer *fb, ret = st7586_buf_copy(mipi->tx_buf, fb, &clip); if (ret) - goto out_unlock; + return ret; /* Pixels are packed 3 per byte */ start = clip.x1 / 3; @@ -158,24 +152,18 @@ static int st7586_fb_dirty(struct drm_framebuffer *fb, (u8 *)mipi->tx_buf, (end - start) * (clip.y2 - clip.y1)); -out_unlock: - mutex_unlock(&tdev->dirty_lock); - - if (ret) - dev_err_once(fb->dev->dev, "Failed to update display %d\n", - ret); - return ret; } static const struct drm_framebuffer_funcs st7586_fb_funcs = { .destroy = drm_gem_fb_destroy, .create_handle = drm_gem_fb_create_handle, - .dirty = st7586_fb_dirty, + .dirty = tinydrm_fb_dirty, }; static void st7586_pipe_enable(struct drm_simple_display_pipe *pipe, - struct drm_crtc_state *crtc_state) + struct drm_crtc_state *crtc_state, + struct drm_plane_state *plane_state) { struct tinydrm_device *tdev = pipe_to_tinydrm(pipe); struct mipi_dbi *mipi = mipi_dbi_from_tinydrm(tdev); @@ -237,7 +225,7 @@ static void st7586_pipe_enable(struct drm_simple_display_pipe *pipe, mipi_dbi_command(mipi, MIPI_DCS_SET_DISPLAY_ON); - mipi_dbi_enable_flush(mipi); + mipi_dbi_enable_flush(mipi, crtc_state, plane_state); } static void st7586_pipe_disable(struct drm_simple_display_pipe *pipe) @@ -277,6 +265,8 @@ static int st7586_init(struct device *dev, struct mipi_dbi *mipi, if (ret) return ret; + tdev->fb_dirty = st7586_fb_dirty; + ret = tinydrm_display_pipe_init(tdev, pipe_funcs, DRM_MODE_CONNECTOR_VIRTUAL, st7586_formats, @@ -300,7 +290,7 @@ static const struct drm_simple_display_pipe_funcs st7586_pipe_funcs = { .enable = st7586_pipe_enable, .disable = st7586_pipe_disable, .update = tinydrm_display_pipe_update, - .prepare_fb = tinydrm_display_pipe_prepare_fb, + .prepare_fb = drm_gem_fb_simple_display_pipe_prepare_fb, }; static const struct drm_display_mode st7586_mode = { diff --git a/drivers/gpu/drm/tinydrm/st7735r.c b/drivers/gpu/drm/tinydrm/st7735r.c index 67d197e..6c7b15c 100644 --- a/drivers/gpu/drm/tinydrm/st7735r.c +++ b/drivers/gpu/drm/tinydrm/st7735r.c @@ -37,7 +37,8 @@ #define ST7735R_MV BIT(5) static void jd_t18003_t01_pipe_enable(struct drm_simple_display_pipe *pipe, - struct drm_crtc_state *crtc_state) + struct drm_crtc_state *crtc_state, + struct drm_plane_state *plane_state) { struct tinydrm_device *tdev = pipe_to_tinydrm(pipe); struct mipi_dbi *mipi = mipi_dbi_from_tinydrm(tdev); @@ -98,14 +99,14 @@ static void jd_t18003_t01_pipe_enable(struct drm_simple_display_pipe *pipe, msleep(20); - mipi_dbi_enable_flush(mipi); + mipi_dbi_enable_flush(mipi, crtc_state, plane_state); } static const struct drm_simple_display_pipe_funcs jd_t18003_t01_pipe_funcs = { .enable = jd_t18003_t01_pipe_enable, .disable = mipi_dbi_pipe_disable, .update = tinydrm_display_pipe_update, - .prepare_fb = tinydrm_display_pipe_prepare_fb, + .prepare_fb = drm_gem_fb_simple_display_pipe_prepare_fb, }; static const struct drm_display_mode jd_t18003_t01_mode = { diff --git a/drivers/gpu/drm/tve200/tve200_display.c b/drivers/gpu/drm/tve200/tve200_display.c index db397fc..e8723a2 100644 --- a/drivers/gpu/drm/tve200/tve200_display.c +++ b/drivers/gpu/drm/tve200/tve200_display.c @@ -120,7 +120,8 @@ static int tve200_display_check(struct drm_simple_display_pipe *pipe, } static void tve200_display_enable(struct drm_simple_display_pipe *pipe, - struct drm_crtc_state *cstate) + struct drm_crtc_state *cstate, + struct drm_plane_state *plane_state) { struct drm_crtc *crtc = &pipe->crtc; struct drm_plane *plane = &pipe->plane; @@ -292,18 +293,12 @@ static void tve200_display_disable_vblank(struct drm_simple_display_pipe *pipe) writel(0, priv->regs + TVE200_INT_EN); } -static int tve200_display_prepare_fb(struct drm_simple_display_pipe *pipe, - struct drm_plane_state *plane_state) -{ - return drm_gem_fb_prepare_fb(&pipe->plane, plane_state); -} - static const struct drm_simple_display_pipe_funcs tve200_display_funcs = { .check = tve200_display_check, .enable = tve200_display_enable, .disable = tve200_display_disable, .update = tve200_display_update, - .prepare_fb = tve200_display_prepare_fb, + .prepare_fb = drm_gem_fb_simple_display_pipe_prepare_fb, .enable_vblank = tve200_display_enable_vblank, .disable_vblank = tve200_display_disable_vblank, }; diff --git a/drivers/gpu/drm/udl/udl_connector.c b/drivers/gpu/drm/udl/udl_connector.c index c3dc1fd..09dc585 100644 --- a/drivers/gpu/drm/udl/udl_connector.c +++ b/drivers/gpu/drm/udl/udl_connector.c @@ -105,7 +105,7 @@ static int udl_get_modes(struct drm_connector *connector) return 0; } -static int udl_mode_valid(struct drm_connector *connector, +static enum drm_mode_status udl_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct udl_device *udl = connector->dev->dev_private; diff --git a/drivers/gpu/drm/udl/udl_dmabuf.c b/drivers/gpu/drm/udl/udl_dmabuf.c index 2867ed1..0a20695 100644 --- a/drivers/gpu/drm/udl/udl_dmabuf.c +++ b/drivers/gpu/drm/udl/udl_dmabuf.c @@ -76,6 +76,7 @@ static struct sg_table *udl_map_dma_buf(struct dma_buf_attachment *attach, struct udl_drm_dmabuf_attachment *udl_attach = attach->priv; struct udl_gem_object *obj = to_udl_bo(attach->dmabuf->priv); struct drm_device *dev = obj->base.dev; + struct udl_device *udl = dev->dev_private; struct scatterlist *rd, *wr; struct sg_table *sgt = NULL; unsigned int i; @@ -112,7 +113,7 @@ static struct sg_table *udl_map_dma_buf(struct dma_buf_attachment *attach, return ERR_PTR(-ENOMEM); } - mutex_lock(&dev->struct_mutex); + mutex_lock(&udl->gem_lock); rd = obj->sg->sgl; wr = sgt->sgl; @@ -137,7 +138,7 @@ static struct sg_table *udl_map_dma_buf(struct dma_buf_attachment *attach, attach->priv = udl_attach; err_unlock: - mutex_unlock(&dev->struct_mutex); + mutex_unlock(&udl->gem_lock); return sgt; } diff --git a/drivers/gpu/drm/udl/udl_drv.c b/drivers/gpu/drm/udl/udl_drv.c index 3c45a30..9ef515d 100644 --- a/drivers/gpu/drm/udl/udl_drv.c +++ b/drivers/gpu/drm/udl/udl_drv.c @@ -53,7 +53,7 @@ static struct drm_driver driver = { .unload = udl_driver_unload, /* gem hooks */ - .gem_free_object = udl_gem_free_object, + .gem_free_object_unlocked = udl_gem_free_object, .gem_vm_ops = &udl_gem_vm_ops, .dumb_create = udl_dumb_create, diff --git a/drivers/gpu/drm/udl/udl_drv.h b/drivers/gpu/drm/udl/udl_drv.h index 2a75ab8..55c0cc3 100644 --- a/drivers/gpu/drm/udl/udl_drv.h +++ b/drivers/gpu/drm/udl/udl_drv.h @@ -54,6 +54,8 @@ struct udl_device { struct usb_device *udev; struct drm_crtc *crtc; + struct mutex gem_lock; + int sku_pixel_limit; struct urb_list urbs; diff --git a/drivers/gpu/drm/udl/udl_gem.c b/drivers/gpu/drm/udl/udl_gem.c index dee6bd9..9a15cce 100644 --- a/drivers/gpu/drm/udl/udl_gem.c +++ b/drivers/gpu/drm/udl/udl_gem.c @@ -214,9 +214,10 @@ int udl_gem_mmap(struct drm_file *file, struct drm_device *dev, { struct udl_gem_object *gobj; struct drm_gem_object *obj; + struct udl_device *udl = dev->dev_private; int ret = 0; - mutex_lock(&dev->struct_mutex); + mutex_lock(&udl->gem_lock); obj = drm_gem_object_lookup(file, handle); if (obj == NULL) { ret = -ENOENT; @@ -236,6 +237,6 @@ int udl_gem_mmap(struct drm_file *file, struct drm_device *dev, out: drm_gem_object_put(&gobj->base); unlock: - mutex_unlock(&dev->struct_mutex); + mutex_unlock(&udl->gem_lock); return ret; } diff --git a/drivers/gpu/drm/udl/udl_main.c b/drivers/gpu/drm/udl/udl_main.c index f1ec452..d518de8 100644 --- a/drivers/gpu/drm/udl/udl_main.c +++ b/drivers/gpu/drm/udl/udl_main.c @@ -324,6 +324,8 @@ int udl_driver_load(struct drm_device *dev, unsigned long flags) udl->ddev = dev; dev->dev_private = udl; + mutex_init(&udl->gem_lock); + if (!udl_parse_vendor_descriptor(dev, udl->udev)) { ret = -ENODEV; DRM_ERROR("firmware not recognized. Assume incompatible device\n"); diff --git a/drivers/gpu/drm/v3d/Kconfig b/drivers/gpu/drm/v3d/Kconfig new file mode 100644 index 0000000..a0c0259 --- /dev/null +++ b/drivers/gpu/drm/v3d/Kconfig @@ -0,0 +1,9 @@ +config DRM_V3D + tristate "Broadcom V3D 3.x and newer" + depends on ARCH_BCM || ARCH_BCMSTB || COMPILE_TEST + depends on DRM + depends on COMMON_CLK + select DRM_SCHED + help + Choose this option if you have a system that has a Broadcom + V3D 3.x or newer GPU, such as BCM7268. diff --git a/drivers/gpu/drm/v3d/Makefile b/drivers/gpu/drm/v3d/Makefile new file mode 100644 index 0000000..34446e1 --- /dev/null +++ b/drivers/gpu/drm/v3d/Makefile @@ -0,0 +1,18 @@ +# Please keep these build lists sorted! + +# core driver code +v3d-y := \ + v3d_bo.o \ + v3d_drv.o \ + v3d_fence.o \ + v3d_gem.o \ + v3d_irq.o \ + v3d_mmu.o \ + v3d_trace_points.o \ + v3d_sched.o + +v3d-$(CONFIG_DEBUG_FS) += v3d_debugfs.o + +obj-$(CONFIG_DRM_V3D) += v3d.o + +CFLAGS_v3d_trace_points.o := -I$(src) diff --git a/drivers/gpu/drm/v3d/v3d_bo.c b/drivers/gpu/drm/v3d/v3d_bo.c new file mode 100644 index 0000000..7b1e2a5 --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_bo.c @@ -0,0 +1,389 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2015-2018 Broadcom */ + +/** + * DOC: V3D GEM BO management support + * + * Compared to VC4 (V3D 2.x), V3D 3.3 introduces an MMU between the + * GPU and the bus, allowing us to use shmem objects for our storage + * instead of CMA. + * + * Physically contiguous objects may still be imported to V3D, but the + * driver doesn't allocate physically contiguous objects on its own. + * Display engines requiring physically contiguous allocations should + * look into Mesa's "renderonly" support (as used by the Mesa pl111 + * driver) for an example of how to integrate with V3D. + * + * Long term, we should support evicting pages from the MMU when under + * memory pressure (thus the v3d_bo_get_pages() refcounting), but + * that's not a high priority since our systems tend to not have swap. + */ + +#include <linux/dma-buf.h> +#include <linux/pfn_t.h> + +#include "v3d_drv.h" +#include "uapi/drm/v3d_drm.h" + +/* Pins the shmem pages, fills in the .pages and .sgt fields of the BO, and maps + * it for DMA. + */ +static int +v3d_bo_get_pages(struct v3d_bo *bo) +{ + struct drm_gem_object *obj = &bo->base; + struct drm_device *dev = obj->dev; + int npages = obj->size >> PAGE_SHIFT; + int ret = 0; + + mutex_lock(&bo->lock); + if (bo->pages_refcount++ != 0) + goto unlock; + + if (!obj->import_attach) { + bo->pages = drm_gem_get_pages(obj); + if (IS_ERR(bo->pages)) { + ret = PTR_ERR(bo->pages); + goto unlock; + } + + bo->sgt = drm_prime_pages_to_sg(bo->pages, npages); + if (IS_ERR(bo->sgt)) { + ret = PTR_ERR(bo->sgt); + goto put_pages; + } + + /* Map the pages for use by the GPU. */ + dma_map_sg(dev->dev, bo->sgt->sgl, + bo->sgt->nents, DMA_BIDIRECTIONAL); + } else { + bo->pages = kcalloc(npages, sizeof(*bo->pages), GFP_KERNEL); + if (!bo->pages) + goto put_pages; + + drm_prime_sg_to_page_addr_arrays(bo->sgt, bo->pages, + NULL, npages); + + /* Note that dma-bufs come in mapped. */ + } + + mutex_unlock(&bo->lock); + + return 0; + +put_pages: + drm_gem_put_pages(obj, bo->pages, true, true); + bo->pages = NULL; +unlock: + bo->pages_refcount--; + mutex_unlock(&bo->lock); + return ret; +} + +static void +v3d_bo_put_pages(struct v3d_bo *bo) +{ + struct drm_gem_object *obj = &bo->base; + + mutex_lock(&bo->lock); + if (--bo->pages_refcount == 0) { + if (!obj->import_attach) { + dma_unmap_sg(obj->dev->dev, bo->sgt->sgl, + bo->sgt->nents, DMA_BIDIRECTIONAL); + sg_free_table(bo->sgt); + kfree(bo->sgt); + drm_gem_put_pages(obj, bo->pages, true, true); + } else { + kfree(bo->pages); + } + } + mutex_unlock(&bo->lock); +} + +static struct v3d_bo *v3d_bo_create_struct(struct drm_device *dev, + size_t unaligned_size) +{ + struct v3d_dev *v3d = to_v3d_dev(dev); + struct drm_gem_object *obj; + struct v3d_bo *bo; + size_t size = roundup(unaligned_size, PAGE_SIZE); + int ret; + + if (size == 0) + return ERR_PTR(-EINVAL); + + bo = kzalloc(sizeof(*bo), GFP_KERNEL); + if (!bo) + return ERR_PTR(-ENOMEM); + obj = &bo->base; + + INIT_LIST_HEAD(&bo->vmas); + INIT_LIST_HEAD(&bo->unref_head); + mutex_init(&bo->lock); + + ret = drm_gem_object_init(dev, obj, size); + if (ret) + goto free_bo; + + spin_lock(&v3d->mm_lock); + ret = drm_mm_insert_node_generic(&v3d->mm, &bo->node, + obj->size >> PAGE_SHIFT, + GMP_GRANULARITY >> PAGE_SHIFT, 0, 0); + spin_unlock(&v3d->mm_lock); + if (ret) + goto free_obj; + + return bo; + +free_obj: + drm_gem_object_release(obj); +free_bo: + kfree(bo); + return ERR_PTR(ret); +} + +struct v3d_bo *v3d_bo_create(struct drm_device *dev, struct drm_file *file_priv, + size_t unaligned_size) +{ + struct v3d_dev *v3d = to_v3d_dev(dev); + struct drm_gem_object *obj; + struct v3d_bo *bo; + int ret; + + bo = v3d_bo_create_struct(dev, unaligned_size); + if (IS_ERR(bo)) + return bo; + obj = &bo->base; + + bo->resv = &bo->_resv; + reservation_object_init(bo->resv); + + ret = v3d_bo_get_pages(bo); + if (ret) + goto free_mm; + + v3d_mmu_insert_ptes(bo); + + mutex_lock(&v3d->bo_lock); + v3d->bo_stats.num_allocated++; + v3d->bo_stats.pages_allocated += obj->size >> PAGE_SHIFT; + mutex_unlock(&v3d->bo_lock); + + return bo; + +free_mm: + spin_lock(&v3d->mm_lock); + drm_mm_remove_node(&bo->node); + spin_unlock(&v3d->mm_lock); + + drm_gem_object_release(obj); + kfree(bo); + return ERR_PTR(ret); +} + +/* Called DRM core on the last userspace/kernel unreference of the + * BO. + */ +void v3d_free_object(struct drm_gem_object *obj) +{ + struct v3d_dev *v3d = to_v3d_dev(obj->dev); + struct v3d_bo *bo = to_v3d_bo(obj); + + mutex_lock(&v3d->bo_lock); + v3d->bo_stats.num_allocated--; + v3d->bo_stats.pages_allocated -= obj->size >> PAGE_SHIFT; + mutex_unlock(&v3d->bo_lock); + + reservation_object_fini(&bo->_resv); + + v3d_bo_put_pages(bo); + + if (obj->import_attach) + drm_prime_gem_destroy(obj, bo->sgt); + + v3d_mmu_remove_ptes(bo); + spin_lock(&v3d->mm_lock); + drm_mm_remove_node(&bo->node); + spin_unlock(&v3d->mm_lock); + + mutex_destroy(&bo->lock); + + drm_gem_object_release(obj); + kfree(bo); +} + +struct reservation_object *v3d_prime_res_obj(struct drm_gem_object *obj) +{ + struct v3d_bo *bo = to_v3d_bo(obj); + + return bo->resv; +} + +static void +v3d_set_mmap_vma_flags(struct vm_area_struct *vma) +{ + vma->vm_flags &= ~VM_PFNMAP; + vma->vm_flags |= VM_MIXEDMAP; + vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); +} + +int v3d_gem_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct drm_gem_object *obj = vma->vm_private_data; + struct v3d_bo *bo = to_v3d_bo(obj); + unsigned long pfn; + pgoff_t pgoff; + int ret; + + /* We don't use vmf->pgoff since that has the fake offset: */ + pgoff = (vmf->address - vma->vm_start) >> PAGE_SHIFT; + pfn = page_to_pfn(bo->pages[pgoff]); + + ret = vm_insert_mixed(vma, vmf->address, __pfn_to_pfn_t(pfn, PFN_DEV)); + + switch (ret) { + case -EAGAIN: + case 0: + case -ERESTARTSYS: + case -EINTR: + case -EBUSY: + /* + * EBUSY is ok: this just means that another thread + * already did the job. + */ + return VM_FAULT_NOPAGE; + case -ENOMEM: + return VM_FAULT_OOM; + default: + return VM_FAULT_SIGBUS; + } +} + +int v3d_mmap(struct file *filp, struct vm_area_struct *vma) +{ + int ret; + + ret = drm_gem_mmap(filp, vma); + if (ret) + return ret; + + v3d_set_mmap_vma_flags(vma); + + return ret; +} + +int v3d_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) +{ + int ret; + + ret = drm_gem_mmap_obj(obj, obj->size, vma); + if (ret < 0) + return ret; + + v3d_set_mmap_vma_flags(vma); + + return 0; +} + +struct sg_table * +v3d_prime_get_sg_table(struct drm_gem_object *obj) +{ + struct v3d_bo *bo = to_v3d_bo(obj); + int npages = obj->size >> PAGE_SHIFT; + + return drm_prime_pages_to_sg(bo->pages, npages); +} + +struct drm_gem_object * +v3d_prime_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *sgt) +{ + struct drm_gem_object *obj; + struct v3d_bo *bo; + + bo = v3d_bo_create_struct(dev, attach->dmabuf->size); + if (IS_ERR(bo)) + return ERR_CAST(bo); + obj = &bo->base; + + bo->resv = attach->dmabuf->resv; + + bo->sgt = sgt; + v3d_bo_get_pages(bo); + + v3d_mmu_insert_ptes(bo); + + return obj; +} + +int v3d_create_bo_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_v3d_create_bo *args = data; + struct v3d_bo *bo = NULL; + int ret; + + if (args->flags != 0) { + DRM_INFO("unknown create_bo flags: %d\n", args->flags); + return -EINVAL; + } + + bo = v3d_bo_create(dev, file_priv, PAGE_ALIGN(args->size)); + if (IS_ERR(bo)) + return PTR_ERR(bo); + + args->offset = bo->node.start << PAGE_SHIFT; + + ret = drm_gem_handle_create(file_priv, &bo->base, &args->handle); + drm_gem_object_put_unlocked(&bo->base); + + return ret; +} + +int v3d_mmap_bo_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_v3d_mmap_bo *args = data; + struct drm_gem_object *gem_obj; + int ret; + + if (args->flags != 0) { + DRM_INFO("unknown mmap_bo flags: %d\n", args->flags); + return -EINVAL; + } + + gem_obj = drm_gem_object_lookup(file_priv, args->handle); + if (!gem_obj) { + DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle); + return -ENOENT; + } + + ret = drm_gem_create_mmap_offset(gem_obj); + if (ret == 0) + args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node); + drm_gem_object_put_unlocked(gem_obj); + + return ret; +} + +int v3d_get_bo_offset_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_v3d_get_bo_offset *args = data; + struct drm_gem_object *gem_obj; + struct v3d_bo *bo; + + gem_obj = drm_gem_object_lookup(file_priv, args->handle); + if (!gem_obj) { + DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle); + return -ENOENT; + } + bo = to_v3d_bo(gem_obj); + + args->offset = bo->node.start << PAGE_SHIFT; + + drm_gem_object_put_unlocked(gem_obj); + return 0; +} diff --git a/drivers/gpu/drm/v3d/v3d_debugfs.c b/drivers/gpu/drm/v3d/v3d_debugfs.c new file mode 100644 index 0000000..4db62c5 --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_debugfs.c @@ -0,0 +1,191 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2014-2018 Broadcom */ + +#include <linux/circ_buf.h> +#include <linux/ctype.h> +#include <linux/debugfs.h> +#include <linux/pm_runtime.h> +#include <linux/seq_file.h> +#include <drm/drmP.h> + +#include "v3d_drv.h" +#include "v3d_regs.h" + +#define REGDEF(reg) { reg, #reg } +struct v3d_reg_def { + u32 reg; + const char *name; +}; + +static const struct v3d_reg_def v3d_hub_reg_defs[] = { + REGDEF(V3D_HUB_AXICFG), + REGDEF(V3D_HUB_UIFCFG), + REGDEF(V3D_HUB_IDENT0), + REGDEF(V3D_HUB_IDENT1), + REGDEF(V3D_HUB_IDENT2), + REGDEF(V3D_HUB_IDENT3), + REGDEF(V3D_HUB_INT_STS), + REGDEF(V3D_HUB_INT_MSK_STS), +}; + +static const struct v3d_reg_def v3d_gca_reg_defs[] = { + REGDEF(V3D_GCA_SAFE_SHUTDOWN), + REGDEF(V3D_GCA_SAFE_SHUTDOWN_ACK), +}; + +static const struct v3d_reg_def v3d_core_reg_defs[] = { + REGDEF(V3D_CTL_IDENT0), + REGDEF(V3D_CTL_IDENT1), + REGDEF(V3D_CTL_IDENT2), + REGDEF(V3D_CTL_MISCCFG), + REGDEF(V3D_CTL_INT_STS), + REGDEF(V3D_CTL_INT_MSK_STS), + REGDEF(V3D_CLE_CT0CS), + REGDEF(V3D_CLE_CT0CA), + REGDEF(V3D_CLE_CT0EA), + REGDEF(V3D_CLE_CT1CS), + REGDEF(V3D_CLE_CT1CA), + REGDEF(V3D_CLE_CT1EA), + + REGDEF(V3D_PTB_BPCA), + REGDEF(V3D_PTB_BPCS), + + REGDEF(V3D_MMU_CTL), + REGDEF(V3D_MMU_VIO_ADDR), + + REGDEF(V3D_GMP_STATUS), + REGDEF(V3D_GMP_CFG), + REGDEF(V3D_GMP_VIO_ADDR), +}; + +static int v3d_v3d_debugfs_regs(struct seq_file *m, void *unused) +{ + struct drm_info_node *node = (struct drm_info_node *)m->private; + struct drm_device *dev = node->minor->dev; + struct v3d_dev *v3d = to_v3d_dev(dev); + int i, core; + + for (i = 0; i < ARRAY_SIZE(v3d_hub_reg_defs); i++) { + seq_printf(m, "%s (0x%04x): 0x%08x\n", + v3d_hub_reg_defs[i].name, v3d_hub_reg_defs[i].reg, + V3D_READ(v3d_hub_reg_defs[i].reg)); + } + + for (i = 0; i < ARRAY_SIZE(v3d_gca_reg_defs); i++) { + seq_printf(m, "%s (0x%04x): 0x%08x\n", + v3d_gca_reg_defs[i].name, v3d_gca_reg_defs[i].reg, + V3D_GCA_READ(v3d_gca_reg_defs[i].reg)); + } + + for (core = 0; core < v3d->cores; core++) { + for (i = 0; i < ARRAY_SIZE(v3d_core_reg_defs); i++) { + seq_printf(m, "core %d %s (0x%04x): 0x%08x\n", + core, + v3d_core_reg_defs[i].name, + v3d_core_reg_defs[i].reg, + V3D_CORE_READ(core, + v3d_core_reg_defs[i].reg)); + } + } + + return 0; +} + +static int v3d_v3d_debugfs_ident(struct seq_file *m, void *unused) +{ + struct drm_info_node *node = (struct drm_info_node *)m->private; + struct drm_device *dev = node->minor->dev; + struct v3d_dev *v3d = to_v3d_dev(dev); + u32 ident0, ident1, ident2, ident3, cores; + int ret, core; + + ret = pm_runtime_get_sync(v3d->dev); + if (ret < 0) + return ret; + + ident0 = V3D_READ(V3D_HUB_IDENT0); + ident1 = V3D_READ(V3D_HUB_IDENT1); + ident2 = V3D_READ(V3D_HUB_IDENT2); + ident3 = V3D_READ(V3D_HUB_IDENT3); + cores = V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_NCORES); + + seq_printf(m, "Revision: %d.%d.%d.%d\n", + V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_TVER), + V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_REV), + V3D_GET_FIELD(ident3, V3D_HUB_IDENT3_IPREV), + V3D_GET_FIELD(ident3, V3D_HUB_IDENT3_IPIDX)); + seq_printf(m, "MMU: %s\n", + (ident2 & V3D_HUB_IDENT2_WITH_MMU) ? "yes" : "no"); + seq_printf(m, "TFU: %s\n", + (ident1 & V3D_HUB_IDENT1_WITH_TFU) ? "yes" : "no"); + seq_printf(m, "TSY: %s\n", + (ident1 & V3D_HUB_IDENT1_WITH_TSY) ? "yes" : "no"); + seq_printf(m, "MSO: %s\n", + (ident1 & V3D_HUB_IDENT1_WITH_MSO) ? "yes" : "no"); + seq_printf(m, "L3C: %s (%dkb)\n", + (ident1 & V3D_HUB_IDENT1_WITH_L3C) ? "yes" : "no", + V3D_GET_FIELD(ident2, V3D_HUB_IDENT2_L3C_NKB)); + + for (core = 0; core < cores; core++) { + u32 misccfg; + u32 nslc, ntmu, qups; + + ident0 = V3D_CORE_READ(core, V3D_CTL_IDENT0); + ident1 = V3D_CORE_READ(core, V3D_CTL_IDENT1); + ident2 = V3D_CORE_READ(core, V3D_CTL_IDENT2); + misccfg = V3D_CORE_READ(core, V3D_CTL_MISCCFG); + + nslc = V3D_GET_FIELD(ident1, V3D_IDENT1_NSLC); + ntmu = V3D_GET_FIELD(ident1, V3D_IDENT1_NTMU); + qups = V3D_GET_FIELD(ident1, V3D_IDENT1_QUPS); + + seq_printf(m, "Core %d:\n", core); + seq_printf(m, " Revision: %d.%d\n", + V3D_GET_FIELD(ident0, V3D_IDENT0_VER), + V3D_GET_FIELD(ident1, V3D_IDENT1_REV)); + seq_printf(m, " Slices: %d\n", nslc); + seq_printf(m, " TMUs: %d\n", nslc * ntmu); + seq_printf(m, " QPUs: %d\n", nslc * qups); + seq_printf(m, " Semaphores: %d\n", + V3D_GET_FIELD(ident1, V3D_IDENT1_NSEM)); + seq_printf(m, " BCG int: %d\n", + (ident2 & V3D_IDENT2_BCG_INT) != 0); + seq_printf(m, " Override TMU: %d\n", + (misccfg & V3D_MISCCFG_OVRTMUOUT) != 0); + } + + pm_runtime_mark_last_busy(v3d->dev); + pm_runtime_put_autosuspend(v3d->dev); + + return 0; +} + +static int v3d_debugfs_bo_stats(struct seq_file *m, void *unused) +{ + struct drm_info_node *node = (struct drm_info_node *)m->private; + struct drm_device *dev = node->minor->dev; + struct v3d_dev *v3d = to_v3d_dev(dev); + + mutex_lock(&v3d->bo_lock); + seq_printf(m, "allocated bos: %d\n", + v3d->bo_stats.num_allocated); + seq_printf(m, "allocated bo size (kb): %ld\n", + (long)v3d->bo_stats.pages_allocated << (PAGE_SHIFT - 10)); + mutex_unlock(&v3d->bo_lock); + + return 0; +} + +static const struct drm_info_list v3d_debugfs_list[] = { + {"v3d_ident", v3d_v3d_debugfs_ident, 0}, + {"v3d_regs", v3d_v3d_debugfs_regs, 0}, + {"bo_stats", v3d_debugfs_bo_stats, 0}, +}; + +int +v3d_debugfs_init(struct drm_minor *minor) +{ + return drm_debugfs_create_files(v3d_debugfs_list, + ARRAY_SIZE(v3d_debugfs_list), + minor->debugfs_root, minor); +} diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c new file mode 100644 index 0000000..cdb5820 --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_drv.c @@ -0,0 +1,371 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2014-2018 Broadcom */ + +/** + * DOC: Broadcom V3D Graphics Driver + * + * This driver supports the Broadcom V3D 3.3 and 4.1 OpenGL ES GPUs. + * For V3D 2.x support, see the VC4 driver. + * + * Currently only single-core rendering using the binner and renderer + * is supported. The TFU (texture formatting unit) and V3D 4.x's CSD + * (compute shader dispatch) are not yet supported. + */ + +#include <linux/clk.h> +#include <linux/device.h> +#include <linux/io.h> +#include <linux/module.h> +#include <linux/of_platform.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> +#include <drm/drm_fb_cma_helper.h> +#include <drm/drm_fb_helper.h> + +#include "uapi/drm/v3d_drm.h" +#include "v3d_drv.h" +#include "v3d_regs.h" + +#define DRIVER_NAME "v3d" +#define DRIVER_DESC "Broadcom V3D graphics" +#define DRIVER_DATE "20180419" +#define DRIVER_MAJOR 1 +#define DRIVER_MINOR 0 +#define DRIVER_PATCHLEVEL 0 + +#ifdef CONFIG_PM +static int v3d_runtime_suspend(struct device *dev) +{ + struct drm_device *drm = dev_get_drvdata(dev); + struct v3d_dev *v3d = to_v3d_dev(drm); + + v3d_irq_disable(v3d); + + clk_disable_unprepare(v3d->clk); + + return 0; +} + +static int v3d_runtime_resume(struct device *dev) +{ + struct drm_device *drm = dev_get_drvdata(dev); + struct v3d_dev *v3d = to_v3d_dev(drm); + int ret; + + ret = clk_prepare_enable(v3d->clk); + if (ret != 0) + return ret; + + /* XXX: VPM base */ + + v3d_mmu_set_page_table(v3d); + v3d_irq_enable(v3d); + + return 0; +} +#endif + +static const struct dev_pm_ops v3d_v3d_pm_ops = { + SET_RUNTIME_PM_OPS(v3d_runtime_suspend, v3d_runtime_resume, NULL) +}; + +static int v3d_get_param_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct v3d_dev *v3d = to_v3d_dev(dev); + struct drm_v3d_get_param *args = data; + int ret; + static const u32 reg_map[] = { + [DRM_V3D_PARAM_V3D_UIFCFG] = V3D_HUB_UIFCFG, + [DRM_V3D_PARAM_V3D_HUB_IDENT1] = V3D_HUB_IDENT1, + [DRM_V3D_PARAM_V3D_HUB_IDENT2] = V3D_HUB_IDENT2, + [DRM_V3D_PARAM_V3D_HUB_IDENT3] = V3D_HUB_IDENT3, + [DRM_V3D_PARAM_V3D_CORE0_IDENT0] = V3D_CTL_IDENT0, + [DRM_V3D_PARAM_V3D_CORE0_IDENT1] = V3D_CTL_IDENT1, + [DRM_V3D_PARAM_V3D_CORE0_IDENT2] = V3D_CTL_IDENT2, + }; + + if (args->pad != 0) + return -EINVAL; + + /* Note that DRM_V3D_PARAM_V3D_CORE0_IDENT0 is 0, so we need + * to explicitly allow it in the "the register in our + * parameter map" check. + */ + if (args->param < ARRAY_SIZE(reg_map) && + (reg_map[args->param] || + args->param == DRM_V3D_PARAM_V3D_CORE0_IDENT0)) { + u32 offset = reg_map[args->param]; + + if (args->value != 0) + return -EINVAL; + + ret = pm_runtime_get_sync(v3d->dev); + if (args->param >= DRM_V3D_PARAM_V3D_CORE0_IDENT0 && + args->param <= DRM_V3D_PARAM_V3D_CORE0_IDENT2) { + args->value = V3D_CORE_READ(0, offset); + } else { + args->value = V3D_READ(offset); + } + pm_runtime_mark_last_busy(v3d->dev); + pm_runtime_put_autosuspend(v3d->dev); + return 0; + } + + /* Any params that aren't just register reads would go here. */ + + DRM_DEBUG("Unknown parameter %d\n", args->param); + return -EINVAL; +} + +static int +v3d_open(struct drm_device *dev, struct drm_file *file) +{ + struct v3d_dev *v3d = to_v3d_dev(dev); + struct v3d_file_priv *v3d_priv; + int i; + + v3d_priv = kzalloc(sizeof(*v3d_priv), GFP_KERNEL); + if (!v3d_priv) + return -ENOMEM; + + v3d_priv->v3d = v3d; + + for (i = 0; i < V3D_MAX_QUEUES; i++) { + drm_sched_entity_init(&v3d->queue[i].sched, + &v3d_priv->sched_entity[i], + &v3d->queue[i].sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL], + NULL); + } + + file->driver_priv = v3d_priv; + + return 0; +} + +static void +v3d_postclose(struct drm_device *dev, struct drm_file *file) +{ + struct v3d_dev *v3d = to_v3d_dev(dev); + struct v3d_file_priv *v3d_priv = file->driver_priv; + enum v3d_queue q; + + for (q = 0; q < V3D_MAX_QUEUES; q++) { + drm_sched_entity_fini(&v3d->queue[q].sched, + &v3d_priv->sched_entity[q]); + } + + kfree(v3d_priv); +} + +static const struct file_operations v3d_drm_fops = { + .owner = THIS_MODULE, + .open = drm_open, + .release = drm_release, + .unlocked_ioctl = drm_ioctl, + .mmap = v3d_mmap, + .poll = drm_poll, + .read = drm_read, + .compat_ioctl = drm_compat_ioctl, + .llseek = noop_llseek, +}; + +/* DRM_AUTH is required on SUBMIT_CL for now, while we don't have GMP + * protection between clients. Note that render nodes would be be + * able to submit CLs that could access BOs from clients authenticated + * with the master node. + */ +static const struct drm_ioctl_desc v3d_drm_ioctls[] = { + DRM_IOCTL_DEF_DRV(V3D_SUBMIT_CL, v3d_submit_cl_ioctl, DRM_RENDER_ALLOW | DRM_AUTH), + DRM_IOCTL_DEF_DRV(V3D_WAIT_BO, v3d_wait_bo_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(V3D_CREATE_BO, v3d_create_bo_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(V3D_MMAP_BO, v3d_mmap_bo_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(V3D_GET_PARAM, v3d_get_param_ioctl, DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(V3D_GET_BO_OFFSET, v3d_get_bo_offset_ioctl, DRM_RENDER_ALLOW), +}; + +static const struct vm_operations_struct v3d_vm_ops = { + .fault = v3d_gem_fault, + .open = drm_gem_vm_open, + .close = drm_gem_vm_close, +}; + +static struct drm_driver v3d_drm_driver = { + .driver_features = (DRIVER_GEM | + DRIVER_RENDER | + DRIVER_PRIME | + DRIVER_SYNCOBJ), + + .open = v3d_open, + .postclose = v3d_postclose, + +#if defined(CONFIG_DEBUG_FS) + .debugfs_init = v3d_debugfs_init, +#endif + + .gem_free_object_unlocked = v3d_free_object, + .gem_vm_ops = &v3d_vm_ops, + + .prime_handle_to_fd = drm_gem_prime_handle_to_fd, + .prime_fd_to_handle = drm_gem_prime_fd_to_handle, + .gem_prime_import = drm_gem_prime_import, + .gem_prime_export = drm_gem_prime_export, + .gem_prime_res_obj = v3d_prime_res_obj, + .gem_prime_get_sg_table = v3d_prime_get_sg_table, + .gem_prime_import_sg_table = v3d_prime_import_sg_table, + .gem_prime_mmap = v3d_prime_mmap, + + .ioctls = v3d_drm_ioctls, + .num_ioctls = ARRAY_SIZE(v3d_drm_ioctls), + .fops = &v3d_drm_fops, + + .name = DRIVER_NAME, + .desc = DRIVER_DESC, + .date = DRIVER_DATE, + .major = DRIVER_MAJOR, + .minor = DRIVER_MINOR, + .patchlevel = DRIVER_PATCHLEVEL, +}; + +static const struct of_device_id v3d_of_match[] = { + { .compatible = "brcm,7268-v3d" }, + { .compatible = "brcm,7278-v3d" }, + {}, +}; +MODULE_DEVICE_TABLE(of, v3d_of_match); + +static int +map_regs(struct v3d_dev *v3d, void __iomem **regs, const char *name) +{ + struct resource *res = + platform_get_resource_byname(v3d->pdev, IORESOURCE_MEM, name); + + *regs = devm_ioremap_resource(v3d->dev, res); + return PTR_ERR_OR_ZERO(*regs); +} + +static int v3d_platform_drm_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct drm_device *drm; + struct v3d_dev *v3d; + int ret; + u32 ident1; + + dev->coherent_dma_mask = DMA_BIT_MASK(36); + + v3d = kzalloc(sizeof(*v3d), GFP_KERNEL); + if (!v3d) + return -ENOMEM; + v3d->dev = dev; + v3d->pdev = pdev; + drm = &v3d->drm; + + ret = map_regs(v3d, &v3d->bridge_regs, "bridge"); + if (ret) + goto dev_free; + + ret = map_regs(v3d, &v3d->hub_regs, "hub"); + if (ret) + goto dev_free; + + ret = map_regs(v3d, &v3d->core_regs[0], "core0"); + if (ret) + goto dev_free; + + ident1 = V3D_READ(V3D_HUB_IDENT1); + v3d->ver = (V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_TVER) * 10 + + V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_REV)); + v3d->cores = V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_NCORES); + WARN_ON(v3d->cores > 1); /* multicore not yet implemented */ + + if (v3d->ver < 41) { + ret = map_regs(v3d, &v3d->gca_regs, "gca"); + if (ret) + goto dev_free; + } + + v3d->mmu_scratch = dma_alloc_wc(dev, 4096, &v3d->mmu_scratch_paddr, + GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO); + if (!v3d->mmu_scratch) { + dev_err(dev, "Failed to allocate MMU scratch page\n"); + ret = -ENOMEM; + goto dev_free; + } + + pm_runtime_use_autosuspend(dev); + pm_runtime_set_autosuspend_delay(dev, 50); + pm_runtime_enable(dev); + + ret = drm_dev_init(&v3d->drm, &v3d_drm_driver, dev); + if (ret) + goto dma_free; + + platform_set_drvdata(pdev, drm); + drm->dev_private = v3d; + + ret = v3d_gem_init(drm); + if (ret) + goto dev_destroy; + + v3d_irq_init(v3d); + + ret = drm_dev_register(drm, 0); + if (ret) + goto gem_destroy; + + return 0; + +gem_destroy: + v3d_gem_destroy(drm); +dev_destroy: + drm_dev_put(drm); +dma_free: + dma_free_wc(dev, 4096, v3d->mmu_scratch, v3d->mmu_scratch_paddr); +dev_free: + kfree(v3d); + return ret; +} + +static int v3d_platform_drm_remove(struct platform_device *pdev) +{ + struct drm_device *drm = platform_get_drvdata(pdev); + struct v3d_dev *v3d = to_v3d_dev(drm); + + drm_dev_unregister(drm); + + v3d_gem_destroy(drm); + + drm_dev_put(drm); + + dma_free_wc(v3d->dev, 4096, v3d->mmu_scratch, v3d->mmu_scratch_paddr); + + return 0; +} + +static struct platform_driver v3d_platform_driver = { + .probe = v3d_platform_drm_probe, + .remove = v3d_platform_drm_remove, + .driver = { + .name = "v3d", + .of_match_table = v3d_of_match, + }, +}; + +static int __init v3d_drm_register(void) +{ + return platform_driver_register(&v3d_platform_driver); +} + +static void __exit v3d_drm_unregister(void) +{ + platform_driver_unregister(&v3d_platform_driver); +} + +module_init(v3d_drm_register); +module_exit(v3d_drm_unregister); + +MODULE_ALIAS("platform:v3d-drm"); +MODULE_DESCRIPTION("Broadcom V3D DRM Driver"); +MODULE_AUTHOR("Eric Anholt <eric@anholt.net>"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h new file mode 100644 index 0000000..a043ac3 --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -0,0 +1,294 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2015-2018 Broadcom */ + +#include <linux/reservation.h> +#include <drm/drmP.h> +#include <drm/drm_encoder.h> +#include <drm/drm_gem.h> +#include <drm/gpu_scheduler.h> + +#define GMP_GRANULARITY (128 * 1024) + +/* Enum for each of the V3D queues. We maintain various queue + * tracking as an array because at some point we'll want to support + * the TFU (texture formatting unit) as another queue. + */ +enum v3d_queue { + V3D_BIN, + V3D_RENDER, +}; + +#define V3D_MAX_QUEUES (V3D_RENDER + 1) + +struct v3d_queue_state { + struct drm_gpu_scheduler sched; + + u64 fence_context; + u64 emit_seqno; + u64 finished_seqno; +}; + +struct v3d_dev { + struct drm_device drm; + + /* Short representation (e.g. 33, 41) of the V3D tech version + * and revision. + */ + int ver; + + struct device *dev; + struct platform_device *pdev; + void __iomem *hub_regs; + void __iomem *core_regs[3]; + void __iomem *bridge_regs; + void __iomem *gca_regs; + struct clk *clk; + + /* Virtual and DMA addresses of the single shared page table. */ + volatile u32 *pt; + dma_addr_t pt_paddr; + + /* Virtual and DMA addresses of the MMU's scratch page. When + * a read or write is invalid in the MMU, it will be + * redirected here. + */ + void *mmu_scratch; + dma_addr_t mmu_scratch_paddr; + + /* Number of V3D cores. */ + u32 cores; + + /* Allocator managing the address space. All units are in + * number of pages. + */ + struct drm_mm mm; + spinlock_t mm_lock; + + struct work_struct overflow_mem_work; + + struct v3d_exec_info *bin_job; + struct v3d_exec_info *render_job; + + struct v3d_queue_state queue[V3D_MAX_QUEUES]; + + /* Spinlock used to synchronize the overflow memory + * management against bin job submission. + */ + spinlock_t job_lock; + + /* Protects bo_stats */ + struct mutex bo_lock; + + /* Lock taken when resetting the GPU, to keep multiple + * processes from trying to park the scheduler threads and + * reset at once. + */ + struct mutex reset_lock; + + struct { + u32 num_allocated; + u32 pages_allocated; + } bo_stats; +}; + +static inline struct v3d_dev * +to_v3d_dev(struct drm_device *dev) +{ + return (struct v3d_dev *)dev->dev_private; +} + +/* The per-fd struct, which tracks the MMU mappings. */ +struct v3d_file_priv { + struct v3d_dev *v3d; + + struct drm_sched_entity sched_entity[V3D_MAX_QUEUES]; +}; + +/* Tracks a mapping of a BO into a per-fd address space */ +struct v3d_vma { + struct v3d_page_table *pt; + struct list_head list; /* entry in v3d_bo.vmas */ +}; + +struct v3d_bo { + struct drm_gem_object base; + + struct mutex lock; + + struct drm_mm_node node; + + u32 pages_refcount; + struct page **pages; + struct sg_table *sgt; + void *vaddr; + + struct list_head vmas; /* list of v3d_vma */ + + /* List entry for the BO's position in + * v3d_exec_info->unref_list + */ + struct list_head unref_head; + + /* normally (resv == &_resv) except for imported bo's */ + struct reservation_object *resv; + struct reservation_object _resv; +}; + +static inline struct v3d_bo * +to_v3d_bo(struct drm_gem_object *bo) +{ + return (struct v3d_bo *)bo; +} + +struct v3d_fence { + struct dma_fence base; + struct drm_device *dev; + /* v3d seqno for signaled() test */ + u64 seqno; + enum v3d_queue queue; +}; + +static inline struct v3d_fence * +to_v3d_fence(struct dma_fence *fence) +{ + return (struct v3d_fence *)fence; +} + +#define V3D_READ(offset) readl(v3d->hub_regs + offset) +#define V3D_WRITE(offset, val) writel(val, v3d->hub_regs + offset) + +#define V3D_BRIDGE_READ(offset) readl(v3d->bridge_regs + offset) +#define V3D_BRIDGE_WRITE(offset, val) writel(val, v3d->bridge_regs + offset) + +#define V3D_GCA_READ(offset) readl(v3d->gca_regs + offset) +#define V3D_GCA_WRITE(offset, val) writel(val, v3d->gca_regs + offset) + +#define V3D_CORE_READ(core, offset) readl(v3d->core_regs[core] + offset) +#define V3D_CORE_WRITE(core, offset, val) writel(val, v3d->core_regs[core] + offset) + +struct v3d_job { + struct drm_sched_job base; + + struct v3d_exec_info *exec; + + /* An optional fence userspace can pass in for the job to depend on. */ + struct dma_fence *in_fence; + + /* v3d fence to be signaled by IRQ handler when the job is complete. */ + struct dma_fence *done_fence; + + /* GPU virtual addresses of the start/end of the CL job. */ + u32 start, end; +}; + +struct v3d_exec_info { + struct v3d_dev *v3d; + + struct v3d_job bin, render; + + /* Fence for when the scheduler considers the binner to be + * done, for render to depend on. + */ + struct dma_fence *bin_done_fence; + + struct kref refcount; + + /* This is the array of BOs that were looked up at the start of exec. */ + struct v3d_bo **bo; + u32 bo_count; + + /* List of overflow BOs used in the job that need to be + * released once the job is complete. + */ + struct list_head unref_list; + + /* Submitted tile memory allocation start/size, tile state. */ + u32 qma, qms, qts; +}; + +/** + * _wait_for - magic (register) wait macro + * + * Does the right thing for modeset paths when run under kdgb or similar atomic + * contexts. Note that it's important that we check the condition again after + * having timed out, since the timeout could be due to preemption or similar and + * we've never had a chance to check the condition before the timeout. + */ +#define wait_for(COND, MS) ({ \ + unsigned long timeout__ = jiffies + msecs_to_jiffies(MS) + 1; \ + int ret__ = 0; \ + while (!(COND)) { \ + if (time_after(jiffies, timeout__)) { \ + if (!(COND)) \ + ret__ = -ETIMEDOUT; \ + break; \ + } \ + msleep(1); \ + } \ + ret__; \ +}) + +static inline unsigned long nsecs_to_jiffies_timeout(const u64 n) +{ + /* nsecs_to_jiffies64() does not guard against overflow */ + if (NSEC_PER_SEC % HZ && + div_u64(n, NSEC_PER_SEC) >= MAX_JIFFY_OFFSET / HZ) + return MAX_JIFFY_OFFSET; + + return min_t(u64, MAX_JIFFY_OFFSET, nsecs_to_jiffies64(n) + 1); +} + +/* v3d_bo.c */ +void v3d_free_object(struct drm_gem_object *gem_obj); +struct v3d_bo *v3d_bo_create(struct drm_device *dev, struct drm_file *file_priv, + size_t size); +int v3d_create_bo_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int v3d_mmap_bo_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int v3d_get_bo_offset_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int v3d_gem_fault(struct vm_fault *vmf); +int v3d_mmap(struct file *filp, struct vm_area_struct *vma); +struct reservation_object *v3d_prime_res_obj(struct drm_gem_object *obj); +int v3d_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); +struct sg_table *v3d_prime_get_sg_table(struct drm_gem_object *obj); +struct drm_gem_object *v3d_prime_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *sgt); + +/* v3d_debugfs.c */ +int v3d_debugfs_init(struct drm_minor *minor); + +/* v3d_fence.c */ +extern const struct dma_fence_ops v3d_fence_ops; +struct dma_fence *v3d_fence_create(struct v3d_dev *v3d, enum v3d_queue queue); + +/* v3d_gem.c */ +int v3d_gem_init(struct drm_device *dev); +void v3d_gem_destroy(struct drm_device *dev); +int v3d_submit_cl_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +int v3d_wait_bo_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv); +void v3d_exec_put(struct v3d_exec_info *exec); +void v3d_reset(struct v3d_dev *v3d); +void v3d_invalidate_caches(struct v3d_dev *v3d); +void v3d_flush_caches(struct v3d_dev *v3d); + +/* v3d_irq.c */ +void v3d_irq_init(struct v3d_dev *v3d); +void v3d_irq_enable(struct v3d_dev *v3d); +void v3d_irq_disable(struct v3d_dev *v3d); +void v3d_irq_reset(struct v3d_dev *v3d); + +/* v3d_mmu.c */ +int v3d_mmu_get_offset(struct drm_file *file_priv, struct v3d_bo *bo, + u32 *offset); +int v3d_mmu_set_page_table(struct v3d_dev *v3d); +void v3d_mmu_insert_ptes(struct v3d_bo *bo); +void v3d_mmu_remove_ptes(struct v3d_bo *bo); + +/* v3d_sched.c */ +int v3d_sched_init(struct v3d_dev *v3d); +void v3d_sched_fini(struct v3d_dev *v3d); diff --git a/drivers/gpu/drm/v3d/v3d_fence.c b/drivers/gpu/drm/v3d/v3d_fence.c new file mode 100644 index 0000000..087d49c --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_fence.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2017-2018 Broadcom */ + +#include "v3d_drv.h" + +struct dma_fence *v3d_fence_create(struct v3d_dev *v3d, enum v3d_queue queue) +{ + struct v3d_fence *fence; + + fence = kzalloc(sizeof(*fence), GFP_KERNEL); + if (!fence) + return ERR_PTR(-ENOMEM); + + fence->dev = &v3d->drm; + fence->queue = queue; + fence->seqno = ++v3d->queue[queue].emit_seqno; + dma_fence_init(&fence->base, &v3d_fence_ops, &v3d->job_lock, + v3d->queue[queue].fence_context, fence->seqno); + + return &fence->base; +} + +static const char *v3d_fence_get_driver_name(struct dma_fence *fence) +{ + return "v3d"; +} + +static const char *v3d_fence_get_timeline_name(struct dma_fence *fence) +{ + struct v3d_fence *f = to_v3d_fence(fence); + + if (f->queue == V3D_BIN) + return "v3d-bin"; + else + return "v3d-render"; +} + +static bool v3d_fence_enable_signaling(struct dma_fence *fence) +{ + return true; +} + +static bool v3d_fence_signaled(struct dma_fence *fence) +{ + struct v3d_fence *f = to_v3d_fence(fence); + struct v3d_dev *v3d = to_v3d_dev(f->dev); + + return v3d->queue[f->queue].finished_seqno >= f->seqno; +} + +const struct dma_fence_ops v3d_fence_ops = { + .get_driver_name = v3d_fence_get_driver_name, + .get_timeline_name = v3d_fence_get_timeline_name, + .enable_signaling = v3d_fence_enable_signaling, + .signaled = v3d_fence_signaled, + .wait = dma_fence_default_wait, + .release = dma_fence_free, +}; diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c new file mode 100644 index 0000000..b513f91 --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_gem.c @@ -0,0 +1,668 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2014-2018 Broadcom */ + +#include <drm/drmP.h> +#include <drm/drm_syncobj.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> +#include <linux/device.h> +#include <linux/io.h> +#include <linux/sched/signal.h> + +#include "uapi/drm/v3d_drm.h" +#include "v3d_drv.h" +#include "v3d_regs.h" +#include "v3d_trace.h" + +static void +v3d_init_core(struct v3d_dev *v3d, int core) +{ + /* Set OVRTMUOUT, which means that the texture sampler uniform + * configuration's tmu output type field is used, instead of + * using the hardware default behavior based on the texture + * type. If you want the default behavior, you can still put + * "2" in the indirect texture state's output_type field. + */ + V3D_CORE_WRITE(core, V3D_CTL_MISCCFG, V3D_MISCCFG_OVRTMUOUT); + + /* Whenever we flush the L2T cache, we always want to flush + * the whole thing. + */ + V3D_CORE_WRITE(core, V3D_CTL_L2TFLSTA, 0); + V3D_CORE_WRITE(core, V3D_CTL_L2TFLEND, ~0); +} + +/* Sets invariant state for the HW. */ +static void +v3d_init_hw_state(struct v3d_dev *v3d) +{ + v3d_init_core(v3d, 0); +} + +static void +v3d_idle_axi(struct v3d_dev *v3d, int core) +{ + V3D_CORE_WRITE(core, V3D_GMP_CFG, V3D_GMP_CFG_STOP_REQ); + + if (wait_for((V3D_CORE_READ(core, V3D_GMP_STATUS) & + (V3D_GMP_STATUS_RD_COUNT_MASK | + V3D_GMP_STATUS_WR_COUNT_MASK | + V3D_GMP_STATUS_CFG_BUSY)) == 0, 100)) { + DRM_ERROR("Failed to wait for safe GMP shutdown\n"); + } +} + +static void +v3d_idle_gca(struct v3d_dev *v3d) +{ + if (v3d->ver >= 41) + return; + + V3D_GCA_WRITE(V3D_GCA_SAFE_SHUTDOWN, V3D_GCA_SAFE_SHUTDOWN_EN); + + if (wait_for((V3D_GCA_READ(V3D_GCA_SAFE_SHUTDOWN_ACK) & + V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED) == + V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED, 100)) { + DRM_ERROR("Failed to wait for safe GCA shutdown\n"); + } +} + +static void +v3d_reset_v3d(struct v3d_dev *v3d) +{ + int version = V3D_BRIDGE_READ(V3D_TOP_GR_BRIDGE_REVISION); + + if (V3D_GET_FIELD(version, V3D_TOP_GR_BRIDGE_MAJOR) == 2) { + V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_0, + V3D_TOP_GR_BRIDGE_SW_INIT_0_V3D_CLK_108_SW_INIT); + V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_0, 0); + + /* GFXH-1383: The SW_INIT may cause a stray write to address 0 + * of the unit, so reset it to its power-on value here. + */ + V3D_WRITE(V3D_HUB_AXICFG, V3D_HUB_AXICFG_MAX_LEN_MASK); + } else { + WARN_ON_ONCE(V3D_GET_FIELD(version, + V3D_TOP_GR_BRIDGE_MAJOR) != 7); + V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_1, + V3D_TOP_GR_BRIDGE_SW_INIT_1_V3D_CLK_108_SW_INIT); + V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_1, 0); + } + + v3d_init_hw_state(v3d); +} + +void +v3d_reset(struct v3d_dev *v3d) +{ + struct drm_device *dev = &v3d->drm; + + DRM_ERROR("Resetting GPU.\n"); + trace_v3d_reset_begin(dev); + + /* XXX: only needed for safe powerdown, not reset. */ + if (false) + v3d_idle_axi(v3d, 0); + + v3d_idle_gca(v3d); + v3d_reset_v3d(v3d); + + v3d_mmu_set_page_table(v3d); + v3d_irq_reset(v3d); + + trace_v3d_reset_end(dev); +} + +static void +v3d_flush_l3(struct v3d_dev *v3d) +{ + if (v3d->ver < 41) { + u32 gca_ctrl = V3D_GCA_READ(V3D_GCA_CACHE_CTRL); + + V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL, + gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH); + + if (v3d->ver < 33) { + V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL, + gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH); + } + } +} + +/* Invalidates the (read-only) L2 cache. */ +static void +v3d_invalidate_l2(struct v3d_dev *v3d, int core) +{ + V3D_CORE_WRITE(core, V3D_CTL_L2CACTL, + V3D_L2CACTL_L2CCLR | + V3D_L2CACTL_L2CENA); +} + +static void +v3d_invalidate_l1td(struct v3d_dev *v3d, int core) +{ + V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, V3D_L2TCACTL_TMUWCF); + if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) & + V3D_L2TCACTL_L2TFLS), 100)) { + DRM_ERROR("Timeout waiting for L1T write combiner flush\n"); + } +} + +/* Invalidates texture L2 cachelines */ +static void +v3d_flush_l2t(struct v3d_dev *v3d, int core) +{ + v3d_invalidate_l1td(v3d, core); + + V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, + V3D_L2TCACTL_L2TFLS | + V3D_SET_FIELD(V3D_L2TCACTL_FLM_FLUSH, V3D_L2TCACTL_FLM)); + if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) & + V3D_L2TCACTL_L2TFLS), 100)) { + DRM_ERROR("Timeout waiting for L2T flush\n"); + } +} + +/* Invalidates the slice caches. These are read-only caches. */ +static void +v3d_invalidate_slices(struct v3d_dev *v3d, int core) +{ + V3D_CORE_WRITE(core, V3D_CTL_SLCACTL, + V3D_SET_FIELD(0xf, V3D_SLCACTL_TVCCS) | + V3D_SET_FIELD(0xf, V3D_SLCACTL_TDCCS) | + V3D_SET_FIELD(0xf, V3D_SLCACTL_UCC) | + V3D_SET_FIELD(0xf, V3D_SLCACTL_ICC)); +} + +/* Invalidates texture L2 cachelines */ +static void +v3d_invalidate_l2t(struct v3d_dev *v3d, int core) +{ + V3D_CORE_WRITE(core, + V3D_CTL_L2TCACTL, + V3D_L2TCACTL_L2TFLS | + V3D_SET_FIELD(V3D_L2TCACTL_FLM_CLEAR, V3D_L2TCACTL_FLM)); + if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) & + V3D_L2TCACTL_L2TFLS), 100)) { + DRM_ERROR("Timeout waiting for L2T invalidate\n"); + } +} + +void +v3d_invalidate_caches(struct v3d_dev *v3d) +{ + v3d_flush_l3(v3d); + + v3d_invalidate_l2(v3d, 0); + v3d_invalidate_slices(v3d, 0); + v3d_flush_l2t(v3d, 0); +} + +void +v3d_flush_caches(struct v3d_dev *v3d) +{ + v3d_invalidate_l1td(v3d, 0); + v3d_invalidate_l2t(v3d, 0); +} + +static void +v3d_attach_object_fences(struct v3d_exec_info *exec) +{ + struct dma_fence *out_fence = &exec->render.base.s_fence->finished; + struct v3d_bo *bo; + int i; + + for (i = 0; i < exec->bo_count; i++) { + bo = to_v3d_bo(&exec->bo[i]->base); + + /* XXX: Use shared fences for read-only objects. */ + reservation_object_add_excl_fence(bo->resv, out_fence); + } +} + +static void +v3d_unlock_bo_reservations(struct drm_device *dev, + struct v3d_exec_info *exec, + struct ww_acquire_ctx *acquire_ctx) +{ + int i; + + for (i = 0; i < exec->bo_count; i++) { + struct v3d_bo *bo = to_v3d_bo(&exec->bo[i]->base); + + ww_mutex_unlock(&bo->resv->lock); + } + + ww_acquire_fini(acquire_ctx); +} + +/* Takes the reservation lock on all the BOs being referenced, so that + * at queue submit time we can update the reservations. + * + * We don't lock the RCL the tile alloc/state BOs, or overflow memory + * (all of which are on exec->unref_list). They're entirely private + * to v3d, so we don't attach dma-buf fences to them. + */ +static int +v3d_lock_bo_reservations(struct drm_device *dev, + struct v3d_exec_info *exec, + struct ww_acquire_ctx *acquire_ctx) +{ + int contended_lock = -1; + int i, ret; + struct v3d_bo *bo; + + ww_acquire_init(acquire_ctx, &reservation_ww_class); + +retry: + if (contended_lock != -1) { + bo = to_v3d_bo(&exec->bo[contended_lock]->base); + ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock, + acquire_ctx); + if (ret) { + ww_acquire_done(acquire_ctx); + return ret; + } + } + + for (i = 0; i < exec->bo_count; i++) { + if (i == contended_lock) + continue; + + bo = to_v3d_bo(&exec->bo[i]->base); + + ret = ww_mutex_lock_interruptible(&bo->resv->lock, acquire_ctx); + if (ret) { + int j; + + for (j = 0; j < i; j++) { + bo = to_v3d_bo(&exec->bo[j]->base); + ww_mutex_unlock(&bo->resv->lock); + } + + if (contended_lock != -1 && contended_lock >= i) { + bo = to_v3d_bo(&exec->bo[contended_lock]->base); + + ww_mutex_unlock(&bo->resv->lock); + } + + if (ret == -EDEADLK) { + contended_lock = i; + goto retry; + } + + ww_acquire_done(acquire_ctx); + return ret; + } + } + + ww_acquire_done(acquire_ctx); + + /* Reserve space for our shared (read-only) fence references, + * before we commit the CL to the hardware. + */ + for (i = 0; i < exec->bo_count; i++) { + bo = to_v3d_bo(&exec->bo[i]->base); + + ret = reservation_object_reserve_shared(bo->resv); + if (ret) { + v3d_unlock_bo_reservations(dev, exec, acquire_ctx); + return ret; + } + } + + return 0; +} + +/** + * v3d_cl_lookup_bos() - Sets up exec->bo[] with the GEM objects + * referenced by the job. + * @dev: DRM device + * @file_priv: DRM file for this fd + * @exec: V3D job being set up + * + * The command validator needs to reference BOs by their index within + * the submitted job's BO list. This does the validation of the job's + * BO list and reference counting for the lifetime of the job. + * + * Note that this function doesn't need to unreference the BOs on + * failure, because that will happen at v3d_exec_cleanup() time. + */ +static int +v3d_cl_lookup_bos(struct drm_device *dev, + struct drm_file *file_priv, + struct drm_v3d_submit_cl *args, + struct v3d_exec_info *exec) +{ + u32 *handles; + int ret = 0; + int i; + + exec->bo_count = args->bo_handle_count; + + if (!exec->bo_count) { + /* See comment on bo_index for why we have to check + * this. + */ + DRM_DEBUG("Rendering requires BOs\n"); + return -EINVAL; + } + + exec->bo = kvmalloc_array(exec->bo_count, + sizeof(struct drm_gem_cma_object *), + GFP_KERNEL | __GFP_ZERO); + if (!exec->bo) { + DRM_DEBUG("Failed to allocate validated BO pointers\n"); + return -ENOMEM; + } + + handles = kvmalloc_array(exec->bo_count, sizeof(u32), GFP_KERNEL); + if (!handles) { + ret = -ENOMEM; + DRM_DEBUG("Failed to allocate incoming GEM handles\n"); + goto fail; + } + + if (copy_from_user(handles, + (void __user *)(uintptr_t)args->bo_handles, + exec->bo_count * sizeof(u32))) { + ret = -EFAULT; + DRM_DEBUG("Failed to copy in GEM handles\n"); + goto fail; + } + + spin_lock(&file_priv->table_lock); + for (i = 0; i < exec->bo_count; i++) { + struct drm_gem_object *bo = idr_find(&file_priv->object_idr, + handles[i]); + if (!bo) { + DRM_DEBUG("Failed to look up GEM BO %d: %d\n", + i, handles[i]); + ret = -ENOENT; + spin_unlock(&file_priv->table_lock); + goto fail; + } + drm_gem_object_get(bo); + exec->bo[i] = to_v3d_bo(bo); + } + spin_unlock(&file_priv->table_lock); + +fail: + kvfree(handles); + return ret; +} + +static void +v3d_exec_cleanup(struct kref *ref) +{ + struct v3d_exec_info *exec = container_of(ref, struct v3d_exec_info, + refcount); + struct v3d_dev *v3d = exec->v3d; + unsigned int i; + struct v3d_bo *bo, *save; + + dma_fence_put(exec->bin.in_fence); + dma_fence_put(exec->render.in_fence); + + dma_fence_put(exec->bin.done_fence); + dma_fence_put(exec->render.done_fence); + + dma_fence_put(exec->bin_done_fence); + + for (i = 0; i < exec->bo_count; i++) + drm_gem_object_put_unlocked(&exec->bo[i]->base); + kvfree(exec->bo); + + list_for_each_entry_safe(bo, save, &exec->unref_list, unref_head) { + drm_gem_object_put_unlocked(&bo->base); + } + + pm_runtime_mark_last_busy(v3d->dev); + pm_runtime_put_autosuspend(v3d->dev); + + kfree(exec); +} + +void v3d_exec_put(struct v3d_exec_info *exec) +{ + kref_put(&exec->refcount, v3d_exec_cleanup); +} + +int +v3d_wait_bo_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + int ret; + struct drm_v3d_wait_bo *args = data; + struct drm_gem_object *gem_obj; + struct v3d_bo *bo; + ktime_t start = ktime_get(); + u64 delta_ns; + unsigned long timeout_jiffies = + nsecs_to_jiffies_timeout(args->timeout_ns); + + if (args->pad != 0) + return -EINVAL; + + gem_obj = drm_gem_object_lookup(file_priv, args->handle); + if (!gem_obj) { + DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle); + return -EINVAL; + } + bo = to_v3d_bo(gem_obj); + + ret = reservation_object_wait_timeout_rcu(bo->resv, + true, true, + timeout_jiffies); + + if (ret == 0) + ret = -ETIME; + else if (ret > 0) + ret = 0; + + /* Decrement the user's timeout, in case we got interrupted + * such that the ioctl will be restarted. + */ + delta_ns = ktime_to_ns(ktime_sub(ktime_get(), start)); + if (delta_ns < args->timeout_ns) + args->timeout_ns -= delta_ns; + else + args->timeout_ns = 0; + + /* Asked to wait beyond the jiffie/scheduler precision? */ + if (ret == -ETIME && args->timeout_ns) + ret = -EAGAIN; + + drm_gem_object_put_unlocked(gem_obj); + + return ret; +} + +/** + * v3d_submit_cl_ioctl() - Submits a job (frame) to the V3D. + * @dev: DRM device + * @data: ioctl argument + * @file_priv: DRM file for this fd + * + * This is the main entrypoint for userspace to submit a 3D frame to + * the GPU. Userspace provides the binner command list (if + * applicable), and the kernel sets up the render command list to draw + * to the framebuffer described in the ioctl, using the command lists + * that the 3D engine's binner will produce. + */ +int +v3d_submit_cl_ioctl(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct v3d_dev *v3d = to_v3d_dev(dev); + struct v3d_file_priv *v3d_priv = file_priv->driver_priv; + struct drm_v3d_submit_cl *args = data; + struct v3d_exec_info *exec; + struct ww_acquire_ctx acquire_ctx; + struct drm_syncobj *sync_out; + int ret = 0; + + if (args->pad != 0) { + DRM_INFO("pad must be zero: %d\n", args->pad); + return -EINVAL; + } + + exec = kcalloc(1, sizeof(*exec), GFP_KERNEL); + if (!exec) + return -ENOMEM; + + ret = pm_runtime_get_sync(v3d->dev); + if (ret < 0) { + kfree(exec); + return ret; + } + + kref_init(&exec->refcount); + + ret = drm_syncobj_find_fence(file_priv, args->in_sync_bcl, + &exec->bin.in_fence); + if (ret == -EINVAL) + goto fail; + + ret = drm_syncobj_find_fence(file_priv, args->in_sync_rcl, + &exec->render.in_fence); + if (ret == -EINVAL) + goto fail; + + exec->qma = args->qma; + exec->qms = args->qms; + exec->qts = args->qts; + exec->bin.exec = exec; + exec->bin.start = args->bcl_start; + exec->bin.end = args->bcl_end; + exec->render.exec = exec; + exec->render.start = args->rcl_start; + exec->render.end = args->rcl_end; + exec->v3d = v3d; + INIT_LIST_HEAD(&exec->unref_list); + + ret = v3d_cl_lookup_bos(dev, file_priv, args, exec); + if (ret) + goto fail; + + ret = v3d_lock_bo_reservations(dev, exec, &acquire_ctx); + if (ret) + goto fail; + + if (exec->bin.start != exec->bin.end) { + ret = drm_sched_job_init(&exec->bin.base, + &v3d->queue[V3D_BIN].sched, + &v3d_priv->sched_entity[V3D_BIN], + v3d_priv); + if (ret) + goto fail_unreserve; + + exec->bin_done_fence = + dma_fence_get(&exec->bin.base.s_fence->finished); + + kref_get(&exec->refcount); /* put by scheduler job completion */ + drm_sched_entity_push_job(&exec->bin.base, + &v3d_priv->sched_entity[V3D_BIN]); + } + + ret = drm_sched_job_init(&exec->render.base, + &v3d->queue[V3D_RENDER].sched, + &v3d_priv->sched_entity[V3D_RENDER], + v3d_priv); + if (ret) + goto fail_unreserve; + + kref_get(&exec->refcount); /* put by scheduler job completion */ + drm_sched_entity_push_job(&exec->render.base, + &v3d_priv->sched_entity[V3D_RENDER]); + + v3d_attach_object_fences(exec); + + v3d_unlock_bo_reservations(dev, exec, &acquire_ctx); + + /* Update the return sync object for the */ + sync_out = drm_syncobj_find(file_priv, args->out_sync); + if (sync_out) { + drm_syncobj_replace_fence(sync_out, + &exec->render.base.s_fence->finished); + drm_syncobj_put(sync_out); + } + + v3d_exec_put(exec); + + return 0; + +fail_unreserve: + v3d_unlock_bo_reservations(dev, exec, &acquire_ctx); +fail: + v3d_exec_put(exec); + + return ret; +} + +int +v3d_gem_init(struct drm_device *dev) +{ + struct v3d_dev *v3d = to_v3d_dev(dev); + u32 pt_size = 4096 * 1024; + int ret, i; + + for (i = 0; i < V3D_MAX_QUEUES; i++) + v3d->queue[i].fence_context = dma_fence_context_alloc(1); + + spin_lock_init(&v3d->mm_lock); + spin_lock_init(&v3d->job_lock); + mutex_init(&v3d->bo_lock); + mutex_init(&v3d->reset_lock); + + /* Note: We don't allocate address 0. Various bits of HW + * treat 0 as special, such as the occlusion query counters + * where 0 means "disabled". + */ + drm_mm_init(&v3d->mm, 1, pt_size / sizeof(u32) - 1); + + v3d->pt = dma_alloc_wc(v3d->dev, pt_size, + &v3d->pt_paddr, + GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO); + if (!v3d->pt) { + drm_mm_takedown(&v3d->mm); + dev_err(v3d->dev, + "Failed to allocate page tables. " + "Please ensure you have CMA enabled.\n"); + return -ENOMEM; + } + + v3d_init_hw_state(v3d); + v3d_mmu_set_page_table(v3d); + + ret = v3d_sched_init(v3d); + if (ret) { + drm_mm_takedown(&v3d->mm); + dma_free_coherent(v3d->dev, 4096 * 1024, (void *)v3d->pt, + v3d->pt_paddr); + } + + return 0; +} + +void +v3d_gem_destroy(struct drm_device *dev) +{ + struct v3d_dev *v3d = to_v3d_dev(dev); + enum v3d_queue q; + + v3d_sched_fini(v3d); + + /* Waiting for exec to finish would need to be done before + * unregistering V3D. + */ + for (q = 0; q < V3D_MAX_QUEUES; q++) { + WARN_ON(v3d->queue[q].emit_seqno != + v3d->queue[q].finished_seqno); + } + + drm_mm_takedown(&v3d->mm); + + dma_free_coherent(v3d->dev, 4096 * 1024, (void *)v3d->pt, v3d->pt_paddr); +} diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c new file mode 100644 index 0000000..77e1fa0 --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_irq.c @@ -0,0 +1,206 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2014-2018 Broadcom */ + +/** + * DOC: Interrupt management for the V3D engine + * + * When we take a binning or rendering flush done interrupt, we need + * to signal the fence for that job so that the scheduler can queue up + * the next one and unblock any waiters. + * + * When we take the binner out of memory interrupt, we need to + * allocate some new memory and pass it to the binner so that the + * current job can make progress. + */ + +#include "v3d_drv.h" +#include "v3d_regs.h" + +#define V3D_CORE_IRQS ((u32)(V3D_INT_OUTOMEM | \ + V3D_INT_FLDONE | \ + V3D_INT_FRDONE | \ + V3D_INT_GMPV)) + +#define V3D_HUB_IRQS ((u32)(V3D_HUB_INT_MMU_WRV | \ + V3D_HUB_INT_MMU_PTI | \ + V3D_HUB_INT_MMU_CAP)) + +static void +v3d_overflow_mem_work(struct work_struct *work) +{ + struct v3d_dev *v3d = + container_of(work, struct v3d_dev, overflow_mem_work); + struct drm_device *dev = &v3d->drm; + struct v3d_bo *bo = v3d_bo_create(dev, NULL /* XXX: GMP */, 256 * 1024); + unsigned long irqflags; + + if (IS_ERR(bo)) { + DRM_ERROR("Couldn't allocate binner overflow mem\n"); + return; + } + + /* We lost a race, and our work task came in after the bin job + * completed and exited. This can happen because the HW + * signals OOM before it's fully OOM, so the binner might just + * barely complete. + * + * If we lose the race and our work task comes in after a new + * bin job got scheduled, that's fine. We'll just give them + * some binner pool anyway. + */ + spin_lock_irqsave(&v3d->job_lock, irqflags); + if (!v3d->bin_job) { + spin_unlock_irqrestore(&v3d->job_lock, irqflags); + goto out; + } + + drm_gem_object_get(&bo->base); + list_add_tail(&bo->unref_head, &v3d->bin_job->unref_list); + spin_unlock_irqrestore(&v3d->job_lock, irqflags); + + V3D_CORE_WRITE(0, V3D_PTB_BPOA, bo->node.start << PAGE_SHIFT); + V3D_CORE_WRITE(0, V3D_PTB_BPOS, bo->base.size); + +out: + drm_gem_object_put_unlocked(&bo->base); +} + +static irqreturn_t +v3d_irq(int irq, void *arg) +{ + struct v3d_dev *v3d = arg; + u32 intsts; + irqreturn_t status = IRQ_NONE; + + intsts = V3D_CORE_READ(0, V3D_CTL_INT_STS); + + /* Acknowledge the interrupts we're handling here. */ + V3D_CORE_WRITE(0, V3D_CTL_INT_CLR, intsts); + + if (intsts & V3D_INT_OUTOMEM) { + /* Note that the OOM status is edge signaled, so the + * interrupt won't happen again until the we actually + * add more memory. + */ + schedule_work(&v3d->overflow_mem_work); + status = IRQ_HANDLED; + } + + if (intsts & V3D_INT_FLDONE) { + v3d->queue[V3D_BIN].finished_seqno++; + dma_fence_signal(v3d->bin_job->bin.done_fence); + status = IRQ_HANDLED; + } + + if (intsts & V3D_INT_FRDONE) { + v3d->queue[V3D_RENDER].finished_seqno++; + dma_fence_signal(v3d->render_job->render.done_fence); + + status = IRQ_HANDLED; + } + + /* We shouldn't be triggering these if we have GMP in + * always-allowed mode. + */ + if (intsts & V3D_INT_GMPV) + dev_err(v3d->dev, "GMP violation\n"); + + return status; +} + +static irqreturn_t +v3d_hub_irq(int irq, void *arg) +{ + struct v3d_dev *v3d = arg; + u32 intsts; + irqreturn_t status = IRQ_NONE; + + intsts = V3D_READ(V3D_HUB_INT_STS); + + /* Acknowledge the interrupts we're handling here. */ + V3D_WRITE(V3D_HUB_INT_CLR, intsts); + + if (intsts & (V3D_HUB_INT_MMU_WRV | + V3D_HUB_INT_MMU_PTI | + V3D_HUB_INT_MMU_CAP)) { + u32 axi_id = V3D_READ(V3D_MMU_VIO_ID); + u64 vio_addr = (u64)V3D_READ(V3D_MMU_VIO_ADDR) << 8; + + dev_err(v3d->dev, "MMU error from client %d at 0x%08llx%s%s%s\n", + axi_id, (long long)vio_addr, + ((intsts & V3D_HUB_INT_MMU_WRV) ? + ", write violation" : ""), + ((intsts & V3D_HUB_INT_MMU_PTI) ? + ", pte invalid" : ""), + ((intsts & V3D_HUB_INT_MMU_CAP) ? + ", cap exceeded" : "")); + status = IRQ_HANDLED; + } + + return status; +} + +void +v3d_irq_init(struct v3d_dev *v3d) +{ + int ret, core; + + INIT_WORK(&v3d->overflow_mem_work, v3d_overflow_mem_work); + + /* Clear any pending interrupts someone might have left around + * for us. + */ + for (core = 0; core < v3d->cores; core++) + V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS); + V3D_WRITE(V3D_HUB_INT_CLR, V3D_HUB_IRQS); + + ret = devm_request_irq(v3d->dev, platform_get_irq(v3d->pdev, 0), + v3d_hub_irq, IRQF_SHARED, + "v3d_hub", v3d); + ret = devm_request_irq(v3d->dev, platform_get_irq(v3d->pdev, 1), + v3d_irq, IRQF_SHARED, + "v3d_core0", v3d); + if (ret) + dev_err(v3d->dev, "IRQ setup failed: %d\n", ret); + + v3d_irq_enable(v3d); +} + +void +v3d_irq_enable(struct v3d_dev *v3d) +{ + int core; + + /* Enable our set of interrupts, masking out any others. */ + for (core = 0; core < v3d->cores; core++) { + V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_SET, ~V3D_CORE_IRQS); + V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_CLR, V3D_CORE_IRQS); + } + + V3D_WRITE(V3D_HUB_INT_MSK_SET, ~V3D_HUB_IRQS); + V3D_WRITE(V3D_HUB_INT_MSK_CLR, V3D_HUB_IRQS); +} + +void +v3d_irq_disable(struct v3d_dev *v3d) +{ + int core; + + /* Disable all interrupts. */ + for (core = 0; core < v3d->cores; core++) + V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_SET, ~0); + V3D_WRITE(V3D_HUB_INT_MSK_SET, ~0); + + /* Clear any pending interrupts we might have left. */ + for (core = 0; core < v3d->cores; core++) + V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS); + V3D_WRITE(V3D_HUB_INT_CLR, V3D_HUB_IRQS); + + cancel_work_sync(&v3d->overflow_mem_work); +} + +/** Reinitializes interrupt registers when a GPU reset is performed. */ +void v3d_irq_reset(struct v3d_dev *v3d) +{ + v3d_irq_enable(v3d); +} diff --git a/drivers/gpu/drm/v3d/v3d_mmu.c b/drivers/gpu/drm/v3d/v3d_mmu.c new file mode 100644 index 0000000..b00f97c --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_mmu.c @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2017-2018 Broadcom */ + +/** + * DOC: Broadcom V3D MMU + * + * The V3D 3.x hardware (compared to VC4) now includes an MMU. It has + * a single level of page tables for the V3D's 4GB address space to + * map to AXI bus addresses, thus it could need up to 4MB of + * physically contiguous memory to store the PTEs. + * + * Because the 4MB of contiguous memory for page tables is precious, + * and switching between them is expensive, we load all BOs into the + * same 4GB address space. + * + * To protect clients from each other, we should use the GMP to + * quickly mask out (at 128kb granularity) what pages are available to + * each client. This is not yet implemented. + */ + +#include "v3d_drv.h" +#include "v3d_regs.h" + +#define V3D_MMU_PAGE_SHIFT 12 + +/* Note: All PTEs for the 1MB superpage must be filled with the + * superpage bit set. + */ +#define V3D_PTE_SUPERPAGE BIT(31) +#define V3D_PTE_WRITEABLE BIT(29) +#define V3D_PTE_VALID BIT(28) + +static int v3d_mmu_flush_all(struct v3d_dev *v3d) +{ + int ret; + + /* Make sure that another flush isn't already running when we + * start this one. + */ + ret = wait_for(!(V3D_READ(V3D_MMU_CTL) & + V3D_MMU_CTL_TLB_CLEARING), 100); + if (ret) + dev_err(v3d->dev, "TLB clear wait idle pre-wait failed\n"); + + V3D_WRITE(V3D_MMU_CTL, V3D_READ(V3D_MMU_CTL) | + V3D_MMU_CTL_TLB_CLEAR); + + V3D_WRITE(V3D_MMUC_CONTROL, + V3D_MMUC_CONTROL_FLUSH | + V3D_MMUC_CONTROL_ENABLE); + + ret = wait_for(!(V3D_READ(V3D_MMU_CTL) & + V3D_MMU_CTL_TLB_CLEARING), 100); + if (ret) { + dev_err(v3d->dev, "TLB clear wait idle failed\n"); + return ret; + } + + ret = wait_for(!(V3D_READ(V3D_MMUC_CONTROL) & + V3D_MMUC_CONTROL_FLUSHING), 100); + if (ret) + dev_err(v3d->dev, "MMUC flush wait idle failed\n"); + + return ret; +} + +int v3d_mmu_set_page_table(struct v3d_dev *v3d) +{ + V3D_WRITE(V3D_MMU_PT_PA_BASE, v3d->pt_paddr >> V3D_MMU_PAGE_SHIFT); + V3D_WRITE(V3D_MMU_CTL, + V3D_MMU_CTL_ENABLE | + V3D_MMU_CTL_PT_INVALID | + V3D_MMU_CTL_PT_INVALID_ABORT | + V3D_MMU_CTL_WRITE_VIOLATION_ABORT | + V3D_MMU_CTL_CAP_EXCEEDED_ABORT); + V3D_WRITE(V3D_MMU_ILLEGAL_ADDR, + (v3d->mmu_scratch_paddr >> V3D_MMU_PAGE_SHIFT) | + V3D_MMU_ILLEGAL_ADDR_ENABLE); + V3D_WRITE(V3D_MMUC_CONTROL, V3D_MMUC_CONTROL_ENABLE); + + return v3d_mmu_flush_all(v3d); +} + +void v3d_mmu_insert_ptes(struct v3d_bo *bo) +{ + struct v3d_dev *v3d = to_v3d_dev(bo->base.dev); + u32 page = bo->node.start; + u32 page_prot = V3D_PTE_WRITEABLE | V3D_PTE_VALID; + unsigned int count; + struct scatterlist *sgl; + + for_each_sg(bo->sgt->sgl, sgl, bo->sgt->nents, count) { + u32 page_address = sg_dma_address(sgl) >> V3D_MMU_PAGE_SHIFT; + u32 pte = page_prot | page_address; + u32 i; + + BUG_ON(page_address + (sg_dma_len(sgl) >> V3D_MMU_PAGE_SHIFT) >= + BIT(24)); + + for (i = 0; i < sg_dma_len(sgl) >> V3D_MMU_PAGE_SHIFT; i++) + v3d->pt[page++] = pte + i; + } + + WARN_ON_ONCE(page - bo->node.start != + bo->base.size >> V3D_MMU_PAGE_SHIFT); + + if (v3d_mmu_flush_all(v3d)) + dev_err(v3d->dev, "MMU flush timeout\n"); +} + +void v3d_mmu_remove_ptes(struct v3d_bo *bo) +{ + struct v3d_dev *v3d = to_v3d_dev(bo->base.dev); + u32 npages = bo->base.size >> V3D_MMU_PAGE_SHIFT; + u32 page; + + for (page = bo->node.start; page < bo->node.start + npages; page++) + v3d->pt[page] = 0; + + if (v3d_mmu_flush_all(v3d)) + dev_err(v3d->dev, "MMU flush timeout\n"); +} diff --git a/drivers/gpu/drm/v3d/v3d_regs.h b/drivers/gpu/drm/v3d/v3d_regs.h new file mode 100644 index 0000000..fc13282 --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_regs.h @@ -0,0 +1,295 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2017-2018 Broadcom */ + +#ifndef V3D_REGS_H +#define V3D_REGS_H + +#include <linux/bitops.h> + +#define V3D_MASK(high, low) ((u32)GENMASK(high, low)) +/* Using the GNU statement expression extension */ +#define V3D_SET_FIELD(value, field) \ + ({ \ + u32 fieldval = (value) << field##_SHIFT; \ + WARN_ON((fieldval & ~field##_MASK) != 0); \ + fieldval & field##_MASK; \ + }) + +#define V3D_GET_FIELD(word, field) (((word) & field##_MASK) >> \ + field##_SHIFT) + +/* Hub registers for shared hardware between V3D cores. */ + +#define V3D_HUB_AXICFG 0x00000 +# define V3D_HUB_AXICFG_MAX_LEN_MASK V3D_MASK(3, 0) +# define V3D_HUB_AXICFG_MAX_LEN_SHIFT 0 +#define V3D_HUB_UIFCFG 0x00004 +#define V3D_HUB_IDENT0 0x00008 + +#define V3D_HUB_IDENT1 0x0000c +# define V3D_HUB_IDENT1_WITH_MSO BIT(19) +# define V3D_HUB_IDENT1_WITH_TSY BIT(18) +# define V3D_HUB_IDENT1_WITH_TFU BIT(17) +# define V3D_HUB_IDENT1_WITH_L3C BIT(16) +# define V3D_HUB_IDENT1_NHOSTS_MASK V3D_MASK(15, 12) +# define V3D_HUB_IDENT1_NHOSTS_SHIFT 12 +# define V3D_HUB_IDENT1_NCORES_MASK V3D_MASK(11, 8) +# define V3D_HUB_IDENT1_NCORES_SHIFT 8 +# define V3D_HUB_IDENT1_REV_MASK V3D_MASK(7, 4) +# define V3D_HUB_IDENT1_REV_SHIFT 4 +# define V3D_HUB_IDENT1_TVER_MASK V3D_MASK(3, 0) +# define V3D_HUB_IDENT1_TVER_SHIFT 0 + +#define V3D_HUB_IDENT2 0x00010 +# define V3D_HUB_IDENT2_WITH_MMU BIT(8) +# define V3D_HUB_IDENT2_L3C_NKB_MASK V3D_MASK(7, 0) +# define V3D_HUB_IDENT2_L3C_NKB_SHIFT 0 + +#define V3D_HUB_IDENT3 0x00014 +# define V3D_HUB_IDENT3_IPREV_MASK V3D_MASK(15, 8) +# define V3D_HUB_IDENT3_IPREV_SHIFT 8 +# define V3D_HUB_IDENT3_IPIDX_MASK V3D_MASK(7, 0) +# define V3D_HUB_IDENT3_IPIDX_SHIFT 0 + +#define V3D_HUB_INT_STS 0x00050 +#define V3D_HUB_INT_SET 0x00054 +#define V3D_HUB_INT_CLR 0x00058 +#define V3D_HUB_INT_MSK_STS 0x0005c +#define V3D_HUB_INT_MSK_SET 0x00060 +#define V3D_HUB_INT_MSK_CLR 0x00064 +# define V3D_HUB_INT_MMU_WRV BIT(5) +# define V3D_HUB_INT_MMU_PTI BIT(4) +# define V3D_HUB_INT_MMU_CAP BIT(3) +# define V3D_HUB_INT_MSO BIT(2) +# define V3D_HUB_INT_TFUC BIT(1) +# define V3D_HUB_INT_TFUF BIT(0) + +#define V3D_GCA_CACHE_CTRL 0x0000c +# define V3D_GCA_CACHE_CTRL_FLUSH BIT(0) + +#define V3D_GCA_SAFE_SHUTDOWN 0x000b0 +# define V3D_GCA_SAFE_SHUTDOWN_EN BIT(0) + +#define V3D_GCA_SAFE_SHUTDOWN_ACK 0x000b4 +# define V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED 3 + +# define V3D_TOP_GR_BRIDGE_REVISION 0x00000 +# define V3D_TOP_GR_BRIDGE_MAJOR_MASK V3D_MASK(15, 8) +# define V3D_TOP_GR_BRIDGE_MAJOR_SHIFT 8 +# define V3D_TOP_GR_BRIDGE_MINOR_MASK V3D_MASK(7, 0) +# define V3D_TOP_GR_BRIDGE_MINOR_SHIFT 0 + +/* 7268 reset reg */ +# define V3D_TOP_GR_BRIDGE_SW_INIT_0 0x00008 +# define V3D_TOP_GR_BRIDGE_SW_INIT_0_V3D_CLK_108_SW_INIT BIT(0) +/* 7278 reset reg */ +# define V3D_TOP_GR_BRIDGE_SW_INIT_1 0x0000c +# define V3D_TOP_GR_BRIDGE_SW_INIT_1_V3D_CLK_108_SW_INIT BIT(0) + +/* Per-MMU registers. */ + +#define V3D_MMUC_CONTROL 0x01000 +# define V3D_MMUC_CONTROL_CLEAR BIT(3) +# define V3D_MMUC_CONTROL_FLUSHING BIT(2) +# define V3D_MMUC_CONTROL_FLUSH BIT(1) +# define V3D_MMUC_CONTROL_ENABLE BIT(0) + +#define V3D_MMU_CTL 0x01200 +# define V3D_MMU_CTL_CAP_EXCEEDED BIT(27) +# define V3D_MMU_CTL_CAP_EXCEEDED_ABORT BIT(26) +# define V3D_MMU_CTL_CAP_EXCEEDED_INT BIT(25) +# define V3D_MMU_CTL_CAP_EXCEEDED_EXCEPTION BIT(24) +# define V3D_MMU_CTL_PT_INVALID BIT(20) +# define V3D_MMU_CTL_PT_INVALID_ABORT BIT(19) +# define V3D_MMU_CTL_PT_INVALID_INT BIT(18) +# define V3D_MMU_CTL_PT_INVALID_EXCEPTION BIT(17) +# define V3D_MMU_CTL_WRITE_VIOLATION BIT(16) +# define V3D_MMU_CTL_WRITE_VIOLATION_ABORT BIT(11) +# define V3D_MMU_CTL_WRITE_VIOLATION_INT BIT(10) +# define V3D_MMU_CTL_WRITE_VIOLATION_EXCEPTION BIT(9) +# define V3D_MMU_CTL_TLB_CLEARING BIT(7) +# define V3D_MMU_CTL_TLB_STATS_CLEAR BIT(3) +# define V3D_MMU_CTL_TLB_CLEAR BIT(2) +# define V3D_MMU_CTL_TLB_STATS_ENABLE BIT(1) +# define V3D_MMU_CTL_ENABLE BIT(0) + +#define V3D_MMU_PT_PA_BASE 0x01204 +#define V3D_MMU_HIT 0x01208 +#define V3D_MMU_MISSES 0x0120c +#define V3D_MMU_STALLS 0x01210 + +#define V3D_MMU_ADDR_CAP 0x01214 +# define V3D_MMU_ADDR_CAP_ENABLE BIT(31) +# define V3D_MMU_ADDR_CAP_MPAGE_MASK V3D_MASK(11, 0) +# define V3D_MMU_ADDR_CAP_MPAGE_SHIFT 0 + +#define V3D_MMU_SHOOT_DOWN 0x01218 +# define V3D_MMU_SHOOT_DOWN_SHOOTING BIT(29) +# define V3D_MMU_SHOOT_DOWN_SHOOT BIT(28) +# define V3D_MMU_SHOOT_DOWN_PAGE_MASK V3D_MASK(27, 0) +# define V3D_MMU_SHOOT_DOWN_PAGE_SHIFT 0 + +#define V3D_MMU_BYPASS_START 0x0121c +#define V3D_MMU_BYPASS_END 0x01220 + +/* AXI ID of the access that faulted */ +#define V3D_MMU_VIO_ID 0x0122c + +/* Address for illegal PTEs to return */ +#define V3D_MMU_ILLEGAL_ADDR 0x01230 +# define V3D_MMU_ILLEGAL_ADDR_ENABLE BIT(31) + +/* Address that faulted */ +#define V3D_MMU_VIO_ADDR 0x01234 + +/* Per-V3D-core registers */ + +#define V3D_CTL_IDENT0 0x00000 +# define V3D_IDENT0_VER_MASK V3D_MASK(31, 24) +# define V3D_IDENT0_VER_SHIFT 24 + +#define V3D_CTL_IDENT1 0x00004 +/* Multiples of 1kb */ +# define V3D_IDENT1_VPM_SIZE_MASK V3D_MASK(31, 28) +# define V3D_IDENT1_VPM_SIZE_SHIFT 28 +# define V3D_IDENT1_NSEM_MASK V3D_MASK(23, 16) +# define V3D_IDENT1_NSEM_SHIFT 16 +# define V3D_IDENT1_NTMU_MASK V3D_MASK(15, 12) +# define V3D_IDENT1_NTMU_SHIFT 12 +# define V3D_IDENT1_QUPS_MASK V3D_MASK(11, 8) +# define V3D_IDENT1_QUPS_SHIFT 8 +# define V3D_IDENT1_NSLC_MASK V3D_MASK(7, 4) +# define V3D_IDENT1_NSLC_SHIFT 4 +# define V3D_IDENT1_REV_MASK V3D_MASK(3, 0) +# define V3D_IDENT1_REV_SHIFT 0 + +#define V3D_CTL_IDENT2 0x00008 +# define V3D_IDENT2_BCG_INT BIT(28) + +#define V3D_CTL_MISCCFG 0x00018 +# define V3D_MISCCFG_OVRTMUOUT BIT(0) + +#define V3D_CTL_L2CACTL 0x00020 +# define V3D_L2CACTL_L2CCLR BIT(2) +# define V3D_L2CACTL_L2CDIS BIT(1) +# define V3D_L2CACTL_L2CENA BIT(0) + +#define V3D_CTL_SLCACTL 0x00024 +# define V3D_SLCACTL_TVCCS_MASK V3D_MASK(27, 24) +# define V3D_SLCACTL_TVCCS_SHIFT 24 +# define V3D_SLCACTL_TDCCS_MASK V3D_MASK(19, 16) +# define V3D_SLCACTL_TDCCS_SHIFT 16 +# define V3D_SLCACTL_UCC_MASK V3D_MASK(11, 8) +# define V3D_SLCACTL_UCC_SHIFT 8 +# define V3D_SLCACTL_ICC_MASK V3D_MASK(3, 0) +# define V3D_SLCACTL_ICC_SHIFT 0 + +#define V3D_CTL_L2TCACTL 0x00030 +# define V3D_L2TCACTL_TMUWCF BIT(8) +# define V3D_L2TCACTL_L2T_NO_WM BIT(4) +# define V3D_L2TCACTL_FLM_FLUSH 0 +# define V3D_L2TCACTL_FLM_CLEAR 1 +# define V3D_L2TCACTL_FLM_CLEAN 2 +# define V3D_L2TCACTL_FLM_MASK V3D_MASK(2, 1) +# define V3D_L2TCACTL_FLM_SHIFT 1 +# define V3D_L2TCACTL_L2TFLS BIT(0) +#define V3D_CTL_L2TFLSTA 0x00034 +#define V3D_CTL_L2TFLEND 0x00038 + +#define V3D_CTL_INT_STS 0x00050 +#define V3D_CTL_INT_SET 0x00054 +#define V3D_CTL_INT_CLR 0x00058 +#define V3D_CTL_INT_MSK_STS 0x0005c +#define V3D_CTL_INT_MSK_SET 0x00060 +#define V3D_CTL_INT_MSK_CLR 0x00064 +# define V3D_INT_QPU_MASK V3D_MASK(27, 16) +# define V3D_INT_QPU_SHIFT 16 +# define V3D_INT_GMPV BIT(5) +# define V3D_INT_TRFB BIT(4) +# define V3D_INT_SPILLUSE BIT(3) +# define V3D_INT_OUTOMEM BIT(2) +# define V3D_INT_FLDONE BIT(1) +# define V3D_INT_FRDONE BIT(0) + +#define V3D_CLE_CT0CS 0x00100 +#define V3D_CLE_CT1CS 0x00104 +#define V3D_CLE_CTNCS(n) (V3D_CLE_CT0CS + 4 * n) +#define V3D_CLE_CT0EA 0x00108 +#define V3D_CLE_CT1EA 0x0010c +#define V3D_CLE_CTNEA(n) (V3D_CLE_CT0EA + 4 * n) +#define V3D_CLE_CT0CA 0x00110 +#define V3D_CLE_CT1CA 0x00114 +#define V3D_CLE_CTNCA(n) (V3D_CLE_CT0CA + 4 * n) +#define V3D_CLE_CT0RA 0x00118 +#define V3D_CLE_CT1RA 0x0011c +#define V3D_CLE_CT0LC 0x00120 +#define V3D_CLE_CT1LC 0x00124 +#define V3D_CLE_CT0PC 0x00128 +#define V3D_CLE_CT1PC 0x0012c +#define V3D_CLE_PCS 0x00130 +#define V3D_CLE_BFC 0x00134 +#define V3D_CLE_RFC 0x00138 +#define V3D_CLE_TFBC 0x0013c +#define V3D_CLE_TFIT 0x00140 +#define V3D_CLE_CT1CFG 0x00144 +#define V3D_CLE_CT1TILECT 0x00148 +#define V3D_CLE_CT1TSKIP 0x0014c +#define V3D_CLE_CT1PTCT 0x00150 +#define V3D_CLE_CT0SYNC 0x00154 +#define V3D_CLE_CT1SYNC 0x00158 +#define V3D_CLE_CT0QTS 0x0015c +# define V3D_CLE_CT0QTS_ENABLE BIT(1) +#define V3D_CLE_CT0QBA 0x00160 +#define V3D_CLE_CT1QBA 0x00164 +#define V3D_CLE_CTNQBA(n) (V3D_CLE_CT0QBA + 4 * n) +#define V3D_CLE_CT0QEA 0x00168 +#define V3D_CLE_CT1QEA 0x0016c +#define V3D_CLE_CTNQEA(n) (V3D_CLE_CT0QEA + 4 * n) +#define V3D_CLE_CT0QMA 0x00170 +#define V3D_CLE_CT0QMS 0x00174 +#define V3D_CLE_CT1QCFG 0x00178 +/* If set without ETPROC, entirely skip tiles with no primitives. */ +# define V3D_CLE_QCFG_ETFILT BIT(7) +/* If set with ETFILT, just write the clear color to tiles with no + * primitives. + */ +# define V3D_CLE_QCFG_ETPROC BIT(6) +# define V3D_CLE_QCFG_ETSFLUSH BIT(1) +# define V3D_CLE_QCFG_MCDIS BIT(0) + +#define V3D_PTB_BPCA 0x00300 +#define V3D_PTB_BPCS 0x00304 +#define V3D_PTB_BPOA 0x00308 +#define V3D_PTB_BPOS 0x0030c + +#define V3D_PTB_BXCF 0x00310 +# define V3D_PTB_BXCF_RWORDERDISA BIT(1) +# define V3D_PTB_BXCF_CLIPDISA BIT(0) + +#define V3D_GMP_STATUS 0x00800 +# define V3D_GMP_STATUS_GMPRST BIT(31) +# define V3D_GMP_STATUS_WR_COUNT_MASK V3D_MASK(30, 24) +# define V3D_GMP_STATUS_WR_COUNT_SHIFT 24 +# define V3D_GMP_STATUS_RD_COUNT_MASK V3D_MASK(22, 16) +# define V3D_GMP_STATUS_RD_COUNT_SHIFT 16 +# define V3D_GMP_STATUS_WR_ACTIVE BIT(5) +# define V3D_GMP_STATUS_RD_ACTIVE BIT(4) +# define V3D_GMP_STATUS_CFG_BUSY BIT(3) +# define V3D_GMP_STATUS_CNTOVF BIT(2) +# define V3D_GMP_STATUS_INVPROT BIT(1) +# define V3D_GMP_STATUS_VIO BIT(0) + +#define V3D_GMP_CFG 0x00804 +# define V3D_GMP_CFG_LBURSTEN BIT(3) +# define V3D_GMP_CFG_PGCRSEN BIT() +# define V3D_GMP_CFG_STOP_REQ BIT(1) +# define V3D_GMP_CFG_PROT_ENABLE BIT(0) + +#define V3D_GMP_VIO_ADDR 0x00808 +#define V3D_GMP_VIO_TYPE 0x0080c +#define V3D_GMP_TABLE_ADDR 0x00810 +#define V3D_GMP_CLEAR_LOAD 0x00814 +#define V3D_GMP_PRESERVE_LOAD 0x00818 +#define V3D_GMP_VALID_LINES 0x00820 + +#endif /* V3D_REGS_H */ diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c new file mode 100644 index 0000000..b07bece --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -0,0 +1,228 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2018 Broadcom */ + +/** + * DOC: Broadcom V3D scheduling + * + * The shared DRM GPU scheduler is used to coordinate submitting jobs + * to the hardware. Each DRM fd (roughly a client process) gets its + * own scheduler entity, which will process jobs in order. The GPU + * scheduler will round-robin between clients to submit the next job. + * + * For simplicity, and in order to keep latency low for interactive + * jobs when bulk background jobs are queued up, we submit a new job + * to the HW only when it has completed the last one, instead of + * filling up the CT[01]Q FIFOs with jobs. Similarly, we use + * v3d_job_dependency() to manage the dependency between bin and + * render, instead of having the clients submit jobs with using the + * HW's semaphores to interlock between them. + */ + +#include <linux/kthread.h> + +#include "v3d_drv.h" +#include "v3d_regs.h" +#include "v3d_trace.h" + +static struct v3d_job * +to_v3d_job(struct drm_sched_job *sched_job) +{ + return container_of(sched_job, struct v3d_job, base); +} + +static void +v3d_job_free(struct drm_sched_job *sched_job) +{ + struct v3d_job *job = to_v3d_job(sched_job); + + v3d_exec_put(job->exec); +} + +/** + * Returns the fences that the bin job depends on, one by one. + * v3d_job_run() won't be called until all of them have been signaled. + */ +static struct dma_fence * +v3d_job_dependency(struct drm_sched_job *sched_job, + struct drm_sched_entity *s_entity) +{ + struct v3d_job *job = to_v3d_job(sched_job); + struct v3d_exec_info *exec = job->exec; + enum v3d_queue q = job == &exec->bin ? V3D_BIN : V3D_RENDER; + struct dma_fence *fence; + + fence = job->in_fence; + if (fence) { + job->in_fence = NULL; + return fence; + } + + if (q == V3D_RENDER) { + /* If we had a bin job, the render job definitely depends on + * it. We first have to wait for bin to be scheduled, so that + * its done_fence is created. + */ + fence = exec->bin_done_fence; + if (fence) { + exec->bin_done_fence = NULL; + return fence; + } + } + + /* XXX: Wait on a fence for switching the GMP if necessary, + * and then do so. + */ + + return fence; +} + +static struct dma_fence *v3d_job_run(struct drm_sched_job *sched_job) +{ + struct v3d_job *job = to_v3d_job(sched_job); + struct v3d_exec_info *exec = job->exec; + enum v3d_queue q = job == &exec->bin ? V3D_BIN : V3D_RENDER; + struct v3d_dev *v3d = exec->v3d; + struct drm_device *dev = &v3d->drm; + struct dma_fence *fence; + unsigned long irqflags; + + if (unlikely(job->base.s_fence->finished.error)) + return NULL; + + /* Lock required around bin_job update vs + * v3d_overflow_mem_work(). + */ + spin_lock_irqsave(&v3d->job_lock, irqflags); + if (q == V3D_BIN) { + v3d->bin_job = job->exec; + + /* Clear out the overflow allocation, so we don't + * reuse the overflow attached to a previous job. + */ + V3D_CORE_WRITE(0, V3D_PTB_BPOS, 0); + } else { + v3d->render_job = job->exec; + } + spin_unlock_irqrestore(&v3d->job_lock, irqflags); + + /* Can we avoid this flush when q==RENDER? We need to be + * careful of scheduling, though -- imagine job0 rendering to + * texture and job1 reading, and them being executed as bin0, + * bin1, render0, render1, so that render1's flush at bin time + * wasn't enough. + */ + v3d_invalidate_caches(v3d); + + fence = v3d_fence_create(v3d, q); + if (!fence) + return fence; + + if (job->done_fence) + dma_fence_put(job->done_fence); + job->done_fence = dma_fence_get(fence); + + trace_v3d_submit_cl(dev, q == V3D_RENDER, to_v3d_fence(fence)->seqno, + job->start, job->end); + + if (q == V3D_BIN) { + if (exec->qma) { + V3D_CORE_WRITE(0, V3D_CLE_CT0QMA, exec->qma); + V3D_CORE_WRITE(0, V3D_CLE_CT0QMS, exec->qms); + } + if (exec->qts) { + V3D_CORE_WRITE(0, V3D_CLE_CT0QTS, + V3D_CLE_CT0QTS_ENABLE | + exec->qts); + } + } else { + /* XXX: Set the QCFG */ + } + + /* Set the current and end address of the control list. + * Writing the end register is what starts the job. + */ + V3D_CORE_WRITE(0, V3D_CLE_CTNQBA(q), job->start); + V3D_CORE_WRITE(0, V3D_CLE_CTNQEA(q), job->end); + + return fence; +} + +static void +v3d_job_timedout(struct drm_sched_job *sched_job) +{ + struct v3d_job *job = to_v3d_job(sched_job); + struct v3d_exec_info *exec = job->exec; + struct v3d_dev *v3d = exec->v3d; + enum v3d_queue q; + + mutex_lock(&v3d->reset_lock); + + /* block scheduler */ + for (q = 0; q < V3D_MAX_QUEUES; q++) { + struct drm_gpu_scheduler *sched = &v3d->queue[q].sched; + + kthread_park(sched->thread); + drm_sched_hw_job_reset(sched, (sched_job->sched == sched ? + sched_job : NULL)); + } + + /* get the GPU back into the init state */ + v3d_reset(v3d); + + /* Unblock schedulers and restart their jobs. */ + for (q = 0; q < V3D_MAX_QUEUES; q++) { + drm_sched_job_recovery(&v3d->queue[q].sched); + kthread_unpark(v3d->queue[q].sched.thread); + } + + mutex_unlock(&v3d->reset_lock); +} + +static const struct drm_sched_backend_ops v3d_sched_ops = { + .dependency = v3d_job_dependency, + .run_job = v3d_job_run, + .timedout_job = v3d_job_timedout, + .free_job = v3d_job_free +}; + +int +v3d_sched_init(struct v3d_dev *v3d) +{ + int hw_jobs_limit = 1; + int job_hang_limit = 0; + int hang_limit_ms = 500; + int ret; + + ret = drm_sched_init(&v3d->queue[V3D_BIN].sched, + &v3d_sched_ops, + hw_jobs_limit, job_hang_limit, + msecs_to_jiffies(hang_limit_ms), + "v3d_bin"); + if (ret) { + dev_err(v3d->dev, "Failed to create bin scheduler: %d.", ret); + return ret; + } + + ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched, + &v3d_sched_ops, + hw_jobs_limit, job_hang_limit, + msecs_to_jiffies(hang_limit_ms), + "v3d_render"); + if (ret) { + dev_err(v3d->dev, "Failed to create render scheduler: %d.", + ret); + drm_sched_fini(&v3d->queue[V3D_BIN].sched); + return ret; + } + + return 0; +} + +void +v3d_sched_fini(struct v3d_dev *v3d) +{ + enum v3d_queue q; + + for (q = 0; q < V3D_MAX_QUEUES; q++) + drm_sched_fini(&v3d->queue[q].sched); +} diff --git a/drivers/gpu/drm/v3d/v3d_trace.h b/drivers/gpu/drm/v3d/v3d_trace.h new file mode 100644 index 0000000..85dd351 --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_trace.h @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2015-2018 Broadcom */ + +#if !defined(_V3D_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) +#define _V3D_TRACE_H_ + +#include <linux/stringify.h> +#include <linux/types.h> +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM v3d +#define TRACE_INCLUDE_FILE v3d_trace + +TRACE_EVENT(v3d_submit_cl, + TP_PROTO(struct drm_device *dev, bool is_render, + uint64_t seqno, + u32 ctnqba, u32 ctnqea), + TP_ARGS(dev, is_render, seqno, ctnqba, ctnqea), + + TP_STRUCT__entry( + __field(u32, dev) + __field(bool, is_render) + __field(u64, seqno) + __field(u32, ctnqba) + __field(u32, ctnqea) + ), + + TP_fast_assign( + __entry->dev = dev->primary->index; + __entry->is_render = is_render; + __entry->seqno = seqno; + __entry->ctnqba = ctnqba; + __entry->ctnqea = ctnqea; + ), + + TP_printk("dev=%u, %s, seqno=%llu, 0x%08x..0x%08x", + __entry->dev, + __entry->is_render ? "RCL" : "BCL", + __entry->seqno, + __entry->ctnqba, + __entry->ctnqea) +); + +TRACE_EVENT(v3d_reset_begin, + TP_PROTO(struct drm_device *dev), + TP_ARGS(dev), + + TP_STRUCT__entry( + __field(u32, dev) + ), + + TP_fast_assign( + __entry->dev = dev->primary->index; + ), + + TP_printk("dev=%u", + __entry->dev) +); + +TRACE_EVENT(v3d_reset_end, + TP_PROTO(struct drm_device *dev), + TP_ARGS(dev), + + TP_STRUCT__entry( + __field(u32, dev) + ), + + TP_fast_assign( + __entry->dev = dev->primary->index; + ), + + TP_printk("dev=%u", + __entry->dev) +); + +#endif /* _V3D_TRACE_H_ */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#include <trace/define_trace.h> diff --git a/drivers/gpu/drm/v3d/v3d_trace_points.c b/drivers/gpu/drm/v3d/v3d_trace_points.c new file mode 100644 index 0000000..482922d --- /dev/null +++ b/drivers/gpu/drm/v3d/v3d_trace_points.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* Copyright (C) 2015 Broadcom */ + +#include "v3d_drv.h" + +#ifndef __CHECKER__ +#define CREATE_TRACE_POINTS +#include "v3d_trace.h" +#endif diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c index 2decc8e..add9cc9 100644 --- a/drivers/gpu/drm/vc4/vc4_bo.c +++ b/drivers/gpu/drm/vc4/vc4_bo.c @@ -195,6 +195,7 @@ static void vc4_bo_destroy(struct vc4_bo *bo) vc4_bo_set_label(obj, -1); if (bo->validated_shader) { + kfree(bo->validated_shader->uniform_addr_offsets); kfree(bo->validated_shader->texture_samples); kfree(bo->validated_shader); bo->validated_shader = NULL; @@ -591,6 +592,7 @@ void vc4_free_object(struct drm_gem_object *gem_bo) } if (bo->validated_shader) { + kfree(bo->validated_shader->uniform_addr_offsets); kfree(bo->validated_shader->texture_samples); kfree(bo->validated_shader); bo->validated_shader = NULL; diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index bf46674..c8650bb 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -42,51 +42,18 @@ #include "vc4_drv.h" #include "vc4_regs.h" -struct vc4_crtc { - struct drm_crtc base; - const struct vc4_crtc_data *data; - void __iomem *regs; - - /* Timestamp at start of vblank irq - unaffected by lock delays. */ - ktime_t t_vblank; - - /* Which HVS channel we're using for our CRTC. */ - int channel; - - u8 lut_r[256]; - u8 lut_g[256]; - u8 lut_b[256]; - /* Size in pixels of the COB memory allocated to this CRTC. */ - u32 cob_size; - - struct drm_pending_vblank_event *event; -}; - struct vc4_crtc_state { struct drm_crtc_state base; /* Dlist area for this CRTC configuration. */ struct drm_mm_node mm; }; -static inline struct vc4_crtc * -to_vc4_crtc(struct drm_crtc *crtc) -{ - return (struct vc4_crtc *)crtc; -} - static inline struct vc4_crtc_state * to_vc4_crtc_state(struct drm_crtc_state *crtc_state) { return (struct vc4_crtc_state *)crtc_state; } -struct vc4_crtc_data { - /* Which channel of the HVS this pixelvalve sources from. */ - int hvs_channel; - - enum vc4_encoder_type encoder_types[4]; -}; - #define CRTC_WRITE(offset, val) writel(val, vc4_crtc->regs + (offset)) #define CRTC_READ(offset) readl(vc4_crtc->regs + (offset)) @@ -298,23 +265,21 @@ vc4_crtc_lut_load(struct drm_crtc *crtc) HVS_WRITE(SCALER_GAMDATA, vc4_crtc->lut_b[i]); } -static int -vc4_crtc_gamma_set(struct drm_crtc *crtc, u16 *r, u16 *g, u16 *b, - uint32_t size, - struct drm_modeset_acquire_ctx *ctx) +static void +vc4_crtc_update_gamma_lut(struct drm_crtc *crtc) { struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc); + struct drm_color_lut *lut = crtc->state->gamma_lut->data; + u32 length = drm_color_lut_size(crtc->state->gamma_lut); u32 i; - for (i = 0; i < size; i++) { - vc4_crtc->lut_r[i] = r[i] >> 8; - vc4_crtc->lut_g[i] = g[i] >> 8; - vc4_crtc->lut_b[i] = b[i] >> 8; + for (i = 0; i < length; i++) { + vc4_crtc->lut_r[i] = drm_color_lut_extract(lut[i].red, 8); + vc4_crtc->lut_g[i] = drm_color_lut_extract(lut[i].green, 8); + vc4_crtc->lut_b[i] = drm_color_lut_extract(lut[i].blue, 8); } vc4_crtc_lut_load(crtc); - - return 0; } static u32 vc4_get_fifo_full_level(u32 format) @@ -699,6 +664,22 @@ static void vc4_crtc_atomic_flush(struct drm_crtc *crtc, if (crtc->state->active && old_state->active) vc4_crtc_update_dlist(crtc); + if (crtc->state->color_mgmt_changed) { + u32 dispbkgndx = HVS_READ(SCALER_DISPBKGNDX(vc4_crtc->channel)); + + if (crtc->state->gamma_lut) { + vc4_crtc_update_gamma_lut(crtc); + dispbkgndx |= SCALER_DISPBKGND_GAMMA; + } else { + /* Unsetting DISPBKGND_GAMMA skips the gamma lut step + * in hardware, which is the same as a linear lut that + * DRM expects us to use in absence of a user lut. + */ + dispbkgndx &= ~SCALER_DISPBKGND_GAMMA; + } + HVS_WRITE(SCALER_DISPBKGNDX(vc4_crtc->channel), dispbkgndx); + } + if (debug_dump_regs) { DRM_INFO("CRTC %d HVS after:\n", drm_crtc_index(crtc)); vc4_hvs_dump_state(dev); @@ -760,6 +741,7 @@ static irqreturn_t vc4_crtc_irq_handler(int irq, void *data) struct vc4_async_flip_state { struct drm_crtc *crtc; struct drm_framebuffer *fb; + struct drm_framebuffer *old_fb; struct drm_pending_vblank_event *event; struct vc4_seqno_cb cb; @@ -789,6 +771,23 @@ vc4_async_page_flip_complete(struct vc4_seqno_cb *cb) drm_crtc_vblank_put(crtc); drm_framebuffer_put(flip_state->fb); + + /* Decrement the BO usecnt in order to keep the inc/dec calls balanced + * when the planes are updated through the async update path. + * FIXME: we should move to generic async-page-flip when it's + * available, so that we can get rid of this hand-made cleanup_fb() + * logic. + */ + if (flip_state->old_fb) { + struct drm_gem_cma_object *cma_bo; + struct vc4_bo *bo; + + cma_bo = drm_fb_cma_get_gem_obj(flip_state->old_fb, 0); + bo = to_vc4_bo(&cma_bo->base); + vc4_bo_dec_usecnt(bo); + drm_framebuffer_put(flip_state->old_fb); + } + kfree(flip_state); up(&vc4->async_modeset); @@ -813,9 +812,22 @@ static int vc4_async_page_flip(struct drm_crtc *crtc, struct drm_gem_cma_object *cma_bo = drm_fb_cma_get_gem_obj(fb, 0); struct vc4_bo *bo = to_vc4_bo(&cma_bo->base); + /* Increment the BO usecnt here, so that we never end up with an + * unbalanced number of vc4_bo_{dec,inc}_usecnt() calls when the + * plane is later updated through the non-async path. + * FIXME: we should move to generic async-page-flip when it's + * available, so that we can get rid of this hand-made prepare_fb() + * logic. + */ + ret = vc4_bo_inc_usecnt(bo); + if (ret) + return ret; + flip_state = kzalloc(sizeof(*flip_state), GFP_KERNEL); - if (!flip_state) + if (!flip_state) { + vc4_bo_dec_usecnt(bo); return -ENOMEM; + } drm_framebuffer_get(fb); flip_state->fb = fb; @@ -826,10 +838,23 @@ static int vc4_async_page_flip(struct drm_crtc *crtc, ret = down_interruptible(&vc4->async_modeset); if (ret) { drm_framebuffer_put(fb); + vc4_bo_dec_usecnt(bo); kfree(flip_state); return ret; } + /* Save the current FB before it's replaced by the new one in + * drm_atomic_set_fb_for_plane(). We'll need the old FB in + * vc4_async_page_flip_complete() to decrement the BO usecnt and keep + * it consistent. + * FIXME: we should move to generic async-page-flip when it's + * available, so that we can get rid of this hand-made cleanup_fb() + * logic. + */ + flip_state->old_fb = plane->state->fb; + if (flip_state->old_fb) + drm_framebuffer_get(flip_state->old_fb); + WARN_ON(drm_crtc_vblank_get(crtc) != 0); /* Immediately update the plane's legacy fb pointer, so that later @@ -909,7 +934,7 @@ static const struct drm_crtc_funcs vc4_crtc_funcs = { .reset = vc4_crtc_reset, .atomic_duplicate_state = vc4_crtc_duplicate_state, .atomic_destroy_state = vc4_crtc_destroy_state, - .gamma_set = vc4_crtc_gamma_set, + .gamma_set = drm_atomic_helper_legacy_gamma_set, .enable_vblank = vc4_enable_vblank, .disable_vblank = vc4_disable_vblank, }; @@ -1035,6 +1060,12 @@ static int vc4_crtc_bind(struct device *dev, struct device *master, void *data) primary_plane->crtc = crtc; vc4_crtc->channel = vc4_crtc->data->hvs_channel; drm_mode_crtc_set_gamma_size(crtc, ARRAY_SIZE(vc4_crtc->lut_r)); + drm_crtc_enable_color_mgmt(crtc, 0, false, crtc->gamma_size); + + /* We support CTM, but only for one CRTC at a time. It's therefore + * implemented as private driver state in vc4_kms, not here. + */ + drm_crtc_enable_color_mgmt(crtc, 0, true, crtc->gamma_size); /* Set up some arbitrary number of planes. We're not limited * by a set number of physical registers, just the space in diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index 94b99c9..d9b8b70 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -175,7 +175,8 @@ static struct drm_driver vc4_drm_driver = { DRIVER_GEM | DRIVER_HAVE_IRQ | DRIVER_RENDER | - DRIVER_PRIME), + DRIVER_PRIME | + DRIVER_SYNCOBJ), .lastclose = drm_fb_helper_lastclose, .open = vc4_open, .postclose = vc4_close, @@ -318,8 +319,8 @@ dev_unref: static void vc4_drm_unbind(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct drm_device *drm = platform_get_drvdata(pdev); + struct drm_device *drm = dev_get_drvdata(dev); + struct vc4_dev *vc4 = to_vc4_dev(drm); drm_dev_unregister(drm); @@ -327,6 +328,8 @@ static void vc4_drm_unbind(struct device *dev) drm_mode_config_cleanup(drm); + drm_atomic_private_obj_fini(&vc4->ctm_manager); + drm_dev_unref(drm); } diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index 1b4cd1f..554a4e8 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -10,6 +10,8 @@ #include <drm/drmP.h> #include <drm/drm_encoder.h> #include <drm/drm_gem_cma_helper.h> +#include <drm/drm_atomic.h> +#include <drm/drm_syncobj.h> #include "uapi/drm/vc4_drm.h" @@ -193,6 +195,9 @@ struct vc4_dev { } hangcheck; struct semaphore async_modeset; + + struct drm_modeset_lock ctm_state_lock; + struct drm_private_obj ctm_manager; }; static inline struct vc4_dev * @@ -392,6 +397,39 @@ to_vc4_encoder(struct drm_encoder *encoder) return container_of(encoder, struct vc4_encoder, base); } +struct vc4_crtc_data { + /* Which channel of the HVS this pixelvalve sources from. */ + int hvs_channel; + + enum vc4_encoder_type encoder_types[4]; +}; + +struct vc4_crtc { + struct drm_crtc base; + const struct vc4_crtc_data *data; + void __iomem *regs; + + /* Timestamp at start of vblank irq - unaffected by lock delays. */ + ktime_t t_vblank; + + /* Which HVS channel we're using for our CRTC. */ + int channel; + + u8 lut_r[256]; + u8 lut_g[256]; + u8 lut_b[256]; + /* Size in pixels of the COB memory allocated to this CRTC. */ + u32 cob_size; + + struct drm_pending_vblank_event *event; +}; + +static inline struct vc4_crtc * +to_vc4_crtc(struct drm_crtc *crtc) +{ + return (struct vc4_crtc *)crtc; +} + #define V3D_READ(offset) readl(vc4->v3d->regs + offset) #define V3D_WRITE(offset, val) writel(val, vc4->v3d->regs + offset) #define HVS_READ(offset) readl(vc4->hvs->regs + offset) diff --git a/drivers/gpu/drm/vc4/vc4_dsi.c b/drivers/gpu/drm/vc4/vc4_dsi.c index 94085f8..8aa8978 100644 --- a/drivers/gpu/drm/vc4/vc4_dsi.c +++ b/drivers/gpu/drm/vc4/vc4_dsi.c @@ -753,6 +753,11 @@ static void vc4_dsi_ulps(struct vc4_dsi *dsi, bool ulps) (dsi->lanes > 2 ? DSI1_STAT_PHY_D2_STOP : 0) | (dsi->lanes > 3 ? DSI1_STAT_PHY_D3_STOP : 0)); int ret; + bool ulps_currently_enabled = (DSI_PORT_READ(PHY_AFEC0) & + DSI_PORT_BIT(PHY_AFEC0_LATCH_ULPS)); + + if (ulps == ulps_currently_enabled) + return; DSI_PORT_WRITE(STAT, stat_ulps); DSI_PORT_WRITE(PHYC, DSI_PORT_READ(PHYC) | phyc_ulps); diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 2107b0d..7910b9a 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -27,6 +27,7 @@ #include <linux/device.h> #include <linux/io.h> #include <linux/sched/signal.h> +#include <linux/dma-fence-array.h> #include "uapi/drm/vc4_drm.h" #include "vc4_drv.h" @@ -655,7 +656,8 @@ retry: */ static int vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, - struct ww_acquire_ctx *acquire_ctx) + struct ww_acquire_ctx *acquire_ctx, + struct drm_syncobj *out_sync) { struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_exec_info *renderjob; @@ -678,6 +680,9 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec, fence->seqno = exec->seqno; exec->fence = &fence->base; + if (out_sync) + drm_syncobj_replace_fence(out_sync, exec->fence); + vc4_update_bo_seqnos(exec, seqno); vc4_unlock_bo_reservations(dev, exec, acquire_ctx); @@ -1113,8 +1118,10 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, struct vc4_dev *vc4 = to_vc4_dev(dev); struct vc4_file *vc4file = file_priv->driver_priv; struct drm_vc4_submit_cl *args = data; + struct drm_syncobj *out_sync = NULL; struct vc4_exec_info *exec; struct ww_acquire_ctx acquire_ctx; + struct dma_fence *in_fence; int ret = 0; if ((args->flags & ~(VC4_SUBMIT_CL_USE_CLEAR_COLOR | @@ -1126,7 +1133,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, } if (args->pad2 != 0) { - DRM_DEBUG("->pad2 must be set to zero\n"); + DRM_DEBUG("Invalid pad: 0x%08x\n", args->pad2); return -EINVAL; } @@ -1164,6 +1171,29 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, } } + if (args->in_sync) { + ret = drm_syncobj_find_fence(file_priv, args->in_sync, + &in_fence); + if (ret) + goto fail; + + /* When the fence (or fence array) is exclusively from our + * context we can skip the wait since jobs are executed in + * order of their submission through this ioctl and this can + * only have fences from a prior job. + */ + if (!dma_fence_match_context(in_fence, + vc4->dma_fence_context)) { + ret = dma_fence_wait(in_fence, true); + if (ret) { + dma_fence_put(in_fence); + goto fail; + } + } + + dma_fence_put(in_fence); + } + if (exec->args->bin_cl_size != 0) { ret = vc4_get_bcl(dev, exec); if (ret) @@ -1181,12 +1211,33 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data, if (ret) goto fail; + if (args->out_sync) { + out_sync = drm_syncobj_find(file_priv, args->out_sync); + if (!out_sync) { + ret = -EINVAL; + goto fail; + } + + /* We replace the fence in out_sync in vc4_queue_submit since + * the render job could execute immediately after that call. + * If it finishes before our ioctl processing resumes the + * render job fence could already have been freed. + */ + } + /* Clear this out of the struct we'll be putting in the queue, * since it's part of our stack. */ exec->args = NULL; - ret = vc4_queue_submit(dev, exec, &acquire_ctx); + ret = vc4_queue_submit(dev, exec, &acquire_ctx, out_sync); + + /* The syncobj isn't part of the exec data and we need to free our + * reference even if job submission failed. + */ + if (out_sync) + drm_syncobj_put(out_sync); + if (ret) goto fail; diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 1a6db29..b8d5053 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -995,15 +995,17 @@ static const struct snd_soc_dapm_route vc4_hdmi_audio_routes[] = { { "TX", NULL, "Playback" }, }; -static const struct snd_soc_codec_driver vc4_hdmi_audio_codec_drv = { - .component_driver = { - .controls = vc4_hdmi_audio_controls, - .num_controls = ARRAY_SIZE(vc4_hdmi_audio_controls), - .dapm_widgets = vc4_hdmi_audio_widgets, - .num_dapm_widgets = ARRAY_SIZE(vc4_hdmi_audio_widgets), - .dapm_routes = vc4_hdmi_audio_routes, - .num_dapm_routes = ARRAY_SIZE(vc4_hdmi_audio_routes), - }, +static const struct snd_soc_component_driver vc4_hdmi_audio_component_drv = { + .controls = vc4_hdmi_audio_controls, + .num_controls = ARRAY_SIZE(vc4_hdmi_audio_controls), + .dapm_widgets = vc4_hdmi_audio_widgets, + .num_dapm_widgets = ARRAY_SIZE(vc4_hdmi_audio_widgets), + .dapm_routes = vc4_hdmi_audio_routes, + .num_dapm_routes = ARRAY_SIZE(vc4_hdmi_audio_routes), + .idle_bias_on = 1, + .use_pmdown_time = 1, + .endianness = 1, + .non_legacy_dai_naming = 1, }; static const struct snd_soc_dai_ops vc4_hdmi_audio_dai_ops = { @@ -1101,11 +1103,11 @@ static int vc4_hdmi_audio_init(struct vc4_hdmi *hdmi) return ret; } - /* register codec and codec dai */ - ret = snd_soc_register_codec(dev, &vc4_hdmi_audio_codec_drv, + /* register component and codec dai */ + ret = devm_snd_soc_register_component(dev, &vc4_hdmi_audio_component_drv, &vc4_hdmi_audio_codec_dai_drv, 1); if (ret) { - dev_err(dev, "Could not register codec: %d\n", ret); + dev_err(dev, "Could not register component: %d\n", ret); return ret; } @@ -1130,29 +1132,11 @@ static int vc4_hdmi_audio_init(struct vc4_hdmi *hdmi) */ snd_soc_card_set_drvdata(card, hdmi); ret = devm_snd_soc_register_card(dev, card); - if (ret) { + if (ret) dev_err(dev, "Could not register sound card: %d\n", ret); - goto unregister_codec; - } - - return 0; - -unregister_codec: - snd_soc_unregister_codec(dev); return ret; -} - -static void vc4_hdmi_audio_cleanup(struct vc4_hdmi *hdmi) -{ - struct device *dev = &hdmi->pdev->dev; - /* - * If drvdata is not set this means the audio card was not - * registered, just skip codec unregistration in this case. - */ - if (dev_get_drvdata(dev)) - snd_soc_unregister_codec(dev); } #ifdef CONFIG_DRM_VC4_HDMI_CEC @@ -1480,7 +1464,6 @@ static void vc4_hdmi_unbind(struct device *dev, struct device *master, struct vc4_dev *vc4 = drm->dev_private; struct vc4_hdmi *hdmi = vc4->hdmi; - vc4_hdmi_audio_cleanup(hdmi); cec_unregister_adapter(hdmi->cec_adap); vc4_hdmi_connector_destroy(hdmi->connector); vc4_hdmi_encoder_destroy(hdmi->encoder); diff --git a/drivers/gpu/drm/vc4/vc4_hvs.c b/drivers/gpu/drm/vc4/vc4_hvs.c index 2b62fc5..5d8c749 100644 --- a/drivers/gpu/drm/vc4/vc4_hvs.c +++ b/drivers/gpu/drm/vc4/vc4_hvs.c @@ -58,6 +58,10 @@ static const struct { HVS_REG(SCALER_DISPSTAT2), HVS_REG(SCALER_DISPBASE2), HVS_REG(SCALER_DISPALPHA2), + HVS_REG(SCALER_OLEDOFFS), + HVS_REG(SCALER_OLEDCOEF0), + HVS_REG(SCALER_OLEDCOEF1), + HVS_REG(SCALER_OLEDCOEF2), }; void vc4_hvs_dump_state(struct drm_device *dev) diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index ba60153..8a411e5 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -23,6 +23,117 @@ #include <drm/drm_fb_cma_helper.h> #include <drm/drm_gem_framebuffer_helper.h> #include "vc4_drv.h" +#include "vc4_regs.h" + +struct vc4_ctm_state { + struct drm_private_state base; + struct drm_color_ctm *ctm; + int fifo; +}; + +static struct vc4_ctm_state *to_vc4_ctm_state(struct drm_private_state *priv) +{ + return container_of(priv, struct vc4_ctm_state, base); +} + +static struct vc4_ctm_state *vc4_get_ctm_state(struct drm_atomic_state *state, + struct drm_private_obj *manager) +{ + struct drm_device *dev = state->dev; + struct vc4_dev *vc4 = dev->dev_private; + struct drm_private_state *priv_state; + int ret; + + ret = drm_modeset_lock(&vc4->ctm_state_lock, state->acquire_ctx); + if (ret) + return ERR_PTR(ret); + + priv_state = drm_atomic_get_private_obj_state(state, manager); + if (IS_ERR(priv_state)) + return ERR_CAST(priv_state); + + return to_vc4_ctm_state(priv_state); +} + +static struct drm_private_state * +vc4_ctm_duplicate_state(struct drm_private_obj *obj) +{ + struct vc4_ctm_state *state; + + state = kmemdup(obj->state, sizeof(*state), GFP_KERNEL); + if (!state) + return NULL; + + __drm_atomic_helper_private_obj_duplicate_state(obj, &state->base); + + return &state->base; +} + +static void vc4_ctm_destroy_state(struct drm_private_obj *obj, + struct drm_private_state *state) +{ + struct vc4_ctm_state *ctm_state = to_vc4_ctm_state(state); + + kfree(ctm_state); +} + +static const struct drm_private_state_funcs vc4_ctm_state_funcs = { + .atomic_duplicate_state = vc4_ctm_duplicate_state, + .atomic_destroy_state = vc4_ctm_destroy_state, +}; + +/* Converts a DRM S31.32 value to the HW S0.9 format. */ +static u16 vc4_ctm_s31_32_to_s0_9(u64 in) +{ + u16 r; + + /* Sign bit. */ + r = in & BIT_ULL(63) ? BIT(9) : 0; + + if ((in & GENMASK_ULL(62, 32)) > 0) { + /* We have zero integer bits so we can only saturate here. */ + r |= GENMASK(8, 0); + } else { + /* Otherwise take the 9 most important fractional bits. */ + r |= (in >> 23) & GENMASK(8, 0); + } + + return r; +} + +static void +vc4_ctm_commit(struct vc4_dev *vc4, struct drm_atomic_state *state) +{ + struct vc4_ctm_state *ctm_state = to_vc4_ctm_state(vc4->ctm_manager.state); + struct drm_color_ctm *ctm = ctm_state->ctm; + + if (ctm_state->fifo) { + HVS_WRITE(SCALER_OLEDCOEF2, + VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[0]), + SCALER_OLEDCOEF2_R_TO_R) | + VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[3]), + SCALER_OLEDCOEF2_R_TO_G) | + VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[6]), + SCALER_OLEDCOEF2_R_TO_B)); + HVS_WRITE(SCALER_OLEDCOEF1, + VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[1]), + SCALER_OLEDCOEF1_G_TO_R) | + VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[4]), + SCALER_OLEDCOEF1_G_TO_G) | + VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[7]), + SCALER_OLEDCOEF1_G_TO_B)); + HVS_WRITE(SCALER_OLEDCOEF0, + VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[2]), + SCALER_OLEDCOEF0_B_TO_R) | + VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[5]), + SCALER_OLEDCOEF0_B_TO_G) | + VC4_SET_FIELD(vc4_ctm_s31_32_to_s0_9(ctm->matrix[8]), + SCALER_OLEDCOEF0_B_TO_B)); + } + + HVS_WRITE(SCALER_OLEDOFFS, + VC4_SET_FIELD(ctm_state->fifo, SCALER_OLEDOFFS_DISPFIFO)); +} static void vc4_atomic_complete_commit(struct drm_atomic_state *state) @@ -36,6 +147,8 @@ vc4_atomic_complete_commit(struct drm_atomic_state *state) drm_atomic_helper_commit_modeset_disables(dev, state); + vc4_ctm_commit(vc4, state); + drm_atomic_helper_commit_planes(dev, state, 0); drm_atomic_helper_commit_modeset_enables(dev, state); @@ -90,6 +203,26 @@ static int vc4_atomic_commit(struct drm_device *dev, struct vc4_dev *vc4 = to_vc4_dev(dev); int ret; + if (state->async_update) { + ret = down_interruptible(&vc4->async_modeset); + if (ret) + return ret; + + ret = drm_atomic_helper_prepare_planes(dev, state); + if (ret) { + up(&vc4->async_modeset); + return ret; + } + + drm_atomic_helper_async_commit(dev, state); + + drm_atomic_helper_cleanup_planes(dev, state); + + up(&vc4->async_modeset); + + return 0; + } + ret = drm_atomic_helper_setup_commit(state, nonblock); if (ret) return ret; @@ -187,9 +320,89 @@ static struct drm_framebuffer *vc4_fb_create(struct drm_device *dev, return drm_gem_fb_create(dev, file_priv, mode_cmd); } +/* Our CTM has some peculiar limitations: we can only enable it for one CRTC + * at a time and the HW only supports S0.9 scalars. To account for the latter, + * we don't allow userland to set a CTM that we have no hope of approximating. + */ +static int +vc4_ctm_atomic_check(struct drm_device *dev, struct drm_atomic_state *state) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_ctm_state *ctm_state = NULL; + struct drm_crtc *crtc; + struct drm_crtc_state *old_crtc_state, *new_crtc_state; + struct drm_color_ctm *ctm; + int i; + + for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { + /* CTM is being disabled. */ + if (!new_crtc_state->ctm && old_crtc_state->ctm) { + ctm_state = vc4_get_ctm_state(state, &vc4->ctm_manager); + if (IS_ERR(ctm_state)) + return PTR_ERR(ctm_state); + ctm_state->fifo = 0; + } + } + + for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) { + if (new_crtc_state->ctm == old_crtc_state->ctm) + continue; + + if (!ctm_state) { + ctm_state = vc4_get_ctm_state(state, &vc4->ctm_manager); + if (IS_ERR(ctm_state)) + return PTR_ERR(ctm_state); + } + + /* CTM is being enabled or the matrix changed. */ + if (new_crtc_state->ctm) { + /* fifo is 1-based since 0 disables CTM. */ + int fifo = to_vc4_crtc(crtc)->channel + 1; + + /* Check userland isn't trying to turn on CTM for more + * than one CRTC at a time. + */ + if (ctm_state->fifo && ctm_state->fifo != fifo) { + DRM_DEBUG_DRIVER("Too many CTM configured\n"); + return -EINVAL; + } + + /* Check we can approximate the specified CTM. + * We disallow scalars |c| > 1.0 since the HW has + * no integer bits. + */ + ctm = new_crtc_state->ctm->data; + for (i = 0; i < ARRAY_SIZE(ctm->matrix); i++) { + u64 val = ctm->matrix[i]; + + val &= ~BIT_ULL(63); + if (val > BIT_ULL(32)) + return -EINVAL; + } + + ctm_state->fifo = fifo; + ctm_state->ctm = ctm; + } + } + + return 0; +} + +static int +vc4_atomic_check(struct drm_device *dev, struct drm_atomic_state *state) +{ + int ret; + + ret = vc4_ctm_atomic_check(dev, state); + if (ret < 0) + return ret; + + return drm_atomic_helper_check(dev, state); +} + static const struct drm_mode_config_funcs vc4_mode_funcs = { .output_poll_changed = drm_fb_helper_output_poll_changed, - .atomic_check = drm_atomic_helper_check, + .atomic_check = vc4_atomic_check, .atomic_commit = vc4_atomic_commit, .fb_create = vc4_fb_create, }; @@ -197,6 +410,7 @@ static const struct drm_mode_config_funcs vc4_mode_funcs = { int vc4_kms_load(struct drm_device *dev) { struct vc4_dev *vc4 = to_vc4_dev(dev); + struct vc4_ctm_state *ctm_state; int ret; sema_init(&vc4->async_modeset, 1); @@ -217,6 +431,14 @@ int vc4_kms_load(struct drm_device *dev) dev->mode_config.async_page_flip = true; dev->mode_config.allow_fb_modifiers = true; + drm_modeset_lock_init(&vc4->ctm_state_lock); + + ctm_state = kzalloc(sizeof(*ctm_state), GFP_KERNEL); + if (!ctm_state) + return -ENOMEM; + drm_atomic_private_obj_init(&vc4->ctm_manager, &ctm_state->base, + &vc4_ctm_state_funcs); + drm_mode_config_reset(dev); if (dev->mode_config.num_connector) diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index ce39390..3483c05 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -201,6 +201,7 @@ static void vc4_plane_reset(struct drm_plane *plane) return; plane->state = &vc4_state->base; + plane->state->alpha = DRM_BLEND_ALPHA_OPAQUE; vc4_state->base.plane = plane; } @@ -467,6 +468,7 @@ static int vc4_plane_mode_set(struct drm_plane *plane, u32 ctl0_offset = vc4_state->dlist_count; const struct hvs_format *format = vc4_get_hvs_format(fb->format->format); int num_planes = drm_format_num_planes(format->drm); + bool mix_plane_alpha; bool covers_screen; u32 scl0, scl1, pitch0; u32 lbm_size, tiling; @@ -552,7 +554,7 @@ static int vc4_plane_mode_set(struct drm_plane *plane, /* Position Word 0: Image Positions and Alpha Value */ vc4_state->pos0_offset = vc4_state->dlist_count; vc4_dlist_write(vc4_state, - VC4_SET_FIELD(0xff, SCALER_POS0_FIXED_ALPHA) | + VC4_SET_FIELD(state->alpha >> 8, SCALER_POS0_FIXED_ALPHA) | VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) | VC4_SET_FIELD(vc4_state->crtc_y, SCALER_POS0_START_Y)); @@ -565,6 +567,13 @@ static int vc4_plane_mode_set(struct drm_plane *plane, SCALER_POS1_SCL_HEIGHT)); } + /* Don't waste cycles mixing with plane alpha if the set alpha + * is opaque or there is no per-pixel alpha information. + * In any case we use the alpha property value as the fixed alpha. + */ + mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE && + fb->format->has_alpha; + /* Position Word 2: Source Image Size, Alpha */ vc4_state->pos2_offset = vc4_state->dlist_count; vc4_dlist_write(vc4_state, @@ -572,6 +581,7 @@ static int vc4_plane_mode_set(struct drm_plane *plane, SCALER_POS2_ALPHA_MODE_PIPELINE : SCALER_POS2_ALPHA_MODE_FIXED, SCALER_POS2_ALPHA_MODE) | + (mix_plane_alpha ? SCALER_POS2_ALPHA_MIX : 0) | (fb->format->has_alpha ? SCALER_POS2_ALPHA_PREMULT : 0) | VC4_SET_FIELD(vc4_state->src_w[0], SCALER_POS2_WIDTH) | VC4_SET_FIELD(vc4_state->src_h[0], SCALER_POS2_HEIGHT)); @@ -653,10 +663,11 @@ static int vc4_plane_mode_set(struct drm_plane *plane, vc4_state->crtc_w == state->crtc->mode.hdisplay && vc4_state->crtc_h == state->crtc->mode.vdisplay; /* Background fill might be necessary when the plane has per-pixel - * alpha content and blends from the background or does not cover - * the entire screen. + * alpha content or a non-opaque plane alpha and could blend from the + * background or does not cover the entire screen. */ - vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen; + vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen || + state->alpha != DRM_BLEND_ALPHA_OPAQUE; return 0; } @@ -741,6 +752,57 @@ void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb) vc4_state->dlist[vc4_state->ptr0_offset] = addr; } +static void vc4_plane_atomic_async_update(struct drm_plane *plane, + struct drm_plane_state *state) +{ + struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state); + + if (plane->state->fb != state->fb) { + vc4_plane_async_set_fb(plane, state->fb); + drm_atomic_set_fb_for_plane(plane->state, state->fb); + } + + /* Set the cursor's position on the screen. This is the + * expected change from the drm_mode_cursor_universal() + * helper. + */ + plane->state->crtc_x = state->crtc_x; + plane->state->crtc_y = state->crtc_y; + + /* Allow changing the start position within the cursor BO, if + * that matters. + */ + plane->state->src_x = state->src_x; + plane->state->src_y = state->src_y; + + /* Update the display list based on the new crtc_x/y. */ + vc4_plane_atomic_check(plane, plane->state); + + /* Note that we can't just call vc4_plane_write_dlist() + * because that would smash the context data that the HVS is + * currently using. + */ + writel(vc4_state->dlist[vc4_state->pos0_offset], + &vc4_state->hw_dlist[vc4_state->pos0_offset]); + writel(vc4_state->dlist[vc4_state->pos2_offset], + &vc4_state->hw_dlist[vc4_state->pos2_offset]); + writel(vc4_state->dlist[vc4_state->ptr0_offset], + &vc4_state->hw_dlist[vc4_state->ptr0_offset]); +} + +static int vc4_plane_atomic_async_check(struct drm_plane *plane, + struct drm_plane_state *state) +{ + /* No configuring new scaling in the fast path. */ + if (plane->state->crtc_w != state->crtc_w || + plane->state->crtc_h != state->crtc_h || + plane->state->src_w != state->src_w || + plane->state->src_h != state->src_h) + return -EINVAL; + + return 0; +} + static int vc4_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state) { @@ -780,6 +842,8 @@ static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = { .atomic_update = vc4_plane_atomic_update, .prepare_fb = vc4_prepare_fb, .cleanup_fb = vc4_cleanup_fb, + .atomic_async_check = vc4_plane_atomic_async_check, + .atomic_async_update = vc4_plane_atomic_async_update, }; static void vc4_plane_destroy(struct drm_plane *plane) @@ -788,82 +852,6 @@ static void vc4_plane_destroy(struct drm_plane *plane) drm_plane_cleanup(plane); } -/* Implements immediate (non-vblank-synced) updates of the cursor - * position, or falls back to the atomic helper otherwise. - */ -static int -vc4_update_plane(struct drm_plane *plane, - struct drm_crtc *crtc, - struct drm_framebuffer *fb, - int crtc_x, int crtc_y, - unsigned int crtc_w, unsigned int crtc_h, - uint32_t src_x, uint32_t src_y, - uint32_t src_w, uint32_t src_h, - struct drm_modeset_acquire_ctx *ctx) -{ - struct drm_plane_state *plane_state; - struct vc4_plane_state *vc4_state; - - if (plane != crtc->cursor) - goto out; - - plane_state = plane->state; - vc4_state = to_vc4_plane_state(plane_state); - - if (!plane_state) - goto out; - - /* No configuring new scaling in the fast path. */ - if (crtc_w != plane_state->crtc_w || - crtc_h != plane_state->crtc_h || - src_w != plane_state->src_w || - src_h != plane_state->src_h) { - goto out; - } - - if (fb != plane_state->fb) { - drm_atomic_set_fb_for_plane(plane->state, fb); - vc4_plane_async_set_fb(plane, fb); - } - - /* Set the cursor's position on the screen. This is the - * expected change from the drm_mode_cursor_universal() - * helper. - */ - plane_state->crtc_x = crtc_x; - plane_state->crtc_y = crtc_y; - - /* Allow changing the start position within the cursor BO, if - * that matters. - */ - plane_state->src_x = src_x; - plane_state->src_y = src_y; - - /* Update the display list based on the new crtc_x/y. */ - vc4_plane_atomic_check(plane, plane_state); - - /* Note that we can't just call vc4_plane_write_dlist() - * because that would smash the context data that the HVS is - * currently using. - */ - writel(vc4_state->dlist[vc4_state->pos0_offset], - &vc4_state->hw_dlist[vc4_state->pos0_offset]); - writel(vc4_state->dlist[vc4_state->pos2_offset], - &vc4_state->hw_dlist[vc4_state->pos2_offset]); - writel(vc4_state->dlist[vc4_state->ptr0_offset], - &vc4_state->hw_dlist[vc4_state->ptr0_offset]); - - return 0; - -out: - return drm_atomic_helper_update_plane(plane, crtc, fb, - crtc_x, crtc_y, - crtc_w, crtc_h, - src_x, src_y, - src_w, src_h, - ctx); -} - static bool vc4_format_mod_supported(struct drm_plane *plane, uint32_t format, uint64_t modifier) @@ -891,7 +879,7 @@ static bool vc4_format_mod_supported(struct drm_plane *plane, } static const struct drm_plane_funcs vc4_plane_funcs = { - .update_plane = vc4_update_plane, + .update_plane = drm_atomic_helper_update_plane, .disable_plane = drm_atomic_helper_disable_plane, .destroy = vc4_plane_destroy, .set_property = NULL, @@ -939,5 +927,7 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev, drm_plane_helper_add(plane, &vc4_plane_helper_funcs); + drm_plane_create_alpha_property(plane); + return plane; } diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h index a141496..d1fb6fe 100644 --- a/drivers/gpu/drm/vc4/vc4_regs.h +++ b/drivers/gpu/drm/vc4/vc4_regs.h @@ -330,6 +330,21 @@ #define SCALER_DISPCTRL0 0x00000040 # define SCALER_DISPCTRLX_ENABLE BIT(31) # define SCALER_DISPCTRLX_RESET BIT(30) +/* Generates a single frame when VSTART is seen and stops at the last + * pixel read from the FIFO. + */ +# define SCALER_DISPCTRLX_ONESHOT BIT(29) +/* Processes a single context in the dlist and then task switch, + * instead of an entire line. + */ +# define SCALER_DISPCTRLX_ONECTX BIT(28) +/* Set to have DISPSLAVE return 2 16bpp pixels and no status data. */ +# define SCALER_DISPCTRLX_FIFO32 BIT(27) +/* Turns on output to the DISPSLAVE register instead of the normal + * FIFO. + */ +# define SCALER_DISPCTRLX_FIFOREG BIT(26) + # define SCALER_DISPCTRLX_WIDTH_MASK VC4_MASK(23, 12) # define SCALER_DISPCTRLX_WIDTH_SHIFT 12 # define SCALER_DISPCTRLX_HEIGHT_MASK VC4_MASK(11, 0) @@ -402,6 +417,68 @@ */ # define SCALER_GAMADDR_SRAMENB BIT(30) +#define SCALER_OLEDOFFS 0x00000080 +/* Clamps R to [16,235] and G/B to [16,240]. */ +# define SCALER_OLEDOFFS_YUVCLAMP BIT(31) + +/* Chooses which display FIFO the matrix applies to. */ +# define SCALER_OLEDOFFS_DISPFIFO_MASK VC4_MASK(25, 24) +# define SCALER_OLEDOFFS_DISPFIFO_SHIFT 24 +# define SCALER_OLEDOFFS_DISPFIFO_DISABLED 0 +# define SCALER_OLEDOFFS_DISPFIFO_0 1 +# define SCALER_OLEDOFFS_DISPFIFO_1 2 +# define SCALER_OLEDOFFS_DISPFIFO_2 3 + +/* Offsets are 8-bit 2s-complement. */ +# define SCALER_OLEDOFFS_RED_MASK VC4_MASK(23, 16) +# define SCALER_OLEDOFFS_RED_SHIFT 16 +# define SCALER_OLEDOFFS_GREEN_MASK VC4_MASK(15, 8) +# define SCALER_OLEDOFFS_GREEN_SHIFT 8 +# define SCALER_OLEDOFFS_BLUE_MASK VC4_MASK(7, 0) +# define SCALER_OLEDOFFS_BLUE_SHIFT 0 + +/* The coefficients are S0.9 fractions. */ +#define SCALER_OLEDCOEF0 0x00000084 +# define SCALER_OLEDCOEF0_B_TO_R_MASK VC4_MASK(29, 20) +# define SCALER_OLEDCOEF0_B_TO_R_SHIFT 20 +# define SCALER_OLEDCOEF0_B_TO_G_MASK VC4_MASK(19, 10) +# define SCALER_OLEDCOEF0_B_TO_G_SHIFT 10 +# define SCALER_OLEDCOEF0_B_TO_B_MASK VC4_MASK(9, 0) +# define SCALER_OLEDCOEF0_B_TO_B_SHIFT 0 + +#define SCALER_OLEDCOEF1 0x00000088 +# define SCALER_OLEDCOEF1_G_TO_R_MASK VC4_MASK(29, 20) +# define SCALER_OLEDCOEF1_G_TO_R_SHIFT 20 +# define SCALER_OLEDCOEF1_G_TO_G_MASK VC4_MASK(19, 10) +# define SCALER_OLEDCOEF1_G_TO_G_SHIFT 10 +# define SCALER_OLEDCOEF1_G_TO_B_MASK VC4_MASK(9, 0) +# define SCALER_OLEDCOEF1_G_TO_B_SHIFT 0 + +#define SCALER_OLEDCOEF2 0x0000008c +# define SCALER_OLEDCOEF2_R_TO_R_MASK VC4_MASK(29, 20) +# define SCALER_OLEDCOEF2_R_TO_R_SHIFT 20 +# define SCALER_OLEDCOEF2_R_TO_G_MASK VC4_MASK(19, 10) +# define SCALER_OLEDCOEF2_R_TO_G_SHIFT 10 +# define SCALER_OLEDCOEF2_R_TO_B_MASK VC4_MASK(9, 0) +# define SCALER_OLEDCOEF2_R_TO_B_SHIFT 0 + +/* Slave addresses for DMAing from HVS composition output to other + * devices. The top bits are valid only in !FIFO32 mode. + */ +#define SCALER_DISPSLAVE0 0x000000c0 +#define SCALER_DISPSLAVE1 0x000000c9 +#define SCALER_DISPSLAVE2 0x000000d0 +# define SCALER_DISPSLAVE_ISSUE_VSTART BIT(31) +# define SCALER_DISPSLAVE_ISSUE_HSTART BIT(30) +/* Set when the current line has been read and an HSTART is required. */ +# define SCALER_DISPSLAVE_EOL BIT(26) +/* Set when the display FIFO is empty. */ +# define SCALER_DISPSLAVE_EMPTY BIT(25) +/* Set when there is RGB data ready to read. */ +# define SCALER_DISPSLAVE_VALID BIT(24) +# define SCALER_DISPSLAVE_RGB_MASK VC4_MASK(23, 0) +# define SCALER_DISPSLAVE_RGB_SHIFT 0 + #define SCALER_GAMDATA 0x000000e0 #define SCALER_DLIST_START 0x00002000 #define SCALER_DLIST_SIZE 0x00004000 @@ -767,6 +844,10 @@ enum hvs_pixel_format { HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE = 9, HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE = 10, HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE = 11, + HVS_PIXEL_FORMAT_H264 = 12, + HVS_PIXEL_FORMAT_PALETTE = 13, + HVS_PIXEL_FORMAT_YUV444_RGB = 14, + HVS_PIXEL_FORMAT_AYUV444_RGB = 15, }; /* Note: the LSB is the rightmost character shown. Only valid for @@ -800,12 +881,27 @@ enum hvs_pixel_format { #define SCALER_CTL0_TILING_128B 2 #define SCALER_CTL0_TILING_256B_OR_T 3 +#define SCALER_CTL0_ALPHA_MASK BIT(19) #define SCALER_CTL0_HFLIP BIT(16) #define SCALER_CTL0_VFLIP BIT(15) +#define SCALER_CTL0_KEY_MODE_MASK VC4_MASK(18, 17) +#define SCALER_CTL0_KEY_MODE_SHIFT 17 +#define SCALER_CTL0_KEY_DISABLED 0 +#define SCALER_CTL0_KEY_LUMA_OR_COMMON_RGB 1 +#define SCALER_CTL0_KEY_MATCH 2 /* turn transparent */ +#define SCALER_CTL0_KEY_REPLACE 3 /* replace with value from key mask word 2 */ + #define SCALER_CTL0_ORDER_MASK VC4_MASK(14, 13) #define SCALER_CTL0_ORDER_SHIFT 13 +#define SCALER_CTL0_RGBA_EXPAND_MASK VC4_MASK(12, 11) +#define SCALER_CTL0_RGBA_EXPAND_SHIFT 11 +#define SCALER_CTL0_RGBA_EXPAND_ZERO 0 +#define SCALER_CTL0_RGBA_EXPAND_LSB 1 +#define SCALER_CTL0_RGBA_EXPAND_MSB 2 +#define SCALER_CTL0_RGBA_EXPAND_ROUND 3 + #define SCALER_CTL0_SCL1_MASK VC4_MASK(10, 8) #define SCALER_CTL0_SCL1_SHIFT 8 @@ -849,6 +945,7 @@ enum hvs_pixel_format { #define SCALER_POS2_ALPHA_MODE_FIXED_NONZERO 2 #define SCALER_POS2_ALPHA_MODE_FIXED_OVER_0x07 3 #define SCALER_POS2_ALPHA_PREMULT BIT(29) +#define SCALER_POS2_ALPHA_MIX BIT(28) #define SCALER_POS2_HEIGHT_MASK VC4_MASK(27, 16) #define SCALER_POS2_HEIGHT_SHIFT 16 diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c index bfc2fa7..e47e294 100644 --- a/drivers/gpu/drm/vc4/vc4_v3d.c +++ b/drivers/gpu/drm/vc4/vc4_v3d.c @@ -218,8 +218,7 @@ try_again: * overall CMA pool before they make scenes complicated enough to run * out of bin space. */ -int -vc4_allocate_bin_bo(struct drm_device *drm) +static int vc4_allocate_bin_bo(struct drm_device *drm) { struct vc4_dev *vc4 = to_vc4_dev(drm); struct vc4_v3d *v3d = vc4->v3d; diff --git a/drivers/gpu/drm/vc4/vc4_validate_shaders.c b/drivers/gpu/drm/vc4/vc4_validate_shaders.c index d3f15bf..7cf82b0 100644 --- a/drivers/gpu/drm/vc4/vc4_validate_shaders.c +++ b/drivers/gpu/drm/vc4/vc4_validate_shaders.c @@ -942,6 +942,7 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj) fail: kfree(validation_state.branch_targets); if (validated_shader) { + kfree(validated_shader->uniform_addr_offsets); kfree(validated_shader->texture_samples); kfree(validated_shader); } diff --git a/drivers/gpu/drm/virtio/virtgpu_display.c b/drivers/gpu/drm/virtio/virtgpu_display.c index 8cc8c34..a5edd86 100644 --- a/drivers/gpu/drm/virtio/virtgpu_display.c +++ b/drivers/gpu/drm/virtio/virtgpu_display.c @@ -208,7 +208,7 @@ static int virtio_gpu_conn_get_modes(struct drm_connector *connector) return count; } -static int virtio_gpu_conn_mode_valid(struct drm_connector *connector, +static enum drm_mode_status virtio_gpu_conn_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct virtio_gpu_output *output = diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c index 48e4f1d..020070d 100644 --- a/drivers/gpu/drm/virtio/virtgpu_vq.c +++ b/drivers/gpu/drm/virtio/virtgpu_vq.c @@ -293,7 +293,7 @@ retry: ret = virtqueue_add_sgs(vq, sgs, outcnt, incnt, vbuf, GFP_ATOMIC); if (ret == -ENOSPC) { spin_unlock(&vgdev->ctrlq.qlock); - wait_event(vgdev->ctrlq.ack_queue, vq->num_free); + wait_event(vgdev->ctrlq.ack_queue, vq->num_free >= outcnt + incnt); spin_lock(&vgdev->ctrlq.qlock); goto retry; } else { @@ -368,7 +368,7 @@ retry: ret = virtqueue_add_sgs(vq, sgs, outcnt, 0, vbuf, GFP_ATOMIC); if (ret == -ENOSPC) { spin_unlock(&vgdev->cursorq.qlock); - wait_event(vgdev->cursorq.ack_queue, vq->num_free); + wait_event(vgdev->cursorq.ack_queue, vq->num_free >= outcnt); spin_lock(&vgdev->cursorq.qlock); goto retry; } else { diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 70e1a88..97f37c3 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -159,14 +159,14 @@ static const struct drm_ioctl_desc vmw_ioctls[] = { DRM_RENDER_ALLOW), VMW_IOCTL_DEF(VMW_CURSOR_BYPASS, vmw_kms_cursor_bypass_ioctl, - DRM_MASTER | DRM_CONTROL_ALLOW), + DRM_MASTER), VMW_IOCTL_DEF(VMW_CONTROL_STREAM, vmw_overlay_ioctl, - DRM_MASTER | DRM_CONTROL_ALLOW), + DRM_MASTER), VMW_IOCTL_DEF(VMW_CLAIM_STREAM, vmw_stream_claim_ioctl, - DRM_MASTER | DRM_CONTROL_ALLOW), + DRM_MASTER), VMW_IOCTL_DEF(VMW_UNREF_STREAM, vmw_stream_unref_ioctl, - DRM_MASTER | DRM_CONTROL_ALLOW), + DRM_MASTER), VMW_IOCTL_DEF(VMW_CREATE_CONTEXT, vmw_context_define_ioctl, DRM_AUTH | DRM_RENDER_ALLOW), diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c index 2582ffd..ba0cdb7 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c @@ -441,11 +441,11 @@ static int vmwgfx_set_config_internal(struct drm_mode_set *set) struct drm_crtc *crtc = set->crtc; struct drm_framebuffer *fb; struct drm_crtc *tmp; - struct drm_modeset_acquire_ctx *ctx; struct drm_device *dev = set->crtc->dev; + struct drm_modeset_acquire_ctx ctx; int ret; - ctx = dev->mode_config.acquire_ctx; + drm_modeset_acquire_init(&ctx, 0); restart: /* @@ -458,7 +458,7 @@ restart: fb = set->fb; - ret = crtc->funcs->set_config(set, ctx); + ret = crtc->funcs->set_config(set, &ctx); if (ret == 0) { crtc->primary->crtc = crtc; crtc->primary->fb = fb; @@ -473,20 +473,13 @@ restart: } if (ret == -EDEADLK) { - dev->mode_config.acquire_ctx = NULL; - -retry_locking: - drm_modeset_backoff(ctx); - - ret = drm_modeset_lock_all_ctx(dev, ctx); - if (ret) - goto retry_locking; - - dev->mode_config.acquire_ctx = ctx; - + drm_modeset_backoff(&ctx); goto restart; } + drm_modeset_drop_locks(&ctx); + drm_modeset_acquire_fini(&ctx); + return ret; } @@ -624,7 +617,6 @@ static int vmw_fb_set_par(struct fb_info *info) } mutex_lock(&par->bo_mutex); - drm_modeset_lock_all(vmw_priv->dev); ret = vmw_fb_kms_framebuffer(info); if (ret) goto out_unlock; @@ -657,7 +649,6 @@ out_unlock: drm_mode_destroy(vmw_priv->dev, old_mode); par->set_mode = mode; - drm_modeset_unlock_all(vmw_priv->dev); mutex_unlock(&par->bo_mutex); return ret; @@ -713,18 +704,14 @@ int vmw_fb_init(struct vmw_private *vmw_priv) par->max_width = fb_width; par->max_height = fb_height; - drm_modeset_lock_all(vmw_priv->dev); ret = vmw_kms_fbdev_init_data(vmw_priv, 0, par->max_width, par->max_height, &par->con, &par->crtc, &init_mode); - if (ret) { - drm_modeset_unlock_all(vmw_priv->dev); + if (ret) goto err_kms; - } info->var.xres = init_mode->hdisplay; info->var.yres = init_mode->vdisplay; - drm_modeset_unlock_all(vmw_priv->dev); /* * Create buffers and alloc memory @@ -832,7 +819,9 @@ int vmw_fb_close(struct vmw_private *vmw_priv) cancel_delayed_work_sync(&par->local_work); unregister_framebuffer(info); + mutex_lock(&par->bo_mutex); (void) vmw_fb_kms_detach(par, true, true); + mutex_unlock(&par->bo_mutex); vfree(par->vmalloc); framebuffer_release(info); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index f11601b..01f2dc9 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -384,9 +384,9 @@ vmw_du_cursor_plane_atomic_update(struct drm_plane *plane, hotspot_x = du->hotspot_x; hotspot_y = du->hotspot_y; - if (plane->fb) { - hotspot_x += plane->fb->hot_x; - hotspot_y += plane->fb->hot_y; + if (plane->state->fb) { + hotspot_x += plane->state->fb->hot_x; + hotspot_y += plane->state->fb->hot_y; } du->cursor_surface = vps->surf; @@ -2595,6 +2595,7 @@ void vmw_kms_helper_resource_finish(struct vmw_validation_ctx *ctx, vmw_kms_helper_buffer_finish(res->dev_priv, NULL, ctx->buf, out_fence, NULL); + vmw_dmabuf_unreference(&ctx->buf); vmw_resource_unreserve(res, false, NULL, 0); mutex_unlock(&res->dev_priv->cmdbuf_mutex); } @@ -2680,7 +2681,9 @@ int vmw_kms_fbdev_init_data(struct vmw_private *dev_priv, struct vmw_display_unit *du; struct drm_display_mode *mode; int i = 0; + int ret = 0; + mutex_lock(&dev_priv->dev->mode_config.mutex); list_for_each_entry(con, &dev_priv->dev->mode_config.connector_list, head) { if (i == unit) @@ -2691,7 +2694,8 @@ int vmw_kms_fbdev_init_data(struct vmw_private *dev_priv, if (i != unit) { DRM_ERROR("Could not find initial display unit.\n"); - return -EINVAL; + ret = -EINVAL; + goto out_unlock; } if (list_empty(&con->modes)) @@ -2699,7 +2703,8 @@ int vmw_kms_fbdev_init_data(struct vmw_private *dev_priv, if (list_empty(&con->modes)) { DRM_ERROR("Could not find initial display mode.\n"); - return -EINVAL; + ret = -EINVAL; + goto out_unlock; } du = vmw_connector_to_du(con); @@ -2720,7 +2725,10 @@ int vmw_kms_fbdev_init_data(struct vmw_private *dev_priv, head); } - return 0; + out_unlock: + mutex_unlock(&dev_priv->dev->mode_config.mutex); + + return ret; } /** diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c index 3824595..4a5907e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c @@ -281,39 +281,6 @@ drm_connector_helper_funcs vmw_ldu_connector_helper_funcs = { * Legacy Display Plane Functions */ -/** - * vmw_ldu_primary_plane_cleanup_fb - Noop - * - * @plane: display plane - * @old_state: Contains the FB to clean up - * - * Unpins the display surface - * - * Returns 0 on success - */ -static void -vmw_ldu_primary_plane_cleanup_fb(struct drm_plane *plane, - struct drm_plane_state *old_state) -{ -} - - -/** - * vmw_ldu_primary_plane_prepare_fb - Noop - * - * @plane: display plane - * @new_state: info on the new plane state, including the FB - * - * Returns 0 on success - */ -static int -vmw_ldu_primary_plane_prepare_fb(struct drm_plane *plane, - struct drm_plane_state *new_state) -{ - return 0; -} - - static void vmw_ldu_primary_plane_atomic_update(struct drm_plane *plane, struct drm_plane_state *old_state) @@ -373,8 +340,6 @@ static const struct drm_plane_helper_funcs vmw_ldu_primary_plane_helper_funcs = { .atomic_check = vmw_du_primary_plane_atomic_check, .atomic_update = vmw_ldu_primary_plane_atomic_update, - .prepare_fb = vmw_ldu_primary_plane_prepare_fb, - .cleanup_fb = vmw_ldu_primary_plane_cleanup_fb, }; static const struct drm_crtc_helper_funcs vmw_ldu_crtc_helper_funcs = { diff --git a/drivers/gpu/drm/xen/Kconfig b/drivers/gpu/drm/xen/Kconfig new file mode 100644 index 0000000..4cca160 --- /dev/null +++ b/drivers/gpu/drm/xen/Kconfig @@ -0,0 +1,17 @@ +config DRM_XEN + bool "DRM Support for Xen guest OS" + depends on XEN + help + Choose this option if you want to enable DRM support + for Xen. + +config DRM_XEN_FRONTEND + tristate "Para-virtualized frontend driver for Xen guest OS" + depends on DRM_XEN + depends on DRM + select DRM_KMS_HELPER + select VIDEOMODE_HELPERS + select XEN_XENBUS_FRONTEND + help + Choose this option if you want to enable a para-virtualized + frontend DRM/KMS driver for Xen guest OSes. diff --git a/drivers/gpu/drm/xen/Makefile b/drivers/gpu/drm/xen/Makefile new file mode 100644 index 0000000..712afff --- /dev/null +++ b/drivers/gpu/drm/xen/Makefile @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 OR MIT + +drm_xen_front-objs := xen_drm_front.o \ + xen_drm_front_kms.o \ + xen_drm_front_conn.o \ + xen_drm_front_evtchnl.o \ + xen_drm_front_shbuf.o \ + xen_drm_front_cfg.o \ + xen_drm_front_gem.o + +obj-$(CONFIG_DRM_XEN_FRONTEND) += drm_xen_front.o diff --git a/drivers/gpu/drm/xen/xen_drm_front.c b/drivers/gpu/drm/xen/xen_drm_front.c new file mode 100644 index 0000000..3345ac7 --- /dev/null +++ b/drivers/gpu/drm/xen/xen_drm_front.c @@ -0,0 +1,840 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT + +/* + * Xen para-virtual DRM device + * + * Copyright (C) 2016-2018 EPAM Systems Inc. + * + * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> + */ + +#include <drm/drmP.h> +#include <drm/drm_atomic_helper.h> +#include <drm/drm_crtc_helper.h> +#include <drm/drm_gem.h> + +#include <linux/of_device.h> + +#include <xen/platform_pci.h> +#include <xen/xen.h> +#include <xen/xenbus.h> + +#include <xen/interface/io/displif.h> + +#include "xen_drm_front.h" +#include "xen_drm_front_cfg.h" +#include "xen_drm_front_evtchnl.h" +#include "xen_drm_front_gem.h" +#include "xen_drm_front_kms.h" +#include "xen_drm_front_shbuf.h" + +struct xen_drm_front_dbuf { + struct list_head list; + u64 dbuf_cookie; + u64 fb_cookie; + struct xen_drm_front_shbuf *shbuf; +}; + +static int dbuf_add_to_list(struct xen_drm_front_info *front_info, + struct xen_drm_front_shbuf *shbuf, u64 dbuf_cookie) +{ + struct xen_drm_front_dbuf *dbuf; + + dbuf = kzalloc(sizeof(*dbuf), GFP_KERNEL); + if (!dbuf) + return -ENOMEM; + + dbuf->dbuf_cookie = dbuf_cookie; + dbuf->shbuf = shbuf; + list_add(&dbuf->list, &front_info->dbuf_list); + return 0; +} + +static struct xen_drm_front_dbuf *dbuf_get(struct list_head *dbuf_list, + u64 dbuf_cookie) +{ + struct xen_drm_front_dbuf *buf, *q; + + list_for_each_entry_safe(buf, q, dbuf_list, list) + if (buf->dbuf_cookie == dbuf_cookie) + return buf; + + return NULL; +} + +static void dbuf_flush_fb(struct list_head *dbuf_list, u64 fb_cookie) +{ + struct xen_drm_front_dbuf *buf, *q; + + list_for_each_entry_safe(buf, q, dbuf_list, list) + if (buf->fb_cookie == fb_cookie) + xen_drm_front_shbuf_flush(buf->shbuf); +} + +static void dbuf_free(struct list_head *dbuf_list, u64 dbuf_cookie) +{ + struct xen_drm_front_dbuf *buf, *q; + + list_for_each_entry_safe(buf, q, dbuf_list, list) + if (buf->dbuf_cookie == dbuf_cookie) { + list_del(&buf->list); + xen_drm_front_shbuf_unmap(buf->shbuf); + xen_drm_front_shbuf_free(buf->shbuf); + kfree(buf); + break; + } +} + +static void dbuf_free_all(struct list_head *dbuf_list) +{ + struct xen_drm_front_dbuf *buf, *q; + + list_for_each_entry_safe(buf, q, dbuf_list, list) { + list_del(&buf->list); + xen_drm_front_shbuf_unmap(buf->shbuf); + xen_drm_front_shbuf_free(buf->shbuf); + kfree(buf); + } +} + +static struct xendispl_req * +be_prepare_req(struct xen_drm_front_evtchnl *evtchnl, u8 operation) +{ + struct xendispl_req *req; + + req = RING_GET_REQUEST(&evtchnl->u.req.ring, + evtchnl->u.req.ring.req_prod_pvt); + req->operation = operation; + req->id = evtchnl->evt_next_id++; + evtchnl->evt_id = req->id; + return req; +} + +static int be_stream_do_io(struct xen_drm_front_evtchnl *evtchnl, + struct xendispl_req *req) +{ + reinit_completion(&evtchnl->u.req.completion); + if (unlikely(evtchnl->state != EVTCHNL_STATE_CONNECTED)) + return -EIO; + + xen_drm_front_evtchnl_flush(evtchnl); + return 0; +} + +static int be_stream_wait_io(struct xen_drm_front_evtchnl *evtchnl) +{ + if (wait_for_completion_timeout(&evtchnl->u.req.completion, + msecs_to_jiffies(XEN_DRM_FRONT_WAIT_BACK_MS)) <= 0) + return -ETIMEDOUT; + + return evtchnl->u.req.resp_status; +} + +int xen_drm_front_mode_set(struct xen_drm_front_drm_pipeline *pipeline, + u32 x, u32 y, u32 width, u32 height, + u32 bpp, u64 fb_cookie) +{ + struct xen_drm_front_evtchnl *evtchnl; + struct xen_drm_front_info *front_info; + struct xendispl_req *req; + unsigned long flags; + int ret; + + front_info = pipeline->drm_info->front_info; + evtchnl = &front_info->evt_pairs[pipeline->index].req; + if (unlikely(!evtchnl)) + return -EIO; + + mutex_lock(&evtchnl->u.req.req_io_lock); + + spin_lock_irqsave(&front_info->io_lock, flags); + req = be_prepare_req(evtchnl, XENDISPL_OP_SET_CONFIG); + req->op.set_config.x = x; + req->op.set_config.y = y; + req->op.set_config.width = width; + req->op.set_config.height = height; + req->op.set_config.bpp = bpp; + req->op.set_config.fb_cookie = fb_cookie; + + ret = be_stream_do_io(evtchnl, req); + spin_unlock_irqrestore(&front_info->io_lock, flags); + + if (ret == 0) + ret = be_stream_wait_io(evtchnl); + + mutex_unlock(&evtchnl->u.req.req_io_lock); + return ret; +} + +int xen_drm_front_dbuf_create(struct xen_drm_front_info *front_info, + u64 dbuf_cookie, u32 width, u32 height, + u32 bpp, u64 size, struct page **pages) +{ + struct xen_drm_front_evtchnl *evtchnl; + struct xen_drm_front_shbuf *shbuf; + struct xendispl_req *req; + struct xen_drm_front_shbuf_cfg buf_cfg; + unsigned long flags; + int ret; + + evtchnl = &front_info->evt_pairs[GENERIC_OP_EVT_CHNL].req; + if (unlikely(!evtchnl)) + return -EIO; + + memset(&buf_cfg, 0, sizeof(buf_cfg)); + buf_cfg.xb_dev = front_info->xb_dev; + buf_cfg.pages = pages; + buf_cfg.size = size; + buf_cfg.be_alloc = front_info->cfg.be_alloc; + + shbuf = xen_drm_front_shbuf_alloc(&buf_cfg); + if (IS_ERR(shbuf)) + return PTR_ERR(shbuf); + + ret = dbuf_add_to_list(front_info, shbuf, dbuf_cookie); + if (ret < 0) { + xen_drm_front_shbuf_free(shbuf); + return ret; + } + + mutex_lock(&evtchnl->u.req.req_io_lock); + + spin_lock_irqsave(&front_info->io_lock, flags); + req = be_prepare_req(evtchnl, XENDISPL_OP_DBUF_CREATE); + req->op.dbuf_create.gref_directory = + xen_drm_front_shbuf_get_dir_start(shbuf); + req->op.dbuf_create.buffer_sz = size; + req->op.dbuf_create.dbuf_cookie = dbuf_cookie; + req->op.dbuf_create.width = width; + req->op.dbuf_create.height = height; + req->op.dbuf_create.bpp = bpp; + if (buf_cfg.be_alloc) + req->op.dbuf_create.flags |= XENDISPL_DBUF_FLG_REQ_ALLOC; + + ret = be_stream_do_io(evtchnl, req); + spin_unlock_irqrestore(&front_info->io_lock, flags); + + if (ret < 0) + goto fail; + + ret = be_stream_wait_io(evtchnl); + if (ret < 0) + goto fail; + + ret = xen_drm_front_shbuf_map(shbuf); + if (ret < 0) + goto fail; + + mutex_unlock(&evtchnl->u.req.req_io_lock); + return 0; + +fail: + mutex_unlock(&evtchnl->u.req.req_io_lock); + dbuf_free(&front_info->dbuf_list, dbuf_cookie); + return ret; +} + +static int xen_drm_front_dbuf_destroy(struct xen_drm_front_info *front_info, + u64 dbuf_cookie) +{ + struct xen_drm_front_evtchnl *evtchnl; + struct xendispl_req *req; + unsigned long flags; + bool be_alloc; + int ret; + + evtchnl = &front_info->evt_pairs[GENERIC_OP_EVT_CHNL].req; + if (unlikely(!evtchnl)) + return -EIO; + + be_alloc = front_info->cfg.be_alloc; + + /* + * For the backend allocated buffer release references now, so backend + * can free the buffer. + */ + if (be_alloc) + dbuf_free(&front_info->dbuf_list, dbuf_cookie); + + mutex_lock(&evtchnl->u.req.req_io_lock); + + spin_lock_irqsave(&front_info->io_lock, flags); + req = be_prepare_req(evtchnl, XENDISPL_OP_DBUF_DESTROY); + req->op.dbuf_destroy.dbuf_cookie = dbuf_cookie; + + ret = be_stream_do_io(evtchnl, req); + spin_unlock_irqrestore(&front_info->io_lock, flags); + + if (ret == 0) + ret = be_stream_wait_io(evtchnl); + + /* + * Do this regardless of communication status with the backend: + * if we cannot remove remote resources remove what we can locally. + */ + if (!be_alloc) + dbuf_free(&front_info->dbuf_list, dbuf_cookie); + + mutex_unlock(&evtchnl->u.req.req_io_lock); + return ret; +} + +int xen_drm_front_fb_attach(struct xen_drm_front_info *front_info, + u64 dbuf_cookie, u64 fb_cookie, u32 width, + u32 height, u32 pixel_format) +{ + struct xen_drm_front_evtchnl *evtchnl; + struct xen_drm_front_dbuf *buf; + struct xendispl_req *req; + unsigned long flags; + int ret; + + evtchnl = &front_info->evt_pairs[GENERIC_OP_EVT_CHNL].req; + if (unlikely(!evtchnl)) + return -EIO; + + buf = dbuf_get(&front_info->dbuf_list, dbuf_cookie); + if (!buf) + return -EINVAL; + + buf->fb_cookie = fb_cookie; + + mutex_lock(&evtchnl->u.req.req_io_lock); + + spin_lock_irqsave(&front_info->io_lock, flags); + req = be_prepare_req(evtchnl, XENDISPL_OP_FB_ATTACH); + req->op.fb_attach.dbuf_cookie = dbuf_cookie; + req->op.fb_attach.fb_cookie = fb_cookie; + req->op.fb_attach.width = width; + req->op.fb_attach.height = height; + req->op.fb_attach.pixel_format = pixel_format; + + ret = be_stream_do_io(evtchnl, req); + spin_unlock_irqrestore(&front_info->io_lock, flags); + + if (ret == 0) + ret = be_stream_wait_io(evtchnl); + + mutex_unlock(&evtchnl->u.req.req_io_lock); + return ret; +} + +int xen_drm_front_fb_detach(struct xen_drm_front_info *front_info, + u64 fb_cookie) +{ + struct xen_drm_front_evtchnl *evtchnl; + struct xendispl_req *req; + unsigned long flags; + int ret; + + evtchnl = &front_info->evt_pairs[GENERIC_OP_EVT_CHNL].req; + if (unlikely(!evtchnl)) + return -EIO; + + mutex_lock(&evtchnl->u.req.req_io_lock); + + spin_lock_irqsave(&front_info->io_lock, flags); + req = be_prepare_req(evtchnl, XENDISPL_OP_FB_DETACH); + req->op.fb_detach.fb_cookie = fb_cookie; + + ret = be_stream_do_io(evtchnl, req); + spin_unlock_irqrestore(&front_info->io_lock, flags); + + if (ret == 0) + ret = be_stream_wait_io(evtchnl); + + mutex_unlock(&evtchnl->u.req.req_io_lock); + return ret; +} + +int xen_drm_front_page_flip(struct xen_drm_front_info *front_info, + int conn_idx, u64 fb_cookie) +{ + struct xen_drm_front_evtchnl *evtchnl; + struct xendispl_req *req; + unsigned long flags; + int ret; + + if (unlikely(conn_idx >= front_info->num_evt_pairs)) + return -EINVAL; + + dbuf_flush_fb(&front_info->dbuf_list, fb_cookie); + evtchnl = &front_info->evt_pairs[conn_idx].req; + + mutex_lock(&evtchnl->u.req.req_io_lock); + + spin_lock_irqsave(&front_info->io_lock, flags); + req = be_prepare_req(evtchnl, XENDISPL_OP_PG_FLIP); + req->op.pg_flip.fb_cookie = fb_cookie; + + ret = be_stream_do_io(evtchnl, req); + spin_unlock_irqrestore(&front_info->io_lock, flags); + + if (ret == 0) + ret = be_stream_wait_io(evtchnl); + + mutex_unlock(&evtchnl->u.req.req_io_lock); + return ret; +} + +void xen_drm_front_on_frame_done(struct xen_drm_front_info *front_info, + int conn_idx, u64 fb_cookie) +{ + struct xen_drm_front_drm_info *drm_info = front_info->drm_info; + + if (unlikely(conn_idx >= front_info->cfg.num_connectors)) + return; + + xen_drm_front_kms_on_frame_done(&drm_info->pipeline[conn_idx], + fb_cookie); +} + +static int xen_drm_drv_dumb_create(struct drm_file *filp, + struct drm_device *dev, + struct drm_mode_create_dumb *args) +{ + struct xen_drm_front_drm_info *drm_info = dev->dev_private; + struct drm_gem_object *obj; + int ret; + + /* + * Dumb creation is a two stage process: first we create a fully + * constructed GEM object which is communicated to the backend, and + * only after that we can create GEM's handle. This is done so, + * because of the possible races: once you create a handle it becomes + * immediately visible to user-space, so the latter can try accessing + * object without pages etc. + * For details also see drm_gem_handle_create + */ + args->pitch = DIV_ROUND_UP(args->width * args->bpp, 8); + args->size = args->pitch * args->height; + + obj = xen_drm_front_gem_create(dev, args->size); + if (IS_ERR_OR_NULL(obj)) { + ret = PTR_ERR(obj); + goto fail; + } + + ret = xen_drm_front_dbuf_create(drm_info->front_info, + xen_drm_front_dbuf_to_cookie(obj), + args->width, args->height, args->bpp, + args->size, + xen_drm_front_gem_get_pages(obj)); + if (ret) + goto fail_backend; + + /* This is the tail of GEM object creation */ + ret = drm_gem_handle_create(filp, obj, &args->handle); + if (ret) + goto fail_handle; + + /* Drop reference from allocate - handle holds it now */ + drm_gem_object_put_unlocked(obj); + return 0; + +fail_handle: + xen_drm_front_dbuf_destroy(drm_info->front_info, + xen_drm_front_dbuf_to_cookie(obj)); +fail_backend: + /* drop reference from allocate */ + drm_gem_object_put_unlocked(obj); +fail: + DRM_ERROR("Failed to create dumb buffer: %d\n", ret); + return ret; +} + +static void xen_drm_drv_free_object_unlocked(struct drm_gem_object *obj) +{ + struct xen_drm_front_drm_info *drm_info = obj->dev->dev_private; + int idx; + + if (drm_dev_enter(obj->dev, &idx)) { + xen_drm_front_dbuf_destroy(drm_info->front_info, + xen_drm_front_dbuf_to_cookie(obj)); + drm_dev_exit(idx); + } else { + dbuf_free(&drm_info->front_info->dbuf_list, + xen_drm_front_dbuf_to_cookie(obj)); + } + + xen_drm_front_gem_free_object_unlocked(obj); +} + +static void xen_drm_drv_release(struct drm_device *dev) +{ + struct xen_drm_front_drm_info *drm_info = dev->dev_private; + struct xen_drm_front_info *front_info = drm_info->front_info; + + xen_drm_front_kms_fini(drm_info); + + drm_atomic_helper_shutdown(dev); + drm_mode_config_cleanup(dev); + + drm_dev_fini(dev); + kfree(dev); + + if (front_info->cfg.be_alloc) + xenbus_switch_state(front_info->xb_dev, + XenbusStateInitialising); + + kfree(drm_info); +} + +static const struct file_operations xen_drm_dev_fops = { + .owner = THIS_MODULE, + .open = drm_open, + .release = drm_release, + .unlocked_ioctl = drm_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = drm_compat_ioctl, +#endif + .poll = drm_poll, + .read = drm_read, + .llseek = no_llseek, + .mmap = xen_drm_front_gem_mmap, +}; + +static const struct vm_operations_struct xen_drm_drv_vm_ops = { + .open = drm_gem_vm_open, + .close = drm_gem_vm_close, +}; + +static struct drm_driver xen_drm_driver = { + .driver_features = DRIVER_GEM | DRIVER_MODESET | + DRIVER_PRIME | DRIVER_ATOMIC, + .release = xen_drm_drv_release, + .gem_vm_ops = &xen_drm_drv_vm_ops, + .gem_free_object_unlocked = xen_drm_drv_free_object_unlocked, + .prime_handle_to_fd = drm_gem_prime_handle_to_fd, + .prime_fd_to_handle = drm_gem_prime_fd_to_handle, + .gem_prime_import = drm_gem_prime_import, + .gem_prime_export = drm_gem_prime_export, + .gem_prime_import_sg_table = xen_drm_front_gem_import_sg_table, + .gem_prime_get_sg_table = xen_drm_front_gem_get_sg_table, + .gem_prime_vmap = xen_drm_front_gem_prime_vmap, + .gem_prime_vunmap = xen_drm_front_gem_prime_vunmap, + .gem_prime_mmap = xen_drm_front_gem_prime_mmap, + .dumb_create = xen_drm_drv_dumb_create, + .fops = &xen_drm_dev_fops, + .name = "xendrm-du", + .desc = "Xen PV DRM Display Unit", + .date = "20180221", + .major = 1, + .minor = 0, + +}; + +static int xen_drm_drv_init(struct xen_drm_front_info *front_info) +{ + struct device *dev = &front_info->xb_dev->dev; + struct xen_drm_front_drm_info *drm_info; + struct drm_device *drm_dev; + int ret; + + DRM_INFO("Creating %s\n", xen_drm_driver.desc); + + drm_info = kzalloc(sizeof(*drm_info), GFP_KERNEL); + if (!drm_info) { + ret = -ENOMEM; + goto fail; + } + + drm_info->front_info = front_info; + front_info->drm_info = drm_info; + + drm_dev = drm_dev_alloc(&xen_drm_driver, dev); + if (IS_ERR(drm_dev)) { + ret = PTR_ERR(drm_dev); + goto fail; + } + + drm_info->drm_dev = drm_dev; + + drm_dev->dev_private = drm_info; + + ret = xen_drm_front_kms_init(drm_info); + if (ret) { + DRM_ERROR("Failed to initialize DRM/KMS, ret %d\n", ret); + goto fail_modeset; + } + + ret = drm_dev_register(drm_dev, 0); + if (ret) + goto fail_register; + + DRM_INFO("Initialized %s %d.%d.%d %s on minor %d\n", + xen_drm_driver.name, xen_drm_driver.major, + xen_drm_driver.minor, xen_drm_driver.patchlevel, + xen_drm_driver.date, drm_dev->primary->index); + + return 0; + +fail_register: + drm_dev_unregister(drm_dev); +fail_modeset: + drm_kms_helper_poll_fini(drm_dev); + drm_mode_config_cleanup(drm_dev); +fail: + kfree(drm_info); + return ret; +} + +static void xen_drm_drv_fini(struct xen_drm_front_info *front_info) +{ + struct xen_drm_front_drm_info *drm_info = front_info->drm_info; + struct drm_device *dev; + + if (!drm_info) + return; + + dev = drm_info->drm_dev; + if (!dev) + return; + + /* Nothing to do if device is already unplugged */ + if (drm_dev_is_unplugged(dev)) + return; + + drm_kms_helper_poll_fini(dev); + drm_dev_unplug(dev); + + front_info->drm_info = NULL; + + xen_drm_front_evtchnl_free_all(front_info); + dbuf_free_all(&front_info->dbuf_list); + + /* + * If we are not using backend allocated buffers, then tell the + * backend we are ready to (re)initialize. Otherwise, wait for + * drm_driver.release. + */ + if (!front_info->cfg.be_alloc) + xenbus_switch_state(front_info->xb_dev, + XenbusStateInitialising); +} + +static int displback_initwait(struct xen_drm_front_info *front_info) +{ + struct xen_drm_front_cfg *cfg = &front_info->cfg; + int ret; + + cfg->front_info = front_info; + ret = xen_drm_front_cfg_card(front_info, cfg); + if (ret < 0) + return ret; + + DRM_INFO("Have %d conector(s)\n", cfg->num_connectors); + /* Create event channels for all connectors and publish */ + ret = xen_drm_front_evtchnl_create_all(front_info); + if (ret < 0) + return ret; + + return xen_drm_front_evtchnl_publish_all(front_info); +} + +static int displback_connect(struct xen_drm_front_info *front_info) +{ + xen_drm_front_evtchnl_set_state(front_info, EVTCHNL_STATE_CONNECTED); + return xen_drm_drv_init(front_info); +} + +static void displback_disconnect(struct xen_drm_front_info *front_info) +{ + if (!front_info->drm_info) + return; + + /* Tell the backend to wait until we release the DRM driver. */ + xenbus_switch_state(front_info->xb_dev, XenbusStateReconfiguring); + + xen_drm_drv_fini(front_info); +} + +static void displback_changed(struct xenbus_device *xb_dev, + enum xenbus_state backend_state) +{ + struct xen_drm_front_info *front_info = dev_get_drvdata(&xb_dev->dev); + int ret; + + DRM_DEBUG("Backend state is %s, front is %s\n", + xenbus_strstate(backend_state), + xenbus_strstate(xb_dev->state)); + + switch (backend_state) { + case XenbusStateReconfiguring: + /* fall through */ + case XenbusStateReconfigured: + /* fall through */ + case XenbusStateInitialised: + break; + + case XenbusStateInitialising: + if (xb_dev->state == XenbusStateReconfiguring) + break; + + /* recovering after backend unexpected closure */ + displback_disconnect(front_info); + break; + + case XenbusStateInitWait: + if (xb_dev->state == XenbusStateReconfiguring) + break; + + /* recovering after backend unexpected closure */ + displback_disconnect(front_info); + if (xb_dev->state != XenbusStateInitialising) + break; + + ret = displback_initwait(front_info); + if (ret < 0) + xenbus_dev_fatal(xb_dev, ret, "initializing frontend"); + else + xenbus_switch_state(xb_dev, XenbusStateInitialised); + break; + + case XenbusStateConnected: + if (xb_dev->state != XenbusStateInitialised) + break; + + ret = displback_connect(front_info); + if (ret < 0) { + displback_disconnect(front_info); + xenbus_dev_fatal(xb_dev, ret, "connecting backend"); + } else { + xenbus_switch_state(xb_dev, XenbusStateConnected); + } + break; + + case XenbusStateClosing: + /* + * in this state backend starts freeing resources, + * so let it go into closed state, so we can also + * remove ours + */ + break; + + case XenbusStateUnknown: + /* fall through */ + case XenbusStateClosed: + if (xb_dev->state == XenbusStateClosed) + break; + + displback_disconnect(front_info); + break; + } +} + +static int xen_drv_probe(struct xenbus_device *xb_dev, + const struct xenbus_device_id *id) +{ + struct xen_drm_front_info *front_info; + struct device *dev = &xb_dev->dev; + int ret; + + /* + * The device is not spawn from a device tree, so arch_setup_dma_ops + * is not called, thus leaving the device with dummy DMA ops. + * This makes the device return error on PRIME buffer import, which + * is not correct: to fix this call of_dma_configure() with a NULL + * node to set default DMA ops. + */ + dev->bus->force_dma = true; + dev->coherent_dma_mask = DMA_BIT_MASK(32); + ret = of_dma_configure(dev, NULL); + if (ret < 0) { + DRM_ERROR("Cannot setup DMA ops, ret %d", ret); + return ret; + } + + front_info = devm_kzalloc(&xb_dev->dev, + sizeof(*front_info), GFP_KERNEL); + if (!front_info) + return -ENOMEM; + + front_info->xb_dev = xb_dev; + spin_lock_init(&front_info->io_lock); + INIT_LIST_HEAD(&front_info->dbuf_list); + dev_set_drvdata(&xb_dev->dev, front_info); + + return xenbus_switch_state(xb_dev, XenbusStateInitialising); +} + +static int xen_drv_remove(struct xenbus_device *dev) +{ + struct xen_drm_front_info *front_info = dev_get_drvdata(&dev->dev); + int to = 100; + + xenbus_switch_state(dev, XenbusStateClosing); + + /* + * On driver removal it is disconnected from XenBus, + * so no backend state change events come via .otherend_changed + * callback. This prevents us from exiting gracefully, e.g. + * signaling the backend to free event channels, waiting for its + * state to change to XenbusStateClosed and cleaning at our end. + * Normally when front driver removed backend will finally go into + * XenbusStateInitWait state. + * + * Workaround: read backend's state manually and wait with time-out. + */ + while ((xenbus_read_unsigned(front_info->xb_dev->otherend, "state", + XenbusStateUnknown) != XenbusStateInitWait) && + --to) + msleep(10); + + if (!to) { + unsigned int state; + + state = xenbus_read_unsigned(front_info->xb_dev->otherend, + "state", XenbusStateUnknown); + DRM_ERROR("Backend state is %s while removing driver\n", + xenbus_strstate(state)); + } + + xen_drm_drv_fini(front_info); + xenbus_frontend_closed(dev); + return 0; +} + +static const struct xenbus_device_id xen_driver_ids[] = { + { XENDISPL_DRIVER_NAME }, + { "" } +}; + +static struct xenbus_driver xen_driver = { + .ids = xen_driver_ids, + .probe = xen_drv_probe, + .remove = xen_drv_remove, + .otherend_changed = displback_changed, +}; + +static int __init xen_drv_init(void) +{ + /* At the moment we only support case with XEN_PAGE_SIZE == PAGE_SIZE */ + if (XEN_PAGE_SIZE != PAGE_SIZE) { + DRM_ERROR(XENDISPL_DRIVER_NAME ": different kernel and Xen page sizes are not supported: XEN_PAGE_SIZE (%lu) != PAGE_SIZE (%lu)\n", + XEN_PAGE_SIZE, PAGE_SIZE); + return -ENODEV; + } + + if (!xen_domain()) + return -ENODEV; + + if (!xen_has_pv_devices()) + return -ENODEV; + + DRM_INFO("Registering XEN PV " XENDISPL_DRIVER_NAME "\n"); + return xenbus_register_frontend(&xen_driver); +} + +static void __exit xen_drv_fini(void) +{ + DRM_INFO("Unregistering XEN PV " XENDISPL_DRIVER_NAME "\n"); + xenbus_unregister_driver(&xen_driver); +} + +module_init(xen_drv_init); +module_exit(xen_drv_fini); + +MODULE_DESCRIPTION("Xen para-virtualized display device frontend"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("xen:" XENDISPL_DRIVER_NAME); diff --git a/drivers/gpu/drm/xen/xen_drm_front.h b/drivers/gpu/drm/xen/xen_drm_front.h new file mode 100644 index 0000000..2c2479b --- /dev/null +++ b/drivers/gpu/drm/xen/xen_drm_front.h @@ -0,0 +1,158 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ + +/* + * Xen para-virtual DRM device + * + * Copyright (C) 2016-2018 EPAM Systems Inc. + * + * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> + */ + +#ifndef __XEN_DRM_FRONT_H_ +#define __XEN_DRM_FRONT_H_ + +#include <drm/drmP.h> +#include <drm/drm_simple_kms_helper.h> + +#include <linux/scatterlist.h> + +#include "xen_drm_front_cfg.h" + +/** + * DOC: Driver modes of operation in terms of display buffers used + * + * Depending on the requirements for the para-virtualized environment, namely + * requirements dictated by the accompanying DRM/(v)GPU drivers running in both + * host and guest environments, display buffers can be allocated by either + * frontend driver or backend. + */ + +/** + * DOC: Buffers allocated by the frontend driver + * + * In this mode of operation driver allocates buffers from system memory. + * + * Note! If used with accompanying DRM/(v)GPU drivers this mode of operation + * may require IOMMU support on the platform, so accompanying DRM/vGPU + * hardware can still reach display buffer memory while importing PRIME + * buffers from the frontend driver. + */ + +/** + * DOC: Buffers allocated by the backend + * + * This mode of operation is run-time configured via guest domain configuration + * through XenStore entries. + * + * For systems which do not provide IOMMU support, but having specific + * requirements for display buffers it is possible to allocate such buffers + * at backend side and share those with the frontend. + * For example, if host domain is 1:1 mapped and has DRM/GPU hardware expecting + * physically contiguous memory, this allows implementing zero-copying + * use-cases. + * + * Note, while using this scenario the following should be considered: + * + * #. If guest domain dies then pages/grants received from the backend + * cannot be claimed back + * + * #. Misbehaving guest may send too many requests to the + * backend exhausting its grant references and memory + * (consider this from security POV) + */ + +/** + * DOC: Driver limitations + * + * #. Only primary plane without additional properties is supported. + * + * #. Only one video mode per connector supported which is configured + * via XenStore. + * + * #. All CRTCs operate at fixed frequency of 60Hz. + */ + +/* timeout in ms to wait for backend to respond */ +#define XEN_DRM_FRONT_WAIT_BACK_MS 3000 + +#ifndef GRANT_INVALID_REF +/* + * Note on usage of grant reference 0 as invalid grant reference: + * grant reference 0 is valid, but never exposed to a PV driver, + * because of the fact it is already in use/reserved by the PV console. + */ +#define GRANT_INVALID_REF 0 +#endif + +struct xen_drm_front_info { + struct xenbus_device *xb_dev; + struct xen_drm_front_drm_info *drm_info; + + /* to protect data between backend IO code and interrupt handler */ + spinlock_t io_lock; + + int num_evt_pairs; + struct xen_drm_front_evtchnl_pair *evt_pairs; + struct xen_drm_front_cfg cfg; + + /* display buffers */ + struct list_head dbuf_list; +}; + +struct xen_drm_front_drm_pipeline { + struct xen_drm_front_drm_info *drm_info; + + int index; + + struct drm_simple_display_pipe pipe; + + struct drm_connector conn; + /* These are only for connector mode checking */ + int width, height; + + struct drm_pending_vblank_event *pending_event; + + struct delayed_work pflip_to_worker; + + bool conn_connected; +}; + +struct xen_drm_front_drm_info { + struct xen_drm_front_info *front_info; + struct drm_device *drm_dev; + + struct xen_drm_front_drm_pipeline pipeline[XEN_DRM_FRONT_MAX_CRTCS]; +}; + +static inline u64 xen_drm_front_fb_to_cookie(struct drm_framebuffer *fb) +{ + return (u64)fb; +} + +static inline u64 xen_drm_front_dbuf_to_cookie(struct drm_gem_object *gem_obj) +{ + return (u64)gem_obj; +} + +int xen_drm_front_mode_set(struct xen_drm_front_drm_pipeline *pipeline, + u32 x, u32 y, u32 width, u32 height, + u32 bpp, u64 fb_cookie); + +int xen_drm_front_dbuf_create(struct xen_drm_front_info *front_info, + u64 dbuf_cookie, u32 width, u32 height, + u32 bpp, u64 size, struct page **pages); + +int xen_drm_front_fb_attach(struct xen_drm_front_info *front_info, + u64 dbuf_cookie, u64 fb_cookie, u32 width, + u32 height, u32 pixel_format); + +int xen_drm_front_fb_detach(struct xen_drm_front_info *front_info, + u64 fb_cookie); + +int xen_drm_front_page_flip(struct xen_drm_front_info *front_info, + int conn_idx, u64 fb_cookie); + +void xen_drm_front_on_frame_done(struct xen_drm_front_info *front_info, + int conn_idx, u64 fb_cookie); + +#endif /* __XEN_DRM_FRONT_H_ */ diff --git a/drivers/gpu/drm/xen/xen_drm_front_cfg.c b/drivers/gpu/drm/xen/xen_drm_front_cfg.c new file mode 100644 index 0000000..5baf2b9 --- /dev/null +++ b/drivers/gpu/drm/xen/xen_drm_front_cfg.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT + +/* + * Xen para-virtual DRM device + * + * Copyright (C) 2016-2018 EPAM Systems Inc. + * + * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> + */ + +#include <drm/drmP.h> + +#include <linux/device.h> + +#include <xen/interface/io/displif.h> +#include <xen/xenbus.h> + +#include "xen_drm_front.h" +#include "xen_drm_front_cfg.h" + +static int cfg_connector(struct xen_drm_front_info *front_info, + struct xen_drm_front_cfg_connector *connector, + const char *path, int index) +{ + char *connector_path; + + connector_path = devm_kasprintf(&front_info->xb_dev->dev, + GFP_KERNEL, "%s/%d", path, index); + if (!connector_path) + return -ENOMEM; + + if (xenbus_scanf(XBT_NIL, connector_path, XENDISPL_FIELD_RESOLUTION, + "%d" XENDISPL_RESOLUTION_SEPARATOR "%d", + &connector->width, &connector->height) < 0) { + /* either no entry configured or wrong resolution set */ + connector->width = 0; + connector->height = 0; + return -EINVAL; + } + + connector->xenstore_path = connector_path; + + DRM_INFO("Connector %s: resolution %dx%d\n", + connector_path, connector->width, connector->height); + return 0; +} + +int xen_drm_front_cfg_card(struct xen_drm_front_info *front_info, + struct xen_drm_front_cfg *cfg) +{ + struct xenbus_device *xb_dev = front_info->xb_dev; + int ret, i; + + if (xenbus_read_unsigned(front_info->xb_dev->nodename, + XENDISPL_FIELD_BE_ALLOC, 0)) { + DRM_INFO("Backend can provide display buffers\n"); + cfg->be_alloc = true; + } + + cfg->num_connectors = 0; + for (i = 0; i < ARRAY_SIZE(cfg->connectors); i++) { + ret = cfg_connector(front_info, &cfg->connectors[i], + xb_dev->nodename, i); + if (ret < 0) + break; + cfg->num_connectors++; + } + + if (!cfg->num_connectors) { + DRM_ERROR("No connector(s) configured at %s\n", + xb_dev->nodename); + return -ENODEV; + } + + return 0; +} + diff --git a/drivers/gpu/drm/xen/xen_drm_front_cfg.h b/drivers/gpu/drm/xen/xen_drm_front_cfg.h new file mode 100644 index 0000000..aa8490b --- /dev/null +++ b/drivers/gpu/drm/xen/xen_drm_front_cfg.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ + +/* + * Xen para-virtual DRM device + * + * Copyright (C) 2016-2018 EPAM Systems Inc. + * + * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> + */ + +#ifndef __XEN_DRM_FRONT_CFG_H_ +#define __XEN_DRM_FRONT_CFG_H_ + +#include <linux/types.h> + +#define XEN_DRM_FRONT_MAX_CRTCS 4 + +struct xen_drm_front_cfg_connector { + int width; + int height; + char *xenstore_path; +}; + +struct xen_drm_front_cfg { + struct xen_drm_front_info *front_info; + /* number of connectors in this configuration */ + int num_connectors; + /* connector configurations */ + struct xen_drm_front_cfg_connector connectors[XEN_DRM_FRONT_MAX_CRTCS]; + /* set if dumb buffers are allocated externally on backend side */ + bool be_alloc; +}; + +int xen_drm_front_cfg_card(struct xen_drm_front_info *front_info, + struct xen_drm_front_cfg *cfg); + +#endif /* __XEN_DRM_FRONT_CFG_H_ */ diff --git a/drivers/gpu/drm/xen/xen_drm_front_conn.c b/drivers/gpu/drm/xen/xen_drm_front_conn.c new file mode 100644 index 0000000..c91ae53 --- /dev/null +++ b/drivers/gpu/drm/xen/xen_drm_front_conn.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT + +/* + * Xen para-virtual DRM device + * + * Copyright (C) 2016-2018 EPAM Systems Inc. + * + * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> + */ + +#include <drm/drm_atomic_helper.h> +#include <drm/drm_crtc_helper.h> + +#include <video/videomode.h> + +#include "xen_drm_front.h" +#include "xen_drm_front_conn.h" +#include "xen_drm_front_kms.h" + +static struct xen_drm_front_drm_pipeline * +to_xen_drm_pipeline(struct drm_connector *connector) +{ + return container_of(connector, struct xen_drm_front_drm_pipeline, conn); +} + +static const u32 plane_formats[] = { + DRM_FORMAT_RGB565, + DRM_FORMAT_RGB888, + DRM_FORMAT_XRGB8888, + DRM_FORMAT_ARGB8888, + DRM_FORMAT_XRGB4444, + DRM_FORMAT_ARGB4444, + DRM_FORMAT_XRGB1555, + DRM_FORMAT_ARGB1555, +}; + +const u32 *xen_drm_front_conn_get_formats(int *format_count) +{ + *format_count = ARRAY_SIZE(plane_formats); + return plane_formats; +} + +static int connector_detect(struct drm_connector *connector, + struct drm_modeset_acquire_ctx *ctx, + bool force) +{ + struct xen_drm_front_drm_pipeline *pipeline = + to_xen_drm_pipeline(connector); + + if (drm_dev_is_unplugged(connector->dev)) + pipeline->conn_connected = false; + + return pipeline->conn_connected ? connector_status_connected : + connector_status_disconnected; +} + +#define XEN_DRM_CRTC_VREFRESH_HZ 60 + +static int connector_get_modes(struct drm_connector *connector) +{ + struct xen_drm_front_drm_pipeline *pipeline = + to_xen_drm_pipeline(connector); + struct drm_display_mode *mode; + struct videomode videomode; + int width, height; + + mode = drm_mode_create(connector->dev); + if (!mode) + return 0; + + memset(&videomode, 0, sizeof(videomode)); + videomode.hactive = pipeline->width; + videomode.vactive = pipeline->height; + width = videomode.hactive + videomode.hfront_porch + + videomode.hback_porch + videomode.hsync_len; + height = videomode.vactive + videomode.vfront_porch + + videomode.vback_porch + videomode.vsync_len; + videomode.pixelclock = width * height * XEN_DRM_CRTC_VREFRESH_HZ; + mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER; + + drm_display_mode_from_videomode(&videomode, mode); + drm_mode_probed_add(connector, mode); + return 1; +} + +static const struct drm_connector_helper_funcs connector_helper_funcs = { + .get_modes = connector_get_modes, + .detect_ctx = connector_detect, +}; + +static const struct drm_connector_funcs connector_funcs = { + .dpms = drm_helper_connector_dpms, + .fill_modes = drm_helper_probe_single_connector_modes, + .destroy = drm_connector_cleanup, + .reset = drm_atomic_helper_connector_reset, + .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, + .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, +}; + +int xen_drm_front_conn_init(struct xen_drm_front_drm_info *drm_info, + struct drm_connector *connector) +{ + struct xen_drm_front_drm_pipeline *pipeline = + to_xen_drm_pipeline(connector); + + drm_connector_helper_add(connector, &connector_helper_funcs); + + pipeline->conn_connected = true; + + connector->polled = DRM_CONNECTOR_POLL_CONNECT | + DRM_CONNECTOR_POLL_DISCONNECT; + + return drm_connector_init(drm_info->drm_dev, connector, + &connector_funcs, DRM_MODE_CONNECTOR_VIRTUAL); +} diff --git a/drivers/gpu/drm/xen/xen_drm_front_conn.h b/drivers/gpu/drm/xen/xen_drm_front_conn.h new file mode 100644 index 0000000..39de7cf --- /dev/null +++ b/drivers/gpu/drm/xen/xen_drm_front_conn.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ + +/* + * Xen para-virtual DRM device + * + * Copyright (C) 2016-2018 EPAM Systems Inc. + * + * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> + */ + +#ifndef __XEN_DRM_FRONT_CONN_H_ +#define __XEN_DRM_FRONT_CONN_H_ + +#include <drm/drmP.h> +#include <drm/drm_crtc.h> +#include <drm/drm_encoder.h> + +#include <linux/wait.h> + +struct xen_drm_front_drm_info; + +int xen_drm_front_conn_init(struct xen_drm_front_drm_info *drm_info, + struct drm_connector *connector); + +const u32 *xen_drm_front_conn_get_formats(int *format_count); + +#endif /* __XEN_DRM_FRONT_CONN_H_ */ diff --git a/drivers/gpu/drm/xen/xen_drm_front_evtchnl.c b/drivers/gpu/drm/xen/xen_drm_front_evtchnl.c new file mode 100644 index 0000000..945226a --- /dev/null +++ b/drivers/gpu/drm/xen/xen_drm_front_evtchnl.c @@ -0,0 +1,387 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT + +/* + * Xen para-virtual DRM device + * + * Copyright (C) 2016-2018 EPAM Systems Inc. + * + * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> + */ + +#include <drm/drmP.h> + +#include <linux/errno.h> +#include <linux/irq.h> + +#include <xen/xenbus.h> +#include <xen/events.h> +#include <xen/grant_table.h> + +#include "xen_drm_front.h" +#include "xen_drm_front_evtchnl.h" + +static irqreturn_t evtchnl_interrupt_ctrl(int irq, void *dev_id) +{ + struct xen_drm_front_evtchnl *evtchnl = dev_id; + struct xen_drm_front_info *front_info = evtchnl->front_info; + struct xendispl_resp *resp; + RING_IDX i, rp; + unsigned long flags; + + if (unlikely(evtchnl->state != EVTCHNL_STATE_CONNECTED)) + return IRQ_HANDLED; + + spin_lock_irqsave(&front_info->io_lock, flags); + +again: + rp = evtchnl->u.req.ring.sring->rsp_prod; + /* ensure we see queued responses up to rp */ + virt_rmb(); + + for (i = evtchnl->u.req.ring.rsp_cons; i != rp; i++) { + resp = RING_GET_RESPONSE(&evtchnl->u.req.ring, i); + if (unlikely(resp->id != evtchnl->evt_id)) + continue; + + switch (resp->operation) { + case XENDISPL_OP_PG_FLIP: + case XENDISPL_OP_FB_ATTACH: + case XENDISPL_OP_FB_DETACH: + case XENDISPL_OP_DBUF_CREATE: + case XENDISPL_OP_DBUF_DESTROY: + case XENDISPL_OP_SET_CONFIG: + evtchnl->u.req.resp_status = resp->status; + complete(&evtchnl->u.req.completion); + break; + + default: + DRM_ERROR("Operation %d is not supported\n", + resp->operation); + break; + } + } + + evtchnl->u.req.ring.rsp_cons = i; + + if (i != evtchnl->u.req.ring.req_prod_pvt) { + int more_to_do; + + RING_FINAL_CHECK_FOR_RESPONSES(&evtchnl->u.req.ring, + more_to_do); + if (more_to_do) + goto again; + } else { + evtchnl->u.req.ring.sring->rsp_event = i + 1; + } + + spin_unlock_irqrestore(&front_info->io_lock, flags); + return IRQ_HANDLED; +} + +static irqreturn_t evtchnl_interrupt_evt(int irq, void *dev_id) +{ + struct xen_drm_front_evtchnl *evtchnl = dev_id; + struct xen_drm_front_info *front_info = evtchnl->front_info; + struct xendispl_event_page *page = evtchnl->u.evt.page; + u32 cons, prod; + unsigned long flags; + + if (unlikely(evtchnl->state != EVTCHNL_STATE_CONNECTED)) + return IRQ_HANDLED; + + spin_lock_irqsave(&front_info->io_lock, flags); + + prod = page->in_prod; + /* ensure we see ring contents up to prod */ + virt_rmb(); + if (prod == page->in_cons) + goto out; + + for (cons = page->in_cons; cons != prod; cons++) { + struct xendispl_evt *event; + + event = &XENDISPL_IN_RING_REF(page, cons); + if (unlikely(event->id != evtchnl->evt_id++)) + continue; + + switch (event->type) { + case XENDISPL_EVT_PG_FLIP: + xen_drm_front_on_frame_done(front_info, evtchnl->index, + event->op.pg_flip.fb_cookie); + break; + } + } + page->in_cons = cons; + /* ensure ring contents */ + virt_wmb(); + +out: + spin_unlock_irqrestore(&front_info->io_lock, flags); + return IRQ_HANDLED; +} + +static void evtchnl_free(struct xen_drm_front_info *front_info, + struct xen_drm_front_evtchnl *evtchnl) +{ + unsigned long page = 0; + + if (evtchnl->type == EVTCHNL_TYPE_REQ) + page = (unsigned long)evtchnl->u.req.ring.sring; + else if (evtchnl->type == EVTCHNL_TYPE_EVT) + page = (unsigned long)evtchnl->u.evt.page; + if (!page) + return; + + evtchnl->state = EVTCHNL_STATE_DISCONNECTED; + + if (evtchnl->type == EVTCHNL_TYPE_REQ) { + /* release all who still waits for response if any */ + evtchnl->u.req.resp_status = -EIO; + complete_all(&evtchnl->u.req.completion); + } + + if (evtchnl->irq) + unbind_from_irqhandler(evtchnl->irq, evtchnl); + + if (evtchnl->port) + xenbus_free_evtchn(front_info->xb_dev, evtchnl->port); + + /* end access and free the page */ + if (evtchnl->gref != GRANT_INVALID_REF) + gnttab_end_foreign_access(evtchnl->gref, 0, page); + + memset(evtchnl, 0, sizeof(*evtchnl)); +} + +static int evtchnl_alloc(struct xen_drm_front_info *front_info, int index, + struct xen_drm_front_evtchnl *evtchnl, + enum xen_drm_front_evtchnl_type type) +{ + struct xenbus_device *xb_dev = front_info->xb_dev; + unsigned long page; + grant_ref_t gref; + irq_handler_t handler; + int ret; + + memset(evtchnl, 0, sizeof(*evtchnl)); + evtchnl->type = type; + evtchnl->index = index; + evtchnl->front_info = front_info; + evtchnl->state = EVTCHNL_STATE_DISCONNECTED; + evtchnl->gref = GRANT_INVALID_REF; + + page = get_zeroed_page(GFP_NOIO | __GFP_HIGH); + if (!page) { + ret = -ENOMEM; + goto fail; + } + + if (type == EVTCHNL_TYPE_REQ) { + struct xen_displif_sring *sring; + + init_completion(&evtchnl->u.req.completion); + mutex_init(&evtchnl->u.req.req_io_lock); + sring = (struct xen_displif_sring *)page; + SHARED_RING_INIT(sring); + FRONT_RING_INIT(&evtchnl->u.req.ring, sring, XEN_PAGE_SIZE); + + ret = xenbus_grant_ring(xb_dev, sring, 1, &gref); + if (ret < 0) { + evtchnl->u.req.ring.sring = NULL; + free_page(page); + goto fail; + } + + handler = evtchnl_interrupt_ctrl; + } else { + ret = gnttab_grant_foreign_access(xb_dev->otherend_id, + virt_to_gfn((void *)page), 0); + if (ret < 0) { + free_page(page); + goto fail; + } + + evtchnl->u.evt.page = (struct xendispl_event_page *)page; + gref = ret; + handler = evtchnl_interrupt_evt; + } + evtchnl->gref = gref; + + ret = xenbus_alloc_evtchn(xb_dev, &evtchnl->port); + if (ret < 0) + goto fail; + + ret = bind_evtchn_to_irqhandler(evtchnl->port, + handler, 0, xb_dev->devicetype, + evtchnl); + if (ret < 0) + goto fail; + + evtchnl->irq = ret; + return 0; + +fail: + DRM_ERROR("Failed to allocate ring: %d\n", ret); + return ret; +} + +int xen_drm_front_evtchnl_create_all(struct xen_drm_front_info *front_info) +{ + struct xen_drm_front_cfg *cfg; + int ret, conn; + + cfg = &front_info->cfg; + + front_info->evt_pairs = + kcalloc(cfg->num_connectors, + sizeof(struct xen_drm_front_evtchnl_pair), + GFP_KERNEL); + if (!front_info->evt_pairs) { + ret = -ENOMEM; + goto fail; + } + + for (conn = 0; conn < cfg->num_connectors; conn++) { + ret = evtchnl_alloc(front_info, conn, + &front_info->evt_pairs[conn].req, + EVTCHNL_TYPE_REQ); + if (ret < 0) { + DRM_ERROR("Error allocating control channel\n"); + goto fail; + } + + ret = evtchnl_alloc(front_info, conn, + &front_info->evt_pairs[conn].evt, + EVTCHNL_TYPE_EVT); + if (ret < 0) { + DRM_ERROR("Error allocating in-event channel\n"); + goto fail; + } + } + front_info->num_evt_pairs = cfg->num_connectors; + return 0; + +fail: + xen_drm_front_evtchnl_free_all(front_info); + return ret; +} + +static int evtchnl_publish(struct xenbus_transaction xbt, + struct xen_drm_front_evtchnl *evtchnl, + const char *path, const char *node_ring, + const char *node_chnl) +{ + struct xenbus_device *xb_dev = evtchnl->front_info->xb_dev; + int ret; + + /* write control channel ring reference */ + ret = xenbus_printf(xbt, path, node_ring, "%u", evtchnl->gref); + if (ret < 0) { + xenbus_dev_error(xb_dev, ret, "writing ring-ref"); + return ret; + } + + /* write event channel ring reference */ + ret = xenbus_printf(xbt, path, node_chnl, "%u", evtchnl->port); + if (ret < 0) { + xenbus_dev_error(xb_dev, ret, "writing event channel"); + return ret; + } + + return 0; +} + +int xen_drm_front_evtchnl_publish_all(struct xen_drm_front_info *front_info) +{ + struct xenbus_transaction xbt; + struct xen_drm_front_cfg *plat_data; + int ret, conn; + + plat_data = &front_info->cfg; + +again: + ret = xenbus_transaction_start(&xbt); + if (ret < 0) { + xenbus_dev_fatal(front_info->xb_dev, ret, + "starting transaction"); + return ret; + } + + for (conn = 0; conn < plat_data->num_connectors; conn++) { + ret = evtchnl_publish(xbt, &front_info->evt_pairs[conn].req, + plat_data->connectors[conn].xenstore_path, + XENDISPL_FIELD_REQ_RING_REF, + XENDISPL_FIELD_REQ_CHANNEL); + if (ret < 0) + goto fail; + + ret = evtchnl_publish(xbt, &front_info->evt_pairs[conn].evt, + plat_data->connectors[conn].xenstore_path, + XENDISPL_FIELD_EVT_RING_REF, + XENDISPL_FIELD_EVT_CHANNEL); + if (ret < 0) + goto fail; + } + + ret = xenbus_transaction_end(xbt, 0); + if (ret < 0) { + if (ret == -EAGAIN) + goto again; + + xenbus_dev_fatal(front_info->xb_dev, ret, + "completing transaction"); + goto fail_to_end; + } + + return 0; + +fail: + xenbus_transaction_end(xbt, 1); + +fail_to_end: + xenbus_dev_fatal(front_info->xb_dev, ret, "writing Xen store"); + return ret; +} + +void xen_drm_front_evtchnl_flush(struct xen_drm_front_evtchnl *evtchnl) +{ + int notify; + + evtchnl->u.req.ring.req_prod_pvt++; + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&evtchnl->u.req.ring, notify); + if (notify) + notify_remote_via_irq(evtchnl->irq); +} + +void xen_drm_front_evtchnl_set_state(struct xen_drm_front_info *front_info, + enum xen_drm_front_evtchnl_state state) +{ + unsigned long flags; + int i; + + if (!front_info->evt_pairs) + return; + + spin_lock_irqsave(&front_info->io_lock, flags); + for (i = 0; i < front_info->num_evt_pairs; i++) { + front_info->evt_pairs[i].req.state = state; + front_info->evt_pairs[i].evt.state = state; + } + spin_unlock_irqrestore(&front_info->io_lock, flags); +} + +void xen_drm_front_evtchnl_free_all(struct xen_drm_front_info *front_info) +{ + int i; + + if (!front_info->evt_pairs) + return; + + for (i = 0; i < front_info->num_evt_pairs; i++) { + evtchnl_free(front_info, &front_info->evt_pairs[i].req); + evtchnl_free(front_info, &front_info->evt_pairs[i].evt); + } + + kfree(front_info->evt_pairs); + front_info->evt_pairs = NULL; +} diff --git a/drivers/gpu/drm/xen/xen_drm_front_evtchnl.h b/drivers/gpu/drm/xen/xen_drm_front_evtchnl.h new file mode 100644 index 0000000..b0af699 --- /dev/null +++ b/drivers/gpu/drm/xen/xen_drm_front_evtchnl.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ + +/* + * Xen para-virtual DRM device + * + * Copyright (C) 2016-2018 EPAM Systems Inc. + * + * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> + */ + +#ifndef __XEN_DRM_FRONT_EVTCHNL_H_ +#define __XEN_DRM_FRONT_EVTCHNL_H_ + +#include <linux/completion.h> +#include <linux/types.h> + +#include <xen/interface/io/ring.h> +#include <xen/interface/io/displif.h> + +/* + * All operations which are not connector oriented use this ctrl event channel, + * e.g. fb_attach/destroy which belong to a DRM device, not to a CRTC. + */ +#define GENERIC_OP_EVT_CHNL 0 + +enum xen_drm_front_evtchnl_state { + EVTCHNL_STATE_DISCONNECTED, + EVTCHNL_STATE_CONNECTED, +}; + +enum xen_drm_front_evtchnl_type { + EVTCHNL_TYPE_REQ, + EVTCHNL_TYPE_EVT, +}; + +struct xen_drm_front_drm_info; + +struct xen_drm_front_evtchnl { + struct xen_drm_front_info *front_info; + int gref; + int port; + int irq; + int index; + enum xen_drm_front_evtchnl_state state; + enum xen_drm_front_evtchnl_type type; + /* either response id or incoming event id */ + u16 evt_id; + /* next request id or next expected event id */ + u16 evt_next_id; + union { + struct { + struct xen_displif_front_ring ring; + struct completion completion; + /* latest response status */ + int resp_status; + /* serializer for backend IO: request/response */ + struct mutex req_io_lock; + } req; + struct { + struct xendispl_event_page *page; + } evt; + } u; +}; + +struct xen_drm_front_evtchnl_pair { + struct xen_drm_front_evtchnl req; + struct xen_drm_front_evtchnl evt; +}; + +int xen_drm_front_evtchnl_create_all(struct xen_drm_front_info *front_info); + +int xen_drm_front_evtchnl_publish_all(struct xen_drm_front_info *front_info); + +void xen_drm_front_evtchnl_flush(struct xen_drm_front_evtchnl *evtchnl); + +void xen_drm_front_evtchnl_set_state(struct xen_drm_front_info *front_info, + enum xen_drm_front_evtchnl_state state); + +void xen_drm_front_evtchnl_free_all(struct xen_drm_front_info *front_info); + +#endif /* __XEN_DRM_FRONT_EVTCHNL_H_ */ diff --git a/drivers/gpu/drm/xen/xen_drm_front_gem.c b/drivers/gpu/drm/xen/xen_drm_front_gem.c new file mode 100644 index 0000000..c85bfe7 --- /dev/null +++ b/drivers/gpu/drm/xen/xen_drm_front_gem.c @@ -0,0 +1,308 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT + +/* + * Xen para-virtual DRM device + * + * Copyright (C) 2016-2018 EPAM Systems Inc. + * + * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> + */ + +#include "xen_drm_front_gem.h" + +#include <drm/drmP.h> +#include <drm/drm_crtc_helper.h> +#include <drm/drm_fb_helper.h> +#include <drm/drm_gem.h> + +#include <linux/dma-buf.h> +#include <linux/scatterlist.h> +#include <linux/shmem_fs.h> + +#include <xen/balloon.h> + +#include "xen_drm_front.h" +#include "xen_drm_front_shbuf.h" + +struct xen_gem_object { + struct drm_gem_object base; + + size_t num_pages; + struct page **pages; + + /* set for buffers allocated by the backend */ + bool be_alloc; + + /* this is for imported PRIME buffer */ + struct sg_table *sgt_imported; +}; + +static inline struct xen_gem_object * +to_xen_gem_obj(struct drm_gem_object *gem_obj) +{ + return container_of(gem_obj, struct xen_gem_object, base); +} + +static int gem_alloc_pages_array(struct xen_gem_object *xen_obj, + size_t buf_size) +{ + xen_obj->num_pages = DIV_ROUND_UP(buf_size, PAGE_SIZE); + xen_obj->pages = kvmalloc_array(xen_obj->num_pages, + sizeof(struct page *), GFP_KERNEL); + return !xen_obj->pages ? -ENOMEM : 0; +} + +static void gem_free_pages_array(struct xen_gem_object *xen_obj) +{ + kvfree(xen_obj->pages); + xen_obj->pages = NULL; +} + +static struct xen_gem_object *gem_create_obj(struct drm_device *dev, + size_t size) +{ + struct xen_gem_object *xen_obj; + int ret; + + xen_obj = kzalloc(sizeof(*xen_obj), GFP_KERNEL); + if (!xen_obj) + return ERR_PTR(-ENOMEM); + + ret = drm_gem_object_init(dev, &xen_obj->base, size); + if (ret < 0) { + kfree(xen_obj); + return ERR_PTR(ret); + } + + return xen_obj; +} + +static struct xen_gem_object *gem_create(struct drm_device *dev, size_t size) +{ + struct xen_drm_front_drm_info *drm_info = dev->dev_private; + struct xen_gem_object *xen_obj; + int ret; + + size = round_up(size, PAGE_SIZE); + xen_obj = gem_create_obj(dev, size); + if (IS_ERR_OR_NULL(xen_obj)) + return xen_obj; + + if (drm_info->front_info->cfg.be_alloc) { + /* + * backend will allocate space for this buffer, so + * only allocate array of pointers to pages + */ + ret = gem_alloc_pages_array(xen_obj, size); + if (ret < 0) + goto fail; + + /* + * allocate ballooned pages which will be used to map + * grant references provided by the backend + */ + ret = alloc_xenballooned_pages(xen_obj->num_pages, + xen_obj->pages); + if (ret < 0) { + DRM_ERROR("Cannot allocate %zu ballooned pages: %d\n", + xen_obj->num_pages, ret); + gem_free_pages_array(xen_obj); + goto fail; + } + + xen_obj->be_alloc = true; + return xen_obj; + } + /* + * need to allocate backing pages now, so we can share those + * with the backend + */ + xen_obj->num_pages = DIV_ROUND_UP(size, PAGE_SIZE); + xen_obj->pages = drm_gem_get_pages(&xen_obj->base); + if (IS_ERR_OR_NULL(xen_obj->pages)) { + ret = PTR_ERR(xen_obj->pages); + xen_obj->pages = NULL; + goto fail; + } + + return xen_obj; + +fail: + DRM_ERROR("Failed to allocate buffer with size %zu\n", size); + return ERR_PTR(ret); +} + +struct drm_gem_object *xen_drm_front_gem_create(struct drm_device *dev, + size_t size) +{ + struct xen_gem_object *xen_obj; + + xen_obj = gem_create(dev, size); + if (IS_ERR_OR_NULL(xen_obj)) + return ERR_CAST(xen_obj); + + return &xen_obj->base; +} + +void xen_drm_front_gem_free_object_unlocked(struct drm_gem_object *gem_obj) +{ + struct xen_gem_object *xen_obj = to_xen_gem_obj(gem_obj); + + if (xen_obj->base.import_attach) { + drm_prime_gem_destroy(&xen_obj->base, xen_obj->sgt_imported); + gem_free_pages_array(xen_obj); + } else { + if (xen_obj->pages) { + if (xen_obj->be_alloc) { + free_xenballooned_pages(xen_obj->num_pages, + xen_obj->pages); + gem_free_pages_array(xen_obj); + } else { + drm_gem_put_pages(&xen_obj->base, + xen_obj->pages, true, false); + } + } + } + drm_gem_object_release(gem_obj); + kfree(xen_obj); +} + +struct page **xen_drm_front_gem_get_pages(struct drm_gem_object *gem_obj) +{ + struct xen_gem_object *xen_obj = to_xen_gem_obj(gem_obj); + + return xen_obj->pages; +} + +struct sg_table *xen_drm_front_gem_get_sg_table(struct drm_gem_object *gem_obj) +{ + struct xen_gem_object *xen_obj = to_xen_gem_obj(gem_obj); + + if (!xen_obj->pages) + return NULL; + + return drm_prime_pages_to_sg(xen_obj->pages, xen_obj->num_pages); +} + +struct drm_gem_object * +xen_drm_front_gem_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *sgt) +{ + struct xen_drm_front_drm_info *drm_info = dev->dev_private; + struct xen_gem_object *xen_obj; + size_t size; + int ret; + + size = attach->dmabuf->size; + xen_obj = gem_create_obj(dev, size); + if (IS_ERR_OR_NULL(xen_obj)) + return ERR_CAST(xen_obj); + + ret = gem_alloc_pages_array(xen_obj, size); + if (ret < 0) + return ERR_PTR(ret); + + xen_obj->sgt_imported = sgt; + + ret = drm_prime_sg_to_page_addr_arrays(sgt, xen_obj->pages, + NULL, xen_obj->num_pages); + if (ret < 0) + return ERR_PTR(ret); + + ret = xen_drm_front_dbuf_create(drm_info->front_info, + xen_drm_front_dbuf_to_cookie(&xen_obj->base), + 0, 0, 0, size, xen_obj->pages); + if (ret < 0) + return ERR_PTR(ret); + + DRM_DEBUG("Imported buffer of size %zu with nents %u\n", + size, sgt->nents); + + return &xen_obj->base; +} + +static int gem_mmap_obj(struct xen_gem_object *xen_obj, + struct vm_area_struct *vma) +{ + unsigned long addr = vma->vm_start; + int i; + + /* + * clear the VM_PFNMAP flag that was set by drm_gem_mmap(), and set the + * vm_pgoff (used as a fake buffer offset by DRM) to 0 as we want to map + * the whole buffer. + */ + vma->vm_flags &= ~VM_PFNMAP; + vma->vm_flags |= VM_MIXEDMAP; + vma->vm_pgoff = 0; + vma->vm_page_prot = + pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); + + /* + * vm_operations_struct.fault handler will be called if CPU access + * to VM is here. For GPUs this isn't the case, because CPU + * doesn't touch the memory. Insert pages now, so both CPU and GPU are + * happy. + * FIXME: as we insert all the pages now then no .fault handler must + * be called, so don't provide one + */ + for (i = 0; i < xen_obj->num_pages; i++) { + int ret; + + ret = vm_insert_page(vma, addr, xen_obj->pages[i]); + if (ret < 0) { + DRM_ERROR("Failed to insert pages into vma: %d\n", ret); + return ret; + } + + addr += PAGE_SIZE; + } + return 0; +} + +int xen_drm_front_gem_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct xen_gem_object *xen_obj; + struct drm_gem_object *gem_obj; + int ret; + + ret = drm_gem_mmap(filp, vma); + if (ret < 0) + return ret; + + gem_obj = vma->vm_private_data; + xen_obj = to_xen_gem_obj(gem_obj); + return gem_mmap_obj(xen_obj, vma); +} + +void *xen_drm_front_gem_prime_vmap(struct drm_gem_object *gem_obj) +{ + struct xen_gem_object *xen_obj = to_xen_gem_obj(gem_obj); + + if (!xen_obj->pages) + return NULL; + + return vmap(xen_obj->pages, xen_obj->num_pages, + VM_MAP, pgprot_writecombine(PAGE_KERNEL)); +} + +void xen_drm_front_gem_prime_vunmap(struct drm_gem_object *gem_obj, + void *vaddr) +{ + vunmap(vaddr); +} + +int xen_drm_front_gem_prime_mmap(struct drm_gem_object *gem_obj, + struct vm_area_struct *vma) +{ + struct xen_gem_object *xen_obj; + int ret; + + ret = drm_gem_mmap_obj(gem_obj, gem_obj->size, vma); + if (ret < 0) + return ret; + + xen_obj = to_xen_gem_obj(gem_obj); + return gem_mmap_obj(xen_obj, vma); +} diff --git a/drivers/gpu/drm/xen/xen_drm_front_gem.h b/drivers/gpu/drm/xen/xen_drm_front_gem.h new file mode 100644 index 0000000..d5ab734 --- /dev/null +++ b/drivers/gpu/drm/xen/xen_drm_front_gem.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ + +/* + * Xen para-virtual DRM device + * + * Copyright (C) 2016-2018 EPAM Systems Inc. + * + * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> + */ + +#ifndef __XEN_DRM_FRONT_GEM_H +#define __XEN_DRM_FRONT_GEM_H + +#include <drm/drmP.h> + +struct drm_gem_object *xen_drm_front_gem_create(struct drm_device *dev, + size_t size); + +struct drm_gem_object * +xen_drm_front_gem_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *sgt); + +struct sg_table *xen_drm_front_gem_get_sg_table(struct drm_gem_object *gem_obj); + +struct page **xen_drm_front_gem_get_pages(struct drm_gem_object *obj); + +void xen_drm_front_gem_free_object_unlocked(struct drm_gem_object *gem_obj); + +int xen_drm_front_gem_mmap(struct file *filp, struct vm_area_struct *vma); + +void *xen_drm_front_gem_prime_vmap(struct drm_gem_object *gem_obj); + +void xen_drm_front_gem_prime_vunmap(struct drm_gem_object *gem_obj, + void *vaddr); + +int xen_drm_front_gem_prime_mmap(struct drm_gem_object *gem_obj, + struct vm_area_struct *vma); + +#endif /* __XEN_DRM_FRONT_GEM_H */ diff --git a/drivers/gpu/drm/xen/xen_drm_front_kms.c b/drivers/gpu/drm/xen/xen_drm_front_kms.c new file mode 100644 index 0000000..a3479eb --- /dev/null +++ b/drivers/gpu/drm/xen/xen_drm_front_kms.c @@ -0,0 +1,366 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT + +/* + * Xen para-virtual DRM device + * + * Copyright (C) 2016-2018 EPAM Systems Inc. + * + * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> + */ + +#include "xen_drm_front_kms.h" + +#include <drm/drmP.h> +#include <drm/drm_atomic.h> +#include <drm/drm_atomic_helper.h> +#include <drm/drm_crtc_helper.h> +#include <drm/drm_gem.h> +#include <drm/drm_gem_framebuffer_helper.h> + +#include "xen_drm_front.h" +#include "xen_drm_front_conn.h" + +/* + * Timeout in ms to wait for frame done event from the backend: + * must be a bit more than IO time-out + */ +#define FRAME_DONE_TO_MS (XEN_DRM_FRONT_WAIT_BACK_MS + 100) + +static struct xen_drm_front_drm_pipeline * +to_xen_drm_pipeline(struct drm_simple_display_pipe *pipe) +{ + return container_of(pipe, struct xen_drm_front_drm_pipeline, pipe); +} + +static void fb_destroy(struct drm_framebuffer *fb) +{ + struct xen_drm_front_drm_info *drm_info = fb->dev->dev_private; + int idx; + + if (drm_dev_enter(fb->dev, &idx)) { + xen_drm_front_fb_detach(drm_info->front_info, + xen_drm_front_fb_to_cookie(fb)); + drm_dev_exit(idx); + } + drm_gem_fb_destroy(fb); +} + +static struct drm_framebuffer_funcs fb_funcs = { + .destroy = fb_destroy, +}; + +static struct drm_framebuffer * +fb_create(struct drm_device *dev, struct drm_file *filp, + const struct drm_mode_fb_cmd2 *mode_cmd) +{ + struct xen_drm_front_drm_info *drm_info = dev->dev_private; + static struct drm_framebuffer *fb; + struct drm_gem_object *gem_obj; + int ret; + + fb = drm_gem_fb_create_with_funcs(dev, filp, mode_cmd, &fb_funcs); + if (IS_ERR_OR_NULL(fb)) + return fb; + + gem_obj = drm_gem_object_lookup(filp, mode_cmd->handles[0]); + if (!gem_obj) { + DRM_ERROR("Failed to lookup GEM object\n"); + ret = -ENOENT; + goto fail; + } + + drm_gem_object_put_unlocked(gem_obj); + + ret = xen_drm_front_fb_attach(drm_info->front_info, + xen_drm_front_dbuf_to_cookie(gem_obj), + xen_drm_front_fb_to_cookie(fb), + fb->width, fb->height, + fb->format->format); + if (ret < 0) { + DRM_ERROR("Back failed to attach FB %p: %d\n", fb, ret); + goto fail; + } + + return fb; + +fail: + drm_gem_fb_destroy(fb); + return ERR_PTR(ret); +} + +static const struct drm_mode_config_funcs mode_config_funcs = { + .fb_create = fb_create, + .atomic_check = drm_atomic_helper_check, + .atomic_commit = drm_atomic_helper_commit, +}; + +static void send_pending_event(struct xen_drm_front_drm_pipeline *pipeline) +{ + struct drm_crtc *crtc = &pipeline->pipe.crtc; + struct drm_device *dev = crtc->dev; + unsigned long flags; + + spin_lock_irqsave(&dev->event_lock, flags); + if (pipeline->pending_event) + drm_crtc_send_vblank_event(crtc, pipeline->pending_event); + pipeline->pending_event = NULL; + spin_unlock_irqrestore(&dev->event_lock, flags); +} + +static void display_enable(struct drm_simple_display_pipe *pipe, + struct drm_crtc_state *crtc_state, + struct drm_plane_state *plane_state) +{ + struct xen_drm_front_drm_pipeline *pipeline = + to_xen_drm_pipeline(pipe); + struct drm_crtc *crtc = &pipe->crtc; + struct drm_framebuffer *fb = plane_state->fb; + int ret, idx; + + if (!drm_dev_enter(pipe->crtc.dev, &idx)) + return; + + ret = xen_drm_front_mode_set(pipeline, crtc->x, crtc->y, + fb->width, fb->height, + fb->format->cpp[0] * 8, + xen_drm_front_fb_to_cookie(fb)); + + if (ret) { + DRM_ERROR("Failed to enable display: %d\n", ret); + pipeline->conn_connected = false; + } + + drm_dev_exit(idx); +} + +static void display_disable(struct drm_simple_display_pipe *pipe) +{ + struct xen_drm_front_drm_pipeline *pipeline = + to_xen_drm_pipeline(pipe); + int ret = 0, idx; + + if (drm_dev_enter(pipe->crtc.dev, &idx)) { + ret = xen_drm_front_mode_set(pipeline, 0, 0, 0, 0, 0, + xen_drm_front_fb_to_cookie(NULL)); + drm_dev_exit(idx); + } + if (ret) + DRM_ERROR("Failed to disable display: %d\n", ret); + + /* Make sure we can restart with enabled connector next time */ + pipeline->conn_connected = true; + + /* release stalled event if any */ + send_pending_event(pipeline); +} + +void xen_drm_front_kms_on_frame_done(struct xen_drm_front_drm_pipeline *pipeline, + u64 fb_cookie) +{ + /* + * This runs in interrupt context, e.g. under + * drm_info->front_info->io_lock, so we cannot call _sync version + * to cancel the work + */ + cancel_delayed_work(&pipeline->pflip_to_worker); + + send_pending_event(pipeline); +} + +static void pflip_to_worker(struct work_struct *work) +{ + struct delayed_work *delayed_work = to_delayed_work(work); + struct xen_drm_front_drm_pipeline *pipeline = + container_of(delayed_work, + struct xen_drm_front_drm_pipeline, + pflip_to_worker); + + DRM_ERROR("Frame done timed-out, releasing"); + send_pending_event(pipeline); +} + +static bool display_send_page_flip(struct drm_simple_display_pipe *pipe, + struct drm_plane_state *old_plane_state) +{ + struct drm_plane_state *plane_state = + drm_atomic_get_new_plane_state(old_plane_state->state, + &pipe->plane); + + /* + * If old_plane_state->fb is NULL and plane_state->fb is not, + * then this is an atomic commit which will enable display. + * If old_plane_state->fb is not NULL and plane_state->fb is, + * then this is an atomic commit which will disable display. + * Ignore these and do not send page flip as this framebuffer will be + * sent to the backend as a part of display_set_config call. + */ + if (old_plane_state->fb && plane_state->fb) { + struct xen_drm_front_drm_pipeline *pipeline = + to_xen_drm_pipeline(pipe); + struct xen_drm_front_drm_info *drm_info = pipeline->drm_info; + int ret; + + schedule_delayed_work(&pipeline->pflip_to_worker, + msecs_to_jiffies(FRAME_DONE_TO_MS)); + + ret = xen_drm_front_page_flip(drm_info->front_info, + pipeline->index, + xen_drm_front_fb_to_cookie(plane_state->fb)); + if (ret) { + DRM_ERROR("Failed to send page flip request to backend: %d\n", ret); + + pipeline->conn_connected = false; + /* + * Report the flip not handled, so pending event is + * sent, unblocking user-space. + */ + return false; + } + /* + * Signal that page flip was handled, pending event will be sent + * on frame done event from the backend. + */ + return true; + } + + return false; +} + +static void display_update(struct drm_simple_display_pipe *pipe, + struct drm_plane_state *old_plane_state) +{ + struct xen_drm_front_drm_pipeline *pipeline = + to_xen_drm_pipeline(pipe); + struct drm_crtc *crtc = &pipe->crtc; + struct drm_pending_vblank_event *event; + int idx; + + event = crtc->state->event; + if (event) { + struct drm_device *dev = crtc->dev; + unsigned long flags; + + WARN_ON(pipeline->pending_event); + + spin_lock_irqsave(&dev->event_lock, flags); + crtc->state->event = NULL; + + pipeline->pending_event = event; + spin_unlock_irqrestore(&dev->event_lock, flags); + } + + if (!drm_dev_enter(pipe->crtc.dev, &idx)) { + send_pending_event(pipeline); + return; + } + + /* + * Send page flip request to the backend *after* we have event cached + * above, so on page flip done event from the backend we can + * deliver it and there is no race condition between this code and + * event from the backend. + * If this is not a page flip, e.g. no flip done event from the backend + * is expected, then send now. + */ + if (!display_send_page_flip(pipe, old_plane_state)) + send_pending_event(pipeline); + + drm_dev_exit(idx); +} + +static enum drm_mode_status +display_mode_valid(struct drm_crtc *crtc, const struct drm_display_mode *mode) +{ + struct xen_drm_front_drm_pipeline *pipeline = + container_of(crtc, struct xen_drm_front_drm_pipeline, + pipe.crtc); + + if (mode->hdisplay != pipeline->width) + return MODE_ERROR; + + if (mode->vdisplay != pipeline->height) + return MODE_ERROR; + + return MODE_OK; +} + +static const struct drm_simple_display_pipe_funcs display_funcs = { + .mode_valid = display_mode_valid, + .enable = display_enable, + .disable = display_disable, + .prepare_fb = drm_gem_fb_simple_display_pipe_prepare_fb, + .update = display_update, +}; + +static int display_pipe_init(struct xen_drm_front_drm_info *drm_info, + int index, struct xen_drm_front_cfg_connector *cfg, + struct xen_drm_front_drm_pipeline *pipeline) +{ + struct drm_device *dev = drm_info->drm_dev; + const u32 *formats; + int format_count; + int ret; + + pipeline->drm_info = drm_info; + pipeline->index = index; + pipeline->height = cfg->height; + pipeline->width = cfg->width; + + INIT_DELAYED_WORK(&pipeline->pflip_to_worker, pflip_to_worker); + + ret = xen_drm_front_conn_init(drm_info, &pipeline->conn); + if (ret) + return ret; + + formats = xen_drm_front_conn_get_formats(&format_count); + + return drm_simple_display_pipe_init(dev, &pipeline->pipe, + &display_funcs, formats, + format_count, NULL, + &pipeline->conn); +} + +int xen_drm_front_kms_init(struct xen_drm_front_drm_info *drm_info) +{ + struct drm_device *dev = drm_info->drm_dev; + int i, ret; + + drm_mode_config_init(dev); + + dev->mode_config.min_width = 0; + dev->mode_config.min_height = 0; + dev->mode_config.max_width = 4095; + dev->mode_config.max_height = 2047; + dev->mode_config.funcs = &mode_config_funcs; + + for (i = 0; i < drm_info->front_info->cfg.num_connectors; i++) { + struct xen_drm_front_cfg_connector *cfg = + &drm_info->front_info->cfg.connectors[i]; + struct xen_drm_front_drm_pipeline *pipeline = + &drm_info->pipeline[i]; + + ret = display_pipe_init(drm_info, i, cfg, pipeline); + if (ret) { + drm_mode_config_cleanup(dev); + return ret; + } + } + + drm_mode_config_reset(dev); + drm_kms_helper_poll_init(dev); + return 0; +} + +void xen_drm_front_kms_fini(struct xen_drm_front_drm_info *drm_info) +{ + int i; + + for (i = 0; i < drm_info->front_info->cfg.num_connectors; i++) { + struct xen_drm_front_drm_pipeline *pipeline = + &drm_info->pipeline[i]; + + cancel_delayed_work_sync(&pipeline->pflip_to_worker); + + send_pending_event(pipeline); + } +} diff --git a/drivers/gpu/drm/xen/xen_drm_front_kms.h b/drivers/gpu/drm/xen/xen_drm_front_kms.h new file mode 100644 index 0000000..ab2fbad --- /dev/null +++ b/drivers/gpu/drm/xen/xen_drm_front_kms.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ + +/* + * Xen para-virtual DRM device + * + * Copyright (C) 2016-2018 EPAM Systems Inc. + * + * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> + */ + +#ifndef __XEN_DRM_FRONT_KMS_H_ +#define __XEN_DRM_FRONT_KMS_H_ + +#include <linux/types.h> + +struct xen_drm_front_drm_info; +struct xen_drm_front_drm_pipeline; + +int xen_drm_front_kms_init(struct xen_drm_front_drm_info *drm_info); + +void xen_drm_front_kms_fini(struct xen_drm_front_drm_info *drm_info); + +void xen_drm_front_kms_on_frame_done(struct xen_drm_front_drm_pipeline *pipeline, + u64 fb_cookie); + +#endif /* __XEN_DRM_FRONT_KMS_H_ */ diff --git a/drivers/gpu/drm/xen/xen_drm_front_shbuf.c b/drivers/gpu/drm/xen/xen_drm_front_shbuf.c new file mode 100644 index 0000000..8099cb3 --- /dev/null +++ b/drivers/gpu/drm/xen/xen_drm_front_shbuf.c @@ -0,0 +1,414 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT + +/* + * Xen para-virtual DRM device + * + * Copyright (C) 2016-2018 EPAM Systems Inc. + * + * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> + */ + +#include <drm/drmP.h> + +#if defined(CONFIG_X86) +#include <drm/drm_cache.h> +#endif +#include <linux/errno.h> +#include <linux/mm.h> + +#include <asm/xen/hypervisor.h> +#include <xen/balloon.h> +#include <xen/xen.h> +#include <xen/xenbus.h> +#include <xen/interface/io/ring.h> +#include <xen/interface/io/displif.h> + +#include "xen_drm_front.h" +#include "xen_drm_front_shbuf.h" + +struct xen_drm_front_shbuf_ops { + /* + * Calculate number of grefs required to handle this buffer, + * e.g. if grefs are required for page directory only or the buffer + * pages as well. + */ + void (*calc_num_grefs)(struct xen_drm_front_shbuf *buf); + /* Fill page directory according to para-virtual display protocol. */ + void (*fill_page_dir)(struct xen_drm_front_shbuf *buf); + /* Claim grant references for the pages of the buffer. */ + int (*grant_refs_for_buffer)(struct xen_drm_front_shbuf *buf, + grant_ref_t *priv_gref_head, int gref_idx); + /* Map grant references of the buffer. */ + int (*map)(struct xen_drm_front_shbuf *buf); + /* Unmap grant references of the buffer. */ + int (*unmap)(struct xen_drm_front_shbuf *buf); +}; + +grant_ref_t xen_drm_front_shbuf_get_dir_start(struct xen_drm_front_shbuf *buf) +{ + if (!buf->grefs) + return GRANT_INVALID_REF; + + return buf->grefs[0]; +} + +int xen_drm_front_shbuf_map(struct xen_drm_front_shbuf *buf) +{ + if (buf->ops->map) + return buf->ops->map(buf); + + /* no need to map own grant references */ + return 0; +} + +int xen_drm_front_shbuf_unmap(struct xen_drm_front_shbuf *buf) +{ + if (buf->ops->unmap) + return buf->ops->unmap(buf); + + /* no need to unmap own grant references */ + return 0; +} + +void xen_drm_front_shbuf_flush(struct xen_drm_front_shbuf *buf) +{ +#if defined(CONFIG_X86) + drm_clflush_pages(buf->pages, buf->num_pages); +#endif +} + +void xen_drm_front_shbuf_free(struct xen_drm_front_shbuf *buf) +{ + if (buf->grefs) { + int i; + + for (i = 0; i < buf->num_grefs; i++) + if (buf->grefs[i] != GRANT_INVALID_REF) + gnttab_end_foreign_access(buf->grefs[i], + 0, 0UL); + } + kfree(buf->grefs); + kfree(buf->directory); + kfree(buf); +} + +/* + * number of grefs a page can hold with respect to the + * struct xendispl_page_directory header + */ +#define XEN_DRM_NUM_GREFS_PER_PAGE ((PAGE_SIZE - \ + offsetof(struct xendispl_page_directory, gref)) / \ + sizeof(grant_ref_t)) + +static int get_num_pages_dir(struct xen_drm_front_shbuf *buf) +{ + /* number of pages the page directory consumes itself */ + return DIV_ROUND_UP(buf->num_pages, XEN_DRM_NUM_GREFS_PER_PAGE); +} + +static void backend_calc_num_grefs(struct xen_drm_front_shbuf *buf) +{ + /* only for pages the page directory consumes itself */ + buf->num_grefs = get_num_pages_dir(buf); +} + +static void guest_calc_num_grefs(struct xen_drm_front_shbuf *buf) +{ + /* + * number of pages the page directory consumes itself + * plus grefs for the buffer pages + */ + buf->num_grefs = get_num_pages_dir(buf) + buf->num_pages; +} + +#define xen_page_to_vaddr(page) \ + ((phys_addr_t)pfn_to_kaddr(page_to_xen_pfn(page))) + +static int backend_unmap(struct xen_drm_front_shbuf *buf) +{ + struct gnttab_unmap_grant_ref *unmap_ops; + int i, ret; + + if (!buf->pages || !buf->backend_map_handles || !buf->grefs) + return 0; + + unmap_ops = kcalloc(buf->num_pages, sizeof(*unmap_ops), + GFP_KERNEL); + if (!unmap_ops) { + DRM_ERROR("Failed to get memory while unmapping\n"); + return -ENOMEM; + } + + for (i = 0; i < buf->num_pages; i++) { + phys_addr_t addr; + + addr = xen_page_to_vaddr(buf->pages[i]); + gnttab_set_unmap_op(&unmap_ops[i], addr, GNTMAP_host_map, + buf->backend_map_handles[i]); + } + + ret = gnttab_unmap_refs(unmap_ops, NULL, buf->pages, + buf->num_pages); + + for (i = 0; i < buf->num_pages; i++) { + if (unlikely(unmap_ops[i].status != GNTST_okay)) + DRM_ERROR("Failed to unmap page %d: %d\n", + i, unmap_ops[i].status); + } + + if (ret) + DRM_ERROR("Failed to unmap grant references, ret %d", ret); + + kfree(unmap_ops); + kfree(buf->backend_map_handles); + buf->backend_map_handles = NULL; + return ret; +} + +static int backend_map(struct xen_drm_front_shbuf *buf) +{ + struct gnttab_map_grant_ref *map_ops = NULL; + unsigned char *ptr; + int ret, cur_gref, cur_dir_page, cur_page, grefs_left; + + map_ops = kcalloc(buf->num_pages, sizeof(*map_ops), GFP_KERNEL); + if (!map_ops) + return -ENOMEM; + + buf->backend_map_handles = kcalloc(buf->num_pages, + sizeof(*buf->backend_map_handles), + GFP_KERNEL); + if (!buf->backend_map_handles) { + kfree(map_ops); + return -ENOMEM; + } + + /* + * read page directory to get grefs from the backend: for external + * buffer we only allocate buf->grefs for the page directory, + * so buf->num_grefs has number of pages in the page directory itself + */ + ptr = buf->directory; + grefs_left = buf->num_pages; + cur_page = 0; + for (cur_dir_page = 0; cur_dir_page < buf->num_grefs; cur_dir_page++) { + struct xendispl_page_directory *page_dir = + (struct xendispl_page_directory *)ptr; + int to_copy = XEN_DRM_NUM_GREFS_PER_PAGE; + + if (to_copy > grefs_left) + to_copy = grefs_left; + + for (cur_gref = 0; cur_gref < to_copy; cur_gref++) { + phys_addr_t addr; + + addr = xen_page_to_vaddr(buf->pages[cur_page]); + gnttab_set_map_op(&map_ops[cur_page], addr, + GNTMAP_host_map, + page_dir->gref[cur_gref], + buf->xb_dev->otherend_id); + cur_page++; + } + + grefs_left -= to_copy; + ptr += PAGE_SIZE; + } + ret = gnttab_map_refs(map_ops, NULL, buf->pages, buf->num_pages); + + /* save handles even if error, so we can unmap */ + for (cur_page = 0; cur_page < buf->num_pages; cur_page++) { + buf->backend_map_handles[cur_page] = map_ops[cur_page].handle; + if (unlikely(map_ops[cur_page].status != GNTST_okay)) + DRM_ERROR("Failed to map page %d: %d\n", + cur_page, map_ops[cur_page].status); + } + + if (ret) { + DRM_ERROR("Failed to map grant references, ret %d", ret); + backend_unmap(buf); + } + + kfree(map_ops); + return ret; +} + +static void backend_fill_page_dir(struct xen_drm_front_shbuf *buf) +{ + struct xendispl_page_directory *page_dir; + unsigned char *ptr; + int i, num_pages_dir; + + ptr = buf->directory; + num_pages_dir = get_num_pages_dir(buf); + + /* fill only grefs for the page directory itself */ + for (i = 0; i < num_pages_dir - 1; i++) { + page_dir = (struct xendispl_page_directory *)ptr; + + page_dir->gref_dir_next_page = buf->grefs[i + 1]; + ptr += PAGE_SIZE; + } + /* last page must say there is no more pages */ + page_dir = (struct xendispl_page_directory *)ptr; + page_dir->gref_dir_next_page = GRANT_INVALID_REF; +} + +static void guest_fill_page_dir(struct xen_drm_front_shbuf *buf) +{ + unsigned char *ptr; + int cur_gref, grefs_left, to_copy, i, num_pages_dir; + + ptr = buf->directory; + num_pages_dir = get_num_pages_dir(buf); + + /* + * while copying, skip grefs at start, they are for pages + * granted for the page directory itself + */ + cur_gref = num_pages_dir; + grefs_left = buf->num_pages; + for (i = 0; i < num_pages_dir; i++) { + struct xendispl_page_directory *page_dir = + (struct xendispl_page_directory *)ptr; + + if (grefs_left <= XEN_DRM_NUM_GREFS_PER_PAGE) { + to_copy = grefs_left; + page_dir->gref_dir_next_page = GRANT_INVALID_REF; + } else { + to_copy = XEN_DRM_NUM_GREFS_PER_PAGE; + page_dir->gref_dir_next_page = buf->grefs[i + 1]; + } + memcpy(&page_dir->gref, &buf->grefs[cur_gref], + to_copy * sizeof(grant_ref_t)); + ptr += PAGE_SIZE; + grefs_left -= to_copy; + cur_gref += to_copy; + } +} + +static int guest_grant_refs_for_buffer(struct xen_drm_front_shbuf *buf, + grant_ref_t *priv_gref_head, + int gref_idx) +{ + int i, cur_ref, otherend_id; + + otherend_id = buf->xb_dev->otherend_id; + for (i = 0; i < buf->num_pages; i++) { + cur_ref = gnttab_claim_grant_reference(priv_gref_head); + if (cur_ref < 0) + return cur_ref; + + gnttab_grant_foreign_access_ref(cur_ref, otherend_id, + xen_page_to_gfn(buf->pages[i]), + 0); + buf->grefs[gref_idx++] = cur_ref; + } + return 0; +} + +static int grant_references(struct xen_drm_front_shbuf *buf) +{ + grant_ref_t priv_gref_head; + int ret, i, j, cur_ref; + int otherend_id, num_pages_dir; + + ret = gnttab_alloc_grant_references(buf->num_grefs, &priv_gref_head); + if (ret < 0) { + DRM_ERROR("Cannot allocate grant references\n"); + return ret; + } + + otherend_id = buf->xb_dev->otherend_id; + j = 0; + num_pages_dir = get_num_pages_dir(buf); + for (i = 0; i < num_pages_dir; i++) { + unsigned long frame; + + cur_ref = gnttab_claim_grant_reference(&priv_gref_head); + if (cur_ref < 0) + return cur_ref; + + frame = xen_page_to_gfn(virt_to_page(buf->directory + + PAGE_SIZE * i)); + gnttab_grant_foreign_access_ref(cur_ref, otherend_id, frame, 0); + buf->grefs[j++] = cur_ref; + } + + if (buf->ops->grant_refs_for_buffer) { + ret = buf->ops->grant_refs_for_buffer(buf, &priv_gref_head, j); + if (ret) + return ret; + } + + gnttab_free_grant_references(priv_gref_head); + return 0; +} + +static int alloc_storage(struct xen_drm_front_shbuf *buf) +{ + buf->grefs = kcalloc(buf->num_grefs, sizeof(*buf->grefs), GFP_KERNEL); + if (!buf->grefs) + return -ENOMEM; + + buf->directory = kcalloc(get_num_pages_dir(buf), PAGE_SIZE, GFP_KERNEL); + if (!buf->directory) + return -ENOMEM; + + return 0; +} + +/* + * For be allocated buffers we don't need grant_refs_for_buffer as those + * grant references are allocated at backend side + */ +static const struct xen_drm_front_shbuf_ops backend_ops = { + .calc_num_grefs = backend_calc_num_grefs, + .fill_page_dir = backend_fill_page_dir, + .map = backend_map, + .unmap = backend_unmap +}; + +/* For locally granted references we do not need to map/unmap the references */ +static const struct xen_drm_front_shbuf_ops local_ops = { + .calc_num_grefs = guest_calc_num_grefs, + .fill_page_dir = guest_fill_page_dir, + .grant_refs_for_buffer = guest_grant_refs_for_buffer, +}; + +struct xen_drm_front_shbuf * +xen_drm_front_shbuf_alloc(struct xen_drm_front_shbuf_cfg *cfg) +{ + struct xen_drm_front_shbuf *buf; + int ret; + + buf = kzalloc(sizeof(*buf), GFP_KERNEL); + if (!buf) + return ERR_PTR(-ENOMEM); + + if (cfg->be_alloc) + buf->ops = &backend_ops; + else + buf->ops = &local_ops; + + buf->xb_dev = cfg->xb_dev; + buf->num_pages = DIV_ROUND_UP(cfg->size, PAGE_SIZE); + buf->pages = cfg->pages; + + buf->ops->calc_num_grefs(buf); + + ret = alloc_storage(buf); + if (ret) + goto fail; + + ret = grant_references(buf); + if (ret) + goto fail; + + buf->ops->fill_page_dir(buf); + + return buf; + +fail: + xen_drm_front_shbuf_free(buf); + return ERR_PTR(ret); +} diff --git a/drivers/gpu/drm/xen/xen_drm_front_shbuf.h b/drivers/gpu/drm/xen/xen_drm_front_shbuf.h new file mode 100644 index 0000000..7545c69 --- /dev/null +++ b/drivers/gpu/drm/xen/xen_drm_front_shbuf.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ + +/* + * Xen para-virtual DRM device + * + * Copyright (C) 2016-2018 EPAM Systems Inc. + * + * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com> + */ + +#ifndef __XEN_DRM_FRONT_SHBUF_H_ +#define __XEN_DRM_FRONT_SHBUF_H_ + +#include <linux/kernel.h> +#include <linux/scatterlist.h> + +#include <xen/grant_table.h> + +struct xen_drm_front_shbuf { + /* + * number of references granted for the backend use: + * - for allocated/imported dma-buf's this holds number of grant + * references for the page directory and pages of the buffer + * - for the buffer provided by the backend this holds number of + * grant references for the page directory as grant references for + * the buffer will be provided by the backend + */ + int num_grefs; + grant_ref_t *grefs; + unsigned char *directory; + + int num_pages; + struct page **pages; + + struct xenbus_device *xb_dev; + + /* these are the ops used internally depending on be_alloc mode */ + const struct xen_drm_front_shbuf_ops *ops; + + /* Xen map handles for the buffer allocated by the backend */ + grant_handle_t *backend_map_handles; +}; + +struct xen_drm_front_shbuf_cfg { + struct xenbus_device *xb_dev; + size_t size; + struct page **pages; + bool be_alloc; +}; + +struct xen_drm_front_shbuf * +xen_drm_front_shbuf_alloc(struct xen_drm_front_shbuf_cfg *cfg); + +grant_ref_t xen_drm_front_shbuf_get_dir_start(struct xen_drm_front_shbuf *buf); + +int xen_drm_front_shbuf_map(struct xen_drm_front_shbuf *buf); + +int xen_drm_front_shbuf_unmap(struct xen_drm_front_shbuf *buf); + +void xen_drm_front_shbuf_flush(struct xen_drm_front_shbuf *buf); + +void xen_drm_front_shbuf_free(struct xen_drm_front_shbuf *buf); + +#endif /* __XEN_DRM_FRONT_SHBUF_H_ */ diff --git a/drivers/gpu/drm/zte/zx_plane.c b/drivers/gpu/drm/zte/zx_plane.c index 94545ad..d1931f5 100644 --- a/drivers/gpu/drm/zte/zx_plane.c +++ b/drivers/gpu/drm/zte/zx_plane.c @@ -268,7 +268,7 @@ static void zx_plane_atomic_disable(struct drm_plane *plane, struct zx_plane *zplane = to_zx_plane(plane); void __iomem *hbsc = zplane->hbsc; - zx_vou_layer_disable(plane); + zx_vou_layer_disable(plane, old_state); /* Disable HBSC block */ zx_writel_mask(hbsc + HBSC_CTRL0, HBSC_CTRL_EN, 0); diff --git a/drivers/gpu/drm/zte/zx_vou.c b/drivers/gpu/drm/zte/zx_vou.c index 7491813..442311d 100644 --- a/drivers/gpu/drm/zte/zx_vou.c +++ b/drivers/gpu/drm/zte/zx_vou.c @@ -627,9 +627,10 @@ void zx_vou_layer_enable(struct drm_plane *plane) zx_writel_mask(vou->osd + OSD_CTRL0, bits->enable, bits->enable); } -void zx_vou_layer_disable(struct drm_plane *plane) +void zx_vou_layer_disable(struct drm_plane *plane, + struct drm_plane_state *old_state) { - struct zx_crtc *zcrtc = to_zx_crtc(plane->crtc); + struct zx_crtc *zcrtc = to_zx_crtc(old_state->crtc); struct zx_vou_hw *vou = zcrtc->vou; struct zx_plane *zplane = to_zx_plane(plane); const struct vou_layer_bits *bits = zplane->bits; diff --git a/drivers/gpu/drm/zte/zx_vou.h b/drivers/gpu/drm/zte/zx_vou.h index 97d72bf..5b7f84f 100644 --- a/drivers/gpu/drm/zte/zx_vou.h +++ b/drivers/gpu/drm/zte/zx_vou.h @@ -62,6 +62,7 @@ void zx_vou_config_dividers(struct drm_crtc *crtc, struct vou_div_config *configs, int num); void zx_vou_layer_enable(struct drm_plane *plane); -void zx_vou_layer_disable(struct drm_plane *plane); +void zx_vou_layer_disable(struct drm_plane *plane, + struct drm_plane_state *old_state); #endif /* __ZX_VOU_H__ */ |