Diffstat (limited to 'drivers')
-rw-r--r--  drivers/dma-buf/sync_debug.h | 10
-rw-r--r--  drivers/gpu/drm/Kconfig | 11
-rw-r--r--  drivers/gpu/drm/Makefile | 3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/Makefile | 3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 26
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 13
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 10
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 10
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 1043
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 572
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 111
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 11
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 38
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/soc15d.h | 5
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/Makefile | 10
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c | 20
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/cik_regs.h | 3
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h | 560
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm | 274
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm | 1214
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 52
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 11
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device.c | 131
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 114
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 2
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c | 84
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 65
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_events.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 119
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 92
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c | 8
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 39
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h | 7
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c | 9
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c | 340
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c | 319
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_module.c | 7
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 3
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 443
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 392
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h | 583
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 112
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_process.c | 50
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 22
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_queue.c | 8
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 1
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/soc15_int.h | 47
-rw-r--r--  drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 26
-rw-r--r--  drivers/gpu/drm/amd/include/v9_structs.h | 48
-rw-r--r--  drivers/gpu/drm/bridge/adv7511/Kconfig | 2
-rw-r--r--  drivers/gpu/drm/bridge/adv7511/adv7511_drv.c | 2
-rw-r--r--  drivers/gpu/drm/bridge/dumb-vga-dac.c | 4
-rw-r--r--  drivers/gpu/drm/drm_atomic.c | 4
-rw-r--r--  drivers/gpu/drm/drm_atomic_helper.c | 2
-rw-r--r--  drivers/gpu/drm/drm_connector.c | 50
-rw-r--r--  drivers/gpu/drm/drm_crtc.c | 9
-rw-r--r--  drivers/gpu/drm/drm_dp_helper.c | 22
-rw-r--r--  drivers/gpu/drm/drm_drv.c | 10
-rw-r--r--  drivers/gpu/drm/drm_edid.c | 41
-rw-r--r--  drivers/gpu/drm/drm_fb_helper.c | 12
-rw-r--r--  drivers/gpu/drm/drm_framebuffer.c | 3
-rw-r--r--  drivers/gpu/drm/drm_ioc32.c | 4
-rw-r--r--  drivers/gpu/drm/drm_ioctl.c | 85
-rw-r--r--  drivers/gpu/drm/drm_modes.c | 179
-rw-r--r--  drivers/gpu/drm/drm_panel_orientation_quirks.c | 7
-rw-r--r--  drivers/gpu/drm/drm_prime.c | 8
-rw-r--r--  drivers/gpu/drm/drm_property.c | 27
-rw-r--r--  drivers/gpu/drm/drm_rect.c | 74
-rw-r--r--  drivers/gpu/drm/drm_sysfs.c | 4
-rw-r--r--  drivers/gpu/drm/exynos/Kconfig | 18
-rw-r--r--  drivers/gpu/drm/exynos/Makefile | 2
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_drv.c | 35
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_drv.h | 10
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_dsi.c | 40
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_fimc.c | 1080
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_fimc.h | 23
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_fimd.c | 8
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_gem.c | 21
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_gem.h | 3
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_gsc.c | 1075
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_gsc.h | 24
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_ipp.c | 916
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_ipp.h | 175
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_rotator.c | 758
-rw-r--r--  drivers/gpu/drm/exynos/exynos_drm_scaler.c | 694
-rw-r--r--  drivers/gpu/drm/exynos/exynos_hdmi.c | 2
-rw-r--r--  drivers/gpu/drm/exynos/exynos_mixer.c | 22
-rw-r--r--  drivers/gpu/drm/exynos/regs-mixer.h | 1
-rw-r--r--  drivers/gpu/drm/exynos/regs-scaler.h | 426
-rw-r--r--  drivers/gpu/drm/gma500/cdv_device.c | 4
-rw-r--r--  drivers/gpu/drm/gma500/psb_intel_sdvo.c | 2
-rw-r--r--  drivers/gpu/drm/i915/Kconfig.debug | 2
-rw-r--r--  drivers/gpu/drm/i915/i915_drv.c | 8
-rw-r--r--  drivers/gpu/drm/i915/intel_lrc.c | 1
-rw-r--r--  drivers/gpu/drm/i915/intel_sdvo.c | 5
-rw-r--r--  drivers/gpu/drm/i915/intel_sprite.c | 146
-rw-r--r--  drivers/gpu/drm/mediatek/Kconfig | 1
-rw-r--r--  drivers/gpu/drm/mediatek/mtk_dpi.c | 60
-rw-r--r--  drivers/gpu/drm/mediatek/mtk_drm_gem.c | 2
-rw-r--r--  drivers/gpu/drm/mediatek/mtk_dsi.c | 14
-rw-r--r--  drivers/gpu/drm/msm/adreno/adreno_device.c | 1
-rw-r--r--  drivers/gpu/drm/msm/msm_debugfs.c | 3
-rw-r--r--  drivers/gpu/drm/nouveau/nouveau_display.c | 4
-rw-r--r--  drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c | 14
-rw-r--r--  drivers/gpu/drm/pl111/Makefile | 1
-rw-r--r--  drivers/gpu/drm/pl111/pl111_drm.h | 1
-rw-r--r--  drivers/gpu/drm/pl111/pl111_drv.c | 34
-rw-r--r--  drivers/gpu/drm/pl111/pl111_versatile.c | 56
-rw-r--r--  drivers/gpu/drm/pl111/pl111_vexpress.c | 134
-rw-r--r--  drivers/gpu/drm/pl111/pl111_vexpress.h | 29
-rw-r--r--  drivers/gpu/drm/qxl/qxl_cmd.c | 36
-rw-r--r--  drivers/gpu/drm/qxl/qxl_display.c | 276
-rw-r--r--  drivers/gpu/drm/qxl/qxl_drv.h | 3
-rw-r--r--  drivers/gpu/drm/qxl/qxl_fb.c | 2
-rw-r--r--  drivers/gpu/drm/qxl/qxl_irq.c | 3
-rw-r--r--  drivers/gpu/drm/qxl/qxl_ttm.c | 8
-rw-r--r--  drivers/gpu/drm/rcar-du/rcar_du_crtc.c | 26
-rw-r--r--  drivers/gpu/drm/rcar-du/rcar_du_crtc.h | 3
-rw-r--r--  drivers/gpu/drm/rcar-du/rcar_du_drv.c | 51
-rw-r--r--  drivers/gpu/drm/rcar-du/rcar_du_drv.h | 4
-rw-r--r--  drivers/gpu/drm/rcar-du/rcar_du_group.c | 16
-rw-r--r--  drivers/gpu/drm/rcar-du/rcar_du_group.h | 2
-rw-r--r--  drivers/gpu/drm/rcar-du/rcar_du_kms.c | 25
-rw-r--r--  drivers/gpu/drm/rcar-du/rcar_du_of.c | 1
-rw-r--r--  drivers/gpu/drm/rcar-du/rcar_du_regs.h | 16
-rw-r--r--  drivers/gpu/drm/rcar-du/rcar_du_vsp.c | 10
-rw-r--r--  drivers/gpu/drm/rockchip/rockchip_drm_vop.c | 13
-rw-r--r--  drivers/gpu/drm/selftests/Makefile | 2
-rw-r--r--  drivers/gpu/drm/selftests/drm_helper_selftests.h | 9
-rw-r--r--  drivers/gpu/drm/selftests/test-drm-helper.c | 247
-rw-r--r--  drivers/gpu/drm/sti/sti_crtc.c | 2
-rw-r--r--  drivers/gpu/drm/stm/ltdc.c | 53
-rw-r--r--  drivers/gpu/drm/tilcdc/tilcdc_crtc.c | 2
-rw-r--r--  drivers/gpu/drm/tinydrm/mi0283qt.c | 41
-rw-r--r--  drivers/gpu/drm/v3d/Kconfig | 9
-rw-r--r--  drivers/gpu/drm/v3d/Makefile | 18
-rw-r--r--  drivers/gpu/drm/v3d/v3d_bo.c | 389
-rw-r--r--  drivers/gpu/drm/v3d/v3d_debugfs.c | 191
-rw-r--r--  drivers/gpu/drm/v3d/v3d_drv.c | 371
-rw-r--r--  drivers/gpu/drm/v3d/v3d_drv.h | 294
-rw-r--r--  drivers/gpu/drm/v3d/v3d_fence.c | 58
-rw-r--r--  drivers/gpu/drm/v3d/v3d_gem.c | 668
-rw-r--r--  drivers/gpu/drm/v3d/v3d_irq.c | 206
-rw-r--r--  drivers/gpu/drm/v3d/v3d_mmu.c | 122
-rw-r--r--  drivers/gpu/drm/v3d/v3d_regs.h | 295
-rw-r--r--  drivers/gpu/drm/v3d/v3d_sched.c | 228
-rw-r--r--  drivers/gpu/drm/v3d/v3d_trace.h | 82
-rw-r--r--  drivers/gpu/drm/v3d/v3d_trace_points.c | 9
-rw-r--r--  drivers/gpu/drm/vc4/vc4_crtc.c | 46
-rw-r--r--  drivers/gpu/drm/vc4/vc4_drv.c | 3
-rw-r--r--  drivers/gpu/drm/vc4/vc4_drv.h | 1
-rw-r--r--  drivers/gpu/drm/vc4/vc4_dsi.c | 5
-rw-r--r--  drivers/gpu/drm/vc4/vc4_gem.c | 57
-rw-r--r--  drivers/gpu/drm/vc4/vc4_v3d.c | 3
-rw-r--r--  drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 8
-rw-r--r--  drivers/gpu/drm/vmwgfx/vmwgfx_fb.c | 31
-rw-r--r--  drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 14
-rw-r--r--  drivers/gpu/drm/xen/xen_drm_front.c | 10
-rw-r--r--  drivers/gpu/drm/xen/xen_drm_front_shbuf.c | 2
-rw-r--r--  drivers/video/hdmi.c | 3
165 files changed, 13762 insertions, 3718 deletions
diff --git a/drivers/dma-buf/sync_debug.h b/drivers/dma-buf/sync_debug.h
index d615a89..05e33f9 100644
--- a/drivers/dma-buf/sync_debug.h
+++ b/drivers/dma-buf/sync_debug.h
@@ -62,8 +62,6 @@ struct sync_pt {
struct rb_node node;
};
-#ifdef CONFIG_SW_SYNC
-
extern const struct file_operations sw_sync_debugfs_fops;
void sync_timeline_debug_add(struct sync_timeline *obj);
@@ -72,12 +70,4 @@ void sync_file_debug_add(struct sync_file *fence);
void sync_file_debug_remove(struct sync_file *fence);
void sync_dump(void);
-#else
-# define sync_timeline_debug_add(obj)
-# define sync_timeline_debug_remove(obj)
-# define sync_file_debug_add(fence)
-# define sync_file_debug_remove(fence)
-# define sync_dump()
-#endif
-
#endif /* _LINUX_SYNC_H */
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 757825a..2a72d2f 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -49,16 +49,17 @@ config DRM_DEBUG_MM
If in doubt, say "N".
-config DRM_DEBUG_MM_SELFTEST
- tristate "kselftests for DRM range manager (struct drm_mm)"
+config DRM_DEBUG_SELFTEST
+ tristate "kselftests for DRM"
depends on DRM
depends on DEBUG_KERNEL
select PRIME_NUMBERS
select DRM_LIB_RANDOM
+ select DRM_KMS_HELPER
default n
help
- This option provides a kernel module that can be used to test
- the DRM range manager (drm_mm) and its API. This option is not
+ This option provides kernel modules that can be used to run
+ various selftests on parts of the DRM api. This option is not
useful for distributions or general kernels, but only for kernel
developers working on DRM and associated drivers.
@@ -267,6 +268,8 @@ source "drivers/gpu/drm/amd/amdkfd/Kconfig"
source "drivers/gpu/drm/imx/Kconfig"
+source "drivers/gpu/drm/v3d/Kconfig"
+
source "drivers/gpu/drm/vc4/Kconfig"
source "drivers/gpu/drm/etnaviv/Kconfig"
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 9d66657..ef9f3da 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -43,7 +43,7 @@ drm_kms_helper-$(CONFIG_DRM_KMS_CMA_HELPER) += drm_fb_cma_helper.o
drm_kms_helper-$(CONFIG_DRM_DP_AUX_CHARDEV) += drm_dp_aux_dev.o
obj-$(CONFIG_DRM_KMS_HELPER) += drm_kms_helper.o
-obj-$(CONFIG_DRM_DEBUG_MM_SELFTEST) += selftests/
+obj-$(CONFIG_DRM_DEBUG_SELFTEST) += selftests/
obj-$(CONFIG_DRM) += drm.o
obj-$(CONFIG_DRM_MIPI_DSI) += drm_mipi_dsi.o
@@ -61,6 +61,7 @@ obj-$(CONFIG_DRM_MGA) += mga/
obj-$(CONFIG_DRM_I810) += i810/
obj-$(CONFIG_DRM_I915) += i915/
obj-$(CONFIG_DRM_MGAG200) += mgag200/
+obj-$(CONFIG_DRM_V3D) += v3d/
obj-$(CONFIG_DRM_VC4) += vc4/
obj-$(CONFIG_DRM_CIRRUS_QEMU) += cirrus/
obj-$(CONFIG_DRM_SIS) += sis/
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 2ca2b51..f300202 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -130,7 +130,8 @@ amdgpu-y += \
amdgpu_amdkfd.o \
amdgpu_amdkfd_fence.o \
amdgpu_amdkfd_gpuvm.o \
- amdgpu_amdkfd_gfx_v8.o
+ amdgpu_amdkfd_gfx_v8.o \
+ amdgpu_amdkfd_gfx_v9.o
# add cgs
amdgpu-y += amdgpu_cgs.o
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 4d36203..cd0e8f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -92,6 +92,10 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
case CHIP_POLARIS11:
kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
break;
+ case CHIP_VEGA10:
+ case CHIP_RAVEN:
+ kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions();
+ break;
default:
dev_dbg(adev->dev, "kfd not supported on this ASIC\n");
return;
@@ -175,6 +179,28 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
&gpu_resources.doorbell_physical_address,
&gpu_resources.doorbell_aperture_size,
&gpu_resources.doorbell_start_offset);
+ if (adev->asic_type >= CHIP_VEGA10) {
+ /* On SOC15 the BIF is involved in routing
+ * doorbells using the low 12 bits of the
+ * address. Communicate the assignments to
+ * KFD. KFD uses two doorbell pages per
+ * process in case of 64-bit doorbells so we
+ * can use each doorbell assignment twice.
+ */
+ gpu_resources.sdma_doorbell[0][0] =
+ AMDGPU_DOORBELL64_sDMA_ENGINE0;
+ gpu_resources.sdma_doorbell[0][1] =
+ AMDGPU_DOORBELL64_sDMA_ENGINE0 + 0x200;
+ gpu_resources.sdma_doorbell[1][0] =
+ AMDGPU_DOORBELL64_sDMA_ENGINE1;
+ gpu_resources.sdma_doorbell[1][1] =
+ AMDGPU_DOORBELL64_sDMA_ENGINE1 + 0x200;
+ /* Doorbells 0x0f0-0ff and 0x2f0-2ff are reserved for
+ * SDMA, IH and VCN. So don't use them for the CP.
+ */
+ gpu_resources.reserved_doorbell_mask = 0x1f0;
+ gpu_resources.reserved_doorbell_val = 0x0f0;
+ }
kgd2kfd->device_init(adev->kfd, &gpu_resources);
}
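
The reserved-doorbell pair set in the hunk above acts as a mask/value filter: a doorbell index is reserved exactly when (index & mask) == value. A minimal sketch of that check (helper name hypothetical, not part of this patch):

static inline bool doorbell_index_is_reserved(unsigned int index)
{
	/* mask 0x1f0, value 0x0f0: within a 0x000-0x3ff per-process
	 * doorbell range this matches 0x0f0-0x0ff and 0x2f0-0x2ff,
	 * the ranges the comment above reserves for SDMA, IH and VCN.
	 */
	return (index & 0x1f0) == 0x0f0;
}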
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index c2c2bea..12367a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -28,6 +28,7 @@
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/mmu_context.h>
+#include <linux/workqueue.h>
#include <kgd_kfd_interface.h>
#include <drm/ttm/ttm_execbuf_util.h>
#include "amdgpu_sync.h"
@@ -59,7 +60,9 @@ struct kgd_mem {
uint32_t mapping_flags;
+ atomic_t invalid;
struct amdkfd_process_info *process_info;
+ struct page **user_pages;
struct amdgpu_sync sync;
@@ -84,6 +87,9 @@ struct amdkfd_process_info {
struct list_head vm_list_head;
/* List head for all KFD BOs that belong to a KFD process. */
struct list_head kfd_bo_list;
+ /* List of userptr BOs that are valid or invalid */
+ struct list_head userptr_valid_list;
+ struct list_head userptr_inval_list;
/* Lock to protect kfd_bo_list */
struct mutex lock;
@@ -91,6 +97,11 @@ struct amdkfd_process_info {
unsigned int n_vms;
/* Eviction Fence */
struct amdgpu_amdkfd_fence *eviction_fence;
+
+ /* MMU-notifier related fields */
+ atomic_t evicted_bos;
+ struct delayed_work restore_userptr_work;
+ struct pid *pid;
};
int amdgpu_amdkfd_init(void);
@@ -104,12 +115,14 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev);
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev);
void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev);
+int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm);
int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
uint32_t vmid, uint64_t gpu_addr,
uint32_t *ib_cmd, uint32_t ib_len);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void);
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index ea54e53..0ff36d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -98,8 +98,6 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
unsigned int vmid);
-static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
- uint32_t hpd_size, uint64_t hpd_gpu_addr);
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
uint32_t queue_id, uint32_t __user *wptr,
@@ -183,7 +181,6 @@ static const struct kfd2kgd_calls kfd2kgd = {
.free_pasid = amdgpu_pasid_free,
.program_sh_mem_settings = kgd_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
- .init_pipeline = kgd_init_pipeline,
.init_interrupts = kgd_init_interrupts,
.hqd_load = kgd_hqd_load,
.hqd_sdma_load = kgd_hqd_sdma_load,
@@ -309,13 +306,6 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
return 0;
}
-static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
- uint32_t hpd_size, uint64_t hpd_gpu_addr)
-{
- /* amdgpu owns the per-pipe state */
- return 0;
-}
-
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 89264c9..6ef9762 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -57,8 +57,6 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
uint32_t sh_mem_bases);
static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
unsigned int vmid);
-static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
- uint32_t hpd_size, uint64_t hpd_gpu_addr);
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
uint32_t queue_id, uint32_t __user *wptr,
@@ -141,7 +139,6 @@ static const struct kfd2kgd_calls kfd2kgd = {
.free_pasid = amdgpu_pasid_free,
.program_sh_mem_settings = kgd_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
- .init_pipeline = kgd_init_pipeline,
.init_interrupts = kgd_init_interrupts,
.hqd_load = kgd_hqd_load,
.hqd_sdma_load = kgd_hqd_sdma_load,
@@ -270,13 +267,6 @@ static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
return 0;
}
-static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id,
- uint32_t hpd_size, uint64_t hpd_gpu_addr)
-{
- /* amdgpu owns the per-pipe state */
- return 0;
-}
-
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
new file mode 100644
index 0000000..8f37991
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -0,0 +1,1043 @@
+/*
+ * Copyright 2014-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#define pr_fmt(fmt) "kfd2kgd: " fmt
+
+#include <linux/module.h>
+#include <linux/fdtable.h>
+#include <linux/uaccess.h>
+#include <linux/firmware.h>
+#include <drm/drmP.h>
+#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
+#include "amdgpu_ucode.h"
+#include "soc15_hw_ip.h"
+#include "gc/gc_9_0_offset.h"
+#include "gc/gc_9_0_sh_mask.h"
+#include "vega10_enum.h"
+#include "sdma0/sdma0_4_0_offset.h"
+#include "sdma0/sdma0_4_0_sh_mask.h"
+#include "sdma1/sdma1_4_0_offset.h"
+#include "sdma1/sdma1_4_0_sh_mask.h"
+#include "athub/athub_1_0_offset.h"
+#include "athub/athub_1_0_sh_mask.h"
+#include "oss/osssys_4_0_offset.h"
+#include "oss/osssys_4_0_sh_mask.h"
+#include "soc15_common.h"
+#include "v9_structs.h"
+#include "soc15.h"
+#include "soc15d.h"
+
+/* HACK: MMHUB and GC both have VM-related register with the same
+ * names but different offsets. Define the MMHUB register we need here
+ * with a prefix. A proper solution would be to move the functions
+ * programming these registers into gfx_v9_0.c and mmhub_v1_0.c
+ * respectively.
+ */
+#define mmMMHUB_VM_INVALIDATE_ENG16_REQ 0x06f3
+#define mmMMHUB_VM_INVALIDATE_ENG16_REQ_BASE_IDX 0
+
+#define mmMMHUB_VM_INVALIDATE_ENG16_ACK 0x0705
+#define mmMMHUB_VM_INVALIDATE_ENG16_ACK_BASE_IDX 0
+
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32 0x072b
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32_BASE_IDX 0
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32 0x072c
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32_BASE_IDX 0
+
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32 0x074b
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32_BASE_IDX 0
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32 0x074c
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32_BASE_IDX 0
+
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32 0x076b
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32_BASE_IDX 0
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32 0x076c
+#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32_BASE_IDX 0
+
+#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32 0x0727
+#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32_BASE_IDX 0
+#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32 0x0728
+#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32_BASE_IDX 0
+
+#define V9_PIPE_PER_MEC (4)
+#define V9_QUEUES_PER_PIPE_MEC (8)
+
+enum hqd_dequeue_request_type {
+ NO_ACTION = 0,
+ DRAIN_PIPE,
+ RESET_WAVES
+};
+
+/*
+ * Register access functions
+ */
+
+static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
+ uint32_t sh_mem_config,
+ uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
+ uint32_t sh_mem_bases);
+static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
+ unsigned int vmid);
+static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
+static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t __user *wptr,
+ uint32_t wptr_shift, uint32_t wptr_mask,
+ struct mm_struct *mm);
+static int kgd_hqd_dump(struct kgd_dev *kgd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs);
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+ uint32_t __user *wptr, struct mm_struct *mm);
+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs);
+static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
+ uint32_t pipe_id, uint32_t queue_id);
+static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+ enum kfd_preempt_type reset_type,
+ unsigned int utimeout, uint32_t pipe_id,
+ uint32_t queue_id);
+static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
+ unsigned int utimeout);
+static int kgd_address_watch_disable(struct kgd_dev *kgd);
+static int kgd_address_watch_execute(struct kgd_dev *kgd,
+ unsigned int watch_point_id,
+ uint32_t cntl_val,
+ uint32_t addr_hi,
+ uint32_t addr_lo);
+static int kgd_wave_control_execute(struct kgd_dev *kgd,
+ uint32_t gfx_index_val,
+ uint32_t sq_cmd);
+static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
+ unsigned int watch_point_id,
+ unsigned int reg_offset);
+
+static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
+ uint8_t vmid);
+static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
+ uint8_t vmid);
+static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
+ uint32_t page_table_base);
+static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
+static void set_scratch_backing_va(struct kgd_dev *kgd,
+ uint64_t va, uint32_t vmid);
+static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
+static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
+
+/* Because of REG_GET_FIELD() being used, we put this function in the
+ * asic specific file.
+ */
+static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
+ struct tile_config *config)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+ config->gb_addr_config = adev->gfx.config.gb_addr_config;
+
+ config->tile_config_ptr = adev->gfx.config.tile_mode_array;
+ config->num_tile_configs =
+ ARRAY_SIZE(adev->gfx.config.tile_mode_array);
+ config->macro_tile_config_ptr =
+ adev->gfx.config.macrotile_mode_array;
+ config->num_macro_tile_configs =
+ ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
+
+ return 0;
+}
+
+static const struct kfd2kgd_calls kfd2kgd = {
+ .init_gtt_mem_allocation = alloc_gtt_mem,
+ .free_gtt_mem = free_gtt_mem,
+ .get_local_mem_info = get_local_mem_info,
+ .get_gpu_clock_counter = get_gpu_clock_counter,
+ .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
+ .alloc_pasid = amdgpu_pasid_alloc,
+ .free_pasid = amdgpu_pasid_free,
+ .program_sh_mem_settings = kgd_program_sh_mem_settings,
+ .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
+ .init_interrupts = kgd_init_interrupts,
+ .hqd_load = kgd_hqd_load,
+ .hqd_sdma_load = kgd_hqd_sdma_load,
+ .hqd_dump = kgd_hqd_dump,
+ .hqd_sdma_dump = kgd_hqd_sdma_dump,
+ .hqd_is_occupied = kgd_hqd_is_occupied,
+ .hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
+ .hqd_destroy = kgd_hqd_destroy,
+ .hqd_sdma_destroy = kgd_hqd_sdma_destroy,
+ .address_watch_disable = kgd_address_watch_disable,
+ .address_watch_execute = kgd_address_watch_execute,
+ .wave_control_execute = kgd_wave_control_execute,
+ .address_watch_get_offset = kgd_address_watch_get_offset,
+ .get_atc_vmid_pasid_mapping_pasid =
+ get_atc_vmid_pasid_mapping_pasid,
+ .get_atc_vmid_pasid_mapping_valid =
+ get_atc_vmid_pasid_mapping_valid,
+ .get_fw_version = get_fw_version,
+ .set_scratch_backing_va = set_scratch_backing_va,
+ .get_tile_config = amdgpu_amdkfd_get_tile_config,
+ .get_cu_info = get_cu_info,
+ .get_vram_usage = amdgpu_amdkfd_get_vram_usage,
+ .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
+ .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
+ .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
+ .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
+ .set_vm_context_page_table_base = set_vm_context_page_table_base,
+ .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu,
+ .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu,
+ .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu,
+ .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu,
+ .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory,
+ .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel,
+ .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos,
+ .invalidate_tlbs = invalidate_tlbs,
+ .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
+ .submit_ib = amdgpu_amdkfd_submit_ib,
+};
+
+struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)
+{
+ return (struct kfd2kgd_calls *)&kfd2kgd;
+}
+
+static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
+{
+ return (struct amdgpu_device *)kgd;
+}
+
+static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
+ uint32_t queue, uint32_t vmid)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ mutex_lock(&adev->srbm_mutex);
+ soc15_grbm_select(adev, mec, pipe, queue, vmid);
+}
+
+static void unlock_srbm(struct kgd_dev *kgd)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ soc15_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);
+}
+
+static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ lock_srbm(kgd, mec, pipe, queue_id, 0);
+}
+
+static uint32_t get_queue_mask(struct amdgpu_device *adev,
+ uint32_t pipe_id, uint32_t queue_id)
+{
+ unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe +
+ queue_id) & 31;
+
+ return ((uint32_t)1) << bit;
+}
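/* Worked example for get_queue_mask() (values assumed for
 * illustration, not part of this patch): with
 * adev->gfx.mec.num_queue_per_pipe == 8, pipe_id 2 and queue_id 3
 * give bit = (2 * 8 + 3) & 31 = 19, so the returned mask is
 * 1 << 19 = 0x00080000. The "& 31" wraps the bit index into the
 * 32-bit CP_PQ_WPTR_POLL_CNTL1 register written in kgd_hqd_load().
 */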
+
+static void release_queue(struct kgd_dev *kgd)
+{
+ unlock_srbm(kgd);
+}
+
+static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
+ uint32_t sh_mem_config,
+ uint32_t sh_mem_ape1_base,
+ uint32_t sh_mem_ape1_limit,
+ uint32_t sh_mem_bases)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ lock_srbm(kgd, 0, 0, 0, vmid);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
+ /* APE1 no longer exists on GFX9 */
+
+ unlock_srbm(kgd);
+}
+
+static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
+ unsigned int vmid)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ /*
+ * We have to assume that there is no outstanding mapping.
+ * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
+ * a mapping is in progress or because a mapping finished
+ * and the SW cleared it.
+ * So the protocol is to always wait & clear.
+ */
+ uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
+ ATC_VMID0_PASID_MAPPING__VALID_MASK;
+
+ /*
+ * need to do this twice, once for gfx and once for mmhub
+ * for ATC add 16 to VMID for mmhub, for IH different registers.
+ * ATC_VMID0..15 registers are separate from ATC_VMID16..31.
+ */
+
+ WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid,
+ pasid_mapping);
+
+ while (!(RREG32(SOC15_REG_OFFSET(
+ ATHUB, 0,
+ mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
+ (1U << vmid)))
+ cpu_relax();
+
+ WREG32(SOC15_REG_OFFSET(ATHUB, 0,
+ mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
+ 1U << vmid);
+
+ /* Mapping vmid to pasid also for IH block */
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid,
+ pasid_mapping);
+
+ WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID16_PASID_MAPPING) + vmid,
+ pasid_mapping);
+
+ while (!(RREG32(SOC15_REG_OFFSET(
+ ATHUB, 0,
+ mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
+ (1U << (vmid + 16))))
+ cpu_relax();
+
+ WREG32(SOC15_REG_OFFSET(ATHUB, 0,
+ mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
+ 1U << (vmid + 16));
+
+ /* Mapping vmid to pasid also for IH block */
+ WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT_MM) + vmid,
+ pasid_mapping);
+ return 0;
+}
+
+/* TODO - RING0 form of field is obsolete, seems to date back to SI
+ * but still works
+ */
+
+static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t mec;
+ uint32_t pipe;
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ lock_srbm(kgd, mec, pipe, 0, 0);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL),
+ CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
+ CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
+
+ unlock_srbm(kgd);
+
+ return 0;
+}
+
+static uint32_t get_sdma_base_addr(struct amdgpu_device *adev,
+ unsigned int engine_id,
+ unsigned int queue_id)
+{
+ uint32_t base[2] = {
+ SOC15_REG_OFFSET(SDMA0, 0,
+ mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL,
+ SOC15_REG_OFFSET(SDMA1, 0,
+ mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL
+ };
+ uint32_t retval;
+
+ retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL -
+ mmSDMA0_RLC0_RB_CNTL);
+
+ pr_debug("sdma base address: 0x%x\n", retval);
+
+ return retval;
+}
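/* Illustration of the addressing above (engine/queue values assumed,
 * not part of this patch): base[] stores each engine's RLC0_RB_CNTL
 * location with the per-engine mmSDMA*_RLC0_RB_CNTL constant
 * subtracted back out, so for engine_id 1, queue_id 2 the result is
 * the SDMA1 instance base plus
 * 2 * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL), i.e. two
 * per-queue register strides. Callers then add mmSDMA0_RLC0_*
 * offsets on top to reach the matching RLC2 registers.
 */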
+
+static inline struct v9_mqd *get_mqd(void *mqd)
+{
+ return (struct v9_mqd *)mqd;
+}
+
+static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+ return (struct v9_sdma_mqd *)mqd;
+}
+
+static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
+ uint32_t queue_id, uint32_t __user *wptr,
+ uint32_t wptr_shift, uint32_t wptr_mask,
+ struct mm_struct *mm)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct v9_mqd *m;
+ uint32_t *mqd_hqd;
+ uint32_t reg, hqd_base, data;
+
+ m = get_mqd(mqd);
+
+ acquire_queue(kgd, pipe_id, queue_id);
+
+ /* HIQ is set during driver init period with vmid set to 0*/
+ if (m->cp_hqd_vmid == 0) {
+ uint32_t value, mec, pipe;
+
+ mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
+ pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
+
+ pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
+ mec, pipe, queue_id);
+ value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
+ value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
+ ((mec << 5) | (pipe << 3) | queue_id | 0x80));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
+ }
+
+ /* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
+ mqd_hqd = &m->cp_mqd_base_addr_lo;
+ hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
+
+ for (reg = hqd_base;
+ reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
+ WREG32(reg, mqd_hqd[reg - hqd_base]);
+
+
+ /* Activate doorbell logic before triggering WPTR poll. */
+ data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
+ CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
+
+ if (wptr) {
+ /* Don't read wptr with get_user because the user
+ * context may not be accessible (if this function
+ * runs in a work queue). Instead trigger a one-shot
+ * polling read from memory in the CP. This assumes
+ * that wptr is GPU-accessible in the queue's VMID via
+ * ATC or SVM. WPTR==RPTR before starting the poll so
+ * the CP starts fetching new commands from the right
+ * place.
+ *
+ * Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
+ * tricky. Assume that the queue didn't overflow. The
+ * number of valid bits in the 32-bit RPTR depends on
+ * the queue size. The remaining bits are taken from
+ * the saved 64-bit WPTR. If the WPTR wrapped, add the
+ * queue size.
+ */
+ uint32_t queue_size =
+ 2 << REG_GET_FIELD(m->cp_hqd_pq_control,
+ CP_HQD_PQ_CONTROL, QUEUE_SIZE);
+ uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);
+
+ if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
+ guessed_wptr += queue_size;
+ guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
+ guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
+ lower_32_bits(guessed_wptr));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
+ upper_32_bits(guessed_wptr));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
+ lower_32_bits((uint64_t)wptr));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
+ upper_32_bits((uint64_t)wptr));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
+ get_queue_mask(adev, pipe_id, queue_id));
+ }
+
+ /* Start the EOP fetcher */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
+ REG_SET_FIELD(m->cp_hqd_eop_rptr,
+ CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
+
+ data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
+
+ release_queue(kgd);
+
+ return 0;
+}
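/* Worked example of the WPTR reconstruction above (all values assumed
 * for illustration, not part of this patch): a QUEUE_SIZE field of 7
 * gives queue_size = 2 << 7 = 0x100. With a saved MQD of
 * cp_hqd_pq_wptr_lo = 0x1220, cp_hqd_pq_wptr_hi = 0 and
 * cp_hqd_pq_rptr = 0x30:
 *   guessed_wptr = 0x30                       (RPTR low bits)
 *   0x1220 & 0xff = 0x20 < 0x30               -> wrapped, += 0x100
 *   guessed_wptr = 0x130 + (0x1220 & ~0xff)   =  0x1330
 * The CP resumes its poll from 0x1330, one wrap past the saved WPTR's
 * low bits, consistent with the stated no-overflow assumption.
 */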
+
+static int kgd_hqd_dump(struct kgd_dev *kgd,
+ uint32_t pipe_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t i = 0, reg;
+#define HQD_N_REGS 56
+#define DUMP_REG(addr) do { \
+ if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
+ break; \
+ (*dump)[i][0] = (addr) << 2; \
+ (*dump)[i++][1] = RREG32(addr); \
+ } while (0)
+
+ *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ acquire_queue(kgd, pipe_id, queue_id);
+
+ for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
+ reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
+ DUMP_REG(reg);
+
+ release_queue(kgd);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
+ uint32_t __user *wptr, struct mm_struct *mm)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_base_addr, sdmax_gfx_context_cntl;
+ unsigned long end_jiffies;
+ uint32_t data;
+ uint64_t data64;
+ uint64_t __user *wptr64 = (uint64_t __user *)wptr;
+
+ m = get_sdma_mqd(mqd);
+ sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+ sdmax_gfx_context_cntl = m->sdma_engine_id ?
+ SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_CONTEXT_CNTL) :
+ SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_CONTEXT_CNTL);
+
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+ m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
+
+ end_jiffies = msecs_to_jiffies(2000) + jiffies;
+ while (true) {
+ data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies))
+ return -ETIME;
+ usleep_range(500, 1000);
+ }
+ data = RREG32(sdmax_gfx_context_cntl);
+ data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
+ RESUME_CTX, 0);
+ WREG32(sdmax_gfx_context_cntl, data);
+
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET,
+ m->sdmax_rlcx_doorbell_offset);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
+ ENABLE, 1);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
+ if (read_user_wptr(mm, wptr64, data64)) {
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+ lower_32_bits(data64));
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
+ upper_32_bits(data64));
+ } else {
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
+ m->sdmax_rlcx_rb_rptr);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
+ m->sdmax_rlcx_rb_rptr_hi);
+ }
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);
+
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
+ m->sdmax_rlcx_rb_base_hi);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
+ m->sdmax_rlcx_rb_rptr_addr_lo);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
+ m->sdmax_rlcx_rb_rptr_addr_hi);
+
+ data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
+ RB_ENABLE, 1);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);
+
+ return 0;
+}
+
+static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
+ uint32_t engine_id, uint32_t queue_id,
+ uint32_t (**dump)[2], uint32_t *n_regs)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id);
+ uint32_t i = 0, reg;
+#undef HQD_N_REGS
+#define HQD_N_REGS (19+6+7+10)
+
+ *dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
+ if (*dump == NULL)
+ return -ENOMEM;
+
+ for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
+ DUMP_REG(sdma_base_addr + reg);
+ for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
+ DUMP_REG(sdma_base_addr + reg);
+ for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
+ reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
+ DUMP_REG(sdma_base_addr + reg);
+ for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
+ reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
+ DUMP_REG(sdma_base_addr + reg);
+
+ WARN_ON_ONCE(i != HQD_N_REGS);
+ *n_regs = i;
+
+ return 0;
+}
+
+static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
+ uint32_t pipe_id, uint32_t queue_id)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t act;
+ bool retval = false;
+ uint32_t low, high;
+
+ acquire_queue(kgd, pipe_id, queue_id);
+ act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
+ if (act) {
+ low = lower_32_bits(queue_address >> 8);
+ high = upper_32_bits(queue_address >> 8);
+
+ if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) &&
+ high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI)))
+ retval = true;
+ }
+ release_queue(kgd);
+ return retval;
+}
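/* Note on the ">> 8" above (example address assumed, not part of this
 * patch): CP_HQD_PQ_BASE holds the queue base in 256-byte units, so a
 * queue at GPU address 0x1234500 compares as 0x1234500 >> 8 = 0x12345
 * against the programmed PQ_BASE/PQ_BASE_HI pair.
 */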
+
+static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_base_addr;
+ uint32_t sdma_rlc_rb_cntl;
+
+ m = get_sdma_mqd(mqd);
+ sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+
+ if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
+ return true;
+
+ return false;
+}
+
+static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
+ enum kfd_preempt_type reset_type,
+ unsigned int utimeout, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ enum hqd_dequeue_request_type type;
+ unsigned long end_jiffies;
+ uint32_t temp;
+ struct v9_mqd *m = get_mqd(mqd);
+
+ acquire_queue(kgd, pipe_id, queue_id);
+
+ if (m->cp_hqd_vmid == 0)
+ WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
+
+ switch (reset_type) {
+ case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
+ type = DRAIN_PIPE;
+ break;
+ case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
+ type = RESET_WAVES;
+ break;
+ default:
+ type = DRAIN_PIPE;
+ break;
+ }
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
+
+ end_jiffies = (utimeout * HZ / 1000) + jiffies;
+ while (true) {
+ temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
+ if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
+ break;
+ if (time_after(jiffies, end_jiffies)) {
+ pr_err("cp queue preemption time out.\n");
+ release_queue(kgd);
+ return -ETIME;
+ }
+ usleep_range(500, 1000);
+ }
+
+ release_queue(kgd);
+ return 0;
+}
+
+static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
+ unsigned int utimeout)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ struct v9_sdma_mqd *m;
+ uint32_t sdma_base_addr;
+ uint32_t temp;
+ unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
+
+ m = get_sdma_mqd(mqd);
+ sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
+ m->sdma_queue_id);
+
+ temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
+ temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);
+
+ while (true) {
+ temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
+ if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
+ break;
+ if (time_after(jiffies, end_jiffies))
+ return -ETIME;
+ usleep_range(500, 1000);
+ }
+
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
+ WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
+ RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
+ SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
+
+ m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
+ m->sdmax_rlcx_rb_rptr_hi =
+ RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI);
+
+ return 0;
+}
+
+static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
+ uint8_t vmid)
+{
+ uint32_t reg;
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+ reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ + vmid);
+ return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
+}
+
+static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
+ uint8_t vmid)
+{
+ uint32_t reg;
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+ reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ + vmid);
+ return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+}
+
+static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+ uint32_t req = (1 << vmid) |
+ (0 << VM_INVALIDATE_ENG16_REQ__FLUSH_TYPE__SHIFT) | /* legacy */
+ VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PTES_MASK |
+ VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE0_MASK |
+ VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE1_MASK |
+ VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE2_MASK |
+ VM_INVALIDATE_ENG16_REQ__INVALIDATE_L1_PTES_MASK;
+
+ mutex_lock(&adev->srbm_mutex);
+
+ /* Use legacy mode tlb invalidation.
+ *
+ * Currently on Raven the code below is broken for anything but
+ * legacy mode due to a MMHUB power gating problem. A workaround
+ * is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ
+ * == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack
+ * bit.
+ *
+ * TODO 1: agree on the right set of invalidation registers for
+ * KFD use. Use the last one for now. Invalidate both GC and
+ * MMHUB.
+ *
+ * TODO 2: support range-based invalidation, requires kfg2kgd
+ * interface change
+ */
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_LO32),
+ 0xffffffff);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_HI32),
+ 0x0000001f);
+
+ WREG32(SOC15_REG_OFFSET(MMHUB, 0,
+ mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32),
+ 0xffffffff);
+ WREG32(SOC15_REG_OFFSET(MMHUB, 0,
+ mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32),
+ 0x0000001f);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_REQ), req);
+
+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_REQ),
+ req);
+
+ while (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ACK)) &
+ (1 << vmid)))
+ cpu_relax();
+
+ while (!(RREG32(SOC15_REG_OFFSET(MMHUB, 0,
+ mmMMHUB_VM_INVALIDATE_ENG16_ACK)) &
+ (1 << vmid)))
+ cpu_relax();
+
+ mutex_unlock(&adev->srbm_mutex);
+
+}
+
+static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
+{
+ signed long r;
+ uint32_t seq;
+ struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
+
+ spin_lock(&adev->gfx.kiq.ring_lock);
+ amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
+ amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+ amdgpu_ring_write(ring,
+ PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
+ PACKET3_INVALIDATE_TLBS_ALL_HUB(1) |
+ PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+ PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(0)); /* legacy */
+ amdgpu_fence_emit_polling(ring, &seq);
+ amdgpu_ring_commit(ring);
+ spin_unlock(&adev->gfx.kiq.ring_lock);
+
+ r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+ if (r < 1) {
+ DRM_ERROR("wait for kiq fence error: %ld.\n", r);
+ return -ETIME;
+ }
+
+ return 0;
+}
+
+static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+ int vmid;
+ struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
+
+ if (ring->ready)
+ return invalidate_tlbs_with_kiq(adev, pasid);
+
+ for (vmid = 0; vmid < 16; vmid++) {
+ if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
+ continue;
+ if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {
+ if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid)
+ == pasid) {
+ write_vmid_invalidate_request(kgd, vmid);
+ break;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+
+ if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+ pr_err("non kfd vmid %d\n", vmid);
+ return 0;
+ }
+
+ write_vmid_invalidate_request(kgd, vmid);
+ return 0;
+}
+
+static int kgd_address_watch_disable(struct kgd_dev *kgd)
+{
+ return 0;
+}
+
+static int kgd_address_watch_execute(struct kgd_dev *kgd,
+ unsigned int watch_point_id,
+ uint32_t cntl_val,
+ uint32_t addr_hi,
+ uint32_t addr_lo)
+{
+ return 0;
+}
+
+static int kgd_wave_control_execute(struct kgd_dev *kgd,
+ uint32_t gfx_index_val,
+ uint32_t sq_cmd)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint32_t data = 0;
+
+ mutex_lock(&adev->grbm_idx_mutex);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);
+
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ INSTANCE_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SH_BROADCAST_WRITES, 1);
+ data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
+ SE_BROADCAST_WRITES, 1);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data);
+ mutex_unlock(&adev->grbm_idx_mutex);
+
+ return 0;
+}
+
+static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
+ unsigned int watch_point_id,
+ unsigned int reg_offset)
+{
+ return 0;
+}
+
+static void set_scratch_backing_va(struct kgd_dev *kgd,
+ uint64_t va, uint32_t vmid)
+{
+ /* No longer needed on GFXv9. The scratch base address is
+ * passed to the shader by the CP. It's the user mode driver's
+ * responsibility.
+ */
+}
+
+/* FIXME: Does this need to be ASIC-specific code? */
+static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+ const union amdgpu_firmware_header *hdr;
+
+ switch (type) {
+ case KGD_ENGINE_PFP:
+ hdr = (const union amdgpu_firmware_header *)adev->gfx.pfp_fw->data;
+ break;
+
+ case KGD_ENGINE_ME:
+ hdr = (const union amdgpu_firmware_header *)adev->gfx.me_fw->data;
+ break;
+
+ case KGD_ENGINE_CE:
+ hdr = (const union amdgpu_firmware_header *)adev->gfx.ce_fw->data;
+ break;
+
+ case KGD_ENGINE_MEC1:
+ hdr = (const union amdgpu_firmware_header *)adev->gfx.mec_fw->data;
+ break;
+
+ case KGD_ENGINE_MEC2:
+ hdr = (const union amdgpu_firmware_header *)adev->gfx.mec2_fw->data;
+ break;
+
+ case KGD_ENGINE_RLC:
+ hdr = (const union amdgpu_firmware_header *)adev->gfx.rlc_fw->data;
+ break;
+
+ case KGD_ENGINE_SDMA1:
+ hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[0].fw->data;
+ break;
+
+ case KGD_ENGINE_SDMA2:
+ hdr = (const union amdgpu_firmware_header *)adev->sdma.instance[1].fw->data;
+ break;
+
+ default:
+ return 0;
+ }
+
+ if (hdr == NULL)
+ return 0;
+
+ /* Only 12 bit in use*/
+ return hdr->common.ucode_version;
+}
+
+static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
+ uint32_t page_table_base)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+ uint64_t base = (uint64_t)page_table_base << PAGE_SHIFT |
+ AMDGPU_PTE_VALID;
+
+ if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+ pr_err("trying to set page table base for wrong VMID %u\n",
+ vmid);
+ return;
+ }
+
+ /* TODO: take advantage of per-process address space size. For
+ * now, all processes share the same address space size, like
+ * on GFX8 and older.
+ */
+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0);
+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0);
+
+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2),
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2),
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+
+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base));
+ WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base));
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0);
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0);
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2),
+ lower_32_bits(adev->vm_manager.max_pfn - 1));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2),
+ upper_32_bits(adev->vm_manager.max_pfn - 1));
+
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base));
+ WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base));
+}
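
The vmid*2 stride in the writes above reflects that each VM context owns an adjacent LO32/HI32 register pair (see the MMHUB #defines at the top of the file, where each HI32 offset is the LO32 offset plus one). A compact sketch of the addressing (helper name hypothetical, not part of this patch):

/* Each context's HI32 register sits one dword above its LO32. */
static uint32_t vm_ctx_reg(uint32_t ctx0_lo32_offset, uint32_t vmid,
			   bool hi)
{
	return ctx0_lo32_offset + vmid * 2 + (hi ? 1 : 0);
}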
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 1d6e147..5296e24 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -23,6 +23,7 @@
#define pr_fmt(fmt) "kfd2kgd: " fmt
#include <linux/list.h>
+#include <linux/sched/mm.h>
#include <drm/drmP.h>
#include "amdgpu_object.h"
#include "amdgpu_vm.h"
@@ -33,10 +34,20 @@
*/
#define VI_BO_SIZE_ALIGN (0x8000)
+/* BO flag to indicate a KFD userptr BO */
+#define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63)
+
+/* Userptr restore delay, just long enough to allow consecutive VM
+ * changes to accumulate
+ */
+#define AMDGPU_USERPTR_RESTORE_DELAY_MS 1
+
/* Impose limit on how much memory KFD can use */
static struct {
uint64_t max_system_mem_limit;
+ uint64_t max_userptr_mem_limit;
int64_t system_mem_used;
+ int64_t userptr_mem_used;
spinlock_t mem_limit_lock;
} kfd_mem_limit;
@@ -57,6 +68,7 @@ static const char * const domain_bit_to_string[] = {
#define domain_string(domain) domain_bit_to_string[ffs(domain)-1]
+static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work);
static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
@@ -78,6 +90,7 @@ static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm,
/* Set memory usage limits. Current, limits are
* System (kernel) memory - 3/8th System RAM
+ * Userptr memory - 3/4th System RAM
*/
void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
{
@@ -90,8 +103,10 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
spin_lock_init(&kfd_mem_limit.mem_limit_lock);
kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3);
- pr_debug("Kernel memory limit %lluM\n",
- (kfd_mem_limit.max_system_mem_limit >> 20));
+ kfd_mem_limit.max_userptr_mem_limit = mem - (mem >> 2);
+ pr_debug("Kernel memory limit %lluM, userptr limit %lluM\n",
+ (kfd_mem_limit.max_system_mem_limit >> 20),
+ (kfd_mem_limit.max_userptr_mem_limit >> 20));
}
static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
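
The shift arithmetic in the hunk above encodes the two advertised fractions exactly; a quick check (illustrative values only, not part of this patch):

/* (mem >> 1) - (mem >> 3) = mem/2 - mem/8 = 3/8 of system RAM
 *  mem - (mem >> 2)       = mem - mem/4   = 3/4 of system RAM
 * e.g. mem = 16 GiB -> system limit 6 GiB, userptr limit 12 GiB.
 */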
@@ -111,6 +126,16 @@ static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
goto err_no_mem;
}
kfd_mem_limit.system_mem_used += (acc_size + size);
+ } else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
+ if ((kfd_mem_limit.system_mem_used + acc_size >
+ kfd_mem_limit.max_system_mem_limit) ||
+ (kfd_mem_limit.userptr_mem_used + (size + acc_size) >
+ kfd_mem_limit.max_userptr_mem_limit)) {
+ ret = -ENOMEM;
+ goto err_no_mem;
+ }
+ kfd_mem_limit.system_mem_used += acc_size;
+ kfd_mem_limit.userptr_mem_used += size;
}
err_no_mem:
spin_unlock(&kfd_mem_limit.mem_limit_lock);
@@ -126,10 +151,16 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev,
sizeof(struct amdgpu_bo));
spin_lock(&kfd_mem_limit.mem_limit_lock);
- if (domain == AMDGPU_GEM_DOMAIN_GTT)
+ if (domain == AMDGPU_GEM_DOMAIN_GTT) {
kfd_mem_limit.system_mem_used -= (acc_size + size);
+ } else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
+ kfd_mem_limit.system_mem_used -= acc_size;
+ kfd_mem_limit.userptr_mem_used -= size;
+ }
WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
"kfd system memory accounting unbalanced");
+ WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
+ "kfd userptr memory accounting unbalanced");
spin_unlock(&kfd_mem_limit.mem_limit_lock);
}
@@ -138,12 +169,17 @@ void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
{
spin_lock(&kfd_mem_limit.mem_limit_lock);
- if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
+ if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
+ kfd_mem_limit.system_mem_used -= bo->tbo.acc_size;
+ kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo);
+ } else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
kfd_mem_limit.system_mem_used -=
(bo->tbo.acc_size + amdgpu_bo_size(bo));
}
WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
"kfd system memory accounting unbalanced");
+ WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
+ "kfd userptr memory accounting unbalanced");
spin_unlock(&kfd_mem_limit.mem_limit_lock);
}
@@ -506,7 +542,8 @@ static void remove_bo_from_vm(struct amdgpu_device *adev,
}
static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
- struct amdkfd_process_info *process_info)
+ struct amdkfd_process_info *process_info,
+ bool userptr)
{
struct ttm_validate_buffer *entry = &mem->validate_list;
struct amdgpu_bo *bo = mem->bo;
@@ -515,10 +552,95 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
entry->shared = true;
entry->bo = &bo->tbo;
mutex_lock(&process_info->lock);
- list_add_tail(&entry->head, &process_info->kfd_bo_list);
+ if (userptr)
+ list_add_tail(&entry->head, &process_info->userptr_valid_list);
+ else
+ list_add_tail(&entry->head, &process_info->kfd_bo_list);
mutex_unlock(&process_info->lock);
}
+/* Initializes user pages. It registers the MMU notifier and validates
+ * the userptr BO in the GTT domain.
+ *
+ * The BO must already be on the userptr_valid_list. Otherwise an
+ * eviction and restore may happen that leaves the new BO unmapped
+ * with the user mode queues running.
+ *
+ * Takes the process_info->lock to protect against concurrent restore
+ * workers.
+ *
+ * Returns 0 for success, negative errno for errors.
+ */
+static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
+ uint64_t user_addr)
+{
+ struct amdkfd_process_info *process_info = mem->process_info;
+ struct amdgpu_bo *bo = mem->bo;
+ struct ttm_operation_ctx ctx = { true, false };
+ int ret = 0;
+
+ mutex_lock(&process_info->lock);
+
+ ret = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, user_addr, 0);
+ if (ret) {
+ pr_err("%s: Failed to set userptr: %d\n", __func__, ret);
+ goto out;
+ }
+
+ ret = amdgpu_mn_register(bo, user_addr);
+ if (ret) {
+ pr_err("%s: Failed to register MMU notifier: %d\n",
+ __func__, ret);
+ goto out;
+ }
+
+ /* If no restore worker is running concurrently, user_pages
+ * should not be allocated
+ */
+ WARN(mem->user_pages, "Leaking user_pages array");
+
+ mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
+ sizeof(struct page *),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!mem->user_pages) {
+ pr_err("%s: Failed to allocate pages array\n", __func__);
+ ret = -ENOMEM;
+ goto unregister_out;
+ }
+
+ ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
+ if (ret) {
+ pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
+ goto free_out;
+ }
+
+ amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);
+
+ ret = amdgpu_bo_reserve(bo, true);
+ if (ret) {
+ pr_err("%s: Failed to reserve BO\n", __func__);
+ goto release_out;
+ }
+ amdgpu_ttm_placement_from_domain(bo, mem->domain);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (ret)
+ pr_err("%s: failed to validate BO\n", __func__);
+ amdgpu_bo_unreserve(bo);
+
+release_out:
+ if (ret)
+ release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
+free_out:
+ kvfree(mem->user_pages);
+ mem->user_pages = NULL;
+unregister_out:
+ if (ret)
+ amdgpu_mn_unregister(bo);
+out:
+ mutex_unlock(&process_info->lock);
+ return ret;
+}
+
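init_user_pages() follows the usual kernel goto-unwind idiom with one twist: the pages array is freed on success and failure alike (after a successful validate the ttm owns the pages), while release_pages() and amdgpu_mn_unregister() run only when ret is set. A stripped-down sketch of that shape, using hypothetical stand-in helpers:

#include <errno.h>
#include <stdlib.h>

static int step_register(void) { return 0; }	/* hypothetical stand-ins */
static int step_validate(void) { return 0; }
static void step_unregister(void) { }

static int setup(void)
{
	char *pages;
	int ret;

	ret = step_register();		/* like amdgpu_mn_register() */
	if (ret)
		goto out;

	pages = malloc(64);		/* like the user_pages array */
	if (!pages) {
		ret = -ENOMEM;
		goto unregister_out;
	}

	ret = step_validate();		/* like ttm_bo_validate() */

	/* freed on success and failure alike: after a successful
	 * validate, the new owner holds the pages
	 */
	free(pages);
unregister_out:
	if (ret)			/* undone only on failure */
		step_unregister();
out:
	return ret;
}

int main(void) { return setup(); }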
/* Reserving a BO and its page table BOs must happen atomically to
* avoid deadlocks. Some operations update multiple VMs at once. Track
* all the reservation info in a context structure. Optionally a sync
@@ -748,7 +870,8 @@ static int update_gpuvm_pte(struct amdgpu_device *adev,
}
static int map_bo_to_gpuvm(struct amdgpu_device *adev,
- struct kfd_bo_va_list *entry, struct amdgpu_sync *sync)
+ struct kfd_bo_va_list *entry, struct amdgpu_sync *sync,
+ bool no_update_pte)
{
int ret;
@@ -762,6 +885,9 @@ static int map_bo_to_gpuvm(struct amdgpu_device *adev,
return ret;
}
+ if (no_update_pte)
+ return 0;
+
ret = update_gpuvm_pte(adev, entry, sync);
if (ret) {
pr_err("update_gpuvm_pte() failed\n");
@@ -820,6 +946,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
mutex_init(&info->lock);
INIT_LIST_HEAD(&info->vm_list_head);
INIT_LIST_HEAD(&info->kfd_bo_list);
+ INIT_LIST_HEAD(&info->userptr_valid_list);
+ INIT_LIST_HEAD(&info->userptr_inval_list);
info->eviction_fence =
amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
@@ -830,6 +958,11 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
goto create_evict_fence_fail;
}
+ info->pid = get_task_pid(current->group_leader, PIDTYPE_PID);
+ atomic_set(&info->evicted_bos, 0);
+ INIT_DELAYED_WORK(&info->restore_userptr_work,
+ amdgpu_amdkfd_restore_userptr_worker);
+
*process_info = info;
*ef = dma_fence_get(&info->eviction_fence->base);
}
@@ -872,6 +1005,7 @@ reserve_pd_fail:
dma_fence_put(*ef);
*ef = NULL;
*process_info = NULL;
+ put_pid(info->pid);
create_evict_fence_fail:
mutex_destroy(&info->lock);
kfree(info);
@@ -967,8 +1101,12 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
/* Release per-process resources when last compute VM is destroyed */
if (!process_info->n_vms) {
WARN_ON(!list_empty(&process_info->kfd_bo_list));
+ WARN_ON(!list_empty(&process_info->userptr_valid_list));
+ WARN_ON(!list_empty(&process_info->userptr_inval_list));
dma_fence_put(&process_info->eviction_fence->base);
+ cancel_delayed_work_sync(&process_info->restore_userptr_work);
+ put_pid(process_info->pid);
mutex_destroy(&process_info->lock);
kfree(process_info);
}
@@ -1003,9 +1141,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+ uint64_t user_addr = 0;
struct amdgpu_bo *bo;
int byte_align;
- u32 alloc_domain;
+ u32 domain, alloc_domain;
u64 alloc_flags;
uint32_t mapping_flags;
int ret;
@@ -1014,14 +1153,21 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
* Check on which domain to allocate BO
*/
if (flags & ALLOC_MEM_FLAGS_VRAM) {
- alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
+ domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
alloc_flags = AMDGPU_GEM_CREATE_VRAM_CLEARED;
alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ?
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED :
AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
} else if (flags & ALLOC_MEM_FLAGS_GTT) {
- alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
+ domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
+ alloc_flags = 0;
+ } else if (flags & ALLOC_MEM_FLAGS_USERPTR) {
+ domain = AMDGPU_GEM_DOMAIN_GTT;
+ alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
alloc_flags = 0;
+ if (!offset || !*offset)
+ return -EINVAL;
+ user_addr = *offset;
} else {
return -EINVAL;
}
@@ -1078,18 +1224,34 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
}
bo->kfd_bo = *mem;
(*mem)->bo = bo;
+ if (user_addr)
+ bo->flags |= AMDGPU_AMDKFD_USERPTR_BO;
(*mem)->va = va;
- (*mem)->domain = alloc_domain;
+ (*mem)->domain = domain;
(*mem)->mapped_to_gpu_memory = 0;
(*mem)->process_info = avm->process_info;
- add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info);
+ add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr);
+
+ if (user_addr) {
+ ret = init_user_pages(*mem, current->mm, user_addr);
+ if (ret) {
+ mutex_lock(&avm->process_info->lock);
+ list_del(&(*mem)->validate_list.head);
+ mutex_unlock(&avm->process_info->lock);
+ goto allocate_init_user_pages_failed;
+ }
+ }
if (offset)
*offset = amdgpu_bo_mmap_offset(bo);
return 0;
+allocate_init_user_pages_failed:
+ amdgpu_bo_unref(&bo);
+ /* Don't unreserve system mem limit twice */
+ goto err_reserve_system_mem;
err_bo_create:
unreserve_system_mem_limit(adev, size, alloc_domain);
err_reserve_system_mem:
@@ -1122,12 +1284,24 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
* be freed anyway
*/
+ /* No more MMU notifiers */
+ amdgpu_mn_unregister(mem->bo);
+
/* Make sure restore workers don't access the BO any more */
bo_list_entry = &mem->validate_list;
mutex_lock(&process_info->lock);
list_del(&bo_list_entry->head);
mutex_unlock(&process_info->lock);
+ /* Free user pages if necessary */
+ if (mem->user_pages) {
+ pr_debug("%s: Freeing user_pages array\n", __func__);
+ if (mem->user_pages[0])
+ release_pages(mem->user_pages,
+ mem->bo->tbo.ttm->num_pages);
+ kvfree(mem->user_pages);
+ }
+
ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
if (unlikely(ret))
return ret;
@@ -1173,21 +1347,32 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
struct kfd_bo_va_list *bo_va_entry = NULL;
struct kfd_bo_va_list *bo_va_entry_aql = NULL;
unsigned long bo_size;
-
- /* Make sure restore is not running concurrently.
- */
- mutex_lock(&mem->process_info->lock);
-
- mutex_lock(&mem->lock);
+ bool is_invalid_userptr = false;
bo = mem->bo;
-
if (!bo) {
pr_err("Invalid BO when mapping memory to GPU\n");
- ret = -EINVAL;
- goto out;
+ return -EINVAL;
}
+ /* Make sure restore is not running concurrently. Since we
+ * don't map invalid userptr BOs, we rely on the next restore
+ * worker to do the mapping
+ */
+ mutex_lock(&mem->process_info->lock);
+
+ /* Lock mmap-sem. If we find an invalid userptr BO, we can be
+ * sure that the MMU notifier is no longer running
+ * concurrently and the queues are actually stopped
+ */
+ if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
+ down_write(&current->mm->mmap_sem);
+ is_invalid_userptr = atomic_read(&mem->invalid);
+ up_write(&current->mm->mmap_sem);
+ }
+
+ mutex_lock(&mem->lock);
+
domain = mem->domain;
bo_size = bo->tbo.mem.size;
@@ -1200,6 +1385,14 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
if (unlikely(ret))
goto out;
+ /* Userptr can be marked as "not invalid", but not actually be
+ * validated yet (still in the system domain). In that case
+ * the queues are still stopped and we can leave mapping for
+ * the next restore worker
+ */
+ if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
+ is_invalid_userptr = true;
+
if (check_if_add_bo_to_vm(avm, mem)) {
ret = add_bo_to_vm(adev, mem, avm, false,
&bo_va_entry);
@@ -1217,7 +1410,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
goto add_bo_to_vm_failed;
}
- if (mem->mapped_to_gpu_memory == 0) {
+ if (mem->mapped_to_gpu_memory == 0 &&
+ !amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
/* Validate BO only once. The eviction fence gets added to BO
* the first time it is mapped. Validate will wait for all
* background evictions to complete.
@@ -1235,7 +1429,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
entry->va, entry->va + bo_size,
entry);
- ret = map_bo_to_gpuvm(adev, entry, ctx.sync);
+ ret = map_bo_to_gpuvm(adev, entry, ctx.sync,
+ is_invalid_userptr);
if (ret) {
pr_err("Failed to map radeon bo to gpuvm\n");
goto map_bo_to_gpuvm_failed;
@@ -1418,6 +1613,337 @@ bo_reserve_failed:
return ret;
}
+/* Evict a userptr BO by stopping the queues if necessary
+ *
+ * Runs in MMU notifier, may be in RECLAIM_FS context. This means it
+ * cannot do any memory allocations, and cannot take any locks that
+ * are held elsewhere while allocating memory. Therefore this is as
+ * simple as possible, using atomic counters.
+ *
+ * It doesn't do anything to the BO itself. The real work happens in
+ * restore, where we get updated page addresses. This function only
+ * ensures that GPU access to the BO is stopped.
+ */
+int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem,
+ struct mm_struct *mm)
+{
+ struct amdkfd_process_info *process_info = mem->process_info;
+ int invalid, evicted_bos;
+ int r = 0;
+
+ invalid = atomic_inc_return(&mem->invalid);
+ evicted_bos = atomic_inc_return(&process_info->evicted_bos);
+ if (evicted_bos == 1) {
+ /* First eviction, stop the queues */
+ r = kgd2kfd->quiesce_mm(mm);
+ if (r)
+ pr_err("Failed to quiesce KFD\n");
+ schedule_delayed_work(&process_info->restore_userptr_work,
+ msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
+ }
+
+ return r;
+}
+
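The whole eviction path hinges on a single process-wide counter: only the 0->1 transition quiesces the queues, and the restore side may swing the counter back to 0 only if no eviction raced in between. A compact C11-atomics sketch of that handshake (illustrative, not the kernel code):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int evicted_bos;

static bool evict(void)
{
	/* true means this caller must stop the queues (quiesce_mm) */
	return atomic_fetch_add(&evicted_bos, 1) == 0;
}

static bool try_resume(int seen)
{
	/* resume only if the count we restored against is unchanged */
	return atomic_compare_exchange_strong(&evicted_bos, &seen, 0);
}

int main(void)
{
	int seen;

	if (evict())
		puts("first eviction: stop queues");
	seen = atomic_load(&evicted_bos);
	/* ... restore work happens here ... */
	if (try_resume(seen))
		puts("no concurrent eviction: restart queues");
	return 0;
}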
+/* Update invalid userptr BOs
+ *
+ * Moves invalidated (evicted) userptr BOs from userptr_valid_list to
+ * userptr_inval_list and updates user pages for all BOs that have
+ * been invalidated since their last update.
+ */
+static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
+ struct mm_struct *mm)
+{
+ struct kgd_mem *mem, *tmp_mem;
+ struct amdgpu_bo *bo;
+ struct ttm_operation_ctx ctx = { false, false };
+ int invalid, ret;
+
+ /* Move all invalidated BOs to the userptr_inval_list and
+ * release their user pages by migration to the CPU domain
+ */
+ list_for_each_entry_safe(mem, tmp_mem,
+ &process_info->userptr_valid_list,
+ validate_list.head) {
+ if (!atomic_read(&mem->invalid))
+ continue; /* BO is still valid */
+
+ bo = mem->bo;
+
+ if (amdgpu_bo_reserve(bo, true))
+ return -EAGAIN;
+ amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ amdgpu_bo_unreserve(bo);
+ if (ret) {
+ pr_err("%s: Failed to invalidate userptr BO\n",
+ __func__);
+ return -EAGAIN;
+ }
+
+ list_move_tail(&mem->validate_list.head,
+ &process_info->userptr_inval_list);
+ }
+
+ if (list_empty(&process_info->userptr_inval_list))
+ return 0; /* All evicted userptr BOs were freed */
+
+ /* Go through userptr_inval_list and update any invalid user_pages */
+ list_for_each_entry(mem, &process_info->userptr_inval_list,
+ validate_list.head) {
+ invalid = atomic_read(&mem->invalid);
+ if (!invalid)
+ /* BO hasn't been invalidated since the last
+ * revalidation attempt. Keep its BO list.
+ */
+ continue;
+
+ bo = mem->bo;
+
+ if (!mem->user_pages) {
+ mem->user_pages =
+ kvmalloc_array(bo->tbo.ttm->num_pages,
+ sizeof(struct page *),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!mem->user_pages) {
+ pr_err("%s: Failed to allocate pages array\n",
+ __func__);
+ return -ENOMEM;
+ }
+ } else if (mem->user_pages[0]) {
+ release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
+ }
+
+ /* Get updated user pages */
+ ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
+ mem->user_pages);
+ if (ret) {
+ mem->user_pages[0] = NULL;
+ pr_info("%s: Failed to get user pages: %d\n",
+ __func__, ret);
+ /* Pretend it succeeded. It will fail later
+ * with a VM fault if the GPU tries to access
+ * it. Better than hanging indefinitely with
+ * stalled user mode queues.
+ */
+ }
+
+ /* Mark the BO as valid unless it was invalidated
+ * again concurrently
+ */
+ if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
+ return -EAGAIN;
+ }
+
+ return 0;
+}
+
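Each BO additionally carries its own "invalid" generation counter with the same shape: snapshot it, refresh the pages, then declare the BO valid only if the snapshot still matches, otherwise retry with -EAGAIN. A sketch under the same illustrative assumptions:

#include <stdatomic.h>
#include <stdbool.h>

struct bo {
	atomic_int invalid;	/* bumped by every MMU invalidation */
};

static bool refresh_bo(struct bo *bo)
{
	int gen = atomic_load(&bo->invalid);

	if (!gen)
		return true;	/* still valid, nothing to refresh */

	/* ... re-pin the user pages for this generation ... */

	/* mark valid only if no invalidation raced with the refresh */
	return atomic_compare_exchange_strong(&bo->invalid, &gen, 0);
}

int main(void)
{
	struct bo b;

	atomic_init(&b.invalid, 1);
	return refresh_bo(&b) ? 0 : 1;
}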
+/* Validate invalid userptr BOs
+ *
+ * Validates BOs on the userptr_inval_list, and moves them back to the
+ * userptr_valid_list. Also updates GPUVM page tables with new page
+ * addresses and waits for the page table updates to complete.
+ */
+static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
+{
+ struct amdgpu_bo_list_entry *pd_bo_list_entries;
+ struct list_head resv_list, duplicates;
+ struct ww_acquire_ctx ticket;
+ struct amdgpu_sync sync;
+
+ struct amdgpu_vm *peer_vm;
+ struct kgd_mem *mem, *tmp_mem;
+ struct amdgpu_bo *bo;
+ struct ttm_operation_ctx ctx = { false, false };
+ int i, ret;
+
+ pd_bo_list_entries = kcalloc(process_info->n_vms,
+ sizeof(struct amdgpu_bo_list_entry),
+ GFP_KERNEL);
+ if (!pd_bo_list_entries) {
+ pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
+ return -ENOMEM;
+ }
+
+ INIT_LIST_HEAD(&resv_list);
+ INIT_LIST_HEAD(&duplicates);
+
+ /* Get all the page directory BOs that need to be reserved */
+ i = 0;
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node)
+ amdgpu_vm_get_pd_bo(peer_vm, &resv_list,
+ &pd_bo_list_entries[i++]);
+ /* Add the userptr_inval_list entries to resv_list */
+ list_for_each_entry(mem, &process_info->userptr_inval_list,
+ validate_list.head) {
+ list_add_tail(&mem->resv_list.head, &resv_list);
+ mem->resv_list.bo = mem->validate_list.bo;
+ mem->resv_list.shared = mem->validate_list.shared;
+ }
+
+ /* Reserve all BOs and page tables for validation */
+ ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
+ WARN(!list_empty(&duplicates), "Duplicates should be empty");
+ if (ret)
+ goto out;
+
+ amdgpu_sync_create(&sync);
+
+ /* Avoid triggering eviction fences when unmapping invalid
+ * userptr BOs (waits for all fences, doesn't use
+ * FENCE_OWNER_VM)
+ */
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node)
+ amdgpu_amdkfd_remove_eviction_fence(peer_vm->root.base.bo,
+ process_info->eviction_fence,
+ NULL, NULL);
+
+ ret = process_validate_vms(process_info);
+ if (ret)
+ goto unreserve_out;
+
+ /* Validate BOs and update GPUVM page tables */
+ list_for_each_entry_safe(mem, tmp_mem,
+ &process_info->userptr_inval_list,
+ validate_list.head) {
+ struct kfd_bo_va_list *bo_va_entry;
+
+ bo = mem->bo;
+
+ /* Copy pages array and validate the BO if we got user pages */
+ if (mem->user_pages[0]) {
+ amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
+ mem->user_pages);
+ amdgpu_ttm_placement_from_domain(bo, mem->domain);
+ ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+ if (ret) {
+ pr_err("%s: failed to validate BO\n", __func__);
+ goto unreserve_out;
+ }
+ }
+
+ /* Validation succeeded: the BO now owns the pages, so free
+ * our copy of the pointer array. Put this BO back on the
+ * userptr_valid_list. If it needs to be revalidated later,
+ * we start over from scratch.
+ */
+ kvfree(mem->user_pages);
+ mem->user_pages = NULL;
+ list_move_tail(&mem->validate_list.head,
+ &process_info->userptr_valid_list);
+
+ /* Update mapping. If the BO was not validated
+ * (because we couldn't get user pages), this will
+ * clear the page table entries, which will result in
+ * VM faults if the GPU tries to access the invalid
+ * memory.
+ */
+ list_for_each_entry(bo_va_entry, &mem->bo_va_list, bo_list) {
+ if (!bo_va_entry->is_mapped)
+ continue;
+
+ ret = update_gpuvm_pte((struct amdgpu_device *)
+ bo_va_entry->kgd_dev,
+ bo_va_entry, &sync);
+ if (ret) {
+ pr_err("%s: update PTE failed\n", __func__);
+ /* make sure this gets validated again */
+ atomic_inc(&mem->invalid);
+ goto unreserve_out;
+ }
+ }
+ }
+
+ /* Update page directories */
+ ret = process_update_pds(process_info, &sync);
+
+unreserve_out:
+ list_for_each_entry(peer_vm, &process_info->vm_list_head,
+ vm_list_node)
+ amdgpu_bo_fence(peer_vm->root.base.bo,
+ &process_info->eviction_fence->base, true);
+ ttm_eu_backoff_reservation(&ticket, &resv_list);
+ amdgpu_sync_wait(&sync, false);
+ amdgpu_sync_free(&sync);
+out:
+ kfree(pd_bo_list_entries);
+
+ return ret;
+}
+
+/* Worker callback to restore evicted userptr BOs
+ *
+ * Tries to update and validate all userptr BOs. If successful and no
+ * concurrent evictions happened, the queues are restarted. Otherwise,
+ * reschedule for another attempt later.
+ */
+static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
+{
+ struct delayed_work *dwork = to_delayed_work(work);
+ struct amdkfd_process_info *process_info =
+ container_of(dwork, struct amdkfd_process_info,
+ restore_userptr_work);
+ struct task_struct *usertask;
+ struct mm_struct *mm;
+ int evicted_bos;
+
+ evicted_bos = atomic_read(&process_info->evicted_bos);
+ if (!evicted_bos)
+ return;
+
+ /* Reference task and mm in case of concurrent process termination */
+ usertask = get_pid_task(process_info->pid, PIDTYPE_PID);
+ if (!usertask)
+ return;
+ mm = get_task_mm(usertask);
+ if (!mm) {
+ put_task_struct(usertask);
+ return;
+ }
+
+ mutex_lock(&process_info->lock);
+
+ if (update_invalid_user_pages(process_info, mm))
+ goto unlock_out;
+ /* userptr_inval_list can be empty if all evicted userptr BOs
+ * have been freed. In that case there is nothing to validate
+ * and we can just restart the queues.
+ */
+ if (!list_empty(&process_info->userptr_inval_list)) {
+ if (atomic_read(&process_info->evicted_bos) != evicted_bos)
+ goto unlock_out; /* Concurrent eviction, try again */
+
+ if (validate_invalid_user_pages(process_info))
+ goto unlock_out;
+ }
+ /* Final check for concurrent eviction and atomic update. If
+ * another eviction happens after a successful update, it will
+ * be treated as a first eviction that calls quiesce_mm. The
+ * eviction reference counting inside KFD will handle this case.
+ */
+ if (atomic_cmpxchg(&process_info->evicted_bos, evicted_bos, 0) !=
+ evicted_bos)
+ goto unlock_out;
+ evicted_bos = 0;
+ if (kgd2kfd->resume_mm(mm)) {
+ pr_err("%s: Failed to resume KFD\n", __func__);
+ /* No recovery from this failure. Probably the CP is
+ * hanging. No point trying again.
+ */
+ }
+unlock_out:
+ mutex_unlock(&process_info->lock);
+ mmput(mm);
+ put_task_struct(usertask);
+
+ /* If validation failed, reschedule another attempt */
+ if (evicted_bos)
+ schedule_delayed_work(&process_info->restore_userptr_work,
+ msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
+}
+
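The worker never blocks waiting for the races above to clear; any failure simply re-arms the delayed work. A hypothetical userspace analogue of that one-attempt-per-wakeup retry policy (the 1 ms delay mirrors AMDGPU_USERPTR_RESTORE_DELAY_MS only by assumption):

#include <stdbool.h>
#include <unistd.h>

#define RESTORE_DELAY_MS 1	/* assumed, by analogy with the kernel macro */

static int attempts;

static bool restore_once(void)
{
	/* stand-in for update + validate + cmpxchg of evicted_bos */
	return ++attempts >= 3;
}

int main(void)
{
	/* one attempt per wakeup; on failure re-arm instead of
	 * spinning, as schedule_delayed_work() does in the worker
	 */
	while (!restore_once())
		usleep(RESTORE_DELAY_MS * 1000);
	return 0;
}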
/** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given
* KFD process identified by process_info
*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index dc34b50..8e66f37 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -536,7 +536,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
if (p->bo_list) {
amdgpu_bo_list_get_list(p->bo_list, &p->validated);
if (p->bo_list->first_userptr != p->bo_list->num_entries)
- p->mn = amdgpu_mn_get(p->adev);
+ p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
}
INIT_LIST_HEAD(&duplicates);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index bd67f4c..83e344f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -36,12 +36,14 @@
#include <drm/drm.h>
#include "amdgpu.h"
+#include "amdgpu_amdkfd.h"
struct amdgpu_mn {
/* constant after initialisation */
struct amdgpu_device *adev;
struct mm_struct *mm;
struct mmu_notifier mn;
+ enum amdgpu_mn_type type;
/* only used on destruction */
struct work_struct work;
@@ -185,7 +187,7 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
}
/**
- * amdgpu_mn_invalidate_range_start - callback to notify about mm change
+ * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
*
* @mn: our notifier
* @mm: the mm this callback is about
@@ -195,10 +197,10 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
* We block until all BOs between start and end are idle and
* unmap them by moving them into the system domain again.
*/
-static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long start,
- unsigned long end)
+static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long start,
+ unsigned long end)
{
struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
struct interval_tree_node *it;
@@ -220,6 +222,49 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
}
/**
+ * amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
+ *
+ * @mn: our notifier
+ * @mm: the mm this callback is about
+ * @start: start of updated range
+ * @end: end of updated range
+ *
+ * We temporarily evict all BOs between start and end. This
+ * necessitates evicting all user-mode queues of the process. The BOs
+ * are restored in amdgpu_mn_invalidate_range_end.
+ */
+static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
+ struct mm_struct *mm,
+ unsigned long start,
+ unsigned long end)
+{
+ struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
+ struct interval_tree_node *it;
+
+ /* notification is exclusive, but interval is inclusive */
+ end -= 1;
+
+ amdgpu_mn_read_lock(rmn);
+
+ it = interval_tree_iter_first(&rmn->objects, start, end);
+ while (it) {
+ struct amdgpu_mn_node *node;
+ struct amdgpu_bo *bo;
+
+ node = container_of(it, struct amdgpu_mn_node, it);
+ it = interval_tree_iter_next(it, start, end);
+
+ list_for_each_entry(bo, &node->bos, mn_list) {
+ struct kgd_mem *mem = bo->kfd_bo;
+
+ if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
+ start, end))
+ amdgpu_amdkfd_evict_userptr(mem, mm);
+ }
+ }
+}
+
+/**
* amdgpu_mn_invalidate_range_end - callback to notify about mm change
*
* @mn: our notifier
@@ -239,23 +284,39 @@ static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
amdgpu_mn_read_unlock(rmn);
}
-static const struct mmu_notifier_ops amdgpu_mn_ops = {
- .release = amdgpu_mn_release,
- .invalidate_range_start = amdgpu_mn_invalidate_range_start,
- .invalidate_range_end = amdgpu_mn_invalidate_range_end,
+static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
+ [AMDGPU_MN_TYPE_GFX] = {
+ .release = amdgpu_mn_release,
+ .invalidate_range_start = amdgpu_mn_invalidate_range_start_gfx,
+ .invalidate_range_end = amdgpu_mn_invalidate_range_end,
+ },
+ [AMDGPU_MN_TYPE_HSA] = {
+ .release = amdgpu_mn_release,
+ .invalidate_range_start = amdgpu_mn_invalidate_range_start_hsa,
+ .invalidate_range_end = amdgpu_mn_invalidate_range_end,
+ },
};
+/* Low bits of any reasonable mm pointer will be unused due to struct
+ * alignment. Use these bits to make a unique key from the mm pointer
+ * and notifier type.
+ */
+#define AMDGPU_MN_KEY(mm, type) ((unsigned long)(mm) + (type))
+
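A standalone demonstration of why this key construction cannot collide: struct alignment guarantees the low bits of any mm pointer are zero, so adding a small enum value yields distinct keys for the two notifier types on the same mm:

#include <stdio.h>

enum mn_type { MN_TYPE_GFX, MN_TYPE_HSA };

#define MN_KEY(mm, type) ((unsigned long)(mm) + (type))

int main(void)
{
	/* any struct with pointer-sized members is at least 8-byte
	 * aligned on 64-bit, leaving the low 3 bits free for the type
	 */
	struct { void *a; } mm;

	printf("gfx key 0x%lx, hsa key 0x%lx\n",
	       MN_KEY(&mm, MN_TYPE_GFX), MN_KEY(&mm, MN_TYPE_HSA));
	return 0;
}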
/**
* amdgpu_mn_get - create notifier context
*
* @adev: amdgpu device pointer
+ * @type: type of MMU notifier context
*
* Creates a notifier context for current->mm.
*/
-struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
+struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
+ enum amdgpu_mn_type type)
{
struct mm_struct *mm = current->mm;
struct amdgpu_mn *rmn;
+ unsigned long key = AMDGPU_MN_KEY(mm, type);
int r;
mutex_lock(&adev->mn_lock);
@@ -264,8 +325,8 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
return ERR_PTR(-EINTR);
}
- hash_for_each_possible(adev->mn_hash, rmn, node, (unsigned long)mm)
- if (rmn->mm == mm)
+ hash_for_each_possible(adev->mn_hash, rmn, node, key)
+ if (AMDGPU_MN_KEY(rmn->mm, rmn->type) == key)
goto release_locks;
rmn = kzalloc(sizeof(*rmn), GFP_KERNEL);
@@ -276,8 +337,9 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
rmn->adev = adev;
rmn->mm = mm;
- rmn->mn.ops = &amdgpu_mn_ops;
init_rwsem(&rmn->lock);
+ rmn->type = type;
+ rmn->mn.ops = &amdgpu_mn_ops[type];
rmn->objects = RB_ROOT_CACHED;
mutex_init(&rmn->read_lock);
atomic_set(&rmn->recursion, 0);
@@ -286,7 +348,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
if (r)
goto free_rmn;
- hash_add(adev->mn_hash, &rmn->node, (unsigned long)mm);
+ hash_add(adev->mn_hash, &rmn->node, AMDGPU_MN_KEY(mm, type));
release_locks:
up_write(&mm->mmap_sem);
@@ -315,15 +377,21 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
{
unsigned long end = addr + amdgpu_bo_size(bo) - 1;
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+ enum amdgpu_mn_type type =
+ bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX;
struct amdgpu_mn *rmn;
- struct amdgpu_mn_node *node = NULL;
+ struct amdgpu_mn_node *node = NULL, *new_node;
struct list_head bos;
struct interval_tree_node *it;
- rmn = amdgpu_mn_get(adev);
+ rmn = amdgpu_mn_get(adev, type);
if (IS_ERR(rmn))
return PTR_ERR(rmn);
+ new_node = kmalloc(sizeof(*new_node), GFP_KERNEL);
+ if (!new_node)
+ return -ENOMEM;
+
INIT_LIST_HEAD(&bos);
down_write(&rmn->lock);
@@ -337,13 +405,10 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
list_splice(&node->bos, &bos);
}
- if (!node) {
- node = kmalloc(sizeof(struct amdgpu_mn_node), GFP_KERNEL);
- if (!node) {
- up_write(&rmn->lock);
- return -ENOMEM;
- }
- }
+ if (!node)
+ node = new_node;
+ else
+ kfree(new_node);
bo->mn = rmn;
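This hunk also changes the allocation strategy: the node is now allocated before rmn->lock is taken, so the critical section can no longer fail with -ENOMEM, and the spare is freed if the interval merge found an existing node. The same pattern in minimal form (hypothetical names):

#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int *existing;

static int insert(int val)
{
	int *new_node = malloc(sizeof(*new_node));

	if (!new_node)
		return -1;	/* fail before the lock, like -ENOMEM */

	pthread_mutex_lock(&lock);
	if (existing) {
		*existing = val;	/* merged into an existing node */
		free(new_node);		/* spare no longer needed */
	} else {
		*new_node = val;
		existing = new_node;
	}
	pthread_mutex_unlock(&lock);
	return 0;
}

int main(void) { return insert(42); }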
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
index d0095a3..eb0f432 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
@@ -29,16 +29,23 @@
*/
struct amdgpu_mn;
+enum amdgpu_mn_type {
+ AMDGPU_MN_TYPE_GFX,
+ AMDGPU_MN_TYPE_HSA,
+};
+
#if defined(CONFIG_MMU_NOTIFIER)
void amdgpu_mn_lock(struct amdgpu_mn *mn);
void amdgpu_mn_unlock(struct amdgpu_mn *mn);
-struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev);
+struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
+ enum amdgpu_mn_type type);
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
void amdgpu_mn_unregister(struct amdgpu_bo *bo);
#else
static inline void amdgpu_mn_lock(struct amdgpu_mn *mn) {}
static inline void amdgpu_mn_unlock(struct amdgpu_mn *mn) {}
-static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
+static inline struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
+ enum amdgpu_mn_type type)
{
return NULL;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 205da3f..c713d30 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -695,7 +695,7 @@ struct amdgpu_ttm_tt {
struct ttm_dma_tt ttm;
u64 offset;
uint64_t userptr;
- struct mm_struct *usermm;
+ struct task_struct *usertask;
uint32_t userflags;
spinlock_t guptasklock;
struct list_head guptasks;
@@ -706,14 +706,18 @@ struct amdgpu_ttm_tt {
int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct mm_struct *mm = gtt->usertask->mm;
unsigned int flags = 0;
unsigned pinned = 0;
int r;
+ if (!mm) /* Happens during process shutdown */
+ return -ESRCH;
+
if (!(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY))
flags |= FOLL_WRITE;
- down_read(&current->mm->mmap_sem);
+ down_read(&mm->mmap_sem);
if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
/* check that we only use anonymous memory
@@ -721,9 +725,9 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
struct vm_area_struct *vma;
- vma = find_vma(gtt->usermm, gtt->userptr);
+ vma = find_vma(mm, gtt->userptr);
if (!vma || vma->vm_file || vma->vm_end < end) {
- up_read(&current->mm->mmap_sem);
+ up_read(&mm->mmap_sem);
return -EPERM;
}
}
@@ -739,7 +743,12 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
list_add(&guptask.list, &gtt->guptasks);
spin_unlock(&gtt->guptasklock);
- r = get_user_pages(userptr, num_pages, flags, p, NULL);
+ if (mm == current->mm)
+ r = get_user_pages(userptr, num_pages, flags, p, NULL);
+ else
+ r = get_user_pages_remote(gtt->usertask,
+ mm, userptr, num_pages,
+ flags, p, NULL, NULL);
spin_lock(&gtt->guptasklock);
list_del(&guptask.list);
@@ -752,12 +761,12 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
} while (pinned < ttm->num_pages);
- up_read(&current->mm->mmap_sem);
+ up_read(&mm->mmap_sem);
return 0;
release_pages:
release_pages(pages, pinned);
- up_read(&current->mm->mmap_sem);
+ up_read(&mm->mmap_sem);
return r;
}
@@ -978,6 +987,9 @@ static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ if (gtt->usertask)
+ put_task_struct(gtt->usertask);
+
ttm_dma_tt_fini(&gtt->ttm);
kfree(gtt);
}
@@ -1079,8 +1091,13 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
return -EINVAL;
gtt->userptr = addr;
- gtt->usermm = current->mm;
gtt->userflags = flags;
+
+ if (gtt->usertask)
+ put_task_struct(gtt->usertask);
+ gtt->usertask = current->group_leader;
+ get_task_struct(gtt->usertask);
+
spin_lock_init(&gtt->guptasklock);
INIT_LIST_HEAD(&gtt->guptasks);
atomic_set(&gtt->mmu_invalidations, 0);
@@ -1096,7 +1113,10 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
if (gtt == NULL)
return NULL;
- return gtt->usermm;
+ if (gtt->usertask == NULL)
+ return NULL;
+
+ return gtt->usertask->mm;
}
bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 9d39fd5..e5962e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -4686,6 +4686,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
cu_info->number = active_cu_number;
cu_info->ao_cu_mask = ao_cu_mask;
+ cu_info->simd_per_cu = NUM_SIMD_PER_CU;
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h
index 7f408f8..f22f7a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15d.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h
@@ -268,6 +268,11 @@
* x=1: tmz_end
*/
+#define PACKET3_INVALIDATE_TLBS 0x98
+# define PACKET3_INVALIDATE_TLBS_DST_SEL(x) ((x) << 0)
+# define PACKET3_INVALIDATE_TLBS_ALL_HUB(x) ((x) << 4)
+# define PACKET3_INVALIDATE_TLBS_PASID(x) ((x) << 5)
+# define PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(x) ((x) << 29)
#define PACKET3_SET_RESOURCES 0xA0
/* 1. header
* 2. CONTROL
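For illustration, the field macros above OR together into the INVALIDATE_TLBS control dword like this; the field values (pasid 42, all hubs, flush type 0) are arbitrary examples, not values from the driver:

#include <stdint.h>
#include <stdio.h>

#define PACKET3_INVALIDATE_TLBS_DST_SEL(x)	((x) << 0)
#define PACKET3_INVALIDATE_TLBS_ALL_HUB(x)	((x) << 4)
#define PACKET3_INVALIDATE_TLBS_PASID(x)	((x) << 5)
#define PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(x)	((x) << 29)

int main(void)
{
	uint32_t control = PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
			   PACKET3_INVALIDATE_TLBS_ALL_HUB(1) |
			   PACKET3_INVALIDATE_TLBS_PASID(42) |
			   PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(0);

	printf("control dword: 0x%08x\n", (unsigned)control);
	return 0;
}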
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile
index 0d02422..ffd096f 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -30,12 +30,14 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \
kfd_pasid.o kfd_doorbell.o kfd_flat_memory.o \
kfd_process.o kfd_queue.o kfd_mqd_manager.o \
kfd_mqd_manager_cik.o kfd_mqd_manager_vi.o \
+ kfd_mqd_manager_v9.o \
kfd_kernel_queue.o kfd_kernel_queue_cik.o \
- kfd_kernel_queue_vi.o kfd_packet_manager.o \
- kfd_process_queue_manager.o kfd_device_queue_manager.o \
- kfd_device_queue_manager_cik.o kfd_device_queue_manager_vi.o \
+ kfd_kernel_queue_vi.o kfd_kernel_queue_v9.o \
+ kfd_packet_manager.o kfd_process_queue_manager.o \
+ kfd_device_queue_manager.o kfd_device_queue_manager_cik.o \
+ kfd_device_queue_manager_vi.o kfd_device_queue_manager_v9.o \
kfd_interrupt.o kfd_events.o cik_event_interrupt.o \
- kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o
+ kfd_int_process_v9.o kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o
ifneq ($(CONFIG_AMD_IOMMU_V2),)
amdkfd-y += kfd_iommu.o
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
index 3d5ccb3..49df6c7 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
@@ -27,18 +27,28 @@
static bool cik_event_interrupt_isr(struct kfd_dev *dev,
const uint32_t *ih_ring_entry)
{
- unsigned int pasid;
const struct cik_ih_ring_entry *ihre =
(const struct cik_ih_ring_entry *)ih_ring_entry;
+ unsigned int vmid, pasid;
+
+ /* Only handle interrupts from KFD VMIDs */
+ vmid = (ihre->ring_id & 0x0000ff00) >> 8;
+ if (vmid < dev->vm_info.first_vmid_kfd ||
+ vmid > dev->vm_info.last_vmid_kfd)
+ return 0;
+ /* If there is no valid PASID, it's likely a firmware bug */
pasid = (ihre->ring_id & 0xffff0000) >> 16;
+ if (WARN_ONCE(pasid == 0, "FW bug: No PASID in KFD interrupt"))
+ return 0;
- /* Do not process in ISR, just request it to be forwarded to WQ. */
- return (pasid != 0) &&
- (ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE ||
+ /* Interrupt types we care about: various signals and faults.
+ * They will be forwarded to a work queue (see below).
+ */
+ return ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE ||
ihre->source_id == CIK_INTSRC_SDMA_TRAP ||
ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG ||
- ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE);
+ ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE;
}
static void cik_event_interrupt_wq(struct kfd_dev *dev,
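The ISR extracts both IDs from bitfields of ring_id: the VMID from bits 8-15 and the PASID from bits 16-31. A standalone decode of an arbitrary sample entry:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t ring_id = 0x00120800;	/* arbitrary example entry */
	unsigned int vmid  = (ring_id & 0x0000ff00) >> 8;
	unsigned int pasid = (ring_id & 0xffff0000) >> 16;

	printf("vmid %u, pasid %u\n", vmid, pasid);
	return 0;
}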
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_regs.h b/drivers/gpu/drm/amd/amdkfd/cik_regs.h
index 48769d1..37ce6dd 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_regs.h
+++ b/drivers/gpu/drm/amd/amdkfd/cik_regs.h
@@ -33,7 +33,8 @@
#define APE1_MTYPE(x) ((x) << 7)
/* valid for both DEFAULT_MTYPE and APE1_MTYPE */
-#define MTYPE_CACHED 0
+#define MTYPE_CACHED_NV 0
+#define MTYPE_CACHED 1
#define MTYPE_NONCACHED 3
#define DEFAULT_CP_HQD_PERSISTENT_STATE (0x33U << 8)
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
new file mode 100644
index 0000000..f68aef0
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -0,0 +1,560 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+static const uint32_t cwsr_trap_gfx8_hex[] = {
+ 0xbf820001, 0xbf820125,
+ 0xb8f4f802, 0x89748674,
+ 0xb8f5f803, 0x8675ff75,
+ 0x00000400, 0xbf850011,
+ 0xc00a1e37, 0x00000000,
+ 0xbf8c007f, 0x87777978,
+ 0xbf840002, 0xb974f802,
+ 0xbe801d78, 0xb8f5f803,
+ 0x8675ff75, 0x000001ff,
+ 0xbf850002, 0x80708470,
+ 0x82718071, 0x8671ff71,
+ 0x0000ffff, 0xb974f802,
+ 0xbe801f70, 0xb8f5f803,
+ 0x8675ff75, 0x00000100,
+ 0xbf840006, 0xbefa0080,
+ 0xb97a0203, 0x8671ff71,
+ 0x0000ffff, 0x80f08870,
+ 0x82f18071, 0xbefa0080,
+ 0xb97a0283, 0xbef60068,
+ 0xbef70069, 0xb8fa1c07,
+ 0x8e7a9c7a, 0x87717a71,
+ 0xb8fa03c7, 0x8e7a9b7a,
+ 0x87717a71, 0xb8faf807,
+ 0x867aff7a, 0x00007fff,
+ 0xb97af807, 0xbef2007e,
+ 0xbef3007f, 0xbefe0180,
+ 0xbf900004, 0x877a8474,
+ 0xb97af802, 0xbf8e0002,
+ 0xbf88fffe, 0xbef8007e,
+ 0x8679ff7f, 0x0000ffff,
+ 0x8779ff79, 0x00040000,
+ 0xbefa0080, 0xbefb00ff,
+ 0x00807fac, 0x867aff7f,
+ 0x08000000, 0x8f7a837a,
+ 0x877b7a7b, 0x867aff7f,
+ 0x70000000, 0x8f7a817a,
+ 0x877b7a7b, 0xbeef007c,
+ 0xbeee0080, 0xb8ee2a05,
+ 0x806e816e, 0x8e6e8a6e,
+ 0xb8fa1605, 0x807a817a,
+ 0x8e7a867a, 0x806e7a6e,
+ 0xbefa0084, 0xbefa00ff,
+ 0x01000000, 0xbefe007c,
+ 0xbefc006e, 0xc0611bfc,
+ 0x0000007c, 0x806e846e,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc006e, 0xc0611c3c,
+ 0x0000007c, 0x806e846e,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc006e, 0xc0611c7c,
+ 0x0000007c, 0x806e846e,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc006e, 0xc0611cbc,
+ 0x0000007c, 0x806e846e,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc006e, 0xc0611cfc,
+ 0x0000007c, 0x806e846e,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc006e, 0xc0611d3c,
+ 0x0000007c, 0x806e846e,
+ 0xbefc007e, 0xb8f5f803,
+ 0xbefe007c, 0xbefc006e,
+ 0xc0611d7c, 0x0000007c,
+ 0x806e846e, 0xbefc007e,
+ 0xbefe007c, 0xbefc006e,
+ 0xc0611dbc, 0x0000007c,
+ 0x806e846e, 0xbefc007e,
+ 0xbefe007c, 0xbefc006e,
+ 0xc0611dfc, 0x0000007c,
+ 0x806e846e, 0xbefc007e,
+ 0xb8eff801, 0xbefe007c,
+ 0xbefc006e, 0xc0611bfc,
+ 0x0000007c, 0x806e846e,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc006e, 0xc0611b3c,
+ 0x0000007c, 0x806e846e,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc006e, 0xc0611b7c,
+ 0x0000007c, 0x806e846e,
+ 0xbefc007e, 0x867aff7f,
+ 0x04000000, 0xbef30080,
+ 0x8773737a, 0xb8ee2a05,
+ 0x806e816e, 0x8e6e8a6e,
+ 0xb8f51605, 0x80758175,
+ 0x8e758475, 0x8e7a8275,
+ 0xbefa00ff, 0x01000000,
+ 0xbef60178, 0x80786e78,
+ 0x82798079, 0xbefc0080,
+ 0xbe802b00, 0xbe822b02,
+ 0xbe842b04, 0xbe862b06,
+ 0xbe882b08, 0xbe8a2b0a,
+ 0xbe8c2b0c, 0xbe8e2b0e,
+ 0xc06b003c, 0x00000000,
+ 0xc06b013c, 0x00000010,
+ 0xc06b023c, 0x00000020,
+ 0xc06b033c, 0x00000030,
+ 0x8078c078, 0x82798079,
+ 0x807c907c, 0xbf0a757c,
+ 0xbf85ffeb, 0xbef80176,
+ 0xbeee0080, 0xbefe00c1,
+ 0xbeff00c1, 0xbefa00ff,
+ 0x01000000, 0xe0724000,
+ 0x6e1e0000, 0xe0724100,
+ 0x6e1e0100, 0xe0724200,
+ 0x6e1e0200, 0xe0724300,
+ 0x6e1e0300, 0xbefe00c1,
+ 0xbeff00c1, 0xb8f54306,
+ 0x8675c175, 0xbf84002c,
+ 0xbf8a0000, 0x867aff73,
+ 0x04000000, 0xbf840028,
+ 0x8e758675, 0x8e758275,
+ 0xbefa0075, 0xb8ee2a05,
+ 0x806e816e, 0x8e6e8a6e,
+ 0xb8fa1605, 0x807a817a,
+ 0x8e7a867a, 0x806e7a6e,
+ 0x806eff6e, 0x00000080,
+ 0xbefa00ff, 0x01000000,
+ 0xbefc0080, 0xd28c0002,
+ 0x000100c1, 0xd28d0003,
+ 0x000204c1, 0xd1060002,
+ 0x00011103, 0x7e0602ff,
+ 0x00000200, 0xbefc00ff,
+ 0x00010000, 0xbe80007b,
+ 0x867bff7b, 0xff7fffff,
+ 0x877bff7b, 0x00058000,
+ 0xd8ec0000, 0x00000002,
+ 0xbf8c007f, 0xe0765000,
+ 0x6e1e0002, 0x32040702,
+ 0xd0c9006a, 0x0000eb02,
+ 0xbf87fff7, 0xbefb0000,
+ 0xbeee00ff, 0x00000400,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xb8f52a05, 0x80758175,
+ 0x8e758275, 0x8e7a8875,
+ 0xbefa00ff, 0x01000000,
+ 0xbefc0084, 0xbf0a757c,
+ 0xbf840015, 0xbf11017c,
+ 0x8075ff75, 0x00001000,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
+ 0xe0724000, 0x6e1e0000,
+ 0xe0724100, 0x6e1e0100,
+ 0xe0724200, 0x6e1e0200,
+ 0xe0724300, 0x6e1e0300,
+ 0x807c847c, 0x806eff6e,
+ 0x00000400, 0xbf0a757c,
+ 0xbf85ffef, 0xbf9c0000,
+ 0xbf8200ca, 0xbef8007e,
+ 0x8679ff7f, 0x0000ffff,
+ 0x8779ff79, 0x00040000,
+ 0xbefa0080, 0xbefb00ff,
+ 0x00807fac, 0x8676ff7f,
+ 0x08000000, 0x8f768376,
+ 0x877b767b, 0x8676ff7f,
+ 0x70000000, 0x8f768176,
+ 0x877b767b, 0x8676ff7f,
+ 0x04000000, 0xbf84001e,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xb8f34306, 0x8673c173,
+ 0xbf840019, 0x8e738673,
+ 0x8e738273, 0xbefa0073,
+ 0xb8f22a05, 0x80728172,
+ 0x8e728a72, 0xb8f61605,
+ 0x80768176, 0x8e768676,
+ 0x80727672, 0x8072ff72,
+ 0x00000080, 0xbefa00ff,
+ 0x01000000, 0xbefc0080,
+ 0xe0510000, 0x721e0000,
+ 0xe0510100, 0x721e0000,
+ 0x807cff7c, 0x00000200,
+ 0x8072ff72, 0x00000200,
+ 0xbf0a737c, 0xbf85fff6,
+ 0xbef20080, 0xbefe00c1,
+ 0xbeff00c1, 0xb8f32a05,
+ 0x80738173, 0x8e738273,
+ 0x8e7a8873, 0xbefa00ff,
+ 0x01000000, 0xbef60072,
+ 0x8072ff72, 0x00000400,
+ 0xbefc0084, 0xbf11087c,
+ 0x8073ff73, 0x00008000,
+ 0xe0524000, 0x721e0000,
+ 0xe0524100, 0x721e0100,
+ 0xe0524200, 0x721e0200,
+ 0xe0524300, 0x721e0300,
+ 0xbf8c0f70, 0x7e000300,
+ 0x7e020301, 0x7e040302,
+ 0x7e060303, 0x807c847c,
+ 0x8072ff72, 0x00000400,
+ 0xbf0a737c, 0xbf85ffee,
+ 0xbf9c0000, 0xe0524000,
+ 0x761e0000, 0xe0524100,
+ 0x761e0100, 0xe0524200,
+ 0x761e0200, 0xe0524300,
+ 0x761e0300, 0xb8f22a05,
+ 0x80728172, 0x8e728a72,
+ 0xb8f61605, 0x80768176,
+ 0x8e768676, 0x80727672,
+ 0x80f2c072, 0xb8f31605,
+ 0x80738173, 0x8e738473,
+ 0x8e7a8273, 0xbefa00ff,
+ 0x01000000, 0xbefc0073,
+ 0xc031003c, 0x00000072,
+ 0x80f2c072, 0xbf8c007f,
+ 0x80fc907c, 0xbe802d00,
+ 0xbe822d02, 0xbe842d04,
+ 0xbe862d06, 0xbe882d08,
+ 0xbe8a2d0a, 0xbe8c2d0c,
+ 0xbe8e2d0e, 0xbf06807c,
+ 0xbf84fff1, 0xb8f22a05,
+ 0x80728172, 0x8e728a72,
+ 0xb8f61605, 0x80768176,
+ 0x8e768676, 0x80727672,
+ 0xbefa0084, 0xbefa00ff,
+ 0x01000000, 0xc0211cfc,
+ 0x00000072, 0x80728472,
+ 0xc0211c3c, 0x00000072,
+ 0x80728472, 0xc0211c7c,
+ 0x00000072, 0x80728472,
+ 0xc0211bbc, 0x00000072,
+ 0x80728472, 0xc0211bfc,
+ 0x00000072, 0x80728472,
+ 0xc0211d3c, 0x00000072,
+ 0x80728472, 0xc0211d7c,
+ 0x00000072, 0x80728472,
+ 0xc0211a3c, 0x00000072,
+ 0x80728472, 0xc0211a7c,
+ 0x00000072, 0x80728472,
+ 0xc0211dfc, 0x00000072,
+ 0x80728472, 0xc0211b3c,
+ 0x00000072, 0x80728472,
+ 0xc0211b7c, 0x00000072,
+ 0x80728472, 0xbf8c007f,
+ 0xbefc0073, 0xbefe006e,
+ 0xbeff006f, 0x867375ff,
+ 0x000003ff, 0xb9734803,
+ 0x867375ff, 0xfffff800,
+ 0x8f738b73, 0xb973a2c3,
+ 0xb977f801, 0x8673ff71,
+ 0xf0000000, 0x8f739c73,
+ 0x8e739073, 0xbef60080,
+ 0x87767376, 0x8673ff71,
+ 0x08000000, 0x8f739b73,
+ 0x8e738f73, 0x87767376,
+ 0x8673ff74, 0x00800000,
+ 0x8f739773, 0xb976f807,
+ 0x8671ff71, 0x0000ffff,
+ 0x86fe7e7e, 0x86ea6a6a,
+ 0xb974f802, 0xbf8a0000,
+ 0x95807370, 0xbf810000,
+};
+
+
+static const uint32_t cwsr_trap_gfx9_hex[] = {
+ 0xbf820001, 0xbf82015a,
+ 0xb8f8f802, 0x89788678,
+ 0xb8f1f803, 0x866eff71,
+ 0x00000400, 0xbf850034,
+ 0x866eff71, 0x00000800,
+ 0xbf850003, 0x866eff71,
+ 0x00000100, 0xbf840008,
+ 0x866eff78, 0x00002000,
+ 0xbf840001, 0xbf810000,
+ 0x8778ff78, 0x00002000,
+ 0x80ec886c, 0x82ed806d,
+ 0xb8eef807, 0x866fff6e,
+ 0x001f8000, 0x8e6f8b6f,
+ 0x8977ff77, 0xfc000000,
+ 0x87776f77, 0x896eff6e,
+ 0x001f8000, 0xb96ef807,
+ 0xb8f0f812, 0xb8f1f813,
+ 0x8ef08870, 0xc0071bb8,
+ 0x00000000, 0xbf8cc07f,
+ 0xc0071c38, 0x00000008,
+ 0xbf8cc07f, 0x86ee6e6e,
+ 0xbf840001, 0xbe801d6e,
+ 0xb8f1f803, 0x8671ff71,
+ 0x000001ff, 0xbf850002,
+ 0x806c846c, 0x826d806d,
+ 0x866dff6d, 0x0000ffff,
+ 0x8f6e8b77, 0x866eff6e,
+ 0x001f8000, 0xb96ef807,
+ 0x86fe7e7e, 0x86ea6a6a,
+ 0xb978f802, 0xbe801f6c,
+ 0x866dff6d, 0x0000ffff,
+ 0xbef00080, 0xb9700283,
+ 0xb8f02407, 0x8e709c70,
+ 0x876d706d, 0xb8f003c7,
+ 0x8e709b70, 0x876d706d,
+ 0xb8f0f807, 0x8670ff70,
+ 0x00007fff, 0xb970f807,
+ 0xbeee007e, 0xbeef007f,
+ 0xbefe0180, 0xbf900004,
+ 0x87708478, 0xb970f802,
+ 0xbf8e0002, 0xbf88fffe,
+ 0xb8f02a05, 0x80708170,
+ 0x8e708a70, 0xb8f11605,
+ 0x80718171, 0x8e718671,
+ 0x80707170, 0x80707e70,
+ 0x8271807f, 0x8671ff71,
+ 0x0000ffff, 0xc0471cb8,
+ 0x00000040, 0xbf8cc07f,
+ 0xc04b1d38, 0x00000048,
+ 0xbf8cc07f, 0xc0431e78,
+ 0x00000058, 0xbf8cc07f,
+ 0xc0471eb8, 0x0000005c,
+ 0xbf8cc07f, 0xbef4007e,
+ 0x8675ff7f, 0x0000ffff,
+ 0x8775ff75, 0x00040000,
+ 0xbef60080, 0xbef700ff,
+ 0x00807fac, 0x8670ff7f,
+ 0x08000000, 0x8f708370,
+ 0x87777077, 0x8670ff7f,
+ 0x70000000, 0x8f708170,
+ 0x87777077, 0xbefb007c,
+ 0xbefa0080, 0xb8fa2a05,
+ 0x807a817a, 0x8e7a8a7a,
+ 0xb8f01605, 0x80708170,
+ 0x8e708670, 0x807a707a,
+ 0xbef60084, 0xbef600ff,
+ 0x01000000, 0xbefe007c,
+ 0xbefc007a, 0xc0611efa,
+ 0x0000007c, 0xbf8cc07f,
+ 0x807a847a, 0xbefc007e,
+ 0xbefe007c, 0xbefc007a,
+ 0xc0611b3a, 0x0000007c,
+ 0xbf8cc07f, 0x807a847a,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc007a, 0xc0611b7a,
+ 0x0000007c, 0xbf8cc07f,
+ 0x807a847a, 0xbefc007e,
+ 0xbefe007c, 0xbefc007a,
+ 0xc0611bba, 0x0000007c,
+ 0xbf8cc07f, 0x807a847a,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc007a, 0xc0611bfa,
+ 0x0000007c, 0xbf8cc07f,
+ 0x807a847a, 0xbefc007e,
+ 0xbefe007c, 0xbefc007a,
+ 0xc0611e3a, 0x0000007c,
+ 0xbf8cc07f, 0x807a847a,
+ 0xbefc007e, 0xb8f1f803,
+ 0xbefe007c, 0xbefc007a,
+ 0xc0611c7a, 0x0000007c,
+ 0xbf8cc07f, 0x807a847a,
+ 0xbefc007e, 0xbefe007c,
+ 0xbefc007a, 0xc0611a3a,
+ 0x0000007c, 0xbf8cc07f,
+ 0x807a847a, 0xbefc007e,
+ 0xbefe007c, 0xbefc007a,
+ 0xc0611a7a, 0x0000007c,
+ 0xbf8cc07f, 0x807a847a,
+ 0xbefc007e, 0xb8fbf801,
+ 0xbefe007c, 0xbefc007a,
+ 0xc0611efa, 0x0000007c,
+ 0xbf8cc07f, 0x807a847a,
+ 0xbefc007e, 0x8670ff7f,
+ 0x04000000, 0xbeef0080,
+ 0x876f6f70, 0xb8fa2a05,
+ 0x807a817a, 0x8e7a8a7a,
+ 0xb8f11605, 0x80718171,
+ 0x8e718471, 0x8e768271,
+ 0xbef600ff, 0x01000000,
+ 0xbef20174, 0x80747a74,
+ 0x82758075, 0xbefc0080,
+ 0xbf800000, 0xbe802b00,
+ 0xbe822b02, 0xbe842b04,
+ 0xbe862b06, 0xbe882b08,
+ 0xbe8a2b0a, 0xbe8c2b0c,
+ 0xbe8e2b0e, 0xc06b003a,
+ 0x00000000, 0xbf8cc07f,
+ 0xc06b013a, 0x00000010,
+ 0xbf8cc07f, 0xc06b023a,
+ 0x00000020, 0xbf8cc07f,
+ 0xc06b033a, 0x00000030,
+ 0xbf8cc07f, 0x8074c074,
+ 0x82758075, 0x807c907c,
+ 0xbf0a717c, 0xbf85ffe7,
+ 0xbef40172, 0xbefa0080,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xbee80080, 0xbee90080,
+ 0xbef600ff, 0x01000000,
+ 0xe0724000, 0x7a1d0000,
+ 0xe0724100, 0x7a1d0100,
+ 0xe0724200, 0x7a1d0200,
+ 0xe0724300, 0x7a1d0300,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xb8f14306, 0x8671c171,
+ 0xbf84002c, 0xbf8a0000,
+ 0x8670ff6f, 0x04000000,
+ 0xbf840028, 0x8e718671,
+ 0x8e718271, 0xbef60071,
+ 0xb8fa2a05, 0x807a817a,
+ 0x8e7a8a7a, 0xb8f01605,
+ 0x80708170, 0x8e708670,
+ 0x807a707a, 0x807aff7a,
+ 0x00000080, 0xbef600ff,
+ 0x01000000, 0xbefc0080,
+ 0xd28c0002, 0x000100c1,
+ 0xd28d0003, 0x000204c1,
+ 0xd1060002, 0x00011103,
+ 0x7e0602ff, 0x00000200,
+ 0xbefc00ff, 0x00010000,
+ 0xbe800077, 0x8677ff77,
+ 0xff7fffff, 0x8777ff77,
+ 0x00058000, 0xd8ec0000,
+ 0x00000002, 0xbf8cc07f,
+ 0xe0765000, 0x7a1d0002,
+ 0x68040702, 0xd0c9006a,
+ 0x0000e302, 0xbf87fff7,
+ 0xbef70000, 0xbefa00ff,
+ 0x00000400, 0xbefe00c1,
+ 0xbeff00c1, 0xb8f12a05,
+ 0x80718171, 0x8e718271,
+ 0x8e768871, 0xbef600ff,
+ 0x01000000, 0xbefc0084,
+ 0xbf0a717c, 0xbf840015,
+ 0xbf11017c, 0x8071ff71,
+ 0x00001000, 0x7e000300,
+ 0x7e020301, 0x7e040302,
+ 0x7e060303, 0xe0724000,
+ 0x7a1d0000, 0xe0724100,
+ 0x7a1d0100, 0xe0724200,
+ 0x7a1d0200, 0xe0724300,
+ 0x7a1d0300, 0x807c847c,
+ 0x807aff7a, 0x00000400,
+ 0xbf0a717c, 0xbf85ffef,
+ 0xbf9c0000, 0xbf8200d9,
+ 0xbef4007e, 0x8675ff7f,
+ 0x0000ffff, 0x8775ff75,
+ 0x00040000, 0xbef60080,
+ 0xbef700ff, 0x00807fac,
+ 0x866eff7f, 0x08000000,
+ 0x8f6e836e, 0x87776e77,
+ 0x866eff7f, 0x70000000,
+ 0x8f6e816e, 0x87776e77,
+ 0x866eff7f, 0x04000000,
+ 0xbf84001e, 0xbefe00c1,
+ 0xbeff00c1, 0xb8ef4306,
+ 0x866fc16f, 0xbf840019,
+ 0x8e6f866f, 0x8e6f826f,
+ 0xbef6006f, 0xb8f82a05,
+ 0x80788178, 0x8e788a78,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0x8078ff78, 0x00000080,
+ 0xbef600ff, 0x01000000,
+ 0xbefc0080, 0xe0510000,
+ 0x781d0000, 0xe0510100,
+ 0x781d0000, 0x807cff7c,
+ 0x00000200, 0x8078ff78,
+ 0x00000200, 0xbf0a6f7c,
+ 0xbf85fff6, 0xbef80080,
+ 0xbefe00c1, 0xbeff00c1,
+ 0xb8ef2a05, 0x806f816f,
+ 0x8e6f826f, 0x8e76886f,
+ 0xbef600ff, 0x01000000,
+ 0xbeee0078, 0x8078ff78,
+ 0x00000400, 0xbefc0084,
+ 0xbf11087c, 0x806fff6f,
+ 0x00008000, 0xe0524000,
+ 0x781d0000, 0xe0524100,
+ 0x781d0100, 0xe0524200,
+ 0x781d0200, 0xe0524300,
+ 0x781d0300, 0xbf8c0f70,
+ 0x7e000300, 0x7e020301,
+ 0x7e040302, 0x7e060303,
+ 0x807c847c, 0x8078ff78,
+ 0x00000400, 0xbf0a6f7c,
+ 0xbf85ffee, 0xbf9c0000,
+ 0xe0524000, 0x6e1d0000,
+ 0xe0524100, 0x6e1d0100,
+ 0xe0524200, 0x6e1d0200,
+ 0xe0524300, 0x6e1d0300,
+ 0xb8f82a05, 0x80788178,
+ 0x8e788a78, 0xb8ee1605,
+ 0x806e816e, 0x8e6e866e,
+ 0x80786e78, 0x80f8c078,
+ 0xb8ef1605, 0x806f816f,
+ 0x8e6f846f, 0x8e76826f,
+ 0xbef600ff, 0x01000000,
+ 0xbefc006f, 0xc031003a,
+ 0x00000078, 0x80f8c078,
+ 0xbf8cc07f, 0x80fc907c,
+ 0xbf800000, 0xbe802d00,
+ 0xbe822d02, 0xbe842d04,
+ 0xbe862d06, 0xbe882d08,
+ 0xbe8a2d0a, 0xbe8c2d0c,
+ 0xbe8e2d0e, 0xbf06807c,
+ 0xbf84fff0, 0xb8f82a05,
+ 0x80788178, 0x8e788a78,
+ 0xb8ee1605, 0x806e816e,
+ 0x8e6e866e, 0x80786e78,
+ 0xbef60084, 0xbef600ff,
+ 0x01000000, 0xc0211bfa,
+ 0x00000078, 0x80788478,
+ 0xc0211b3a, 0x00000078,
+ 0x80788478, 0xc0211b7a,
+ 0x00000078, 0x80788478,
+ 0xc0211eba, 0x00000078,
+ 0x80788478, 0xc0211efa,
+ 0x00000078, 0x80788478,
+ 0xc0211c3a, 0x00000078,
+ 0x80788478, 0xc0211c7a,
+ 0x00000078, 0x80788478,
+ 0xc0211a3a, 0x00000078,
+ 0x80788478, 0xc0211a7a,
+ 0x00000078, 0x80788478,
+ 0xc0211cfa, 0x00000078,
+ 0x80788478, 0xbf8cc07f,
+ 0xbefc006f, 0xbefe007a,
+ 0xbeff007b, 0x866f71ff,
+ 0x000003ff, 0xb96f4803,
+ 0x866f71ff, 0xfffff800,
+ 0x8f6f8b6f, 0xb96fa2c3,
+ 0xb973f801, 0xb8ee2a05,
+ 0x806e816e, 0x8e6e8a6e,
+ 0xb8ef1605, 0x806f816f,
+ 0x8e6f866f, 0x806e6f6e,
+ 0x806e746e, 0x826f8075,
+ 0x866fff6f, 0x0000ffff,
+ 0xc0071cb7, 0x00000040,
+ 0xc00b1d37, 0x00000048,
+ 0xc0031e77, 0x00000058,
+ 0xc0071eb7, 0x0000005c,
+ 0xbf8cc07f, 0x866fff6d,
+ 0xf0000000, 0x8f6f9c6f,
+ 0x8e6f906f, 0xbeee0080,
+ 0x876e6f6e, 0x866fff6d,
+ 0x08000000, 0x8f6f9b6f,
+ 0x8e6f8f6f, 0x876e6f6e,
+ 0x866fff70, 0x00800000,
+ 0x8f6f976f, 0xb96ef807,
+ 0x866dff6d, 0x0000ffff,
+ 0x86fe7e7e, 0x86ea6a6a,
+ 0xb970f802, 0xbf8a0000,
+ 0x95806f6c, 0xbf810000,
+};
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm
index 997a383d..a2a04bb 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm
@@ -20,9 +20,12 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-#if 0
-HW (VI) source code for CWSR trap handler
-#Version 18 + multiple trap handler
+/* To compile this assembly code:
+ * PROJECT=vi ./sp3 cwsr_trap_handler_gfx8.asm -hex tmp.hex
+ */
+
+/* HW (VI) source code for CWSR trap handler */
+/* Version 18 + multiple trap handler */
// this performance-optimal version was originally from Seven Xu at SRDC
@@ -98,6 +101,7 @@ var SWIZZLE_EN = 0 //whether we use swi
/**************************************************************************/
var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23
var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000
+var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1
var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006
var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
@@ -149,7 +153,7 @@ var s_save_spi_init_lo = exec_lo
var s_save_spi_init_hi = exec_hi
//tba_lo and tba_hi need to be saved/restored
-var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3??h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]}
+var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3'h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]}
var s_save_pc_hi = ttmp1
var s_save_exec_lo = ttmp2
var s_save_exec_hi = ttmp3
@@ -319,6 +323,10 @@ end
s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC
end
+ // Set SPI_PRIO=2 to avoid starving instruction fetch in the waves we're waiting for.
+ s_or_b32 s_save_tmp, s_save_status, (2 << SQ_WAVE_STATUS_SPI_PRIO_SHIFT)
+ s_setreg_b32 hwreg(HW_REG_STATUS), s_save_tmp
+
L_SLEEP:
s_sleep 0x2 // sleep 1 (64clk) is not enough for 8 waves per SIMD and would cause an SQ hang: the 7th/8th waves could not get arbitration to execute instructions while the other waves sit in this sleep loop waiting for wrexec!=0
@@ -1007,8 +1015,6 @@ end
s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS
- s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
-
//for normal save & restore, the saved PC points to the next inst to execute, no adjustment needs to be made, otherwise:
if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL))
s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 8 //pc[31:0]+8 //two back-to-back s_trap are used (first for save and second for restore)
@@ -1044,6 +1050,7 @@ end
s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT
s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp
+ s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu
@@ -1127,258 +1134,3 @@ end
function get_hwreg_size_bytes
return 128 //HWREG size 128 bytes
end
-
-
-#endif
-
-static const uint32_t cwsr_trap_gfx8_hex[] = {
- 0xbf820001, 0xbf820123,
- 0xb8f4f802, 0x89748674,
- 0xb8f5f803, 0x8675ff75,
- 0x00000400, 0xbf850011,
- 0xc00a1e37, 0x00000000,
- 0xbf8c007f, 0x87777978,
- 0xbf840002, 0xb974f802,
- 0xbe801d78, 0xb8f5f803,
- 0x8675ff75, 0x000001ff,
- 0xbf850002, 0x80708470,
- 0x82718071, 0x8671ff71,
- 0x0000ffff, 0xb974f802,
- 0xbe801f70, 0xb8f5f803,
- 0x8675ff75, 0x00000100,
- 0xbf840006, 0xbefa0080,
- 0xb97a0203, 0x8671ff71,
- 0x0000ffff, 0x80f08870,
- 0x82f18071, 0xbefa0080,
- 0xb97a0283, 0xbef60068,
- 0xbef70069, 0xb8fa1c07,
- 0x8e7a9c7a, 0x87717a71,
- 0xb8fa03c7, 0x8e7a9b7a,
- 0x87717a71, 0xb8faf807,
- 0x867aff7a, 0x00007fff,
- 0xb97af807, 0xbef2007e,
- 0xbef3007f, 0xbefe0180,
- 0xbf900004, 0xbf8e0002,
- 0xbf88fffe, 0xbef8007e,
- 0x8679ff7f, 0x0000ffff,
- 0x8779ff79, 0x00040000,
- 0xbefa0080, 0xbefb00ff,
- 0x00807fac, 0x867aff7f,
- 0x08000000, 0x8f7a837a,
- 0x877b7a7b, 0x867aff7f,
- 0x70000000, 0x8f7a817a,
- 0x877b7a7b, 0xbeef007c,
- 0xbeee0080, 0xb8ee2a05,
- 0x806e816e, 0x8e6e8a6e,
- 0xb8fa1605, 0x807a817a,
- 0x8e7a867a, 0x806e7a6e,
- 0xbefa0084, 0xbefa00ff,
- 0x01000000, 0xbefe007c,
- 0xbefc006e, 0xc0611bfc,
- 0x0000007c, 0x806e846e,
- 0xbefc007e, 0xbefe007c,
- 0xbefc006e, 0xc0611c3c,
- 0x0000007c, 0x806e846e,
- 0xbefc007e, 0xbefe007c,
- 0xbefc006e, 0xc0611c7c,
- 0x0000007c, 0x806e846e,
- 0xbefc007e, 0xbefe007c,
- 0xbefc006e, 0xc0611cbc,
- 0x0000007c, 0x806e846e,
- 0xbefc007e, 0xbefe007c,
- 0xbefc006e, 0xc0611cfc,
- 0x0000007c, 0x806e846e,
- 0xbefc007e, 0xbefe007c,
- 0xbefc006e, 0xc0611d3c,
- 0x0000007c, 0x806e846e,
- 0xbefc007e, 0xb8f5f803,
- 0xbefe007c, 0xbefc006e,
- 0xc0611d7c, 0x0000007c,
- 0x806e846e, 0xbefc007e,
- 0xbefe007c, 0xbefc006e,
- 0xc0611dbc, 0x0000007c,
- 0x806e846e, 0xbefc007e,
- 0xbefe007c, 0xbefc006e,
- 0xc0611dfc, 0x0000007c,
- 0x806e846e, 0xbefc007e,
- 0xb8eff801, 0xbefe007c,
- 0xbefc006e, 0xc0611bfc,
- 0x0000007c, 0x806e846e,
- 0xbefc007e, 0xbefe007c,
- 0xbefc006e, 0xc0611b3c,
- 0x0000007c, 0x806e846e,
- 0xbefc007e, 0xbefe007c,
- 0xbefc006e, 0xc0611b7c,
- 0x0000007c, 0x806e846e,
- 0xbefc007e, 0x867aff7f,
- 0x04000000, 0xbef30080,
- 0x8773737a, 0xb8ee2a05,
- 0x806e816e, 0x8e6e8a6e,
- 0xb8f51605, 0x80758175,
- 0x8e758475, 0x8e7a8275,
- 0xbefa00ff, 0x01000000,
- 0xbef60178, 0x80786e78,
- 0x82798079, 0xbefc0080,
- 0xbe802b00, 0xbe822b02,
- 0xbe842b04, 0xbe862b06,
- 0xbe882b08, 0xbe8a2b0a,
- 0xbe8c2b0c, 0xbe8e2b0e,
- 0xc06b003c, 0x00000000,
- 0xc06b013c, 0x00000010,
- 0xc06b023c, 0x00000020,
- 0xc06b033c, 0x00000030,
- 0x8078c078, 0x82798079,
- 0x807c907c, 0xbf0a757c,
- 0xbf85ffeb, 0xbef80176,
- 0xbeee0080, 0xbefe00c1,
- 0xbeff00c1, 0xbefa00ff,
- 0x01000000, 0xe0724000,
- 0x6e1e0000, 0xe0724100,
- 0x6e1e0100, 0xe0724200,
- 0x6e1e0200, 0xe0724300,
- 0x6e1e0300, 0xbefe00c1,
- 0xbeff00c1, 0xb8f54306,
- 0x8675c175, 0xbf84002c,
- 0xbf8a0000, 0x867aff73,
- 0x04000000, 0xbf840028,
- 0x8e758675, 0x8e758275,
- 0xbefa0075, 0xb8ee2a05,
- 0x806e816e, 0x8e6e8a6e,
- 0xb8fa1605, 0x807a817a,
- 0x8e7a867a, 0x806e7a6e,
- 0x806eff6e, 0x00000080,
- 0xbefa00ff, 0x01000000,
- 0xbefc0080, 0xd28c0002,
- 0x000100c1, 0xd28d0003,
- 0x000204c1, 0xd1060002,
- 0x00011103, 0x7e0602ff,
- 0x00000200, 0xbefc00ff,
- 0x00010000, 0xbe80007b,
- 0x867bff7b, 0xff7fffff,
- 0x877bff7b, 0x00058000,
- 0xd8ec0000, 0x00000002,
- 0xbf8c007f, 0xe0765000,
- 0x6e1e0002, 0x32040702,
- 0xd0c9006a, 0x0000eb02,
- 0xbf87fff7, 0xbefb0000,
- 0xbeee00ff, 0x00000400,
- 0xbefe00c1, 0xbeff00c1,
- 0xb8f52a05, 0x80758175,
- 0x8e758275, 0x8e7a8875,
- 0xbefa00ff, 0x01000000,
- 0xbefc0084, 0xbf0a757c,
- 0xbf840015, 0xbf11017c,
- 0x8075ff75, 0x00001000,
- 0x7e000300, 0x7e020301,
- 0x7e040302, 0x7e060303,
- 0xe0724000, 0x6e1e0000,
- 0xe0724100, 0x6e1e0100,
- 0xe0724200, 0x6e1e0200,
- 0xe0724300, 0x6e1e0300,
- 0x807c847c, 0x806eff6e,
- 0x00000400, 0xbf0a757c,
- 0xbf85ffef, 0xbf9c0000,
- 0xbf8200ca, 0xbef8007e,
- 0x8679ff7f, 0x0000ffff,
- 0x8779ff79, 0x00040000,
- 0xbefa0080, 0xbefb00ff,
- 0x00807fac, 0x8676ff7f,
- 0x08000000, 0x8f768376,
- 0x877b767b, 0x8676ff7f,
- 0x70000000, 0x8f768176,
- 0x877b767b, 0x8676ff7f,
- 0x04000000, 0xbf84001e,
- 0xbefe00c1, 0xbeff00c1,
- 0xb8f34306, 0x8673c173,
- 0xbf840019, 0x8e738673,
- 0x8e738273, 0xbefa0073,
- 0xb8f22a05, 0x80728172,
- 0x8e728a72, 0xb8f61605,
- 0x80768176, 0x8e768676,
- 0x80727672, 0x8072ff72,
- 0x00000080, 0xbefa00ff,
- 0x01000000, 0xbefc0080,
- 0xe0510000, 0x721e0000,
- 0xe0510100, 0x721e0000,
- 0x807cff7c, 0x00000200,
- 0x8072ff72, 0x00000200,
- 0xbf0a737c, 0xbf85fff6,
- 0xbef20080, 0xbefe00c1,
- 0xbeff00c1, 0xb8f32a05,
- 0x80738173, 0x8e738273,
- 0x8e7a8873, 0xbefa00ff,
- 0x01000000, 0xbef60072,
- 0x8072ff72, 0x00000400,
- 0xbefc0084, 0xbf11087c,
- 0x8073ff73, 0x00008000,
- 0xe0524000, 0x721e0000,
- 0xe0524100, 0x721e0100,
- 0xe0524200, 0x721e0200,
- 0xe0524300, 0x721e0300,
- 0xbf8c0f70, 0x7e000300,
- 0x7e020301, 0x7e040302,
- 0x7e060303, 0x807c847c,
- 0x8072ff72, 0x00000400,
- 0xbf0a737c, 0xbf85ffee,
- 0xbf9c0000, 0xe0524000,
- 0x761e0000, 0xe0524100,
- 0x761e0100, 0xe0524200,
- 0x761e0200, 0xe0524300,
- 0x761e0300, 0xb8f22a05,
- 0x80728172, 0x8e728a72,
- 0xb8f61605, 0x80768176,
- 0x8e768676, 0x80727672,
- 0x80f2c072, 0xb8f31605,
- 0x80738173, 0x8e738473,
- 0x8e7a8273, 0xbefa00ff,
- 0x01000000, 0xbefc0073,
- 0xc031003c, 0x00000072,
- 0x80f2c072, 0xbf8c007f,
- 0x80fc907c, 0xbe802d00,
- 0xbe822d02, 0xbe842d04,
- 0xbe862d06, 0xbe882d08,
- 0xbe8a2d0a, 0xbe8c2d0c,
- 0xbe8e2d0e, 0xbf06807c,
- 0xbf84fff1, 0xb8f22a05,
- 0x80728172, 0x8e728a72,
- 0xb8f61605, 0x80768176,
- 0x8e768676, 0x80727672,
- 0xbefa0084, 0xbefa00ff,
- 0x01000000, 0xc0211cfc,
- 0x00000072, 0x80728472,
- 0xc0211c3c, 0x00000072,
- 0x80728472, 0xc0211c7c,
- 0x00000072, 0x80728472,
- 0xc0211bbc, 0x00000072,
- 0x80728472, 0xc0211bfc,
- 0x00000072, 0x80728472,
- 0xc0211d3c, 0x00000072,
- 0x80728472, 0xc0211d7c,
- 0x00000072, 0x80728472,
- 0xc0211a3c, 0x00000072,
- 0x80728472, 0xc0211a7c,
- 0x00000072, 0x80728472,
- 0xc0211dfc, 0x00000072,
- 0x80728472, 0xc0211b3c,
- 0x00000072, 0x80728472,
- 0xc0211b7c, 0x00000072,
- 0x80728472, 0xbf8c007f,
- 0x8671ff71, 0x0000ffff,
- 0xbefc0073, 0xbefe006e,
- 0xbeff006f, 0x867375ff,
- 0x000003ff, 0xb9734803,
- 0x867375ff, 0xfffff800,
- 0x8f738b73, 0xb973a2c3,
- 0xb977f801, 0x8673ff71,
- 0xf0000000, 0x8f739c73,
- 0x8e739073, 0xbef60080,
- 0x87767376, 0x8673ff71,
- 0x08000000, 0x8f739b73,
- 0x8e738f73, 0x87767376,
- 0x8673ff74, 0x00800000,
- 0x8f739773, 0xb976f807,
- 0x86fe7e7e, 0x86ea6a6a,
- 0xb974f802, 0xbf8a0000,
- 0x95807370, 0xbf810000,
-};
-
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
new file mode 100644
index 0000000..998be96
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm
@@ -0,0 +1,1214 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/* To compile this assembly code:
+ * PROJECT=greenland ./sp3 cwsr_trap_handler_gfx9.asm -hex tmp.hex
+ */
+
+/* HW (GFX9) source code for CWSR trap handler */
+/* Version 18 + multiple trap handler */
+
+// this performance-optimal version was originally from Seven Xu at SRDC
+
+// Revision #18 --...
+/* Rev History
+** #1. Branch from gc dv.   //gfxip/gfx9/main/src/test/suites/block/cs/sr/cs_trap_handler.sp3#1,#50, #51, #52-53(Skip, Already Fixed by PV), #54-56(merged),#57-58(merged, skipped - already fixed by PV)
+** #4. SR Memory Layout:
+** 1. VGPR-SGPR-HWREG-{LDS}
+**   2. tba_hi.bits.26 - reconfigured as the first-wave-in-TG bit, to defer the LDS save for a threadgroup (performance concern)
+** #5. Update: 1. Accurate g8sr_ts_save_d timestamp
+** #6. Update: 1. Fix s_barrier usage; 2. VGPR s/r using swizzle buffer? (no need - it already matches the swizzle pattern; more investigation)
+** #7. Update: 1. don't barrier if noLDS
+** #8. Branch: 1. Branch to ver#0, which is very similar to gc dv version
+** 2. Fix SQ issue by s_sleep 2
+** #9. Update: 1. Fix the SCC restore failure; restore wave_status last
+**             2. optimize s_buffer save by bursting 16 SGPRs...
+** #10. Update 1. Optimize SGPR restore by bursting 16 SGPRs.
+** #11. Update 1. Add 2 more timestamp for debug version
+** #12. Update 1. Add VGPR SR using DWx4, some case improve and some case drop performance
+** #13. Integ 1. Always use MUBUF for PV trap shader...
+** #14. Update 1. s_buffer_store soft clause...
+** #15. Update 1. PERF - scalar write with glc:0/mtype0 to allow L2 combining; large performance improvement.
+** #16. Update 1. PERF - UNROLL LDS_DMA saved 2500 cycles in the IP tree
+** #17. Update 1. FUNC - LDS_DMA has issues with ATC; replaced with ds_read/buffer_store for the save part [TODO restore part]
+**             2. PERF - Save LDS before saving VGPRs to cover the long LDS-save latency...
+** #18. Update 1. FUNC - Implicitly restore STATUS.VCCZ, which is not writable by s_setreg_b32
+** 2. FUNC - Handle non-CWSR traps
+*/
+
+var G8SR_WDMEM_HWREG_OFFSET = 0
+var G8SR_WDMEM_SGPR_OFFSET = 128 // in bytes
+
+// Keep these definitions the same as in the app shader. These two timestamps are part of the app shader... They should come before any save and after the restore.
+
+var G8SR_DEBUG_TIMESTAMP = 0
+var G8SR_DEBUG_TS_SAVE_D_OFFSET = 40*4 // ts_save_d timestamp offset relative to SGPR_SR_memory_offset
+var s_g8sr_ts_save_s = s[34:35] // save start
+var s_g8sr_ts_sq_save_msg  = s[36:37]   // the save shader sends the SAVEWAVE msg to SPI
+var s_g8sr_ts_spi_wrexec   = s[38:39]   // SPI writes the SR address to SQ
+var s_g8sr_ts_save_d = s[40:41] // save end
+var s_g8sr_ts_restore_s = s[42:43] // restore start
+var s_g8sr_ts_restore_d = s[44:45] // restore end
+
+var G8SR_VGPR_SR_IN_DWX4 = 0
+var G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 = 0x00100000 // DWx4 stride is 4*4Bytes
+var G8SR_RESTORE_BUF_RSRC_WORD1_STRIDE_DWx4 = G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4
+
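
The two WORD1 stride constants above differ only in the encoded byte stride. A minimal C sketch of the encoding they imply, assuming the stride field occupies bits [29:16] of buffer resource word 1 (the field position is inferred from the two constants, not stated in this file):

    #include <assert.h>
    #include <stdint.h>

    /* Encode a byte stride into buffer resource WORD1, assuming the
     * stride field sits at bits [29:16] (consistent with
     * 4 -> 0x00040000 and 4*4 -> 0x00100000 above).
     */
    static uint32_t rsrc_word1_stride(uint32_t stride_bytes)
    {
            return (stride_bytes & 0x3fff) << 16;
    }

    int main(void)
    {
            assert(rsrc_word1_stride(4)  == 0x00040000); /* dword stride */
            assert(rsrc_word1_stride(16) == 0x00100000); /* DWx4 stride  */
            return 0;
    }
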
+
+/*************************************************************************/
+/* control on how to run the shader */
+/*************************************************************************/
+//any hack that needs to be made to run this code in EMU (either because various EMU code is not ready, or there is no compute save & restore in the EMU run)
+var EMU_RUN_HACK = 0
+var EMU_RUN_HACK_RESTORE_NORMAL = 0
+var EMU_RUN_HACK_SAVE_NORMAL_EXIT = 0
+var EMU_RUN_HACK_SAVE_SINGLE_WAVE = 0
+var EMU_RUN_HACK_SAVE_FIRST_TIME = 0 //for interrupted restore in which the first save is through EMU_RUN_HACK
+var SAVE_LDS = 1
+var WG_BASE_ADDR_LO = 0x9000a000
+var WG_BASE_ADDR_HI = 0x0
+var WAVE_SPACE = 0x5000 //memory size that each wave occupies in workgroup state mem
+var CTX_SAVE_CONTROL = 0x0
+var CTX_RESTORE_CONTROL = CTX_SAVE_CONTROL
+var SIM_RUN_HACK = 0                   //any hack that needs to be made to run this code in SIM (either because various RTL code is not ready, or there is no compute save & restore in the RTL run)
+var SGPR_SAVE_USE_SQC = 1 //use SQC D$ to do the write
+var USE_MTBUF_INSTEAD_OF_MUBUF = 0     //0 because TC EMU currently asserts on it; if 1, overload the DFMT field to carry 4 more bits of stride for MUBUF opcodes
+var SWIZZLE_EN = 0 //whether we use swizzled buffer addressing
+var ACK_SQC_STORE = 1 //workaround for suspected SQC store bug causing incorrect stores under concurrency
+
+/**************************************************************************/
+/* variables */
+/**************************************************************************/
+var SQ_WAVE_STATUS_INST_ATC_SHIFT = 23
+var SQ_WAVE_STATUS_INST_ATC_MASK = 0x00800000
+var SQ_WAVE_STATUS_SPI_PRIO_SHIFT = 1
+var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006
+var SQ_WAVE_STATUS_HALT_MASK = 0x2000
+
+var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
+var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9
+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8
+var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 6
+var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT = 24
+var SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE = 3 //FIXME sq.blk still has 4 bits at this time while SQ programming guide has 3 bits
+
+var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400
+var SQ_WAVE_TRAPSTS_EXCE_MASK = 0x1FF // Exception mask
+var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT = 10
+var SQ_WAVE_TRAPSTS_MEM_VIOL_MASK = 0x100
+var SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT = 8
+var SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK = 0x3FF
+var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT = 0x0
+var SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE = 10
+var SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK = 0xFFFFF800
+var SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT = 11
+var SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE = 21
+var SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK = 0x800
+
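The PRE/POST masks above split TRAPSTS around bit 10 (SAVECTX). The restore routine later writes the two halves back separately so that a SAVECTX request raised externally while the wave is restoring is never clobbered. A small C model of that split, using only the mask values above:

    #include <stdint.h>
    #include <assert.h>

    #define PRE_SAVECTX_MASK   0x3FFu        /* bits [9:0]   */
    #define SAVECTX_MASK       0x400u        /* bit 10       */
    #define POST_SAVECTX_MASK  0xFFFFF800u   /* bits [31:11] */

    /* Write back the saved TRAPSTS in two pieces, keeping whatever the
     * live SAVECTX bit currently says. */
    static uint32_t restore_trapsts(uint32_t live, uint32_t saved)
    {
            uint32_t out = live & SAVECTX_MASK;
            out |= saved & (PRE_SAVECTX_MASK | POST_SAVECTX_MASK);
            return out;
    }

    int main(void)
    {
            /* An externally raised SAVECTX (bit 10 of the live value)
             * survives even though the saved image had it clear. */
            assert(restore_trapsts(0x400, 0x123) == 0x523);
            return 0;
    }
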
+var SQ_WAVE_IB_STS_RCNT_SHIFT = 16 //FIXME
+var SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT = 15 //FIXME
+var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK = 0x1F8000
+var SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG = 0x00007FFF //FIXME
+
+var SQ_BUF_RSRC_WORD1_ATC_SHIFT = 24
+var SQ_BUF_RSRC_WORD3_MTYPE_SHIFT = 27
+
+var TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT = 26 // bits [31:26] unused by SPI debug data
+var TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK = 0xFC000000
+
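ttmp11[31:26] is the scratch space the handler uses to park IB_STS.RCNT and IB_STS.FIRST_REPLAY while it issues scalar reads (see L_FETCH_2ND_TRAP below). A C sketch of the round trip implied by these masks:

    #include <stdint.h>
    #include <assert.h>

    #define IB_STS_RCNT_FIRST_REPLAY_MASK 0x1F8000u   /* bits [20:15] */
    #define IB_STS_FIRST_REPLAY_SHIFT     15
    #define TTMP11_SAVE_SHIFT             26
    #define TTMP11_SAVE_MASK              0xFC000000u /* bits [31:26] */

    /* Park RCNT + FIRST_REPLAY in ttmp11[31:26], as the handler does. */
    static uint32_t park(uint32_t ttmp11, uint32_t ib_sts)
    {
            uint32_t bits = ib_sts & IB_STS_RCNT_FIRST_REPLAY_MASK;
            bits <<= (TTMP11_SAVE_SHIFT - IB_STS_FIRST_REPLAY_SHIFT);
            return (ttmp11 & ~TTMP11_SAVE_MASK) | bits;
    }

    /* Recover the six bits on the way back out of the trap. */
    static uint32_t unpark(uint32_t ttmp11)
    {
            uint32_t bits = ttmp11 >> (TTMP11_SAVE_SHIFT - IB_STS_FIRST_REPLAY_SHIFT);
            return bits & IB_STS_RCNT_FIRST_REPLAY_MASK;
    }

    int main(void)
    {
            uint32_t ib_sts = 0x0A8000; /* some RCNT + FIRST_REPLAY value */
            assert(unpark(park(0, ib_sts)) == ib_sts);
            return 0;
    }
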
+/* Save */
+var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000 //stride is 4 bytes
+var S_SAVE_BUF_RSRC_WORD3_MISC = 0x00807FAC //SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14] when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE
+
+var S_SAVE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit
+var S_SAVE_SPI_INIT_ATC_SHIFT = 27
+var S_SAVE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype
+var S_SAVE_SPI_INIT_MTYPE_SHIFT = 28
+var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG
+var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26
+
+var S_SAVE_PC_HI_RCNT_SHIFT = 28 //FIXME check with Brian to ensure all fields other than PC[47:0] can be used
+var S_SAVE_PC_HI_RCNT_MASK = 0xF0000000 //FIXME
+var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 27 //FIXME
+var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x08000000 //FIXME
+
+var s_save_spi_init_lo = exec_lo
+var s_save_spi_init_hi = exec_hi
+
+var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3'h0,pc_rewind[3:0], HT[0],trapID[7:0], PC[47:0]}
+var s_save_pc_hi = ttmp1
+var s_save_exec_lo = ttmp2
+var s_save_exec_hi = ttmp3
+var s_save_tmp = ttmp4
+var s_save_trapsts = ttmp5 //not really used until the end of the SAVE routine
+var s_save_xnack_mask_lo = ttmp6
+var s_save_xnack_mask_hi = ttmp7
+var s_save_buf_rsrc0 = ttmp8
+var s_save_buf_rsrc1 = ttmp9
+var s_save_buf_rsrc2 = ttmp10
+var s_save_buf_rsrc3 = ttmp11
+var s_save_status = ttmp12
+var s_save_mem_offset = ttmp14
+var s_save_alloc_size = s_save_trapsts //conflict
+var s_save_m0 = ttmp15
+var s_save_ttmps_lo = s_save_tmp //no conflict
+var s_save_ttmps_hi = s_save_trapsts //no conflict
+
+/* Restore */
+var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE
+var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC
+
+var S_RESTORE_SPI_INIT_ATC_MASK = 0x08000000 //bit[27]: ATC bit
+var S_RESTORE_SPI_INIT_ATC_SHIFT = 27
+var S_RESTORE_SPI_INIT_MTYPE_MASK = 0x70000000 //bit[30:28]: Mtype
+var S_RESTORE_SPI_INIT_MTYPE_SHIFT = 28
+var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG
+var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT = 26
+
+var S_RESTORE_PC_HI_RCNT_SHIFT = S_SAVE_PC_HI_RCNT_SHIFT
+var S_RESTORE_PC_HI_RCNT_MASK = S_SAVE_PC_HI_RCNT_MASK
+var S_RESTORE_PC_HI_FIRST_REPLAY_SHIFT = S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
+var S_RESTORE_PC_HI_FIRST_REPLAY_MASK = S_SAVE_PC_HI_FIRST_REPLAY_MASK
+
+var s_restore_spi_init_lo = exec_lo
+var s_restore_spi_init_hi = exec_hi
+
+var s_restore_mem_offset = ttmp12
+var s_restore_alloc_size = ttmp3
+var s_restore_tmp = ttmp2
+var s_restore_mem_offset_save = s_restore_tmp //no conflict
+
+var s_restore_m0 = s_restore_alloc_size //no conflict
+
+var s_restore_mode = ttmp7
+
+var s_restore_pc_lo = ttmp0
+var s_restore_pc_hi = ttmp1
+var s_restore_exec_lo = ttmp14
+var s_restore_exec_hi = ttmp15
+var s_restore_status = ttmp4
+var s_restore_trapsts = ttmp5
+var s_restore_xnack_mask_lo = xnack_mask_lo
+var s_restore_xnack_mask_hi = xnack_mask_hi
+var s_restore_buf_rsrc0 = ttmp8
+var s_restore_buf_rsrc1 = ttmp9
+var s_restore_buf_rsrc2 = ttmp10
+var s_restore_buf_rsrc3 = ttmp11
+var s_restore_ttmps_lo = s_restore_tmp //no conflict
+var s_restore_ttmps_hi = s_restore_alloc_size //no conflict
+
+/**************************************************************************/
+/* trap handler entry points */
+/**************************************************************************/
+/* Shader Main*/
+
+shader main
+ asic(GFX9)
+ type(CS)
+
+
+ if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL)) //hack to use trap_id for determining save/restore
+ //FIXME VCCZ un-init assertion s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC
+ s_and_b32 s_save_tmp, s_save_pc_hi, 0xffff0000 //change SCC
+ s_cmp_eq_u32 s_save_tmp, 0x007e0000 //Save: trap_id = 0x7e. Restore: trap_id = 0x7f.
+ s_cbranch_scc0 L_JUMP_TO_RESTORE //do not need to recover STATUS here since we are going to RESTORE
+ //FIXME s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status //need to recover STATUS since we are going to SAVE
+ s_branch L_SKIP_RESTORE //NOT restore, SAVE actually
+ else
+ s_branch L_SKIP_RESTORE //NOT restore. might be a regular trap or save
+ end
+
+L_JUMP_TO_RESTORE:
+ s_branch L_RESTORE //restore
+
+L_SKIP_RESTORE:
+
+ s_getreg_b32 s_save_status, hwreg(HW_REG_STATUS) //save STATUS since we will change SCC
+    s_andn2_b32     s_save_status, s_save_status, SQ_WAVE_STATUS_SPI_PRIO_MASK       //clear SPI_PRIO in the saved STATUS
+ s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+ s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_SAVECTX_MASK //check whether this is for save
+ s_cbranch_scc1 L_SAVE //this is the operation for save
+
+ // ********* Handle non-CWSR traps *******************
+if (!EMU_RUN_HACK)
+ // Illegal instruction is a non-maskable exception which blocks context save.
+ // Halt the wavefront and return from the trap.
+ s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_ILLEGAL_INST_MASK
+ s_cbranch_scc1 L_HALT_WAVE
+
+ // If STATUS.MEM_VIOL is asserted then we cannot fetch from the TMA.
+ // Instead, halt the wavefront and return from the trap.
+ s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK
+ s_cbranch_scc0 L_FETCH_2ND_TRAP
+
+L_HALT_WAVE:
+ // If STATUS.HALT is set then this fault must come from SQC instruction fetch.
+ // We cannot prevent further faults so just terminate the wavefront.
+ s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK
+ s_cbranch_scc0 L_NOT_ALREADY_HALTED
+ s_endpgm
+L_NOT_ALREADY_HALTED:
+ s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK
+
+ // If the PC points to S_ENDPGM then context save will fail if STATUS.HALT is set.
+ // Rewind the PC to prevent this from occurring. The debugger compensates for this.
+ s_sub_u32 ttmp0, ttmp0, 0x8
+ s_subb_u32 ttmp1, ttmp1, 0x0
+
+L_FETCH_2ND_TRAP:
+ // Preserve and clear scalar XNACK state before issuing scalar reads.
+ // Save IB_STS.FIRST_REPLAY[15] and IB_STS.RCNT[20:16] into unused space ttmp11[31:26].
+ s_getreg_b32 ttmp2, hwreg(HW_REG_IB_STS)
+ s_and_b32 ttmp3, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
+ s_lshl_b32 ttmp3, ttmp3, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
+ s_andn2_b32 ttmp11, ttmp11, TTMP11_SAVE_RCNT_FIRST_REPLAY_MASK
+ s_or_b32 ttmp11, ttmp11, ttmp3
+
+ s_andn2_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
+ s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2
+
+ // Read second-level TBA/TMA from first-level TMA and jump if available.
+ // ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data)
+ // ttmp12 holds SQ_WAVE_STATUS
+ s_getreg_b32 ttmp4, hwreg(HW_REG_SQ_SHADER_TMA_LO)
+ s_getreg_b32 ttmp5, hwreg(HW_REG_SQ_SHADER_TMA_HI)
+ s_lshl_b64 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8
+ s_load_dwordx2 [ttmp2, ttmp3], [ttmp4, ttmp5], 0x0 glc:1 // second-level TBA
+ s_waitcnt lgkmcnt(0)
+ s_load_dwordx2 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 glc:1 // second-level TMA
+ s_waitcnt lgkmcnt(0)
+ s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3]
+    s_cbranch_scc0  L_NO_NEXT_TRAP // second-level trap handler has not been set
+ s_setpc_b64 [ttmp2, ttmp3] // jump to second-level trap handler
+
+L_NO_NEXT_TRAP:
+ s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+ s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCE_MASK // Check whether it is an exception
+ s_cbranch_scc1 L_EXCP_CASE // Exception, jump back to the shader program directly.
+ s_add_u32 ttmp0, ttmp0, 4 // S_TRAP case, add 4 to ttmp0
+ s_addc_u32 ttmp1, ttmp1, 0
+L_EXCP_CASE:
+ s_and_b32 ttmp1, ttmp1, 0xFFFF
+
+ // Restore SQ_WAVE_IB_STS.
+ s_lshr_b32 ttmp2, ttmp11, (TTMP11_SAVE_RCNT_FIRST_REPLAY_SHIFT - SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT)
+ s_and_b32 ttmp2, ttmp2, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK
+ s_setreg_b32 hwreg(HW_REG_IB_STS), ttmp2
+
+ // Restore SQ_WAVE_STATUS.
+ s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
+ s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
+ s_setreg_b32 hwreg(HW_REG_STATUS), s_save_status
+
+ s_rfe_b64 [ttmp0, ttmp1]
+end
+ // ********* End handling of non-CWSR traps *******************
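
The second-level dispatch above assumes a small fixed layout at the address formed by shifting the 64-bit TMA register left by 8: the second-level handler's TBA at offset 0x0 and its TMA at offset 0x8. A C sketch of that record and the jump decision (struct and function names are illustrative, not from the patch):

    #include <stdint.h>

    struct first_level_tma {
            uint64_t second_level_tba;  /* 0x0: entry point, 0 if unset    */
            uint64_t second_level_tma;  /* 0x8: second-level handler's TMA */
    };

    /* Mirror of the s_cbranch_scc0 L_NO_NEXT_TRAP decision: jump to the
     * second level only when a handler has been installed. */
    static int second_level_entry(const struct first_level_tma *t,
                                  uint64_t *entry)
    {
            if (!t->second_level_tba)
                    return 0;                     /* L_NO_NEXT_TRAP     */
            *entry = t->second_level_tba;         /* s_setpc_b64 target */
            return 1;
    }

    int main(void)
    {
            struct first_level_tma t = { 0, 0 };
            uint64_t entry;
            return second_level_entry(&t, &entry); /* 0: no next trap */
    }
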
+
+/**************************************************************************/
+/* save routine */
+/**************************************************************************/
+
+L_SAVE:
+
+if G8SR_DEBUG_TIMESTAMP
+ s_memrealtime s_g8sr_ts_save_s
+ s_waitcnt lgkmcnt(0) //FIXME, will cause xnack??
+end
+
+ s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32]
+
+ s_mov_b32 s_save_tmp, 0 //clear saveCtx bit
+ s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit
+
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_RCNT_SHIFT, SQ_WAVE_IB_STS_RCNT_SIZE) //save RCNT
+ s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_RCNT_SHIFT
+ s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT, SQ_WAVE_IB_STS_FIRST_REPLAY_SIZE) //save FIRST_REPLAY
+ s_lshl_b32 s_save_tmp, s_save_tmp, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
+ s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
+ s_getreg_b32 s_save_tmp, hwreg(HW_REG_IB_STS) //clear RCNT and FIRST_REPLAY in IB_STS
+ s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_IB_STS_RCNT_FIRST_REPLAY_MASK_NEG
+
+ s_setreg_b32 hwreg(HW_REG_IB_STS), s_save_tmp
+
+ /* inform SPI the readiness and wait for SPI's go signal */
+ s_mov_b32 s_save_exec_lo, exec_lo //save EXEC and use EXEC for the go signal from SPI
+ s_mov_b32 s_save_exec_hi, exec_hi
+ s_mov_b64 exec, 0x0 //clear EXEC to get ready to receive
+
+if G8SR_DEBUG_TIMESTAMP
+ s_memrealtime s_g8sr_ts_sq_save_msg
+ s_waitcnt lgkmcnt(0)
+end
+
+ if (EMU_RUN_HACK)
+
+ else
+ s_sendmsg sendmsg(MSG_SAVEWAVE) //send SPI a message and wait for SPI's write to EXEC
+ end
+
+ // Set SPI_PRIO=2 to avoid starving instruction fetch in the waves we're waiting for.
+ s_or_b32 s_save_tmp, s_save_status, (2 << SQ_WAVE_STATUS_SPI_PRIO_SHIFT)
+ s_setreg_b32 hwreg(HW_REG_STATUS), s_save_tmp
+
+ L_SLEEP:
+    s_sleep 0x2                // sleep 1 (64 clk) is not enough for 8 waves per SIMD and will cause an SQ hang: the 7th/8th wave cannot win arbitration to execute instructions while the other waves are stuck in the sleep loop waiting for wrexec != 0
+
+ if (EMU_RUN_HACK)
+
+ else
+ s_cbranch_execz L_SLEEP
+ end
+
+if G8SR_DEBUG_TIMESTAMP
+ s_memrealtime s_g8sr_ts_spi_wrexec
+ s_waitcnt lgkmcnt(0)
+end
+
+ if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_SINGLE_WAVE))
+ //calculate wd_addr using absolute thread id
+ v_readlane_b32 s_save_tmp, v9, 0
+ s_lshr_b32 s_save_tmp, s_save_tmp, 6
+ s_mul_i32 s_save_tmp, s_save_tmp, WAVE_SPACE
+ s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO
+ s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI
+ s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL
+ else
+ end
+ if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_SINGLE_WAVE))
+ s_add_i32 s_save_spi_init_lo, s_save_tmp, WG_BASE_ADDR_LO
+ s_mov_b32 s_save_spi_init_hi, WG_BASE_ADDR_HI
+ s_and_b32 s_save_spi_init_hi, s_save_spi_init_hi, CTX_SAVE_CONTROL
+ else
+ end
+
+ // Save trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic
+ // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
+ get_vgpr_size_bytes(s_save_ttmps_lo)
+ get_sgpr_size_bytes(s_save_ttmps_hi)
+ s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_ttmps_hi
+ s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo
+ s_addc_u32 s_save_ttmps_hi, s_save_spi_init_hi, 0x0
+ s_and_b32 s_save_ttmps_hi, s_save_ttmps_hi, 0xFFFF
+ s_store_dwordx2 [ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x40 glc:1
+ ack_sqc_store_workaround()
+ s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x48 glc:1
+ ack_sqc_store_workaround()
+ s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x58 glc:1
+ ack_sqc_store_workaround()
+ s_store_dwordx2 [ttmp14, ttmp15], [s_save_ttmps_lo, s_save_ttmps_hi], 0x5C glc:1
+ ack_sqc_store_workaround()
+
+    /* setup resource constants */
+ s_mov_b32 s_save_buf_rsrc0, s_save_spi_init_lo //base_addr_lo
+ s_and_b32 s_save_buf_rsrc1, s_save_spi_init_hi, 0x0000FFFF //base_addr_hi
+ s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE
+    s_mov_b32       s_save_buf_rsrc2,  0                                //NUM_RECORDS initial value = 0 (in bytes) although not necessarily initialized
+ s_mov_b32 s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC
+ s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_ATC_MASK
+ s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position
+ s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or ATC
+ s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_MTYPE_MASK
+ s_lshr_b32 s_save_tmp, s_save_tmp, (S_SAVE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position
+ s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, s_save_tmp //or MTYPE
+
+ //FIXME right now s_save_m0/s_save_mem_offset use tma_lo/tma_hi (might need to save them before using them?)
+ s_mov_b32 s_save_m0, m0 //save M0
+
+ /* global mem offset */
+ s_mov_b32 s_save_mem_offset, 0x0 //mem offset initial value = 0
+
+
+
+
+ /* save HW registers */
+ //////////////////////////////
+
+ L_SAVE_HWREG:
+ // HWREG SR memory offset : size(VGPR)+size(SGPR)
+ get_vgpr_size_bytes(s_save_mem_offset)
+ get_sgpr_size_bytes(s_save_tmp)
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp
+
+
+ s_mov_b32 s_save_buf_rsrc2, 0x4 //NUM_RECORDS in bytes
+ if (SWIZZLE_EN)
+ s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
+ else
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ end
+
+
+ write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset) //M0
+
+ if ((EMU_RUN_HACK) && (EMU_RUN_HACK_SAVE_FIRST_TIME))
+ s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4
+ s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over
+ end
+
+ write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset) //PC
+ write_hwreg_to_mem(s_save_pc_hi, s_save_buf_rsrc0, s_save_mem_offset)
+ write_hwreg_to_mem(s_save_exec_lo, s_save_buf_rsrc0, s_save_mem_offset) //EXEC
+ write_hwreg_to_mem(s_save_exec_hi, s_save_buf_rsrc0, s_save_mem_offset)
+ write_hwreg_to_mem(s_save_status, s_save_buf_rsrc0, s_save_mem_offset) //STATUS
+
+ //s_save_trapsts conflicts with s_save_alloc_size
+ s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS)
+ write_hwreg_to_mem(s_save_trapsts, s_save_buf_rsrc0, s_save_mem_offset) //TRAPSTS
+
+ write_hwreg_to_mem(xnack_mask_lo, s_save_buf_rsrc0, s_save_mem_offset) //XNACK_MASK_LO
+ write_hwreg_to_mem(xnack_mask_hi, s_save_buf_rsrc0, s_save_mem_offset) //XNACK_MASK_HI
+
+    //using s_save_tmp here would introduce a conflict between s_save_tmp and s_save_buf_rsrc2
+ s_getreg_b32 s_save_m0, hwreg(HW_REG_MODE) //MODE
+ write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
+
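The ten write_hwreg_to_mem() calls above fix the dword order of the HWREG record, and L_RESTORE_HWREG reads it back in the same order; the area itself is reserved at the 128 bytes returned by get_hwreg_size_bytes(). A C view of that record, with illustrative names:

    #include <stdint.h>

    struct cwsr_hwreg_record {
            uint32_t m0;
            uint32_t pc_lo, pc_hi;
            uint32_t exec_lo, exec_hi;
            uint32_t status;
            uint32_t trapsts;
            uint32_t xnack_mask_lo, xnack_mask_hi;
            uint32_t mode;
            uint32_t pad[22];               /* (10 + 22) * 4 = 128 bytes */
    };

    _Static_assert(sizeof(struct cwsr_hwreg_record) == 128,
                   "HWREG save area is 128 bytes");

    int main(void) { return 0; }
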
+
+
+ /* the first wave in the threadgroup */
+    s_and_b32       s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK  // extract first wave bit
+ s_mov_b32 s_save_exec_hi, 0x0
+ s_or_b32 s_save_exec_hi, s_save_tmp, s_save_exec_hi // save first wave bit to s_save_exec_hi.bits[26]
+
+
+ /* save SGPRs */
+    // Save SGPRs before the LDS save, so s0 to s4 can be used during the LDS save...
+ //////////////////////////////
+
+ // SGPR SR memory offset : size(VGPR)
+ get_vgpr_size_bytes(s_save_mem_offset)
+ // TODO, change RSRC word to rearrange memory layout for SGPRS
+
+    s_getreg_b32    s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE)  //sgpr_size
+ s_add_u32 s_save_alloc_size, s_save_alloc_size, 1
+ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 4 //Number of SGPRs = (sgpr_size + 1) * 16 (non-zero value)
+
+ if (SGPR_SAVE_USE_SQC)
+ s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 2 //NUM_RECORDS in bytes
+ else
+ s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads)
+ end
+
+ if (SWIZZLE_EN)
+ s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
+ else
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ end
+
+
+ // backup s_save_buf_rsrc0,1 to s_save_pc_lo/hi, since write_16sgpr_to_mem function will change the rsrc0
+ //s_mov_b64 s_save_pc_lo, s_save_buf_rsrc0
+ s_mov_b64 s_save_xnack_mask_lo, s_save_buf_rsrc0
+ s_add_u32 s_save_buf_rsrc0, s_save_buf_rsrc0, s_save_mem_offset
+ s_addc_u32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0
+
+ s_mov_b32 m0, 0x0 //SGPR initial index value =0
+ s_nop 0x0 //Manually inserted wait states
+ L_SAVE_SGPR_LOOP:
+ // SGPR is allocated in 16 SGPR granularity
+ s_movrels_b64 s0, s0 //s0 = s[0+m0], s1 = s[1+m0]
+ s_movrels_b64 s2, s2 //s2 = s[2+m0], s3 = s[3+m0]
+ s_movrels_b64 s4, s4 //s4 = s[4+m0], s5 = s[5+m0]
+ s_movrels_b64 s6, s6 //s6 = s[6+m0], s7 = s[7+m0]
+ s_movrels_b64 s8, s8 //s8 = s[8+m0], s9 = s[9+m0]
+ s_movrels_b64 s10, s10 //s10 = s[10+m0], s11 = s[11+m0]
+ s_movrels_b64 s12, s12 //s12 = s[12+m0], s13 = s[13+m0]
+ s_movrels_b64 s14, s14 //s14 = s[14+m0], s15 = s[15+m0]
+
+ write_16sgpr_to_mem(s0, s_save_buf_rsrc0, s_save_mem_offset) //PV: the best performance should be using s_buffer_store_dwordx4
+ s_add_u32 m0, m0, 16 //next sgpr index
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_SGPR_LOOP //SGPR save is complete?
+ // restore s_save_buf_rsrc0,1
+ //s_mov_b64 s_save_buf_rsrc0, s_save_pc_lo
+ s_mov_b64 s_save_buf_rsrc0, s_save_xnack_mask_lo
+
+
+
+
+    /* save the first 4 VGPRs, so the LDS save can then use them */
+    // each wave will allocate at least 4 VGPRs...
+ /////////////////////////////////////////////////////////////////////////////////////
+
+ s_mov_b32 s_save_mem_offset, 0
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+ s_mov_b32 xnack_mask_lo, 0x0
+ s_mov_b32 xnack_mask_hi, 0x0
+
+ if (SWIZZLE_EN)
+ s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
+ else
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ end
+
+
+ // VGPR Allocated in 4-GPR granularity
+
+if G8SR_VGPR_SR_IN_DWX4
+ // the const stride for DWx4 is 4*4 bytes
+ s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0
+ s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes
+
+ buffer_store_dwordx4 v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+
+ s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0
+ s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE // reset const stride to 4 bytes
+else
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3
+end
+
+
+
+ /* save LDS */
+ //////////////////////////////
+
+ L_SAVE_LDS:
+
+ // Change EXEC to all threads...
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+
+ s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) //lds_size
+ s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //lds_size is zero?
+    s_cbranch_scc0  L_SAVE_LDS_DONE                                    //no LDS used? jump to L_SAVE_LDS_DONE
+
+ s_barrier //LDS is used? wait for other waves in the same TG
+ s_and_b32 s_save_tmp, s_save_exec_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK //exec is still used here
+ s_cbranch_scc0 L_SAVE_LDS_DONE
+
+    // first wave does the LDS save;
+
+ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 6 //LDS size in dwords = lds_size * 64dw
+ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //LDS size in bytes
+ s_mov_b32 s_save_buf_rsrc2, s_save_alloc_size //NUM_RECORDS in bytes
+
+ // LDS at offset: size(VGPR)+SIZE(SGPR)+SIZE(HWREG)
+ //
+ get_vgpr_size_bytes(s_save_mem_offset)
+ get_sgpr_size_bytes(s_save_tmp)
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, get_hwreg_size_bytes()
+
+
+ if (SWIZZLE_EN)
+ s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
+ else
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ end
+
+ s_mov_b32 m0, 0x0 //lds_offset initial value = 0
+
+
+var LDS_DMA_ENABLE = 0
+var UNROLL = 0
+if UNROLL==0 && LDS_DMA_ENABLE==1
+ s_mov_b32 s3, 256*2
+ s_nop 0
+ s_nop 0
+ s_nop 0
+ L_SAVE_LDS_LOOP:
+    //TODO: it looks like the 2-instruction buffer_store/load clause for save/restore will hurt performance???
+ if (SAVE_LDS) //SPI always alloc LDS space in 128DW granularity
+ buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 // first 64DW
+ buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:256 // second 64DW
+ end
+
+ s_add_u32 m0, m0, s3 //every buffer_store_lds does 256 bytes
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, s3 //mem offset increased by 256 bytes
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_LDS_LOOP //LDS save is complete?
+
+elsif LDS_DMA_ENABLE==1 && UNROLL==1 // UNROLL, has icache misses
+    // store from highest LDS address to lowest
+ s_mov_b32 s3, 256*2
+ s_sub_u32 m0, s_save_alloc_size, s3
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, m0
+    s_lshr_b32      s_save_alloc_size, s_save_alloc_size, 9            // how many 128-DW chunks...
+    s_sub_u32       s_save_alloc_size, 128, s_save_alloc_size          // store from highest addr to lowest
+    s_mul_i32       s_save_alloc_size, s_save_alloc_size, 6*4          // PC offset increment; each LDS save block costs 6*4 bytes of instructions
+    s_add_u32       s_save_alloc_size, s_save_alloc_size, 3*4          // plus the s_add/s_addc/s_setpc instructions below
+ s_nop 0
+ s_nop 0
+ s_nop 0 //pad 3 dw to let LDS_DMA align with 64Bytes
+ s_getpc_b64 s[0:1] // reuse s[0:1], since s[0:1] already saved
+ s_add_u32 s0, s0,s_save_alloc_size
+ s_addc_u32 s1, s1, 0
+ s_setpc_b64 s[0:1]
+
+
+ for var i =0; i< 128; i++
+       // be careful to make this a 64-byte-aligned address, which could improve performance...
+ buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:0 // first 64DW
+ buffer_store_lds_dword s_save_buf_rsrc0, s_save_mem_offset lds:1 offset:256 // second 64DW
+
+ if i!=127
+       s_sub_u32       m0, m0, s3              // use an SGPR to shrink the 2-DW instruction to a 1-DW instruction and improve performance, i.e. pack more LDS_DMA instructions into one cacheline
+ s_sub_u32 s_save_mem_offset, s_save_mem_offset, s3
+ end
+ end
+
+else // BUFFER_STORE
+ v_mbcnt_lo_u32_b32 v2, 0xffffffff, 0x0
+ v_mbcnt_hi_u32_b32 v3, 0xffffffff, v2 // tid
+ v_mul_i32_i24 v2, v3, 8 // tid*8
+ v_mov_b32 v3, 256*2
+ s_mov_b32 m0, 0x10000
+ s_mov_b32 s0, s_save_buf_rsrc3
+ s_and_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, 0xFF7FFFFF // disable add_tid
+ s_or_b32 s_save_buf_rsrc3, s_save_buf_rsrc3, 0x58000 //DFMT
+
+L_SAVE_LDS_LOOP_VECTOR:
+ ds_read_b64 v[0:1], v2 //x =LDS[a], byte address
+ s_waitcnt lgkmcnt(0)
+ buffer_store_dwordx2 v[0:1], v2, s_save_buf_rsrc0, s_save_mem_offset offen:1 glc:1 slc:1
+// s_waitcnt vmcnt(0)
+// v_add_u32 v2, vcc[0:1], v2, v3
+ v_add_u32 v2, v2, v3
+ v_cmp_lt_u32 vcc[0:1], v2, s_save_alloc_size
+ s_cbranch_vccnz L_SAVE_LDS_LOOP_VECTOR
+
+ // restore rsrc3
+ s_mov_b32 s_save_buf_rsrc3, s0
+
+end
+
+L_SAVE_LDS_DONE:
+
+
+    /* save VGPRs - save the rest of the VGPRs */
+ //////////////////////////////////////////////////////////////////////////////////////
+ L_SAVE_VGPR:
+ // VGPR SR memory offset: 0
+ // TODO rearrange the RSRC words to use swizzle for VGPR save...
+
+    s_mov_b32       s_save_mem_offset, (0+256*4)                       // for the remaining VGPRs
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+
+    s_getreg_b32    s_save_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)  //vgpr_size
+ s_add_u32 s_save_alloc_size, s_save_alloc_size, 1
+ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value) //FIXME for GFX, zero is possible
+ s_lshl_b32 s_save_buf_rsrc2, s_save_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4)
+ if (SWIZZLE_EN)
+ s_add_u32 s_save_buf_rsrc2, s_save_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
+ else
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ end
+
+
+ // VGPR Allocated in 4-GPR granularity
+
+if G8SR_VGPR_SR_IN_DWX4
+ // the const stride for DWx4 is 4*4 bytes
+ s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0
+ s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, G8SR_SAVE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes
+
+ s_mov_b32 m0, 4 // skip first 4 VGPRs
+ s_cmp_lt_u32 m0, s_save_alloc_size
+ s_cbranch_scc0 L_SAVE_VGPR_LOOP_END // no more vgprs
+
+ s_set_gpr_idx_on m0, 0x1 // This will change M0
+ s_add_u32 s_save_alloc_size, s_save_alloc_size, 0x1000 // because above inst change m0
+L_SAVE_VGPR_LOOP:
+ v_mov_b32 v0, v0 // v0 = v[0+m0]
+ v_mov_b32 v1, v1
+ v_mov_b32 v2, v2
+ v_mov_b32 v3, v3
+
+
+ buffer_store_dwordx4 v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+ s_add_u32 m0, m0, 4
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4
+ s_cmp_lt_u32 m0, s_save_alloc_size
+ s_cbranch_scc1 L_SAVE_VGPR_LOOP //VGPR save is complete?
+ s_set_gpr_idx_off
+L_SAVE_VGPR_LOOP_END:
+
+ s_and_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, 0x0000FFFF // reset const stride to 0
+ s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE // reset const stride to 4 bytes
+else
+ // VGPR store using dw burst
+    s_mov_b32       m0, 0x4                                            //VGPR initial index value = 4 (v0..v3 already saved)
+ s_cmp_lt_u32 m0, s_save_alloc_size
+ s_cbranch_scc0 L_SAVE_VGPR_END
+
+
+ s_set_gpr_idx_on m0, 0x1 //M0[7:0] = M0[7:0] and M0[15:12] = 0x1
+ s_add_u32 s_save_alloc_size, s_save_alloc_size, 0x1000 //add 0x1000 since we compare m0 against it later
+
+ L_SAVE_VGPR_LOOP:
+        v_mov_b32       v0, v0          //v0 = v[0+m0]
+        v_mov_b32       v1, v1          //v1 = v[1+m0]
+        v_mov_b32       v2, v2          //v2 = v[2+m0]
+        v_mov_b32       v3, v3          //v3 = v[3+m0]
+
+ if(USE_MTBUF_INSTEAD_OF_MUBUF)
+ tbuffer_store_format_x v0, v0, s_save_buf_rsrc0, s_save_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1
+ else
+ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1
+ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256
+ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*2
+ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset slc:1 glc:1 offset:256*3
+ end
+
+ s_add_u32 m0, m0, 4 //next vgpr index
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 //every buffer_store_dword does 256 bytes
+ s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_SAVE_VGPR_LOOP //VGPR save is complete?
+ s_set_gpr_idx_off
+end
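
The 0x1000 bias above deserves a note: with s_set_gpr_idx_on active, M0[15:12] = 0x1, so m0 reads back as index | 0x1000; biasing the loop bound by the same constant keeps the plain unsigned compare valid. A plain C model of the loop arithmetic (4 VGPRs per iteration, as in the shader; the allocation value is an example):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t alloc = 24;              /* VGPRs allocated (example) */
            uint32_t m0    = 4 | 0x1000;      /* skip v0..v3; idx mode on  */
            uint32_t bound = alloc + 0x1000;  /* biased by the same 0x1000 */

            while (m0 < bound) {              /* s_cmp_lt_u32 m0, bound    */
                    printf("save v[%u..%u]\n", m0 & 0xFFFu, (m0 & 0xFFFu) + 3);
                    m0 += 4;                  /* next 4-VGPR group         */
            }
            return 0;
    }
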
+
+L_SAVE_VGPR_END:
+
+
+
+
+
+
+ /* S_PGM_END_SAVED */ //FIXME graphics ONLY
+ if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_SAVE_NORMAL_EXIT))
+ s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32]
+ s_add_u32 s_save_pc_lo, s_save_pc_lo, 4 //pc[31:0]+4
+ s_addc_u32 s_save_pc_hi, s_save_pc_hi, 0x0 //carry bit over
+ s_rfe_b64 s_save_pc_lo //Return to the main shader program
+ else
+ end
+
+// Save Done timestamp
+if G8SR_DEBUG_TIMESTAMP
+ s_memrealtime s_g8sr_ts_save_d
+ // SGPR SR memory offset : size(VGPR)
+ get_vgpr_size_bytes(s_save_mem_offset)
+ s_add_u32 s_save_mem_offset, s_save_mem_offset, G8SR_DEBUG_TS_SAVE_D_OFFSET
+ s_waitcnt lgkmcnt(0) //FIXME, will cause xnack??
+ // Need reset rsrc2??
+ s_mov_b32 m0, s_save_mem_offset
+ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ s_buffer_store_dwordx2 s_g8sr_ts_save_d, s_save_buf_rsrc0, m0 glc:1
+end
+
+
+ s_branch L_END_PGM
+
+
+
+/**************************************************************************/
+/* restore routine */
+/**************************************************************************/
+
+L_RESTORE:
+    /* Setup Resource Constants */
+ if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL))
+ //calculate wd_addr using absolute thread id
+ v_readlane_b32 s_restore_tmp, v9, 0
+ s_lshr_b32 s_restore_tmp, s_restore_tmp, 6
+ s_mul_i32 s_restore_tmp, s_restore_tmp, WAVE_SPACE
+ s_add_i32 s_restore_spi_init_lo, s_restore_tmp, WG_BASE_ADDR_LO
+ s_mov_b32 s_restore_spi_init_hi, WG_BASE_ADDR_HI
+ s_and_b32 s_restore_spi_init_hi, s_restore_spi_init_hi, CTX_RESTORE_CONTROL
+ else
+ end
+
+if G8SR_DEBUG_TIMESTAMP
+ s_memrealtime s_g8sr_ts_restore_s
+ s_waitcnt lgkmcnt(0) //FIXME, will cause xnack??
+    // tma_lo/hi are SGPRs 110 and 111, which will not be used in the 112-SGPR-allocation case...
+ s_mov_b32 s_restore_pc_lo, s_g8sr_ts_restore_s[0]
+    s_mov_b32 s_restore_pc_hi, s_g8sr_ts_restore_s[1]   //back up the timestamp to ttmp0/1, since exec will eventually be restored..
+end
+
+
+
+ s_mov_b32 s_restore_buf_rsrc0, s_restore_spi_init_lo //base_addr_lo
+ s_and_b32 s_restore_buf_rsrc1, s_restore_spi_init_hi, 0x0000FFFF //base_addr_hi
+ s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE
+ s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes)
+ s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC
+ s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_ATC_MASK
+ s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_ATC_SHIFT-SQ_BUF_RSRC_WORD1_ATC_SHIFT) //get ATC bit into position
+ s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or ATC
+ s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_MTYPE_MASK
+ s_lshr_b32 s_restore_tmp, s_restore_tmp, (S_RESTORE_SPI_INIT_MTYPE_SHIFT-SQ_BUF_RSRC_WORD3_MTYPE_SHIFT) //get MTYPE bits into position
+ s_or_b32 s_restore_buf_rsrc3, s_restore_buf_rsrc3, s_restore_tmp //or MTYPE
+
+ /* global mem offset */
+// s_mov_b32 s_restore_mem_offset, 0x0 //mem offset initial value = 0
+
+ /* the first wave in the threadgroup */
+ s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK
+ s_cbranch_scc0 L_RESTORE_VGPR
+
+ /* restore LDS */
+ //////////////////////////////
+ L_RESTORE_LDS:
+
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on //be consistent with SAVE although can be moved ahead
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+
+ s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) //lds_size
+ s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //lds_size is zero?
+ s_cbranch_scc0 L_RESTORE_VGPR //no lds used? jump to L_RESTORE_VGPR
+ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 6 //LDS size in dwords = lds_size * 64dw
+ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //LDS size in bytes
+ s_mov_b32 s_restore_buf_rsrc2, s_restore_alloc_size //NUM_RECORDS in bytes
+
+ // LDS at offset: size(VGPR)+SIZE(SGPR)+SIZE(HWREG)
+ //
+ get_vgpr_size_bytes(s_restore_mem_offset)
+ get_sgpr_size_bytes(s_restore_tmp)
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_hwreg_size_bytes() //FIXME, Check if offset overflow???
+
+
+ if (SWIZZLE_EN)
+ s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
+ else
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ end
+ s_mov_b32 m0, 0x0 //lds_offset initial value = 0
+
+ L_RESTORE_LDS_LOOP:
+ if (SAVE_LDS)
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 // first 64DW
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset lds:1 offset:256 // second 64DW
+ end
+ s_add_u32 m0, m0, 256*2 // 128 DW
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*2 //mem offset increased by 128DW
+ s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0
+ s_cbranch_scc1 L_RESTORE_LDS_LOOP //LDS restore is complete?
+
+
+ /* restore VGPRs */
+ //////////////////////////////
+ L_RESTORE_VGPR:
+ // VGPR SR memory offset : 0
+ s_mov_b32 s_restore_mem_offset, 0x0
+ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on //be consistent with SAVE although can be moved ahead
+ s_mov_b32 exec_hi, 0xFFFFFFFF
+
+    s_getreg_b32    s_restore_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)  //vgpr_size
+ s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1
+ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value)
+ s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads*4)
+ if (SWIZZLE_EN)
+ s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
+ else
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ end
+
+if G8SR_VGPR_SR_IN_DWX4
+ get_vgpr_size_bytes(s_restore_mem_offset)
+ s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4
+
+ // the const stride for DWx4 is 4*4 bytes
+ s_and_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, 0x0000FFFF // reset const stride to 0
+ s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, G8SR_RESTORE_BUF_RSRC_WORD1_STRIDE_DWx4 // const stride to 4*4 bytes
+
+ s_mov_b32 m0, s_restore_alloc_size
+ s_set_gpr_idx_on m0, 0x8 // Note.. This will change m0
+
+L_RESTORE_VGPR_LOOP:
+ buffer_load_dwordx4 v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
+ s_waitcnt vmcnt(0)
+ s_sub_u32 m0, m0, 4
+ v_mov_b32 v0, v0 // v[0+m0] = v0
+ v_mov_b32 v1, v1
+ v_mov_b32 v2, v2
+ v_mov_b32 v3, v3
+ s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4
+ s_cmp_eq_u32 m0, 0x8000
+ s_cbranch_scc0 L_RESTORE_VGPR_LOOP
+ s_set_gpr_idx_off
+
+ s_and_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, 0x0000FFFF // reset const stride to 0
+    s_or_b32        s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE  // reset const stride to 4 bytes
+
+else
+ // VGPR load using dw burst
+    s_mov_b32       s_restore_mem_offset_save, s_restore_mem_offset    // restore starts with v4; v0..v3 will be restored last
+    s_add_u32       s_restore_mem_offset, s_restore_mem_offset, 256*4
+    s_mov_b32       m0, 4                                              //VGPR initial index value = 4
+ s_set_gpr_idx_on m0, 0x8 //M0[7:0] = M0[7:0] and M0[15:12] = 0x8
+ s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 0x8000 //add 0x8000 since we compare m0 against it later
+
+ L_RESTORE_VGPR_LOOP:
+ if(USE_MTBUF_INSTEAD_OF_MUBUF)
+ tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1
+ else
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1
+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256
+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*2
+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset slc:1 glc:1 offset:256*3
+ end
+ s_waitcnt vmcnt(0) //ensure data ready
+ v_mov_b32 v0, v0 //v[0+m0] = v0
+ v_mov_b32 v1, v1
+ v_mov_b32 v2, v2
+ v_mov_b32 v3, v3
+ s_add_u32 m0, m0, 4 //next vgpr index
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 //every buffer_load_dword does 256 bytes
+ s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0
+        s_cbranch_scc1  L_RESTORE_VGPR_LOOP                            //VGPR restore (except v0..v3) is complete?
+        s_set_gpr_idx_off
+        /* VGPR restore on v0..v3 */
+ if(USE_MTBUF_INSTEAD_OF_MUBUF)
+ tbuffer_load_format_x v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save format:BUF_NUM_FORMAT_FLOAT format: BUF_DATA_FORMAT_32 slc:1 glc:1
+ else
+ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1
+ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256
+ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*2
+ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save slc:1 glc:1 offset:256*3
+ end
+
+end
+
+ /* restore SGPRs */
+ //////////////////////////////
+
+ // SGPR SR memory offset : size(VGPR)
+ get_vgpr_size_bytes(s_restore_mem_offset)
+ get_sgpr_size_bytes(s_restore_tmp)
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+    s_sub_u32       s_restore_mem_offset, s_restore_mem_offset, 16*4   // restore SGPRs from S[n] down to S[0], in groups of 16
+ // TODO, change RSRC word to rearrange memory layout for SGPRS
+
+    s_getreg_b32    s_restore_alloc_size, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE)  //sgpr_size
+ s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1
+ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 4 //Number of SGPRs = (sgpr_size + 1) * 16 (non-zero value)
+
+ if (SGPR_SAVE_USE_SQC)
+ s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 2 //NUM_RECORDS in bytes
+ else
+ s_lshl_b32 s_restore_buf_rsrc2, s_restore_alloc_size, 8 //NUM_RECORDS in bytes (64 threads)
+ end
+ if (SWIZZLE_EN)
+ s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
+ else
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ end
+
+ s_mov_b32 m0, s_restore_alloc_size
+
+ L_RESTORE_SGPR_LOOP:
+ read_16sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset) //PV: further performance improvement can be made
+ s_waitcnt lgkmcnt(0) //ensure data ready
+
+ s_sub_u32 m0, m0, 16 // Restore from S[n] to S[0]
+ s_nop 0 // hazard SALU M0=> S_MOVREL
+
+ s_movreld_b64 s0, s0 //s[0+m0] = s0
+ s_movreld_b64 s2, s2
+ s_movreld_b64 s4, s4
+ s_movreld_b64 s6, s6
+ s_movreld_b64 s8, s8
+ s_movreld_b64 s10, s10
+ s_movreld_b64 s12, s12
+ s_movreld_b64 s14, s14
+
+        s_cmp_eq_u32    m0, 0                           //scc = (m0 == 0) ? 1 : 0
+        s_cbranch_scc0  L_RESTORE_SGPR_LOOP             //SGPR restore is complete?
+
+ /* restore HW registers */
+ //////////////////////////////
+ L_RESTORE_HWREG:
+
+
+if G8SR_DEBUG_TIMESTAMP
+ s_mov_b32 s_g8sr_ts_restore_s[0], s_restore_pc_lo
+ s_mov_b32 s_g8sr_ts_restore_s[1], s_restore_pc_hi
+end
+
+ // HWREG SR memory offset : size(VGPR)+size(SGPR)
+ get_vgpr_size_bytes(s_restore_mem_offset)
+ get_sgpr_size_bytes(s_restore_tmp)
+ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
+
+
+ s_mov_b32 s_restore_buf_rsrc2, 0x4 //NUM_RECORDS in bytes
+ if (SWIZZLE_EN)
+ s_add_u32 s_restore_buf_rsrc2, s_restore_buf_rsrc2, 0x0 //FIXME need to use swizzle to enable bounds checking?
+ else
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+ end
+
+ read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset) //M0
+ read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //PC
+ read_hwreg_from_mem(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_exec_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //EXEC
+ read_hwreg_from_mem(s_restore_exec_hi, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_status, s_restore_buf_rsrc0, s_restore_mem_offset) //STATUS
+ read_hwreg_from_mem(s_restore_trapsts, s_restore_buf_rsrc0, s_restore_mem_offset) //TRAPSTS
+ read_hwreg_from_mem(xnack_mask_lo, s_restore_buf_rsrc0, s_restore_mem_offset) //XNACK_MASK_LO
+ read_hwreg_from_mem(xnack_mask_hi, s_restore_buf_rsrc0, s_restore_mem_offset) //XNACK_MASK_HI
+ read_hwreg_from_mem(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset) //MODE
+
+ s_waitcnt lgkmcnt(0) //from now on, it is safe to restore STATUS and IB_STS
+
+ //for normal save & restore, the saved PC points to the next inst to execute, no adjustment needs to be made, otherwise:
+ if ((EMU_RUN_HACK) && (!EMU_RUN_HACK_RESTORE_NORMAL))
+ s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 8 //pc[31:0]+8 //two back-to-back s_trap are used (first for save and second for restore)
+ s_addc_u32 s_restore_pc_hi, s_restore_pc_hi, 0x0 //carry bit over
+ end
+ if ((EMU_RUN_HACK) && (EMU_RUN_HACK_RESTORE_NORMAL))
+ s_add_u32 s_restore_pc_lo, s_restore_pc_lo, 4 //pc[31:0]+4 // save is hack through s_trap but restore is normal
+ s_addc_u32 s_restore_pc_hi, s_restore_pc_hi, 0x0 //carry bit over
+ end
+
+ s_mov_b32 m0, s_restore_m0
+ s_mov_b32 exec_lo, s_restore_exec_lo
+ s_mov_b32 exec_hi, s_restore_exec_hi
+
+ s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_PRE_SAVECTX_MASK, s_restore_trapsts
+ s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_PRE_SAVECTX_SIZE), s_restore_m0
+ s_and_b32 s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_MASK, s_restore_trapsts
+ s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT
+ s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_POST_SAVECTX_SHIFT, SQ_WAVE_TRAPSTS_POST_SAVECTX_SIZE), s_restore_m0
+ //s_setreg_b32 hwreg(HW_REG_TRAPSTS), s_restore_trapsts //don't overwrite SAVECTX bit as it may be set through external SAVECTX during restore
+ s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode
+
+ // Restore trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic
+ // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40
+ get_vgpr_size_bytes(s_restore_ttmps_lo)
+ get_sgpr_size_bytes(s_restore_ttmps_hi)
+ s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_ttmps_hi
+ s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0
+ s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0
+ s_and_b32 s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF
+ s_load_dwordx2 [ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x40 glc:1
+ s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x48 glc:1
+ s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x58 glc:1
+ s_load_dwordx2 [ttmp14, ttmp15], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x5C glc:1
+ s_waitcnt lgkmcnt(0)
+
+ //reuse s_restore_m0 as a temp register
+ s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_RCNT_MASK
+ s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_RCNT_SHIFT
+ s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_RCNT_SHIFT
+ s_mov_b32 s_restore_tmp, 0x0 //IB_STS is zero
+ s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0
+ s_and_b32 s_restore_m0, s_restore_pc_hi, S_SAVE_PC_HI_FIRST_REPLAY_MASK
+ s_lshr_b32 s_restore_m0, s_restore_m0, S_SAVE_PC_HI_FIRST_REPLAY_SHIFT
+ s_lshl_b32 s_restore_m0, s_restore_m0, SQ_WAVE_IB_STS_FIRST_REPLAY_SHIFT
+ s_or_b32 s_restore_tmp, s_restore_tmp, s_restore_m0
+ s_and_b32 s_restore_m0, s_restore_status, SQ_WAVE_STATUS_INST_ATC_MASK
+ s_lshr_b32 s_restore_m0, s_restore_m0, SQ_WAVE_STATUS_INST_ATC_SHIFT
+ s_setreg_b32 hwreg(HW_REG_IB_STS), s_restore_tmp
+
+ s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
+ s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
+ s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
+ s_setreg_b32 hwreg(HW_REG_STATUS), s_restore_status // SCC is included, which is changed by previous salu
+
+ s_barrier //barrier to ensure the readiness of LDS before access attempts from any other wave in the same TG //FIXME not performance-optimal at this time
+
+if G8SR_DEBUG_TIMESTAMP
+ s_memrealtime s_g8sr_ts_restore_d
+ s_waitcnt lgkmcnt(0)
+end
+
+// s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution
+ s_rfe_restore_b64 s_restore_pc_lo, s_restore_m0 // s_restore_m0[0] is used to set STATUS.inst_atc
+
+
+/**************************************************************************/
+/* the END */
+/**************************************************************************/
+L_END_PGM:
+ s_endpgm
+
+end
+
+
+/**************************************************************************/
+/* the helper functions */
+/**************************************************************************/
+
+//Only for save hwreg to mem
+function write_hwreg_to_mem(s, s_rsrc, s_mem_offset)
+ s_mov_b32 exec_lo, m0 //assuming exec_lo is not needed anymore from this point on
+ s_mov_b32 m0, s_mem_offset
+ s_buffer_store_dword s, s_rsrc, m0 glc:1
+ ack_sqc_store_workaround()
+ s_add_u32 s_mem_offset, s_mem_offset, 4
+ s_mov_b32 m0, exec_lo
+end
+
+
+// HWREGs are saved before SGPRs, so all HWREGs can be used.
+function write_16sgpr_to_mem(s, s_rsrc, s_mem_offset)
+
+ s_buffer_store_dwordx4 s[0], s_rsrc, 0 glc:1
+ ack_sqc_store_workaround()
+ s_buffer_store_dwordx4 s[4], s_rsrc, 16 glc:1
+ ack_sqc_store_workaround()
+ s_buffer_store_dwordx4 s[8], s_rsrc, 32 glc:1
+ ack_sqc_store_workaround()
+ s_buffer_store_dwordx4 s[12], s_rsrc, 48 glc:1
+ ack_sqc_store_workaround()
+ s_add_u32 s_rsrc[0], s_rsrc[0], 4*16
+ s_addc_u32 s_rsrc[1], s_rsrc[1], 0x0 // +scc
+end
+
+
+function read_hwreg_from_mem(s, s_rsrc, s_mem_offset)
+ s_buffer_load_dword s, s_rsrc, s_mem_offset glc:1
+ s_add_u32 s_mem_offset, s_mem_offset, 4
+end
+
+function read_16sgpr_from_mem(s, s_rsrc, s_mem_offset)
+ s_buffer_load_dwordx16 s, s_rsrc, s_mem_offset glc:1
+ s_sub_u32 s_mem_offset, s_mem_offset, 4*16
+end
+
+
+
+function get_lds_size_bytes(s_lds_size_byte)
+ // SQ LDS granularity is 64DW, while PGM_RSRC2.lds_size is in granularity 128DW
+ s_getreg_b32 s_lds_size_byte, hwreg(HW_REG_LDS_ALLOC, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT, SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE) // lds_size
+    s_lshl_b32      s_lds_size_byte, s_lds_size_byte, 8                //LDS size in bytes = lds_size * 64 DW * 4 bytes   // granularity 64DW
+end
+
+function get_vgpr_size_bytes(s_vgpr_size_byte)
+    s_getreg_b32    s_vgpr_size_byte, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)  //vgpr_size
+ s_add_u32 s_vgpr_size_byte, s_vgpr_size_byte, 1
+ s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+8) //Number of VGPRs = (vgpr_size + 1) * 4 * 64 * 4 (non-zero value) //FIXME for GFX, zero is possible
+end
+
+function get_sgpr_size_bytes(s_sgpr_size_byte)
+    s_getreg_b32    s_sgpr_size_byte, hwreg(HW_REG_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_SGPR_SIZE_SIZE)  //sgpr_size
+ s_add_u32 s_sgpr_size_byte, s_sgpr_size_byte, 1
+ s_lshl_b32 s_sgpr_size_byte, s_sgpr_size_byte, 6 //Number of SGPRs = (sgpr_size + 1) * 16 *4 (non-zero value)
+end
+
+function get_hwreg_size_bytes
+ return 128 //HWREG size 128 bytes
+end
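
Taken together, the three size helpers pin down the save-area layout named in the rev history (VGPR-SGPR-HWREG-{LDS}). A C sketch that reproduces the offsets with the same shift arithmetic (the example field values are mine):

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t vgpr_bytes(uint32_t vgpr_size) { return (vgpr_size + 1) << (2 + 8); }
    static uint32_t sgpr_bytes(uint32_t sgpr_size) { return (sgpr_size + 1) << 6; }
    static uint32_t lds_bytes(uint32_t lds_size)   { return lds_size << 8; }

    int main(void)
    {
            uint32_t vgpr_size = 3, sgpr_size = 6, lds_size = 16; /* example */
            uint32_t vgpr = vgpr_bytes(vgpr_size);
            uint32_t sgpr = sgpr_bytes(sgpr_size);

            printf("VGPRs  at 0x%x\n", 0u);
            printf("SGPRs  at 0x%x\n", vgpr);
            printf("HWREGs at 0x%x (128 bytes; ttmps at +0x40)\n", vgpr + sgpr);
            printf("LDS    at 0x%x (%u bytes)\n", vgpr + sgpr + 128,
                   lds_bytes(lds_size));
            return 0;
    }
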
+
+function ack_sqc_store_workaround
+ if ACK_SQC_STORE
+ s_waitcnt lgkmcnt(0)
+ end
+end
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 59808a3..f64c555 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -233,7 +233,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
pr_debug("Queue Size: 0x%llX, %u\n",
q_properties->queue_size, args->ring_size);
- pr_debug("Queue r/w Pointers: %p, %p\n",
+ pr_debug("Queue r/w Pointers: %px, %px\n",
q_properties->read_ptr,
q_properties->write_ptr);
@@ -292,8 +292,16 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
/* Return gpu_id as doorbell offset for mmap usage */
- args->doorbell_offset = (KFD_MMAP_DOORBELL_MASK | args->gpu_id);
+ args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
+ args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
args->doorbell_offset <<= PAGE_SHIFT;
+ if (KFD_IS_SOC15(dev->device_info->asic_family))
+ /* On SOC15 ASICs, doorbell allocation must be
+ * per-device, and independent from the per-process
+ * queue_id. Return the doorbell offset within the
+ * doorbell aperture to user mode.
+ */
+ args->doorbell_offset |= q_properties.doorbell_off;
mutex_unlock(&p->mutex);
@@ -1296,8 +1304,8 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
return -EINVAL;
}
- devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr),
- GFP_KERNEL);
+ devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
+ GFP_KERNEL);
if (!devices_arr)
return -ENOMEM;
@@ -1405,8 +1413,8 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
return -EINVAL;
}
- devices_arr = kmalloc(args->n_devices * sizeof(*devices_arr),
- GFP_KERNEL);
+ devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
+ GFP_KERNEL);
if (!devices_arr)
return -ENOMEM;
@@ -1645,23 +1653,33 @@ err_i1:
static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct kfd_process *process;
+ struct kfd_dev *dev = NULL;
+ unsigned long vm_pgoff;
+ unsigned int gpu_id;
process = kfd_get_process(current);
if (IS_ERR(process))
return PTR_ERR(process);
- if ((vma->vm_pgoff & KFD_MMAP_DOORBELL_MASK) ==
- KFD_MMAP_DOORBELL_MASK) {
- vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_DOORBELL_MASK;
- return kfd_doorbell_mmap(process, vma);
- } else if ((vma->vm_pgoff & KFD_MMAP_EVENTS_MASK) ==
- KFD_MMAP_EVENTS_MASK) {
- vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_EVENTS_MASK;
+ vm_pgoff = vma->vm_pgoff;
+ vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff);
+ gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff);
+ if (gpu_id)
+ dev = kfd_device_by_id(gpu_id);
+
+ switch (vm_pgoff & KFD_MMAP_TYPE_MASK) {
+ case KFD_MMAP_TYPE_DOORBELL:
+ if (!dev)
+ return -ENODEV;
+ return kfd_doorbell_mmap(dev, process, vma);
+
+ case KFD_MMAP_TYPE_EVENTS:
return kfd_event_mmap(process, vma);
- } else if ((vma->vm_pgoff & KFD_MMAP_RESERVED_MEM_MASK) ==
- KFD_MMAP_RESERVED_MEM_MASK) {
- vma->vm_pgoff = vma->vm_pgoff ^ KFD_MMAP_RESERVED_MEM_MASK;
- return kfd_reserved_mem_mmap(process, vma);
+
+ case KFD_MMAP_TYPE_RESERVED_MEM:
+ if (!dev)
+ return -ENODEV;
+ return kfd_reserved_mem_mmap(dev, process, vma);
}
return -EFAULT;
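
The rewritten kfd_mmap() above demuxes a single mmap offset into a type field and an optional GPU ID. The following standalone C sketch shows that encode/decode round trip; the real KFD_MMAP_* masks live in kfd_priv.h, so the shift positions and the doorbell type value here are illustrative assumptions, not the real ABI constants, and the final << PAGE_SHIFT applied in kfd_ioctl_create_queue() is left out.

/* Sketch of the mmap-offset encoding consumed by kfd_mmap(); bit
 * positions are assumed, not taken from kfd_priv.h.
 */
#include <stdint.h>
#include <stdio.h>

#define MMAP_TYPE_SHIFT		62	/* assumed type-field position */
#define MMAP_TYPE_DOORBELL	(0x3ULL << MMAP_TYPE_SHIFT)
#define MMAP_GPU_ID_SHIFT	46	/* assumed gpu_id-field position */
#define MMAP_GPU_ID_MASK	(0xFFFFULL << MMAP_GPU_ID_SHIFT)

static uint64_t encode_doorbell_offset(uint32_t gpu_id, uint64_t doorbell_off)
{
	return MMAP_TYPE_DOORBELL |
	       ((uint64_t)gpu_id << MMAP_GPU_ID_SHIFT) |
	       doorbell_off;
}

int main(void)
{
	uint64_t off = encode_doorbell_offset(0x1002, 8);

	/* decode the same way kfd_mmap() does with its GET macros */
	printf("type=%llx gpu_id=%llx value=%llx\n",
	       (unsigned long long)(off >> MMAP_TYPE_SHIFT),
	       (unsigned long long)((off & MMAP_GPU_ID_MASK) >> MMAP_GPU_ID_SHIFT),
	       (unsigned long long)(off & ((1ULL << MMAP_GPU_ID_SHIFT) - 1)));
	return 0;
}
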
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 4f126ef..296b3f2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -132,6 +132,9 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = {
#define fiji_cache_info carrizo_cache_info
#define polaris10_cache_info carrizo_cache_info
#define polaris11_cache_info carrizo_cache_info
+/* TODO - check & update Vega10 cache details */
+#define vega10_cache_info carrizo_cache_info
+#define raven_cache_info carrizo_cache_info
static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
struct crat_subtype_computeunit *cu)
@@ -603,6 +606,14 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
pcache_info = polaris11_cache_info;
num_of_cache_types = ARRAY_SIZE(polaris11_cache_info);
break;
+ case CHIP_VEGA10:
+ pcache_info = vega10_cache_info;
+ num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
+ break;
+ case CHIP_RAVEN:
+ pcache_info = raven_cache_info;
+ num_of_cache_types = ARRAY_SIZE(raven_cache_info);
+ break;
default:
return -EINVAL;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 334669996..7ee6cec 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -20,16 +20,13 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2)
-#include <linux/amd-iommu.h>
-#endif
#include <linux/bsearch.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_pm4_headers_vi.h"
-#include "cwsr_trap_handler_gfx8.asm"
+#include "cwsr_trap_handler.h"
#include "kfd_iommu.h"
#define MQD_SIZE_ALIGNED 768
@@ -41,6 +38,7 @@ static const struct kfd_device_info kaveri_device_info = {
.max_pasid_bits = 16,
/* max num of queues for KV.TODO should be a dynamic value */
.max_no_of_hqd = 24,
+ .doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
@@ -55,6 +53,7 @@ static const struct kfd_device_info carrizo_device_info = {
.max_pasid_bits = 16,
/* max num of queues for CZ.TODO should be a dynamic value */
.max_no_of_hqd = 24,
+ .doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
@@ -70,6 +69,7 @@ static const struct kfd_device_info hawaii_device_info = {
.max_pasid_bits = 16,
/* max num of queues for KV.TODO should be a dynamic value */
.max_no_of_hqd = 24,
+ .doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
@@ -83,6 +83,7 @@ static const struct kfd_device_info tonga_device_info = {
.asic_family = CHIP_TONGA,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
+ .doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
@@ -96,6 +97,7 @@ static const struct kfd_device_info tonga_vf_device_info = {
.asic_family = CHIP_TONGA,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
+ .doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
@@ -109,6 +111,7 @@ static const struct kfd_device_info fiji_device_info = {
.asic_family = CHIP_FIJI,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
+ .doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
@@ -122,6 +125,7 @@ static const struct kfd_device_info fiji_vf_device_info = {
.asic_family = CHIP_FIJI,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
+ .doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
@@ -136,6 +140,7 @@ static const struct kfd_device_info polaris10_device_info = {
.asic_family = CHIP_POLARIS10,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
+ .doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
@@ -149,6 +154,7 @@ static const struct kfd_device_info polaris10_vf_device_info = {
.asic_family = CHIP_POLARIS10,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
+ .doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
@@ -162,6 +168,7 @@ static const struct kfd_device_info polaris11_device_info = {
.asic_family = CHIP_POLARIS11,
.max_pasid_bits = 16,
.max_no_of_hqd = 24,
+ .doorbell_size = 4,
.ih_ring_entry_size = 4 * sizeof(uint32_t),
.event_interrupt_class = &event_interrupt_class_cik,
.num_of_watch_points = 4,
@@ -171,6 +178,34 @@ static const struct kfd_device_info polaris11_device_info = {
.needs_pci_atomics = true,
};
+static const struct kfd_device_info vega10_device_info = {
+ .asic_family = CHIP_VEGA10,
+ .max_pasid_bits = 16,
+ .max_no_of_hqd = 24,
+ .doorbell_size = 8,
+ .ih_ring_entry_size = 8 * sizeof(uint32_t),
+ .event_interrupt_class = &event_interrupt_class_v9,
+ .num_of_watch_points = 4,
+ .mqd_size_aligned = MQD_SIZE_ALIGNED,
+ .supports_cwsr = true,
+ .needs_iommu_device = false,
+ .needs_pci_atomics = false,
+};
+
+static const struct kfd_device_info vega10_vf_device_info = {
+ .asic_family = CHIP_VEGA10,
+ .max_pasid_bits = 16,
+ .max_no_of_hqd = 24,
+ .doorbell_size = 8,
+ .ih_ring_entry_size = 8 * sizeof(uint32_t),
+ .event_interrupt_class = &event_interrupt_class_v9,
+ .num_of_watch_points = 4,
+ .mqd_size_aligned = MQD_SIZE_ALIGNED,
+ .supports_cwsr = true,
+ .needs_iommu_device = false,
+ .needs_pci_atomics = false,
+};
+
struct kfd_deviceid {
unsigned short did;
@@ -250,6 +285,15 @@ static const struct kfd_deviceid supported_devices[] = {
{ 0x67EB, &polaris11_device_info }, /* Polaris11 */
{ 0x67EF, &polaris11_device_info }, /* Polaris11 */
{ 0x67FF, &polaris11_device_info }, /* Polaris11 */
+ { 0x6860, &vega10_device_info }, /* Vega10 */
+ { 0x6861, &vega10_device_info }, /* Vega10 */
+ { 0x6862, &vega10_device_info }, /* Vega10 */
+ { 0x6863, &vega10_device_info }, /* Vega10 */
+ { 0x6864, &vega10_device_info }, /* Vega10 */
+ { 0x6867, &vega10_device_info }, /* Vega10 */
+ { 0x6868, &vega10_device_info }, /* Vega10 */
+ { 0x686C, &vega10_vf_device_info }, /* Vega10 vf*/
+ { 0x687F, &vega10_device_info }, /* Vega10 */
};
static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
@@ -279,7 +323,7 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
struct pci_dev *pdev, const struct kfd2kgd_calls *f2g)
{
struct kfd_dev *kfd;
-
+ int ret;
const struct kfd_device_info *device_info =
lookup_device_info(pdev->device);
@@ -288,19 +332,18 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
return NULL;
}
- if (device_info->needs_pci_atomics) {
- /* Allow BIF to recode atomics to PCIe 3.0
- * AtomicOps. 32 and 64-bit requests are possible and
- * must be supported.
- */
- if (pci_enable_atomic_ops_to_root(pdev,
- PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
- PCI_EXP_DEVCAP2_ATOMIC_COMP64) < 0) {
- dev_info(kfd_device,
- "skipped device %x:%x, PCI rejects atomics",
- pdev->vendor, pdev->device);
- return NULL;
- }
+ /* Allow BIF to recode atomics to PCIe 3.0 AtomicOps.
+ * 32 and 64-bit requests are possible and must be
+ * supported.
+ */
+ ret = pci_enable_atomic_ops_to_root(pdev,
+ PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
+ PCI_EXP_DEVCAP2_ATOMIC_COMP64);
+ if (device_info->needs_pci_atomics && ret < 0) {
+ dev_info(kfd_device,
+ "skipped device %x:%x, PCI rejects atomics\n",
+ pdev->vendor, pdev->device);
+ return NULL;
}
kfd = kzalloc(sizeof(*kfd), GFP_KERNEL);
@@ -323,10 +366,16 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
static void kfd_cwsr_init(struct kfd_dev *kfd)
{
if (cwsr_enable && kfd->device_info->supports_cwsr) {
- BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
+ if (kfd->device_info->asic_family < CHIP_VEGA10) {
+ BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
+ kfd->cwsr_isa = cwsr_trap_gfx8_hex;
+ kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
+ } else {
+ BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE);
+ kfd->cwsr_isa = cwsr_trap_gfx9_hex;
+ kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex);
+ }
- kfd->cwsr_isa = cwsr_trap_gfx8_hex;
- kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
kfd->cwsr_enabled = true;
}
}
@@ -541,6 +590,44 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
spin_unlock(&kfd->interrupt_lock);
}
+int kgd2kfd_quiesce_mm(struct mm_struct *mm)
+{
+ struct kfd_process *p;
+ int r;
+
+ /* Because we are called from arbitrary context (workqueue) as opposed
+ * to process context, kfd_process could attempt to exit while we are
+ * running, so the lookup function increments the process ref count.
+ */
+ p = kfd_lookup_process_by_mm(mm);
+ if (!p)
+ return -ESRCH;
+
+ r = kfd_process_evict_queues(p);
+
+ kfd_unref_process(p);
+ return r;
+}
+
+int kgd2kfd_resume_mm(struct mm_struct *mm)
+{
+ struct kfd_process *p;
+ int r;
+
+ /* Because we are called from arbitrary context (workqueue) as opposed
+ * to process context, kfd_process could attempt to exit while we are
+ * running, so the lookup function increments the process ref count.
+ */
+ p = kfd_lookup_process_by_mm(mm);
+ if (!p)
+ return -ESRCH;
+
+ r = kfd_process_restore_queues(p);
+
+ kfd_unref_process(p);
+ return r;
+}
+
/** kgd2kfd_schedule_evict_and_restore_process - Schedules work queue that will
* prepare for safe eviction of KFD BOs that belong to the specified
* process.
@@ -652,7 +739,7 @@ int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size)
return -ENOMEM;
- *mem_obj = kmalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
+ *mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
if ((*mem_obj) == NULL)
return -ENOMEM;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index d55d29d..668ad07 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -110,6 +110,57 @@ void program_sh_mem_settings(struct device_queue_manager *dqm,
qpd->sh_mem_bases);
}
+static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
+{
+ struct kfd_dev *dev = qpd->dqm->dev;
+
+ if (!KFD_IS_SOC15(dev->device_info->asic_family)) {
+ /* On pre-SOC15 chips we need to use the queue ID to
+ * preserve the user mode ABI.
+ */
+ q->doorbell_id = q->properties.queue_id;
+ } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
+ /* For SDMA queues on SOC15, use static doorbell
+ * assignments based on the engine and queue.
+ */
+ q->doorbell_id = dev->shared_resources.sdma_doorbell
+ [q->properties.sdma_engine_id]
+ [q->properties.sdma_queue_id];
+ } else {
+ /* For CP queues on SOC15 reserve a free doorbell ID */
+ unsigned int found;
+
+ found = find_first_zero_bit(qpd->doorbell_bitmap,
+ KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
+ if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
+ pr_debug("No doorbells available\n");
+ return -EBUSY;
+ }
+ set_bit(found, qpd->doorbell_bitmap);
+ q->doorbell_id = found;
+ }
+
+ q->properties.doorbell_off =
+ kfd_doorbell_id_to_offset(dev, q->process,
+ q->doorbell_id);
+
+ return 0;
+}
+
+static void deallocate_doorbell(struct qcm_process_device *qpd,
+ struct queue *q)
+{
+ unsigned int old;
+ struct kfd_dev *dev = qpd->dqm->dev;
+
+ if (!KFD_IS_SOC15(dev->device_info->asic_family) ||
+ q->properties.type == KFD_QUEUE_TYPE_SDMA)
+ return;
+
+ old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
+ WARN_ON(!old);
+}
+
static int allocate_vmid(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
struct queue *q)
@@ -145,15 +196,19 @@ static int allocate_vmid(struct device_queue_manager *dqm,
static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
struct qcm_process_device *qpd)
{
- uint32_t len;
+ const struct packet_manager_funcs *pmf = qpd->dqm->packets.pmf;
+ int ret;
if (!qpd->ib_kaddr)
return -ENOMEM;
- len = pm_create_release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
+ ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
+ if (ret)
+ return ret;
return kdev->kfd2kgd->submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
- qpd->ib_base, (uint32_t *)qpd->ib_kaddr, len);
+ qpd->ib_base, (uint32_t *)qpd->ib_kaddr,
+ pmf->release_mem_size / sizeof(uint32_t));
}
static void deallocate_vmid(struct device_queue_manager *dqm,
@@ -301,10 +356,14 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
if (retval)
return retval;
+ retval = allocate_doorbell(qpd, q);
+ if (retval)
+ goto out_deallocate_hqd;
+
retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
&q->gart_mqd_addr, &q->properties);
if (retval)
- goto out_deallocate_hqd;
+ goto out_deallocate_doorbell;
pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
q->pipe, q->queue);
@@ -324,6 +383,8 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
out_uninit_mqd:
mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
+out_deallocate_doorbell:
+ deallocate_doorbell(qpd, q);
out_deallocate_hqd:
deallocate_hqd(dqm, q);
@@ -357,6 +418,8 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
}
dqm->total_queue_count--;
+ deallocate_doorbell(qpd, q);
+
retval = mqd->destroy_mqd(mqd, q->mqd,
KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
KFD_UNMAP_LATENCY_MS,
@@ -861,6 +924,10 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
q->properties.sdma_queue_id = q->sdma_id / CIK_SDMA_QUEUES_PER_ENGINE;
q->properties.sdma_engine_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;
+ retval = allocate_doorbell(qpd, q);
+ if (retval)
+ goto out_deallocate_sdma_queue;
+
pr_debug("SDMA id is: %d\n", q->sdma_id);
pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
@@ -869,7 +936,7 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
&q->gart_mqd_addr, &q->properties);
if (retval)
- goto out_deallocate_sdma_queue;
+ goto out_deallocate_doorbell;
retval = mqd->load_mqd(mqd, q->mqd, 0, 0, &q->properties, NULL);
if (retval)
@@ -879,6 +946,8 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
out_uninit_mqd:
mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
+out_deallocate_doorbell:
+ deallocate_doorbell(qpd, q);
out_deallocate_sdma_queue:
deallocate_sdma_queue(dqm, q->sdma_id);
@@ -1070,12 +1139,17 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
q->properties.sdma_engine_id =
q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;
}
+
+ retval = allocate_doorbell(qpd, q);
+ if (retval)
+ goto out_deallocate_sdma_queue;
+
mqd = dqm->ops.get_mqd_manager(dqm,
get_mqd_type_from_queue_type(q->properties.type));
if (!mqd) {
retval = -ENOMEM;
- goto out_deallocate_sdma_queue;
+ goto out_deallocate_doorbell;
}
/*
* Eviction state logic: we only mark active queues as evicted
@@ -1093,7 +1167,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
&q->gart_mqd_addr, &q->properties);
if (retval)
- goto out_deallocate_sdma_queue;
+ goto out_deallocate_doorbell;
list_add(&q->list, &qpd->queues_list);
qpd->queue_count++;
@@ -1117,6 +1191,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
mutex_unlock(&dqm->lock);
return retval;
+out_deallocate_doorbell:
+ deallocate_doorbell(qpd, q);
out_deallocate_sdma_queue:
if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
deallocate_sdma_queue(dqm, q->sdma_id);
@@ -1257,6 +1333,8 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
goto failed;
}
+ deallocate_doorbell(qpd, q);
+
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
dqm->sdma_queue_count--;
deallocate_sdma_queue(dqm, q->sdma_id);
@@ -1308,7 +1386,10 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm,
void __user *alternate_aperture_base,
uint64_t alternate_aperture_size)
{
- bool retval;
+ bool retval = true;
+
+ if (!dqm->asic_ops.set_cache_memory_policy)
+ return retval;
mutex_lock(&dqm->lock);
@@ -1577,6 +1658,11 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
case CHIP_POLARIS11:
device_queue_manager_init_vi_tonga(&dqm->asic_ops);
break;
+
+ case CHIP_VEGA10:
+ case CHIP_RAVEN:
+ device_queue_manager_init_v9(&dqm->asic_ops);
+ break;
default:
WARN(1, "Unexpected ASIC family %u",
dev->device_info->asic_family);
@@ -1627,6 +1713,18 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data)
int pipe, queue;
int r = 0;
+ r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->kgd,
+ KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE, &dump, &n_regs);
+ if (!r) {
+ seq_printf(m, " HIQ on MEC %d Pipe %d Queue %d\n",
+ KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1,
+ KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm),
+ KFD_CIK_HIQ_QUEUE);
+ seq_reg_dump(m, dump, n_regs);
+
+ kfree(dump);
+ }
+
for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
int pipe_offset = pipe * get_queues_per_pipe(dqm);
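
allocate_doorbell() above hands out per-process doorbell IDs from a bitmap for CP queues on SOC15. A toy userspace model of that allocate/free policy follows, with a plain array standing in for find_first_zero_bit()/set_bit(), and MAX_QUEUES standing in for KFD_MAX_NUM_OF_QUEUES_PER_PROCESS.

/* Toy model of the doorbell-ID bitmap scheme; illustrative only. */
#include <stdio.h>

#define MAX_QUEUES 1024

static unsigned char bitmap[MAX_QUEUES];

static int alloc_doorbell_id(void)
{
	int i;

	for (i = 0; i < MAX_QUEUES; i++) {
		if (!bitmap[i]) {
			bitmap[i] = 1;	/* first free slot becomes the ID */
			return i;
		}
	}
	return -1;			/* the kernel returns -EBUSY here */
}

static void free_doorbell_id(int id)
{
	bitmap[id] = 0;
}

int main(void)
{
	int a = alloc_doorbell_id();
	int b = alloc_doorbell_id();

	free_doorbell_id(a);
	/* freed IDs are reused, just as test_and_clear_bit() allows */
	printf("a=%d b=%d reuse=%d\n", a, b, alloc_doorbell_id());
	return 0;
}
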
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 412beff..59a6b19 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -200,6 +200,8 @@ void device_queue_manager_init_vi(
struct device_queue_manager_asic_ops *asic_ops);
void device_queue_manager_init_vi_tonga(
struct device_queue_manager_asic_ops *asic_ops);
+void device_queue_manager_init_v9(
+ struct device_queue_manager_asic_ops *asic_ops);
void program_sh_mem_settings(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
unsigned int get_queues_num(struct device_queue_manager *dqm);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
new file mode 100644
index 0000000..79e5bcf
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright 2016-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "kfd_device_queue_manager.h"
+#include "vega10_enum.h"
+#include "gc/gc_9_0_offset.h"
+#include "gc/gc_9_0_sh_mask.h"
+#include "sdma0/sdma0_4_0_sh_mask.h"
+
+static int update_qpd_v9(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd);
+static void init_sdma_vm_v9(struct device_queue_manager *dqm, struct queue *q,
+ struct qcm_process_device *qpd);
+
+void device_queue_manager_init_v9(
+ struct device_queue_manager_asic_ops *asic_ops)
+{
+ asic_ops->update_qpd = update_qpd_v9;
+ asic_ops->init_sdma_vm = init_sdma_vm_v9;
+}
+
+static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd)
+{
+ uint32_t shared_base = pdd->lds_base >> 48;
+ uint32_t private_base = pdd->scratch_base >> 48;
+
+ return (shared_base << SH_MEM_BASES__SHARED_BASE__SHIFT) |
+ private_base;
+}
+
+static int update_qpd_v9(struct device_queue_manager *dqm,
+ struct qcm_process_device *qpd)
+{
+ struct kfd_process_device *pdd;
+
+ pdd = qpd_to_pdd(qpd);
+
+ /* check if the sh_mem_config register is already configured */
+ if (qpd->sh_mem_config == 0) {
+ qpd->sh_mem_config =
+ SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
+ SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
+ if (vega10_noretry &&
+ !dqm->dev->device_info->needs_iommu_device)
+ qpd->sh_mem_config |=
+ 1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
+
+ qpd->sh_mem_ape1_limit = 0;
+ qpd->sh_mem_ape1_base = 0;
+ }
+
+ qpd->sh_mem_bases = compute_sh_mem_bases_64bit(pdd);
+
+ pr_debug("sh_mem_bases 0x%X\n", qpd->sh_mem_bases);
+
+ return 0;
+}
+
+static void init_sdma_vm_v9(struct device_queue_manager *dqm, struct queue *q,
+ struct qcm_process_device *qpd)
+{
+ /* Not needed on SDMAv4 any more */
+ q->properties.sdma_vm_addr = 0;
+}
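
compute_sh_mem_bases_64bit() above packs the high 16 bits of the LDS and scratch aperture bases into one register value. A worked example, assuming SHARED_BASE occupies the upper half of SH_MEM_BASES: with the GFXv9 bases defined in kfd_flat_memory.c (0x1 << 48 and 0x2 << 48) the result is 0x00010002.

/* Worked example of the SH_MEM_BASES packing; the shift value is an
 * assumed stand-in for SH_MEM_BASES__SHARED_BASE__SHIFT.
 */
#include <stdint.h>
#include <stdio.h>

#define SHARED_BASE_SHIFT 16

static uint32_t sh_mem_bases_64bit(uint64_t lds_base, uint64_t scratch_base)
{
	uint32_t shared_base = lds_base >> 48;
	uint32_t private_base = scratch_base >> 48;

	return (shared_base << SHARED_BASE_SHIFT) | private_base;
}

int main(void)
{
	/* prints 0x00010002 for the GFXv9 aperture bases */
	printf("0x%08X\n", sh_mem_bases_64bit(0x1ULL << 48, 0x2ULL << 48));
	return 0;
}
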
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
index ebb4da14..c3744d8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -33,7 +33,6 @@
static DEFINE_IDA(doorbell_ida);
static unsigned int max_doorbell_slices;
-#define KFD_SIZE_OF_DOORBELL_IN_BYTES 4
/*
* Each device exposes a doorbell aperture, a PCI MMIO aperture that
@@ -50,9 +49,9 @@ static unsigned int max_doorbell_slices;
*/
/* # of doorbell bytes allocated for each process. */
-static inline size_t doorbell_process_allocation(void)
+size_t kfd_doorbell_process_slice(struct kfd_dev *kfd)
{
- return roundup(KFD_SIZE_OF_DOORBELL_IN_BYTES *
+ return roundup(kfd->device_info->doorbell_size *
KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
PAGE_SIZE);
}
@@ -72,16 +71,16 @@ int kfd_doorbell_init(struct kfd_dev *kfd)
doorbell_start_offset =
roundup(kfd->shared_resources.doorbell_start_offset,
- doorbell_process_allocation());
+ kfd_doorbell_process_slice(kfd));
doorbell_aperture_size =
rounddown(kfd->shared_resources.doorbell_aperture_size,
- doorbell_process_allocation());
+ kfd_doorbell_process_slice(kfd));
if (doorbell_aperture_size > doorbell_start_offset)
doorbell_process_limit =
(doorbell_aperture_size - doorbell_start_offset) /
- doorbell_process_allocation();
+ kfd_doorbell_process_slice(kfd);
else
return -ENOSPC;
@@ -95,7 +94,7 @@ int kfd_doorbell_init(struct kfd_dev *kfd)
kfd->doorbell_id_offset = doorbell_start_offset / sizeof(u32);
kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base,
- doorbell_process_allocation());
+ kfd_doorbell_process_slice(kfd));
if (!kfd->doorbell_kernel_ptr)
return -ENOMEM;
@@ -127,21 +126,16 @@ void kfd_doorbell_fini(struct kfd_dev *kfd)
iounmap(kfd->doorbell_kernel_ptr);
}
-int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma)
+int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
+ struct vm_area_struct *vma)
{
phys_addr_t address;
- struct kfd_dev *dev;
/*
* For simplicity we only allow mapping of the entire doorbell
* allocation of a single device & process.
*/
- if (vma->vm_end - vma->vm_start != doorbell_process_allocation())
- return -EINVAL;
-
- /* Find kfd device according to gpu id */
- dev = kfd_device_by_id(vma->vm_pgoff);
- if (!dev)
+ if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev))
return -EINVAL;
/* Calculate physical address of doorbell */
@@ -158,19 +152,19 @@ int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma)
" vm_flags == 0x%04lX\n"
" size == 0x%04lX\n",
(unsigned long long) vma->vm_start, address, vma->vm_flags,
- doorbell_process_allocation());
+ kfd_doorbell_process_slice(dev));
return io_remap_pfn_range(vma,
vma->vm_start,
address >> PAGE_SHIFT,
- doorbell_process_allocation(),
+ kfd_doorbell_process_slice(dev),
vma->vm_page_prot);
}
/* get kernel iomem pointer for a doorbell */
-u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
+void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
unsigned int *doorbell_off)
{
u32 inx;
@@ -185,6 +179,8 @@ u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
return NULL;
+ inx *= kfd->device_info->doorbell_size / sizeof(u32);
+
/*
* Calculating the kernel doorbell offset using the first
* doorbell page.
@@ -210,7 +206,7 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr)
mutex_unlock(&kfd->doorbell_mutex);
}
-inline void write_kernel_doorbell(u32 __iomem *db, u32 value)
+void write_kernel_doorbell(void __iomem *db, u32 value)
{
if (db) {
writel(value, db);
@@ -218,30 +214,37 @@ inline void write_kernel_doorbell(u32 __iomem *db, u32 value)
}
}
-/*
- * queue_ids are in the range [0,MAX_PROCESS_QUEUES) and are mapped 1:1
- * to doorbells with the process's doorbell page
- */
-unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd,
+void write_kernel_doorbell64(void __iomem *db, u64 value)
+{
+ if (db) {
+ WARN(((unsigned long)db & 7) != 0,
+ "Unaligned 64-bit doorbell");
+ writeq(value, (u64 __iomem *)db);
+ pr_debug("writing %llu to doorbell address %p\n", value, db);
+ }
+}
+
+unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd,
struct kfd_process *process,
- unsigned int queue_id)
+ unsigned int doorbell_id)
{
/*
* doorbell_id_offset accounts for doorbells taken by KGD.
- * index * doorbell_process_allocation/sizeof(u32) adjusts to
- * the process's doorbells.
+ * index * kfd_doorbell_process_slice/sizeof(u32) adjusts to
+ * the process's doorbells. The offset returned is in dword
+ * units regardless of the ASIC-dependent doorbell size.
*/
return kfd->doorbell_id_offset +
process->doorbell_index
- * doorbell_process_allocation() / sizeof(u32) +
- queue_id;
+ * kfd_doorbell_process_slice(kfd) / sizeof(u32) +
+ doorbell_id * kfd->device_info->doorbell_size / sizeof(u32);
}
uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
{
uint64_t num_of_elems = (kfd->shared_resources.doorbell_aperture_size -
kfd->shared_resources.doorbell_start_offset) /
- doorbell_process_allocation() + 1;
+ kfd_doorbell_process_slice(kfd) + 1;
return num_of_elems;
@@ -251,7 +254,7 @@ phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
struct kfd_process *process)
{
return dev->doorbell_base +
- process->doorbell_index * doorbell_process_allocation();
+ process->doorbell_index * kfd_doorbell_process_slice(dev);
}
int kfd_alloc_process_doorbells(struct kfd_process *process)
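
kfd_doorbell_id_to_offset() above returns a dword-unit offset regardless of the ASIC's doorbell size (4 bytes pre-SOC15, 8 bytes on SOC15). A standalone sketch of that arithmetic with illustrative numbers; the 8 KiB per-process slice is an assumed value (doorbell size times queues per process, page-rounded as in kfd_doorbell_process_slice()).

/* Sketch of the dword-unit doorbell offset arithmetic. */
#include <stdint.h>
#include <stdio.h>

static unsigned int doorbell_id_to_offset(unsigned int id_offset,
					  unsigned int process_index,
					  size_t slice_bytes,
					  unsigned int doorbell_size,
					  unsigned int doorbell_id)
{
	return id_offset +
	       process_index * slice_bytes / sizeof(uint32_t) +
	       doorbell_id * doorbell_size / sizeof(uint32_t);
}

int main(void)
{
	/* SOC15: 8-byte doorbells, 8 KiB slice, process index 2, ID 5 */
	printf("%u\n", doorbell_id_to_offset(0, 2, 8192, 8, 5));
	return 0;
}
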
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 4890a90..5562e94 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -345,7 +345,7 @@ int kfd_event_create(struct file *devkfd, struct kfd_process *p,
case KFD_EVENT_TYPE_DEBUG:
ret = create_signal_event(devkfd, p, ev);
if (!ret) {
- *event_page_offset = KFD_MMAP_EVENTS_MASK;
+ *event_page_offset = KFD_MMAP_TYPE_EVENTS;
*event_page_offset <<= PAGE_SHIFT;
*event_slot_index = ev->event_id;
}
@@ -496,7 +496,7 @@ void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
pr_debug_ratelimited("Partial ID invalid: %u (%u valid bits)\n",
partial_id, valid_id_bits);
- if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT/2) {
+ if (p->signal_event_count < KFD_SIGNAL_EVENT_LIMIT / 64) {
/* With relatively few events, it's faster to
* iterate over the event IDR
*/
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index 66852de..97d5423 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -275,23 +275,35 @@
* for FLAT_* / S_LOAD operations.
*/
-#define MAKE_GPUVM_APP_BASE(gpu_num) \
+#define MAKE_GPUVM_APP_BASE_VI(gpu_num) \
(((uint64_t)(gpu_num) << 61) + 0x1000000000000L)
#define MAKE_GPUVM_APP_LIMIT(base, size) \
(((uint64_t)(base) & 0xFFFFFF0000000000UL) + (size) - 1)
-#define MAKE_SCRATCH_APP_BASE() \
+#define MAKE_SCRATCH_APP_BASE_VI() \
(((uint64_t)(0x1UL) << 61) + 0x100000000L)
#define MAKE_SCRATCH_APP_LIMIT(base) \
(((uint64_t)base & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)
-#define MAKE_LDS_APP_BASE() \
+#define MAKE_LDS_APP_BASE_VI() \
(((uint64_t)(0x1UL) << 61) + 0x0)
#define MAKE_LDS_APP_LIMIT(base) \
(((uint64_t)(base) & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)
+/* On GFXv9 the LDS and scratch apertures are programmed independently
+ * using the high 16 bits of the 64-bit virtual address. They must be
+ * in the hole, which will be the case as long as the high 16 bits are
+ * not 0.
+ *
+ * The aperture sizes are still 4GB implicitly.
+ *
+ * A GPUVM aperture is not applicable on GFXv9.
+ */
+#define MAKE_LDS_APP_BASE_V9() ((uint64_t)(0x1UL) << 48)
+#define MAKE_SCRATCH_APP_BASE_V9() ((uint64_t)(0x2UL) << 48)
+
/* User mode manages most of the SVM aperture address space. The low
* 16MB are reserved for kernel use (CWSR trap handler and kernel IB
* for now).
@@ -300,6 +312,55 @@
#define SVM_CWSR_BASE (SVM_USER_BASE - KFD_CWSR_TBA_TMA_SIZE)
#define SVM_IB_BASE (SVM_CWSR_BASE - PAGE_SIZE)
+static void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id)
+{
+ /*
+ * The node ID cannot be 0, so the three MSB bits of
+ * the aperture address are never 0.
+ */
+ pdd->lds_base = MAKE_LDS_APP_BASE_VI();
+ pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
+
+ if (!pdd->dev->device_info->needs_iommu_device) {
+ /* dGPUs: SVM aperture starting at 0
+ * with small reserved space for kernel.
+ * Set them to CANONICAL addresses.
+ */
+ pdd->gpuvm_base = SVM_USER_BASE;
+ pdd->gpuvm_limit =
+ pdd->dev->shared_resources.gpuvm_size - 1;
+ } else {
+ /* set them to non-CANONICAL addresses; no SVM is
+ * allocated.
+ */
+ pdd->gpuvm_base = MAKE_GPUVM_APP_BASE_VI(id + 1);
+ pdd->gpuvm_limit = MAKE_GPUVM_APP_LIMIT(pdd->gpuvm_base,
+ pdd->dev->shared_resources.gpuvm_size);
+ }
+
+ pdd->scratch_base = MAKE_SCRATCH_APP_BASE_VI();
+ pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
+}
+
+static void kfd_init_apertures_v9(struct kfd_process_device *pdd, uint8_t id)
+{
+ pdd->lds_base = MAKE_LDS_APP_BASE_V9();
+ pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
+
+ /* Raven needs SVM to support graphics handles, etc. Leave the small
+ * reserved space before SVM on Raven as well, even though we don't
+ * have to.
+ * Set gpuvm_base and gpuvm_limit to CANONICAL addresses so that
+ * the Thunk can use them to reserve SVM.
+ */
+ pdd->gpuvm_base = SVM_USER_BASE;
+ pdd->gpuvm_limit =
+ pdd->dev->shared_resources.gpuvm_size - 1;
+
+ pdd->scratch_base = MAKE_SCRATCH_APP_BASE_V9();
+ pdd->scratch_limit = MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
+}
+
int kfd_init_apertures(struct kfd_process *process)
{
uint8_t id = 0;
@@ -307,9 +368,7 @@ int kfd_init_apertures(struct kfd_process *process)
struct kfd_process_device *pdd;
/*Iterating over all devices*/
- while (kfd_topology_enum_kfd_devices(id, &dev) == 0 &&
- id < NUM_OF_SUPPORTED_GPUS) {
-
+ while (kfd_topology_enum_kfd_devices(id, &dev) == 0) {
if (!dev) {
id++; /* Skip non GPU devices */
continue;
@@ -318,7 +377,7 @@ int kfd_init_apertures(struct kfd_process *process)
pdd = kfd_create_process_device_data(dev, process);
if (!pdd) {
pr_err("Failed to create process device data\n");
- return -1;
+ return -ENOMEM;
}
/*
* For 64 bit process apertures will be statically reserved in
@@ -330,32 +389,30 @@ int kfd_init_apertures(struct kfd_process *process)
pdd->gpuvm_base = pdd->gpuvm_limit = 0;
pdd->scratch_base = pdd->scratch_limit = 0;
} else {
- /* Same LDS and scratch apertures can be used
- * on all GPUs. This allows using more dGPUs
- * than placement options for apertures.
- */
- pdd->lds_base = MAKE_LDS_APP_BASE();
- pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
-
- pdd->scratch_base = MAKE_SCRATCH_APP_BASE();
- pdd->scratch_limit =
- MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
+ switch (dev->device_info->asic_family) {
+ case CHIP_KAVERI:
+ case CHIP_HAWAII:
+ case CHIP_CARRIZO:
+ case CHIP_TONGA:
+ case CHIP_FIJI:
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ kfd_init_apertures_vi(pdd, id);
+ break;
+ case CHIP_VEGA10:
+ case CHIP_RAVEN:
+ kfd_init_apertures_v9(pdd, id);
+ break;
+ default:
+ WARN(1, "Unexpected ASIC family %u",
+ dev->device_info->asic_family);
+ return -EINVAL;
+ }
- if (dev->device_info->needs_iommu_device) {
- /* APUs: GPUVM aperture in
- * non-canonical address space
- */
- pdd->gpuvm_base = MAKE_GPUVM_APP_BASE(id + 1);
- pdd->gpuvm_limit = MAKE_GPUVM_APP_LIMIT(
- pdd->gpuvm_base,
- dev->shared_resources.gpuvm_size);
- } else {
- /* dGPUs: SVM aperture starting at 0
- * with small reserved space for kernel
+ if (!dev->device_info->needs_iommu_device) {
+ /* dGPUs: place the kernel reserved space
+ * before SVM
*/
- pdd->gpuvm_base = SVM_USER_BASE;
- pdd->gpuvm_limit =
- dev->shared_resources.gpuvm_size - 1;
pdd->qpd.cwsr_base = SVM_CWSR_BASE;
pdd->qpd.ib_base = SVM_IB_BASE;
}
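
To make the GFXv9 aperture comment above concrete, here is a standalone program that evaluates the V9 base macros and the 4GB limit macro; the limit formula is copied from MAKE_LDS_APP_LIMIT and applies to the scratch aperture the same way.

/* Worked example of the GFXv9 aperture placement. */
#include <stdint.h>
#include <stdio.h>

#define MAKE_LDS_APP_BASE_V9()     ((uint64_t)(0x1UL) << 48)
#define MAKE_SCRATCH_APP_BASE_V9() ((uint64_t)(0x2UL) << 48)
#define MAKE_LDS_APP_LIMIT(base) \
	(((uint64_t)(base) & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)

int main(void)
{
	uint64_t lds = MAKE_LDS_APP_BASE_V9();
	uint64_t scratch = MAKE_SCRATCH_APP_BASE_V9();

	/* each aperture spans 4GB below its base's upper 32 bits */
	printf("lds     base=%#018llx limit=%#018llx\n",
	       (unsigned long long)lds,
	       (unsigned long long)MAKE_LDS_APP_LIMIT(lds));
	printf("scratch base=%#018llx limit=%#018llx\n",
	       (unsigned long long)scratch,
	       (unsigned long long)MAKE_LDS_APP_LIMIT(scratch));
	return 0;
}
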
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
new file mode 100644
index 0000000..37029ba
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2016-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "kfd_priv.h"
+#include "kfd_events.h"
+#include "soc15_int.h"
+
+
+static bool event_interrupt_isr_v9(struct kfd_dev *dev,
+ const uint32_t *ih_ring_entry)
+{
+ uint16_t source_id, client_id, pasid, vmid;
+ const uint32_t *data = ih_ring_entry;
+
+ /* Only handle interrupts from KFD VMIDs */
+ vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry);
+ if (vmid < dev->vm_info.first_vmid_kfd ||
+ vmid > dev->vm_info.last_vmid_kfd)
+ return 0;
+
+ /* If there is no valid PASID, it's likely a firmware bug */
+ pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry);
+ if (WARN_ONCE(pasid == 0, "FW bug: No PASID in KFD interrupt"))
+ return 0;
+
+ source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry);
+ client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry);
+
+ pr_debug("client id 0x%x, source id %d, pasid 0x%x. raw data:\n",
+ client_id, source_id, pasid);
+ pr_debug("%8X, %8X, %8X, %8X, %8X, %8X, %8X, %8X.\n",
+ data[0], data[1], data[2], data[3],
+ data[4], data[5], data[6], data[7]);
+
+ /* Interrupt types we care about: various signals and faults.
+ * They will be forwarded to a work queue (see below).
+ */
+ return source_id == SOC15_INTSRC_CP_END_OF_PIPE ||
+ source_id == SOC15_INTSRC_SDMA_TRAP ||
+ source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG ||
+ source_id == SOC15_INTSRC_CP_BAD_OPCODE;
+}
+
+static void event_interrupt_wq_v9(struct kfd_dev *dev,
+ const uint32_t *ih_ring_entry)
+{
+ uint16_t source_id, client_id, pasid, vmid;
+ uint32_t context_id;
+
+ source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry);
+ client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry);
+ pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry);
+ vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry);
+ context_id = SOC15_CONTEXT_ID0_FROM_IH_ENTRY(ih_ring_entry);
+
+ if (source_id == SOC15_INTSRC_CP_END_OF_PIPE)
+ kfd_signal_event_interrupt(pasid, context_id, 32);
+ else if (source_id == SOC15_INTSRC_SDMA_TRAP)
+ kfd_signal_event_interrupt(pasid, context_id & 0xfffffff, 28);
+ else if (source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG)
+ kfd_signal_event_interrupt(pasid, context_id & 0xffffff, 24);
+ else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE)
+ kfd_signal_hw_exception_event(pasid);
+ else if (client_id == SOC15_IH_CLIENTID_VMC ||
+ client_id == SOC15_IH_CLIENTID_UTCL2) {
+ /* TODO */
+ }
+}
+
+const struct kfd_event_interrupt_class event_interrupt_class_v9 = {
+ .interrupt_isr = event_interrupt_isr_v9,
+ .interrupt_wq = event_interrupt_wq_v9,
+};
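
event_interrupt_wq_v9() above passes a different number of valid context-ID bits per interrupt source (32 for CP end-of-pipe, 28 for SDMA traps, 24 for SQ messages). The sketch below models just that narrowing step, separate from the kernel's kfd_signal_event_interrupt() plumbing.

/* Sketch of context-ID narrowing by valid bit count. */
#include <stdint.h>
#include <stdio.h>

static uint32_t narrow_context_id(uint32_t context_id, unsigned int valid_bits)
{
	if (valid_bits >= 32)
		return context_id;
	return context_id & ((1U << valid_bits) - 1);
}

int main(void)
{
	/* the three masks used by the v9 signal path above */
	printf("%#x %#x %#x\n",
	       narrow_context_id(0xDEADBEEF, 32),
	       narrow_context_id(0xDEADBEEF, 28),
	       narrow_context_id(0xDEADBEEF, 24));
	return 0;
}
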
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
index 035c351..db6d933 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
@@ -139,10 +139,12 @@ static void interrupt_wq(struct work_struct *work)
{
struct kfd_dev *dev = container_of(work, struct kfd_dev,
interrupt_work);
+ uint32_t ih_ring_entry[KFD_MAX_RING_ENTRY_SIZE];
- uint32_t ih_ring_entry[DIV_ROUND_UP(
- dev->device_info->ih_ring_entry_size,
- sizeof(uint32_t))];
+ if (dev->device_info->ih_ring_entry_size > sizeof(ih_ring_entry)) {
+ dev_err_once(kfd_chardev(), "Ring entry too small\n");
+ return;
+ }
while (dequeue_ih_ring_entry(dev, ih_ring_entry))
dev->device_info->event_interrupt_class->interrupt_wq(dev,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 69f4964..476951d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -99,7 +99,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
kq->rptr_kernel = kq->rptr_mem->cpu_ptr;
kq->rptr_gpu_addr = kq->rptr_mem->gpu_addr;
- retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->wptr_kernel),
+ retval = kfd_gtt_sa_allocate(dev, dev->device_info->doorbell_size,
&kq->wptr_mem);
if (retval != 0)
@@ -208,6 +208,7 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
size_t available_size;
size_t queue_size_dwords;
uint32_t wptr, rptr;
+ uint64_t wptr64;
unsigned int *queue_address;
/* When rptr == wptr, the buffer is empty.
@@ -216,7 +217,8 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
* the opposite. So we can only use up to queue_size_dwords - 1 dwords.
*/
rptr = *kq->rptr_kernel;
- wptr = *kq->wptr_kernel;
+ wptr = kq->pending_wptr;
+ wptr64 = kq->pending_wptr64;
queue_address = (unsigned int *)kq->pq_kernel_addr;
queue_size_dwords = kq->queue->properties.queue_size / 4;
@@ -232,29 +234,33 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
* make sure calling functions know
* acquire_packet_buffer() failed
*/
- *buffer_ptr = NULL;
- return -ENOMEM;
+ goto err_no_space;
}
if (wptr + packet_size_in_dwords >= queue_size_dwords) {
/* make sure after rolling back to position 0, there is
* still enough space.
*/
- if (packet_size_in_dwords >= rptr) {
- *buffer_ptr = NULL;
- return -ENOMEM;
- }
+ if (packet_size_in_dwords >= rptr)
+ goto err_no_space;
+
/* fill nops, roll back and start at position 0 */
while (wptr > 0) {
queue_address[wptr] = kq->nop_packet;
wptr = (wptr + 1) % queue_size_dwords;
+ wptr64++;
}
}
*buffer_ptr = &queue_address[wptr];
kq->pending_wptr = wptr + packet_size_in_dwords;
+ kq->pending_wptr64 = wptr64 + packet_size_in_dwords;
return 0;
+
+err_no_space:
+ *buffer_ptr = NULL;
+ return -ENOMEM;
}
static void submit_packet(struct kernel_queue *kq)
@@ -270,14 +276,18 @@ static void submit_packet(struct kernel_queue *kq)
pr_debug("\n");
#endif
- *kq->wptr_kernel = kq->pending_wptr;
- write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
- kq->pending_wptr);
+ kq->ops_asic_specific.submit_packet(kq);
}
static void rollback_packet(struct kernel_queue *kq)
{
- kq->pending_wptr = *kq->queue->properties.write_ptr;
+ if (kq->dev->device_info->doorbell_size == 8) {
+ kq->pending_wptr64 = *kq->wptr64_kernel;
+ kq->pending_wptr = *kq->wptr_kernel %
+ (kq->queue->properties.queue_size / 4);
+ } else {
+ kq->pending_wptr = *kq->wptr_kernel;
+ }
}
struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
@@ -308,6 +318,11 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
case CHIP_HAWAII:
kernel_queue_init_cik(&kq->ops_asic_specific);
break;
+
+ case CHIP_VEGA10:
+ case CHIP_RAVEN:
+ kernel_queue_init_v9(&kq->ops_asic_specific);
+ break;
default:
WARN(1, "Unexpected ASIC family %u",
dev->device_info->asic_family);
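
acquire_packet_buffer() above now tracks two write pointers: the dword ring index wraps at the queue size, while the 64-bit value only ever grows, matching the monotonic 64-bit doorbells that submit_packet writes on SOC15. A toy model of that invariant, with an illustrative ring size:

/* Toy model of the dual write pointers in the kernel queue code. */
#include <stdint.h>
#include <stdio.h>

#define QUEUE_SIZE_DWORDS 64	/* illustrative ring size */

struct wptrs {
	uint32_t wptr;		/* ring index, wraps */
	uint64_t wptr64;	/* monotonic doorbell value */
};

static void advance(struct wptrs *w, uint32_t ndwords)
{
	w->wptr = (w->wptr + ndwords) % QUEUE_SIZE_DWORDS;
	w->wptr64 += ndwords;
}

int main(void)
{
	struct wptrs w = { 0, 0 };
	int i;

	for (i = 0; i < 5; i++)
		advance(&w, 20);
	/* prints wptr=36 wptr64=100: the index wrapped, the count did not */
	printf("wptr=%u wptr64=%llu\n", w.wptr,
	       (unsigned long long)w.wptr64);
	return 0;
}
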
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
index 5940531..97aff20 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
@@ -72,6 +72,7 @@ struct kernel_queue {
struct kfd_dev *dev;
struct mqd_manager *mqd;
struct queue *queue;
+ uint64_t pending_wptr64;
uint32_t pending_wptr;
unsigned int nop_packet;
@@ -79,7 +80,10 @@ struct kernel_queue {
uint32_t *rptr_kernel;
uint64_t rptr_gpu_addr;
struct kfd_mem_obj *wptr_mem;
- uint32_t *wptr_kernel;
+ union {
+ uint64_t *wptr64_kernel;
+ uint32_t *wptr_kernel;
+ };
uint64_t wptr_gpu_addr;
struct kfd_mem_obj *pq;
uint64_t pq_gpu_addr;
@@ -97,5 +101,6 @@ struct kernel_queue {
void kernel_queue_init_cik(struct kernel_queue_ops *ops);
void kernel_queue_init_vi(struct kernel_queue_ops *ops);
+void kernel_queue_init_v9(struct kernel_queue_ops *ops);
#endif /* KFD_KERNEL_QUEUE_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c
index a90eb44..19e54ac 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c
@@ -26,11 +26,13 @@
static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev,
enum kfd_queue_type type, unsigned int queue_size);
static void uninitialize_cik(struct kernel_queue *kq);
+static void submit_packet_cik(struct kernel_queue *kq);
void kernel_queue_init_cik(struct kernel_queue_ops *ops)
{
ops->initialize = initialize_cik;
ops->uninitialize = uninitialize_cik;
+ ops->submit_packet = submit_packet_cik;
}
static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev,
@@ -42,3 +44,10 @@ static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev,
static void uninitialize_cik(struct kernel_queue *kq)
{
}
+
+static void submit_packet_cik(struct kernel_queue *kq)
+{
+ *kq->wptr_kernel = kq->pending_wptr;
+ write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
+ kq->pending_wptr);
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
new file mode 100644
index 0000000..684a3bf
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
@@ -0,0 +1,340 @@
+/*
+ * Copyright 2016-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "kfd_kernel_queue.h"
+#include "kfd_device_queue_manager.h"
+#include "kfd_pm4_headers_ai.h"
+#include "kfd_pm4_opcodes.h"
+
+static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev,
+ enum kfd_queue_type type, unsigned int queue_size);
+static void uninitialize_v9(struct kernel_queue *kq);
+static void submit_packet_v9(struct kernel_queue *kq);
+
+void kernel_queue_init_v9(struct kernel_queue_ops *ops)
+{
+ ops->initialize = initialize_v9;
+ ops->uninitialize = uninitialize_v9;
+ ops->submit_packet = submit_packet_v9;
+}
+
+static bool initialize_v9(struct kernel_queue *kq, struct kfd_dev *dev,
+ enum kfd_queue_type type, unsigned int queue_size)
+{
+ int retval;
+
+ retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, &kq->eop_mem);
+ if (retval)
+ return false;
+
+ kq->eop_gpu_addr = kq->eop_mem->gpu_addr;
+ kq->eop_kernel_addr = kq->eop_mem->cpu_ptr;
+
+ memset(kq->eop_kernel_addr, 0, PAGE_SIZE);
+
+ return true;
+}
+
+static void uninitialize_v9(struct kernel_queue *kq)
+{
+ kfd_gtt_sa_free(kq->dev, kq->eop_mem);
+}
+
+static void submit_packet_v9(struct kernel_queue *kq)
+{
+ *kq->wptr64_kernel = kq->pending_wptr64;
+ write_kernel_doorbell64(kq->queue->properties.doorbell_ptr,
+ kq->pending_wptr64);
+}
+
+static int pm_map_process_v9(struct packet_manager *pm,
+ uint32_t *buffer, struct qcm_process_device *qpd)
+{
+ struct pm4_mes_map_process *packet;
+ uint64_t vm_page_table_base_addr =
+ (uint64_t)(qpd->page_table_base) << 12;
+
+ packet = (struct pm4_mes_map_process *)buffer;
+ memset(buffer, 0, sizeof(struct pm4_mes_map_process));
+
+ packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
+ sizeof(struct pm4_mes_map_process));
+ packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
+ packet->bitfields2.process_quantum = 1;
+ packet->bitfields2.pasid = qpd->pqm->process->pasid;
+ packet->bitfields14.gds_size = qpd->gds_size;
+ packet->bitfields14.num_gws = qpd->num_gws;
+ packet->bitfields14.num_oac = qpd->num_oac;
+ packet->bitfields14.sdma_enable = 1;
+ packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
+
+ packet->sh_mem_config = qpd->sh_mem_config;
+ packet->sh_mem_bases = qpd->sh_mem_bases;
+ packet->sq_shader_tba_lo = lower_32_bits(qpd->tba_addr >> 8);
+ packet->sq_shader_tba_hi = upper_32_bits(qpd->tba_addr >> 8);
+ packet->sq_shader_tma_lo = lower_32_bits(qpd->tma_addr >> 8);
+ packet->sq_shader_tma_hi = upper_32_bits(qpd->tma_addr >> 8);
+
+ packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
+ packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);
+
+ packet->vm_context_page_table_base_addr_lo32 =
+ lower_32_bits(vm_page_table_base_addr);
+ packet->vm_context_page_table_base_addr_hi32 =
+ upper_32_bits(vm_page_table_base_addr);
+
+ return 0;
+}
+
+static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer,
+ uint64_t ib, size_t ib_size_in_dwords, bool chain)
+{
+ struct pm4_mes_runlist *packet;
+
+ int concurrent_proc_cnt = 0;
+ struct kfd_dev *kfd = pm->dqm->dev;
+
+ /* Determine the number of processes to map together to HW:
+ * it cannot exceed the number of VMIDs available to the
+ * scheduler, and it is determined by the smaller of the number
+ * of processes in the runlist and kfd module parameter
+ * hws_max_conc_proc.
+ * Note: the arbitration between the number of VMIDs and
+ * hws_max_conc_proc has been done in
+ * kgd2kfd_device_init().
+ */
+ concurrent_proc_cnt = min(pm->dqm->processes_count,
+ kfd->max_proc_per_quantum);
+
+ packet = (struct pm4_mes_runlist *)buffer;
+
+ memset(buffer, 0, sizeof(struct pm4_mes_runlist));
+ packet->header.u32All = pm_build_pm4_header(IT_RUN_LIST,
+ sizeof(struct pm4_mes_runlist));
+
+ packet->bitfields4.ib_size = ib_size_in_dwords;
+ packet->bitfields4.chain = chain ? 1 : 0;
+ packet->bitfields4.offload_polling = 0;
+ packet->bitfields4.valid = 1;
+ packet->bitfields4.process_cnt = concurrent_proc_cnt;
+ packet->ordinal2 = lower_32_bits(ib);
+ packet->ib_base_hi = upper_32_bits(ib);
+
+ return 0;
+}
+
+static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
+ struct queue *q, bool is_static)
+{
+ struct pm4_mes_map_queues *packet;
+ bool use_static = is_static;
+
+ packet = (struct pm4_mes_map_queues *)buffer;
+ memset(buffer, 0, sizeof(struct pm4_mes_map_queues));
+
+ packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
+ sizeof(struct pm4_mes_map_queues));
+ packet->bitfields2.alloc_format =
+ alloc_format__mes_map_queues__one_per_pipe_vi;
+ packet->bitfields2.num_queues = 1;
+ packet->bitfields2.queue_sel =
+ queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
+
+ packet->bitfields2.engine_sel =
+ engine_sel__mes_map_queues__compute_vi;
+ packet->bitfields2.queue_type =
+ queue_type__mes_map_queues__normal_compute_vi;
+
+ switch (q->properties.type) {
+ case KFD_QUEUE_TYPE_COMPUTE:
+ if (use_static)
+ packet->bitfields2.queue_type =
+ queue_type__mes_map_queues__normal_latency_static_queue_vi;
+ break;
+ case KFD_QUEUE_TYPE_DIQ:
+ packet->bitfields2.queue_type =
+ queue_type__mes_map_queues__debug_interface_queue_vi;
+ break;
+ case KFD_QUEUE_TYPE_SDMA:
+ packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
+ engine_sel__mes_map_queues__sdma0_vi;
+ use_static = false; /* no static queues under SDMA */
+ break;
+ default:
+ WARN(1, "queue type %d", q->properties.type);
+ return -EINVAL;
+ }
+ packet->bitfields3.doorbell_offset =
+ q->properties.doorbell_off;
+
+ packet->mqd_addr_lo =
+ lower_32_bits(q->gart_mqd_addr);
+
+ packet->mqd_addr_hi =
+ upper_32_bits(q->gart_mqd_addr);
+
+ packet->wptr_addr_lo =
+ lower_32_bits((uint64_t)q->properties.write_ptr);
+
+ packet->wptr_addr_hi =
+ upper_32_bits((uint64_t)q->properties.write_ptr);
+
+ return 0;
+}
+
+static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer,
+ enum kfd_queue_type type,
+ enum kfd_unmap_queues_filter filter,
+ uint32_t filter_param, bool reset,
+ unsigned int sdma_engine)
+{
+ struct pm4_mes_unmap_queues *packet;
+
+ packet = (struct pm4_mes_unmap_queues *)buffer;
+ memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues));
+
+ packet->header.u32All = pm_build_pm4_header(IT_UNMAP_QUEUES,
+ sizeof(struct pm4_mes_unmap_queues));
+ switch (type) {
+ case KFD_QUEUE_TYPE_COMPUTE:
+ case KFD_QUEUE_TYPE_DIQ:
+ packet->bitfields2.engine_sel =
+ engine_sel__mes_unmap_queues__compute;
+ break;
+ case KFD_QUEUE_TYPE_SDMA:
+ packet->bitfields2.engine_sel =
+ engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
+ break;
+ default:
+ WARN(1, "queue type %d", type);
+ return -EINVAL;
+ }
+
+ if (reset)
+ packet->bitfields2.action =
+ action__mes_unmap_queues__reset_queues;
+ else
+ packet->bitfields2.action =
+ action__mes_unmap_queues__preempt_queues;
+
+ switch (filter) {
+ case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE:
+ packet->bitfields2.queue_sel =
+ queue_sel__mes_unmap_queues__perform_request_on_specified_queues;
+ packet->bitfields2.num_queues = 1;
+ packet->bitfields3b.doorbell_offset0 = filter_param;
+ break;
+ case KFD_UNMAP_QUEUES_FILTER_BY_PASID:
+ packet->bitfields2.queue_sel =
+ queue_sel__mes_unmap_queues__perform_request_on_pasid_queues;
+ packet->bitfields3a.pasid = filter_param;
+ break;
+ case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES:
+ packet->bitfields2.queue_sel =
+ queue_sel__mes_unmap_queues__unmap_all_queues;
+ break;
+ case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES:
+ /* in this case, we do not preempt static queues */
+ packet->bitfields2.queue_sel =
+ queue_sel__mes_unmap_queues__unmap_all_non_static_queues;
+ break;
+ default:
+ WARN(1, "filter %d", filter);
+ return -EINVAL;
+ }
+
+ return 0;
+
+}
+
+static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
+ uint64_t fence_address, uint32_t fence_value)
+{
+ struct pm4_mes_query_status *packet;
+
+ packet = (struct pm4_mes_query_status *)buffer;
+ memset(buffer, 0, sizeof(struct pm4_mes_query_status));
+
+
+ packet->header.u32All = pm_build_pm4_header(IT_QUERY_STATUS,
+ sizeof(struct pm4_mes_query_status));
+
+ packet->bitfields2.context_id = 0;
+ packet->bitfields2.interrupt_sel =
+ interrupt_sel__mes_query_status__completion_status;
+ packet->bitfields2.command =
+ command__mes_query_status__fence_only_after_write_ack;
+
+ packet->addr_hi = upper_32_bits((uint64_t)fence_address);
+ packet->addr_lo = lower_32_bits((uint64_t)fence_address);
+ packet->data_hi = upper_32_bits((uint64_t)fence_value);
+ packet->data_lo = lower_32_bits((uint64_t)fence_value);
+
+ return 0;
+}
+
+
+static int pm_release_mem_v9(uint64_t gpu_addr, uint32_t *buffer)
+{
+ struct pm4_mec_release_mem *packet;
+
+ packet = (struct pm4_mec_release_mem *)buffer;
+ memset(buffer, 0, sizeof(struct pm4_mec_release_mem));
+
+ packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
+ sizeof(struct pm4_mec_release_mem));
+
+ packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
+ packet->bitfields2.event_index = event_index__mec_release_mem__end_of_pipe;
+ packet->bitfields2.tcl1_action_ena = 1;
+ packet->bitfields2.tc_action_ena = 1;
+ packet->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;
+
+ packet->bitfields3.data_sel = data_sel__mec_release_mem__send_32_bit_low;
+ packet->bitfields3.int_sel =
+ int_sel__mec_release_mem__send_interrupt_after_write_confirm;
+
+ packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
+ packet->address_hi = upper_32_bits(gpu_addr);
+
+ packet->data_lo = 0;
+
+ return 0;
+}
+
+const struct packet_manager_funcs kfd_v9_pm_funcs = {
+ .map_process = pm_map_process_v9,
+ .runlist = pm_runlist_v9,
+ .set_resources = pm_set_resources_vi,
+ .map_queues = pm_map_queues_v9,
+ .unmap_queues = pm_unmap_queues_v9,
+ .query_status = pm_query_status_v9,
+ .release_mem = pm_release_mem_v9,
+ .map_process_size = sizeof(struct pm4_mes_map_process),
+ .runlist_size = sizeof(struct pm4_mes_runlist),
+ .set_resources_size = sizeof(struct pm4_mes_set_resources),
+ .map_queues_size = sizeof(struct pm4_mes_map_queues),
+ .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
+ .query_status_size = sizeof(struct pm4_mes_query_status),
+ .release_mem_size = sizeof(struct pm4_mec_release_mem)
+};
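
Every v9 packet writer above sizes its header with pm_build_pm4_header(), defined in kfd_kernel_queue_vi.c below: the count field holds the body length in dwords minus one, i.e. total dwords minus two. The standalone sketch below uses the conventional PM4 type-3 bit layout, assumed here rather than taken from kfd_pm4_headers_ai.h.

/* Sketch of a PM4 type-3 header; bit positions are assumptions. */
#include <stdint.h>
#include <stdio.h>

#define PM4_TYPE_3 3U

static uint32_t build_pm4_header(unsigned int opcode, size_t packet_size)
{
	uint32_t count = packet_size / 4 - 2;	/* body dwords minus one */

	return (PM4_TYPE_3 << 30) |		/* type   [31:30] */
	       ((count & 0x3FFF) << 16) |	/* count  [29:16] */
	       ((opcode & 0xFF) << 8);		/* opcode [15:8]  */
}

int main(void)
{
	/* a 24-byte (6-dword) packet with an arbitrary example opcode */
	printf("%#010x\n", build_pm4_header(0x21, 24));
	return 0;
}
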
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
index f1d4828..bf20c6d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
@@ -22,15 +22,20 @@
*/
#include "kfd_kernel_queue.h"
+#include "kfd_device_queue_manager.h"
+#include "kfd_pm4_headers_vi.h"
+#include "kfd_pm4_opcodes.h"
static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev,
enum kfd_queue_type type, unsigned int queue_size);
static void uninitialize_vi(struct kernel_queue *kq);
+static void submit_packet_vi(struct kernel_queue *kq);
void kernel_queue_init_vi(struct kernel_queue_ops *ops)
{
ops->initialize = initialize_vi;
ops->uninitialize = uninitialize_vi;
+ ops->submit_packet = submit_packet_vi;
}
static bool initialize_vi(struct kernel_queue *kq, struct kfd_dev *dev,
@@ -54,3 +59,317 @@ static void uninitialize_vi(struct kernel_queue *kq)
{
kfd_gtt_sa_free(kq->dev, kq->eop_mem);
}
+
+static void submit_packet_vi(struct kernel_queue *kq)
+{
+ *kq->wptr_kernel = kq->pending_wptr;
+ write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
+ kq->pending_wptr);
+}
+
+unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size)
+{
+ union PM4_MES_TYPE_3_HEADER header;
+
+ header.u32All = 0;
+ header.opcode = opcode;
+ header.count = packet_size / 4 - 2;
+ header.type = PM4_TYPE_3;
+
+ return header.u32All;
+}
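
The count field deserves a worked example: it holds one less than the number of dwords that follow the first ordinal, i.e. packet_size / 4 - 2. A sketch for a hypothetical 28-byte (7-dword) packet (the opcode value here is made up):

    union PM4_MES_TYPE_3_HEADER h;

    h.u32All = 0;
    h.opcode = 0x21;        /* hypothetical IT opcode */
    h.count  = 28 / 4 - 2;  /* = 5 for a 7-dword packet */
    h.type   = PM4_TYPE_3;  /* type-3 packet identifier */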
+
+static int pm_map_process_vi(struct packet_manager *pm, uint32_t *buffer,
+ struct qcm_process_device *qpd)
+{
+ struct pm4_mes_map_process *packet;
+
+ packet = (struct pm4_mes_map_process *)buffer;
+
+ memset(buffer, 0, sizeof(struct pm4_mes_map_process));
+
+ packet->header.u32All = pm_build_pm4_header(IT_MAP_PROCESS,
+ sizeof(struct pm4_mes_map_process));
+ packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
+ packet->bitfields2.process_quantum = 1;
+ packet->bitfields2.pasid = qpd->pqm->process->pasid;
+ packet->bitfields3.page_table_base = qpd->page_table_base;
+ packet->bitfields10.gds_size = qpd->gds_size;
+ packet->bitfields10.num_gws = qpd->num_gws;
+ packet->bitfields10.num_oac = qpd->num_oac;
+ packet->bitfields10.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
+
+ packet->sh_mem_config = qpd->sh_mem_config;
+ packet->sh_mem_bases = qpd->sh_mem_bases;
+ packet->sh_mem_ape1_base = qpd->sh_mem_ape1_base;
+ packet->sh_mem_ape1_limit = qpd->sh_mem_ape1_limit;
+
+ packet->sh_hidden_private_base_vmid = qpd->sh_hidden_private_base;
+
+ packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
+ packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);
+
+ return 0;
+}
+
+static int pm_runlist_vi(struct packet_manager *pm, uint32_t *buffer,
+ uint64_t ib, size_t ib_size_in_dwords, bool chain)
+{
+ struct pm4_mes_runlist *packet;
+ int concurrent_proc_cnt = 0;
+ struct kfd_dev *kfd = pm->dqm->dev;
+
+ if (WARN_ON(!ib))
+ return -EFAULT;
+
+ /* Determine the number of processes to map together to HW:
+	 * it cannot exceed the number of VMIDs available to the
+ * scheduler, and it is determined by the smaller of the number
+ * of processes in the runlist and kfd module parameter
+ * hws_max_conc_proc.
+ * Note: the arbitration between the number of VMIDs and
+ * hws_max_conc_proc has been done in
+ * kgd2kfd_device_init().
+ */
+ concurrent_proc_cnt = min(pm->dqm->processes_count,
+ kfd->max_proc_per_quantum);
+
+ packet = (struct pm4_mes_runlist *)buffer;
+
+ memset(buffer, 0, sizeof(struct pm4_mes_runlist));
+ packet->header.u32All = pm_build_pm4_header(IT_RUN_LIST,
+ sizeof(struct pm4_mes_runlist));
+
+ packet->bitfields4.ib_size = ib_size_in_dwords;
+ packet->bitfields4.chain = chain ? 1 : 0;
+ packet->bitfields4.offload_polling = 0;
+ packet->bitfields4.valid = 1;
+ packet->bitfields4.process_cnt = concurrent_proc_cnt;
+ packet->ordinal2 = lower_32_bits(ib);
+ packet->bitfields3.ib_base_hi = upper_32_bits(ib);
+
+ return 0;
+}
+
+int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer,
+ struct scheduling_resources *res)
+{
+ struct pm4_mes_set_resources *packet;
+
+ packet = (struct pm4_mes_set_resources *)buffer;
+ memset(buffer, 0, sizeof(struct pm4_mes_set_resources));
+
+ packet->header.u32All = pm_build_pm4_header(IT_SET_RESOURCES,
+ sizeof(struct pm4_mes_set_resources));
+
+ packet->bitfields2.queue_type =
+ queue_type__mes_set_resources__hsa_interface_queue_hiq;
+ packet->bitfields2.vmid_mask = res->vmid_mask;
+ packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100;
+ packet->bitfields7.oac_mask = res->oac_mask;
+ packet->bitfields8.gds_heap_base = res->gds_heap_base;
+ packet->bitfields8.gds_heap_size = res->gds_heap_size;
+
+ packet->gws_mask_lo = lower_32_bits(res->gws_mask);
+ packet->gws_mask_hi = upper_32_bits(res->gws_mask);
+
+ packet->queue_mask_lo = lower_32_bits(res->queue_mask);
+ packet->queue_mask_hi = upper_32_bits(res->queue_mask);
+
+ return 0;
+}
+
+static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer,
+ struct queue *q, bool is_static)
+{
+ struct pm4_mes_map_queues *packet;
+ bool use_static = is_static;
+
+ packet = (struct pm4_mes_map_queues *)buffer;
+ memset(buffer, 0, sizeof(struct pm4_mes_map_queues));
+
+ packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES,
+ sizeof(struct pm4_mes_map_queues));
+ packet->bitfields2.alloc_format =
+ alloc_format__mes_map_queues__one_per_pipe_vi;
+ packet->bitfields2.num_queues = 1;
+ packet->bitfields2.queue_sel =
+ queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
+
+ packet->bitfields2.engine_sel =
+ engine_sel__mes_map_queues__compute_vi;
+ packet->bitfields2.queue_type =
+ queue_type__mes_map_queues__normal_compute_vi;
+
+ switch (q->properties.type) {
+ case KFD_QUEUE_TYPE_COMPUTE:
+ if (use_static)
+ packet->bitfields2.queue_type =
+ queue_type__mes_map_queues__normal_latency_static_queue_vi;
+ break;
+ case KFD_QUEUE_TYPE_DIQ:
+ packet->bitfields2.queue_type =
+ queue_type__mes_map_queues__debug_interface_queue_vi;
+ break;
+ case KFD_QUEUE_TYPE_SDMA:
+ packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
+ engine_sel__mes_map_queues__sdma0_vi;
+ use_static = false; /* no static queues under SDMA */
+ break;
+ default:
+ WARN(1, "queue type %d", q->properties.type);
+ return -EINVAL;
+ }
+ packet->bitfields3.doorbell_offset =
+ q->properties.doorbell_off;
+
+ packet->mqd_addr_lo =
+ lower_32_bits(q->gart_mqd_addr);
+
+ packet->mqd_addr_hi =
+ upper_32_bits(q->gart_mqd_addr);
+
+ packet->wptr_addr_lo =
+ lower_32_bits((uint64_t)q->properties.write_ptr);
+
+ packet->wptr_addr_hi =
+ upper_32_bits((uint64_t)q->properties.write_ptr);
+
+ return 0;
+}
+
+static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer,
+ enum kfd_queue_type type,
+ enum kfd_unmap_queues_filter filter,
+ uint32_t filter_param, bool reset,
+ unsigned int sdma_engine)
+{
+ struct pm4_mes_unmap_queues *packet;
+
+ packet = (struct pm4_mes_unmap_queues *)buffer;
+ memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues));
+
+ packet->header.u32All = pm_build_pm4_header(IT_UNMAP_QUEUES,
+ sizeof(struct pm4_mes_unmap_queues));
+ switch (type) {
+ case KFD_QUEUE_TYPE_COMPUTE:
+ case KFD_QUEUE_TYPE_DIQ:
+ packet->bitfields2.engine_sel =
+ engine_sel__mes_unmap_queues__compute;
+ break;
+ case KFD_QUEUE_TYPE_SDMA:
+ packet->bitfields2.engine_sel =
+ engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
+ break;
+ default:
+ WARN(1, "queue type %d", type);
+ return -EINVAL;
+ }
+
+ if (reset)
+ packet->bitfields2.action =
+ action__mes_unmap_queues__reset_queues;
+ else
+ packet->bitfields2.action =
+ action__mes_unmap_queues__preempt_queues;
+
+ switch (filter) {
+ case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE:
+ packet->bitfields2.queue_sel =
+ queue_sel__mes_unmap_queues__perform_request_on_specified_queues;
+ packet->bitfields2.num_queues = 1;
+ packet->bitfields3b.doorbell_offset0 = filter_param;
+ break;
+ case KFD_UNMAP_QUEUES_FILTER_BY_PASID:
+ packet->bitfields2.queue_sel =
+ queue_sel__mes_unmap_queues__perform_request_on_pasid_queues;
+ packet->bitfields3a.pasid = filter_param;
+ break;
+ case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES:
+ packet->bitfields2.queue_sel =
+ queue_sel__mes_unmap_queues__unmap_all_queues;
+ break;
+ case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES:
+ /* in this case, we do not preempt static queues */
+ packet->bitfields2.queue_sel =
+ queue_sel__mes_unmap_queues__unmap_all_non_static_queues;
+ break;
+ default:
+ WARN(1, "filter %d", filter);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int pm_query_status_vi(struct packet_manager *pm, uint32_t *buffer,
+ uint64_t fence_address, uint32_t fence_value)
+{
+ struct pm4_mes_query_status *packet;
+
+ packet = (struct pm4_mes_query_status *)buffer;
+ memset(buffer, 0, sizeof(struct pm4_mes_query_status));
+
+ packet->header.u32All = pm_build_pm4_header(IT_QUERY_STATUS,
+ sizeof(struct pm4_mes_query_status));
+
+ packet->bitfields2.context_id = 0;
+ packet->bitfields2.interrupt_sel =
+ interrupt_sel__mes_query_status__completion_status;
+ packet->bitfields2.command =
+ command__mes_query_status__fence_only_after_write_ack;
+
+ packet->addr_hi = upper_32_bits((uint64_t)fence_address);
+ packet->addr_lo = lower_32_bits((uint64_t)fence_address);
+ packet->data_hi = upper_32_bits((uint64_t)fence_value);
+ packet->data_lo = lower_32_bits((uint64_t)fence_value);
+
+ return 0;
+}
+
+static int pm_release_mem_vi(uint64_t gpu_addr, uint32_t *buffer)
+{
+ struct pm4_mec_release_mem *packet;
+
+ packet = (struct pm4_mec_release_mem *)buffer;
+ memset(buffer, 0, sizeof(*packet));
+
+ packet->header.u32All = pm_build_pm4_header(IT_RELEASE_MEM,
+ sizeof(*packet));
+
+ packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
+ packet->bitfields2.event_index = event_index___release_mem__end_of_pipe;
+ packet->bitfields2.tcl1_action_ena = 1;
+ packet->bitfields2.tc_action_ena = 1;
+ packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
+ packet->bitfields2.atc = 0;
+
+ packet->bitfields3.data_sel = data_sel___release_mem__send_32_bit_low;
+ packet->bitfields3.int_sel =
+ int_sel___release_mem__send_interrupt_after_write_confirm;
+
+ packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
+ packet->address_hi = upper_32_bits(gpu_addr);
+
+ packet->data_lo = 0;
+
+ return 0;
+}
+
+const struct packet_manager_funcs kfd_vi_pm_funcs = {
+ .map_process = pm_map_process_vi,
+ .runlist = pm_runlist_vi,
+ .set_resources = pm_set_resources_vi,
+ .map_queues = pm_map_queues_vi,
+ .unmap_queues = pm_unmap_queues_vi,
+ .query_status = pm_query_status_vi,
+ .release_mem = pm_release_mem_vi,
+ .map_process_size = sizeof(struct pm4_mes_map_process),
+ .runlist_size = sizeof(struct pm4_mes_runlist),
+ .set_resources_size = sizeof(struct pm4_mes_set_resources),
+ .map_queues_size = sizeof(struct pm4_mes_map_queues),
+ .unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
+ .query_status_size = sizeof(struct pm4_mes_query_status),
+ .release_mem_size = sizeof(struct pm4_mec_release_mem)
+};
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
index e0c07d2..76bf2dc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -43,6 +43,8 @@ static const struct kgd2kfd_calls kgd2kfd = {
.interrupt = kgd2kfd_interrupt,
.suspend = kgd2kfd_suspend,
.resume = kgd2kfd_resume,
+ .quiesce_mm = kgd2kfd_quiesce_mm,
+ .resume_mm = kgd2kfd_resume_mm,
.schedule_evict_and_restore_process =
kgd2kfd_schedule_evict_and_restore_process,
};
@@ -81,6 +83,11 @@ module_param(ignore_crat, int, 0444);
MODULE_PARM_DESC(ignore_crat,
"Ignore CRAT table during KFD initialization (0 = use CRAT (default), 1 = ignore CRAT)");
+int vega10_noretry;
+module_param_named(noretry, vega10_noretry, int, 0644);
+MODULE_PARM_DESC(noretry,
+ "Set sh_mem_config.retry_disable on Vega10 (0 = retry enabled (default), 1 = retry disabled)");
+
static int amdkfd_init_completed;
int kgd2kfd_init(unsigned int interface_version,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
index ee7061e..4b8eb50 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
@@ -38,6 +38,9 @@ struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
case CHIP_POLARIS10:
case CHIP_POLARIS11:
return mqd_manager_init_vi_tonga(type, dev);
+ case CHIP_VEGA10:
+ case CHIP_RAVEN:
+ return mqd_manager_init_v9(type, dev);
default:
WARN(1, "Unexpected ASIC family %u",
dev->device_info->asic_family);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index c00c325..06eaa21 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -79,10 +79,6 @@ static int init_mqd(struct mqd_manager *mm, void **mqd,
m->cp_mqd_base_addr_lo = lower_32_bits(addr);
m->cp_mqd_base_addr_hi = upper_32_bits(addr);
- m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE | IB_ATC_EN;
- /* Although WinKFD writes this, I suspect it should not be necessary */
- m->cp_hqd_ib_control = IB_ATC_EN | DEFAULT_MIN_IB_AVAIL_SIZE;
-
m->cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS |
QUANTUM_DURATION(10);
@@ -412,7 +408,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
return NULL;
- mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
+ mqd = kzalloc(sizeof(*mqd), GFP_NOIO);
if (!mqd)
return NULL;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
new file mode 100644
index 0000000..684054f
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -0,0 +1,443 @@
+/*
+ * Copyright 2016-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include "kfd_priv.h"
+#include "kfd_mqd_manager.h"
+#include "v9_structs.h"
+#include "gc/gc_9_0_offset.h"
+#include "gc/gc_9_0_sh_mask.h"
+#include "sdma0/sdma0_4_0_sh_mask.h"
+
+static inline struct v9_mqd *get_mqd(void *mqd)
+{
+ return (struct v9_mqd *)mqd;
+}
+
+static inline struct v9_sdma_mqd *get_sdma_mqd(void *mqd)
+{
+ return (struct v9_sdma_mqd *)mqd;
+}
+
+static int init_mqd(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *q)
+{
+ int retval;
+ uint64_t addr;
+ struct v9_mqd *m;
+ struct kfd_dev *kfd = mm->dev;
+
+ /* From V9, for CWSR, the control stack is located on the next page
+ * boundary after the mqd, we will use the gtt allocation function
+ * instead of sub-allocation function.
+ */
+ if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
+ *mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
+ if (!*mqd_mem_obj)
+ return -ENOMEM;
+ retval = kfd->kfd2kgd->init_gtt_mem_allocation(kfd->kgd,
+ ALIGN(q->ctl_stack_size, PAGE_SIZE) +
+ ALIGN(sizeof(struct v9_mqd), PAGE_SIZE),
+ &((*mqd_mem_obj)->gtt_mem),
+ &((*mqd_mem_obj)->gpu_addr),
+ (void *)&((*mqd_mem_obj)->cpu_ptr));
+	} else {
+		retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct v9_mqd),
+				mqd_mem_obj);
+	}
+ if (retval != 0)
+ return -ENOMEM;
+
+ m = (struct v9_mqd *) (*mqd_mem_obj)->cpu_ptr;
+ addr = (*mqd_mem_obj)->gpu_addr;
+
+ memset(m, 0, sizeof(struct v9_mqd));
+
+ m->header = 0xC0310800;
+ m->compute_pipelinestat_enable = 1;
+ m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
+ m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;
+
+ m->cp_hqd_persistent_state = CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
+ 0x53 << CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
+
+ m->cp_mqd_control = 1 << CP_MQD_CONTROL__PRIV_STATE__SHIFT;
+
+ m->cp_mqd_base_addr_lo = lower_32_bits(addr);
+ m->cp_mqd_base_addr_hi = upper_32_bits(addr);
+
+ m->cp_hqd_quantum = 1 << CP_HQD_QUANTUM__QUANTUM_EN__SHIFT |
+ 1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
+ 10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
+
+ m->cp_hqd_pipe_priority = 1;
+ m->cp_hqd_queue_priority = 15;
+
+ if (q->format == KFD_QUEUE_FORMAT_AQL) {
+ m->cp_hqd_aql_control =
+ 1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
+ }
+
+ if (q->tba_addr) {
+ m->compute_pgm_rsrc2 |=
+ (1 << COMPUTE_PGM_RSRC2__TRAP_PRESENT__SHIFT);
+ }
+
+ if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address) {
+ m->cp_hqd_persistent_state |=
+ (1 << CP_HQD_PERSISTENT_STATE__QSWITCH_MODE__SHIFT);
+ m->cp_hqd_ctx_save_base_addr_lo =
+ lower_32_bits(q->ctx_save_restore_area_address);
+ m->cp_hqd_ctx_save_base_addr_hi =
+ upper_32_bits(q->ctx_save_restore_area_address);
+ m->cp_hqd_ctx_save_size = q->ctx_save_restore_area_size;
+ m->cp_hqd_cntl_stack_size = q->ctl_stack_size;
+ m->cp_hqd_cntl_stack_offset = q->ctl_stack_size;
+ m->cp_hqd_wg_state_offset = q->ctl_stack_size;
+ }
+
+ *mqd = m;
+ if (gart_addr)
+ *gart_addr = addr;
+ retval = mm->update_mqd(mm, m, q);
+
+ return retval;
+}
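
To make the allocation arithmetic above concrete, a sketch assuming 4 KiB pages and a hypothetical 1 KiB control stack; sizeof(struct v9_mqd) is well under a page, so the GTT buffer comes to two pages, with the control stack on the page after the MQD as the comment above describes:

    size_t ctl_bytes = ALIGN(1024, PAGE_SIZE);                  /* 4096 */
    size_t mqd_bytes = ALIGN(sizeof(struct v9_mqd), PAGE_SIZE); /* 4096 */
    size_t gtt_bytes = ctl_bytes + mqd_bytes;                   /* 8192 */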
+
+static int load_mqd(struct mqd_manager *mm, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ struct queue_properties *p, struct mm_struct *mms)
+{
+ /* AQL write pointer counts in 64B packets, PM4/CP counts in dwords. */
+ uint32_t wptr_shift = (p->format == KFD_QUEUE_FORMAT_AQL ? 4 : 0);
+
+ return mm->dev->kfd2kgd->hqd_load(mm->dev->kgd, mqd, pipe_id, queue_id,
+ (uint32_t __user *)p->write_ptr,
+ wptr_shift, 0, mms);
+}
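
The shift above converts write-pointer units. A small illustration: an AQL packet is 64 bytes, i.e. 16 dwords, so an AQL write pointer counting packets becomes a dword count when shifted left by 4, while PM4 queues already count dwords and need no conversion:

    uint64_t aql_wptr   = 3;              /* 3 AQL packets */
    uint64_t dword_wptr = aql_wptr << 4;  /* 48 dwords */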
+
+static int update_mqd(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q)
+{
+ struct v9_mqd *m;
+
+ m = get_mqd(mqd);
+
+ m->cp_hqd_pq_control = 5 << CP_HQD_PQ_CONTROL__RPTR_BLOCK_SIZE__SHIFT;
+ m->cp_hqd_pq_control |= order_base_2(q->queue_size / 4) - 1;
+ pr_debug("cp_hqd_pq_control 0x%x\n", m->cp_hqd_pq_control);
+
+ m->cp_hqd_pq_base_lo = lower_32_bits((uint64_t)q->queue_address >> 8);
+ m->cp_hqd_pq_base_hi = upper_32_bits((uint64_t)q->queue_address >> 8);
+
+ m->cp_hqd_pq_rptr_report_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+ m->cp_hqd_pq_rptr_report_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+ m->cp_hqd_pq_wptr_poll_addr_lo = lower_32_bits((uint64_t)q->write_ptr);
+ m->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits((uint64_t)q->write_ptr);
+
+ m->cp_hqd_pq_doorbell_control =
+ q->doorbell_off <<
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_OFFSET__SHIFT;
+ pr_debug("cp_hqd_pq_doorbell_control 0x%x\n",
+ m->cp_hqd_pq_doorbell_control);
+
+ m->cp_hqd_ib_control =
+ 3 << CP_HQD_IB_CONTROL__MIN_IB_AVAIL_SIZE__SHIFT |
+ 1 << CP_HQD_IB_CONTROL__IB_EXE_DISABLE__SHIFT;
+
+ /*
+ * HW does not clamp this field correctly. Maximum EOP queue size
+ * is constrained by per-SE EOP done signal count, which is 8-bit.
+ * Limit is 0xFF EOP entries (= 0x7F8 dwords). CP will not submit
+ * more than (EOP entry count - 1) so a queue size of 0x800 dwords
+ * is safe, giving a maximum field value of 0xA.
+ */
+ m->cp_hqd_eop_control = min(0xA,
+ order_base_2(q->eop_ring_buffer_size / 4) - 1);
+ m->cp_hqd_eop_base_addr_lo =
+ lower_32_bits(q->eop_ring_buffer_address >> 8);
+ m->cp_hqd_eop_base_addr_hi =
+ upper_32_bits(q->eop_ring_buffer_address >> 8);
+
+ m->cp_hqd_iq_timer = 0;
+
+ m->cp_hqd_vmid = q->vmid;
+
+ if (q->format == KFD_QUEUE_FORMAT_AQL) {
+ m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
+ 2 << CP_HQD_PQ_CONTROL__SLOT_BASED_WPTR__SHIFT |
+ 1 << CP_HQD_PQ_CONTROL__QUEUE_FULL_EN__SHIFT |
+ 1 << CP_HQD_PQ_CONTROL__WPP_CLAMP_EN__SHIFT;
+ m->cp_hqd_pq_doorbell_control |= 1 <<
+ CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_BIF_DROP__SHIFT;
+ }
+ if (mm->dev->cwsr_enabled && q->ctx_save_restore_area_address)
+ m->cp_hqd_ctx_save_control = 0;
+
+ q->is_active = (q->queue_size > 0 &&
+ q->queue_address != 0 &&
+ q->queue_percent > 0 &&
+ !q->is_evicted);
+
+ return 0;
+}
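
Worked numbers for the EOP clamp above: the largest safe ring is 0x800 dwords (0x2000 bytes), for which the computed value meets the clamp exactly:

    /* order_base_2(0x2000 / 4) - 1 = order_base_2(0x800) - 1 = 11 - 1 = 0xA */
    unsigned int field = min(0xA, order_base_2(0x2000 / 4) - 1); /* = 0xA */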
+
+static int destroy_mqd(struct mqd_manager *mm, void *mqd,
+ enum kfd_preempt_type type,
+ unsigned int timeout, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ return mm->dev->kfd2kgd->hqd_destroy
+ (mm->dev->kgd, mqd, type, timeout,
+ pipe_id, queue_id);
+}
+
+static void uninit_mqd(struct mqd_manager *mm, void *mqd,
+ struct kfd_mem_obj *mqd_mem_obj)
+{
+ struct kfd_dev *kfd = mm->dev;
+
+ if (mqd_mem_obj->gtt_mem) {
+ kfd->kfd2kgd->free_gtt_mem(kfd->kgd, mqd_mem_obj->gtt_mem);
+ kfree(mqd_mem_obj);
+ } else {
+ kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
+ }
+}
+
+static bool is_occupied(struct mqd_manager *mm, void *mqd,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ return mm->dev->kfd2kgd->hqd_is_occupied(
+ mm->dev->kgd, queue_address,
+ pipe_id, queue_id);
+}
+
+static int init_mqd_hiq(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *q)
+{
+ struct v9_mqd *m;
+ int retval = init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q);
+
+ if (retval != 0)
+ return retval;
+
+ m = get_mqd(*mqd);
+
+ m->cp_hqd_pq_control |= 1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
+ 1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
+
+ return retval;
+}
+
+static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q)
+{
+ struct v9_mqd *m;
+ int retval = update_mqd(mm, mqd, q);
+
+ if (retval != 0)
+ return retval;
+
+	/* TODO: this looks redundant; update_mqd already sets cp_hqd_vmid */
+ m = get_mqd(mqd);
+ m->cp_hqd_vmid = q->vmid;
+ return retval;
+}
+
+static int init_mqd_sdma(struct mqd_manager *mm, void **mqd,
+ struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
+ struct queue_properties *q)
+{
+ int retval;
+ struct v9_sdma_mqd *m;
+
+ retval = kfd_gtt_sa_allocate(mm->dev,
+ sizeof(struct v9_sdma_mqd),
+ mqd_mem_obj);
+
+ if (retval != 0)
+ return -ENOMEM;
+
+ m = (struct v9_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr;
+
+ memset(m, 0, sizeof(struct v9_sdma_mqd));
+
+ *mqd = m;
+ if (gart_addr)
+ *gart_addr = (*mqd_mem_obj)->gpu_addr;
+
+ retval = mm->update_mqd(mm, m, q);
+
+ return retval;
+}
+
+static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ struct kfd_mem_obj *mqd_mem_obj)
+{
+ kfd_gtt_sa_free(mm->dev, mqd_mem_obj);
+}
+
+static int load_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ uint32_t pipe_id, uint32_t queue_id,
+ struct queue_properties *p, struct mm_struct *mms)
+{
+ return mm->dev->kfd2kgd->hqd_sdma_load(mm->dev->kgd, mqd,
+ (uint32_t __user *)p->write_ptr,
+ mms);
+}
+
+#define SDMA_RLC_DUMMY_DEFAULT 0xf
+
+static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ struct queue_properties *q)
+{
+ struct v9_sdma_mqd *m;
+
+ m = get_sdma_mqd(mqd);
+ m->sdmax_rlcx_rb_cntl = order_base_2(q->queue_size / 4)
+ << SDMA0_RLC0_RB_CNTL__RB_SIZE__SHIFT |
+ q->vmid << SDMA0_RLC0_RB_CNTL__RB_VMID__SHIFT |
+ 1 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_ENABLE__SHIFT |
+ 6 << SDMA0_RLC0_RB_CNTL__RPTR_WRITEBACK_TIMER__SHIFT;
+
+ m->sdmax_rlcx_rb_base = lower_32_bits(q->queue_address >> 8);
+ m->sdmax_rlcx_rb_base_hi = upper_32_bits(q->queue_address >> 8);
+ m->sdmax_rlcx_rb_rptr_addr_lo = lower_32_bits((uint64_t)q->read_ptr);
+ m->sdmax_rlcx_rb_rptr_addr_hi = upper_32_bits((uint64_t)q->read_ptr);
+ m->sdmax_rlcx_doorbell_offset =
+ q->doorbell_off << SDMA0_RLC0_DOORBELL_OFFSET__OFFSET__SHIFT;
+
+ m->sdma_engine_id = q->sdma_engine_id;
+ m->sdma_queue_id = q->sdma_queue_id;
+ m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT;
+
+ q->is_active = (q->queue_size > 0 &&
+ q->queue_address != 0 &&
+ q->queue_percent > 0 &&
+ !q->is_evicted);
+
+ return 0;
+}
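
For the RB_SIZE encoding above, a hypothetical 1 MiB SDMA ring illustrates the math:

    uint32_t queue_size = 0x100000;                      /* 1 MiB ring */
    uint32_t rb_size    = order_base_2(queue_size / 4);  /* log2(0x40000) = 18 */
    /* 18 is then shifted into the RB_SIZE field of SDMA0_RLC0_RB_CNTL */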
+
+/*
+ * The preempt type is ignored here because there is only one way
+ * to preempt an SDMA queue.
+ */
+static int destroy_mqd_sdma(struct mqd_manager *mm, void *mqd,
+ enum kfd_preempt_type type,
+ unsigned int timeout, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ return mm->dev->kfd2kgd->hqd_sdma_destroy(mm->dev->kgd, mqd, timeout);
+}
+
+static bool is_occupied_sdma(struct mqd_manager *mm, void *mqd,
+ uint64_t queue_address, uint32_t pipe_id,
+ uint32_t queue_id)
+{
+ return mm->dev->kfd2kgd->hqd_sdma_is_occupied(mm->dev->kgd, mqd);
+}
+
+#if defined(CONFIG_DEBUG_FS)
+
+static int debugfs_show_mqd(struct seq_file *m, void *data)
+{
+ seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
+ data, sizeof(struct v9_mqd), false);
+ return 0;
+}
+
+static int debugfs_show_mqd_sdma(struct seq_file *m, void *data)
+{
+ seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
+ data, sizeof(struct v9_sdma_mqd), false);
+ return 0;
+}
+
+#endif
+
+struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
+ struct kfd_dev *dev)
+{
+ struct mqd_manager *mqd;
+
+ if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
+ return NULL;
+
+ mqd = kzalloc(sizeof(*mqd), GFP_NOIO);
+ if (!mqd)
+ return NULL;
+
+ mqd->dev = dev;
+
+ switch (type) {
+ case KFD_MQD_TYPE_CP:
+ case KFD_MQD_TYPE_COMPUTE:
+ mqd->init_mqd = init_mqd;
+ mqd->uninit_mqd = uninit_mqd;
+ mqd->load_mqd = load_mqd;
+ mqd->update_mqd = update_mqd;
+ mqd->destroy_mqd = destroy_mqd;
+ mqd->is_occupied = is_occupied;
+#if defined(CONFIG_DEBUG_FS)
+ mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+ break;
+ case KFD_MQD_TYPE_HIQ:
+ mqd->init_mqd = init_mqd_hiq;
+ mqd->uninit_mqd = uninit_mqd;
+ mqd->load_mqd = load_mqd;
+ mqd->update_mqd = update_mqd_hiq;
+ mqd->destroy_mqd = destroy_mqd;
+ mqd->is_occupied = is_occupied;
+#if defined(CONFIG_DEBUG_FS)
+ mqd->debugfs_show_mqd = debugfs_show_mqd;
+#endif
+ break;
+ case KFD_MQD_TYPE_SDMA:
+ mqd->init_mqd = init_mqd_sdma;
+ mqd->uninit_mqd = uninit_mqd_sdma;
+ mqd->load_mqd = load_mqd_sdma;
+ mqd->update_mqd = update_mqd_sdma;
+ mqd->destroy_mqd = destroy_mqd_sdma;
+ mqd->is_occupied = is_occupied_sdma;
+#if defined(CONFIG_DEBUG_FS)
+ mqd->debugfs_show_mqd = debugfs_show_mqd_sdma;
+#endif
+ break;
+ default:
+ kfree(mqd);
+ return NULL;
+ }
+
+ return mqd;
+}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index 89e4242..481307b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -394,7 +394,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
return NULL;
- mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
+ mqd = kzalloc(sizeof(*mqd), GFP_NOIO);
if (!mqd)
return NULL;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 89ba4c6..c317feb4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -26,8 +26,6 @@
#include "kfd_device_queue_manager.h"
#include "kfd_kernel_queue.h"
#include "kfd_priv.h"
-#include "kfd_pm4_headers_vi.h"
-#include "kfd_pm4_opcodes.h"
static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
unsigned int buffer_size_bytes)
@@ -39,18 +37,6 @@ static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
*wptr = temp;
}
-static unsigned int build_pm4_header(unsigned int opcode, size_t packet_size)
-{
- union PM4_MES_TYPE_3_HEADER header;
-
- header.u32All = 0;
- header.opcode = opcode;
- header.count = packet_size / 4 - 2;
- header.type = PM4_TYPE_3;
-
- return header.u32All;
-}
-
static void pm_calc_rlib_size(struct packet_manager *pm,
unsigned int *rlib_size,
bool *over_subscription)
@@ -80,9 +66,9 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
pr_debug("Over subscribed runlist\n");
}
- map_queue_size = sizeof(struct pm4_mes_map_queues);
+ map_queue_size = pm->pmf->map_queues_size;
/* calculate run list ib allocation size */
- *rlib_size = process_count * sizeof(struct pm4_mes_map_process) +
+ *rlib_size = process_count * pm->pmf->map_process_size +
queue_count * map_queue_size;
/*
@@ -90,7 +76,7 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
* when over subscription
*/
if (*over_subscription)
- *rlib_size += sizeof(struct pm4_mes_runlist);
+ *rlib_size += pm->pmf->runlist_size;
pr_debug("runlist ib size %d\n", *rlib_size);
}
@@ -108,12 +94,14 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm,
pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription);
+ mutex_lock(&pm->lock);
+
retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size,
&pm->ib_buffer_obj);
if (retval) {
pr_err("Failed to allocate runlist IB\n");
- return retval;
+ goto out;
}
*(void **)rl_buffer = pm->ib_buffer_obj->cpu_ptr;
@@ -121,138 +109,10 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm,
memset(*rl_buffer, 0, *rl_buffer_size);
pm->allocated = true;
- return retval;
-}
-
-static int pm_create_runlist(struct packet_manager *pm, uint32_t *buffer,
- uint64_t ib, size_t ib_size_in_dwords, bool chain)
-{
- struct pm4_mes_runlist *packet;
- int concurrent_proc_cnt = 0;
- struct kfd_dev *kfd = pm->dqm->dev;
-
- if (WARN_ON(!ib))
- return -EFAULT;
-
- /* Determine the number of processes to map together to HW:
- * it can not exceed the number of VMIDs available to the
- * scheduler, and it is determined by the smaller of the number
- * of processes in the runlist and kfd module parameter
- * hws_max_conc_proc.
- * Note: the arbitration between the number of VMIDs and
- * hws_max_conc_proc has been done in
- * kgd2kfd_device_init().
- */
- concurrent_proc_cnt = min(pm->dqm->processes_count,
- kfd->max_proc_per_quantum);
-
- packet = (struct pm4_mes_runlist *)buffer;
-
- memset(buffer, 0, sizeof(struct pm4_mes_runlist));
- packet->header.u32All = build_pm4_header(IT_RUN_LIST,
- sizeof(struct pm4_mes_runlist));
-
- packet->bitfields4.ib_size = ib_size_in_dwords;
- packet->bitfields4.chain = chain ? 1 : 0;
- packet->bitfields4.offload_polling = 0;
- packet->bitfields4.valid = 1;
- packet->bitfields4.process_cnt = concurrent_proc_cnt;
- packet->ordinal2 = lower_32_bits(ib);
- packet->bitfields3.ib_base_hi = upper_32_bits(ib);
-
- return 0;
-}
-
-static int pm_create_map_process(struct packet_manager *pm, uint32_t *buffer,
- struct qcm_process_device *qpd)
-{
- struct pm4_mes_map_process *packet;
-
- packet = (struct pm4_mes_map_process *)buffer;
- memset(buffer, 0, sizeof(struct pm4_mes_map_process));
-
- packet->header.u32All = build_pm4_header(IT_MAP_PROCESS,
- sizeof(struct pm4_mes_map_process));
- packet->bitfields2.diq_enable = (qpd->is_debug) ? 1 : 0;
- packet->bitfields2.process_quantum = 1;
- packet->bitfields2.pasid = qpd->pqm->process->pasid;
- packet->bitfields3.page_table_base = qpd->page_table_base;
- packet->bitfields10.gds_size = qpd->gds_size;
- packet->bitfields10.num_gws = qpd->num_gws;
- packet->bitfields10.num_oac = qpd->num_oac;
- packet->bitfields10.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
-
- packet->sh_mem_config = qpd->sh_mem_config;
- packet->sh_mem_bases = qpd->sh_mem_bases;
- packet->sh_mem_ape1_base = qpd->sh_mem_ape1_base;
- packet->sh_mem_ape1_limit = qpd->sh_mem_ape1_limit;
-
- packet->sh_hidden_private_base_vmid = qpd->sh_hidden_private_base;
-
- packet->gds_addr_lo = lower_32_bits(qpd->gds_context_area);
- packet->gds_addr_hi = upper_32_bits(qpd->gds_context_area);
-
- return 0;
-}
-
-static int pm_create_map_queue(struct packet_manager *pm, uint32_t *buffer,
- struct queue *q, bool is_static)
-{
- struct pm4_mes_map_queues *packet;
- bool use_static = is_static;
-
- packet = (struct pm4_mes_map_queues *)buffer;
- memset(buffer, 0, sizeof(struct pm4_mes_map_queues));
-
- packet->header.u32All = build_pm4_header(IT_MAP_QUEUES,
- sizeof(struct pm4_mes_map_queues));
- packet->bitfields2.alloc_format =
- alloc_format__mes_map_queues__one_per_pipe_vi;
- packet->bitfields2.num_queues = 1;
- packet->bitfields2.queue_sel =
- queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi;
-
- packet->bitfields2.engine_sel =
- engine_sel__mes_map_queues__compute_vi;
- packet->bitfields2.queue_type =
- queue_type__mes_map_queues__normal_compute_vi;
-
- switch (q->properties.type) {
- case KFD_QUEUE_TYPE_COMPUTE:
- if (use_static)
- packet->bitfields2.queue_type =
- queue_type__mes_map_queues__normal_latency_static_queue_vi;
- break;
- case KFD_QUEUE_TYPE_DIQ:
- packet->bitfields2.queue_type =
- queue_type__mes_map_queues__debug_interface_queue_vi;
- break;
- case KFD_QUEUE_TYPE_SDMA:
- packet->bitfields2.engine_sel = q->properties.sdma_engine_id +
- engine_sel__mes_map_queues__sdma0_vi;
- use_static = false; /* no static queues under SDMA */
- break;
- default:
- WARN(1, "queue type %d", q->properties.type);
- return -EINVAL;
- }
- packet->bitfields3.doorbell_offset =
- q->properties.doorbell_off;
-
- packet->mqd_addr_lo =
- lower_32_bits(q->gart_mqd_addr);
-
- packet->mqd_addr_hi =
- upper_32_bits(q->gart_mqd_addr);
-
- packet->wptr_addr_lo =
- lower_32_bits((uint64_t)q->properties.write_ptr);
-
- packet->wptr_addr_hi =
- upper_32_bits((uint64_t)q->properties.write_ptr);
-
- return 0;
+out:
+ mutex_unlock(&pm->lock);
+ return retval;
}
static int pm_create_runlist_ib(struct packet_manager *pm,
@@ -292,12 +152,12 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
return -ENOMEM;
}
- retval = pm_create_map_process(pm, &rl_buffer[rl_wptr], qpd);
+ retval = pm->pmf->map_process(pm, &rl_buffer[rl_wptr], qpd);
if (retval)
return retval;
proccesses_mapped++;
- inc_wptr(&rl_wptr, sizeof(struct pm4_mes_map_process),
+ inc_wptr(&rl_wptr, pm->pmf->map_process_size,
alloc_size_bytes);
list_for_each_entry(kq, &qpd->priv_queue_list, list) {
@@ -307,7 +167,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
pr_debug("static_queue, mapping kernel q %d, is debug status %d\n",
kq->queue->queue, qpd->is_debug);
- retval = pm_create_map_queue(pm,
+ retval = pm->pmf->map_queues(pm,
&rl_buffer[rl_wptr],
kq->queue,
qpd->is_debug);
@@ -315,7 +175,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
return retval;
inc_wptr(&rl_wptr,
- sizeof(struct pm4_mes_map_queues),
+ pm->pmf->map_queues_size,
alloc_size_bytes);
}
@@ -326,7 +186,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
pr_debug("static_queue, mapping user queue %d, is debug status %d\n",
q->queue, qpd->is_debug);
- retval = pm_create_map_queue(pm,
+ retval = pm->pmf->map_queues(pm,
&rl_buffer[rl_wptr],
q,
qpd->is_debug);
@@ -335,7 +195,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
return retval;
inc_wptr(&rl_wptr,
- sizeof(struct pm4_mes_map_queues),
+ pm->pmf->map_queues_size,
alloc_size_bytes);
}
}
@@ -343,7 +203,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
pr_debug("Finished map process and queues to runlist\n");
if (is_over_subscription)
- retval = pm_create_runlist(pm, &rl_buffer[rl_wptr],
+ retval = pm->pmf->runlist(pm, &rl_buffer[rl_wptr],
*rl_gpu_addr,
alloc_size_bytes / sizeof(uint32_t),
true);
@@ -355,45 +215,29 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
return retval;
}
-/* pm_create_release_mem - Create a RELEASE_MEM packet and return the size
- * of this packet
- * @gpu_addr - GPU address of the packet. It's a virtual address.
- * @buffer - buffer to fill up with the packet. It's a CPU kernel pointer
- * Return - length of the packet
- */
-uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer)
-{
- struct pm4_mec_release_mem *packet;
-
- WARN_ON(!buffer);
-
- packet = (struct pm4_mec_release_mem *)buffer;
- memset(buffer, 0, sizeof(*packet));
-
- packet->header.u32All = build_pm4_header(IT_RELEASE_MEM,
- sizeof(*packet));
-
- packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
- packet->bitfields2.event_index = event_index___release_mem__end_of_pipe;
- packet->bitfields2.tcl1_action_ena = 1;
- packet->bitfields2.tc_action_ena = 1;
- packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
- packet->bitfields2.atc = 0;
-
- packet->bitfields3.data_sel = data_sel___release_mem__send_32_bit_low;
- packet->bitfields3.int_sel =
- int_sel___release_mem__send_interrupt_after_write_confirm;
-
- packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
- packet->address_hi = upper_32_bits(gpu_addr);
-
- packet->data_lo = 0;
-
- return sizeof(*packet) / sizeof(unsigned int);
-}
-
int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
{
+ switch (dqm->dev->device_info->asic_family) {
+ case CHIP_KAVERI:
+ case CHIP_HAWAII:
+ /* PM4 packet structures on CIK are the same as on VI */
+ case CHIP_CARRIZO:
+ case CHIP_TONGA:
+ case CHIP_FIJI:
+ case CHIP_POLARIS10:
+ case CHIP_POLARIS11:
+ pm->pmf = &kfd_vi_pm_funcs;
+ break;
+ case CHIP_VEGA10:
+ case CHIP_RAVEN:
+ pm->pmf = &kfd_v9_pm_funcs;
+ break;
+ default:
+ WARN(1, "Unexpected ASIC family %u",
+ dqm->dev->device_info->asic_family);
+ return -EINVAL;
+ }
+
pm->dqm = dqm;
mutex_init(&pm->lock);
pm->priv_queue = kernel_queue_init(dqm->dev, KFD_QUEUE_TYPE_HIQ);
@@ -415,38 +259,25 @@ void pm_uninit(struct packet_manager *pm)
int pm_send_set_resources(struct packet_manager *pm,
struct scheduling_resources *res)
{
- struct pm4_mes_set_resources *packet;
+ uint32_t *buffer, size;
int retval = 0;
+ size = pm->pmf->set_resources_size;
mutex_lock(&pm->lock);
pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
- sizeof(*packet) / sizeof(uint32_t),
- (unsigned int **)&packet);
- if (!packet) {
+ size / sizeof(uint32_t),
+ (unsigned int **)&buffer);
+ if (!buffer) {
pr_err("Failed to allocate buffer on kernel queue\n");
retval = -ENOMEM;
goto out;
}
- memset(packet, 0, sizeof(struct pm4_mes_set_resources));
- packet->header.u32All = build_pm4_header(IT_SET_RESOURCES,
- sizeof(struct pm4_mes_set_resources));
-
- packet->bitfields2.queue_type =
- queue_type__mes_set_resources__hsa_interface_queue_hiq;
- packet->bitfields2.vmid_mask = res->vmid_mask;
- packet->bitfields2.unmap_latency = KFD_UNMAP_LATENCY_MS / 100;
- packet->bitfields7.oac_mask = res->oac_mask;
- packet->bitfields8.gds_heap_base = res->gds_heap_base;
- packet->bitfields8.gds_heap_size = res->gds_heap_size;
-
- packet->gws_mask_lo = lower_32_bits(res->gws_mask);
- packet->gws_mask_hi = upper_32_bits(res->gws_mask);
-
- packet->queue_mask_lo = lower_32_bits(res->queue_mask);
- packet->queue_mask_hi = upper_32_bits(res->queue_mask);
-
- pm->priv_queue->ops.submit_packet(pm->priv_queue);
+ retval = pm->pmf->set_resources(pm, buffer, res);
+ if (!retval)
+ pm->priv_queue->ops.submit_packet(pm->priv_queue);
+ else
+ pm->priv_queue->ops.rollback_packet(pm->priv_queue);
out:
mutex_unlock(&pm->lock);
@@ -468,7 +299,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr);
- packet_size_dwords = sizeof(struct pm4_mes_runlist) / sizeof(uint32_t);
+ packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t);
mutex_lock(&pm->lock);
retval = pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
@@ -476,7 +307,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
if (retval)
goto fail_acquire_packet_buffer;
- retval = pm_create_runlist(pm, rl_buffer, rl_gpu_ib_addr,
+ retval = pm->pmf->runlist(pm, rl_buffer, rl_gpu_ib_addr,
rl_ib_size / sizeof(uint32_t), false);
if (retval)
goto fail_create_runlist;
@@ -499,37 +330,29 @@ fail_create_runlist_ib:
int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
uint32_t fence_value)
{
- int retval;
- struct pm4_mes_query_status *packet;
+ uint32_t *buffer, size;
+ int retval = 0;
if (WARN_ON(!fence_address))
return -EFAULT;
+ size = pm->pmf->query_status_size;
mutex_lock(&pm->lock);
- retval = pm->priv_queue->ops.acquire_packet_buffer(
- pm->priv_queue,
- sizeof(struct pm4_mes_query_status) / sizeof(uint32_t),
- (unsigned int **)&packet);
- if (retval)
- goto fail_acquire_packet_buffer;
-
- packet->header.u32All = build_pm4_header(IT_QUERY_STATUS,
- sizeof(struct pm4_mes_query_status));
-
- packet->bitfields2.context_id = 0;
- packet->bitfields2.interrupt_sel =
- interrupt_sel__mes_query_status__completion_status;
- packet->bitfields2.command =
- command__mes_query_status__fence_only_after_write_ack;
-
- packet->addr_hi = upper_32_bits((uint64_t)fence_address);
- packet->addr_lo = lower_32_bits((uint64_t)fence_address);
- packet->data_hi = upper_32_bits((uint64_t)fence_value);
- packet->data_lo = lower_32_bits((uint64_t)fence_value);
+ pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
+ size / sizeof(uint32_t), (unsigned int **)&buffer);
+ if (!buffer) {
+ pr_err("Failed to allocate buffer on kernel queue\n");
+ retval = -ENOMEM;
+ goto out;
+ }
- pm->priv_queue->ops.submit_packet(pm->priv_queue);
+ retval = pm->pmf->query_status(pm, buffer, fence_address, fence_value);
+ if (!retval)
+ pm->priv_queue->ops.submit_packet(pm->priv_queue);
+ else
+ pm->priv_queue->ops.rollback_packet(pm->priv_queue);
-fail_acquire_packet_buffer:
+out:
mutex_unlock(&pm->lock);
return retval;
}
@@ -539,82 +362,27 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
uint32_t filter_param, bool reset,
unsigned int sdma_engine)
{
- int retval;
- uint32_t *buffer;
- struct pm4_mes_unmap_queues *packet;
+ uint32_t *buffer, size;
+ int retval = 0;
+ size = pm->pmf->unmap_queues_size;
mutex_lock(&pm->lock);
- retval = pm->priv_queue->ops.acquire_packet_buffer(
- pm->priv_queue,
- sizeof(struct pm4_mes_unmap_queues) / sizeof(uint32_t),
- &buffer);
- if (retval)
- goto err_acquire_packet_buffer;
-
- packet = (struct pm4_mes_unmap_queues *)buffer;
- memset(buffer, 0, sizeof(struct pm4_mes_unmap_queues));
- pr_debug("static_queue: unmapping queues: filter is %d , reset is %d , type is %d\n",
- filter, reset, type);
- packet->header.u32All = build_pm4_header(IT_UNMAP_QUEUES,
- sizeof(struct pm4_mes_unmap_queues));
- switch (type) {
- case KFD_QUEUE_TYPE_COMPUTE:
- case KFD_QUEUE_TYPE_DIQ:
- packet->bitfields2.engine_sel =
- engine_sel__mes_unmap_queues__compute;
- break;
- case KFD_QUEUE_TYPE_SDMA:
- packet->bitfields2.engine_sel =
- engine_sel__mes_unmap_queues__sdma0 + sdma_engine;
- break;
- default:
- WARN(1, "queue type %d", type);
- retval = -EINVAL;
- goto err_invalid;
+ pm->priv_queue->ops.acquire_packet_buffer(pm->priv_queue,
+ size / sizeof(uint32_t), (unsigned int **)&buffer);
+ if (!buffer) {
+ pr_err("Failed to allocate buffer on kernel queue\n");
+ retval = -ENOMEM;
+ goto out;
}
- if (reset)
- packet->bitfields2.action =
- action__mes_unmap_queues__reset_queues;
+ retval = pm->pmf->unmap_queues(pm, buffer, type, filter, filter_param,
+ reset, sdma_engine);
+ if (!retval)
+ pm->priv_queue->ops.submit_packet(pm->priv_queue);
else
- packet->bitfields2.action =
- action__mes_unmap_queues__preempt_queues;
-
- switch (filter) {
- case KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE:
- packet->bitfields2.queue_sel =
- queue_sel__mes_unmap_queues__perform_request_on_specified_queues;
- packet->bitfields2.num_queues = 1;
- packet->bitfields3b.doorbell_offset0 = filter_param;
- break;
- case KFD_UNMAP_QUEUES_FILTER_BY_PASID:
- packet->bitfields2.queue_sel =
- queue_sel__mes_unmap_queues__perform_request_on_pasid_queues;
- packet->bitfields3a.pasid = filter_param;
- break;
- case KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES:
- packet->bitfields2.queue_sel =
- queue_sel__mes_unmap_queues__unmap_all_queues;
- break;
- case KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES:
- /* in this case, we do not preempt static queues */
- packet->bitfields2.queue_sel =
- queue_sel__mes_unmap_queues__unmap_all_non_static_queues;
- break;
- default:
- WARN(1, "filter %d", filter);
- retval = -EINVAL;
- goto err_invalid;
- }
+ pm->priv_queue->ops.rollback_packet(pm->priv_queue);
- pm->priv_queue->ops.submit_packet(pm->priv_queue);
-
- mutex_unlock(&pm->lock);
- return 0;
-
-err_invalid:
- pm->priv_queue->ops.rollback_packet(pm->priv_queue);
-err_acquire_packet_buffer:
+out:
mutex_unlock(&pm->lock);
return retval;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
new file mode 100644
index 0000000..f2bcf5c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
@@ -0,0 +1,583 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef F32_MES_PM4_PACKETS_H
+#define F32_MES_PM4_PACKETS_H
+
+#ifndef PM4_MES_HEADER_DEFINED
+#define PM4_MES_HEADER_DEFINED
+union PM4_MES_TYPE_3_HEADER {
+ struct {
+ uint32_t reserved1 : 8; /* < reserved */
+ uint32_t opcode : 8; /* < IT opcode */
+ uint32_t count : 14;/* < number of DWORDs - 1 in the
+ * information body.
+ */
+ uint32_t type : 2; /* < packet identifier.
+ * It should be 3 for type 3 packets
+ */
+ };
+ uint32_t u32All;
+};
+#endif /* PM4_MES_HEADER_DEFINED */
+
+/*--------------------MES_SET_RESOURCES--------------------*/
+
+#ifndef PM4_MES_SET_RESOURCES_DEFINED
+#define PM4_MES_SET_RESOURCES_DEFINED
+enum mes_set_resources_queue_type_enum {
+ queue_type__mes_set_resources__kernel_interface_queue_kiq = 0,
+ queue_type__mes_set_resources__hsa_interface_queue_hiq = 1,
+ queue_type__mes_set_resources__hsa_debug_interface_queue = 4
+};
+
+
+struct pm4_mes_set_resources {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /* header */
+ uint32_t ordinal1;
+ };
+
+ union {
+ struct {
+ uint32_t vmid_mask:16;
+ uint32_t unmap_latency:8;
+ uint32_t reserved1:5;
+ enum mes_set_resources_queue_type_enum queue_type:3;
+ } bitfields2;
+ uint32_t ordinal2;
+ };
+
+ uint32_t queue_mask_lo;
+ uint32_t queue_mask_hi;
+ uint32_t gws_mask_lo;
+ uint32_t gws_mask_hi;
+
+ union {
+ struct {
+ uint32_t oac_mask:16;
+ uint32_t reserved2:16;
+ } bitfields7;
+ uint32_t ordinal7;
+ };
+
+ union {
+ struct {
+ uint32_t gds_heap_base:6;
+ uint32_t reserved3:5;
+ uint32_t gds_heap_size:6;
+ uint32_t reserved4:15;
+ } bitfields8;
+ uint32_t ordinal8;
+ };
+
+};
+#endif
+
+/*--------------------MES_RUN_LIST--------------------*/
+
+#ifndef PM4_MES_RUN_LIST_DEFINED
+#define PM4_MES_RUN_LIST_DEFINED
+
+struct pm4_mes_runlist {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /* header */
+ uint32_t ordinal1;
+ };
+
+ union {
+ struct {
+ uint32_t reserved1:2;
+ uint32_t ib_base_lo:30;
+ } bitfields2;
+ uint32_t ordinal2;
+ };
+
+ uint32_t ib_base_hi;
+
+ union {
+ struct {
+ uint32_t ib_size:20;
+ uint32_t chain:1;
+ uint32_t offload_polling:1;
+ uint32_t reserved2:1;
+ uint32_t valid:1;
+ uint32_t process_cnt:4;
+ uint32_t reserved3:4;
+ } bitfields4;
+ uint32_t ordinal4;
+ };
+
+};
+#endif
+
+/*--------------------MES_MAP_PROCESS--------------------*/
+
+#ifndef PM4_MES_MAP_PROCESS_DEFINED
+#define PM4_MES_MAP_PROCESS_DEFINED
+
+struct pm4_mes_map_process {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /* header */
+ uint32_t ordinal1;
+ };
+
+ union {
+ struct {
+ uint32_t pasid:16;
+ uint32_t reserved1:8;
+ uint32_t diq_enable:1;
+ uint32_t process_quantum:7;
+ } bitfields2;
+ uint32_t ordinal2;
+ };
+
+ uint32_t vm_context_page_table_base_addr_lo32;
+
+ uint32_t vm_context_page_table_base_addr_hi32;
+
+ uint32_t sh_mem_bases;
+
+ uint32_t sh_mem_config;
+
+ uint32_t sq_shader_tba_lo;
+
+ uint32_t sq_shader_tba_hi;
+
+ uint32_t sq_shader_tma_lo;
+
+ uint32_t sq_shader_tma_hi;
+
+ uint32_t reserved6;
+
+ uint32_t gds_addr_lo;
+
+ uint32_t gds_addr_hi;
+
+ union {
+ struct {
+ uint32_t num_gws:6;
+ uint32_t reserved7:1;
+ uint32_t sdma_enable:1;
+ uint32_t num_oac:4;
+ uint32_t reserved8:4;
+ uint32_t gds_size:6;
+ uint32_t num_queues:10;
+ } bitfields14;
+ uint32_t ordinal14;
+ };
+
+ uint32_t completion_signal_lo;
+
+ uint32_t completion_signal_hi;
+
+};
+
+#endif
+
+/*--------------------MES_MAP_PROCESS_VM--------------------*/
+
+#ifndef PM4_MES_MAP_PROCESS_VM_DEFINED
+#define PM4_MES_MAP_PROCESS_VM_DEFINED
+
+struct PM4_MES_MAP_PROCESS_VM {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /* header */
+ uint32_t ordinal1;
+ };
+
+ uint32_t reserved1;
+
+ uint32_t vm_context_cntl;
+
+ uint32_t reserved2;
+
+ uint32_t vm_context_page_table_end_addr_lo32;
+
+ uint32_t vm_context_page_table_end_addr_hi32;
+
+ uint32_t vm_context_page_table_start_addr_lo32;
+
+ uint32_t vm_context_page_table_start_addr_hi32;
+
+ uint32_t reserved3;
+
+ uint32_t reserved4;
+
+ uint32_t reserved5;
+
+ uint32_t reserved6;
+
+ uint32_t reserved7;
+
+ uint32_t reserved8;
+
+ uint32_t completion_signal_lo32;
+
+ uint32_t completion_signal_hi32;
+
+};
+#endif
+
+/*--------------------MES_MAP_QUEUES--------------------*/
+
+#ifndef PM4_MES_MAP_QUEUES_VI_DEFINED
+#define PM4_MES_MAP_QUEUES_VI_DEFINED
+enum mes_map_queues_queue_sel_enum {
+ queue_sel__mes_map_queues__map_to_specified_queue_slots_vi = 0,
+queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi = 1
+};
+
+enum mes_map_queues_queue_type_enum {
+ queue_type__mes_map_queues__normal_compute_vi = 0,
+ queue_type__mes_map_queues__debug_interface_queue_vi = 1,
+ queue_type__mes_map_queues__normal_latency_static_queue_vi = 2,
+queue_type__mes_map_queues__low_latency_static_queue_vi = 3
+};
+
+enum mes_map_queues_alloc_format_enum {
+ alloc_format__mes_map_queues__one_per_pipe_vi = 0,
+alloc_format__mes_map_queues__all_on_one_pipe_vi = 1
+};
+
+enum mes_map_queues_engine_sel_enum {
+ engine_sel__mes_map_queues__compute_vi = 0,
+ engine_sel__mes_map_queues__sdma0_vi = 2,
+ engine_sel__mes_map_queues__sdma1_vi = 3
+};
+
+
+struct pm4_mes_map_queues {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /* header */
+ uint32_t ordinal1;
+ };
+
+ union {
+ struct {
+ uint32_t reserved1:4;
+ enum mes_map_queues_queue_sel_enum queue_sel:2;
+ uint32_t reserved2:15;
+ enum mes_map_queues_queue_type_enum queue_type:3;
+ enum mes_map_queues_alloc_format_enum alloc_format:2;
+ enum mes_map_queues_engine_sel_enum engine_sel:3;
+ uint32_t num_queues:3;
+ } bitfields2;
+ uint32_t ordinal2;
+ };
+
+ union {
+ struct {
+ uint32_t reserved3:1;
+ uint32_t check_disable:1;
+ uint32_t doorbell_offset:26;
+ uint32_t reserved4:4;
+ } bitfields3;
+ uint32_t ordinal3;
+ };
+
+ uint32_t mqd_addr_lo;
+ uint32_t mqd_addr_hi;
+ uint32_t wptr_addr_lo;
+ uint32_t wptr_addr_hi;
+};
+#endif
+
+/*--------------------MES_QUERY_STATUS--------------------*/
+
+#ifndef PM4_MES_QUERY_STATUS_DEFINED
+#define PM4_MES_QUERY_STATUS_DEFINED
+enum mes_query_status_interrupt_sel_enum {
+ interrupt_sel__mes_query_status__completion_status = 0,
+ interrupt_sel__mes_query_status__process_status = 1,
+ interrupt_sel__mes_query_status__queue_status = 2
+};
+
+enum mes_query_status_command_enum {
+ command__mes_query_status__interrupt_only = 0,
+ command__mes_query_status__fence_only_immediate = 1,
+ command__mes_query_status__fence_only_after_write_ack = 2,
+ command__mes_query_status__fence_wait_for_write_ack_send_interrupt = 3
+};
+
+enum mes_query_status_engine_sel_enum {
+ engine_sel__mes_query_status__compute = 0,
+ engine_sel__mes_query_status__sdma0_queue = 2,
+ engine_sel__mes_query_status__sdma1_queue = 3
+};
+
+struct pm4_mes_query_status {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /* header */
+ uint32_t ordinal1;
+ };
+
+ union {
+ struct {
+ uint32_t context_id:28;
+ enum mes_query_status_interrupt_sel_enum interrupt_sel:2;
+ enum mes_query_status_command_enum command:2;
+ } bitfields2;
+ uint32_t ordinal2;
+ };
+
+ union {
+ struct {
+ uint32_t pasid:16;
+ uint32_t reserved1:16;
+ } bitfields3a;
+ struct {
+ uint32_t reserved2:2;
+ uint32_t doorbell_offset:26;
+ enum mes_query_status_engine_sel_enum engine_sel:3;
+ uint32_t reserved3:1;
+ } bitfields3b;
+ uint32_t ordinal3;
+ };
+
+ uint32_t addr_lo;
+ uint32_t addr_hi;
+ uint32_t data_lo;
+ uint32_t data_hi;
+};
+#endif
+
+/*--------------------MES_UNMAP_QUEUES--------------------*/
+
+#ifndef PM4_MES_UNMAP_QUEUES_DEFINED
+#define PM4_MES_UNMAP_QUEUES_DEFINED
+enum mes_unmap_queues_action_enum {
+ action__mes_unmap_queues__preempt_queues = 0,
+ action__mes_unmap_queues__reset_queues = 1,
+ action__mes_unmap_queues__disable_process_queues = 2,
+ action__mes_unmap_queues__reserved = 3
+};
+
+enum mes_unmap_queues_queue_sel_enum {
+ queue_sel__mes_unmap_queues__perform_request_on_specified_queues = 0,
+ queue_sel__mes_unmap_queues__perform_request_on_pasid_queues = 1,
+ queue_sel__mes_unmap_queues__unmap_all_queues = 2,
+ queue_sel__mes_unmap_queues__unmap_all_non_static_queues = 3
+};
+
+enum mes_unmap_queues_engine_sel_enum {
+ engine_sel__mes_unmap_queues__compute = 0,
+ engine_sel__mes_unmap_queues__sdma0 = 2,
+	engine_sel__mes_unmap_queues__sdma1 = 3
+};
+
+struct pm4_mes_unmap_queues {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /* header */
+ uint32_t ordinal1;
+ };
+
+ union {
+ struct {
+ enum mes_unmap_queues_action_enum action:2;
+ uint32_t reserved1:2;
+ enum mes_unmap_queues_queue_sel_enum queue_sel:2;
+ uint32_t reserved2:20;
+ enum mes_unmap_queues_engine_sel_enum engine_sel:3;
+ uint32_t num_queues:3;
+ } bitfields2;
+ uint32_t ordinal2;
+ };
+
+ union {
+ struct {
+ uint32_t pasid:16;
+ uint32_t reserved3:16;
+ } bitfields3a;
+ struct {
+ uint32_t reserved4:2;
+ uint32_t doorbell_offset0:26;
+			uint32_t reserved5:4;
+ } bitfields3b;
+ uint32_t ordinal3;
+ };
+
+ union {
+ struct {
+ uint32_t reserved6:2;
+ uint32_t doorbell_offset1:26;
+ uint32_t reserved7:4;
+ } bitfields4;
+ uint32_t ordinal4;
+ };
+
+ union {
+ struct {
+ uint32_t reserved8:2;
+ uint32_t doorbell_offset2:26;
+ uint32_t reserved9:4;
+ } bitfields5;
+ uint32_t ordinal5;
+ };
+
+ union {
+ struct {
+ uint32_t reserved10:2;
+ uint32_t doorbell_offset3:26;
+ uint32_t reserved11:4;
+ } bitfields6;
+ uint32_t ordinal6;
+ };
+};
+#endif
+
+#ifndef PM4_MEC_RELEASE_MEM_DEFINED
+#define PM4_MEC_RELEASE_MEM_DEFINED
+
+enum mec_release_mem_event_index_enum {
+ event_index__mec_release_mem__end_of_pipe = 5,
+ event_index__mec_release_mem__shader_done = 6
+};
+
+enum mec_release_mem_cache_policy_enum {
+ cache_policy__mec_release_mem__lru = 0,
+ cache_policy__mec_release_mem__stream = 1
+};
+
+enum mec_release_mem_pq_exe_status_enum {
+ pq_exe_status__mec_release_mem__default = 0,
+ pq_exe_status__mec_release_mem__phase_update = 1
+};
+
+enum mec_release_mem_dst_sel_enum {
+ dst_sel__mec_release_mem__memory_controller = 0,
+ dst_sel__mec_release_mem__tc_l2 = 1,
+ dst_sel__mec_release_mem__queue_write_pointer_register = 2,
+ dst_sel__mec_release_mem__queue_write_pointer_poll_mask_bit = 3
+};
+
+enum mec_release_mem_int_sel_enum {
+ int_sel__mec_release_mem__none = 0,
+ int_sel__mec_release_mem__send_interrupt_only = 1,
+ int_sel__mec_release_mem__send_interrupt_after_write_confirm = 2,
+ int_sel__mec_release_mem__send_data_after_write_confirm = 3,
+ int_sel__mec_release_mem__unconditionally_send_int_ctxid = 4,
+ int_sel__mec_release_mem__conditionally_send_int_ctxid_based_on_32_bit_compare = 5,
+ int_sel__mec_release_mem__conditionally_send_int_ctxid_based_on_64_bit_compare = 6
+};
+
+enum mec_release_mem_data_sel_enum {
+ data_sel__mec_release_mem__none = 0,
+ data_sel__mec_release_mem__send_32_bit_low = 1,
+ data_sel__mec_release_mem__send_64_bit_data = 2,
+ data_sel__mec_release_mem__send_gpu_clock_counter = 3,
+ data_sel__mec_release_mem__send_cp_perfcounter_hi_lo = 4,
+ data_sel__mec_release_mem__store_gds_data_to_memory = 5
+};
+
+struct pm4_mec_release_mem {
+ union {
+ union PM4_MES_TYPE_3_HEADER header; /*header */
+ unsigned int ordinal1;
+ };
+
+ union {
+ struct {
+ unsigned int event_type:6;
+ unsigned int reserved1:2;
+ enum mec_release_mem_event_index_enum event_index:4;
+ unsigned int tcl1_vol_action_ena:1;
+ unsigned int tc_vol_action_ena:1;
+ unsigned int reserved2:1;
+ unsigned int tc_wb_action_ena:1;
+ unsigned int tcl1_action_ena:1;
+ unsigned int tc_action_ena:1;
+ uint32_t reserved3:1;
+ uint32_t tc_nc_action_ena:1;
+ uint32_t tc_wc_action_ena:1;
+ uint32_t tc_md_action_ena:1;
+ uint32_t reserved4:3;
+ enum mec_release_mem_cache_policy_enum cache_policy:2;
+ uint32_t reserved5:2;
+ enum mec_release_mem_pq_exe_status_enum pq_exe_status:1;
+ uint32_t reserved6:2;
+ } bitfields2;
+ unsigned int ordinal2;
+ };
+
+ union {
+ struct {
+ uint32_t reserved7:16;
+ enum mec_release_mem_dst_sel_enum dst_sel:2;
+ uint32_t reserved8:6;
+ enum mec_release_mem_int_sel_enum int_sel:3;
+ uint32_t reserved9:2;
+ enum mec_release_mem_data_sel_enum data_sel:3;
+ } bitfields3;
+ unsigned int ordinal3;
+ };
+
+ union {
+ struct {
+ uint32_t reserved10:2;
+ unsigned int address_lo_32b:30;
+ } bitfields4;
+ struct {
+ uint32_t reserved11:3;
+ uint32_t address_lo_64b:29;
+ } bitfields4b;
+ uint32_t reserved12;
+ unsigned int ordinal4;
+ };
+
+ union {
+ uint32_t address_hi;
+ uint32_t reserved13;
+ uint32_t ordinal5;
+ };
+
+ union {
+ uint32_t data_lo;
+ uint32_t cmp_data_lo;
+ struct {
+ uint32_t dw_offset:16;
+ uint32_t num_dwords:16;
+ } bitfields6c;
+ uint32_t reserved14;
+ uint32_t ordinal6;
+ };
+
+ union {
+ uint32_t data_hi;
+ uint32_t cmp_data_hi;
+ uint32_t reserved15;
+ uint32_t reserved16;
+ uint32_t ordinal7;
+ };
+
+ uint32_t int_ctxid;
+
+};
+
+#endif
+
+enum {
+ CACHE_FLUSH_AND_INV_TS_EVENT = 0x00000014
+};
+#endif
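As a sketch of how the structure above is typically consumed (this mirrors the shape of a fence-release builder, it is not the driver's actual code; the helper name is illustrative, and lower_32_bits/upper_32_bits are the usual kernel helpers), a 64-bit fence write with an interrupt after write confirmation would be filled like this:

```c
/* Illustrative only: write a 64-bit fence value to gpu_addr and raise
 * an interrupt once the write is confirmed. Header fill is elided.
 */
static void build_release_mem_fence(struct pm4_mec_release_mem *pkt,
				    uint64_t gpu_addr, uint64_t fence_value)
{
	memset(pkt, 0, sizeof(*pkt));

	pkt->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
	pkt->bitfields2.event_index =
		event_index__mec_release_mem__end_of_pipe;
	pkt->bitfields2.cache_policy = cache_policy__mec_release_mem__lru;

	pkt->bitfields3.dst_sel =
		dst_sel__mec_release_mem__memory_controller;
	pkt->bitfields3.int_sel =
		int_sel__mec_release_mem__send_interrupt_after_write_confirm;
	pkt->bitfields3.data_sel =
		data_sel__mec_release_mem__send_64_bit_data;

	/* 32-bit-aligned low address: the field holds bits [31:2] */
	pkt->bitfields4.address_lo_32b = lower_32_bits(gpu_addr) >> 2;
	pkt->address_hi = upper_32_bits(gpu_addr);
	pkt->data_lo = lower_32_bits(fence_value);
	pkt->data_hi = upper_32_bits(fence_value);
}
```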
+
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 96a9cc0..5e3990b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -39,11 +39,37 @@
#include "amd_shared.h"
+#define KFD_MAX_RING_ENTRY_SIZE 8
+
#define KFD_SYSFS_FILE_MODE 0444
-#define KFD_MMAP_DOORBELL_MASK 0x8000000000000ull
-#define KFD_MMAP_EVENTS_MASK 0x4000000000000ull
-#define KFD_MMAP_RESERVED_MEM_MASK 0x2000000000000ull
+/* GPU ID hash width in bits */
+#define KFD_GPU_ID_HASH_WIDTH 16
+
+/* Use upper bits of mmap offset to store KFD driver specific information.
+ * BITS[63:62] - Encode MMAP type
+ * BITS[61:46] - Encode gpu_id. Identifies which GPU the offset belongs to
+ * BITS[45:0] - MMAP offset value
+ *
+ * NOTE: struct vm_area_struct.vm_pgoff stores the offset in pages. Hence,
+ * these defines are w.r.t. PAGE_SIZE
+ */
+#define KFD_MMAP_TYPE_SHIFT (62 - PAGE_SHIFT)
+#define KFD_MMAP_TYPE_MASK (0x3ULL << KFD_MMAP_TYPE_SHIFT)
+#define KFD_MMAP_TYPE_DOORBELL (0x3ULL << KFD_MMAP_TYPE_SHIFT)
+#define KFD_MMAP_TYPE_EVENTS (0x2ULL << KFD_MMAP_TYPE_SHIFT)
+#define KFD_MMAP_TYPE_RESERVED_MEM (0x1ULL << KFD_MMAP_TYPE_SHIFT)
+
+#define KFD_MMAP_GPU_ID_SHIFT (46 - PAGE_SHIFT)
+#define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \
+ << KFD_MMAP_GPU_ID_SHIFT)
+#define KFD_MMAP_GPU_ID(gpu_id) ((((uint64_t)gpu_id) << KFD_MMAP_GPU_ID_SHIFT)\
+ & KFD_MMAP_GPU_ID_MASK)
+#define KFD_MMAP_GPU_ID_GET(offset) ((offset & KFD_MMAP_GPU_ID_MASK) \
+ >> KFD_MMAP_GPU_ID_SHIFT)
+
+#define KFD_MMAP_OFFSET_VALUE_MASK (0x3FFFFFFFFFFFULL >> PAGE_SHIFT)
+#define KFD_MMAP_OFFSET_VALUE_GET(offset) (offset & KFD_MMAP_OFFSET_VALUE_MASK)
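A quick worked example of this encoding (the gpu_id value 0x42 is made up): the type and GPU id are ORed into the page offset handed to user space, and the mmap handler decodes them back with the GET macros.

```c
/* Illustrative round trip. vm_pgoff counts pages, which is why the
 * shift definitions above already subtract PAGE_SHIFT.
 */
uint64_t pgoff = KFD_MMAP_TYPE_DOORBELL | KFD_MMAP_GPU_ID(0x42);

unsigned int gpu_id = KFD_MMAP_GPU_ID_GET(pgoff);	/* == 0x42 */
bool is_doorbell = (pgoff & KFD_MMAP_TYPE_MASK) == KFD_MMAP_TYPE_DOORBELL;
uint64_t value = KFD_MMAP_OFFSET_VALUE_GET(pgoff);	/* == 0 here */
```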
/*
* When working with cp scheduler we should assign the HIQ manually or via
@@ -55,9 +81,6 @@
#define KFD_CIK_HIQ_PIPE 4
#define KFD_CIK_HIQ_QUEUE 0
-/* GPU ID hash width in bits */
-#define KFD_GPU_ID_HASH_WIDTH 16
-
/* Macro for allocating structures */
#define kfd_alloc_struct(ptr_to_struct) \
((typeof(ptr_to_struct)) kzalloc(sizeof(*ptr_to_struct), GFP_KERNEL))
@@ -116,6 +139,11 @@ extern int debug_largebar;
*/
extern int ignore_crat;
+/*
+ * Set sh_mem_config.retry_disable on Vega10
+ */
+extern int vega10_noretry;
+
/**
* enum kfd_sched_policy
*
@@ -148,6 +176,8 @@ enum cache_policy {
cache_policy_noncoherent
};
+#define KFD_IS_SOC15(chip) ((chip) >= CHIP_VEGA10)
+
struct kfd_event_interrupt_class {
bool (*interrupt_isr)(struct kfd_dev *dev,
const uint32_t *ih_ring_entry);
@@ -160,6 +190,7 @@ struct kfd_device_info {
const struct kfd_event_interrupt_class *event_interrupt_class;
unsigned int max_pasid_bits;
unsigned int max_no_of_hqd;
+ unsigned int doorbell_size;
size_t ih_ring_entry_size;
uint8_t num_of_watch_points;
uint16_t mqd_size_aligned;
@@ -173,6 +204,7 @@ struct kfd_mem_obj {
uint32_t range_end;
uint64_t gpu_addr;
uint32_t *cpu_ptr;
+ void *gtt_mem;
};
struct kfd_vmid_info {
@@ -364,7 +396,7 @@ struct queue_properties {
uint32_t queue_percent;
uint32_t *read_ptr;
uint32_t *write_ptr;
- uint32_t __iomem *doorbell_ptr;
+ void __iomem *doorbell_ptr;
uint32_t doorbell_off;
bool is_interop;
bool is_evicted;
@@ -427,6 +459,7 @@ struct queue {
uint32_t queue;
unsigned int sdma_id;
+ unsigned int doorbell_id;
struct kfd_process *process;
struct kfd_dev *device;
@@ -501,6 +534,9 @@ struct qcm_process_device {
/* IB memory */
uint64_t ib_base;
void *ib_kaddr;
+
+ /* doorbell resources per process per device */
+ unsigned long *doorbell_bitmap;
};
/* KFD Memory Eviction */
@@ -512,6 +548,8 @@ struct qcm_process_device {
/* Approx. time before evicting the process again */
#define PROCESS_ACTIVE_TIME_MS 10
+int kgd2kfd_quiesce_mm(struct mm_struct *mm);
+int kgd2kfd_resume_mm(struct mm_struct *mm);
int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
struct dma_fence *fence);
@@ -681,6 +719,8 @@ struct kfd_process *kfd_get_process(const struct task_struct *);
struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid);
struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);
void kfd_unref_process(struct kfd_process *p);
+int kfd_process_evict_queues(struct kfd_process *p);
+int kfd_process_restore_queues(struct kfd_process *p);
void kfd_suspend_all_processes(void);
int kfd_resume_all_processes(void);
@@ -693,7 +733,7 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
struct kfd_process *p);
-int kfd_reserved_mem_mmap(struct kfd_process *process,
+int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
struct vm_area_struct *vma);
/* KFD process API for creating and translating handles */
@@ -721,17 +761,20 @@ unsigned int kfd_pasid_alloc(void);
void kfd_pasid_free(unsigned int pasid);
/* Doorbells */
+size_t kfd_doorbell_process_slice(struct kfd_dev *kfd);
int kfd_doorbell_init(struct kfd_dev *kfd);
void kfd_doorbell_fini(struct kfd_dev *kfd);
-int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma);
-u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
+int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
+ struct vm_area_struct *vma);
+void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
unsigned int *doorbell_off);
void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr);
u32 read_kernel_doorbell(u32 __iomem *db);
-void write_kernel_doorbell(u32 __iomem *db, u32 value);
-unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd,
+void write_kernel_doorbell(void __iomem *db, u32 value);
+void write_kernel_doorbell64(void __iomem *db, u64 value);
+unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd,
struct kfd_process *process,
- unsigned int queue_id);
+ unsigned int doorbell_id);
phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
struct kfd_process *process);
int kfd_alloc_process_doorbells(struct kfd_process *process);
@@ -788,6 +831,8 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type,
struct kfd_dev *dev);
+struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
+ struct kfd_dev *dev);
struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev);
void device_queue_manager_uninit(struct device_queue_manager *dqm);
struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
@@ -832,8 +877,42 @@ struct packet_manager {
bool allocated;
struct kfd_mem_obj *ib_buffer_obj;
unsigned int ib_size_bytes;
+
+ const struct packet_manager_funcs *pmf;
+};
+
+struct packet_manager_funcs {
+ /* Support ASIC-specific packet formats for PM4 packets */
+ int (*map_process)(struct packet_manager *pm, uint32_t *buffer,
+ struct qcm_process_device *qpd);
+ int (*runlist)(struct packet_manager *pm, uint32_t *buffer,
+ uint64_t ib, size_t ib_size_in_dwords, bool chain);
+ int (*set_resources)(struct packet_manager *pm, uint32_t *buffer,
+ struct scheduling_resources *res);
+ int (*map_queues)(struct packet_manager *pm, uint32_t *buffer,
+ struct queue *q, bool is_static);
+ int (*unmap_queues)(struct packet_manager *pm, uint32_t *buffer,
+ enum kfd_queue_type type,
+ enum kfd_unmap_queues_filter mode,
+ uint32_t filter_param, bool reset,
+ unsigned int sdma_engine);
+ int (*query_status)(struct packet_manager *pm, uint32_t *buffer,
+ uint64_t fence_address, uint32_t fence_value);
+ int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer);
+
+ /* Packet sizes */
+ int map_process_size;
+ int runlist_size;
+ int set_resources_size;
+ int map_queues_size;
+ int unmap_queues_size;
+ int query_status_size;
+ int release_mem_size;
};
+extern const struct packet_manager_funcs kfd_vi_pm_funcs;
+extern const struct packet_manager_funcs kfd_v9_pm_funcs;
+
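To illustrate the indirection (a sketch, not the driver's actual dispatch code; the helper name is hypothetical), a shared packet-manager helper takes both the packet size and the builder from the same per-ASIC table:

```c
/* Sketch: size the packet from the table, then call the per-ASIC
 * builder through pm->pmf. Error handling trimmed.
 */
static int pm_build_map_queues(struct packet_manager *pm, uint32_t *buffer,
			       struct queue *q, bool is_static)
{
	memset(buffer, 0, pm->pmf->map_queues_size);
	return pm->pmf->map_queues(pm, buffer, q, is_static);
}
```

At init time, pm->pmf would be pointed at kfd_vi_pm_funcs or kfd_v9_pm_funcs depending on the ASIC, so the shared helpers never need ASIC checks of their own.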
int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);
void pm_uninit(struct packet_manager *pm);
int pm_send_set_resources(struct packet_manager *pm,
@@ -849,12 +928,17 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
void pm_release_ib(struct packet_manager *pm);
-uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer);
+/* Following PM funcs can be shared among VI and AI */
+unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size);
+int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer,
+ struct scheduling_resources *res);
uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
/* Events */
extern const struct kfd_event_interrupt_class event_interrupt_class_cik;
+extern const struct kfd_event_interrupt_class event_interrupt_class_v9;
+
extern const struct kfd_device_global_init_class device_global_init_class_cik;
void kfd_event_init_process(struct kfd_process *p);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 1711ad0..1d80b4f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -332,6 +332,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
free_pages((unsigned long)pdd->qpd.cwsr_kaddr,
get_order(KFD_CWSR_TBA_TMA_SIZE));
+ kfree(pdd->qpd.doorbell_bitmap);
idr_destroy(&pdd->alloc_idr);
kfree(pdd);
@@ -451,7 +452,8 @@ static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base)
continue;
- offset = (dev->id | KFD_MMAP_RESERVED_MEM_MASK) << PAGE_SHIFT;
+ offset = (KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id))
+ << PAGE_SHIFT;
qpd->tba_addr = (int64_t)vm_mmap(filep, 0,
KFD_CWSR_TBA_TMA_SIZE, PROT_READ | PROT_EXEC,
MAP_SHARED, offset);
@@ -585,6 +587,31 @@ err_alloc_process:
return ERR_PTR(err);
}
+static int init_doorbell_bitmap(struct qcm_process_device *qpd,
+ struct kfd_dev *dev)
+{
+ unsigned int i;
+
+ if (!KFD_IS_SOC15(dev->device_info->asic_family))
+ return 0;
+
+ qpd->doorbell_bitmap =
+ kzalloc(DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
+ BITS_PER_BYTE), GFP_KERNEL);
+ if (!qpd->doorbell_bitmap)
+ return -ENOMEM;
+
+ /* Mask out any reserved doorbells */
+ for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS; i++)
+ if ((dev->shared_resources.reserved_doorbell_mask & i) ==
+ dev->shared_resources.reserved_doorbell_val) {
+ set_bit(i, qpd->doorbell_bitmap);
+ pr_debug("reserved doorbell 0x%03x\n", i);
+ }
+
+ return 0;
+}
+
struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
struct kfd_process *p)
{
@@ -606,6 +633,12 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
if (!pdd)
return NULL;
+ if (init_doorbell_bitmap(&pdd->qpd, dev)) {
+ pr_err("Failed to init doorbell for process\n");
+ kfree(pdd);
+ return NULL;
+ }
+
pdd->dev = dev;
INIT_LIST_HEAD(&pdd->qpd.queues_list);
INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
@@ -808,7 +841,7 @@ struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
* Eviction is reference-counted per process-device. This means multiple
* evictions from different sources can be nested safely.
*/
-static int process_evict_queues(struct kfd_process *p)
+int kfd_process_evict_queues(struct kfd_process *p)
{
struct kfd_process_device *pdd;
int r = 0;
@@ -844,7 +877,7 @@ fail:
}
/* process_restore_queues - Restore all user queues of a process */
-static int process_restore_queues(struct kfd_process *p)
+int kfd_process_restore_queues(struct kfd_process *p)
{
struct kfd_process_device *pdd;
int r, ret = 0;
@@ -886,7 +919,7 @@ static void evict_process_worker(struct work_struct *work)
flush_delayed_work(&p->restore_work);
pr_debug("Started evicting pasid %d\n", p->pasid);
- ret = process_evict_queues(p);
+ ret = kfd_process_evict_queues(p);
if (!ret) {
dma_fence_signal(p->ef);
dma_fence_put(p->ef);
@@ -946,7 +979,7 @@ static void restore_process_worker(struct work_struct *work)
return;
}
- ret = process_restore_queues(p);
+ ret = kfd_process_restore_queues(p);
if (!ret)
pr_debug("Finished restoring pasid %d\n", p->pasid);
else
@@ -963,7 +996,7 @@ void kfd_suspend_all_processes(void)
cancel_delayed_work_sync(&p->eviction_work);
cancel_delayed_work_sync(&p->restore_work);
- if (process_evict_queues(p))
+ if (kfd_process_evict_queues(p))
pr_err("Failed to suspend process %d\n", p->pasid);
dma_fence_signal(p->ef);
dma_fence_put(p->ef);
@@ -989,15 +1022,12 @@ int kfd_resume_all_processes(void)
return ret;
}
-int kfd_reserved_mem_mmap(struct kfd_process *process,
+int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
struct vm_area_struct *vma)
{
- struct kfd_dev *dev = kfd_device_by_id(vma->vm_pgoff);
struct kfd_process_device *pdd;
struct qcm_process_device *qpd;
- if (!dev)
- return -EINVAL;
if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) {
pr_err("Incorrect CWSR mapping size.\n");
return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 7817e32..d65ce04 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -119,9 +119,6 @@ static int create_cp_queue(struct process_queue_manager *pqm,
/* Doorbell initialized in user space*/
q_properties->doorbell_ptr = NULL;
- q_properties->doorbell_off =
- kfd_queue_id_to_doorbell(dev, pqm->process, qid);
-
/* let DQM handle it*/
q_properties->vmid = 0;
q_properties->queue_id = qid;
@@ -244,10 +241,20 @@ int pqm_create_queue(struct process_queue_manager *pqm,
}
if (retval != 0) {
- pr_err("DQM create queue failed\n");
+ pr_err("Pasid %d DQM create queue %d failed. ret %d\n",
+ pqm->process->pasid, type, retval);
goto err_create_queue;
}
+ if (q)
+ /* Return the doorbell offset (in bytes) within the
+ * doorbell page to the caller so it can be passed up
+ * to user mode.
+ */
+ properties->doorbell_off =
+ (q->properties.doorbell_off * sizeof(uint32_t)) &
+ (kfd_doorbell_process_slice(dev) - 1);
+
pr_debug("PQM After DQM create queue\n");
list_add(&pqn->process_queue_list, &pqm->queues);
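The masking above deserves a worked example (the sizes are illustrative): doorbell_off counts dwords across the whole doorbell BAR, and the AND with the power-of-two slice size keeps only the byte offset within this process's doorbell page.

```c
/* Illustrative numbers: a doorbell at dword index 0x1003 in a 2 MB
 * per-process slice lands at byte 0x400c of the mapped page.
 */
uint32_t doorbell_off = 0x1003;		/* dword index */
size_t slice = 0x200000;		/* kfd_doorbell_process_slice() */
uint32_t byte_off = (doorbell_off * sizeof(uint32_t)) & (slice - 1);
/* byte_off == 0x400c */
```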
@@ -313,8 +320,11 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
dqm = pqn->q->device->dqm;
retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
if (retval) {
- pr_debug("Destroy queue failed, returned %d\n", retval);
- goto err_destroy_queue;
+ pr_err("Pasid %d destroy queue %d failed, ret %d\n",
+ pqm->process->pasid,
+ pqn->q->properties.queue_id, retval);
+ if (retval != -ETIME)
+ goto err_destroy_queue;
}
uninit_queue(pqn->q);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
index a5315d4..6dcd621 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c
@@ -36,8 +36,8 @@ void print_queue_properties(struct queue_properties *q)
pr_debug("Queue Address: 0x%llX\n", q->queue_address);
pr_debug("Queue Id: %u\n", q->queue_id);
pr_debug("Queue Process Vmid: %u\n", q->vmid);
- pr_debug("Queue Read Pointer: 0x%p\n", q->read_ptr);
- pr_debug("Queue Write Pointer: 0x%p\n", q->write_ptr);
+ pr_debug("Queue Read Pointer: 0x%px\n", q->read_ptr);
+ pr_debug("Queue Write Pointer: 0x%px\n", q->write_ptr);
pr_debug("Queue Doorbell Pointer: 0x%p\n", q->doorbell_ptr);
pr_debug("Queue Doorbell Offset: %u\n", q->doorbell_off);
}
@@ -53,8 +53,8 @@ void print_queue(struct queue *q)
pr_debug("Queue Address: 0x%llX\n", q->properties.queue_address);
pr_debug("Queue Id: %u\n", q->properties.queue_id);
pr_debug("Queue Process Vmid: %u\n", q->properties.vmid);
- pr_debug("Queue Read Pointer: 0x%p\n", q->properties.read_ptr);
- pr_debug("Queue Write Pointer: 0x%p\n", q->properties.write_ptr);
+ pr_debug("Queue Read Pointer: 0x%px\n", q->properties.read_ptr);
+ pr_debug("Queue Write Pointer: 0x%px\n", q->properties.write_ptr);
pr_debug("Queue Doorbell Pointer: 0x%p\n", q->properties.doorbell_ptr);
pr_debug("Queue Doorbell Offset: %u\n", q->properties.doorbell_off);
pr_debug("Queue MQD Address: 0x%p\n", q->mqd);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index ac28abc..bc95d4df 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1239,6 +1239,12 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
break;
+ case CHIP_VEGA10:
+ case CHIP_RAVEN:
+ dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_2_0 <<
+ HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) &
+ HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK);
+ break;
default:
WARN(1, "Unexpected ASIC family %u",
dev->gpu->device_info->asic_family);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index eb54cfc..7d9c3f9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -45,6 +45,7 @@
#define HSA_CAP_DOORBELL_TYPE_PRE_1_0 0x0
#define HSA_CAP_DOORBELL_TYPE_1_0 0x1
+#define HSA_CAP_DOORBELL_TYPE_2_0 0x2
#define HSA_CAP_AQL_QUEUE_DOUBLE_MAP 0x00004000
struct kfd_node_properties {
diff --git a/drivers/gpu/drm/amd/amdkfd/soc15_int.h b/drivers/gpu/drm/amd/amdkfd/soc15_int.h
new file mode 100644
index 0000000..0bc0b25
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/soc15_int.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2016-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef HSA_SOC15_INT_H_INCLUDED
+#define HSA_SOC15_INT_H_INCLUDED
+
+#include "soc15_ih_clientid.h"
+
+#define SOC15_INTSRC_CP_END_OF_PIPE 181
+#define SOC15_INTSRC_CP_BAD_OPCODE 183
+#define SOC15_INTSRC_SQ_INTERRUPT_MSG 239
+#define SOC15_INTSRC_VMC_FAULT 0
+#define SOC15_INTSRC_SDMA_TRAP 224
+
+
+#define SOC15_CLIENT_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) & 0xff)
+#define SOC15_SOURCE_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 8 & 0xff)
+#define SOC15_RING_ID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 16 & 0xff)
+#define SOC15_VMID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 24 & 0xf)
+#define SOC15_VMID_TYPE_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[0]) >> 31 & 0x1)
+#define SOC15_PASID_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[3]) & 0xffff)
+#define SOC15_CONTEXT_ID0_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[4]))
+#define SOC15_CONTEXT_ID1_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[5]))
+#define SOC15_CONTEXT_ID2_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[6]))
+#define SOC15_CONTEXT_ID3_FROM_IH_ENTRY(entry) (le32_to_cpu(entry[7]))
+
+#endif
+
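A small sketch of how these macros decode one 8-dword IH ring entry (the handler name is hypothetical; the real interrupt ISR does more filtering):

```c
/* Hypothetical handler fragment decoding an IH ring entry. */
static void decode_ih_entry(const uint32_t *ih_ring_entry)
{
	uint16_t source_id = SOC15_SOURCE_ID_FROM_IH_ENTRY(ih_ring_entry);
	uint16_t client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry);
	uint16_t pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry);

	if (source_id == SOC15_INTSRC_CP_END_OF_PIPE)
		pr_debug("EOP: client %u, pasid %u\n", client_id, pasid);
}
```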
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 237289a..5733fbe 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -100,6 +100,21 @@ struct kgd2kfd_shared_resources {
/* Bit n == 1 means Queue n is available for KFD */
DECLARE_BITMAP(queue_bitmap, KGD_MAX_QUEUES);
+ /* Doorbell assignments (SOC15 and later chips only). Only
+ * specific doorbells are routed to each SDMA engine. Others
+ * are routed to IH and VCN. They are not usable by the CP.
+ *
+ * Any doorbell number D that satisfies the following condition
+ * is reserved: (D & reserved_doorbell_mask) == reserved_doorbell_val
+ *
+ * KFD currently uses 1024 doorbells (indices 0x000-0x3ff) per
+ * process. If doorbells 0x0f0-0x0f7 and 0x2f0-0x2f7 are reserved,
+ * that means mask would be set to 0x1f8 and val set to 0x0f0.
+ */
+ unsigned int sdma_doorbell[2][2];
+ unsigned int reserved_doorbell_mask;
+ unsigned int reserved_doorbell_val;
+
/* Base address of doorbell aperture. */
phys_addr_t doorbell_physical_address;
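Restating the reservation rule as code (the numbers repeat the example from the comment above; the helper is purely illustrative):

```c
/* A doorbell D is reserved iff (D & mask) == val. With mask 0x1f8 and
 * val 0x0f0 this reserves exactly 0x0f0-0x0f7 and 0x2f0-0x2f7 within
 * each 0x400-doorbell process range.
 */
static bool doorbell_is_reserved(unsigned int d,
				 unsigned int mask, unsigned int val)
{
	return (d & mask) == val;
}
/* doorbell_is_reserved(0x0f3, 0x1f8, 0x0f0) -> true
 * doorbell_is_reserved(0x2f5, 0x1f8, 0x0f0) -> true
 * doorbell_is_reserved(0x100, 0x1f8, 0x0f0) -> false
 */
```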
@@ -173,8 +188,6 @@ struct tile_config {
* @set_pasid_vmid_mapping: Exposes pasid/vmid pair to the H/W. Only used
* for no cp scheduling mode.
*
- * @init_pipeline: Initialized the compute pipelines.
- *
* @hqd_load: Loads the mqd structure to a H/W hqd slot. Used only for no cp
* scheduling mode.
*
@@ -274,9 +287,6 @@ struct kfd2kgd_calls {
int (*set_pasid_vmid_mapping)(struct kgd_dev *kgd, unsigned int pasid,
unsigned int vmid);
- int (*init_pipeline)(struct kgd_dev *kgd, uint32_t pipe_id,
- uint32_t hpd_size, uint64_t hpd_gpu_addr);
-
int (*init_interrupts)(struct kgd_dev *kgd, uint32_t pipe_id);
int (*hqd_load)(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
@@ -382,6 +392,10 @@ struct kfd2kgd_calls {
*
* @resume: Notifies amdkfd about a resume action done to a kgd device
*
+ * @quiesce_mm: Quiesce all user queue access to specified MM address space
+ *
+ * @resume_mm: Resume user queue access to specified MM address space
+ *
* @schedule_evict_and_restore_process: Schedules work queue that will prepare
* for safe eviction of KFD BOs that belong to the specified process.
*
@@ -399,6 +413,8 @@ struct kgd2kfd_calls {
void (*interrupt)(struct kfd_dev *kfd, const void *ih_ring_entry);
void (*suspend)(struct kfd_dev *kfd);
int (*resume)(struct kfd_dev *kfd);
+ int (*quiesce_mm)(struct mm_struct *mm);
+ int (*resume_mm)(struct mm_struct *mm);
int (*schedule_evict_and_restore_process)(struct mm_struct *mm,
struct dma_fence *fence);
};
diff --git a/drivers/gpu/drm/amd/include/v9_structs.h b/drivers/gpu/drm/amd/include/v9_structs.h
index 2fb25ab..ceaf493 100644
--- a/drivers/gpu/drm/amd/include/v9_structs.h
+++ b/drivers/gpu/drm/amd/include/v9_structs.h
@@ -29,10 +29,10 @@ struct v9_sdma_mqd {
uint32_t sdmax_rlcx_rb_base;
uint32_t sdmax_rlcx_rb_base_hi;
uint32_t sdmax_rlcx_rb_rptr;
+ uint32_t sdmax_rlcx_rb_rptr_hi;
uint32_t sdmax_rlcx_rb_wptr;
+ uint32_t sdmax_rlcx_rb_wptr_hi;
uint32_t sdmax_rlcx_rb_wptr_poll_cntl;
- uint32_t sdmax_rlcx_rb_wptr_poll_addr_hi;
- uint32_t sdmax_rlcx_rb_wptr_poll_addr_lo;
uint32_t sdmax_rlcx_rb_rptr_addr_hi;
uint32_t sdmax_rlcx_rb_rptr_addr_lo;
uint32_t sdmax_rlcx_ib_cntl;
@@ -44,29 +44,29 @@ struct v9_sdma_mqd {
uint32_t sdmax_rlcx_skip_cntl;
uint32_t sdmax_rlcx_context_status;
uint32_t sdmax_rlcx_doorbell;
- uint32_t sdmax_rlcx_virtual_addr;
- uint32_t sdmax_rlcx_ape1_cntl;
+ uint32_t sdmax_rlcx_status;
uint32_t sdmax_rlcx_doorbell_log;
- uint32_t reserved_22;
- uint32_t reserved_23;
- uint32_t reserved_24;
- uint32_t reserved_25;
- uint32_t reserved_26;
- uint32_t reserved_27;
- uint32_t reserved_28;
- uint32_t reserved_29;
- uint32_t reserved_30;
- uint32_t reserved_31;
- uint32_t reserved_32;
- uint32_t reserved_33;
- uint32_t reserved_34;
- uint32_t reserved_35;
- uint32_t reserved_36;
- uint32_t reserved_37;
- uint32_t reserved_38;
- uint32_t reserved_39;
- uint32_t reserved_40;
- uint32_t reserved_41;
+ uint32_t sdmax_rlcx_watermark;
+ uint32_t sdmax_rlcx_doorbell_offset;
+ uint32_t sdmax_rlcx_csa_addr_lo;
+ uint32_t sdmax_rlcx_csa_addr_hi;
+ uint32_t sdmax_rlcx_ib_sub_remain;
+ uint32_t sdmax_rlcx_preempt;
+ uint32_t sdmax_rlcx_dummy_reg;
+ uint32_t sdmax_rlcx_rb_wptr_poll_addr_hi;
+ uint32_t sdmax_rlcx_rb_wptr_poll_addr_lo;
+ uint32_t sdmax_rlcx_rb_aql_cntl;
+ uint32_t sdmax_rlcx_minor_ptr_update;
+ uint32_t sdmax_rlcx_midcmd_data0;
+ uint32_t sdmax_rlcx_midcmd_data1;
+ uint32_t sdmax_rlcx_midcmd_data2;
+ uint32_t sdmax_rlcx_midcmd_data3;
+ uint32_t sdmax_rlcx_midcmd_data4;
+ uint32_t sdmax_rlcx_midcmd_data5;
+ uint32_t sdmax_rlcx_midcmd_data6;
+ uint32_t sdmax_rlcx_midcmd_data7;
+ uint32_t sdmax_rlcx_midcmd_data8;
+ uint32_t sdmax_rlcx_midcmd_cntl;
uint32_t reserved_42;
uint32_t reserved_43;
uint32_t reserved_44;
diff --git a/drivers/gpu/drm/bridge/adv7511/Kconfig b/drivers/gpu/drm/bridge/adv7511/Kconfig
index 592b9d2..944e440 100644
--- a/drivers/gpu/drm/bridge/adv7511/Kconfig
+++ b/drivers/gpu/drm/bridge/adv7511/Kconfig
@@ -1,5 +1,5 @@
config DRM_I2C_ADV7511
- tristate "AV7511 encoder"
+ tristate "ADV7511 encoder"
depends on OF
select DRM_KMS_HELPER
select REGMAP_I2C
diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c
index 2614cea..73021b3 100644
--- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c
+++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c
@@ -1127,7 +1127,7 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id)
}
if (adv7511->gpio_pd) {
- mdelay(5);
+ usleep_range(5000, 6000);
gpiod_set_value_cansleep(adv7511->gpio_pd, 0);
}
diff --git a/drivers/gpu/drm/bridge/dumb-vga-dac.c b/drivers/gpu/drm/bridge/dumb-vga-dac.c
index 498d594..9837c8d 100644
--- a/drivers/gpu/drm/bridge/dumb-vga-dac.c
+++ b/drivers/gpu/drm/bridge/dumb-vga-dac.c
@@ -56,7 +56,9 @@ static int dumb_vga_get_modes(struct drm_connector *connector)
}
drm_mode_connector_update_edid_property(connector, edid);
- return drm_add_edid_modes(connector, edid);
+ ret = drm_add_edid_modes(connector, edid);
+ kfree(edid);
+ return ret;
fallback:
/*
diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index 3c181d2..26a22f5 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -1425,7 +1425,9 @@ drm_atomic_set_crtc_for_plane(struct drm_plane_state *plane_state,
{
struct drm_plane *plane = plane_state->plane;
struct drm_crtc_state *crtc_state;
-
+ /* Nothing to do for same crtc */
+ if (plane_state->crtc == crtc)
+ return 0;
if (plane_state->crtc) {
crtc_state = drm_atomic_get_crtc_state(plane_state->state,
plane_state->crtc);
diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index 9cb2209..130da51 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -766,7 +766,7 @@ int drm_atomic_helper_check_plane_state(struct drm_plane_state *plane_state,
if (crtc_state->enable)
drm_mode_get_hv_timing(&crtc_state->mode, &clip.x2, &clip.y2);
- plane_state->visible = drm_rect_clip_scaled(src, dst, &clip, hscale, vscale);
+ plane_state->visible = drm_rect_clip_scaled(src, dst, &clip);
drm_rect_rotate_inv(src, fb->width << 16, fb->height << 16, rotation);
diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c
index b3cde89..9b9ba5d 100644
--- a/drivers/gpu/drm/drm_connector.c
+++ b/drivers/gpu/drm/drm_connector.c
@@ -1069,7 +1069,7 @@ int drm_mode_create_tv_properties(struct drm_device *dev,
goto nomem;
for (i = 0; i < num_modes; i++)
- drm_property_add_enum(dev->mode_config.tv_mode_property, i,
+ drm_property_add_enum(dev->mode_config.tv_mode_property,
i, modes[i]);
dev->mode_config.tv_brightness_property =
@@ -1156,7 +1156,7 @@ int drm_connector_attach_scaling_mode_property(struct drm_connector *connector,
{
struct drm_device *dev = connector->dev;
struct drm_property *scaling_mode_property;
- int i, j = 0;
+ int i;
const unsigned valid_scaling_mode_mask =
(1U << ARRAY_SIZE(drm_scaling_mode_enum_list)) - 1;
@@ -1177,7 +1177,7 @@ int drm_connector_attach_scaling_mode_property(struct drm_connector *connector,
if (!(BIT(i) & scaling_mode_mask))
continue;
- ret = drm_property_add_enum(scaling_mode_property, j++,
+ ret = drm_property_add_enum(scaling_mode_property,
drm_scaling_mode_enum_list[i].type,
drm_scaling_mode_enum_list[i].name);
@@ -1531,8 +1531,10 @@ static struct drm_encoder *drm_connector_get_encoder(struct drm_connector *conne
return connector->encoder;
}
-static bool drm_mode_expose_to_userspace(const struct drm_display_mode *mode,
- const struct drm_file *file_priv)
+static bool
+drm_mode_expose_to_userspace(const struct drm_display_mode *mode,
+ const struct list_head *export_list,
+ const struct drm_file *file_priv)
{
/*
* If user-space hasn't configured the driver to expose the stereo 3D
@@ -1540,6 +1542,23 @@ static bool drm_mode_expose_to_userspace(const struct drm_display_mode *mode,
*/
if (!file_priv->stereo_allowed && drm_mode_is_stereo(mode))
return false;
+ /*
+ * If user-space hasn't configured the driver to expose the modes
+ * with aspect-ratio, don't expose them. However if such a mode
+ * is unique, let it be exposed, but reset the aspect-ratio flags
+ * while preparing the list of user-modes.
+ */
+ if (!file_priv->aspect_ratio_allowed) {
+ struct drm_display_mode *mode_itr;
+
+ list_for_each_entry(mode_itr, export_list, export_head)
+ if (drm_mode_match(mode_itr, mode,
+ DRM_MODE_MATCH_TIMINGS |
+ DRM_MODE_MATCH_CLOCK |
+ DRM_MODE_MATCH_FLAGS |
+ DRM_MODE_MATCH_3D_FLAGS))
+ return false;
+ }
return true;
}
@@ -1559,6 +1578,7 @@ int drm_mode_getconnector(struct drm_device *dev, void *data,
struct drm_mode_modeinfo u_mode;
struct drm_mode_modeinfo __user *mode_ptr;
uint32_t __user *encoder_ptr;
+ LIST_HEAD(export_list);
if (!drm_core_check_feature(dev, DRIVER_MODESET))
return -EINVAL;
@@ -1607,21 +1627,31 @@ int drm_mode_getconnector(struct drm_device *dev, void *data,
/* delayed so we get modes regardless of pre-fill_modes state */
list_for_each_entry(mode, &connector->modes, head)
- if (drm_mode_expose_to_userspace(mode, file_priv))
+ if (drm_mode_expose_to_userspace(mode, &export_list,
+ file_priv)) {
+ list_add_tail(&mode->export_head, &export_list);
mode_count++;
+ }
/*
* This ioctl is called twice, once to determine how much space is
* needed, and the 2nd time to fill it.
+ * The modes that need to be exposed to the user are maintained in
+ * 'export_list'. The first call, which determines the required space,
+ * fills export_list to find the number of modes. The second call then
+ * copies the user modes out one by one from export_list.
*/
if ((out_resp->count_modes >= mode_count) && mode_count) {
copied = 0;
mode_ptr = (struct drm_mode_modeinfo __user *)(unsigned long)out_resp->modes_ptr;
- list_for_each_entry(mode, &connector->modes, head) {
- if (!drm_mode_expose_to_userspace(mode, file_priv))
- continue;
-
+ list_for_each_entry(mode, &export_list, export_head) {
drm_mode_convert_to_umode(&u_mode, mode);
+ /*
+ * Reset aspect ratio flags of user-mode, if modes with
+ * aspect-ratio are not supported.
+ */
+ if (!file_priv->aspect_ratio_allowed)
+ u_mode.flags &= ~DRM_MODE_FLAG_PIC_AR_MASK;
if (copy_to_user(mode_ptr + copied,
&u_mode, sizeof(u_mode))) {
ret = -EFAULT;
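From user space the two-call contract reads like this (a raw-ioctl sketch; libdrm's drmModeGetConnector wraps the same dance and retries if the counts change between calls, and error handling is omitted here):

```c
/* Sketch of the two-pass GETCONNECTOR pattern. Pass 1 returns the
 * counts; pass 2 fills caller-provided arrays.
 */
struct drm_mode_get_connector conn = { .connector_id = connector_id };

ioctl(fd, DRM_IOCTL_MODE_GETCONNECTOR, &conn);		/* pass 1 */

struct drm_mode_modeinfo *modes =
	calloc(conn.count_modes, sizeof(*modes));
conn.modes_ptr = (uint64_t)(uintptr_t)modes;
conn.count_props = 0;					/* only fetch modes */
conn.count_encoders = 0;

ioctl(fd, DRM_IOCTL_MODE_GETCONNECTOR, &conn);		/* pass 2 */
```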
diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index a231dd5..98a36e6 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -449,6 +449,8 @@ int drm_mode_getcrtc(struct drm_device *dev,
crtc_resp->mode_valid = 0;
}
}
+ if (!file_priv->aspect_ratio_allowed)
+ crtc_resp->mode.flags &= ~DRM_MODE_FLAG_PIC_AR_MASK;
drm_modeset_unlock(&crtc->mutex);
return 0;
@@ -628,6 +630,13 @@ retry:
ret = -ENOMEM;
goto out;
}
+ if (!file_priv->aspect_ratio_allowed &&
+ (crtc_req->mode.flags & DRM_MODE_FLAG_PIC_AR_MASK) != DRM_MODE_FLAG_PIC_AR_NONE) {
+ DRM_DEBUG_KMS("Unexpected aspect-ratio flag bits\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
ret = drm_mode_convert_umode(dev, mode, &crtc_req->mode);
if (ret) {
diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c
index ffe14ec..36c7609 100644
--- a/drivers/gpu/drm/drm_dp_helper.c
+++ b/drivers/gpu/drm/drm_dp_helper.c
@@ -119,18 +119,32 @@ u8 drm_dp_get_adjust_request_pre_emphasis(const u8 link_status[DP_LINK_STATUS_SI
EXPORT_SYMBOL(drm_dp_get_adjust_request_pre_emphasis);
void drm_dp_link_train_clock_recovery_delay(const u8 dpcd[DP_RECEIVER_CAP_SIZE])
{
- if (dpcd[DP_TRAINING_AUX_RD_INTERVAL] == 0)
+ int rd_interval = dpcd[DP_TRAINING_AUX_RD_INTERVAL] &
+ DP_TRAINING_AUX_RD_MASK;
+
+ if (rd_interval > 4)
+ DRM_DEBUG_KMS("AUX interval %d, out of range (max 4)\n",
+ rd_interval);
+
+ if (rd_interval == 0 || dpcd[DP_DPCD_REV] >= DP_DPCD_REV_14)
udelay(100);
else
- mdelay(dpcd[DP_TRAINING_AUX_RD_INTERVAL] * 4);
+ mdelay(rd_interval * 4);
}
EXPORT_SYMBOL(drm_dp_link_train_clock_recovery_delay);
void drm_dp_link_train_channel_eq_delay(const u8 dpcd[DP_RECEIVER_CAP_SIZE])
{
- if (dpcd[DP_TRAINING_AUX_RD_INTERVAL] == 0)
+ int rd_interval = dpcd[DP_TRAINING_AUX_RD_INTERVAL] &
+ DP_TRAINING_AUX_RD_MASK;
+
+ if (rd_interval > 4)
+ DRM_DEBUG_KMS("AUX interval %d, out of range (max 4)\n",
+ rd_interval);
+
+ if (rd_interval == 0)
udelay(400);
else
- mdelay(dpcd[DP_TRAINING_AUX_RD_INTERVAL] * 4);
+ mdelay(rd_interval * 4);
}
EXPORT_SYMBOL(drm_dp_link_train_channel_eq_delay);
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 32a83b4..f6910eb 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -99,8 +99,6 @@ static struct drm_minor **drm_minor_get_slot(struct drm_device *dev,
return &dev->primary;
case DRM_MINOR_RENDER:
return &dev->render;
- case DRM_MINOR_CONTROL:
- return &dev->control;
default:
BUG();
}
@@ -567,7 +565,6 @@ err_ctxbitmap:
err_minors:
drm_minor_free(dev, DRM_MINOR_PRIMARY);
drm_minor_free(dev, DRM_MINOR_RENDER);
- drm_minor_free(dev, DRM_MINOR_CONTROL);
drm_fs_inode_free(dev->anon_inode);
err_free:
mutex_destroy(&dev->master_mutex);
@@ -603,7 +600,6 @@ void drm_dev_fini(struct drm_device *dev)
drm_minor_free(dev, DRM_MINOR_PRIMARY);
drm_minor_free(dev, DRM_MINOR_RENDER);
- drm_minor_free(dev, DRM_MINOR_CONTROL);
mutex_destroy(&dev->master_mutex);
mutex_destroy(&dev->ctxlist_mutex);
@@ -796,10 +792,6 @@ int drm_dev_register(struct drm_device *dev, unsigned long flags)
mutex_lock(&drm_global_mutex);
- ret = drm_minor_register(dev, DRM_MINOR_CONTROL);
- if (ret)
- goto err_minors;
-
ret = drm_minor_register(dev, DRM_MINOR_RENDER);
if (ret)
goto err_minors;
@@ -837,7 +829,6 @@ err_minors:
remove_compat_control_link(dev);
drm_minor_unregister(dev, DRM_MINOR_PRIMARY);
drm_minor_unregister(dev, DRM_MINOR_RENDER);
- drm_minor_unregister(dev, DRM_MINOR_CONTROL);
out_unlock:
mutex_unlock(&drm_global_mutex);
return ret;
@@ -882,7 +873,6 @@ void drm_dev_unregister(struct drm_device *dev)
remove_compat_control_link(dev);
drm_minor_unregister(dev, DRM_MINOR_PRIMARY);
drm_minor_unregister(dev, DRM_MINOR_RENDER);
- drm_minor_unregister(dev, DRM_MINOR_CONTROL);
}
EXPORT_SYMBOL(drm_dev_unregister);
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 08d33b4..40e1e24 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -2930,11 +2930,15 @@ cea_mode_alternate_timings(u8 vic, struct drm_display_mode *mode)
static u8 drm_match_cea_mode_clock_tolerance(const struct drm_display_mode *to_match,
unsigned int clock_tolerance)
{
+ unsigned int match_flags = DRM_MODE_MATCH_TIMINGS | DRM_MODE_MATCH_FLAGS;
u8 vic;
if (!to_match->clock)
return 0;
+ if (to_match->picture_aspect_ratio)
+ match_flags |= DRM_MODE_MATCH_ASPECT_RATIO;
+
for (vic = 1; vic < ARRAY_SIZE(edid_cea_modes); vic++) {
struct drm_display_mode cea_mode = edid_cea_modes[vic];
unsigned int clock1, clock2;
@@ -2948,7 +2952,7 @@ static u8 drm_match_cea_mode_clock_tolerance(const struct drm_display_mode *to_m
continue;
do {
- if (drm_mode_equal_no_clocks_no_stereo(to_match, &cea_mode))
+ if (drm_mode_match(to_match, &cea_mode, match_flags))
return vic;
} while (cea_mode_alternate_timings(vic, &cea_mode));
}
@@ -2965,11 +2969,15 @@ static u8 drm_match_cea_mode_clock_tolerance(const struct drm_display_mode *to_m
*/
u8 drm_match_cea_mode(const struct drm_display_mode *to_match)
{
+ unsigned int match_flags = DRM_MODE_MATCH_TIMINGS | DRM_MODE_MATCH_FLAGS;
u8 vic;
if (!to_match->clock)
return 0;
+ if (to_match->picture_aspect_ratio)
+ match_flags |= DRM_MODE_MATCH_ASPECT_RATIO;
+
for (vic = 1; vic < ARRAY_SIZE(edid_cea_modes); vic++) {
struct drm_display_mode cea_mode = edid_cea_modes[vic];
unsigned int clock1, clock2;
@@ -2983,7 +2991,7 @@ u8 drm_match_cea_mode(const struct drm_display_mode *to_match)
continue;
do {
- if (drm_mode_equal_no_clocks_no_stereo(to_match, &cea_mode))
+ if (drm_mode_match(to_match, &cea_mode, match_flags))
return vic;
} while (cea_mode_alternate_timings(vic, &cea_mode));
}
@@ -3030,6 +3038,7 @@ hdmi_mode_alternate_clock(const struct drm_display_mode *hdmi_mode)
static u8 drm_match_hdmi_mode_clock_tolerance(const struct drm_display_mode *to_match,
unsigned int clock_tolerance)
{
+ unsigned int match_flags = DRM_MODE_MATCH_TIMINGS | DRM_MODE_MATCH_FLAGS;
u8 vic;
if (!to_match->clock)
@@ -3047,7 +3056,7 @@ static u8 drm_match_hdmi_mode_clock_tolerance(const struct drm_display_mode *to_
abs(to_match->clock - clock2) > clock_tolerance)
continue;
- if (drm_mode_equal_no_clocks(to_match, hdmi_mode))
+ if (drm_mode_match(to_match, hdmi_mode, match_flags))
return vic;
}
@@ -3064,6 +3073,7 @@ static u8 drm_match_hdmi_mode_clock_tolerance(const struct drm_display_mode *to_
*/
static u8 drm_match_hdmi_mode(const struct drm_display_mode *to_match)
{
+ unsigned int match_flags = DRM_MODE_MATCH_TIMINGS | DRM_MODE_MATCH_FLAGS;
u8 vic;
if (!to_match->clock)
@@ -3079,7 +3089,7 @@ static u8 drm_match_hdmi_mode(const struct drm_display_mode *to_match)
if ((KHZ2PICOS(to_match->clock) == KHZ2PICOS(clock1) ||
KHZ2PICOS(to_match->clock) == KHZ2PICOS(clock2)) &&
- drm_mode_equal_no_clocks_no_stereo(to_match, hdmi_mode))
+ drm_mode_match(to_match, hdmi_mode, match_flags))
return vic;
}
return 0;
@@ -4823,6 +4833,7 @@ drm_hdmi_avi_infoframe_from_display_mode(struct hdmi_avi_infoframe *frame,
const struct drm_display_mode *mode,
bool is_hdmi2_sink)
{
+ enum hdmi_picture_aspect picture_aspect;
int err;
if (!frame || !mode)
@@ -4865,13 +4876,23 @@ drm_hdmi_avi_infoframe_from_display_mode(struct hdmi_avi_infoframe *frame,
* Populate picture aspect ratio from either
* user input (if specified) or from the CEA mode list.
*/
- if (mode->picture_aspect_ratio == HDMI_PICTURE_ASPECT_4_3 ||
- mode->picture_aspect_ratio == HDMI_PICTURE_ASPECT_16_9)
- frame->picture_aspect = mode->picture_aspect_ratio;
- else if (frame->video_code > 0)
- frame->picture_aspect = drm_get_cea_aspect_ratio(
- frame->video_code);
+ picture_aspect = mode->picture_aspect_ratio;
+ if (picture_aspect == HDMI_PICTURE_ASPECT_NONE)
+ picture_aspect = drm_get_cea_aspect_ratio(frame->video_code);
+
+ /*
+ * The infoframe can't convey anything but none, 4:3
+ * and 16:9, so if the user has asked for anything else
+ * we can only satisfy it by specifying the right VIC.
+ */
+ if (picture_aspect > HDMI_PICTURE_ASPECT_16_9) {
+ if (picture_aspect !=
+ drm_get_cea_aspect_ratio(frame->video_code))
+ return -EINVAL;
+ picture_aspect = HDMI_PICTURE_ASPECT_NONE;
+ }
+ frame->picture_aspect = picture_aspect;
frame->active_aspect = HDMI_ACTIVE_ASPECT_PICTURE;
frame->scan_mode = HDMI_SCAN_MODE_UNDERSCAN;
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 0646b10..2ee1eaa 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -2183,7 +2183,11 @@ static bool drm_target_cloned(struct drm_fb_helper *fb_helper,
for (j = 0; j < i; j++) {
if (!enabled[j])
continue;
- if (!drm_mode_equal(modes[j], modes[i]))
+ if (!drm_mode_match(modes[j], modes[i],
+ DRM_MODE_MATCH_TIMINGS |
+ DRM_MODE_MATCH_CLOCK |
+ DRM_MODE_MATCH_FLAGS |
+ DRM_MODE_MATCH_3D_FLAGS))
can_clone = false;
}
}
@@ -2203,7 +2207,11 @@ static bool drm_target_cloned(struct drm_fb_helper *fb_helper,
fb_helper_conn = fb_helper->connector_info[i];
list_for_each_entry(mode, &fb_helper_conn->connector->modes, head) {
- if (drm_mode_equal(mode, dmt_mode))
+ if (drm_mode_match(mode, dmt_mode,
+ DRM_MODE_MATCH_TIMINGS |
+ DRM_MODE_MATCH_CLOCK |
+ DRM_MODE_MATCH_FLAGS |
+ DRM_MODE_MATCH_3D_FLAGS))
modes[i] = mode;
}
if (!modes[i])
diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c
index 8c4d32a..bfedcef 100644
--- a/drivers/gpu/drm/drm_framebuffer.c
+++ b/drivers/gpu/drm/drm_framebuffer.c
@@ -484,8 +484,7 @@ int drm_mode_getfb(struct drm_device *dev,
* backwards-compatibility reasons, we cannot make GET_FB() privileged,
* so just return an invalid handle for non-masters.
*/
- if (!drm_is_current_master(file_priv) && !capable(CAP_SYS_ADMIN) &&
- !drm_is_control_client(file_priv)) {
+ if (!drm_is_current_master(file_priv) && !capable(CAP_SYS_ADMIN)) {
r->handle = 0;
ret = 0;
goto out;
diff --git a/drivers/gpu/drm/drm_ioc32.c b/drivers/gpu/drm/drm_ioc32.c
index f8e96e6..67b1fca 100644
--- a/drivers/gpu/drm/drm_ioc32.c
+++ b/drivers/gpu/drm/drm_ioc32.c
@@ -105,7 +105,7 @@ static int compat_drm_version(struct file *file, unsigned int cmd,
.desc = compat_ptr(v32.desc),
};
err = drm_ioctl_kernel(file, drm_version, &v,
- DRM_UNLOCKED|DRM_RENDER_ALLOW|DRM_CONTROL_ALLOW);
+ DRM_UNLOCKED|DRM_RENDER_ALLOW);
if (err)
return err;
@@ -885,7 +885,7 @@ static int compat_drm_mode_addfb2(struct file *file, unsigned int cmd,
return -EFAULT;
err = drm_ioctl_kernel(file, drm_mode_addfb2, &req64,
- DRM_CONTROL_ALLOW|DRM_UNLOCKED);
+ DRM_UNLOCKED);
if (err)
return err;
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index af78291..0d4cfb2 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -324,6 +324,15 @@ drm_setclientcap(struct drm_device *dev, void *data, struct drm_file *file_priv)
return -EINVAL;
file_priv->atomic = req->value;
file_priv->universal_planes = req->value;
+ /*
+ * No atomic user-space blows up on aspect-ratio mode bits, so
+ * enabling the atomic cap also enables aspect-ratio support.
+ */
+ file_priv->aspect_ratio_allowed = req->value;
+ break;
+ case DRM_CLIENT_CAP_ASPECT_RATIO:
+ if (req->value > 1)
+ return -EINVAL;
+ file_priv->aspect_ratio_allowed = req->value;
break;
default:
return -EINVAL;
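With libdrm, opting a file descriptor into the new behavior is one call (a usage sketch; fd is assumed to be an open DRM device node):

```c
/* Userspace sketch: after this, modes returned to this fd carry the
 * DRM_MODE_FLAG_PIC_AR_* bits instead of having them stripped.
 */
if (drmSetClientCap(fd, DRM_CLIENT_CAP_ASPECT_RATIO, 1))
	fprintf(stderr, "aspect-ratio cap not supported\n");
```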
@@ -510,13 +519,7 @@ int drm_ioctl_permit(u32 flags, struct drm_file *file_priv)
/* MASTER is only for master clients */
if (unlikely((flags & DRM_MASTER) &&
- !drm_is_current_master(file_priv) &&
- !drm_is_control_client(file_priv)))
- return -EACCES;
-
- /* Control clients must be explicitly allowed */
- if (unlikely(!(flags & DRM_CONTROL_ALLOW) &&
- drm_is_control_client(file_priv)))
+ !drm_is_current_master(file_priv)))
return -EACCES;
/* Render clients must be explicitly allowed */
@@ -539,7 +542,7 @@ EXPORT_SYMBOL(drm_ioctl_permit);
/* Ioctl table */
static const struct drm_ioctl_desc drm_ioctls[] = {
DRM_IOCTL_DEF(DRM_IOCTL_VERSION, drm_version,
- DRM_UNLOCKED|DRM_RENDER_ALLOW|DRM_CONTROL_ALLOW),
+ DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_GET_UNIQUE, drm_getunique, DRM_UNLOCKED),
DRM_IOCTL_DEF(DRM_IOCTL_GET_MAGIC, drm_getmagic, DRM_UNLOCKED),
DRM_IOCTL_DEF(DRM_IOCTL_IRQ_BUSID, drm_irq_by_busid, DRM_MASTER|DRM_ROOT_ONLY),
@@ -613,41 +616,41 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
DRM_IOCTL_DEF(DRM_IOCTL_GEM_FLINK, drm_gem_flink_ioctl, DRM_AUTH|DRM_UNLOCKED),
DRM_IOCTL_DEF(DRM_IOCTL_GEM_OPEN, drm_gem_open_ioctl, DRM_AUTH|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETRESOURCES, drm_mode_getresources, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETRESOURCES, drm_mode_getresources, DRM_UNLOCKED),
DRM_IOCTL_DEF(DRM_IOCTL_PRIME_HANDLE_TO_FD, drm_prime_handle_to_fd_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_PRIME_FD_TO_HANDLE, drm_prime_fd_to_handle_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPLANERESOURCES, drm_mode_getplane_res, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCRTC, drm_mode_getcrtc, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETCRTC, drm_mode_setcrtc, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPLANE, drm_mode_getplane, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETPLANE, drm_mode_setplane, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR, drm_mode_cursor_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPLANERESOURCES, drm_mode_getplane_res, DRM_UNLOCKED),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCRTC, drm_mode_getcrtc, DRM_UNLOCKED),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETCRTC, drm_mode_setcrtc, DRM_MASTER|DRM_UNLOCKED),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPLANE, drm_mode_getplane, DRM_UNLOCKED),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETPLANE, drm_mode_setplane, DRM_MASTER|DRM_UNLOCKED),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR, drm_mode_cursor_ioctl, DRM_MASTER|DRM_UNLOCKED),
DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETGAMMA, drm_mode_gamma_get_ioctl, DRM_UNLOCKED),
DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETGAMMA, drm_mode_gamma_set_ioctl, DRM_MASTER|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETENCODER, drm_mode_getencoder, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCONNECTOR, drm_mode_getconnector, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_ATTACHMODE, drm_noop, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_DETACHMODE, drm_noop, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPERTY, drm_mode_getproperty_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETPROPERTY, drm_mode_connector_property_set_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPBLOB, drm_mode_getblob_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETFB, drm_mode_getfb, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB, drm_mode_addfb, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB2, drm_mode_addfb2, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_RMFB, drm_mode_rmfb, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_PAGE_FLIP, drm_mode_page_flip_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_DIRTYFB, drm_mode_dirtyfb_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_DUMB, drm_mode_create_dumb_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_MAP_DUMB, drm_mode_mmap_dumb_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_DESTROY_DUMB, drm_mode_destroy_dumb_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_OBJ_GETPROPERTIES, drm_mode_obj_get_properties_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_OBJ_SETPROPERTY, drm_mode_obj_set_property_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR2, drm_mode_cursor2_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_ATOMIC, drm_mode_atomic_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATEPROPBLOB, drm_mode_createblob_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_DESTROYPROPBLOB, drm_mode_destroyblob_ioctl, DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETENCODER, drm_mode_getencoder, DRM_UNLOCKED),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETCONNECTOR, drm_mode_getconnector, DRM_UNLOCKED),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_ATTACHMODE, drm_noop, DRM_MASTER|DRM_UNLOCKED),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_DETACHMODE, drm_noop, DRM_MASTER|DRM_UNLOCKED),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPERTY, drm_mode_getproperty_ioctl, DRM_UNLOCKED),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_SETPROPERTY, drm_mode_connector_property_set_ioctl, DRM_MASTER|DRM_UNLOCKED),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETPROPBLOB, drm_mode_getblob_ioctl, DRM_UNLOCKED),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_GETFB, drm_mode_getfb, DRM_UNLOCKED),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB, drm_mode_addfb, DRM_UNLOCKED),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_ADDFB2, drm_mode_addfb2, DRM_UNLOCKED),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_RMFB, drm_mode_rmfb, DRM_UNLOCKED),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_PAGE_FLIP, drm_mode_page_flip_ioctl, DRM_MASTER|DRM_UNLOCKED),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_DIRTYFB, drm_mode_dirtyfb_ioctl, DRM_MASTER|DRM_UNLOCKED),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_DUMB, drm_mode_create_dumb_ioctl, DRM_UNLOCKED),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_MAP_DUMB, drm_mode_mmap_dumb_ioctl, DRM_UNLOCKED),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_DESTROY_DUMB, drm_mode_destroy_dumb_ioctl, DRM_UNLOCKED),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_OBJ_GETPROPERTIES, drm_mode_obj_get_properties_ioctl, DRM_UNLOCKED),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_OBJ_SETPROPERTY, drm_mode_obj_set_property_ioctl, DRM_MASTER|DRM_UNLOCKED),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_CURSOR2, drm_mode_cursor2_ioctl, DRM_MASTER|DRM_UNLOCKED),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_ATOMIC, drm_mode_atomic_ioctl, DRM_MASTER|DRM_UNLOCKED),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATEPROPBLOB, drm_mode_createblob_ioctl, DRM_UNLOCKED),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_DESTROYPROPBLOB, drm_mode_destroyblob_ioctl, DRM_UNLOCKED),
DRM_IOCTL_DEF(DRM_IOCTL_SYNCOBJ_CREATE, drm_syncobj_create_ioctl,
DRM_UNLOCKED|DRM_RENDER_ALLOW),
@@ -665,10 +668,10 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
DRM_UNLOCKED|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF(DRM_IOCTL_CRTC_GET_SEQUENCE, drm_crtc_get_sequence_ioctl, DRM_UNLOCKED),
DRM_IOCTL_DEF(DRM_IOCTL_CRTC_QUEUE_SEQUENCE, drm_crtc_queue_sequence_ioctl, DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE, drm_mode_create_lease_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_LIST_LESSEES, drm_mode_list_lessees_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_GET_LEASE, drm_mode_get_lease_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
- DRM_IOCTL_DEF(DRM_IOCTL_MODE_REVOKE_LEASE, drm_mode_revoke_lease_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW|DRM_UNLOCKED),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_CREATE_LEASE, drm_mode_create_lease_ioctl, DRM_MASTER|DRM_UNLOCKED),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_LIST_LESSEES, drm_mode_list_lessees_ioctl, DRM_MASTER|DRM_UNLOCKED),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_GET_LEASE, drm_mode_get_lease_ioctl, DRM_MASTER|DRM_UNLOCKED),
+ DRM_IOCTL_DEF(DRM_IOCTL_MODE_REVOKE_LEASE, drm_mode_revoke_lease_ioctl, DRM_MASTER|DRM_UNLOCKED),
};
#define DRM_CORE_IOCTL_COUNT ARRAY_SIZE( drm_ioctls )
diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c
index e82b61e..c78ca0e 100644
--- a/drivers/gpu/drm/drm_modes.c
+++ b/drivers/gpu/drm/drm_modes.c
@@ -939,17 +939,68 @@ struct drm_display_mode *drm_mode_duplicate(struct drm_device *dev,
}
EXPORT_SYMBOL(drm_mode_duplicate);
+static bool drm_mode_match_timings(const struct drm_display_mode *mode1,
+ const struct drm_display_mode *mode2)
+{
+ return mode1->hdisplay == mode2->hdisplay &&
+ mode1->hsync_start == mode2->hsync_start &&
+ mode1->hsync_end == mode2->hsync_end &&
+ mode1->htotal == mode2->htotal &&
+ mode1->hskew == mode2->hskew &&
+ mode1->vdisplay == mode2->vdisplay &&
+ mode1->vsync_start == mode2->vsync_start &&
+ mode1->vsync_end == mode2->vsync_end &&
+ mode1->vtotal == mode2->vtotal &&
+ mode1->vscan == mode2->vscan;
+}
+
+static bool drm_mode_match_clock(const struct drm_display_mode *mode1,
+ const struct drm_display_mode *mode2)
+{
+ /*
+ * Do the clock check by converting to PICOS so that
+ * fb modes get matched the same way.
+ */
+ if (mode1->clock && mode2->clock)
+ return KHZ2PICOS(mode1->clock) == KHZ2PICOS(mode2->clock);
+ else
+ return mode1->clock == mode2->clock;
+}
+
+static bool drm_mode_match_flags(const struct drm_display_mode *mode1,
+ const struct drm_display_mode *mode2)
+{
+ return (mode1->flags & ~DRM_MODE_FLAG_3D_MASK) ==
+ (mode2->flags & ~DRM_MODE_FLAG_3D_MASK);
+}
+
+static bool drm_mode_match_3d_flags(const struct drm_display_mode *mode1,
+ const struct drm_display_mode *mode2)
+{
+ return (mode1->flags & DRM_MODE_FLAG_3D_MASK) ==
+ (mode2->flags & DRM_MODE_FLAG_3D_MASK);
+}
+
+static bool drm_mode_match_aspect_ratio(const struct drm_display_mode *mode1,
+ const struct drm_display_mode *mode2)
+{
+ return mode1->picture_aspect_ratio == mode2->picture_aspect_ratio;
+}
+
/**
- * drm_mode_equal - test modes for equality
+ * drm_mode_match - test modes for (partial) equality
* @mode1: first mode
* @mode2: second mode
+ * @match_flags: which parts need to match (DRM_MODE_MATCH_*)
*
* Check to see if @mode1 and @mode2 are equivalent.
*
* Returns:
- * True if the modes are equal, false otherwise.
+ * True if the modes are (partially) equal, false otherwise.
*/
-bool drm_mode_equal(const struct drm_display_mode *mode1, const struct drm_display_mode *mode2)
+bool drm_mode_match(const struct drm_display_mode *mode1,
+ const struct drm_display_mode *mode2,
+ unsigned int match_flags)
{
if (!mode1 && !mode2)
return true;
@@ -957,15 +1008,49 @@ bool drm_mode_equal(const struct drm_display_mode *mode1, const struct drm_displ
if (!mode1 || !mode2)
return false;
- /* do clock check convert to PICOS so fb modes get matched
- * the same */
- if (mode1->clock && mode2->clock) {
- if (KHZ2PICOS(mode1->clock) != KHZ2PICOS(mode2->clock))
- return false;
- } else if (mode1->clock != mode2->clock)
+ if (match_flags & DRM_MODE_MATCH_TIMINGS &&
+ !drm_mode_match_timings(mode1, mode2))
+ return false;
+
+ if (match_flags & DRM_MODE_MATCH_CLOCK &&
+ !drm_mode_match_clock(mode1, mode2))
+ return false;
+
+ if (match_flags & DRM_MODE_MATCH_FLAGS &&
+ !drm_mode_match_flags(mode1, mode2))
+ return false;
+
+ if (match_flags & DRM_MODE_MATCH_3D_FLAGS &&
+ !drm_mode_match_3d_flags(mode1, mode2))
return false;
- return drm_mode_equal_no_clocks(mode1, mode2);
+ if (match_flags & DRM_MODE_MATCH_ASPECT_RATIO &&
+ !drm_mode_match_aspect_ratio(mode1, mode2))
+ return false;
+
+ return true;
+}
+EXPORT_SYMBOL(drm_mode_match);
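A usage sketch (mode1/mode2 stand for any two modes): callers pick exactly the aspects they care about, for example a clock-insensitive comparison.

```c
/* Match timings and flags but ignore the pixel clock, e.g. when
 * comparing a probed mode against a fixed mode table.
 */
bool same_shape = drm_mode_match(mode1, mode2,
				 DRM_MODE_MATCH_TIMINGS |
				 DRM_MODE_MATCH_FLAGS |
				 DRM_MODE_MATCH_3D_FLAGS);
```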
+
+/**
+ * drm_mode_equal - test modes for equality
+ * @mode1: first mode
+ * @mode2: second mode
+ *
+ * Check to see if @mode1 and @mode2 are equivalent.
+ *
+ * Returns:
+ * True if the modes are equal, false otherwise.
+ */
+bool drm_mode_equal(const struct drm_display_mode *mode1,
+ const struct drm_display_mode *mode2)
+{
+ return drm_mode_match(mode1, mode2,
+ DRM_MODE_MATCH_TIMINGS |
+ DRM_MODE_MATCH_CLOCK |
+ DRM_MODE_MATCH_FLAGS |
+ DRM_MODE_MATCH_3D_FLAGS |
+ DRM_MODE_MATCH_ASPECT_RATIO);
}
EXPORT_SYMBOL(drm_mode_equal);
@@ -980,13 +1065,13 @@ EXPORT_SYMBOL(drm_mode_equal);
* Returns:
* True if the modes are equal, false otherwise.
*/
-bool drm_mode_equal_no_clocks(const struct drm_display_mode *mode1, const struct drm_display_mode *mode2)
+bool drm_mode_equal_no_clocks(const struct drm_display_mode *mode1,
+ const struct drm_display_mode *mode2)
{
- if ((mode1->flags & DRM_MODE_FLAG_3D_MASK) !=
- (mode2->flags & DRM_MODE_FLAG_3D_MASK))
- return false;
-
- return drm_mode_equal_no_clocks_no_stereo(mode1, mode2);
+ return drm_mode_match(mode1, mode2,
+ DRM_MODE_MATCH_TIMINGS |
+ DRM_MODE_MATCH_FLAGS |
+ DRM_MODE_MATCH_3D_FLAGS);
}
EXPORT_SYMBOL(drm_mode_equal_no_clocks);
@@ -1004,21 +1089,9 @@ EXPORT_SYMBOL(drm_mode_equal_no_clocks);
bool drm_mode_equal_no_clocks_no_stereo(const struct drm_display_mode *mode1,
const struct drm_display_mode *mode2)
{
- if (mode1->hdisplay == mode2->hdisplay &&
- mode1->hsync_start == mode2->hsync_start &&
- mode1->hsync_end == mode2->hsync_end &&
- mode1->htotal == mode2->htotal &&
- mode1->hskew == mode2->hskew &&
- mode1->vdisplay == mode2->vdisplay &&
- mode1->vsync_start == mode2->vsync_start &&
- mode1->vsync_end == mode2->vsync_end &&
- mode1->vtotal == mode2->vtotal &&
- mode1->vscan == mode2->vscan &&
- (mode1->flags & ~DRM_MODE_FLAG_3D_MASK) ==
- (mode2->flags & ~DRM_MODE_FLAG_3D_MASK))
- return true;
-
- return false;
+ return drm_mode_match(mode1, mode2,
+ DRM_MODE_MATCH_TIMINGS |
+ DRM_MODE_MATCH_FLAGS);
}
EXPORT_SYMBOL(drm_mode_equal_no_clocks_no_stereo);
@@ -1575,6 +1648,26 @@ void drm_mode_convert_to_umode(struct drm_mode_modeinfo *out,
out->vrefresh = in->vrefresh;
out->flags = in->flags;
out->type = in->type;
+
+ switch (in->picture_aspect_ratio) {
+ case HDMI_PICTURE_ASPECT_4_3:
+ out->flags |= DRM_MODE_FLAG_PIC_AR_4_3;
+ break;
+ case HDMI_PICTURE_ASPECT_16_9:
+ out->flags |= DRM_MODE_FLAG_PIC_AR_16_9;
+ break;
+ case HDMI_PICTURE_ASPECT_64_27:
+ out->flags |= DRM_MODE_FLAG_PIC_AR_64_27;
+ break;
+ case HDMI_PICTURE_ASPECT_256_135:
+ out->flags |= DRM_MODE_FLAG_PIC_AR_256_135;
+ break;
+ case HDMI_PICTURE_ASPECT_RESERVED:
+ default:
+ out->flags |= DRM_MODE_FLAG_PIC_AR_NONE;
+ break;
+ }
+
strncpy(out->name, in->name, DRM_DISPLAY_MODE_LEN);
out->name[DRM_DISPLAY_MODE_LEN-1] = 0;
}
@@ -1621,6 +1714,30 @@ int drm_mode_convert_umode(struct drm_device *dev,
strncpy(out->name, in->name, DRM_DISPLAY_MODE_LEN);
out->name[DRM_DISPLAY_MODE_LEN-1] = 0;
+ /* Clear the picture aspect ratio bits from out->flags: for the
+ * kernel-internal mode the aspect-ratio information is stored in
+ * picture_aspect_ratio, not in flags.
+ */
+ out->flags &= ~DRM_MODE_FLAG_PIC_AR_MASK;
+
+ switch (in->flags & DRM_MODE_FLAG_PIC_AR_MASK) {
+ case DRM_MODE_FLAG_PIC_AR_4_3:
+ out->picture_aspect_ratio = HDMI_PICTURE_ASPECT_4_3;
+ break;
+ case DRM_MODE_FLAG_PIC_AR_16_9:
+ out->picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9;
+ break;
+ case DRM_MODE_FLAG_PIC_AR_64_27:
+ out->picture_aspect_ratio = HDMI_PICTURE_ASPECT_64_27;
+ break;
+ case DRM_MODE_FLAG_PIC_AR_256_135:
+ out->picture_aspect_ratio = HDMI_PICTURE_ASPECT_256_135;
+ break;
+ default:
+ out->picture_aspect_ratio = HDMI_PICTURE_ASPECT_NONE;
+ break;
+ }
+
out->status = drm_mode_validate_driver(dev, out);
if (out->status != MODE_OK)
return -EINVAL;
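
A sketch of the resulting round trip (the helper function is hypothetical): the kernel keeps the aspect ratio in picture_aspect_ratio and only mirrors it into the UAPI flag bits on the way out, mapping it back on the way in:

    static void aspect_ratio_round_trip_example(struct drm_device *dev)
    {
            struct drm_mode_modeinfo umode;
            struct drm_display_mode kmode = {
                    .picture_aspect_ratio = HDMI_PICTURE_ASPECT_16_9,
            };

            drm_mode_convert_to_umode(&umode, &kmode);
            /* umode.flags now carries DRM_MODE_FLAG_PIC_AR_16_9;
             * drm_mode_convert_umode(dev, &kmode, &umode) maps the flag
             * back to picture_aspect_ratio and clears the PIC_AR bits
             * from kmode.flags. */
    }
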
diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c
index caebddd..fe9c6c7 100644
--- a/drivers/gpu/drm/drm_panel_orientation_quirks.c
+++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c
@@ -172,10 +172,9 @@ int drm_get_panel_orientation_quirk(int width, int height)
if (!bios_date)
continue;
- for (i = 0; data->bios_dates[i]; i++) {
- if (!strcmp(data->bios_dates[i], bios_date))
- return data->orientation;
- }
+ i = match_string(data->bios_dates, -1, bios_date);
+ if (i >= 0)
+ return data->orientation;
}
return DRM_MODE_PANEL_ORIENTATION_UNKNOWN;
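
For reference, match_string() returns the index of the first matching entry or -EINVAL when none matches; a size of -1 makes it stop at the first NULL entry, matching the old NULL-terminated loop. A minimal sketch with assumed data:

    static const char * const dates[] = { "2017-01-01", "2017-06-01", NULL };

    int idx = match_string(dates, -1, "2017-06-01");        /* idx == 1 */
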
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index caf675e..397b46b 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -409,7 +409,10 @@ void *drm_gem_dmabuf_vmap(struct dma_buf *dma_buf)
struct drm_gem_object *obj = dma_buf->priv;
struct drm_device *dev = obj->dev;
- return dev->driver->gem_prime_vmap(obj);
+ if (dev->driver->gem_prime_vmap)
+ return dev->driver->gem_prime_vmap(obj);
+ else
+ return NULL;
}
EXPORT_SYMBOL(drm_gem_dmabuf_vmap);
@@ -426,7 +429,8 @@ void drm_gem_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr)
struct drm_gem_object *obj = dma_buf->priv;
struct drm_device *dev = obj->dev;
- dev->driver->gem_prime_vunmap(obj, vaddr);
+ if (dev->driver->gem_prime_vunmap)
+ dev->driver->gem_prime_vunmap(obj, vaddr);
}
EXPORT_SYMBOL(drm_gem_dmabuf_vunmap);
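
A hedged caller-side sketch of why the NULL checks matter: dma_buf_vmap() users already have to handle a NULL return, so an exporter that leaves gem_prime_vmap unset now degrades gracefully instead of dereferencing a NULL pointer:

    /* Hypothetical importer needing CPU access to the buffer. */
    static int importer_cpu_access(struct dma_buf *buf)
    {
            void *vaddr = dma_buf_vmap(buf);

            if (!vaddr)
                    return -ENXIO;  /* exporter has no gem_prime_vmap */

            /* ... use vaddr ... */
            dma_buf_vunmap(buf, vaddr);
            return 0;
    }
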
diff --git a/drivers/gpu/drm/drm_property.c b/drivers/gpu/drm/drm_property.c
index 8f4672d..1f8031e 100644
--- a/drivers/gpu/drm/drm_property.c
+++ b/drivers/gpu/drm/drm_property.c
@@ -169,9 +169,9 @@ struct drm_property *drm_property_create_enum(struct drm_device *dev,
return NULL;
for (i = 0; i < num_values; i++) {
- ret = drm_property_add_enum(property, i,
- props[i].type,
- props[i].name);
+ ret = drm_property_add_enum(property,
+ props[i].type,
+ props[i].name);
if (ret) {
drm_property_destroy(dev, property);
return NULL;
@@ -209,7 +209,7 @@ struct drm_property *drm_property_create_bitmask(struct drm_device *dev,
uint64_t supported_bits)
{
struct drm_property *property;
- int i, ret, index = 0;
+ int i, ret;
int num_values = hweight64(supported_bits);
flags |= DRM_MODE_PROP_BITMASK;
@@ -221,14 +221,9 @@ struct drm_property *drm_property_create_bitmask(struct drm_device *dev,
if (!(supported_bits & (1ULL << props[i].type)))
continue;
- if (WARN_ON(index >= num_values)) {
- drm_property_destroy(dev, property);
- return NULL;
- }
-
- ret = drm_property_add_enum(property, index++,
- props[i].type,
- props[i].name);
+ ret = drm_property_add_enum(property,
+ props[i].type,
+ props[i].name);
if (ret) {
drm_property_destroy(dev, property);
return NULL;
@@ -376,7 +371,6 @@ EXPORT_SYMBOL(drm_property_create_bool);
/**
* drm_property_add_enum - add a possible value to an enumeration property
* @property: enumeration property to change
- * @index: index of the new enumeration
* @value: value of the new enumeration
* @name: symbolic name of the new enumeration
*
@@ -388,10 +382,11 @@ EXPORT_SYMBOL(drm_property_create_bool);
* Returns:
* Zero on success, error code on failure.
*/
-int drm_property_add_enum(struct drm_property *property, int index,
+int drm_property_add_enum(struct drm_property *property,
uint64_t value, const char *name)
{
struct drm_property_enum *prop_enum;
+ int index = 0;
if (WARN_ON(strlen(name) >= DRM_PROP_NAME_LEN))
return -EINVAL;
@@ -411,8 +406,12 @@ int drm_property_add_enum(struct drm_property *property, int index,
list_for_each_entry(prop_enum, &property->enum_list, head) {
if (WARN_ON(prop_enum->value == value))
return -EINVAL;
+ index++;
}
+ if (WARN_ON(index >= property->num_values))
+ return -EINVAL;
+
prop_enum = kzalloc(sizeof(struct drm_property_enum), GFP_KERNEL);
if (!prop_enum)
return -ENOMEM;
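
Call sites simply drop the index argument; the core now derives the index from the current length of enum_list. A before/after sketch of a typical caller:

    /* before */
    ret = drm_property_add_enum(prop, i, props[i].type, props[i].name);
    /* after */
    ret = drm_property_add_enum(prop, props[i].type, props[i].name);
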
diff --git a/drivers/gpu/drm/drm_rect.c b/drivers/gpu/drm/drm_rect.c
index 9817c14..8c05782 100644
--- a/drivers/gpu/drm/drm_rect.c
+++ b/drivers/gpu/drm/drm_rect.c
@@ -50,13 +50,25 @@ bool drm_rect_intersect(struct drm_rect *r1, const struct drm_rect *r2)
}
EXPORT_SYMBOL(drm_rect_intersect);
+static u32 clip_scaled(u32 src, u32 dst, u32 clip)
+{
+ u64 tmp = mul_u32_u32(src, dst - clip);
+
+ /*
+ * Round toward 1.0 when clipping so that we don't accidentally
+ * change upscaling to downscaling or vice versa.
+ */
+ if (src < (dst << 16))
+ return DIV_ROUND_UP_ULL(tmp, dst);
+ else
+ return DIV_ROUND_DOWN_ULL(tmp, dst);
+}
+
/**
* drm_rect_clip_scaled - perform a scaled clip operation
* @src: source window rectangle
* @dst: destination window rectangle
* @clip: clip rectangle
- * @hscale: horizontal scaling factor
- * @vscale: vertical scaling factor
*
* Clip rectangle @dst by rectangle @clip. Clip rectangle @src by the
* same amounts multiplied by @hscale and @vscale.
@@ -66,33 +78,44 @@ EXPORT_SYMBOL(drm_rect_intersect);
* %false otherwise
*/
bool drm_rect_clip_scaled(struct drm_rect *src, struct drm_rect *dst,
- const struct drm_rect *clip,
- int hscale, int vscale)
+ const struct drm_rect *clip)
{
int diff;
diff = clip->x1 - dst->x1;
if (diff > 0) {
- int64_t tmp = src->x1 + (int64_t) diff * hscale;
- src->x1 = clamp_t(int64_t, tmp, INT_MIN, INT_MAX);
+ u32 new_src_w = clip_scaled(drm_rect_width(src),
+ drm_rect_width(dst), diff);
+
+ src->x1 = clamp_t(int64_t, src->x2 - new_src_w, INT_MIN, INT_MAX);
+ dst->x1 = clip->x1;
}
diff = clip->y1 - dst->y1;
if (diff > 0) {
- int64_t tmp = src->y1 + (int64_t) diff * vscale;
- src->y1 = clamp_t(int64_t, tmp, INT_MIN, INT_MAX);
+ u32 new_src_h = clip_scaled(drm_rect_height(src),
+ drm_rect_height(dst), diff);
+
+ src->y1 = clamp_t(int64_t, src->y2 - new_src_h, INT_MIN, INT_MAX);
+ dst->y1 = clip->y1;
}
diff = dst->x2 - clip->x2;
if (diff > 0) {
- int64_t tmp = src->x2 - (int64_t) diff * hscale;
- src->x2 = clamp_t(int64_t, tmp, INT_MIN, INT_MAX);
+ u32 new_src_w = clip_scaled(drm_rect_width(src),
+ drm_rect_width(dst), diff);
+
+ src->x2 = clamp_t(int64_t, src->x1 + new_src_w, INT_MIN, INT_MAX);
+ dst->x2 = clip->x2;
}
diff = dst->y2 - clip->y2;
if (diff > 0) {
- int64_t tmp = src->y2 - (int64_t) diff * vscale;
- src->y2 = clamp_t(int64_t, tmp, INT_MIN, INT_MAX);
+ u32 new_src_h = clip_scaled(drm_rect_height(src),
+ drm_rect_height(dst), diff);
+
+ src->y2 = clamp_t(int64_t, src->y1 + new_src_h, INT_MIN, INT_MAX);
+ dst->y2 = clip->y2;
}
- return drm_rect_intersect(dst, clip);
+ return drm_rect_visible(dst);
}
EXPORT_SYMBOL(drm_rect_clip_scaled);
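
A worked example of the round-toward-1.0 rule in clip_scaled(), with assumed values:

    /*
     * src = 5 << 16 (5.0 source pixels), dst = 3, clip = 1: a 5/3 downscale.
     * tmp = (5 << 16) * (3 - 1) = 655360; since src >= dst << 16 we round
     * down: 655360 / 3 = 218453 (~3.333 in 16.16), i.e. ~3.333 source
     * pixels remain for the 2 remaining destination pixels -- still a
     * downscale, as intended.
     */
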
@@ -106,7 +129,10 @@ static int drm_calc_scale(int src, int dst)
if (dst == 0)
return 0;
- scale = src / dst;
+ if (src > (dst << 16))
+ return DIV_ROUND_UP(src, dst);
+ else
+ scale = src / dst;
return scale;
}
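
Concretely, as used by drm_rect_calc_hscale(), src is in 16.16 fixed point and dst in whole pixels; a worked example of the upward rounding:

    /*
     * src = 1000 << 16, dst = 300: src > dst << 16, so the factor becomes
     * DIV_ROUND_UP(1000 << 16, 300) = 218454 (~3.33334 in 16.16), just
     * above the exact ratio -- the pessimistic choice when comparing
     * against a max_scale limit.
     */
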
@@ -121,6 +147,10 @@ static int drm_calc_scale(int src, int dst)
* Calculate the horizontal scaling factor as
* (@src width) / (@dst width).
*
+ * If the scale is below 1 << 16, round down. If the scale is above
+ * 1 << 16, round up. This yields the most pessimistic scale for
+ * limit checks.
+ *
* RETURNS:
* The horizontal scaling factor, or a negative errno if out of limits.
*/
@@ -152,6 +182,10 @@ EXPORT_SYMBOL(drm_rect_calc_hscale);
* Calculate the vertical scaling factor as
* (@src height) / (@dst height).
*
+ * If the scale is below 1 << 16, round down. If the scale is above
+ * 1 << 16, round up. This yields the most pessimistic scale for
+ * limit checks.
+ *
* RETURNS:
* The vertical scaling factor, or a negative errno if out of limits.
*/
@@ -189,6 +223,10 @@ EXPORT_SYMBOL(drm_rect_calc_vscale);
* If the calculated scaling factor is above @max_hscale,
* decrease the width of rectangle @src to compensate.
*
+ * If the scale is below 1 << 16, round down. If the scale is above
+ * 1 << 16, round up. This yields the most pessimistic scale for
+ * limit checks.
+ *
* RETURNS:
* The horizontal scaling factor.
*/
@@ -239,6 +277,10 @@ EXPORT_SYMBOL(drm_rect_calc_hscale_relaxed);
* If the calculated scaling factor is above @max_vscale,
* decrease the height of rectangle @src to compensate.
*
+ * If the scale is below 1 << 16, round down. If the scale is above
+ * 1 << 16, round up. This yields the most pessimistic scale for
+ * limit checks.
+ *
* RETURNS:
* The vertical scaling factor.
*/
@@ -373,8 +415,8 @@ EXPORT_SYMBOL(drm_rect_rotate);
* them when doing a rotation and its inverse.
* That is, if you do ::
*
- * DRM_MODE_PROP_ROTATE(&r, width, height, rotation);
- * DRM_MODE_ROTATE_inv(&r, width, height, rotation);
+ * drm_rect_rotate(&r, width, height, rotation);
+ * drm_rect_rotate_inv(&r, width, height, rotation);
*
* you will always get back the original rectangle.
*/
diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c
index 1c5b5ce..b3c1daa 100644
--- a/drivers/gpu/drm/drm_sysfs.c
+++ b/drivers/gpu/drm/drm_sysfs.c
@@ -331,9 +331,7 @@ struct device *drm_sysfs_minor_alloc(struct drm_minor *minor)
struct device *kdev;
int r;
- if (minor->type == DRM_MINOR_CONTROL)
- minor_str = "controlD%d";
- else if (minor->type == DRM_MINOR_RENDER)
+ if (minor->type == DRM_MINOR_RENDER)
minor_str = "renderD%d";
else
minor_str = "card%d";
diff --git a/drivers/gpu/drm/exynos/Kconfig b/drivers/gpu/drm/exynos/Kconfig
index 735ce47..208bc27 100644
--- a/drivers/gpu/drm/exynos/Kconfig
+++ b/drivers/gpu/drm/exynos/Kconfig
@@ -1,6 +1,6 @@
config DRM_EXYNOS
tristate "DRM Support for Samsung SoC EXYNOS Series"
- depends on OF && DRM && (ARCH_S3C64XX || ARCH_EXYNOS || ARCH_MULTIPLATFORM)
+ depends on OF && DRM && (ARCH_S3C64XX || ARCH_S5PV210 || ARCH_EXYNOS || ARCH_MULTIPLATFORM)
select DRM_KMS_HELPER
select VIDEOMODE_HELPERS
select SND_SOC_HDMI_CODEC if SND_SOC
@@ -95,21 +95,31 @@ config DRM_EXYNOS_G2D
help
Choose this option if you want to use Exynos G2D for DRM.
+config DRM_EXYNOS_IPP
+ bool
+
config DRM_EXYNOS_FIMC
bool "FIMC"
- depends on BROKEN && MFD_SYSCON
+ select DRM_EXYNOS_IPP
help
Choose this option if you want to use Exynos FIMC for DRM.
config DRM_EXYNOS_ROTATOR
bool "Rotator"
- depends on BROKEN
+ select DRM_EXYNOS_IPP
help
Choose this option if you want to use Exynos Rotator for DRM.
+config DRM_EXYNOS_SCALER
+ bool "Scaler"
+ select DRM_EXYNOS_IPP
+ help
+ Choose this option if you want to use Exynos Scaler for DRM.
+
config DRM_EXYNOS_GSC
bool "GScaler"
- depends on BROKEN && ARCH_EXYNOS5 && VIDEO_SAMSUNG_EXYNOS_GSC=n
+ depends on VIDEO_SAMSUNG_EXYNOS_GSC=n
+ select DRM_EXYNOS_IPP
help
Choose this option if you want to use Exynos GSC for DRM.
diff --git a/drivers/gpu/drm/exynos/Makefile b/drivers/gpu/drm/exynos/Makefile
index a51c545..3b323f1 100644
--- a/drivers/gpu/drm/exynos/Makefile
+++ b/drivers/gpu/drm/exynos/Makefile
@@ -18,8 +18,10 @@ exynosdrm-$(CONFIG_DRM_EXYNOS_MIXER) += exynos_mixer.o
exynosdrm-$(CONFIG_DRM_EXYNOS_HDMI) += exynos_hdmi.o
exynosdrm-$(CONFIG_DRM_EXYNOS_VIDI) += exynos_drm_vidi.o
exynosdrm-$(CONFIG_DRM_EXYNOS_G2D) += exynos_drm_g2d.o
+exynosdrm-$(CONFIG_DRM_EXYNOS_IPP) += exynos_drm_ipp.o
exynosdrm-$(CONFIG_DRM_EXYNOS_FIMC) += exynos_drm_fimc.o
exynosdrm-$(CONFIG_DRM_EXYNOS_ROTATOR) += exynos_drm_rotator.o
+exynosdrm-$(CONFIG_DRM_EXYNOS_SCALER) += exynos_drm_scaler.o
exynosdrm-$(CONFIG_DRM_EXYNOS_GSC) += exynos_drm_gsc.o
exynosdrm-$(CONFIG_DRM_EXYNOS_MIC) += exynos_drm_mic.o
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c
index 39284bb..a81b4a5 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c
@@ -27,15 +27,23 @@
#include "exynos_drm_fb.h"
#include "exynos_drm_gem.h"
#include "exynos_drm_plane.h"
+#include "exynos_drm_ipp.h"
#include "exynos_drm_vidi.h"
#include "exynos_drm_g2d.h"
#include "exynos_drm_iommu.h"
#define DRIVER_NAME "exynos"
#define DRIVER_DESC "Samsung SoC DRM"
-#define DRIVER_DATE "20110530"
+#define DRIVER_DATE "20180330"
+
+/*
+ * Interface history:
+ *
+ * 1.0 - Original version
+ * 1.1 - Upgrade IPP driver to version 2.0
+ */
#define DRIVER_MAJOR 1
-#define DRIVER_MINOR 0
+#define DRIVER_MINOR 1
static int exynos_drm_open(struct drm_device *dev, struct drm_file *file)
{
@@ -88,6 +96,16 @@ static const struct drm_ioctl_desc exynos_ioctls[] = {
DRM_AUTH | DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(EXYNOS_G2D_EXEC, exynos_g2d_exec_ioctl,
DRM_AUTH | DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(EXYNOS_IPP_GET_RESOURCES,
+ exynos_drm_ipp_get_res_ioctl,
+ DRM_AUTH | DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(EXYNOS_IPP_GET_CAPS, exynos_drm_ipp_get_caps_ioctl,
+ DRM_AUTH | DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(EXYNOS_IPP_GET_LIMITS,
+ exynos_drm_ipp_get_limits_ioctl,
+ DRM_AUTH | DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(EXYNOS_IPP_COMMIT, exynos_drm_ipp_commit_ioctl,
+ DRM_AUTH | DRM_RENDER_ALLOW),
};
static const struct file_operations exynos_drm_driver_fops = {
@@ -184,6 +202,7 @@ struct exynos_drm_driver_info {
#define DRM_COMPONENT_DRIVER BIT(0) /* supports component framework */
#define DRM_VIRTUAL_DEVICE BIT(1) /* create virtual platform device */
#define DRM_DMA_DEVICE BIT(2) /* can be used for dma allocations */
+#define DRM_FIMC_DEVICE BIT(3) /* devices shared with V4L2 subsystem */
#define DRV_PTR(drv, cond) (IS_ENABLED(cond) ? &drv : NULL)
@@ -223,10 +242,16 @@ static struct exynos_drm_driver_info exynos_drm_drivers[] = {
DRV_PTR(g2d_driver, CONFIG_DRM_EXYNOS_G2D),
}, {
DRV_PTR(fimc_driver, CONFIG_DRM_EXYNOS_FIMC),
+ DRM_COMPONENT_DRIVER | DRM_FIMC_DEVICE,
}, {
DRV_PTR(rotator_driver, CONFIG_DRM_EXYNOS_ROTATOR),
+ DRM_COMPONENT_DRIVER
+ }, {
+ DRV_PTR(scaler_driver, CONFIG_DRM_EXYNOS_SCALER),
+ DRM_COMPONENT_DRIVER
}, {
DRV_PTR(gsc_driver, CONFIG_DRM_EXYNOS_GSC),
+ DRM_COMPONENT_DRIVER
}, {
&exynos_drm_platform_driver,
DRM_VIRTUAL_DEVICE
@@ -254,7 +279,11 @@ static struct component_match *exynos_drm_match_add(struct device *dev)
&info->driver->driver,
(void *)platform_bus_type.match))) {
put_device(p);
- component_match_add(dev, &match, compare_dev, d);
+
+ if (!(info->flags & DRM_FIMC_DEVICE) ||
+ exynos_drm_check_fimc_device(d) == 0)
+ component_match_add(dev, &match,
+ compare_dev, d);
p = d;
}
put_device(p);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.h b/drivers/gpu/drm/exynos/exynos_drm_drv.h
index 075957c..0f6d079 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.h
@@ -273,6 +273,15 @@ static inline int exynos_dpi_bind(struct drm_device *dev,
}
#endif
+#ifdef CONFIG_DRM_EXYNOS_FIMC
+int exynos_drm_check_fimc_device(struct device *dev);
+#else
+static inline int exynos_drm_check_fimc_device(struct device *dev)
+{
+ return 0;
+}
+#endif
+
int exynos_atomic_commit(struct drm_device *dev, struct drm_atomic_state *state,
bool nonblock);
@@ -288,6 +297,7 @@ extern struct platform_driver vidi_driver;
extern struct platform_driver g2d_driver;
extern struct platform_driver fimc_driver;
extern struct platform_driver rotator_driver;
+extern struct platform_driver scaler_driver;
extern struct platform_driver gsc_driver;
extern struct platform_driver mic_driver;
#endif
diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
index 7904ffa..eae44fd 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
@@ -270,7 +270,6 @@ struct exynos_dsi {
u32 lanes;
u32 mode_flags;
u32 format;
- struct videomode vm;
int state;
struct drm_property *brightness;
@@ -881,30 +880,30 @@ static int exynos_dsi_init_link(struct exynos_dsi *dsi)
static void exynos_dsi_set_display_mode(struct exynos_dsi *dsi)
{
- struct videomode *vm = &dsi->vm;
+ struct drm_display_mode *m = &dsi->encoder.crtc->state->adjusted_mode;
unsigned int num_bits_resol = dsi->driver_data->num_bits_resol;
u32 reg;
if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO) {
reg = DSIM_CMD_ALLOW(0xf)
- | DSIM_STABLE_VFP(vm->vfront_porch)
- | DSIM_MAIN_VBP(vm->vback_porch);
+ | DSIM_STABLE_VFP(m->vsync_start - m->vdisplay)
+ | DSIM_MAIN_VBP(m->vtotal - m->vsync_end);
exynos_dsi_write(dsi, DSIM_MVPORCH_REG, reg);
- reg = DSIM_MAIN_HFP(vm->hfront_porch)
- | DSIM_MAIN_HBP(vm->hback_porch);
+ reg = DSIM_MAIN_HFP(m->hsync_start - m->hdisplay)
+ | DSIM_MAIN_HBP(m->htotal - m->hsync_end);
exynos_dsi_write(dsi, DSIM_MHPORCH_REG, reg);
- reg = DSIM_MAIN_VSA(vm->vsync_len)
- | DSIM_MAIN_HSA(vm->hsync_len);
+ reg = DSIM_MAIN_VSA(m->vsync_end - m->vsync_start)
+ | DSIM_MAIN_HSA(m->hsync_end - m->hsync_start);
exynos_dsi_write(dsi, DSIM_MSYNC_REG, reg);
}
- reg = DSIM_MAIN_HRESOL(vm->hactive, num_bits_resol) |
- DSIM_MAIN_VRESOL(vm->vactive, num_bits_resol);
+ reg = DSIM_MAIN_HRESOL(m->hdisplay, num_bits_resol) |
+ DSIM_MAIN_VRESOL(m->vdisplay, num_bits_resol);
exynos_dsi_write(dsi, DSIM_MDRESOL_REG, reg);
- dev_dbg(dsi->dev, "LCD size = %dx%d\n", vm->hactive, vm->vactive);
+ dev_dbg(dsi->dev, "LCD size = %dx%d\n", m->hdisplay, m->vdisplay);
}
static void exynos_dsi_set_display_enable(struct exynos_dsi *dsi, bool enable)
@@ -1485,26 +1484,7 @@ static int exynos_dsi_create_connector(struct drm_encoder *encoder)
return 0;
}
-static void exynos_dsi_mode_set(struct drm_encoder *encoder,
- struct drm_display_mode *mode,
- struct drm_display_mode *adjusted_mode)
-{
- struct exynos_dsi *dsi = encoder_to_dsi(encoder);
- struct videomode *vm = &dsi->vm;
- struct drm_display_mode *m = adjusted_mode;
-
- vm->hactive = m->hdisplay;
- vm->vactive = m->vdisplay;
- vm->vfront_porch = m->vsync_start - m->vdisplay;
- vm->vback_porch = m->vtotal - m->vsync_end;
- vm->vsync_len = m->vsync_end - m->vsync_start;
- vm->hfront_porch = m->hsync_start - m->hdisplay;
- vm->hback_porch = m->htotal - m->hsync_end;
- vm->hsync_len = m->hsync_end - m->hsync_start;
-}
-
static const struct drm_encoder_helper_funcs exynos_dsi_encoder_helper_funcs = {
- .mode_set = exynos_dsi_mode_set,
.enable = exynos_dsi_enable,
.disable = exynos_dsi_disable,
};
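
For reference, the porch arithmetic used above follows the standard drm_display_mode timing relations that struct videomode used to carry explicitly:

    /*
     * hfront_porch = hsync_start - hdisplay
     * hsync_len    = hsync_end   - hsync_start
     * hback_porch  = htotal      - hsync_end
     * (and the same pattern for the vertical side)
     */
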
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.c b/drivers/gpu/drm/exynos/exynos_drm_fimc.c
index 5b18b5c..4dfbfc7 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimc.c
@@ -12,6 +12,7 @@
*
*/
#include <linux/kernel.h>
+#include <linux/component.h>
#include <linux/platform_device.h>
#include <linux/mfd/syscon.h>
#include <linux/regmap.h>
@@ -24,8 +25,8 @@
#include <drm/exynos_drm.h>
#include "regs-fimc.h"
#include "exynos_drm_drv.h"
+#include "exynos_drm_iommu.h"
#include "exynos_drm_ipp.h"
-#include "exynos_drm_fimc.h"
/*
* FIMC stands for Fully Interactive Mobile Camera and
@@ -33,23 +34,6 @@
* input DMA reads image data from the memory.
* output DMA writes image data to memory.
* FIMC supports image rotation and image effect functions.
- *
- * M2M operation : supports crop/scale/rotation/csc so on.
- * Memory ----> FIMC H/W ----> Memory.
- * Writeback operation : supports cloned screen with FIMD.
- * FIMD ----> FIMC H/W ----> Memory.
- * Output operation : supports direct display using local path.
- * Memory ----> FIMC H/W ----> FIMD.
- */
-
-/*
- * TODO
- * 1. check suspend/resume api if needed.
- * 2. need to check use case platform_device_id.
- * 3. check src/dst size with, height.
- * 4. added check_prepare api for right register.
- * 5. need to add supported list in prop_list.
- * 6. check prescaler/scaler optimization.
*/
#define FIMC_MAX_DEVS 4
@@ -59,29 +43,19 @@
#define FIMC_BUF_STOP 1
#define FIMC_BUF_START 2
#define FIMC_WIDTH_ITU_709 1280
-#define FIMC_REFRESH_MAX 60
-#define FIMC_REFRESH_MIN 12
-#define FIMC_CROP_MAX 8192
-#define FIMC_CROP_MIN 32
-#define FIMC_SCALE_MAX 4224
-#define FIMC_SCALE_MIN 32
+#define FIMC_AUTOSUSPEND_DELAY 2000
+
+static unsigned int fimc_mask = 0xc;
+module_param_named(fimc_devs, fimc_mask, uint, 0644);
+MODULE_PARM_DESC(fimc_devs, "Alias mask for assigning FIMC devices to Exynos DRM");
#define get_fimc_context(dev) platform_get_drvdata(to_platform_device(dev))
-#define get_ctx_from_ippdrv(ippdrv) container_of(ippdrv,\
- struct fimc_context, ippdrv);
-enum fimc_wb {
- FIMC_WB_NONE,
- FIMC_WB_A,
- FIMC_WB_B,
-};
enum {
FIMC_CLK_LCLK,
FIMC_CLK_GATE,
FIMC_CLK_WB_A,
FIMC_CLK_WB_B,
- FIMC_CLK_MUX,
- FIMC_CLK_PARENT,
FIMC_CLKS_MAX
};
@@ -90,12 +64,8 @@ static const char * const fimc_clock_names[] = {
[FIMC_CLK_GATE] = "fimc",
[FIMC_CLK_WB_A] = "pxl_async0",
[FIMC_CLK_WB_B] = "pxl_async1",
- [FIMC_CLK_MUX] = "mux",
- [FIMC_CLK_PARENT] = "parent",
};
-#define FIMC_DEFAULT_LCLK_FREQUENCY 133000000UL
-
/*
* A structure of scaler.
*
@@ -107,7 +77,7 @@ static const char * const fimc_clock_names[] = {
* @vratio: vertical ratio.
*/
struct fimc_scaler {
- bool range;
+ bool range;
bool bypass;
bool up_h;
bool up_v;
@@ -116,56 +86,32 @@ struct fimc_scaler {
};
/*
- * A structure of scaler capability.
- *
- * find user manual table 43-1.
- * @in_hori: scaler input horizontal size.
- * @bypass: scaler bypass mode.
- * @dst_h_wo_rot: target horizontal size without output rotation.
- * @dst_h_rot: target horizontal size with output rotation.
- * @rl_w_wo_rot: real width without input rotation.
- * @rl_h_rot: real height without output rotation.
- */
-struct fimc_capability {
- /* scaler */
- u32 in_hori;
- u32 bypass;
- /* output rotator */
- u32 dst_h_wo_rot;
- u32 dst_h_rot;
- /* input rotator */
- u32 rl_w_wo_rot;
- u32 rl_h_rot;
-};
-
-/*
* A structure of fimc context.
*
- * @ippdrv: prepare initialization using ippdrv.
* @regs_res: register resources.
* @regs: memory mapped io registers.
* @lock: locking of operations.
* @clocks: fimc clocks.
- * @clk_frequency: LCLK clock frequency.
- * @sysreg: handle to SYSREG block regmap.
* @sc: scaler information.
* @pol: polarity of writeback.
* @id: fimc id.
* @irq: irq number.
- * @suspended: qos operations.
*/
struct fimc_context {
- struct exynos_drm_ippdrv ippdrv;
+ struct exynos_drm_ipp ipp;
+ struct drm_device *drm_dev;
+ struct device *dev;
+ struct exynos_drm_ipp_task *task;
+ struct exynos_drm_ipp_formats *formats;
+ unsigned int num_formats;
+
struct resource *regs_res;
void __iomem *regs;
spinlock_t lock;
struct clk *clocks[FIMC_CLKS_MAX];
- u32 clk_frequency;
- struct regmap *sysreg;
struct fimc_scaler sc;
int id;
int irq;
- bool suspended;
};
static u32 fimc_read(struct fimc_context *ctx, u32 reg)
@@ -217,19 +163,10 @@ static void fimc_sw_reset(struct fimc_context *ctx)
fimc_write(ctx, 0x0, EXYNOS_CIFCNTSEQ);
}
-static int fimc_set_camblk_fimd0_wb(struct fimc_context *ctx)
-{
- return regmap_update_bits(ctx->sysreg, SYSREG_CAMERA_BLK,
- SYSREG_FIMD0WB_DEST_MASK,
- ctx->id << SYSREG_FIMD0WB_DEST_SHIFT);
-}
-
-static void fimc_set_type_ctrl(struct fimc_context *ctx, enum fimc_wb wb)
+static void fimc_set_type_ctrl(struct fimc_context *ctx)
{
u32 cfg;
- DRM_DEBUG_KMS("wb[%d]\n", wb);
-
cfg = fimc_read(ctx, EXYNOS_CIGCTRL);
cfg &= ~(EXYNOS_CIGCTRL_TESTPATTERN_MASK |
EXYNOS_CIGCTRL_SELCAM_ITU_MASK |
@@ -238,23 +175,10 @@ static void fimc_set_type_ctrl(struct fimc_context *ctx, enum fimc_wb wb)
EXYNOS_CIGCTRL_SELWB_CAMIF_MASK |
EXYNOS_CIGCTRL_SELWRITEBACK_MASK);
- switch (wb) {
- case FIMC_WB_A:
- cfg |= (EXYNOS_CIGCTRL_SELWRITEBACK_A |
- EXYNOS_CIGCTRL_SELWB_CAMIF_WRITEBACK);
- break;
- case FIMC_WB_B:
- cfg |= (EXYNOS_CIGCTRL_SELWRITEBACK_B |
- EXYNOS_CIGCTRL_SELWB_CAMIF_WRITEBACK);
- break;
- case FIMC_WB_NONE:
- default:
- cfg |= (EXYNOS_CIGCTRL_SELCAM_ITU_A |
- EXYNOS_CIGCTRL_SELWRITEBACK_A |
- EXYNOS_CIGCTRL_SELCAM_MIPI_A |
- EXYNOS_CIGCTRL_SELCAM_FIMC_ITU);
- break;
- }
+ cfg |= (EXYNOS_CIGCTRL_SELCAM_ITU_A |
+ EXYNOS_CIGCTRL_SELWRITEBACK_A |
+ EXYNOS_CIGCTRL_SELCAM_MIPI_A |
+ EXYNOS_CIGCTRL_SELCAM_FIMC_ITU);
fimc_write(ctx, cfg, EXYNOS_CIGCTRL);
}
@@ -296,7 +220,6 @@ static void fimc_clear_irq(struct fimc_context *ctx)
static bool fimc_check_ovf(struct fimc_context *ctx)
{
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
u32 status, flag;
status = fimc_read(ctx, EXYNOS_CISTATUS);
@@ -310,7 +233,7 @@ static bool fimc_check_ovf(struct fimc_context *ctx)
EXYNOS_CIWDOFST_CLROVFIY | EXYNOS_CIWDOFST_CLROVFICB |
EXYNOS_CIWDOFST_CLROVFICR);
- dev_err(ippdrv->dev, "occurred overflow at %d, status 0x%x.\n",
+ dev_err(ctx->dev, "occurred overflow at %d, status 0x%x.\n",
ctx->id, status);
return true;
}
@@ -376,10 +299,8 @@ static void fimc_handle_lastend(struct fimc_context *ctx, bool enable)
fimc_write(ctx, cfg, EXYNOS_CIOCTRL);
}
-
-static int fimc_src_set_fmt_order(struct fimc_context *ctx, u32 fmt)
+static void fimc_src_set_fmt_order(struct fimc_context *ctx, u32 fmt)
{
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
u32 cfg;
DRM_DEBUG_KMS("fmt[0x%x]\n", fmt);
@@ -392,12 +313,12 @@ static int fimc_src_set_fmt_order(struct fimc_context *ctx, u32 fmt)
case DRM_FORMAT_RGB565:
cfg |= EXYNOS_CISCCTRL_INRGB_FMT_RGB565;
fimc_write(ctx, cfg, EXYNOS_CISCCTRL);
- return 0;
+ return;
case DRM_FORMAT_RGB888:
case DRM_FORMAT_XRGB8888:
cfg |= EXYNOS_CISCCTRL_INRGB_FMT_RGB888;
fimc_write(ctx, cfg, EXYNOS_CISCCTRL);
- return 0;
+ return;
default:
/* bypass */
break;
@@ -438,20 +359,13 @@ static int fimc_src_set_fmt_order(struct fimc_context *ctx, u32 fmt)
cfg |= (EXYNOS_MSCTRL_ORDER2P_LSB_CBCR |
EXYNOS_MSCTRL_C_INT_IN_2PLANE);
break;
- default:
- dev_err(ippdrv->dev, "invalid source yuv order 0x%x.\n", fmt);
- return -EINVAL;
}
fimc_write(ctx, cfg, EXYNOS_MSCTRL);
-
- return 0;
}
-static int fimc_src_set_fmt(struct device *dev, u32 fmt)
+static void fimc_src_set_fmt(struct fimc_context *ctx, u32 fmt, bool tiled)
{
- struct fimc_context *ctx = get_fimc_context(dev);
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
u32 cfg;
DRM_DEBUG_KMS("fmt[0x%x]\n", fmt);
@@ -485,9 +399,6 @@ static int fimc_src_set_fmt(struct device *dev, u32 fmt)
case DRM_FORMAT_NV21:
cfg |= EXYNOS_MSCTRL_INFORMAT_YCBCR420;
break;
- default:
- dev_err(ippdrv->dev, "invalid source format 0x%x.\n", fmt);
- return -EINVAL;
}
fimc_write(ctx, cfg, EXYNOS_MSCTRL);
@@ -495,22 +406,22 @@ static int fimc_src_set_fmt(struct device *dev, u32 fmt)
cfg = fimc_read(ctx, EXYNOS_CIDMAPARAM);
cfg &= ~EXYNOS_CIDMAPARAM_R_MODE_MASK;
- cfg |= EXYNOS_CIDMAPARAM_R_MODE_LINEAR;
+ if (tiled)
+ cfg |= EXYNOS_CIDMAPARAM_R_MODE_64X32;
+ else
+ cfg |= EXYNOS_CIDMAPARAM_R_MODE_LINEAR;
fimc_write(ctx, cfg, EXYNOS_CIDMAPARAM);
- return fimc_src_set_fmt_order(ctx, fmt);
+ fimc_src_set_fmt_order(ctx, fmt);
}
-static int fimc_src_set_transf(struct device *dev,
- enum drm_exynos_degree degree,
- enum drm_exynos_flip flip, bool *swap)
+static void fimc_src_set_transf(struct fimc_context *ctx, unsigned int rotation)
{
- struct fimc_context *ctx = get_fimc_context(dev);
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
+ unsigned int degree = rotation & DRM_MODE_ROTATE_MASK;
u32 cfg1, cfg2;
- DRM_DEBUG_KMS("degree[%d]flip[0x%x]\n", degree, flip);
+ DRM_DEBUG_KMS("rotation[%x]\n", rotation);
cfg1 = fimc_read(ctx, EXYNOS_MSCTRL);
cfg1 &= ~(EXYNOS_MSCTRL_FLIP_X_MIRROR |
@@ -520,61 +431,56 @@ static int fimc_src_set_transf(struct device *dev,
cfg2 &= ~EXYNOS_CITRGFMT_INROT90_CLOCKWISE;
switch (degree) {
- case EXYNOS_DRM_DEGREE_0:
- if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+ case DRM_MODE_ROTATE_0:
+ if (rotation & DRM_MODE_REFLECT_X)
cfg1 |= EXYNOS_MSCTRL_FLIP_X_MIRROR;
- if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+ if (rotation & DRM_MODE_REFLECT_Y)
cfg1 |= EXYNOS_MSCTRL_FLIP_Y_MIRROR;
break;
- case EXYNOS_DRM_DEGREE_90:
+ case DRM_MODE_ROTATE_90:
cfg2 |= EXYNOS_CITRGFMT_INROT90_CLOCKWISE;
- if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+ if (rotation & DRM_MODE_REFLECT_X)
cfg1 |= EXYNOS_MSCTRL_FLIP_X_MIRROR;
- if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+ if (rotation & DRM_MODE_REFLECT_Y)
cfg1 |= EXYNOS_MSCTRL_FLIP_Y_MIRROR;
break;
- case EXYNOS_DRM_DEGREE_180:
+ case DRM_MODE_ROTATE_180:
cfg1 |= (EXYNOS_MSCTRL_FLIP_X_MIRROR |
EXYNOS_MSCTRL_FLIP_Y_MIRROR);
- if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+ if (rotation & DRM_MODE_REFLECT_X)
cfg1 &= ~EXYNOS_MSCTRL_FLIP_X_MIRROR;
- if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+ if (rotation & DRM_MODE_REFLECT_Y)
cfg1 &= ~EXYNOS_MSCTRL_FLIP_Y_MIRROR;
break;
- case EXYNOS_DRM_DEGREE_270:
+ case DRM_MODE_ROTATE_270:
cfg1 |= (EXYNOS_MSCTRL_FLIP_X_MIRROR |
EXYNOS_MSCTRL_FLIP_Y_MIRROR);
cfg2 |= EXYNOS_CITRGFMT_INROT90_CLOCKWISE;
- if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+ if (rotation & DRM_MODE_REFLECT_X)
cfg1 &= ~EXYNOS_MSCTRL_FLIP_X_MIRROR;
- if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+ if (rotation & DRM_MODE_REFLECT_Y)
cfg1 &= ~EXYNOS_MSCTRL_FLIP_Y_MIRROR;
break;
- default:
- dev_err(ippdrv->dev, "invalid degree value %d.\n", degree);
- return -EINVAL;
}
fimc_write(ctx, cfg1, EXYNOS_MSCTRL);
fimc_write(ctx, cfg2, EXYNOS_CITRGFMT);
- *swap = (cfg2 & EXYNOS_CITRGFMT_INROT90_CLOCKWISE) ? 1 : 0;
-
- return 0;
}
-static int fimc_set_window(struct fimc_context *ctx,
- struct drm_exynos_pos *pos, struct drm_exynos_sz *sz)
+static void fimc_set_window(struct fimc_context *ctx,
+ struct exynos_drm_ipp_buffer *buf)
{
u32 cfg, h1, h2, v1, v2;
/* cropped image */
- h1 = pos->x;
- h2 = sz->hsize - pos->w - pos->x;
- v1 = pos->y;
- v2 = sz->vsize - pos->h - pos->y;
+ h1 = buf->rect.x;
+ h2 = buf->buf.width - buf->rect.w - buf->rect.x;
+ v1 = buf->rect.y;
+ v2 = buf->buf.height - buf->rect.h - buf->rect.y;
DRM_DEBUG_KMS("x[%d]y[%d]w[%d]h[%d]hsize[%d]vsize[%d]\n",
- pos->x, pos->y, pos->w, pos->h, sz->hsize, sz->vsize);
+ buf->rect.x, buf->rect.y, buf->rect.w, buf->rect.h,
+ buf->buf.width, buf->buf.height);
DRM_DEBUG_KMS("h1[%d]h2[%d]v1[%d]v2[%d]\n", h1, h2, v1, v2);
/*
@@ -592,42 +498,30 @@ static int fimc_set_window(struct fimc_context *ctx,
cfg = (EXYNOS_CIWDOFST2_WINHOROFST2(h2) |
EXYNOS_CIWDOFST2_WINVEROFST2(v2));
fimc_write(ctx, cfg, EXYNOS_CIWDOFST2);
-
- return 0;
}
-static int fimc_src_set_size(struct device *dev, int swap,
- struct drm_exynos_pos *pos, struct drm_exynos_sz *sz)
+static void fimc_src_set_size(struct fimc_context *ctx,
+ struct exynos_drm_ipp_buffer *buf)
{
- struct fimc_context *ctx = get_fimc_context(dev);
- struct drm_exynos_pos img_pos = *pos;
- struct drm_exynos_sz img_sz = *sz;
u32 cfg;
- DRM_DEBUG_KMS("swap[%d]hsize[%d]vsize[%d]\n",
- swap, sz->hsize, sz->vsize);
+ DRM_DEBUG_KMS("hsize[%d]vsize[%d]\n", buf->buf.width, buf->buf.height);
/* original size */
- cfg = (EXYNOS_ORGISIZE_HORIZONTAL(img_sz.hsize) |
- EXYNOS_ORGISIZE_VERTICAL(img_sz.vsize));
+ cfg = (EXYNOS_ORGISIZE_HORIZONTAL(buf->buf.width) |
+ EXYNOS_ORGISIZE_VERTICAL(buf->buf.height));
fimc_write(ctx, cfg, EXYNOS_ORGISIZE);
- DRM_DEBUG_KMS("x[%d]y[%d]w[%d]h[%d]\n", pos->x, pos->y, pos->w, pos->h);
-
- if (swap) {
- img_pos.w = pos->h;
- img_pos.h = pos->w;
- img_sz.hsize = sz->vsize;
- img_sz.vsize = sz->hsize;
- }
+ DRM_DEBUG_KMS("x[%d]y[%d]w[%d]h[%d]\n", buf->rect.x, buf->rect.y,
+ buf->rect.w, buf->rect.h);
/* set input DMA image size */
cfg = fimc_read(ctx, EXYNOS_CIREAL_ISIZE);
cfg &= ~(EXYNOS_CIREAL_ISIZE_HEIGHT_MASK |
EXYNOS_CIREAL_ISIZE_WIDTH_MASK);
- cfg |= (EXYNOS_CIREAL_ISIZE_WIDTH(img_pos.w) |
- EXYNOS_CIREAL_ISIZE_HEIGHT(img_pos.h));
+ cfg |= (EXYNOS_CIREAL_ISIZE_WIDTH(buf->rect.w) |
+ EXYNOS_CIREAL_ISIZE_HEIGHT(buf->rect.h));
fimc_write(ctx, cfg, EXYNOS_CIREAL_ISIZE);
/*
@@ -635,91 +529,34 @@ static int fimc_src_set_size(struct device *dev, int swap,
* for now, we support only ITU601 8 bit mode
*/
cfg = (EXYNOS_CISRCFMT_ITU601_8BIT |
- EXYNOS_CISRCFMT_SOURCEHSIZE(img_sz.hsize) |
- EXYNOS_CISRCFMT_SOURCEVSIZE(img_sz.vsize));
+ EXYNOS_CISRCFMT_SOURCEHSIZE(buf->buf.width) |
+ EXYNOS_CISRCFMT_SOURCEVSIZE(buf->buf.height));
fimc_write(ctx, cfg, EXYNOS_CISRCFMT);
/* offset Y(RGB), Cb, Cr */
- cfg = (EXYNOS_CIIYOFF_HORIZONTAL(img_pos.x) |
- EXYNOS_CIIYOFF_VERTICAL(img_pos.y));
+ cfg = (EXYNOS_CIIYOFF_HORIZONTAL(buf->rect.x) |
+ EXYNOS_CIIYOFF_VERTICAL(buf->rect.y));
fimc_write(ctx, cfg, EXYNOS_CIIYOFF);
- cfg = (EXYNOS_CIICBOFF_HORIZONTAL(img_pos.x) |
- EXYNOS_CIICBOFF_VERTICAL(img_pos.y));
+ cfg = (EXYNOS_CIICBOFF_HORIZONTAL(buf->rect.x) |
+ EXYNOS_CIICBOFF_VERTICAL(buf->rect.y));
fimc_write(ctx, cfg, EXYNOS_CIICBOFF);
- cfg = (EXYNOS_CIICROFF_HORIZONTAL(img_pos.x) |
- EXYNOS_CIICROFF_VERTICAL(img_pos.y));
+ cfg = (EXYNOS_CIICROFF_HORIZONTAL(buf->rect.x) |
+ EXYNOS_CIICROFF_VERTICAL(buf->rect.y));
fimc_write(ctx, cfg, EXYNOS_CIICROFF);
- return fimc_set_window(ctx, &img_pos, &img_sz);
+ fimc_set_window(ctx, buf);
}
-static int fimc_src_set_addr(struct device *dev,
- struct drm_exynos_ipp_buf_info *buf_info, u32 buf_id,
- enum drm_exynos_ipp_buf_type buf_type)
+static void fimc_src_set_addr(struct fimc_context *ctx,
+ struct exynos_drm_ipp_buffer *buf)
{
- struct fimc_context *ctx = get_fimc_context(dev);
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
- struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node;
- struct drm_exynos_ipp_property *property;
- struct drm_exynos_ipp_config *config;
-
- if (!c_node) {
- DRM_ERROR("failed to get c_node.\n");
- return -EINVAL;
- }
-
- property = &c_node->property;
-
- DRM_DEBUG_KMS("prop_id[%d]buf_id[%d]buf_type[%d]\n",
- property->prop_id, buf_id, buf_type);
-
- if (buf_id > FIMC_MAX_SRC) {
- dev_info(ippdrv->dev, "invalid buf_id %d.\n", buf_id);
- return -ENOMEM;
- }
-
- /* address register set */
- switch (buf_type) {
- case IPP_BUF_ENQUEUE:
- config = &property->config[EXYNOS_DRM_OPS_SRC];
- fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_Y],
- EXYNOS_CIIYSA0);
-
- if (config->fmt == DRM_FORMAT_YVU420) {
- fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CR],
- EXYNOS_CIICBSA0);
- fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CB],
- EXYNOS_CIICRSA0);
- } else {
- fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CB],
- EXYNOS_CIICBSA0);
- fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CR],
- EXYNOS_CIICRSA0);
- }
- break;
- case IPP_BUF_DEQUEUE:
- fimc_write(ctx, 0x0, EXYNOS_CIIYSA0);
- fimc_write(ctx, 0x0, EXYNOS_CIICBSA0);
- fimc_write(ctx, 0x0, EXYNOS_CIICRSA0);
- break;
- default:
- /* bypass */
- break;
- }
-
- return 0;
+ fimc_write(ctx, buf->dma_addr[0], EXYNOS_CIIYSA(0));
+ fimc_write(ctx, buf->dma_addr[1], EXYNOS_CIICBSA(0));
+ fimc_write(ctx, buf->dma_addr[2], EXYNOS_CIICRSA(0));
}
-static struct exynos_drm_ipp_ops fimc_src_ops = {
- .set_fmt = fimc_src_set_fmt,
- .set_transf = fimc_src_set_transf,
- .set_size = fimc_src_set_size,
- .set_addr = fimc_src_set_addr,
-};
-
-static int fimc_dst_set_fmt_order(struct fimc_context *ctx, u32 fmt)
+static void fimc_dst_set_fmt_order(struct fimc_context *ctx, u32 fmt)
{
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
u32 cfg;
DRM_DEBUG_KMS("fmt[0x%x]\n", fmt);
@@ -732,11 +569,11 @@ static int fimc_dst_set_fmt_order(struct fimc_context *ctx, u32 fmt)
case DRM_FORMAT_RGB565:
cfg |= EXYNOS_CISCCTRL_OUTRGB_FMT_RGB565;
fimc_write(ctx, cfg, EXYNOS_CISCCTRL);
- return 0;
+ return;
case DRM_FORMAT_RGB888:
cfg |= EXYNOS_CISCCTRL_OUTRGB_FMT_RGB888;
fimc_write(ctx, cfg, EXYNOS_CISCCTRL);
- return 0;
+ return;
case DRM_FORMAT_XRGB8888:
cfg |= (EXYNOS_CISCCTRL_OUTRGB_FMT_RGB888 |
EXYNOS_CISCCTRL_EXTRGB_EXTENSION);
@@ -784,20 +621,13 @@ static int fimc_dst_set_fmt_order(struct fimc_context *ctx, u32 fmt)
cfg |= EXYNOS_CIOCTRL_ORDER2P_LSB_CBCR;
cfg |= EXYNOS_CIOCTRL_YCBCR_2PLANE;
break;
- default:
- dev_err(ippdrv->dev, "invalid target yuv order 0x%x.\n", fmt);
- return -EINVAL;
}
fimc_write(ctx, cfg, EXYNOS_CIOCTRL);
-
- return 0;
}
-static int fimc_dst_set_fmt(struct device *dev, u32 fmt)
+static void fimc_dst_set_fmt(struct fimc_context *ctx, u32 fmt, bool tiled)
{
- struct fimc_context *ctx = get_fimc_context(dev);
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
u32 cfg;
DRM_DEBUG_KMS("fmt[0x%x]\n", fmt);
@@ -837,10 +667,6 @@ static int fimc_dst_set_fmt(struct device *dev, u32 fmt)
case DRM_FORMAT_NV21:
cfg |= EXYNOS_CITRGFMT_OUTFORMAT_YCBCR420;
break;
- default:
- dev_err(ippdrv->dev, "invalid target format 0x%x.\n",
- fmt);
- return -EINVAL;
}
fimc_write(ctx, cfg, EXYNOS_CITRGFMT);
@@ -849,73 +675,67 @@ static int fimc_dst_set_fmt(struct device *dev, u32 fmt)
cfg = fimc_read(ctx, EXYNOS_CIDMAPARAM);
cfg &= ~EXYNOS_CIDMAPARAM_W_MODE_MASK;
- cfg |= EXYNOS_CIDMAPARAM_W_MODE_LINEAR;
+ if (tiled)
+ cfg |= EXYNOS_CIDMAPARAM_W_MODE_64X32;
+ else
+ cfg |= EXYNOS_CIDMAPARAM_W_MODE_LINEAR;
fimc_write(ctx, cfg, EXYNOS_CIDMAPARAM);
- return fimc_dst_set_fmt_order(ctx, fmt);
+ fimc_dst_set_fmt_order(ctx, fmt);
}
-static int fimc_dst_set_transf(struct device *dev,
- enum drm_exynos_degree degree,
- enum drm_exynos_flip flip, bool *swap)
+static void fimc_dst_set_transf(struct fimc_context *ctx, unsigned int rotation)
{
- struct fimc_context *ctx = get_fimc_context(dev);
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
+ unsigned int degree = rotation & DRM_MODE_ROTATE_MASK;
u32 cfg;
- DRM_DEBUG_KMS("degree[%d]flip[0x%x]\n", degree, flip);
+ DRM_DEBUG_KMS("rotation[0x%x]\n", rotation);
cfg = fimc_read(ctx, EXYNOS_CITRGFMT);
cfg &= ~EXYNOS_CITRGFMT_FLIP_MASK;
cfg &= ~EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE;
switch (degree) {
- case EXYNOS_DRM_DEGREE_0:
- if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+ case DRM_MODE_ROTATE_0:
+ if (rotation & DRM_MODE_REFLECT_X)
cfg |= EXYNOS_CITRGFMT_FLIP_X_MIRROR;
- if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+ if (rotation & DRM_MODE_REFLECT_Y)
cfg |= EXYNOS_CITRGFMT_FLIP_Y_MIRROR;
break;
- case EXYNOS_DRM_DEGREE_90:
+ case DRM_MODE_ROTATE_90:
cfg |= EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE;
- if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+ if (rotation & DRM_MODE_REFLECT_X)
cfg |= EXYNOS_CITRGFMT_FLIP_X_MIRROR;
- if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+ if (rotation & DRM_MODE_REFLECT_Y)
cfg |= EXYNOS_CITRGFMT_FLIP_Y_MIRROR;
break;
- case EXYNOS_DRM_DEGREE_180:
+ case DRM_MODE_ROTATE_180:
cfg |= (EXYNOS_CITRGFMT_FLIP_X_MIRROR |
EXYNOS_CITRGFMT_FLIP_Y_MIRROR);
- if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+ if (rotation & DRM_MODE_REFLECT_X)
cfg &= ~EXYNOS_CITRGFMT_FLIP_X_MIRROR;
- if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+ if (rotation & DRM_MODE_REFLECT_Y)
cfg &= ~EXYNOS_CITRGFMT_FLIP_Y_MIRROR;
break;
- case EXYNOS_DRM_DEGREE_270:
+ case DRM_MODE_ROTATE_270:
cfg |= (EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE |
EXYNOS_CITRGFMT_FLIP_X_MIRROR |
EXYNOS_CITRGFMT_FLIP_Y_MIRROR);
- if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+ if (rotation & DRM_MODE_REFLECT_X)
cfg &= ~EXYNOS_CITRGFMT_FLIP_X_MIRROR;
- if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+ if (rotation & DRM_MODE_REFLECT_Y)
cfg &= ~EXYNOS_CITRGFMT_FLIP_Y_MIRROR;
break;
- default:
- dev_err(ippdrv->dev, "invalid degree value %d.\n", degree);
- return -EINVAL;
}
fimc_write(ctx, cfg, EXYNOS_CITRGFMT);
- *swap = (cfg & EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE) ? 1 : 0;
-
- return 0;
}
static int fimc_set_prescaler(struct fimc_context *ctx, struct fimc_scaler *sc,
- struct drm_exynos_pos *src, struct drm_exynos_pos *dst)
+ struct drm_exynos_ipp_task_rect *src,
+ struct drm_exynos_ipp_task_rect *dst)
{
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
u32 cfg, cfg_ext, shfactor;
u32 pre_dst_width, pre_dst_height;
u32 hfactor, vfactor;
@@ -942,13 +762,13 @@ static int fimc_set_prescaler(struct fimc_context *ctx, struct fimc_scaler *sc,
/* the IPP core task checks assure that the dividers are not null */
hfactor = fls(src_w / dst_w / 2);
if (hfactor > FIMC_SHFACTOR / 2) {
- dev_err(ippdrv->dev, "failed to get ratio horizontal.\n");
+ dev_err(ctx->dev, "failed to get ratio horizontal.\n");
return -EINVAL;
}
vfactor = fls(src_h / dst_h / 2);
if (vfactor > FIMC_SHFACTOR / 2) {
- dev_err(ippdrv->dev, "failed to get ratio vertical.\n");
+ dev_err(ctx->dev, "failed to get ratio vertical.\n");
return -EINVAL;
}
@@ -1019,83 +839,77 @@ static void fimc_set_scaler(struct fimc_context *ctx, struct fimc_scaler *sc)
fimc_write(ctx, cfg_ext, EXYNOS_CIEXTEN);
}
-static int fimc_dst_set_size(struct device *dev, int swap,
- struct drm_exynos_pos *pos, struct drm_exynos_sz *sz)
+static void fimc_dst_set_size(struct fimc_context *ctx,
+ struct exynos_drm_ipp_buffer *buf)
{
- struct fimc_context *ctx = get_fimc_context(dev);
- struct drm_exynos_pos img_pos = *pos;
- struct drm_exynos_sz img_sz = *sz;
- u32 cfg;
+ u32 cfg, cfg_ext;
- DRM_DEBUG_KMS("swap[%d]hsize[%d]vsize[%d]\n",
- swap, sz->hsize, sz->vsize);
+ DRM_DEBUG_KMS("hsize[%d]vsize[%d]\n", buf->buf.width, buf->buf.height);
/* original size */
- cfg = (EXYNOS_ORGOSIZE_HORIZONTAL(img_sz.hsize) |
- EXYNOS_ORGOSIZE_VERTICAL(img_sz.vsize));
+ cfg = (EXYNOS_ORGOSIZE_HORIZONTAL(buf->buf.width) |
+ EXYNOS_ORGOSIZE_VERTICAL(buf->buf.height));
fimc_write(ctx, cfg, EXYNOS_ORGOSIZE);
- DRM_DEBUG_KMS("x[%d]y[%d]w[%d]h[%d]\n", pos->x, pos->y, pos->w, pos->h);
+ DRM_DEBUG_KMS("x[%d]y[%d]w[%d]h[%d]\n", buf->rect.x, buf->rect.y,
+ buf->rect.w, buf->rect.h);
/* CSC ITU */
cfg = fimc_read(ctx, EXYNOS_CIGCTRL);
cfg &= ~EXYNOS_CIGCTRL_CSC_MASK;
- if (sz->hsize >= FIMC_WIDTH_ITU_709)
+ if (buf->buf.width >= FIMC_WIDTH_ITU_709)
cfg |= EXYNOS_CIGCTRL_CSC_ITU709;
else
cfg |= EXYNOS_CIGCTRL_CSC_ITU601;
fimc_write(ctx, cfg, EXYNOS_CIGCTRL);
- if (swap) {
- img_pos.w = pos->h;
- img_pos.h = pos->w;
- img_sz.hsize = sz->vsize;
- img_sz.vsize = sz->hsize;
- }
+ cfg_ext = fimc_read(ctx, EXYNOS_CITRGFMT);
/* target image size */
cfg = fimc_read(ctx, EXYNOS_CITRGFMT);
cfg &= ~(EXYNOS_CITRGFMT_TARGETH_MASK |
EXYNOS_CITRGFMT_TARGETV_MASK);
- cfg |= (EXYNOS_CITRGFMT_TARGETHSIZE(img_pos.w) |
- EXYNOS_CITRGFMT_TARGETVSIZE(img_pos.h));
+ if (cfg_ext & EXYNOS_CITRGFMT_OUTROT90_CLOCKWISE)
+ cfg |= (EXYNOS_CITRGFMT_TARGETHSIZE(buf->rect.h) |
+ EXYNOS_CITRGFMT_TARGETVSIZE(buf->rect.w));
+ else
+ cfg |= (EXYNOS_CITRGFMT_TARGETHSIZE(buf->rect.w) |
+ EXYNOS_CITRGFMT_TARGETVSIZE(buf->rect.h));
fimc_write(ctx, cfg, EXYNOS_CITRGFMT);
/* target area */
- cfg = EXYNOS_CITAREA_TARGET_AREA(img_pos.w * img_pos.h);
+ cfg = EXYNOS_CITAREA_TARGET_AREA(buf->rect.w * buf->rect.h);
fimc_write(ctx, cfg, EXYNOS_CITAREA);
/* offset Y(RGB), Cb, Cr */
- cfg = (EXYNOS_CIOYOFF_HORIZONTAL(img_pos.x) |
- EXYNOS_CIOYOFF_VERTICAL(img_pos.y));
+ cfg = (EXYNOS_CIOYOFF_HORIZONTAL(buf->rect.x) |
+ EXYNOS_CIOYOFF_VERTICAL(buf->rect.y));
fimc_write(ctx, cfg, EXYNOS_CIOYOFF);
- cfg = (EXYNOS_CIOCBOFF_HORIZONTAL(img_pos.x) |
- EXYNOS_CIOCBOFF_VERTICAL(img_pos.y));
+ cfg = (EXYNOS_CIOCBOFF_HORIZONTAL(buf->rect.x) |
+ EXYNOS_CIOCBOFF_VERTICAL(buf->rect.y));
fimc_write(ctx, cfg, EXYNOS_CIOCBOFF);
- cfg = (EXYNOS_CIOCROFF_HORIZONTAL(img_pos.x) |
- EXYNOS_CIOCROFF_VERTICAL(img_pos.y));
+ cfg = (EXYNOS_CIOCROFF_HORIZONTAL(buf->rect.x) |
+ EXYNOS_CIOCROFF_VERTICAL(buf->rect.y));
fimc_write(ctx, cfg, EXYNOS_CIOCROFF);
-
- return 0;
}
static void fimc_dst_set_buf_seq(struct fimc_context *ctx, u32 buf_id,
- enum drm_exynos_ipp_buf_type buf_type)
+ bool enqueue)
{
unsigned long flags;
u32 buf_num;
u32 cfg;
- DRM_DEBUG_KMS("buf_id[%d]buf_type[%d]\n", buf_id, buf_type);
+ DRM_DEBUG_KMS("buf_id[%d]enqueu[%d]\n", buf_id, enqueue);
spin_lock_irqsave(&ctx->lock, flags);
cfg = fimc_read(ctx, EXYNOS_CIFCNTSEQ);
- if (buf_type == IPP_BUF_ENQUEUE)
+ if (enqueue)
cfg |= (1 << buf_id);
else
cfg &= ~(1 << buf_id);
@@ -1104,88 +918,29 @@ static void fimc_dst_set_buf_seq(struct fimc_context *ctx, u32 buf_id,
buf_num = hweight32(cfg);
- if (buf_type == IPP_BUF_ENQUEUE && buf_num >= FIMC_BUF_START)
+ if (enqueue && buf_num >= FIMC_BUF_START)
fimc_mask_irq(ctx, true);
- else if (buf_type == IPP_BUF_DEQUEUE && buf_num <= FIMC_BUF_STOP)
+ else if (!enqueue && buf_num <= FIMC_BUF_STOP)
fimc_mask_irq(ctx, false);
spin_unlock_irqrestore(&ctx->lock, flags);
}
-static int fimc_dst_set_addr(struct device *dev,
- struct drm_exynos_ipp_buf_info *buf_info, u32 buf_id,
- enum drm_exynos_ipp_buf_type buf_type)
+static void fimc_dst_set_addr(struct fimc_context *ctx,
+ struct exynos_drm_ipp_buffer *buf)
{
- struct fimc_context *ctx = get_fimc_context(dev);
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
- struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node;
- struct drm_exynos_ipp_property *property;
- struct drm_exynos_ipp_config *config;
-
- if (!c_node) {
- DRM_ERROR("failed to get c_node.\n");
- return -EINVAL;
- }
-
- property = &c_node->property;
-
- DRM_DEBUG_KMS("prop_id[%d]buf_id[%d]buf_type[%d]\n",
- property->prop_id, buf_id, buf_type);
+ fimc_write(ctx, buf->dma_addr[0], EXYNOS_CIOYSA(0));
+ fimc_write(ctx, buf->dma_addr[1], EXYNOS_CIOCBSA(0));
+ fimc_write(ctx, buf->dma_addr[2], EXYNOS_CIOCRSA(0));
- if (buf_id > FIMC_MAX_DST) {
- dev_info(ippdrv->dev, "invalid buf_id %d.\n", buf_id);
- return -ENOMEM;
- }
-
- /* address register set */
- switch (buf_type) {
- case IPP_BUF_ENQUEUE:
- config = &property->config[EXYNOS_DRM_OPS_DST];
-
- fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_Y],
- EXYNOS_CIOYSA(buf_id));
-
- if (config->fmt == DRM_FORMAT_YVU420) {
- fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CR],
- EXYNOS_CIOCBSA(buf_id));
- fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CB],
- EXYNOS_CIOCRSA(buf_id));
- } else {
- fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CB],
- EXYNOS_CIOCBSA(buf_id));
- fimc_write(ctx, buf_info->base[EXYNOS_DRM_PLANAR_CR],
- EXYNOS_CIOCRSA(buf_id));
- }
- break;
- case IPP_BUF_DEQUEUE:
- fimc_write(ctx, 0x0, EXYNOS_CIOYSA(buf_id));
- fimc_write(ctx, 0x0, EXYNOS_CIOCBSA(buf_id));
- fimc_write(ctx, 0x0, EXYNOS_CIOCRSA(buf_id));
- break;
- default:
- /* bypass */
- break;
- }
-
- fimc_dst_set_buf_seq(ctx, buf_id, buf_type);
-
- return 0;
+ fimc_dst_set_buf_seq(ctx, 0, true);
}
-static struct exynos_drm_ipp_ops fimc_dst_ops = {
- .set_fmt = fimc_dst_set_fmt,
- .set_transf = fimc_dst_set_transf,
- .set_size = fimc_dst_set_size,
- .set_addr = fimc_dst_set_addr,
-};
+static void fimc_stop(struct fimc_context *ctx);
static irqreturn_t fimc_irq_handler(int irq, void *dev_id)
{
struct fimc_context *ctx = dev_id;
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
- struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node;
- struct drm_exynos_ipp_event_work *event_work =
- c_node->event_work;
int buf_id;
DRM_DEBUG_KMS("fimc id[%d]\n", ctx->id);
@@ -1203,170 +958,19 @@ static irqreturn_t fimc_irq_handler(int irq, void *dev_id)
DRM_DEBUG_KMS("buf_id[%d]\n", buf_id);
- fimc_dst_set_buf_seq(ctx, buf_id, IPP_BUF_DEQUEUE);
-
- event_work->ippdrv = ippdrv;
- event_work->buf_id[EXYNOS_DRM_OPS_DST] = buf_id;
- queue_work(ippdrv->event_workq, &event_work->work);
-
- return IRQ_HANDLED;
-}
-
-static int fimc_init_prop_list(struct exynos_drm_ippdrv *ippdrv)
-{
- struct drm_exynos_ipp_prop_list *prop_list = &ippdrv->prop_list;
-
- prop_list->version = 1;
- prop_list->writeback = 1;
- prop_list->refresh_min = FIMC_REFRESH_MIN;
- prop_list->refresh_max = FIMC_REFRESH_MAX;
- prop_list->flip = (1 << EXYNOS_DRM_FLIP_NONE) |
- (1 << EXYNOS_DRM_FLIP_VERTICAL) |
- (1 << EXYNOS_DRM_FLIP_HORIZONTAL);
- prop_list->degree = (1 << EXYNOS_DRM_DEGREE_0) |
- (1 << EXYNOS_DRM_DEGREE_90) |
- (1 << EXYNOS_DRM_DEGREE_180) |
- (1 << EXYNOS_DRM_DEGREE_270);
- prop_list->csc = 1;
- prop_list->crop = 1;
- prop_list->crop_max.hsize = FIMC_CROP_MAX;
- prop_list->crop_max.vsize = FIMC_CROP_MAX;
- prop_list->crop_min.hsize = FIMC_CROP_MIN;
- prop_list->crop_min.vsize = FIMC_CROP_MIN;
- prop_list->scale = 1;
- prop_list->scale_max.hsize = FIMC_SCALE_MAX;
- prop_list->scale_max.vsize = FIMC_SCALE_MAX;
- prop_list->scale_min.hsize = FIMC_SCALE_MIN;
- prop_list->scale_min.vsize = FIMC_SCALE_MIN;
-
- return 0;
-}
-
-static inline bool fimc_check_drm_flip(enum drm_exynos_flip flip)
-{
- switch (flip) {
- case EXYNOS_DRM_FLIP_NONE:
- case EXYNOS_DRM_FLIP_VERTICAL:
- case EXYNOS_DRM_FLIP_HORIZONTAL:
- case EXYNOS_DRM_FLIP_BOTH:
- return true;
- default:
- DRM_DEBUG_KMS("invalid flip\n");
- return false;
- }
-}
-
-static int fimc_ippdrv_check_property(struct device *dev,
- struct drm_exynos_ipp_property *property)
-{
- struct fimc_context *ctx = get_fimc_context(dev);
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
- struct drm_exynos_ipp_prop_list *pp = &ippdrv->prop_list;
- struct drm_exynos_ipp_config *config;
- struct drm_exynos_pos *pos;
- struct drm_exynos_sz *sz;
- bool swap;
- int i;
-
- for_each_ipp_ops(i) {
- if ((i == EXYNOS_DRM_OPS_SRC) &&
- (property->cmd == IPP_CMD_WB))
- continue;
-
- config = &property->config[i];
- pos = &config->pos;
- sz = &config->sz;
-
- /* check for flip */
- if (!fimc_check_drm_flip(config->flip)) {
- DRM_ERROR("invalid flip.\n");
- goto err_property;
- }
-
- /* check for degree */
- switch (config->degree) {
- case EXYNOS_DRM_DEGREE_90:
- case EXYNOS_DRM_DEGREE_270:
- swap = true;
- break;
- case EXYNOS_DRM_DEGREE_0:
- case EXYNOS_DRM_DEGREE_180:
- swap = false;
- break;
- default:
- DRM_ERROR("invalid degree.\n");
- goto err_property;
- }
-
- /* check for buffer bound */
- if ((pos->x + pos->w > sz->hsize) ||
- (pos->y + pos->h > sz->vsize)) {
- DRM_ERROR("out of buf bound.\n");
- goto err_property;
- }
+ if (ctx->task) {
+ struct exynos_drm_ipp_task *task = ctx->task;
- /* check for crop */
- if ((i == EXYNOS_DRM_OPS_SRC) && (pp->crop)) {
- if (swap) {
- if ((pos->h < pp->crop_min.hsize) ||
- (sz->vsize > pp->crop_max.hsize) ||
- (pos->w < pp->crop_min.vsize) ||
- (sz->hsize > pp->crop_max.vsize)) {
- DRM_ERROR("out of crop size.\n");
- goto err_property;
- }
- } else {
- if ((pos->w < pp->crop_min.hsize) ||
- (sz->hsize > pp->crop_max.hsize) ||
- (pos->h < pp->crop_min.vsize) ||
- (sz->vsize > pp->crop_max.vsize)) {
- DRM_ERROR("out of crop size.\n");
- goto err_property;
- }
- }
- }
-
- /* check for scale */
- if ((i == EXYNOS_DRM_OPS_DST) && (pp->scale)) {
- if (swap) {
- if ((pos->h < pp->scale_min.hsize) ||
- (sz->vsize > pp->scale_max.hsize) ||
- (pos->w < pp->scale_min.vsize) ||
- (sz->hsize > pp->scale_max.vsize)) {
- DRM_ERROR("out of scale size.\n");
- goto err_property;
- }
- } else {
- if ((pos->w < pp->scale_min.hsize) ||
- (sz->hsize > pp->scale_max.hsize) ||
- (pos->h < pp->scale_min.vsize) ||
- (sz->vsize > pp->scale_max.vsize)) {
- DRM_ERROR("out of scale size.\n");
- goto err_property;
- }
- }
- }
+ ctx->task = NULL;
+ pm_runtime_mark_last_busy(ctx->dev);
+ pm_runtime_put_autosuspend(ctx->dev);
+ exynos_drm_ipp_task_done(task, 0);
}
- return 0;
+ fimc_dst_set_buf_seq(ctx, buf_id, false);
+ fimc_stop(ctx);
-err_property:
- for_each_ipp_ops(i) {
- if ((i == EXYNOS_DRM_OPS_SRC) &&
- (property->cmd == IPP_CMD_WB))
- continue;
-
- config = &property->config[i];
- pos = &config->pos;
- sz = &config->sz;
-
- DRM_ERROR("[%s]f[%d]r[%d]pos[%d %d %d %d]sz[%d %d]\n",
- i ? "dst" : "src", config->flip, config->degree,
- pos->x, pos->y, pos->w, pos->h,
- sz->hsize, sz->vsize);
- }
-
- return -EINVAL;
+ return IRQ_HANDLED;
}
static void fimc_clear_addr(struct fimc_context *ctx)
@@ -1386,10 +990,8 @@ static void fimc_clear_addr(struct fimc_context *ctx)
}
}
-static int fimc_ippdrv_reset(struct device *dev)
+static void fimc_reset(struct fimc_context *ctx)
{
- struct fimc_context *ctx = get_fimc_context(dev);
-
/* reset h/w block */
fimc_sw_reset(ctx);
@@ -1397,82 +999,26 @@ static int fimc_ippdrv_reset(struct device *dev)
memset(&ctx->sc, 0x0, sizeof(ctx->sc));
fimc_clear_addr(ctx);
-
- return 0;
}
-static int fimc_ippdrv_start(struct device *dev, enum drm_exynos_ipp_cmd cmd)
+static void fimc_start(struct fimc_context *ctx)
{
- struct fimc_context *ctx = get_fimc_context(dev);
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
- struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node;
- struct drm_exynos_ipp_property *property;
- struct drm_exynos_ipp_config *config;
- struct drm_exynos_pos img_pos[EXYNOS_DRM_OPS_MAX];
- struct drm_exynos_ipp_set_wb set_wb;
- int ret, i;
u32 cfg0, cfg1;
- DRM_DEBUG_KMS("cmd[%d]\n", cmd);
-
- if (!c_node) {
- DRM_ERROR("failed to get c_node.\n");
- return -EINVAL;
- }
-
- property = &c_node->property;
-
fimc_mask_irq(ctx, true);
- for_each_ipp_ops(i) {
- config = &property->config[i];
- img_pos[i] = config->pos;
- }
-
- ret = fimc_set_prescaler(ctx, &ctx->sc,
- &img_pos[EXYNOS_DRM_OPS_SRC],
- &img_pos[EXYNOS_DRM_OPS_DST]);
- if (ret) {
- dev_err(dev, "failed to set prescaler.\n");
- return ret;
- }
-
- /* If set ture, we can save jpeg about screen */
+ /* If set true, the screen contents can be saved as a JPEG */
fimc_handle_jpeg(ctx, false);
fimc_set_scaler(ctx, &ctx->sc);
- switch (cmd) {
- case IPP_CMD_M2M:
- fimc_set_type_ctrl(ctx, FIMC_WB_NONE);
- fimc_handle_lastend(ctx, false);
-
- /* setup dma */
- cfg0 = fimc_read(ctx, EXYNOS_MSCTRL);
- cfg0 &= ~EXYNOS_MSCTRL_INPUT_MASK;
- cfg0 |= EXYNOS_MSCTRL_INPUT_MEMORY;
- fimc_write(ctx, cfg0, EXYNOS_MSCTRL);
- break;
- case IPP_CMD_WB:
- fimc_set_type_ctrl(ctx, FIMC_WB_A);
- fimc_handle_lastend(ctx, true);
-
- /* setup FIMD */
- ret = fimc_set_camblk_fimd0_wb(ctx);
- if (ret < 0) {
- dev_err(dev, "camblk setup failed.\n");
- return ret;
- }
+ fimc_set_type_ctrl(ctx);
+ fimc_handle_lastend(ctx, false);
- set_wb.enable = 1;
- set_wb.refresh = property->refresh_rate;
- exynos_drm_ippnb_send_event(IPP_SET_WRITEBACK, (void *)&set_wb);
- break;
- case IPP_CMD_OUTPUT:
- default:
- ret = -EINVAL;
- dev_err(dev, "invalid operations.\n");
- return ret;
- }
+ /* setup dma */
+ cfg0 = fimc_read(ctx, EXYNOS_MSCTRL);
+ cfg0 &= ~EXYNOS_MSCTRL_INPUT_MASK;
+ cfg0 |= EXYNOS_MSCTRL_INPUT_MEMORY;
+ fimc_write(ctx, cfg0, EXYNOS_MSCTRL);
/* Reset status */
fimc_write(ctx, 0x0, EXYNOS_CISTATUS);
@@ -1498,36 +1044,18 @@ static int fimc_ippdrv_start(struct device *dev, enum drm_exynos_ipp_cmd cmd)
fimc_clear_bits(ctx, EXYNOS_CIOCTRL, EXYNOS_CIOCTRL_WEAVE_MASK);
- if (cmd == IPP_CMD_M2M)
- fimc_set_bits(ctx, EXYNOS_MSCTRL, EXYNOS_MSCTRL_ENVID);
-
- return 0;
+ fimc_set_bits(ctx, EXYNOS_MSCTRL, EXYNOS_MSCTRL_ENVID);
}
-static void fimc_ippdrv_stop(struct device *dev, enum drm_exynos_ipp_cmd cmd)
+static void fimc_stop(struct fimc_context *ctx)
{
- struct fimc_context *ctx = get_fimc_context(dev);
- struct drm_exynos_ipp_set_wb set_wb = {0, 0};
u32 cfg;
- DRM_DEBUG_KMS("cmd[%d]\n", cmd);
-
- switch (cmd) {
- case IPP_CMD_M2M:
- /* Source clear */
- cfg = fimc_read(ctx, EXYNOS_MSCTRL);
- cfg &= ~EXYNOS_MSCTRL_INPUT_MASK;
- cfg &= ~EXYNOS_MSCTRL_ENVID;
- fimc_write(ctx, cfg, EXYNOS_MSCTRL);
- break;
- case IPP_CMD_WB:
- exynos_drm_ippnb_send_event(IPP_SET_WRITEBACK, (void *)&set_wb);
- break;
- case IPP_CMD_OUTPUT:
- default:
- dev_err(dev, "invalid operations.\n");
- break;
- }
+ /* Source clear */
+ cfg = fimc_read(ctx, EXYNOS_MSCTRL);
+ cfg &= ~EXYNOS_MSCTRL_INPUT_MASK;
+ cfg &= ~EXYNOS_MSCTRL_ENVID;
+ fimc_write(ctx, cfg, EXYNOS_MSCTRL);
fimc_mask_irq(ctx, false);
@@ -1545,6 +1073,87 @@ static void fimc_ippdrv_stop(struct device *dev, enum drm_exynos_ipp_cmd cmd)
fimc_set_bits(ctx, EXYNOS_CIGCTRL, EXYNOS_CIGCTRL_IRQ_END_DISABLE);
}
+static int fimc_commit(struct exynos_drm_ipp *ipp,
+ struct exynos_drm_ipp_task *task)
+{
+ struct fimc_context *ctx =
+ container_of(ipp, struct fimc_context, ipp);
+
+ pm_runtime_get_sync(ctx->dev);
+ ctx->task = task;
+
+ fimc_src_set_fmt(ctx, task->src.buf.fourcc, task->src.buf.modifier);
+ fimc_src_set_size(ctx, &task->src);
+ fimc_src_set_transf(ctx, DRM_MODE_ROTATE_0);
+ fimc_src_set_addr(ctx, &task->src);
+ fimc_dst_set_fmt(ctx, task->dst.buf.fourcc, task->dst.buf.modifier);
+ fimc_dst_set_transf(ctx, task->transform.rotation);
+ fimc_dst_set_size(ctx, &task->dst);
+ fimc_dst_set_addr(ctx, &task->dst);
+ fimc_set_prescaler(ctx, &ctx->sc, &task->src.rect, &task->dst.rect);
+ fimc_start(ctx);
+
+ return 0;
+}
+
+static void fimc_abort(struct exynos_drm_ipp *ipp,
+ struct exynos_drm_ipp_task *task)
+{
+ struct fimc_context *ctx =
+ container_of(ipp, struct fimc_context, ipp);
+
+ fimc_reset(ctx);
+
+ if (ctx->task) {
+ struct exynos_drm_ipp_task *task = ctx->task;
+
+ ctx->task = NULL;
+ pm_runtime_mark_last_busy(ctx->dev);
+ pm_runtime_put_autosuspend(ctx->dev);
+ exynos_drm_ipp_task_done(task, -EIO);
+ }
+}
+
+static struct exynos_drm_ipp_funcs ipp_funcs = {
+ .commit = fimc_commit,
+ .abort = fimc_abort,
+};
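
The new ipp_funcs table is how the IPP core drives the hardware. A minimal sketch of the dispatch, with the core's real scheduling machinery (per-device todo lists and wait queues in exynos_drm_ipp.c) stripped away; example_should_cancel() is a hypothetical stand-in for the core's timeout/process-exit handling:

static int example_run_task(struct exynos_drm_ipp *ipp,
			    struct exynos_drm_ipp_task *task)
{
	int ret;

	/* hand the task to the hardware driver (fimc_commit() here) */
	ret = ipp->funcs->commit(ipp, task);
	if (ret)
		return ret;

	/* on timeout or process exit the core asks the driver to stop */
	if (example_should_cancel(task))	/* hypothetical helper */
		ipp->funcs->abort(ipp, task);

	return 0;
}
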
+
+static int fimc_bind(struct device *dev, struct device *master, void *data)
+{
+ struct fimc_context *ctx = dev_get_drvdata(dev);
+ struct drm_device *drm_dev = data;
+ struct exynos_drm_ipp *ipp = &ctx->ipp;
+
+ ctx->drm_dev = drm_dev;
+ drm_iommu_attach_device(drm_dev, dev);
+
+ exynos_drm_ipp_register(drm_dev, ipp, &ipp_funcs,
+ DRM_EXYNOS_IPP_CAP_CROP | DRM_EXYNOS_IPP_CAP_ROTATE |
+ DRM_EXYNOS_IPP_CAP_SCALE | DRM_EXYNOS_IPP_CAP_CONVERT,
+ ctx->formats, ctx->num_formats, "fimc");
+
+ dev_info(dev, "The exynos fimc has been probed successfully\n");
+
+ return 0;
+}
+
+static void fimc_unbind(struct device *dev, struct device *master,
+ void *data)
+{
+ struct fimc_context *ctx = dev_get_drvdata(dev);
+ struct drm_device *drm_dev = data;
+ struct exynos_drm_ipp *ipp = &ctx->ipp;
+
+ exynos_drm_ipp_unregister(drm_dev, ipp);
+ drm_iommu_detach_device(drm_dev, dev);
+}
+
+static const struct component_ops fimc_component_ops = {
+ .bind = fimc_bind,
+ .unbind = fimc_unbind,
+};
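
component_add() only registers this device as one piece of the aggregate DRM device; the counterpart lives in the master driver. A sketch of that master side under illustrative names (the real matching logic is in exynos_drm_drv.c):

#include <linux/component.h>

static int example_compare_dev(struct device *dev, void *data)
{
	return dev == data;
}

static int example_master_bind(struct device *master)
{
	/* the real bind creates the drm_device and passes it as 'data' */
	return component_bind_all(master, NULL);
}

static void example_master_unbind(struct device *master)
{
	component_unbind_all(master, NULL);
}

static const struct component_master_ops example_master_ops = {
	.bind	= example_master_bind,
	.unbind	= example_master_unbind,
};

static int example_master_add(struct device *dev, struct device *subdev)
{
	struct component_match *match = NULL;

	component_match_add(dev, &match, example_compare_dev, subdev);
	return component_master_add_with_match(dev, &example_master_ops,
					       match);
}

Once every matched component has probed, the framework calls the master's bind, which in turn binds each component (fimc_bind() above) against the shared drm_device.
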
+
static void fimc_put_clocks(struct fimc_context *ctx)
{
int i;
@@ -1559,7 +1168,7 @@ static void fimc_put_clocks(struct fimc_context *ctx)
static int fimc_setup_clocks(struct fimc_context *ctx)
{
- struct device *fimc_dev = ctx->ippdrv.dev;
+ struct device *fimc_dev = ctx->dev;
struct device *dev;
int ret, i;
@@ -1574,8 +1183,6 @@ static int fimc_setup_clocks(struct fimc_context *ctx)
ctx->clocks[i] = clk_get(dev, fimc_clock_names[i]);
if (IS_ERR(ctx->clocks[i])) {
- if (i >= FIMC_CLK_MUX)
- break;
ret = PTR_ERR(ctx->clocks[i]);
dev_err(fimc_dev, "failed to get clock: %s\n",
fimc_clock_names[i]);
@@ -1583,20 +1190,6 @@ static int fimc_setup_clocks(struct fimc_context *ctx)
}
}
- /* Optional FIMC LCLK parent clock setting */
- if (!IS_ERR(ctx->clocks[FIMC_CLK_PARENT])) {
- ret = clk_set_parent(ctx->clocks[FIMC_CLK_MUX],
- ctx->clocks[FIMC_CLK_PARENT]);
- if (ret < 0) {
- dev_err(fimc_dev, "failed to set parent.\n");
- goto e_clk_free;
- }
- }
-
- ret = clk_set_rate(ctx->clocks[FIMC_CLK_LCLK], ctx->clk_frequency);
- if (ret < 0)
- goto e_clk_free;
-
ret = clk_prepare_enable(ctx->clocks[FIMC_CLK_LCLK]);
if (!ret)
return ret;
@@ -1605,57 +1198,118 @@ e_clk_free:
return ret;
}
-static int fimc_parse_dt(struct fimc_context *ctx)
+int exynos_drm_check_fimc_device(struct device *dev)
{
- struct device_node *node = ctx->ippdrv.dev->of_node;
+	int id = of_alias_get_id(dev->of_node, "fimc");
- /* Handle only devices that support the LCD Writeback data path */
- if (!of_property_read_bool(node, "samsung,lcd-wb"))
- return -ENODEV;
+ if (id >= 0 && (BIT(id) & fimc_mask))
+ return 0;
+ return -ENODEV;
+}
- if (of_property_read_u32(node, "clock-frequency",
- &ctx->clk_frequency))
- ctx->clk_frequency = FIMC_DEFAULT_LCLK_FREQUENCY;
+static const unsigned int fimc_formats[] = {
+ DRM_FORMAT_XRGB8888, DRM_FORMAT_RGB565,
+ DRM_FORMAT_NV12, DRM_FORMAT_NV16, DRM_FORMAT_NV21, DRM_FORMAT_NV61,
+ DRM_FORMAT_UYVY, DRM_FORMAT_VYUY, DRM_FORMAT_YUYV, DRM_FORMAT_YVYU,
+ DRM_FORMAT_YUV420, DRM_FORMAT_YVU420, DRM_FORMAT_YUV422,
+ DRM_FORMAT_YUV444,
+};
- ctx->id = of_alias_get_id(node, "fimc");
+static const unsigned int fimc_tiled_formats[] = {
+ DRM_FORMAT_NV12, DRM_FORMAT_NV21,
+};
- if (ctx->id < 0) {
- dev_err(ctx->ippdrv.dev, "failed to get node alias id.\n");
- return -EINVAL;
- }
+static const struct drm_exynos_ipp_limit fimc_4210_limits_v1[] = {
+ { IPP_SIZE_LIMIT(BUFFER, .h = { 16, 8192, 8 }, .v = { 16, 8192, 2 }) },
+ { IPP_SIZE_LIMIT(AREA, .h = { 16, 4224, 2 }, .v = { 16, 0, 2 }) },
+ { IPP_SIZE_LIMIT(ROTATED, .h = { 128, 1920 }, .v = { 128, 0 }) },
+ { IPP_SCALE_LIMIT(.h = { (1 << 16) / 64, (1 << 16) * 64 },
+ .v = { (1 << 16) / 64, (1 << 16) * 64 }) },
+};
- return 0;
-}
+static const struct drm_exynos_ipp_limit fimc_4210_limits_v2[] = {
+ { IPP_SIZE_LIMIT(BUFFER, .h = { 16, 8192, 8 }, .v = { 16, 8192, 2 }) },
+ { IPP_SIZE_LIMIT(AREA, .h = { 16, 1920, 2 }, .v = { 16, 0, 2 }) },
+ { IPP_SIZE_LIMIT(ROTATED, .h = { 128, 1366 }, .v = { 128, 0 }) },
+ { IPP_SCALE_LIMIT(.h = { (1 << 16) / 64, (1 << 16) * 64 },
+ .v = { (1 << 16) / 64, (1 << 16) * 64 }) },
+};
+
+static const struct drm_exynos_ipp_limit fimc_4210_limits_tiled_v1[] = {
+ { IPP_SIZE_LIMIT(BUFFER, .h = { 128, 1920, 128 }, .v = { 32, 1920, 32 }) },
+ { IPP_SIZE_LIMIT(AREA, .h = { 128, 1920, 2 }, .v = { 128, 0, 2 }) },
+ { IPP_SCALE_LIMIT(.h = { (1 << 16) / 64, (1 << 16) * 64 },
+ .v = { (1 << 16) / 64, (1 << 16) * 64 }) },
+};
+
+static const struct drm_exynos_ipp_limit fimc_4210_limits_tiled_v2[] = {
+ { IPP_SIZE_LIMIT(BUFFER, .h = { 128, 1920, 128 }, .v = { 32, 1920, 32 }) },
+ { IPP_SIZE_LIMIT(AREA, .h = { 128, 1366, 2 }, .v = { 128, 0, 2 }) },
+ { IPP_SCALE_LIMIT(.h = { (1 << 16) / 64, (1 << 16) * 64 },
+ .v = { (1 << 16) / 64, (1 << 16) * 64 }) },
+};
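
The IPP_SCALE_LIMIT entries are 16.16 fixed-point scale factors, so (1 << 16) / 64 permits downscaling to 1/64 and (1 << 16) * 64 upscaling by 64x. A sketch of checking a request against such a window, assuming the factor is defined as dst/src (helper name illustrative; the real validation lives in the IPP core):

#include <linux/math64.h>

static bool example_scale_ok(u32 src, u32 dst, u64 min, u64 max)
{
	/* scale factor dst/src in 16.16 fixed point */
	u64 ratio = (u64)dst << 16;

	do_div(ratio, src);
	return ratio >= min && ratio <= max;
}

For the tables above, halving a 1920-pixel source to 960 pixels gives a ratio of 0x8000, comfortably inside { (1 << 16) / 64, (1 << 16) * 64 }.
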
static int fimc_probe(struct platform_device *pdev)
{
+ const struct drm_exynos_ipp_limit *limits;
+ struct exynos_drm_ipp_formats *formats;
struct device *dev = &pdev->dev;
struct fimc_context *ctx;
struct resource *res;
- struct exynos_drm_ippdrv *ippdrv;
int ret;
+ int i, j, num_limits, num_formats;
- if (!dev->of_node) {
- dev_err(dev, "device tree node not found.\n");
+ if (exynos_drm_check_fimc_device(dev) != 0)
return -ENODEV;
- }
ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
- ctx->ippdrv.dev = dev;
+ ctx->dev = dev;
+ ctx->id = of_alias_get_id(dev->of_node, "fimc");
- ret = fimc_parse_dt(ctx);
- if (ret < 0)
- return ret;
+ /* construct formats/limits array */
+ num_formats = ARRAY_SIZE(fimc_formats) + ARRAY_SIZE(fimc_tiled_formats);
+ formats = devm_kzalloc(dev, sizeof(*formats) * num_formats, GFP_KERNEL);
+ if (!formats)
+ return -ENOMEM;
+
+ /* linear formats */
+ if (ctx->id < 3) {
+ limits = fimc_4210_limits_v1;
+ num_limits = ARRAY_SIZE(fimc_4210_limits_v1);
+ } else {
+ limits = fimc_4210_limits_v2;
+ num_limits = ARRAY_SIZE(fimc_4210_limits_v2);
+ }
+ for (i = 0; i < ARRAY_SIZE(fimc_formats); i++) {
+ formats[i].fourcc = fimc_formats[i];
+ formats[i].type = DRM_EXYNOS_IPP_FORMAT_SOURCE |
+ DRM_EXYNOS_IPP_FORMAT_DESTINATION;
+ formats[i].limits = limits;
+ formats[i].num_limits = num_limits;
+ }
- ctx->sysreg = syscon_regmap_lookup_by_phandle(dev->of_node,
- "samsung,sysreg");
- if (IS_ERR(ctx->sysreg)) {
- dev_err(dev, "syscon regmap lookup failed.\n");
- return PTR_ERR(ctx->sysreg);
+ /* tiled formats */
+ if (ctx->id < 3) {
+ limits = fimc_4210_limits_tiled_v1;
+ num_limits = ARRAY_SIZE(fimc_4210_limits_tiled_v1);
+ } else {
+ limits = fimc_4210_limits_tiled_v2;
+ num_limits = ARRAY_SIZE(fimc_4210_limits_tiled_v2);
}
+ for (j = i, i = 0; i < ARRAY_SIZE(fimc_tiled_formats); j++, i++) {
+ formats[j].fourcc = fimc_tiled_formats[i];
+ formats[j].modifier = DRM_FORMAT_MOD_SAMSUNG_64_32_TILE;
+ formats[j].type = DRM_EXYNOS_IPP_FORMAT_SOURCE |
+ DRM_EXYNOS_IPP_FORMAT_DESTINATION;
+ formats[j].limits = limits;
+ formats[j].num_limits = num_limits;
+ }
+
+ ctx->formats = formats;
+ ctx->num_formats = num_formats;
/* resource memory */
ctx->regs_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -1670,9 +1324,8 @@ static int fimc_probe(struct platform_device *pdev)
return -ENOENT;
}
- ctx->irq = res->start;
- ret = devm_request_threaded_irq(dev, ctx->irq, NULL, fimc_irq_handler,
- IRQF_ONESHOT, "drm_fimc", ctx);
+ ret = devm_request_irq(dev, res->start, fimc_irq_handler,
+ 0, dev_name(dev), ctx);
if (ret < 0) {
dev_err(dev, "failed to request irq.\n");
return ret;
@@ -1682,39 +1335,24 @@ static int fimc_probe(struct platform_device *pdev)
if (ret < 0)
return ret;
- ippdrv = &ctx->ippdrv;
- ippdrv->ops[EXYNOS_DRM_OPS_SRC] = &fimc_src_ops;
- ippdrv->ops[EXYNOS_DRM_OPS_DST] = &fimc_dst_ops;
- ippdrv->check_property = fimc_ippdrv_check_property;
- ippdrv->reset = fimc_ippdrv_reset;
- ippdrv->start = fimc_ippdrv_start;
- ippdrv->stop = fimc_ippdrv_stop;
- ret = fimc_init_prop_list(ippdrv);
- if (ret < 0) {
- dev_err(dev, "failed to init property list.\n");
- goto err_put_clk;
- }
-
- DRM_DEBUG_KMS("id[%d]ippdrv[%pK]\n", ctx->id, ippdrv);
-
spin_lock_init(&ctx->lock);
platform_set_drvdata(pdev, ctx);
+ pm_runtime_use_autosuspend(dev);
+ pm_runtime_set_autosuspend_delay(dev, FIMC_AUTOSUSPEND_DELAY);
pm_runtime_enable(dev);
- ret = exynos_drm_ippdrv_register(ippdrv);
- if (ret < 0) {
- dev_err(dev, "failed to register drm fimc device.\n");
+ ret = component_add(dev, &fimc_component_ops);
+ if (ret)
goto err_pm_dis;
- }
dev_info(dev, "drm fimc registered successfully.\n");
return 0;
err_pm_dis:
+ pm_runtime_dont_use_autosuspend(dev);
pm_runtime_disable(dev);
-err_put_clk:
fimc_put_clocks(ctx);
return ret;
@@ -1724,42 +1362,24 @@ static int fimc_remove(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
struct fimc_context *ctx = get_fimc_context(dev);
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
- exynos_drm_ippdrv_unregister(ippdrv);
+ component_del(dev, &fimc_component_ops);
+ pm_runtime_dont_use_autosuspend(dev);
+ pm_runtime_disable(dev);
fimc_put_clocks(ctx);
- pm_runtime_set_suspended(dev);
- pm_runtime_disable(dev);
return 0;
}
#ifdef CONFIG_PM
-static int fimc_clk_ctrl(struct fimc_context *ctx, bool enable)
-{
- DRM_DEBUG_KMS("enable[%d]\n", enable);
-
- if (enable) {
- clk_prepare_enable(ctx->clocks[FIMC_CLK_GATE]);
- clk_prepare_enable(ctx->clocks[FIMC_CLK_WB_A]);
- ctx->suspended = false;
- } else {
- clk_disable_unprepare(ctx->clocks[FIMC_CLK_GATE]);
- clk_disable_unprepare(ctx->clocks[FIMC_CLK_WB_A]);
- ctx->suspended = true;
- }
-
- return 0;
-}
-
static int fimc_runtime_suspend(struct device *dev)
{
struct fimc_context *ctx = get_fimc_context(dev);
DRM_DEBUG_KMS("id[%d]\n", ctx->id);
-
- return fimc_clk_ctrl(ctx, false);
+ clk_disable_unprepare(ctx->clocks[FIMC_CLK_GATE]);
+ return 0;
}
static int fimc_runtime_resume(struct device *dev)
@@ -1767,8 +1387,7 @@ static int fimc_runtime_resume(struct device *dev)
struct fimc_context *ctx = get_fimc_context(dev);
DRM_DEBUG_KMS("id[%d]\n", ctx->id);
-
- return fimc_clk_ctrl(ctx, true);
+ return clk_prepare_enable(ctx->clocks[FIMC_CLK_GATE]);
}
#endif
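
These runtime PM callbacks now only gate FIMC_CLK_GATE; the interesting part is when they run. A minimal sketch of the autosuspend pattern the rest of the driver relies on (fimc_probe() arms it, fimc_commit()/fimc_abort() use the get/put side; function names here are illustrative):

#include <linux/pm_runtime.h>

static void example_pm_setup(struct device *dev)
{
	/* suspend after a period of inactivity rather than on last put */
	pm_runtime_use_autosuspend(dev);
	pm_runtime_set_autosuspend_delay(dev, 2000);	/* ms */
	pm_runtime_enable(dev);
}

static void example_hw_access(struct device *dev)
{
	pm_runtime_get_sync(dev);		/* resume if suspended */
	/* ... program the hardware ... */
	pm_runtime_mark_last_busy(dev);
	pm_runtime_put_autosuspend(dev);	/* suspend after the delay */
}
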
@@ -1795,4 +1414,3 @@ struct platform_driver fimc_driver = {
.pm = &fimc_pm_ops,
},
};
-
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimc.h b/drivers/gpu/drm/exynos/exynos_drm_fimc.h
deleted file mode 100644
index 127a424..0000000
--- a/drivers/gpu/drm/exynos/exynos_drm_fimc.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (c) 2012 Samsung Electronics Co., Ltd.
- *
- * Authors:
- * Eunchul Kim <chulspro.kim@samsung.com>
- * Jinyoung Jeon <jy0.jeon@samsung.com>
- * Sangmin Lee <lsmin.lee@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-#ifndef _EXYNOS_DRM_FIMC_H_
-#define _EXYNOS_DRM_FIMC_H_
-
-/*
- * TODO
- * FIMD output interface notifier callback.
- */
-
-#endif /* _EXYNOS_DRM_FIMC_H_ */
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
index d42ae2b..01b1570 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
@@ -121,6 +121,12 @@ static struct fimd_driver_data s3c64xx_fimd_driver_data = {
.has_limited_fmt = 1,
};
+static struct fimd_driver_data s5pv210_fimd_driver_data = {
+ .timing_base = 0x0,
+ .has_shadowcon = 1,
+ .has_clksel = 1,
+};
+
static struct fimd_driver_data exynos3_fimd_driver_data = {
.timing_base = 0x20000,
.lcdblk_offset = 0x210,
@@ -193,6 +199,8 @@ struct fimd_context {
static const struct of_device_id fimd_driver_dt_match[] = {
{ .compatible = "samsung,s3c6400-fimd",
.data = &s3c64xx_fimd_driver_data },
+ { .compatible = "samsung,s5pv210-fimd",
+ .data = &s5pv210_fimd_driver_data },
{ .compatible = "samsung,exynos3250-fimd",
.data = &exynos3_fimd_driver_data },
{ .compatible = "samsung,exynos4210-fimd",
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c
index 11cc01b..6e1494f 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c
@@ -431,37 +431,24 @@ int exynos_drm_gem_dumb_create(struct drm_file *file_priv,
return 0;
}
-int exynos_drm_gem_fault(struct vm_fault *vmf)
+vm_fault_t exynos_drm_gem_fault(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
struct drm_gem_object *obj = vma->vm_private_data;
struct exynos_drm_gem *exynos_gem = to_exynos_gem(obj);
unsigned long pfn;
pgoff_t page_offset;
- int ret;
page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
if (page_offset >= (exynos_gem->size >> PAGE_SHIFT)) {
DRM_ERROR("invalid page offset\n");
- ret = -EINVAL;
- goto out;
+ return VM_FAULT_SIGBUS;
}
pfn = page_to_pfn(exynos_gem->pages[page_offset]);
- ret = vm_insert_mixed(vma, vmf->address, __pfn_to_pfn_t(pfn, PFN_DEV));
-
-out:
- switch (ret) {
- case 0:
- case -ERESTARTSYS:
- case -EINTR:
- return VM_FAULT_NOPAGE;
- case -ENOMEM:
- return VM_FAULT_OOM;
- default:
- return VM_FAULT_SIGBUS;
- }
+ return vmf_insert_mixed(vma, vmf->address,
+ __pfn_to_pfn_t(pfn, PFN_DEV));
}
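
vmf_insert_mixed() subsumes the errno-to-VM_FAULT_* switch the old handler carried. Roughly, the helper performs that translation itself; a simplified view (see mm/memory.c for the real implementation):

#include <linux/mm.h>
#include <linux/pfn_t.h>

static vm_fault_t example_vmf_insert_mixed(struct vm_area_struct *vma,
					   unsigned long addr, pfn_t pfn)
{
	int err = vm_insert_mixed(vma, addr, pfn);

	if (err == -ENOMEM)
		return VM_FAULT_OOM;
	if (err < 0 && err != -EBUSY)
		return VM_FAULT_SIGBUS;

	return VM_FAULT_NOPAGE;
}
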
static int exynos_drm_gem_mmap_obj(struct drm_gem_object *obj,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.h b/drivers/gpu/drm/exynos/exynos_drm_gem.h
index 5a4c7de..9057d7f 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.h
@@ -13,6 +13,7 @@
#define _EXYNOS_DRM_GEM_H_
#include <drm/drm_gem.h>
+#include <linux/mm_types.h>
#define to_exynos_gem(x) container_of(x, struct exynos_drm_gem, base)
@@ -111,7 +112,7 @@ int exynos_drm_gem_dumb_create(struct drm_file *file_priv,
struct drm_mode_create_dumb *args);
/* page fault handler and mmap fault address(virtual) to physical memory. */
-int exynos_drm_gem_fault(struct vm_fault *vmf);
+vm_fault_t exynos_drm_gem_fault(struct vm_fault *vmf);
/* set vm_flags and we can change the vm attribute to other one at here. */
int exynos_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
index 0506b2b..e99dd1e 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
@@ -12,18 +12,20 @@
*
*/
#include <linux/kernel.h>
+#include <linux/component.h>
#include <linux/platform_device.h>
#include <linux/clk.h>
#include <linux/pm_runtime.h>
#include <linux/mfd/syscon.h>
+#include <linux/of_device.h>
#include <linux/regmap.h>
#include <drm/drmP.h>
#include <drm/exynos_drm.h>
#include "regs-gsc.h"
#include "exynos_drm_drv.h"
+#include "exynos_drm_iommu.h"
#include "exynos_drm_ipp.h"
-#include "exynos_drm_gsc.h"
/*
* GSC stands for General SCaler and
@@ -31,26 +33,10 @@
* input DMA reads image data from the memory.
* output DMA writes image data to memory.
* GSC supports image rotation and image effect functions.
- *
- * M2M operation : supports crop/scale/rotation/csc so on.
- * Memory ----> GSC H/W ----> Memory.
- * Writeback operation : supports cloned screen with FIMD.
- * FIMD ----> GSC H/W ----> Memory.
- * Output operation : supports direct display using local path.
- * Memory ----> GSC H/W ----> FIMD, Mixer.
*/
-/*
- * TODO
- * 1. check suspend/resume api if needed.
- * 2. need to check use case platform_device_id.
- * 3. check src/dst size with, height.
- * 4. added check_prepare api for right register.
- * 5. need to add supported list in prop_list.
- * 6. check prescaler/scaler optimization.
- */
-#define GSC_MAX_DEVS 4
+#define GSC_MAX_CLOCKS 8
#define GSC_MAX_SRC 4
#define GSC_MAX_DST 16
#define GSC_RESET_TIMEOUT 50
@@ -65,8 +51,6 @@
#define GSC_SC_DOWN_RATIO_4_8 131072
#define GSC_SC_DOWN_RATIO_3_8 174762
#define GSC_SC_DOWN_RATIO_2_8 262144
-#define GSC_REFRESH_MIN 12
-#define GSC_REFRESH_MAX 60
#define GSC_CROP_MAX 8192
#define GSC_CROP_MIN 32
#define GSC_SCALE_MAX 4224
@@ -77,10 +61,9 @@
#define GSC_COEF_H_8T 8
#define GSC_COEF_V_4T 4
#define GSC_COEF_DEPTH 3
+#define GSC_AUTOSUSPEND_DELAY 2000
#define get_gsc_context(dev) platform_get_drvdata(to_platform_device(dev))
-#define get_ctx_from_ippdrv(ippdrv) container_of(ippdrv,\
- struct gsc_context, ippdrv);
#define gsc_read(offset) readl(ctx->regs + (offset))
#define gsc_write(cfg, offset) writel(cfg, ctx->regs + (offset))
@@ -104,50 +87,47 @@ struct gsc_scaler {
};
/*
- * A structure of scaler capability.
- *
- * find user manual 49.2 features.
- * @tile_w: tile mode or rotation width.
- * @tile_h: tile mode or rotation height.
- * @w: other cases width.
- * @h: other cases height.
- */
-struct gsc_capability {
- /* tile or rotation */
- u32 tile_w;
- u32 tile_h;
- /* other cases */
- u32 w;
- u32 h;
-};
-
-/*
* A structure of gsc context.
*
- * @ippdrv: prepare initialization using ippdrv.
* @regs_res: register resources.
* @regs: memory mapped io registers.
- * @sysreg: handle to SYSREG block regmap.
- * @lock: locking of operations.
* @gsc_clk: gsc gate clock.
 * @sc: scaler information.
* @id: gsc id.
* @irq: irq number.
* @rotation: supports rotation of src.
- * @suspended: qos operations.
*/
struct gsc_context {
- struct exynos_drm_ippdrv ippdrv;
+ struct exynos_drm_ipp ipp;
+ struct drm_device *drm_dev;
+ struct device *dev;
+ struct exynos_drm_ipp_task *task;
+ struct exynos_drm_ipp_formats *formats;
+ unsigned int num_formats;
+
struct resource *regs_res;
void __iomem *regs;
- struct regmap *sysreg;
- struct mutex lock;
- struct clk *gsc_clk;
+ const char **clk_names;
+ struct clk *clocks[GSC_MAX_CLOCKS];
+ int num_clocks;
struct gsc_scaler sc;
int id;
int irq;
bool rotation;
- bool suspended;
+};
+
+/**
+ * struct gsc_driverdata - per device type driver data, used at init time.
+ *
+ * @limits: picture size limits array
+ * @clk_names: names of clocks needed by this variant
+ * @num_clocks: the number of clocks needed by this variant
+ */
+struct gsc_driverdata {
+ const struct drm_exynos_ipp_limit *limits;
+ int num_limits;
+ const char *clk_names[GSC_MAX_CLOCKS];
+ int num_clocks;
};
/* 8-tap Filter Coefficient */
@@ -438,25 +418,6 @@ static int gsc_sw_reset(struct gsc_context *ctx)
return 0;
}
-static void gsc_set_gscblk_fimd_wb(struct gsc_context *ctx, bool enable)
-{
- unsigned int gscblk_cfg;
-
- if (!ctx->sysreg)
- return;
-
- regmap_read(ctx->sysreg, SYSREG_GSCBLK_CFG1, &gscblk_cfg);
-
- if (enable)
- gscblk_cfg |= GSC_BLK_DISP1WB_DEST(ctx->id) |
- GSC_BLK_GSCL_WB_IN_SRC_SEL(ctx->id) |
- GSC_BLK_SW_RESET_WB_DEST(ctx->id);
- else
- gscblk_cfg |= GSC_BLK_PXLASYNC_LO_MASK_WB(ctx->id);
-
- regmap_write(ctx->sysreg, SYSREG_GSCBLK_CFG1, gscblk_cfg);
-}
-
static void gsc_handle_irq(struct gsc_context *ctx, bool enable,
bool overflow, bool done)
{
@@ -487,10 +448,8 @@ static void gsc_handle_irq(struct gsc_context *ctx, bool enable,
}
-static int gsc_src_set_fmt(struct device *dev, u32 fmt)
+static void gsc_src_set_fmt(struct gsc_context *ctx, u32 fmt)
{
- struct gsc_context *ctx = get_gsc_context(dev);
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
u32 cfg;
DRM_DEBUG_KMS("fmt[0x%x]\n", fmt);
@@ -506,6 +465,7 @@ static int gsc_src_set_fmt(struct device *dev, u32 fmt)
cfg |= GSC_IN_RGB565;
break;
case DRM_FORMAT_XRGB8888:
+ case DRM_FORMAT_ARGB8888:
cfg |= GSC_IN_XRGB8888;
break;
case DRM_FORMAT_BGRX8888:
@@ -548,115 +508,84 @@ static int gsc_src_set_fmt(struct device *dev, u32 fmt)
cfg |= (GSC_IN_CHROMA_ORDER_CBCR |
GSC_IN_YUV420_2P);
break;
- default:
- dev_err(ippdrv->dev, "invalid target yuv order 0x%x.\n", fmt);
- return -EINVAL;
}
gsc_write(cfg, GSC_IN_CON);
-
- return 0;
}
-static int gsc_src_set_transf(struct device *dev,
- enum drm_exynos_degree degree,
- enum drm_exynos_flip flip, bool *swap)
+static void gsc_src_set_transf(struct gsc_context *ctx, unsigned int rotation)
{
- struct gsc_context *ctx = get_gsc_context(dev);
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
+ unsigned int degree = rotation & DRM_MODE_ROTATE_MASK;
u32 cfg;
- DRM_DEBUG_KMS("degree[%d]flip[0x%x]\n", degree, flip);
-
cfg = gsc_read(GSC_IN_CON);
cfg &= ~GSC_IN_ROT_MASK;
switch (degree) {
- case EXYNOS_DRM_DEGREE_0:
- if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+ case DRM_MODE_ROTATE_0:
+ if (rotation & DRM_MODE_REFLECT_Y)
cfg |= GSC_IN_ROT_XFLIP;
- if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+ if (rotation & DRM_MODE_REFLECT_X)
cfg |= GSC_IN_ROT_YFLIP;
break;
- case EXYNOS_DRM_DEGREE_90:
- if (flip & EXYNOS_DRM_FLIP_VERTICAL)
- cfg |= GSC_IN_ROT_90_XFLIP;
- else if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
- cfg |= GSC_IN_ROT_90_YFLIP;
- else
- cfg |= GSC_IN_ROT_90;
+ case DRM_MODE_ROTATE_90:
+ cfg |= GSC_IN_ROT_90;
+ if (rotation & DRM_MODE_REFLECT_Y)
+ cfg |= GSC_IN_ROT_XFLIP;
+ if (rotation & DRM_MODE_REFLECT_X)
+ cfg |= GSC_IN_ROT_YFLIP;
break;
- case EXYNOS_DRM_DEGREE_180:
+ case DRM_MODE_ROTATE_180:
cfg |= GSC_IN_ROT_180;
- if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+ if (rotation & DRM_MODE_REFLECT_Y)
cfg &= ~GSC_IN_ROT_XFLIP;
- if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+ if (rotation & DRM_MODE_REFLECT_X)
cfg &= ~GSC_IN_ROT_YFLIP;
break;
- case EXYNOS_DRM_DEGREE_270:
+ case DRM_MODE_ROTATE_270:
cfg |= GSC_IN_ROT_270;
- if (flip & EXYNOS_DRM_FLIP_VERTICAL)
+ if (rotation & DRM_MODE_REFLECT_Y)
cfg &= ~GSC_IN_ROT_XFLIP;
- if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
+ if (rotation & DRM_MODE_REFLECT_X)
cfg &= ~GSC_IN_ROT_YFLIP;
break;
- default:
- dev_err(ippdrv->dev, "invalid degree value %d.\n", degree);
- return -EINVAL;
}
gsc_write(cfg, GSC_IN_CON);
ctx->rotation = (cfg & GSC_IN_ROT_90) ? 1 : 0;
- *swap = ctx->rotation;
-
- return 0;
}
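
The rotation parameter is now the standard DRM property bitmask: exactly one DRM_MODE_ROTATE_* bit plus optional DRM_MODE_REFLECT_X/Y bits, which is why ctx->rotation can be derived from the 90-degree bit alone. A small sketch of the decoding (function name illustrative; drm_rotation_simplify() from drm_blend.h can additionally fold unsupported combinations into supported ones):

#include <drm/drm_mode.h>
#include <drm/drm_blend.h>

static bool example_rotation_swaps_axes(unsigned int rotation)
{
	unsigned int degree = rotation & DRM_MODE_ROTATE_MASK;

	/* reflections (DRM_MODE_REFLECT_X/Y) mirror but never swap axes */
	return degree == DRM_MODE_ROTATE_90 || degree == DRM_MODE_ROTATE_270;
}
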
-static int gsc_src_set_size(struct device *dev, int swap,
- struct drm_exynos_pos *pos, struct drm_exynos_sz *sz)
+static void gsc_src_set_size(struct gsc_context *ctx,
+ struct exynos_drm_ipp_buffer *buf)
{
- struct gsc_context *ctx = get_gsc_context(dev);
- struct drm_exynos_pos img_pos = *pos;
struct gsc_scaler *sc = &ctx->sc;
u32 cfg;
- DRM_DEBUG_KMS("swap[%d]x[%d]y[%d]w[%d]h[%d]\n",
- swap, pos->x, pos->y, pos->w, pos->h);
-
- if (swap) {
- img_pos.w = pos->h;
- img_pos.h = pos->w;
- }
-
/* pixel offset */
- cfg = (GSC_SRCIMG_OFFSET_X(img_pos.x) |
- GSC_SRCIMG_OFFSET_Y(img_pos.y));
+ cfg = (GSC_SRCIMG_OFFSET_X(buf->rect.x) |
+ GSC_SRCIMG_OFFSET_Y(buf->rect.y));
gsc_write(cfg, GSC_SRCIMG_OFFSET);
/* cropped size */
- cfg = (GSC_CROPPED_WIDTH(img_pos.w) |
- GSC_CROPPED_HEIGHT(img_pos.h));
+ cfg = (GSC_CROPPED_WIDTH(buf->rect.w) |
+ GSC_CROPPED_HEIGHT(buf->rect.h));
gsc_write(cfg, GSC_CROPPED_SIZE);
- DRM_DEBUG_KMS("hsize[%d]vsize[%d]\n", sz->hsize, sz->vsize);
-
/* original size */
cfg = gsc_read(GSC_SRCIMG_SIZE);
cfg &= ~(GSC_SRCIMG_HEIGHT_MASK |
GSC_SRCIMG_WIDTH_MASK);
- cfg |= (GSC_SRCIMG_WIDTH(sz->hsize) |
- GSC_SRCIMG_HEIGHT(sz->vsize));
+ cfg |= (GSC_SRCIMG_WIDTH(buf->buf.width) |
+ GSC_SRCIMG_HEIGHT(buf->buf.height));
gsc_write(cfg, GSC_SRCIMG_SIZE);
cfg = gsc_read(GSC_IN_CON);
cfg &= ~GSC_IN_RGB_TYPE_MASK;
- DRM_DEBUG_KMS("width[%d]range[%d]\n", pos->w, sc->range);
-
- if (pos->w >= GSC_WIDTH_ITU_709)
+ if (buf->rect.w >= GSC_WIDTH_ITU_709)
if (sc->range)
cfg |= GSC_IN_RGB_HD_WIDE;
else
@@ -668,103 +597,39 @@ static int gsc_src_set_size(struct device *dev, int swap,
cfg |= GSC_IN_RGB_SD_NARROW;
gsc_write(cfg, GSC_IN_CON);
-
- return 0;
}
-static int gsc_src_set_buf_seq(struct gsc_context *ctx, u32 buf_id,
- enum drm_exynos_ipp_buf_type buf_type)
+static void gsc_src_set_buf_seq(struct gsc_context *ctx, u32 buf_id,
+ bool enqueue)
{
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
- bool masked;
+ bool masked = !enqueue;
u32 cfg;
u32 mask = 0x00000001 << buf_id;
- DRM_DEBUG_KMS("buf_id[%d]buf_type[%d]\n", buf_id, buf_type);
-
/* mask register set */
cfg = gsc_read(GSC_IN_BASE_ADDR_Y_MASK);
- switch (buf_type) {
- case IPP_BUF_ENQUEUE:
- masked = false;
- break;
- case IPP_BUF_DEQUEUE:
- masked = true;
- break;
- default:
- dev_err(ippdrv->dev, "invalid buf ctrl parameter.\n");
- return -EINVAL;
- }
-
/* sequence id */
cfg &= ~mask;
cfg |= masked << buf_id;
gsc_write(cfg, GSC_IN_BASE_ADDR_Y_MASK);
gsc_write(cfg, GSC_IN_BASE_ADDR_CB_MASK);
gsc_write(cfg, GSC_IN_BASE_ADDR_CR_MASK);
-
- return 0;
}
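
The *_MASK registers give every buffer slot one bit: a cleared bit marks the slot enqueued and usable by the DMA engine, a set bit masks it out. The update that both set_buf_seq helpers perform reduces to this sketch (helper name illustrative):

#include <linux/bitops.h>

static u32 example_update_buf_mask(u32 cfg, u32 buf_id, bool enqueue)
{
	u32 bit = BIT(buf_id);

	cfg &= ~bit;		/* drop the slot's previous state */
	if (!enqueue)
		cfg |= bit;	/* masked == dequeued */
	return cfg;
}
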
-static int gsc_src_set_addr(struct device *dev,
- struct drm_exynos_ipp_buf_info *buf_info, u32 buf_id,
- enum drm_exynos_ipp_buf_type buf_type)
+static void gsc_src_set_addr(struct gsc_context *ctx, u32 buf_id,
+ struct exynos_drm_ipp_buffer *buf)
{
- struct gsc_context *ctx = get_gsc_context(dev);
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
- struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node;
- struct drm_exynos_ipp_property *property;
-
- if (!c_node) {
- DRM_ERROR("failed to get c_node.\n");
- return -EFAULT;
- }
-
- property = &c_node->property;
-
- DRM_DEBUG_KMS("prop_id[%d]buf_id[%d]buf_type[%d]\n",
- property->prop_id, buf_id, buf_type);
-
- if (buf_id > GSC_MAX_SRC) {
- dev_info(ippdrv->dev, "invalid buf_id %d.\n", buf_id);
- return -EINVAL;
- }
-
/* address register set */
- switch (buf_type) {
- case IPP_BUF_ENQUEUE:
- gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_Y],
- GSC_IN_BASE_ADDR_Y(buf_id));
- gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_CB],
- GSC_IN_BASE_ADDR_CB(buf_id));
- gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_CR],
- GSC_IN_BASE_ADDR_CR(buf_id));
- break;
- case IPP_BUF_DEQUEUE:
- gsc_write(0x0, GSC_IN_BASE_ADDR_Y(buf_id));
- gsc_write(0x0, GSC_IN_BASE_ADDR_CB(buf_id));
- gsc_write(0x0, GSC_IN_BASE_ADDR_CR(buf_id));
- break;
- default:
- /* bypass */
- break;
- }
+ gsc_write(buf->dma_addr[0], GSC_IN_BASE_ADDR_Y(buf_id));
+ gsc_write(buf->dma_addr[1], GSC_IN_BASE_ADDR_CB(buf_id));
+ gsc_write(buf->dma_addr[2], GSC_IN_BASE_ADDR_CR(buf_id));
- return gsc_src_set_buf_seq(ctx, buf_id, buf_type);
+ gsc_src_set_buf_seq(ctx, buf_id, true);
}
-static struct exynos_drm_ipp_ops gsc_src_ops = {
- .set_fmt = gsc_src_set_fmt,
- .set_transf = gsc_src_set_transf,
- .set_size = gsc_src_set_size,
- .set_addr = gsc_src_set_addr,
-};
-
-static int gsc_dst_set_fmt(struct device *dev, u32 fmt)
+static void gsc_dst_set_fmt(struct gsc_context *ctx, u32 fmt)
{
- struct gsc_context *ctx = get_gsc_context(dev);
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
u32 cfg;
DRM_DEBUG_KMS("fmt[0x%x]\n", fmt);
@@ -779,8 +644,9 @@ static int gsc_dst_set_fmt(struct device *dev, u32 fmt)
case DRM_FORMAT_RGB565:
cfg |= GSC_OUT_RGB565;
break;
+ case DRM_FORMAT_ARGB8888:
case DRM_FORMAT_XRGB8888:
- cfg |= GSC_OUT_XRGB8888;
+ cfg |= (GSC_OUT_XRGB8888 | GSC_OUT_GLOBAL_ALPHA(0xff));
break;
case DRM_FORMAT_BGRX8888:
cfg |= (GSC_OUT_XRGB8888 | GSC_OUT_RB_SWAP);
@@ -819,69 +685,9 @@ static int gsc_dst_set_fmt(struct device *dev, u32 fmt)
cfg |= (GSC_OUT_CHROMA_ORDER_CBCR |
GSC_OUT_YUV420_2P);
break;
- default:
- dev_err(ippdrv->dev, "invalid target yuv order 0x%x.\n", fmt);
- return -EINVAL;
}
gsc_write(cfg, GSC_OUT_CON);
-
- return 0;
-}
-
-static int gsc_dst_set_transf(struct device *dev,
- enum drm_exynos_degree degree,
- enum drm_exynos_flip flip, bool *swap)
-{
- struct gsc_context *ctx = get_gsc_context(dev);
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
- u32 cfg;
-
- DRM_DEBUG_KMS("degree[%d]flip[0x%x]\n", degree, flip);
-
- cfg = gsc_read(GSC_IN_CON);
- cfg &= ~GSC_IN_ROT_MASK;
-
- switch (degree) {
- case EXYNOS_DRM_DEGREE_0:
- if (flip & EXYNOS_DRM_FLIP_VERTICAL)
- cfg |= GSC_IN_ROT_XFLIP;
- if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
- cfg |= GSC_IN_ROT_YFLIP;
- break;
- case EXYNOS_DRM_DEGREE_90:
- if (flip & EXYNOS_DRM_FLIP_VERTICAL)
- cfg |= GSC_IN_ROT_90_XFLIP;
- else if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
- cfg |= GSC_IN_ROT_90_YFLIP;
- else
- cfg |= GSC_IN_ROT_90;
- break;
- case EXYNOS_DRM_DEGREE_180:
- cfg |= GSC_IN_ROT_180;
- if (flip & EXYNOS_DRM_FLIP_VERTICAL)
- cfg &= ~GSC_IN_ROT_XFLIP;
- if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
- cfg &= ~GSC_IN_ROT_YFLIP;
- break;
- case EXYNOS_DRM_DEGREE_270:
- cfg |= GSC_IN_ROT_270;
- if (flip & EXYNOS_DRM_FLIP_VERTICAL)
- cfg &= ~GSC_IN_ROT_XFLIP;
- if (flip & EXYNOS_DRM_FLIP_HORIZONTAL)
- cfg &= ~GSC_IN_ROT_YFLIP;
- break;
- default:
- dev_err(ippdrv->dev, "invalid degree value %d.\n", degree);
- return -EINVAL;
- }
-
- gsc_write(cfg, GSC_IN_CON);
-
- ctx->rotation = (cfg & GSC_IN_ROT_90) ? 1 : 0;
- *swap = ctx->rotation;
-
- return 0;
}
static int gsc_get_ratio_shift(u32 src, u32 dst, u32 *ratio)
@@ -919,9 +725,9 @@ static void gsc_get_prescaler_shfactor(u32 hratio, u32 vratio, u32 *shfactor)
}
static int gsc_set_prescaler(struct gsc_context *ctx, struct gsc_scaler *sc,
- struct drm_exynos_pos *src, struct drm_exynos_pos *dst)
+ struct drm_exynos_ipp_task_rect *src,
+ struct drm_exynos_ipp_task_rect *dst)
{
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
u32 cfg;
u32 src_w, src_h, dst_w, dst_h;
int ret = 0;
@@ -939,13 +745,13 @@ static int gsc_set_prescaler(struct gsc_context *ctx, struct gsc_scaler *sc,
ret = gsc_get_ratio_shift(src_w, dst_w, &sc->pre_hratio);
if (ret) {
- dev_err(ippdrv->dev, "failed to get ratio horizontal.\n");
+ dev_err(ctx->dev, "failed to get ratio horizontal.\n");
return ret;
}
ret = gsc_get_ratio_shift(src_h, dst_h, &sc->pre_vratio);
if (ret) {
- dev_err(ippdrv->dev, "failed to get ratio vertical.\n");
+ dev_err(ctx->dev, "failed to get ratio vertical.\n");
return ret;
}
@@ -1039,47 +845,37 @@ static void gsc_set_scaler(struct gsc_context *ctx, struct gsc_scaler *sc)
gsc_write(cfg, GSC_MAIN_V_RATIO);
}
-static int gsc_dst_set_size(struct device *dev, int swap,
- struct drm_exynos_pos *pos, struct drm_exynos_sz *sz)
+static void gsc_dst_set_size(struct gsc_context *ctx,
+ struct exynos_drm_ipp_buffer *buf)
{
- struct gsc_context *ctx = get_gsc_context(dev);
- struct drm_exynos_pos img_pos = *pos;
struct gsc_scaler *sc = &ctx->sc;
u32 cfg;
- DRM_DEBUG_KMS("swap[%d]x[%d]y[%d]w[%d]h[%d]\n",
- swap, pos->x, pos->y, pos->w, pos->h);
-
- if (swap) {
- img_pos.w = pos->h;
- img_pos.h = pos->w;
- }
-
/* pixel offset */
- cfg = (GSC_DSTIMG_OFFSET_X(pos->x) |
- GSC_DSTIMG_OFFSET_Y(pos->y));
+ cfg = (GSC_DSTIMG_OFFSET_X(buf->rect.x) |
+ GSC_DSTIMG_OFFSET_Y(buf->rect.y));
gsc_write(cfg, GSC_DSTIMG_OFFSET);
/* scaled size */
- cfg = (GSC_SCALED_WIDTH(img_pos.w) | GSC_SCALED_HEIGHT(img_pos.h));
+ if (ctx->rotation)
+ cfg = (GSC_SCALED_WIDTH(buf->rect.h) |
+ GSC_SCALED_HEIGHT(buf->rect.w));
+ else
+ cfg = (GSC_SCALED_WIDTH(buf->rect.w) |
+ GSC_SCALED_HEIGHT(buf->rect.h));
gsc_write(cfg, GSC_SCALED_SIZE);
- DRM_DEBUG_KMS("hsize[%d]vsize[%d]\n", sz->hsize, sz->vsize);
-
/* original size */
cfg = gsc_read(GSC_DSTIMG_SIZE);
- cfg &= ~(GSC_DSTIMG_HEIGHT_MASK |
- GSC_DSTIMG_WIDTH_MASK);
- cfg |= (GSC_DSTIMG_WIDTH(sz->hsize) |
- GSC_DSTIMG_HEIGHT(sz->vsize));
+ cfg &= ~(GSC_DSTIMG_HEIGHT_MASK | GSC_DSTIMG_WIDTH_MASK);
+ cfg |= GSC_DSTIMG_WIDTH(buf->buf.width) |
+ GSC_DSTIMG_HEIGHT(buf->buf.height);
gsc_write(cfg, GSC_DSTIMG_SIZE);
cfg = gsc_read(GSC_OUT_CON);
cfg &= ~GSC_OUT_RGB_TYPE_MASK;
- DRM_DEBUG_KMS("width[%d]range[%d]\n", pos->w, sc->range);
-
- if (pos->w >= GSC_WIDTH_ITU_709)
+ if (buf->rect.w >= GSC_WIDTH_ITU_709)
if (sc->range)
cfg |= GSC_OUT_RGB_HD_WIDE;
else
@@ -1091,8 +887,6 @@ static int gsc_dst_set_size(struct device *dev, int swap,
cfg |= GSC_OUT_RGB_SD_NARROW;
gsc_write(cfg, GSC_OUT_CON);
-
- return 0;
}
static int gsc_dst_get_buf_seq(struct gsc_context *ctx)
@@ -1111,35 +905,16 @@ static int gsc_dst_get_buf_seq(struct gsc_context *ctx)
return buf_num;
}
-static int gsc_dst_set_buf_seq(struct gsc_context *ctx, u32 buf_id,
- enum drm_exynos_ipp_buf_type buf_type)
+static void gsc_dst_set_buf_seq(struct gsc_context *ctx, u32 buf_id,
+ bool enqueue)
{
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
- bool masked;
+ bool masked = !enqueue;
u32 cfg;
u32 mask = 0x00000001 << buf_id;
- int ret = 0;
-
- DRM_DEBUG_KMS("buf_id[%d]buf_type[%d]\n", buf_id, buf_type);
-
- mutex_lock(&ctx->lock);
/* mask register set */
cfg = gsc_read(GSC_OUT_BASE_ADDR_Y_MASK);
- switch (buf_type) {
- case IPP_BUF_ENQUEUE:
- masked = false;
- break;
- case IPP_BUF_DEQUEUE:
- masked = true;
- break;
- default:
- dev_err(ippdrv->dev, "invalid buf ctrl parameter.\n");
- ret = -EINVAL;
- goto err_unlock;
- }
-
/* sequence id */
cfg &= ~mask;
cfg |= masked << buf_id;
@@ -1148,94 +923,29 @@ static int gsc_dst_set_buf_seq(struct gsc_context *ctx, u32 buf_id,
gsc_write(cfg, GSC_OUT_BASE_ADDR_CR_MASK);
/* interrupt enable */
- if (buf_type == IPP_BUF_ENQUEUE &&
- gsc_dst_get_buf_seq(ctx) >= GSC_BUF_START)
+ if (enqueue && gsc_dst_get_buf_seq(ctx) >= GSC_BUF_START)
gsc_handle_irq(ctx, true, false, true);
/* interrupt disable */
- if (buf_type == IPP_BUF_DEQUEUE &&
- gsc_dst_get_buf_seq(ctx) <= GSC_BUF_STOP)
+ if (!enqueue && gsc_dst_get_buf_seq(ctx) <= GSC_BUF_STOP)
gsc_handle_irq(ctx, false, false, true);
-
-err_unlock:
- mutex_unlock(&ctx->lock);
- return ret;
}
-static int gsc_dst_set_addr(struct device *dev,
- struct drm_exynos_ipp_buf_info *buf_info, u32 buf_id,
- enum drm_exynos_ipp_buf_type buf_type)
+static void gsc_dst_set_addr(struct gsc_context *ctx,
+ u32 buf_id, struct exynos_drm_ipp_buffer *buf)
{
- struct gsc_context *ctx = get_gsc_context(dev);
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
- struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node;
- struct drm_exynos_ipp_property *property;
-
- if (!c_node) {
- DRM_ERROR("failed to get c_node.\n");
- return -EFAULT;
- }
-
- property = &c_node->property;
-
- DRM_DEBUG_KMS("prop_id[%d]buf_id[%d]buf_type[%d]\n",
- property->prop_id, buf_id, buf_type);
-
- if (buf_id > GSC_MAX_DST) {
- dev_info(ippdrv->dev, "invalid buf_id %d.\n", buf_id);
- return -EINVAL;
- }
-
/* address register set */
- switch (buf_type) {
- case IPP_BUF_ENQUEUE:
- gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_Y],
- GSC_OUT_BASE_ADDR_Y(buf_id));
- gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_CB],
- GSC_OUT_BASE_ADDR_CB(buf_id));
- gsc_write(buf_info->base[EXYNOS_DRM_PLANAR_CR],
- GSC_OUT_BASE_ADDR_CR(buf_id));
- break;
- case IPP_BUF_DEQUEUE:
- gsc_write(0x0, GSC_OUT_BASE_ADDR_Y(buf_id));
- gsc_write(0x0, GSC_OUT_BASE_ADDR_CB(buf_id));
- gsc_write(0x0, GSC_OUT_BASE_ADDR_CR(buf_id));
- break;
- default:
- /* bypass */
- break;
- }
+ gsc_write(buf->dma_addr[0], GSC_OUT_BASE_ADDR_Y(buf_id));
+ gsc_write(buf->dma_addr[1], GSC_OUT_BASE_ADDR_CB(buf_id));
+ gsc_write(buf->dma_addr[2], GSC_OUT_BASE_ADDR_CR(buf_id));
- return gsc_dst_set_buf_seq(ctx, buf_id, buf_type);
-}
-
-static struct exynos_drm_ipp_ops gsc_dst_ops = {
- .set_fmt = gsc_dst_set_fmt,
- .set_transf = gsc_dst_set_transf,
- .set_size = gsc_dst_set_size,
- .set_addr = gsc_dst_set_addr,
-};
-
-static int gsc_clk_ctrl(struct gsc_context *ctx, bool enable)
-{
- DRM_DEBUG_KMS("enable[%d]\n", enable);
-
- if (enable) {
- clk_prepare_enable(ctx->gsc_clk);
- ctx->suspended = false;
- } else {
- clk_disable_unprepare(ctx->gsc_clk);
- ctx->suspended = true;
- }
-
- return 0;
+ gsc_dst_set_buf_seq(ctx, buf_id, true);
}
static int gsc_get_src_buf_index(struct gsc_context *ctx)
{
u32 cfg, curr_index, i;
u32 buf_id = GSC_MAX_SRC;
- int ret;
DRM_DEBUG_KMS("gsc id[%d]\n", ctx->id);
@@ -1249,19 +959,15 @@ static int gsc_get_src_buf_index(struct gsc_context *ctx)
}
}
+ DRM_DEBUG_KMS("cfg[0x%x]curr_index[%d]buf_id[%d]\n", cfg,
+ curr_index, buf_id);
+
if (buf_id == GSC_MAX_SRC) {
DRM_ERROR("failed to get in buffer index.\n");
return -EINVAL;
}
- ret = gsc_src_set_buf_seq(ctx, buf_id, IPP_BUF_DEQUEUE);
- if (ret < 0) {
- DRM_ERROR("failed to dequeue.\n");
- return ret;
- }
-
- DRM_DEBUG_KMS("cfg[0x%x]curr_index[%d]buf_id[%d]\n", cfg,
- curr_index, buf_id);
+ gsc_src_set_buf_seq(ctx, buf_id, false);
return buf_id;
}
@@ -1270,7 +976,6 @@ static int gsc_get_dst_buf_index(struct gsc_context *ctx)
{
u32 cfg, curr_index, i;
u32 buf_id = GSC_MAX_DST;
- int ret;
DRM_DEBUG_KMS("gsc id[%d]\n", ctx->id);
@@ -1289,11 +994,7 @@ static int gsc_get_dst_buf_index(struct gsc_context *ctx)
return -EINVAL;
}
- ret = gsc_dst_set_buf_seq(ctx, buf_id, IPP_BUF_DEQUEUE);
- if (ret < 0) {
- DRM_ERROR("failed to dequeue.\n");
- return ret;
- }
+ gsc_dst_set_buf_seq(ctx, buf_id, false);
DRM_DEBUG_KMS("cfg[0x%x]curr_index[%d]buf_id[%d]\n", cfg,
curr_index, buf_id);
@@ -1304,215 +1005,55 @@ static int gsc_get_dst_buf_index(struct gsc_context *ctx)
static irqreturn_t gsc_irq_handler(int irq, void *dev_id)
{
struct gsc_context *ctx = dev_id;
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
- struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node;
- struct drm_exynos_ipp_event_work *event_work =
- c_node->event_work;
u32 status;
- int buf_id[EXYNOS_DRM_OPS_MAX];
+ int err = 0;
DRM_DEBUG_KMS("gsc id[%d]\n", ctx->id);
status = gsc_read(GSC_IRQ);
if (status & GSC_IRQ_STATUS_OR_IRQ) {
- dev_err(ippdrv->dev, "occurred overflow at %d, status 0x%x.\n",
+		dev_err(ctx->dev, "overflow occurred at %d, status 0x%x.\n",
ctx->id, status);
- return IRQ_NONE;
+ err = -EINVAL;
}
if (status & GSC_IRQ_STATUS_OR_FRM_DONE) {
- dev_dbg(ippdrv->dev, "occurred frame done at %d, status 0x%x.\n",
- ctx->id, status);
-
- buf_id[EXYNOS_DRM_OPS_SRC] = gsc_get_src_buf_index(ctx);
- if (buf_id[EXYNOS_DRM_OPS_SRC] < 0)
- return IRQ_HANDLED;
-
- buf_id[EXYNOS_DRM_OPS_DST] = gsc_get_dst_buf_index(ctx);
- if (buf_id[EXYNOS_DRM_OPS_DST] < 0)
- return IRQ_HANDLED;
-
- DRM_DEBUG_KMS("buf_id_src[%d]buf_id_dst[%d]\n",
- buf_id[EXYNOS_DRM_OPS_SRC], buf_id[EXYNOS_DRM_OPS_DST]);
-
- event_work->ippdrv = ippdrv;
- event_work->buf_id[EXYNOS_DRM_OPS_SRC] =
- buf_id[EXYNOS_DRM_OPS_SRC];
- event_work->buf_id[EXYNOS_DRM_OPS_DST] =
- buf_id[EXYNOS_DRM_OPS_DST];
- queue_work(ippdrv->event_workq, &event_work->work);
- }
-
- return IRQ_HANDLED;
-}
-
-static int gsc_init_prop_list(struct exynos_drm_ippdrv *ippdrv)
-{
- struct drm_exynos_ipp_prop_list *prop_list = &ippdrv->prop_list;
-
- prop_list->version = 1;
- prop_list->writeback = 1;
- prop_list->refresh_min = GSC_REFRESH_MIN;
- prop_list->refresh_max = GSC_REFRESH_MAX;
- prop_list->flip = (1 << EXYNOS_DRM_FLIP_VERTICAL) |
- (1 << EXYNOS_DRM_FLIP_HORIZONTAL);
- prop_list->degree = (1 << EXYNOS_DRM_DEGREE_0) |
- (1 << EXYNOS_DRM_DEGREE_90) |
- (1 << EXYNOS_DRM_DEGREE_180) |
- (1 << EXYNOS_DRM_DEGREE_270);
- prop_list->csc = 1;
- prop_list->crop = 1;
- prop_list->crop_max.hsize = GSC_CROP_MAX;
- prop_list->crop_max.vsize = GSC_CROP_MAX;
- prop_list->crop_min.hsize = GSC_CROP_MIN;
- prop_list->crop_min.vsize = GSC_CROP_MIN;
- prop_list->scale = 1;
- prop_list->scale_max.hsize = GSC_SCALE_MAX;
- prop_list->scale_max.vsize = GSC_SCALE_MAX;
- prop_list->scale_min.hsize = GSC_SCALE_MIN;
- prop_list->scale_min.vsize = GSC_SCALE_MIN;
-
- return 0;
-}
-
-static inline bool gsc_check_drm_flip(enum drm_exynos_flip flip)
-{
- switch (flip) {
- case EXYNOS_DRM_FLIP_NONE:
- case EXYNOS_DRM_FLIP_VERTICAL:
- case EXYNOS_DRM_FLIP_HORIZONTAL:
- case EXYNOS_DRM_FLIP_BOTH:
- return true;
- default:
- DRM_DEBUG_KMS("invalid flip\n");
- return false;
- }
-}
-
-static int gsc_ippdrv_check_property(struct device *dev,
- struct drm_exynos_ipp_property *property)
-{
- struct gsc_context *ctx = get_gsc_context(dev);
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
- struct drm_exynos_ipp_prop_list *pp = &ippdrv->prop_list;
- struct drm_exynos_ipp_config *config;
- struct drm_exynos_pos *pos;
- struct drm_exynos_sz *sz;
- bool swap;
- int i;
-
- for_each_ipp_ops(i) {
- if ((i == EXYNOS_DRM_OPS_SRC) &&
- (property->cmd == IPP_CMD_WB))
- continue;
+ int src_buf_id, dst_buf_id;
- config = &property->config[i];
- pos = &config->pos;
- sz = &config->sz;
-
- /* check for flip */
- if (!gsc_check_drm_flip(config->flip)) {
- DRM_ERROR("invalid flip.\n");
- goto err_property;
- }
-
- /* check for degree */
- switch (config->degree) {
- case EXYNOS_DRM_DEGREE_90:
- case EXYNOS_DRM_DEGREE_270:
- swap = true;
- break;
- case EXYNOS_DRM_DEGREE_0:
- case EXYNOS_DRM_DEGREE_180:
- swap = false;
- break;
- default:
- DRM_ERROR("invalid degree.\n");
- goto err_property;
- }
+		dev_dbg(ctx->dev, "frame done occurred at %d, status 0x%x.\n",
+ ctx->id, status);
- /* check for buffer bound */
- if ((pos->x + pos->w > sz->hsize) ||
- (pos->y + pos->h > sz->vsize)) {
- DRM_ERROR("out of buf bound.\n");
- goto err_property;
- }
+ src_buf_id = gsc_get_src_buf_index(ctx);
+ dst_buf_id = gsc_get_dst_buf_index(ctx);
- /* check for crop */
- if ((i == EXYNOS_DRM_OPS_SRC) && (pp->crop)) {
- if (swap) {
- if ((pos->h < pp->crop_min.hsize) ||
- (sz->vsize > pp->crop_max.hsize) ||
- (pos->w < pp->crop_min.vsize) ||
- (sz->hsize > pp->crop_max.vsize)) {
- DRM_ERROR("out of crop size.\n");
- goto err_property;
- }
- } else {
- if ((pos->w < pp->crop_min.hsize) ||
- (sz->hsize > pp->crop_max.hsize) ||
- (pos->h < pp->crop_min.vsize) ||
- (sz->vsize > pp->crop_max.vsize)) {
- DRM_ERROR("out of crop size.\n");
- goto err_property;
- }
- }
- }
+ DRM_DEBUG_KMS("buf_id_src[%d]buf_id_dst[%d]\n", src_buf_id,
+ dst_buf_id);
- /* check for scale */
- if ((i == EXYNOS_DRM_OPS_DST) && (pp->scale)) {
- if (swap) {
- if ((pos->h < pp->scale_min.hsize) ||
- (sz->vsize > pp->scale_max.hsize) ||
- (pos->w < pp->scale_min.vsize) ||
- (sz->hsize > pp->scale_max.vsize)) {
- DRM_ERROR("out of scale size.\n");
- goto err_property;
- }
- } else {
- if ((pos->w < pp->scale_min.hsize) ||
- (sz->hsize > pp->scale_max.hsize) ||
- (pos->h < pp->scale_min.vsize) ||
- (sz->vsize > pp->scale_max.vsize)) {
- DRM_ERROR("out of scale size.\n");
- goto err_property;
- }
- }
- }
+ if (src_buf_id < 0 || dst_buf_id < 0)
+ err = -EINVAL;
}
- return 0;
-
-err_property:
- for_each_ipp_ops(i) {
- if ((i == EXYNOS_DRM_OPS_SRC) &&
- (property->cmd == IPP_CMD_WB))
- continue;
+ if (ctx->task) {
+ struct exynos_drm_ipp_task *task = ctx->task;
- config = &property->config[i];
- pos = &config->pos;
- sz = &config->sz;
-
- DRM_ERROR("[%s]f[%d]r[%d]pos[%d %d %d %d]sz[%d %d]\n",
- i ? "dst" : "src", config->flip, config->degree,
- pos->x, pos->y, pos->w, pos->h,
- sz->hsize, sz->vsize);
+ ctx->task = NULL;
+ pm_runtime_mark_last_busy(ctx->dev);
+ pm_runtime_put_autosuspend(ctx->dev);
+ exynos_drm_ipp_task_done(task, err);
}
- return -EINVAL;
+ return IRQ_HANDLED;
}
-
-static int gsc_ippdrv_reset(struct device *dev)
+static int gsc_reset(struct gsc_context *ctx)
{
- struct gsc_context *ctx = get_gsc_context(dev);
struct gsc_scaler *sc = &ctx->sc;
int ret;
/* reset h/w block */
ret = gsc_sw_reset(ctx);
if (ret < 0) {
- dev_err(dev, "failed to reset hardware.\n");
+ dev_err(ctx->dev, "failed to reset hardware.\n");
return ret;
}
@@ -1523,166 +1064,172 @@ static int gsc_ippdrv_reset(struct device *dev)
return 0;
}
-static int gsc_ippdrv_start(struct device *dev, enum drm_exynos_ipp_cmd cmd)
+static void gsc_start(struct gsc_context *ctx)
{
- struct gsc_context *ctx = get_gsc_context(dev);
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
- struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node;
- struct drm_exynos_ipp_property *property;
- struct drm_exynos_ipp_config *config;
- struct drm_exynos_pos img_pos[EXYNOS_DRM_OPS_MAX];
- struct drm_exynos_ipp_set_wb set_wb;
u32 cfg;
- int ret, i;
-
- DRM_DEBUG_KMS("cmd[%d]\n", cmd);
-
- if (!c_node) {
- DRM_ERROR("failed to get c_node.\n");
- return -EINVAL;
- }
-
- property = &c_node->property;
gsc_handle_irq(ctx, true, false, true);
- for_each_ipp_ops(i) {
- config = &property->config[i];
- img_pos[i] = config->pos;
- }
+ /* enable one shot */
+ cfg = gsc_read(GSC_ENABLE);
+ cfg &= ~(GSC_ENABLE_ON_CLEAR_MASK |
+ GSC_ENABLE_CLK_GATE_MODE_MASK);
+ cfg |= GSC_ENABLE_ON_CLEAR_ONESHOT;
+ gsc_write(cfg, GSC_ENABLE);
- switch (cmd) {
- case IPP_CMD_M2M:
- /* enable one shot */
- cfg = gsc_read(GSC_ENABLE);
- cfg &= ~(GSC_ENABLE_ON_CLEAR_MASK |
- GSC_ENABLE_CLK_GATE_MODE_MASK);
- cfg |= GSC_ENABLE_ON_CLEAR_ONESHOT;
- gsc_write(cfg, GSC_ENABLE);
-
- /* src dma memory */
- cfg = gsc_read(GSC_IN_CON);
- cfg &= ~(GSC_IN_PATH_MASK | GSC_IN_LOCAL_SEL_MASK);
- cfg |= GSC_IN_PATH_MEMORY;
- gsc_write(cfg, GSC_IN_CON);
-
- /* dst dma memory */
- cfg = gsc_read(GSC_OUT_CON);
- cfg |= GSC_OUT_PATH_MEMORY;
- gsc_write(cfg, GSC_OUT_CON);
- break;
- case IPP_CMD_WB:
- set_wb.enable = 1;
- set_wb.refresh = property->refresh_rate;
- gsc_set_gscblk_fimd_wb(ctx, set_wb.enable);
- exynos_drm_ippnb_send_event(IPP_SET_WRITEBACK, (void *)&set_wb);
-
- /* src local path */
- cfg = gsc_read(GSC_IN_CON);
- cfg &= ~(GSC_IN_PATH_MASK | GSC_IN_LOCAL_SEL_MASK);
- cfg |= (GSC_IN_PATH_LOCAL | GSC_IN_LOCAL_FIMD_WB);
- gsc_write(cfg, GSC_IN_CON);
-
- /* dst dma memory */
- cfg = gsc_read(GSC_OUT_CON);
- cfg |= GSC_OUT_PATH_MEMORY;
- gsc_write(cfg, GSC_OUT_CON);
- break;
- case IPP_CMD_OUTPUT:
- /* src dma memory */
- cfg = gsc_read(GSC_IN_CON);
- cfg &= ~(GSC_IN_PATH_MASK | GSC_IN_LOCAL_SEL_MASK);
- cfg |= GSC_IN_PATH_MEMORY;
- gsc_write(cfg, GSC_IN_CON);
-
- /* dst local path */
- cfg = gsc_read(GSC_OUT_CON);
- cfg |= GSC_OUT_PATH_MEMORY;
- gsc_write(cfg, GSC_OUT_CON);
- break;
- default:
- ret = -EINVAL;
- dev_err(dev, "invalid operations.\n");
- return ret;
- }
+ /* src dma memory */
+ cfg = gsc_read(GSC_IN_CON);
+ cfg &= ~(GSC_IN_PATH_MASK | GSC_IN_LOCAL_SEL_MASK);
+ cfg |= GSC_IN_PATH_MEMORY;
+ gsc_write(cfg, GSC_IN_CON);
- ret = gsc_set_prescaler(ctx, &ctx->sc,
- &img_pos[EXYNOS_DRM_OPS_SRC],
- &img_pos[EXYNOS_DRM_OPS_DST]);
- if (ret) {
- dev_err(dev, "failed to set prescaler.\n");
- return ret;
- }
+ /* dst dma memory */
+ cfg = gsc_read(GSC_OUT_CON);
+ cfg |= GSC_OUT_PATH_MEMORY;
+ gsc_write(cfg, GSC_OUT_CON);
gsc_set_scaler(ctx, &ctx->sc);
cfg = gsc_read(GSC_ENABLE);
cfg |= GSC_ENABLE_ON;
gsc_write(cfg, GSC_ENABLE);
+}
+
+static int gsc_commit(struct exynos_drm_ipp *ipp,
+ struct exynos_drm_ipp_task *task)
+{
+ struct gsc_context *ctx = container_of(ipp, struct gsc_context, ipp);
+ int ret;
+
+ pm_runtime_get_sync(ctx->dev);
+ ctx->task = task;
+
+ ret = gsc_reset(ctx);
+ if (ret) {
+ pm_runtime_put_autosuspend(ctx->dev);
+ ctx->task = NULL;
+ return ret;
+ }
+
+ gsc_src_set_fmt(ctx, task->src.buf.fourcc);
+ gsc_src_set_transf(ctx, task->transform.rotation);
+ gsc_src_set_size(ctx, &task->src);
+ gsc_src_set_addr(ctx, 0, &task->src);
+ gsc_dst_set_fmt(ctx, task->dst.buf.fourcc);
+ gsc_dst_set_size(ctx, &task->dst);
+ gsc_dst_set_addr(ctx, 0, &task->dst);
+ gsc_set_prescaler(ctx, &ctx->sc, &task->src.rect, &task->dst.rect);
+ gsc_start(ctx);
return 0;
}
-static void gsc_ippdrv_stop(struct device *dev, enum drm_exynos_ipp_cmd cmd)
+static void gsc_abort(struct exynos_drm_ipp *ipp,
+ struct exynos_drm_ipp_task *task)
{
- struct gsc_context *ctx = get_gsc_context(dev);
- struct drm_exynos_ipp_set_wb set_wb = {0, 0};
- u32 cfg;
+ struct gsc_context *ctx =
+ container_of(ipp, struct gsc_context, ipp);
- DRM_DEBUG_KMS("cmd[%d]\n", cmd);
+ gsc_reset(ctx);
+ if (ctx->task) {
+ struct exynos_drm_ipp_task *task = ctx->task;
- switch (cmd) {
- case IPP_CMD_M2M:
- /* bypass */
- break;
- case IPP_CMD_WB:
- gsc_set_gscblk_fimd_wb(ctx, set_wb.enable);
- exynos_drm_ippnb_send_event(IPP_SET_WRITEBACK, (void *)&set_wb);
- break;
- case IPP_CMD_OUTPUT:
- default:
- dev_err(dev, "invalid operations.\n");
- break;
+ ctx->task = NULL;
+ pm_runtime_mark_last_busy(ctx->dev);
+ pm_runtime_put_autosuspend(ctx->dev);
+ exynos_drm_ipp_task_done(task, -EIO);
}
+}
- gsc_handle_irq(ctx, false, false, true);
+static struct exynos_drm_ipp_funcs ipp_funcs = {
+ .commit = gsc_commit,
+ .abort = gsc_abort,
+};
- /* reset sequence */
- gsc_write(0xff, GSC_OUT_BASE_ADDR_Y_MASK);
- gsc_write(0xff, GSC_OUT_BASE_ADDR_CB_MASK);
- gsc_write(0xff, GSC_OUT_BASE_ADDR_CR_MASK);
+static int gsc_bind(struct device *dev, struct device *master, void *data)
+{
+ struct gsc_context *ctx = dev_get_drvdata(dev);
+ struct drm_device *drm_dev = data;
+ struct exynos_drm_ipp *ipp = &ctx->ipp;
- cfg = gsc_read(GSC_ENABLE);
- cfg &= ~GSC_ENABLE_ON;
- gsc_write(cfg, GSC_ENABLE);
+ ctx->drm_dev = drm_dev;
+ drm_iommu_attach_device(drm_dev, dev);
+
+ exynos_drm_ipp_register(drm_dev, ipp, &ipp_funcs,
+ DRM_EXYNOS_IPP_CAP_CROP | DRM_EXYNOS_IPP_CAP_ROTATE |
+ DRM_EXYNOS_IPP_CAP_SCALE | DRM_EXYNOS_IPP_CAP_CONVERT,
+ ctx->formats, ctx->num_formats, "gsc");
+
+ dev_info(dev, "The exynos gscaler has been probed successfully\n");
+
+ return 0;
+}
+
+static void gsc_unbind(struct device *dev, struct device *master,
+ void *data)
+{
+ struct gsc_context *ctx = dev_get_drvdata(dev);
+ struct drm_device *drm_dev = data;
+ struct exynos_drm_ipp *ipp = &ctx->ipp;
+
+ exynos_drm_ipp_unregister(drm_dev, ipp);
+ drm_iommu_detach_device(drm_dev, dev);
}
+static const struct component_ops gsc_component_ops = {
+ .bind = gsc_bind,
+ .unbind = gsc_unbind,
+};
+
+static const unsigned int gsc_formats[] = {
+ DRM_FORMAT_ARGB8888,
+ DRM_FORMAT_XRGB8888, DRM_FORMAT_RGB565, DRM_FORMAT_BGRX8888,
+ DRM_FORMAT_NV12, DRM_FORMAT_NV16, DRM_FORMAT_NV21, DRM_FORMAT_NV61,
+ DRM_FORMAT_UYVY, DRM_FORMAT_VYUY, DRM_FORMAT_YUYV, DRM_FORMAT_YVYU,
+ DRM_FORMAT_YUV420, DRM_FORMAT_YVU420, DRM_FORMAT_YUV422,
+};
+
static int gsc_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
+ struct gsc_driverdata *driver_data;
+ struct exynos_drm_ipp_formats *formats;
struct gsc_context *ctx;
struct resource *res;
- struct exynos_drm_ippdrv *ippdrv;
- int ret;
+ int ret, i;
ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
- if (dev->of_node) {
- ctx->sysreg = syscon_regmap_lookup_by_phandle(dev->of_node,
- "samsung,sysreg");
- if (IS_ERR(ctx->sysreg)) {
- dev_warn(dev, "failed to get system register.\n");
- ctx->sysreg = NULL;
- }
+ formats = devm_kzalloc(dev, sizeof(*formats) *
+ (ARRAY_SIZE(gsc_formats)), GFP_KERNEL);
+ if (!formats)
+ return -ENOMEM;
+
+ driver_data = (struct gsc_driverdata *)of_device_get_match_data(dev);
+ ctx->dev = dev;
+ ctx->num_clocks = driver_data->num_clocks;
+ ctx->clk_names = driver_data->clk_names;
+
+ for (i = 0; i < ARRAY_SIZE(gsc_formats); i++) {
+ formats[i].fourcc = gsc_formats[i];
+ formats[i].type = DRM_EXYNOS_IPP_FORMAT_SOURCE |
+ DRM_EXYNOS_IPP_FORMAT_DESTINATION;
+ formats[i].limits = driver_data->limits;
+ formats[i].num_limits = driver_data->num_limits;
}
+ ctx->formats = formats;
+ ctx->num_formats = ARRAY_SIZE(gsc_formats);
/* clock control */
- ctx->gsc_clk = devm_clk_get(dev, "gscl");
- if (IS_ERR(ctx->gsc_clk)) {
- dev_err(dev, "failed to get gsc clock.\n");
- return PTR_ERR(ctx->gsc_clk);
+ for (i = 0; i < ctx->num_clocks; i++) {
+ ctx->clocks[i] = devm_clk_get(dev, ctx->clk_names[i]);
+ if (IS_ERR(ctx->clocks[i])) {
+ dev_err(dev, "failed to get clock: %s\n",
+ ctx->clk_names[i]);
+ return PTR_ERR(ctx->clocks[i]);
+ }
}
/* resource memory */
@@ -1699,8 +1246,8 @@ static int gsc_probe(struct platform_device *pdev)
}
ctx->irq = res->start;
- ret = devm_request_threaded_irq(dev, ctx->irq, NULL, gsc_irq_handler,
- IRQF_ONESHOT, "drm_gsc", ctx);
+ ret = devm_request_irq(dev, ctx->irq, gsc_irq_handler, 0,
+ dev_name(dev), ctx);
if (ret < 0) {
dev_err(dev, "failed to request irq.\n");
return ret;
@@ -1709,38 +1256,22 @@ static int gsc_probe(struct platform_device *pdev)
	/* context initialization */
ctx->id = pdev->id;
- ippdrv = &ctx->ippdrv;
- ippdrv->dev = dev;
- ippdrv->ops[EXYNOS_DRM_OPS_SRC] = &gsc_src_ops;
- ippdrv->ops[EXYNOS_DRM_OPS_DST] = &gsc_dst_ops;
- ippdrv->check_property = gsc_ippdrv_check_property;
- ippdrv->reset = gsc_ippdrv_reset;
- ippdrv->start = gsc_ippdrv_start;
- ippdrv->stop = gsc_ippdrv_stop;
- ret = gsc_init_prop_list(ippdrv);
- if (ret < 0) {
- dev_err(dev, "failed to init property list.\n");
- return ret;
- }
-
- DRM_DEBUG_KMS("id[%d]ippdrv[%pK]\n", ctx->id, ippdrv);
-
- mutex_init(&ctx->lock);
platform_set_drvdata(pdev, ctx);
+ pm_runtime_use_autosuspend(dev);
+ pm_runtime_set_autosuspend_delay(dev, GSC_AUTOSUSPEND_DELAY);
pm_runtime_enable(dev);
- ret = exynos_drm_ippdrv_register(ippdrv);
- if (ret < 0) {
- dev_err(dev, "failed to register drm gsc device.\n");
- goto err_ippdrv_register;
- }
+ ret = component_add(dev, &gsc_component_ops);
+ if (ret)
+ goto err_pm_dis;
dev_info(dev, "drm gsc registered successfully.\n");
return 0;
-err_ippdrv_register:
+err_pm_dis:
+ pm_runtime_dont_use_autosuspend(dev);
pm_runtime_disable(dev);
return ret;
}
@@ -1748,13 +1279,8 @@ err_ippdrv_register:
static int gsc_remove(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
- struct gsc_context *ctx = get_gsc_context(dev);
- struct exynos_drm_ippdrv *ippdrv = &ctx->ippdrv;
- exynos_drm_ippdrv_unregister(ippdrv);
- mutex_destroy(&ctx->lock);
-
- pm_runtime_set_suspended(dev);
+ pm_runtime_dont_use_autosuspend(dev);
pm_runtime_disable(dev);
return 0;
@@ -1763,19 +1289,32 @@ static int gsc_remove(struct platform_device *pdev)
static int __maybe_unused gsc_runtime_suspend(struct device *dev)
{
struct gsc_context *ctx = get_gsc_context(dev);
+ int i;
DRM_DEBUG_KMS("id[%d]\n", ctx->id);
- return gsc_clk_ctrl(ctx, false);
+ for (i = ctx->num_clocks - 1; i >= 0; i--)
+ clk_disable_unprepare(ctx->clocks[i]);
+
+ return 0;
}
static int __maybe_unused gsc_runtime_resume(struct device *dev)
{
struct gsc_context *ctx = get_gsc_context(dev);
+ int i, ret;
DRM_DEBUG_KMS("id[%d]\n", ctx->id);
- return gsc_clk_ctrl(ctx, true);
+ for (i = 0; i < ctx->num_clocks; i++) {
+ ret = clk_prepare_enable(ctx->clocks[i]);
+ if (ret) {
+			while (--i >= 0)
+ clk_disable_unprepare(ctx->clocks[i]);
+ return ret;
+ }
+ }
+ return 0;
}
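
The open-coded enable loop with backwards unwinding is the classic pattern for a clock array. For reference, the clk_bulk helpers available in recent kernels provide the same all-or-nothing semantics with the unwinding done in the core; a sketch of that alternative (not what this patch does, and the function name is illustrative):

#include <linux/clk.h>
#include <linux/device.h>

static int example_enable_clocks(struct device *dev,
				 struct clk_bulk_data *clks, int num)
{
	int ret;

	/* the .id field of each clk_bulk_data entry is set by the caller */
	ret = devm_clk_bulk_get(dev, num, clks);
	if (ret)
		return ret;

	/* enables all clocks, unwinding internally on failure */
	return clk_bulk_prepare_enable(num, clks);
}
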
static const struct dev_pm_ops gsc_pm_ops = {
@@ -1784,9 +1323,66 @@ static const struct dev_pm_ops gsc_pm_ops = {
SET_RUNTIME_PM_OPS(gsc_runtime_suspend, gsc_runtime_resume, NULL)
};
+static const struct drm_exynos_ipp_limit gsc_5250_limits[] = {
+ { IPP_SIZE_LIMIT(BUFFER, .h = { 32, 4800, 8 }, .v = { 16, 3344, 8 }) },
+ { IPP_SIZE_LIMIT(AREA, .h = { 16, 4800, 2 }, .v = { 8, 3344, 2 }) },
+ { IPP_SIZE_LIMIT(ROTATED, .h = { 32, 2048 }, .v = { 16, 2048 }) },
+ { IPP_SCALE_LIMIT(.h = { (1 << 16) / 16, (1 << 16) * 8 },
+ .v = { (1 << 16) / 16, (1 << 16) * 8 }) },
+};
+
+static const struct drm_exynos_ipp_limit gsc_5420_limits[] = {
+ { IPP_SIZE_LIMIT(BUFFER, .h = { 32, 4800, 8 }, .v = { 16, 3344, 8 }) },
+ { IPP_SIZE_LIMIT(AREA, .h = { 16, 4800, 2 }, .v = { 8, 3344, 2 }) },
+ { IPP_SIZE_LIMIT(ROTATED, .h = { 16, 2016 }, .v = { 8, 2016 }) },
+ { IPP_SCALE_LIMIT(.h = { (1 << 16) / 16, (1 << 16) * 8 },
+ .v = { (1 << 16) / 16, (1 << 16) * 8 }) },
+};
+
+static const struct drm_exynos_ipp_limit gsc_5433_limits[] = {
+ { IPP_SIZE_LIMIT(BUFFER, .h = { 32, 8191, 2 }, .v = { 16, 8191, 2 }) },
+ { IPP_SIZE_LIMIT(AREA, .h = { 16, 4800, 1 }, .v = { 8, 3344, 1 }) },
+ { IPP_SIZE_LIMIT(ROTATED, .h = { 32, 2047 }, .v = { 8, 8191 }) },
+ { IPP_SCALE_LIMIT(.h = { (1 << 16) / 16, (1 << 16) * 8 },
+ .v = { (1 << 16) / 16, (1 << 16) * 8 }) },
+};
+
+static struct gsc_driverdata gsc_exynos5250_drvdata = {
+ .clk_names = {"gscl"},
+ .num_clocks = 1,
+ .limits = gsc_5250_limits,
+ .num_limits = ARRAY_SIZE(gsc_5250_limits),
+};
+
+static struct gsc_driverdata gsc_exynos5420_drvdata = {
+ .clk_names = {"gscl"},
+ .num_clocks = 1,
+ .limits = gsc_5420_limits,
+ .num_limits = ARRAY_SIZE(gsc_5420_limits),
+};
+
+static struct gsc_driverdata gsc_exynos5433_drvdata = {
+ .clk_names = {"pclk", "aclk", "aclk_xiu", "aclk_gsclbend"},
+ .num_clocks = 4,
+ .limits = gsc_5433_limits,
+ .num_limits = ARRAY_SIZE(gsc_5433_limits),
+};
+
static const struct of_device_id exynos_drm_gsc_of_match[] = {
- { .compatible = "samsung,exynos5-gsc" },
- { },
+ {
+ .compatible = "samsung,exynos5-gsc",
+ .data = &gsc_exynos5250_drvdata,
+ }, {
+ .compatible = "samsung,exynos5250-gsc",
+ .data = &gsc_exynos5250_drvdata,
+ }, {
+ .compatible = "samsung,exynos5420-gsc",
+ .data = &gsc_exynos5420_drvdata,
+ }, {
+ .compatible = "samsung,exynos5433-gsc",
+ .data = &gsc_exynos5433_drvdata,
+ }, {
+ },
};
MODULE_DEVICE_TABLE(of, exynos_drm_gsc_of_match);
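
Each compatible string now carries a .data pointer selecting the per-variant limits and clock list, which gsc_probe() retrieves with of_device_get_match_data(). In essence (wrapper name illustrative):

#include <linux/of_device.h>

static const struct gsc_driverdata *example_get_drvdata(struct device *dev)
{
	/* returns the .data of whichever of_device_id entry matched */
	return of_device_get_match_data(dev);
}

Since the helper returns a const pointer, assigning it to a const-qualified variable would also avoid the cast used in gsc_probe() above.
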
@@ -1800,4 +1396,3 @@ struct platform_driver gsc_driver = {
.of_match_table = of_match_ptr(exynos_drm_gsc_of_match),
},
};
-
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.h b/drivers/gpu/drm/exynos/exynos_drm_gsc.h
deleted file mode 100644
index 29ec1c5..0000000
--- a/drivers/gpu/drm/exynos/exynos_drm_gsc.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Copyright (c) 2012 Samsung Electronics Co., Ltd.
- *
- * Authors:
- * Eunchul Kim <chulspro.kim@samsung.com>
- * Jinyoung Jeon <jy0.jeon@samsung.com>
- * Sangmin Lee <lsmin.lee@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-#ifndef _EXYNOS_DRM_GSC_H_
-#define _EXYNOS_DRM_GSC_H_
-
-/*
- * TODO
- * FIMD output interface notifier callback.
- * Mixer output interface notifier callback.
- */
-
-#endif /* _EXYNOS_DRM_GSC_H_ */
diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.c b/drivers/gpu/drm/exynos/exynos_drm_ipp.c
new file mode 100644
index 0000000..26374e5
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_ipp.c
@@ -0,0 +1,916 @@
+/*
+ * Copyright (C) 2017 Samsung Electronics Co.Ltd
+ * Authors:
+ * Marek Szyprowski <m.szyprowski@samsung.com>
+ *
+ * Exynos DRM Image Post Processing (IPP) related functions
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ */
+
+#include <drm/drmP.h>
+#include <drm/drm_mode.h>
+#include <uapi/drm/exynos_drm.h>
+
+#include "exynos_drm_drv.h"
+#include "exynos_drm_gem.h"
+#include "exynos_drm_ipp.h"
+
+static int num_ipp;
+static LIST_HEAD(ipp_list);
+
+/**
+ * exynos_drm_ipp_register - Register a new picture processor hardware module
+ * @dev: DRM device
+ * @ipp: ipp module to init
+ * @funcs: callbacks for the new ipp object
+ * @caps: bitmask of ipp capabilities (%DRM_EXYNOS_IPP_CAP_*)
+ * @formats: array of supported formats
+ * @num_formats: size of the supported formats array
+ * @name: name (for debugging purposes)
+ *
+ * Initializes an ipp module.
+ *
+ * Returns:
+ * Zero on success, error code on failure.
+ */
+int exynos_drm_ipp_register(struct drm_device *dev, struct exynos_drm_ipp *ipp,
+ const struct exynos_drm_ipp_funcs *funcs, unsigned int caps,
+ const struct exynos_drm_ipp_formats *formats,
+ unsigned int num_formats, const char *name)
+{
+ WARN_ON(!ipp);
+ WARN_ON(!funcs);
+ WARN_ON(!formats);
+ WARN_ON(!num_formats);
+
+ spin_lock_init(&ipp->lock);
+ INIT_LIST_HEAD(&ipp->todo_list);
+ init_waitqueue_head(&ipp->done_wq);
+ ipp->dev = dev;
+ ipp->funcs = funcs;
+ ipp->capabilities = caps;
+ ipp->name = name;
+ ipp->formats = formats;
+ ipp->num_formats = num_formats;
+
+ /* ipp_list modification is serialized by component framework */
+ list_add_tail(&ipp->head, &ipp_list);
+ ipp->id = num_ipp++;
+
+ DRM_DEBUG_DRIVER("Registered ipp %d\n", ipp->id);
+
+ return 0;
+}
+
+/**
+ * exynos_drm_ipp_unregister - Unregister the picture processor module
+ * @dev: DRM device
+ * @ipp: ipp module
+ */
+void exynos_drm_ipp_unregister(struct drm_device *dev,
+ struct exynos_drm_ipp *ipp)
+{
+ WARN_ON(ipp->task);
+ WARN_ON(!list_empty(&ipp->todo_list));
+ list_del(&ipp->head);
+}
+
+/**
+ * exynos_drm_ipp_get_res_ioctl - enumerate all ipp modules
+ * @dev: DRM device
+ * @data: ioctl data
+ * @file_priv: DRM file info
+ *
+ * Construct a list of ipp ids.
+ *
+ * Called by the user via ioctl.
+ *
+ * Returns:
+ * Zero on success, negative errno on failure.
+ */
+int exynos_drm_ipp_get_res_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_exynos_ioctl_ipp_get_res *resp = data;
+ struct exynos_drm_ipp *ipp;
+ uint32_t __user *ipp_ptr = (uint32_t __user *)
+ (unsigned long)resp->ipp_id_ptr;
+ unsigned int count = num_ipp, copied = 0;
+
+ /*
+ * This ioctl is called twice, once to determine how much space is
+ * needed, and the 2nd time to fill it.
+ */
+ if (count && resp->count_ipps >= count) {
+ list_for_each_entry(ipp, &ipp_list, head) {
+ if (put_user(ipp->id, ipp_ptr + copied))
+ return -EFAULT;
+ copied++;
+ }
+ }
+ resp->count_ipps = count;
+
+ return 0;
+}
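For illustration, a minimal user-space sketch of this two-call pattern. Only the count_ipps and ipp_id_ptr fields are taken from the code above; the DRM_IOCTL_EXYNOS_IPP_GET_RESOURCES request code is assumed to come from the UAPI header.

/* Hypothetical user-space sketch of the two-call enumeration above. */
#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <drm/exynos_drm.h>

static int enumerate_ipps(int fd, uint32_t **ids, uint32_t *count)
{
        struct drm_exynos_ioctl_ipp_get_res res = { 0 };

        /* first call: count_ipps == 0, so the kernel only reports the count */
        if (ioctl(fd, DRM_IOCTL_EXYNOS_IPP_GET_RESOURCES, &res) < 0)
                return -1;

        *ids = calloc(res.count_ipps, sizeof(**ids));
        if (!*ids)
                return -1;

        /* second call: the buffer is large enough, so the kernel fills it */
        res.ipp_id_ptr = (uintptr_t)*ids;
        if (ioctl(fd, DRM_IOCTL_EXYNOS_IPP_GET_RESOURCES, &res) < 0) {
                free(*ids);
                return -1;
        }

        *count = res.count_ipps;
        return 0;
}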
+
+static inline struct exynos_drm_ipp *__ipp_get(uint32_t id)
+{
+ struct exynos_drm_ipp *ipp;
+
+ list_for_each_entry(ipp, &ipp_list, head)
+ if (ipp->id == id)
+ return ipp;
+ return NULL;
+}
+
+/**
+ * exynos_drm_ipp_get_caps_ioctl - get ipp module capabilities and formats
+ * @dev: DRM device
+ * @data: ioctl data
+ * @file_priv: DRM file info
+ *
+ * Construct a structure describing ipp module capabilities.
+ *
+ * Called by the user via ioctl.
+ *
+ * Returns:
+ * Zero on success, negative errno on failure.
+ */
+int exynos_drm_ipp_get_caps_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_exynos_ioctl_ipp_get_caps *resp = data;
+ void __user *ptr = (void __user *)(unsigned long)resp->formats_ptr;
+ struct exynos_drm_ipp *ipp;
+ int i;
+
+ ipp = __ipp_get(resp->ipp_id);
+ if (!ipp)
+ return -ENOENT;
+
+ resp->ipp_id = ipp->id;
+ resp->capabilities = ipp->capabilities;
+
+ /*
+ * This ioctl is called twice, once to determine how much space is
+ * needed, and the 2nd time to fill it.
+ */
+ if (resp->formats_count >= ipp->num_formats) {
+ for (i = 0; i < ipp->num_formats; i++) {
+ struct drm_exynos_ipp_format tmp = {
+ .fourcc = ipp->formats[i].fourcc,
+ .type = ipp->formats[i].type,
+ .modifier = ipp->formats[i].modifier,
+ };
+
+ if (copy_to_user(ptr, &tmp, sizeof(tmp)))
+ return -EFAULT;
+ ptr += sizeof(tmp);
+ }
+ }
+ resp->formats_count = ipp->num_formats;
+
+ return 0;
+}
+
+static inline const struct exynos_drm_ipp_formats *__ipp_format_get(
+ struct exynos_drm_ipp *ipp, uint32_t fourcc,
+ uint64_t mod, unsigned int type)
+{
+ int i;
+
+ for (i = 0; i < ipp->num_formats; i++) {
+ if ((ipp->formats[i].type & type) &&
+ ipp->formats[i].fourcc == fourcc &&
+ ipp->formats[i].modifier == mod)
+ return &ipp->formats[i];
+ }
+ return NULL;
+}
+
+/**
+ * exynos_drm_ipp_get_limits_ioctl - get ipp module limits
+ * @dev: DRM device
+ * @data: ioctl data
+ * @file_priv: DRM file info
+ *
+ * Construct a structure describing ipp module limitations for the provided
+ * picture format.
+ *
+ * Called by the user via ioctl.
+ *
+ * Returns:
+ * Zero on success, negative errno on failure.
+ */
+int exynos_drm_ipp_get_limits_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_exynos_ioctl_ipp_get_limits *resp = data;
+ void __user *ptr = (void __user *)(unsigned long)resp->limits_ptr;
+ const struct exynos_drm_ipp_formats *format;
+ struct exynos_drm_ipp *ipp;
+
+ if (resp->type != DRM_EXYNOS_IPP_FORMAT_SOURCE &&
+ resp->type != DRM_EXYNOS_IPP_FORMAT_DESTINATION)
+ return -EINVAL;
+
+ ipp = __ipp_get(resp->ipp_id);
+ if (!ipp)
+ return -ENOENT;
+
+ format = __ipp_format_get(ipp, resp->fourcc, resp->modifier,
+ resp->type);
+ if (!format)
+ return -EINVAL;
+
+ /*
+ * This ioctl is called twice, once to determine how much space is
+ * needed, and the 2nd time to fill it.
+ */
+ if (format->num_limits && resp->limits_count >= format->num_limits)
+ if (copy_to_user((void __user *)ptr, format->limits,
+ sizeof(*format->limits) * format->num_limits))
+ return -EFAULT;
+ resp->limits_count = format->num_limits;
+
+ return 0;
+}
+
+struct drm_pending_exynos_ipp_event {
+ struct drm_pending_event base;
+ struct drm_exynos_ipp_event event;
+};
+
+static inline struct exynos_drm_ipp_task *
+ exynos_drm_ipp_task_alloc(struct exynos_drm_ipp *ipp)
+{
+ struct exynos_drm_ipp_task *task;
+
+ task = kzalloc(sizeof(*task), GFP_KERNEL);
+ if (!task)
+ return NULL;
+
+ task->dev = ipp->dev;
+ task->ipp = ipp;
+
+ /* some defaults */
+ task->src.rect.w = task->dst.rect.w = UINT_MAX;
+ task->src.rect.h = task->dst.rect.h = UINT_MAX;
+ task->transform.rotation = DRM_MODE_ROTATE_0;
+
+ DRM_DEBUG_DRIVER("Allocated task %pK\n", task);
+
+ return task;
+}
+
+static const struct exynos_drm_param_map {
+ unsigned int id;
+ unsigned int size;
+ unsigned int offset;
+} exynos_drm_ipp_params_maps[] = {
+ {
+ DRM_EXYNOS_IPP_TASK_BUFFER | DRM_EXYNOS_IPP_TASK_TYPE_SOURCE,
+ sizeof(struct drm_exynos_ipp_task_buffer),
+ offsetof(struct exynos_drm_ipp_task, src.buf),
+ }, {
+ DRM_EXYNOS_IPP_TASK_BUFFER |
+ DRM_EXYNOS_IPP_TASK_TYPE_DESTINATION,
+ sizeof(struct drm_exynos_ipp_task_buffer),
+ offsetof(struct exynos_drm_ipp_task, dst.buf),
+ }, {
+ DRM_EXYNOS_IPP_TASK_RECTANGLE | DRM_EXYNOS_IPP_TASK_TYPE_SOURCE,
+ sizeof(struct drm_exynos_ipp_task_rect),
+ offsetof(struct exynos_drm_ipp_task, src.rect),
+ }, {
+ DRM_EXYNOS_IPP_TASK_RECTANGLE |
+ DRM_EXYNOS_IPP_TASK_TYPE_DESTINATION,
+ sizeof(struct drm_exynos_ipp_task_rect),
+ offsetof(struct exynos_drm_ipp_task, dst.rect),
+ }, {
+ DRM_EXYNOS_IPP_TASK_TRANSFORM,
+ sizeof(struct drm_exynos_ipp_task_transform),
+ offsetof(struct exynos_drm_ipp_task, transform),
+ }, {
+ DRM_EXYNOS_IPP_TASK_ALPHA,
+ sizeof(struct drm_exynos_ipp_task_alpha),
+ offsetof(struct exynos_drm_ipp_task, alpha),
+ },
+};
+
+static int exynos_drm_ipp_task_set(struct exynos_drm_ipp_task *task,
+ struct drm_exynos_ioctl_ipp_commit *arg)
+{
+ const struct exynos_drm_param_map *map = exynos_drm_ipp_params_maps;
+ void __user *params = (void __user *)(unsigned long)arg->params_ptr;
+ unsigned int size = arg->params_size;
+ uint32_t id;
+ int i;
+
+ while (size) {
+ if (get_user(id, (uint32_t __user *)params))
+ return -EFAULT;
+
+ for (i = 0; i < ARRAY_SIZE(exynos_drm_ipp_params_maps); i++)
+ if (map[i].id == id)
+ break;
+ if (i == ARRAY_SIZE(exynos_drm_ipp_params_maps) ||
+ map[i].size > size)
+ return -EINVAL;
+
+ if (copy_from_user((void *)task + map[i].offset, params,
+ map[i].size))
+ return -EFAULT;
+
+ params += map[i].size;
+ size -= map[i].size;
+ }
+
+ DRM_DEBUG_DRIVER("Got task %pK configuration from userspace\n", task);
+ return 0;
+}
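The params blob is thus a packed sequence of self-identifying structures: the loop reads the leading __u32 id of each block and copies the whole structure, id included, onto the matching task field. A hedged user-space sketch of building such a blob follows; it assumes each UAPI structure begins with its __u32 id (as the get_user() on the block start implies) and that the structure and flag names come from the UAPI header.

/* Sketch: build a parameter blob with one source buffer and one transform. */
#include <stdint.h>
#include <string.h>
#include <drm/drm_fourcc.h>
#include <drm/drm_mode.h>
#include <drm/exynos_drm.h>

static size_t build_params(char *blob)
{
        struct drm_exynos_ipp_task_buffer src_buf = {
                .id = DRM_EXYNOS_IPP_TASK_BUFFER |
                      DRM_EXYNOS_IPP_TASK_TYPE_SOURCE,
                .fourcc = DRM_FORMAT_NV12,
                .width = 1920,
                .height = 1080,
                /* .gem_id[], .pitch[], .offset[] must come from the caller */
        };
        struct drm_exynos_ipp_task_transform transform = {
                .id = DRM_EXYNOS_IPP_TASK_TRANSFORM,
                .rotation = DRM_MODE_ROTATE_90,
        };

        /* blocks are simply concatenated, each starting with its id */
        memcpy(blob, &src_buf, sizeof(src_buf));
        memcpy(blob + sizeof(src_buf), &transform, sizeof(transform));
        return sizeof(src_buf) + sizeof(transform);
}

The returned blob is what a caller would pass via params_ptr/params_size of struct drm_exynos_ioctl_ipp_commit.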
+
+static int exynos_drm_ipp_task_setup_buffer(struct exynos_drm_ipp_buffer *buf,
+ struct drm_file *filp)
+{
+ int ret = 0;
+ int i;
+
+ /* basic checks */
+ if (buf->buf.width == 0 || buf->buf.height == 0)
+ return -EINVAL;
+ buf->format = drm_format_info(buf->buf.fourcc);
+ /* drm_format_info() returns NULL for an unrecognized fourcc */
+ if (!buf->format)
+ return -EINVAL;
+ for (i = 0; i < buf->format->num_planes; i++) {
+ unsigned int width = (i == 0) ? buf->buf.width :
+ DIV_ROUND_UP(buf->buf.width, buf->format->hsub);
+
+ if (buf->buf.pitch[i] == 0)
+ buf->buf.pitch[i] = width * buf->format->cpp[i];
+ if (buf->buf.pitch[i] < width * buf->format->cpp[i])
+ return -EINVAL;
+ if (!buf->buf.gem_id[i])
+ return -ENOENT;
+ }
+
+ /* pitch for additional planes must match */
+ if (buf->format->num_planes > 2 &&
+ buf->buf.pitch[1] != buf->buf.pitch[2])
+ return -EINVAL;
+
+ /* get GEM buffers and check their size */
+ for (i = 0; i < buf->format->num_planes; i++) {
+ unsigned int height = (i == 0) ? buf->buf.height :
+ DIV_ROUND_UP(buf->buf.height, buf->format->vsub);
+ unsigned long size = height * buf->buf.pitch[i];
+ struct drm_gem_object *obj = drm_gem_object_lookup(filp,
+ buf->buf.gem_id[i]);
+ if (!obj) {
+ ret = -ENOENT;
+ goto gem_free;
+ }
+ buf->exynos_gem[i] = to_exynos_gem(obj);
+
+ if (size + buf->buf.offset[i] > buf->exynos_gem[i]->size) {
+ i++;
+ ret = -EINVAL;
+ goto gem_free;
+ }
+ buf->dma_addr[i] = buf->exynos_gem[i]->dma_addr +
+ buf->buf.offset[i];
+ }
+
+ return 0;
+gem_free:
+ while (i--) {
+ drm_gem_object_put_unlocked(&buf->exynos_gem[i]->base);
+ buf->exynos_gem[i] = NULL;
+ }
+ return ret;
+}
+
+static void exynos_drm_ipp_task_release_buf(struct exynos_drm_ipp_buffer *buf)
+{
+ int i;
+
+ if (!buf->exynos_gem[0])
+ return;
+ for (i = 0; i < buf->format->num_planes; i++)
+ drm_gem_object_put_unlocked(&buf->exynos_gem[i]->base);
+}
+
+static void exynos_drm_ipp_task_free(struct exynos_drm_ipp *ipp,
+ struct exynos_drm_ipp_task *task)
+{
+ DRM_DEBUG_DRIVER("Freeing task %pK\n", task);
+
+ exynos_drm_ipp_task_release_buf(&task->src);
+ exynos_drm_ipp_task_release_buf(&task->dst);
+ if (task->event)
+ drm_event_cancel_free(ipp->dev, &task->event->base);
+ kfree(task);
+}
+
+struct drm_ipp_limit {
+ struct drm_exynos_ipp_limit_val h;
+ struct drm_exynos_ipp_limit_val v;
+};
+
+enum drm_ipp_size_id {
+ IPP_LIMIT_BUFFER, IPP_LIMIT_AREA, IPP_LIMIT_ROTATED, IPP_LIMIT_MAX
+};
+
+static const enum drm_ipp_size_id limit_id_fallback[IPP_LIMIT_MAX][4] = {
+ [IPP_LIMIT_BUFFER] = { DRM_EXYNOS_IPP_LIMIT_SIZE_BUFFER },
+ [IPP_LIMIT_AREA] = { DRM_EXYNOS_IPP_LIMIT_SIZE_AREA,
+ DRM_EXYNOS_IPP_LIMIT_SIZE_BUFFER },
+ [IPP_LIMIT_ROTATED] = { DRM_EXYNOS_IPP_LIMIT_SIZE_ROTATED,
+ DRM_EXYNOS_IPP_LIMIT_SIZE_AREA,
+ DRM_EXYNOS_IPP_LIMIT_SIZE_BUFFER },
+};
+
+static inline void __limit_set_val(unsigned int *ptr, unsigned int val)
+{
+ if (!*ptr)
+ *ptr = val;
+}
+
+static void __get_size_limit(const struct drm_exynos_ipp_limit *limits,
+ unsigned int num_limits, enum drm_ipp_size_id id,
+ struct drm_ipp_limit *res)
+{
+ const struct drm_exynos_ipp_limit *l = limits;
+ int i = 0;
+
+ memset(res, 0, sizeof(*res));
+ for (i = 0; limit_id_fallback[id][i]; i++)
+ for (l = limits; l - limits < num_limits; l++) {
+ if (((l->type & DRM_EXYNOS_IPP_LIMIT_TYPE_MASK) !=
+ DRM_EXYNOS_IPP_LIMIT_TYPE_SIZE) ||
+ ((l->type & DRM_EXYNOS_IPP_LIMIT_SIZE_MASK) !=
+ limit_id_fallback[id][i]))
+ continue;
+ __limit_set_val(&res->h.min, l->h.min);
+ __limit_set_val(&res->h.max, l->h.max);
+ __limit_set_val(&res->h.align, l->h.align);
+ __limit_set_val(&res->v.min, l->v.min);
+ __limit_set_val(&res->v.max, l->v.max);
+ __limit_set_val(&res->v.align, l->v.align);
+ }
+}
+
+static inline bool __align_check(unsigned int val, unsigned int align)
+{
+ if (align && (val & (align - 1))) {
+ DRM_DEBUG_DRIVER("Value %d exceeds HW limits (align %d)\n",
+ val, align);
+ return false;
+ }
+ return true;
+}
+
+static inline bool __size_limit_check(unsigned int val,
+ struct drm_exynos_ipp_limit_val *l)
+{
+ if ((l->min && val < l->min) || (l->max && val > l->max)) {
+ DRM_DEBUG_DRIVER("Value %d exceeds HW limits (min %d, max %d)\n",
+ val, l->min, l->max);
+ return false;
+ }
+ return __align_check(val, l->align);
+}
+
+static int exynos_drm_ipp_check_size_limits(struct exynos_drm_ipp_buffer *buf,
+ const struct drm_exynos_ipp_limit *limits, unsigned int num_limits,
+ bool rotate, bool swap)
+{
+ enum drm_ipp_size_id id = rotate ? IPP_LIMIT_ROTATED : IPP_LIMIT_AREA;
+ struct drm_ipp_limit l;
+ struct drm_exynos_ipp_limit_val *lh = &l.h, *lv = &l.v;
+
+ if (!limits)
+ return 0;
+
+ __get_size_limit(limits, num_limits, IPP_LIMIT_BUFFER, &l);
+ if (!__size_limit_check(buf->buf.width, &l.h) ||
+ !__size_limit_check(buf->buf.height, &l.v))
+ return -EINVAL;
+
+ if (swap) {
+ lv = &l.h;
+ lh = &l.v;
+ }
+ __get_size_limit(limits, num_limits, id, &l);
+ if (!__size_limit_check(buf->rect.w, lh) ||
+ !__align_check(buf->rect.x, lh->align) ||
+ !__size_limit_check(buf->rect.h, lv) ||
+ !__align_check(buf->rect.y, lv->align))
+ return -EINVAL;
+
+ return 0;
+}
+
+static inline bool __scale_limit_check(unsigned int src, unsigned int dst,
+ unsigned int min, unsigned int max)
+{
+ if ((max && (dst << 16) > src * max) ||
+ (min && (dst << 16) < src * min)) {
+ DRM_DEBUG_DRIVER("Scale from %d to %d exceeds HW limits (ratio min %d.%05d, max %d.%05d)\n",
+ src, dst,
+ min >> 16, 100000 * (min & 0xffff) / (1 << 16),
+ max >> 16, 100000 * (max & 0xffff) / (1 << 16));
+ return false;
+ }
+ return true;
+}
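Scale limits are 16.16 fixed-point ratios of destination to source size: (1 << 16) / 16 encodes a minimum ratio of 1/16 and (1 << 16) * 8 a maximum of 8x, as in the gsc limit tables above. A worked instance of the comparison, restated as a standalone helper:

/* Same test as above, in 16.16 fixed point: downscaling 1920 -> 100 is a
 * ratio of 100/1920 ~= 0.0521, below a 1/16 (0.0625) minimum, so the
 * check must reject it. */
#include <stdbool.h>

static bool ratio_ok(unsigned int src, unsigned int dst,
                     unsigned int min, unsigned int max)
{
        if ((max && (dst << 16) > src * max) ||
            (min && (dst << 16) < src * min))
                return false;
        return true;
}

/* ratio_ok(1920, 100, (1 << 16) / 16, (1 << 16) * 8) == false, because
 * 100 << 16 == 6553600 is smaller than 1920 * 4096 == 7864320. */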
+
+static int exynos_drm_ipp_check_scale_limits(
+ struct drm_exynos_ipp_task_rect *src,
+ struct drm_exynos_ipp_task_rect *dst,
+ const struct drm_exynos_ipp_limit *limits,
+ unsigned int num_limits, bool swap)
+{
+ const struct drm_exynos_ipp_limit_val *lh, *lv;
+ int dw, dh;
+
+ for (; num_limits; limits++, num_limits--)
+ if ((limits->type & DRM_EXYNOS_IPP_LIMIT_TYPE_MASK) ==
+ DRM_EXYNOS_IPP_LIMIT_TYPE_SCALE)
+ break;
+ if (!num_limits)
+ return 0;
+
+ lh = (!swap) ? &limits->h : &limits->v;
+ lv = (!swap) ? &limits->v : &limits->h;
+ dw = (!swap) ? dst->w : dst->h;
+ dh = (!swap) ? dst->h : dst->w;
+
+ if (!__scale_limit_check(src->w, dw, lh->min, lh->max) ||
+ !__scale_limit_check(src->h, dh, lv->min, lv->max))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int exynos_drm_ipp_task_check(struct exynos_drm_ipp_task *task)
+{
+ struct exynos_drm_ipp *ipp = task->ipp;
+ const struct exynos_drm_ipp_formats *src_fmt, *dst_fmt;
+ struct exynos_drm_ipp_buffer *src = &task->src, *dst = &task->dst;
+ unsigned int rotation = task->transform.rotation;
+ int ret = 0;
+ bool swap = drm_rotation_90_or_270(rotation);
+ bool rotate = (rotation != DRM_MODE_ROTATE_0);
+ bool scale = false;
+
+ DRM_DEBUG_DRIVER("Checking task %pK\n", task);
+
+ if (src->rect.w == UINT_MAX)
+ src->rect.w = src->buf.width;
+ if (src->rect.h == UINT_MAX)
+ src->rect.h = src->buf.height;
+ if (dst->rect.w == UINT_MAX)
+ dst->rect.w = dst->buf.width;
+ if (dst->rect.h == UINT_MAX)
+ dst->rect.h = dst->buf.height;
+
+ if (src->rect.x + src->rect.w > (src->buf.width) ||
+ src->rect.y + src->rect.h > (src->buf.height) ||
+ dst->rect.x + dst->rect.w > (dst->buf.width) ||
+ dst->rect.y + dst->rect.h > (dst->buf.height)) {
+ DRM_DEBUG_DRIVER("Task %pK: defined area is outside provided buffers\n",
+ task);
+ return -EINVAL;
+ }
+
+ if ((!swap && (src->rect.w != dst->rect.w ||
+ src->rect.h != dst->rect.h)) ||
+ (swap && (src->rect.w != dst->rect.h ||
+ src->rect.h != dst->rect.w)))
+ scale = true;
+
+ if ((!(ipp->capabilities & DRM_EXYNOS_IPP_CAP_CROP) &&
+ (src->rect.x || src->rect.y || dst->rect.x || dst->rect.y)) ||
+ (!(ipp->capabilities & DRM_EXYNOS_IPP_CAP_ROTATE) && rotate) ||
+ (!(ipp->capabilities & DRM_EXYNOS_IPP_CAP_SCALE) && scale) ||
+ (!(ipp->capabilities & DRM_EXYNOS_IPP_CAP_CONVERT) &&
+ src->buf.fourcc != dst->buf.fourcc)) {
+ DRM_DEBUG_DRIVER("Task %pK: hw capabilities exceeded\n", task);
+ return -EINVAL;
+ }
+
+ src_fmt = __ipp_format_get(ipp, src->buf.fourcc, src->buf.modifier,
+ DRM_EXYNOS_IPP_FORMAT_SOURCE);
+ if (!src_fmt) {
+ DRM_DEBUG_DRIVER("Task %pK: src format not supported\n", task);
+ return -EINVAL;
+ }
+ ret = exynos_drm_ipp_check_size_limits(src, src_fmt->limits,
+ src_fmt->num_limits,
+ rotate, false);
+ if (ret)
+ return ret;
+ ret = exynos_drm_ipp_check_scale_limits(&src->rect, &dst->rect,
+ src_fmt->limits,
+ src_fmt->num_limits, swap);
+ if (ret)
+ return ret;
+
+ dst_fmt = __ipp_format_get(ipp, dst->buf.fourcc, dst->buf.modifier,
+ DRM_EXYNOS_IPP_FORMAT_DESTINATION);
+ if (!dst_fmt) {
+ DRM_DEBUG_DRIVER("Task %pK: dst format not supported\n", task);
+ return -EINVAL;
+ }
+ ret = exynos_drm_ipp_check_size_limits(dst, dst_fmt->limits,
+ dst_fmt->num_limits,
+ false, swap);
+ if (ret)
+ return ret;
+ ret = exynos_drm_ipp_check_scale_limits(&src->rect, &dst->rect,
+ dst_fmt->limits,
+ dst_fmt->num_limits, swap);
+ if (ret)
+ return ret;
+
+ DRM_DEBUG_DRIVER("Task %pK: all checks done.\n", task);
+
+ return ret;
+}
+
+static int exynos_drm_ipp_task_setup_buffers(struct exynos_drm_ipp_task *task,
+ struct drm_file *filp)
+{
+ struct exynos_drm_ipp_buffer *src = &task->src, *dst = &task->dst;
+ int ret = 0;
+
+ DRM_DEBUG_DRIVER("Setting buffer for task %pK\n", task);
+
+ ret = exynos_drm_ipp_task_setup_buffer(src, filp);
+ if (ret) {
+ DRM_DEBUG_DRIVER("Task %pK: src buffer setup failed\n", task);
+ return ret;
+ }
+ ret = exynos_drm_ipp_task_setup_buffer(dst, filp);
+ if (ret) {
+ DRM_DEBUG_DRIVER("Task %pK: dst buffer setup failed\n", task);
+ return ret;
+ }
+
+ DRM_DEBUG_DRIVER("Task %pK: buffers prepared.\n", task);
+
+ return ret;
+}
+
+static int exynos_drm_ipp_event_create(struct exynos_drm_ipp_task *task,
+ struct drm_file *file_priv, uint64_t user_data)
+{
+ struct drm_pending_exynos_ipp_event *e = NULL;
+ int ret;
+
+ e = kzalloc(sizeof(*e), GFP_KERNEL);
+ if (!e)
+ return -ENOMEM;
+
+ e->event.base.type = DRM_EXYNOS_IPP_EVENT;
+ e->event.base.length = sizeof(e->event);
+ e->event.user_data = user_data;
+
+ ret = drm_event_reserve_init(task->dev, file_priv, &e->base,
+ &e->event.base);
+ if (ret)
+ goto free;
+
+ task->event = e;
+ return 0;
+free:
+ kfree(e);
+ return ret;
+}
+
+static void exynos_drm_ipp_event_send(struct exynos_drm_ipp_task *task)
+{
+ struct timespec64 now;
+
+ ktime_get_ts64(&now);
+ task->event->event.tv_sec = now.tv_sec;
+ task->event->event.tv_usec = now.tv_nsec / NSEC_PER_USEC;
+ task->event->event.sequence = atomic_inc_return(&task->ipp->sequence);
+
+ drm_send_event(task->dev, &task->event->base);
+}
+
+static int exynos_drm_ipp_task_cleanup(struct exynos_drm_ipp_task *task)
+{
+ int ret = task->ret;
+
+ if (ret == 0 && task->event) {
+ exynos_drm_ipp_event_send(task);
+ /* ensure event won't be canceled on task free */
+ task->event = NULL;
+ }
+
+ exynos_drm_ipp_task_free(task->ipp, task);
+ return ret;
+}
+
+static void exynos_drm_ipp_cleanup_work(struct work_struct *work)
+{
+ struct exynos_drm_ipp_task *task = container_of(work,
+ struct exynos_drm_ipp_task, cleanup_work);
+
+ exynos_drm_ipp_task_cleanup(task);
+}
+
+static void exynos_drm_ipp_next_task(struct exynos_drm_ipp *ipp);
+
+/**
+ * exynos_drm_ipp_task_done - finish given task and set return code
+ * @task: ipp task to finish
+ * @ret: error code or 0 if operation has been performed successfully
+ */
+void exynos_drm_ipp_task_done(struct exynos_drm_ipp_task *task, int ret)
+{
+ struct exynos_drm_ipp *ipp = task->ipp;
+ unsigned long flags;
+
+ DRM_DEBUG_DRIVER("ipp: %d, task %pK done: %d\n", ipp->id, task, ret);
+
+ spin_lock_irqsave(&ipp->lock, flags);
+ if (ipp->task == task)
+ ipp->task = NULL;
+ task->flags |= DRM_EXYNOS_IPP_TASK_DONE;
+ task->ret = ret;
+ spin_unlock_irqrestore(&ipp->lock, flags);
+
+ exynos_drm_ipp_next_task(ipp);
+ wake_up(&ipp->done_wq);
+
+ if (task->flags & DRM_EXYNOS_IPP_TASK_ASYNC) {
+ INIT_WORK(&task->cleanup_work, exynos_drm_ipp_cleanup_work);
+ schedule_work(&task->cleanup_work);
+ }
+}
+
+static void exynos_drm_ipp_next_task(struct exynos_drm_ipp *ipp)
+{
+ struct exynos_drm_ipp_task *task;
+ unsigned long flags;
+ int ret;
+
+ DRM_DEBUG_DRIVER("ipp: %d, try to run new task\n", ipp->id);
+
+ spin_lock_irqsave(&ipp->lock, flags);
+
+ if (ipp->task || list_empty(&ipp->todo_list)) {
+ spin_unlock_irqrestore(&ipp->lock, flags);
+ return;
+ }
+
+ task = list_first_entry(&ipp->todo_list, struct exynos_drm_ipp_task,
+ head);
+ list_del_init(&task->head);
+ ipp->task = task;
+
+ spin_unlock_irqrestore(&ipp->lock, flags);
+
+ DRM_DEBUG_DRIVER("ipp: %d, selected task %pK to run\n", ipp->id, task);
+
+ ret = ipp->funcs->commit(ipp, task);
+ if (ret)
+ exynos_drm_ipp_task_done(task, ret);
+}
+
+static void exynos_drm_ipp_schedule_task(struct exynos_drm_ipp *ipp,
+ struct exynos_drm_ipp_task *task)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ipp->lock, flags);
+ list_add(&task->head, &ipp->todo_list);
+ spin_unlock_irqrestore(&ipp->lock, flags);
+
+ exynos_drm_ipp_next_task(ipp);
+}
+
+static void exynos_drm_ipp_task_abort(struct exynos_drm_ipp *ipp,
+ struct exynos_drm_ipp_task *task)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ipp->lock, flags);
+ if (task->flags & DRM_EXYNOS_IPP_TASK_DONE) {
+ /* already completed task */
+ exynos_drm_ipp_task_cleanup(task);
+ } else if (ipp->task != task) {
+ /* task has not been scheduled for execution yet */
+ list_del_init(&task->head);
+ exynos_drm_ipp_task_cleanup(task);
+ } else {
+ /*
+ * currently processed task, call abort() and perform
+ * cleanup with async worker
+ */
+ task->flags |= DRM_EXYNOS_IPP_TASK_ASYNC;
+ spin_unlock_irqrestore(&ipp->lock, flags);
+ if (ipp->funcs->abort)
+ ipp->funcs->abort(ipp, task);
+ return;
+ }
+ spin_unlock_irqrestore(&ipp->lock, flags);
+}
+
+/**
+ * exynos_drm_ipp_commit_ioctl - perform image processing operation
+ * @dev: DRM device
+ * @data: ioctl data
+ * @file_priv: DRM file info
+ *
+ * Construct an ipp task from the set of properties provided by the user
+ * and try to schedule it on the picture processor hardware.
+ *
+ * Called by the user via ioctl.
+ *
+ * Returns:
+ * Zero on success, negative errno on failure.
+ */
+int exynos_drm_ipp_commit_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_exynos_ioctl_ipp_commit *arg = data;
+ struct exynos_drm_ipp *ipp;
+ struct exynos_drm_ipp_task *task;
+ int ret = 0;
+
+ if ((arg->flags & ~DRM_EXYNOS_IPP_FLAGS) || arg->reserved)
+ return -EINVAL;
+
+ /* can't test and expect an event at the same time */
+ if ((arg->flags & DRM_EXYNOS_IPP_FLAG_TEST_ONLY) &&
+ (arg->flags & DRM_EXYNOS_IPP_FLAG_EVENT))
+ return -EINVAL;
+
+ ipp = __ipp_get(arg->ipp_id);
+ if (!ipp)
+ return -ENOENT;
+
+ task = exynos_drm_ipp_task_alloc(ipp);
+ if (!task)
+ return -ENOMEM;
+
+ ret = exynos_drm_ipp_task_set(task, arg);
+ if (ret)
+ goto free;
+
+ ret = exynos_drm_ipp_task_check(task);
+ if (ret)
+ goto free;
+
+ ret = exynos_drm_ipp_task_setup_buffers(task, file_priv);
+ if (ret || arg->flags & DRM_EXYNOS_IPP_FLAG_TEST_ONLY)
+ goto free;
+
+ if (arg->flags & DRM_EXYNOS_IPP_FLAG_EVENT) {
+ ret = exynos_drm_ipp_event_create(task, file_priv,
+ arg->user_data);
+ if (ret)
+ goto free;
+ }
+
+ /*
+ * Queue the task for processing on the hardware. The task object
+ * will then be freed after exynos_drm_ipp_task_done().
+ */
+ if (arg->flags & DRM_EXYNOS_IPP_FLAG_NONBLOCK) {
+ DRM_DEBUG_DRIVER("ipp: %d, nonblocking processing task %pK\n",
+ ipp->id, task);
+
+ task->flags |= DRM_EXYNOS_IPP_TASK_ASYNC;
+ exynos_drm_ipp_schedule_task(task->ipp, task);
+ ret = 0;
+ } else {
+ DRM_DEBUG_DRIVER("ipp: %d, processing task %pK\n", ipp->id,
+ task);
+ exynos_drm_ipp_schedule_task(ipp, task);
+ ret = wait_event_interruptible(ipp->done_wq,
+ task->flags & DRM_EXYNOS_IPP_TASK_DONE);
+ if (ret)
+ exynos_drm_ipp_task_abort(ipp, task);
+ else
+ ret = exynos_drm_ipp_task_cleanup(task);
+ }
+ return ret;
+free:
+ exynos_drm_ipp_task_free(ipp, task);
+
+ return ret;
+}
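To tie the pieces together, a hedged user-space sketch of a nonblocking commit with a completion event. The flag, field and event names are taken from the code above; the DRM_IOCTL_EXYNOS_IPP_COMMIT request code is an assumption from the UAPI header, and params would be a blob as described at exynos_drm_ipp_task_set().

/* Sketch: queue a task without blocking, then wait for the DRM event. */
#include <stdint.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <drm/exynos_drm.h>

static int commit_async(int fd, uint32_t ipp_id, void *params, size_t size)
{
        struct drm_exynos_ioctl_ipp_commit arg = {
                .ipp_id = ipp_id,
                .flags = DRM_EXYNOS_IPP_FLAG_EVENT |
                         DRM_EXYNOS_IPP_FLAG_NONBLOCK,
                .params_ptr = (uintptr_t)params,
                .params_size = size,
                .user_data = 0x1234,
        };
        char buf[128];

        if (ioctl(fd, DRM_IOCTL_EXYNOS_IPP_COMMIT, &arg) < 0)
                return -1;

        /* completion arrives as a DRM event on the same file descriptor */
        if (read(fd, buf, sizeof(buf)) >=
            (ssize_t)sizeof(struct drm_exynos_ipp_event)) {
                struct drm_exynos_ipp_event *ev = (void *)buf;

                /* ev->base.type == DRM_EXYNOS_IPP_EVENT */
                return ev->user_data == 0x1234 ? 0 : -1;
        }
        return -1;
}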
diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.h b/drivers/gpu/drm/exynos/exynos_drm_ipp.h
new file mode 100644
index 0000000..0b27d4a
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_ipp.h
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2017 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#ifndef _EXYNOS_DRM_IPP_H_
+#define _EXYNOS_DRM_IPP_H_
+
+#include <drm/drmP.h>
+
+struct exynos_drm_ipp;
+struct exynos_drm_ipp_task;
+
+/**
+ * struct exynos_drm_ipp_funcs - exynos_drm_ipp control functions
+ */
+struct exynos_drm_ipp_funcs {
+ /**
+ * @commit:
+ *
+ * This is the main entry point to start framebuffer processing
+ * in the hardware. The exynos_drm_ipp_task has already been validated.
+ * This function must not wait until the device finishes processing.
+ * When the driver finishes processing, it has to call the
+ * exynos_drm_ipp_task_done() function.
+ *
+ * RETURNS:
+ *
+ * 0 on success or negative error codes in case of failure.
+ */
+ int (*commit)(struct exynos_drm_ipp *ipp,
+ struct exynos_drm_ipp_task *task);
+
+ /**
+ * @abort:
+ *
+ * Informs the driver that it has to abort the currently running
+ * task as soon as possible (i.e. as soon as it can stop the device
+ * safely), even if the task would not have been finished by then.
+ * After the driver performs the necessary steps, it has to call
+ * exynos_drm_ipp_task_done() (as if the task ended normally).
+ * This function does not have to (and will usually not) wait
+ * until the device enters a state when it can be stopped.
+ */
+ void (*abort)(struct exynos_drm_ipp *ipp,
+ struct exynos_drm_ipp_task *task);
+};
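A minimal sketch of a driver-side implementation of this contract. The my_ctx structure and my_hw_*() helpers are hypothetical stand-ins for device-specific code; the rotator conversion later in this patch follows the same shape for @commit.

/* Hypothetical driver skeleton; my_hw_*() only stand for device code. */
struct my_ctx {
        struct exynos_drm_ipp ipp;
        struct exynos_drm_ipp_task *task;
        /* ...device state... */
};

static void my_hw_program(struct my_ctx *ctx, struct exynos_drm_ipp_task *task);
static void my_hw_start(struct my_ctx *ctx);
static void my_hw_stop(struct my_ctx *ctx);

static int my_commit(struct exynos_drm_ipp *ipp,
                     struct exynos_drm_ipp_task *task)
{
        struct my_ctx *ctx = container_of(ipp, struct my_ctx, ipp);

        ctx->task = task;
        my_hw_program(ctx, task);       /* program registers, do not wait */
        my_hw_start(ctx);               /* completion IRQ then calls
                                         * exynos_drm_ipp_task_done() */
        return 0;
}

static void my_abort(struct exynos_drm_ipp *ipp,
                     struct exynos_drm_ipp_task *task)
{
        struct my_ctx *ctx = container_of(ipp, struct my_ctx, ipp);

        my_hw_stop(ctx);                /* stop as soon as it is safe */
        ctx->task = NULL;
        exynos_drm_ipp_task_done(task, -EIO);   /* report like a normal end */
}

static const struct exynos_drm_ipp_funcs my_ipp_funcs = {
        .commit = my_commit,
        .abort = my_abort,
};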
+
+/**
+ * struct exynos_drm_ipp - central picture processor module structure
+ */
+struct exynos_drm_ipp {
+ struct drm_device *dev;
+ struct list_head head;
+ unsigned int id;
+
+ const char *name;
+ const struct exynos_drm_ipp_funcs *funcs;
+ unsigned int capabilities;
+ const struct exynos_drm_ipp_formats *formats;
+ unsigned int num_formats;
+ atomic_t sequence;
+
+ spinlock_t lock;
+ struct exynos_drm_ipp_task *task;
+ struct list_head todo_list;
+ wait_queue_head_t done_wq;
+};
+
+struct exynos_drm_ipp_buffer {
+ struct drm_exynos_ipp_task_buffer buf;
+ struct drm_exynos_ipp_task_rect rect;
+
+ struct exynos_drm_gem *exynos_gem[MAX_FB_BUFFER];
+ const struct drm_format_info *format;
+ dma_addr_t dma_addr[MAX_FB_BUFFER];
+};
+
+/**
+ * struct exynos_drm_ipp_task - a structure describing a transformation to
+ * be performed by the picture processor hardware module
+ */
+struct exynos_drm_ipp_task {
+ struct drm_device *dev;
+ struct exynos_drm_ipp *ipp;
+ struct list_head head;
+
+ struct exynos_drm_ipp_buffer src;
+ struct exynos_drm_ipp_buffer dst;
+
+ struct drm_exynos_ipp_task_transform transform;
+ struct drm_exynos_ipp_task_alpha alpha;
+
+ struct work_struct cleanup_work;
+ unsigned int flags;
+ int ret;
+
+ struct drm_pending_exynos_ipp_event *event;
+};
+
+#define DRM_EXYNOS_IPP_TASK_DONE (1 << 0)
+#define DRM_EXYNOS_IPP_TASK_ASYNC (1 << 1)
+
+struct exynos_drm_ipp_formats {
+ uint32_t fourcc;
+ uint32_t type;
+ uint64_t modifier;
+ const struct drm_exynos_ipp_limit *limits;
+ unsigned int num_limits;
+};
+
+/* helper macros to set exynos_drm_ipp_formats structure and limits */
+#define IPP_SRCDST_MFORMAT(f, m, l) \
+ .fourcc = DRM_FORMAT_##f, .modifier = m, .limits = l, \
+ .num_limits = ARRAY_SIZE(l), \
+ .type = (DRM_EXYNOS_IPP_FORMAT_SOURCE | \
+ DRM_EXYNOS_IPP_FORMAT_DESTINATION)
+
+#define IPP_SRCDST_FORMAT(f, l) IPP_SRCDST_MFORMAT(f, 0, l)
+
+#define IPP_SIZE_LIMIT(l, val...) \
+ .type = (DRM_EXYNOS_IPP_LIMIT_TYPE_SIZE | \
+ DRM_EXYNOS_IPP_LIMIT_SIZE_##l), val
+
+#define IPP_SCALE_LIMIT(val...) \
+ .type = (DRM_EXYNOS_IPP_LIMIT_TYPE_SCALE), val
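Put together, these macros expand into tables like the following sketch. The values are illustrative only (real tables appear in the gsc and rotator conversions in this patch), and it assumes this header plus drm_fourcc.h are included.

/* Illustrative only: a one-format table built with the helper macros. */
static const struct drm_exynos_ipp_limit example_limits[] = {
        { IPP_SIZE_LIMIT(BUFFER, .h = { 8, 8192 }, .v = { 8, 8192 }) },
        { IPP_SCALE_LIMIT(.h = { (1 << 16) / 4, (1 << 16) * 4 },
                          .v = { (1 << 16) / 4, (1 << 16) * 4 }) },
};

static const struct exynos_drm_ipp_formats example_formats[] = {
        { IPP_SRCDST_FORMAT(NV12, example_limits) },
};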
+
+int exynos_drm_ipp_register(struct drm_device *dev, struct exynos_drm_ipp *ipp,
+ const struct exynos_drm_ipp_funcs *funcs, unsigned int caps,
+ const struct exynos_drm_ipp_formats *formats,
+ unsigned int num_formats, const char *name);
+void exynos_drm_ipp_unregister(struct drm_device *dev,
+ struct exynos_drm_ipp *ipp);
+
+void exynos_drm_ipp_task_done(struct exynos_drm_ipp_task *task, int ret);
+
+#ifdef CONFIG_DRM_EXYNOS_IPP
+int exynos_drm_ipp_get_res_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+int exynos_drm_ipp_get_caps_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+int exynos_drm_ipp_get_limits_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+int exynos_drm_ipp_commit_ioctl(struct drm_device *dev,
+ void *data, struct drm_file *file_priv);
+#else
+static inline int exynos_drm_ipp_get_res_ioctl(struct drm_device *dev,
+ void *data, struct drm_file *file_priv)
+{
+ struct drm_exynos_ioctl_ipp_get_res *resp = data;
+
+ resp->count_ipps = 0;
+ return 0;
+}
+static inline int exynos_drm_ipp_get_caps_ioctl(struct drm_device *dev,
+ void *data, struct drm_file *file_priv)
+{
+ return -ENODEV;
+}
+static inline int exynos_drm_ipp_get_limits_ioctl(struct drm_device *dev,
+ void *data, struct drm_file *file_priv)
+{
+ return -ENODEV;
+}
+static inline int exynos_drm_ipp_commit_ioctl(struct drm_device *dev,
+ void *data, struct drm_file *file_priv)
+{
+ return -ENODEV;
+}
+#endif
+#endif
diff --git a/drivers/gpu/drm/exynos/exynos_drm_rotator.c b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
index 79282a8..1a76dd3 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_rotator.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_rotator.c
@@ -10,6 +10,7 @@
*/
#include <linux/kernel.h>
+#include <linux/component.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/io.h>
@@ -22,29 +23,18 @@
#include <drm/exynos_drm.h>
#include "regs-rotator.h"
#include "exynos_drm_drv.h"
+#include "exynos_drm_iommu.h"
#include "exynos_drm_ipp.h"
/*
* Rotator supports image crop/rotation and input/output DMA operations.
* Input DMA reads image data from memory.
* Output DMA writes image data to memory.
- *
- * M2M operation : supports crop/scale/rotation/csc so on.
- * Memory ----> Rotator H/W ----> Memory.
*/
-/*
- * TODO
- * 1. check suspend/resume api if needed.
- * 2. need to check use case platform_device_id.
- * 3. check src/dst size with, height.
- * 4. need to add supported list in prop_list.
- */
+#define ROTATOR_AUTOSUSPEND_DELAY 2000
-#define get_rot_context(dev) platform_get_drvdata(to_platform_device(dev))
-#define get_ctx_from_ippdrv(ippdrv) container_of(ippdrv,\
- struct rot_context, ippdrv);
-#define rot_read(offset) readl(rot->regs + (offset))
+#define rot_read(offset) readl(rot->regs + (offset))
#define rot_write(cfg, offset) writel(cfg, rot->regs + (offset))
enum rot_irq_status {
@@ -52,54 +42,28 @@ enum rot_irq_status {
ROT_IRQ_STATUS_ILLEGAL = 9,
};
-/*
- * A structure of limitation.
- *
- * @min_w: minimum width.
- * @min_h: minimum height.
- * @max_w: maximum width.
- * @max_h: maximum height.
- * @align: align size.
- */
-struct rot_limit {
- u32 min_w;
- u32 min_h;
- u32 max_w;
- u32 max_h;
- u32 align;
-};
-
-/*
- * A structure of limitation table.
- *
- * @ycbcr420_2p: case of YUV.
- * @rgb888: case of RGB.
- */
-struct rot_limit_table {
- struct rot_limit ycbcr420_2p;
- struct rot_limit rgb888;
+struct rot_variant {
+ const struct exynos_drm_ipp_formats *formats;
+ unsigned int num_formats;
};
/*
* A structure of rotator context.
- * @ippdrv: prepare initialization using ippdrv.
- * @regs_res: register resources.
* @regs: memory mapped io registers.
* @clock: rotator gate clock.
- * @limit_tbl: limitation of rotator.
- * @irq: irq number.
- * @cur_buf_id: current operation buffer id.
- * @suspended: suspended state.
*/
struct rot_context {
- struct exynos_drm_ippdrv ippdrv;
- struct resource *regs_res;
+ struct exynos_drm_ipp ipp;
+ struct drm_device *drm_dev;
+ struct device *dev;
void __iomem *regs;
struct clk *clock;
- struct rot_limit_table *limit_tbl;
- int irq;
- int cur_buf_id[EXYNOS_DRM_OPS_MAX];
- bool suspended;
+ const struct exynos_drm_ipp_formats *formats;
+ unsigned int num_formats;
+ struct exynos_drm_ipp_task *task;
};
static void rotator_reg_set_irq(struct rot_context *rot, bool enable)
@@ -114,15 +78,6 @@ static void rotator_reg_set_irq(struct rot_context *rot, bool enable)
rot_write(val, ROT_CONFIG);
}
-static u32 rotator_reg_get_fmt(struct rot_context *rot)
-{
- u32 val = rot_read(ROT_CONTROL);
-
- val &= ROT_CONTROL_FMT_MASK;
-
- return val;
-}
-
static enum rot_irq_status rotator_reg_get_irq_status(struct rot_context *rot)
{
u32 val = rot_read(ROT_STATUS);
@@ -138,9 +93,6 @@ static enum rot_irq_status rotator_reg_get_irq_status(struct rot_context *rot)
static irqreturn_t rotator_irq_handler(int irq, void *arg)
{
struct rot_context *rot = arg;
- struct exynos_drm_ippdrv *ippdrv = &rot->ippdrv;
- struct drm_exynos_ipp_cmd_node *c_node = ippdrv->c_node;
- struct drm_exynos_ipp_event_work *event_work = c_node->event_work;
enum rot_irq_status irq_status;
u32 val;
@@ -152,56 +104,21 @@ static irqreturn_t rotator_irq_handler(int irq, void *arg)
val |= ROT_STATUS_IRQ_PENDING((u32)irq_status);
rot_write(val, ROT_STATUS);
- if (irq_status == ROT_IRQ_STATUS_COMPLETE) {
- event_work->ippdrv = ippdrv;
- event_work->buf_id[EXYNOS_DRM_OPS_DST] =
- rot->cur_buf_id[EXYNOS_DRM_OPS_DST];
- queue_work(ippdrv->event_workq, &event_work->work);
- } else {
- DRM_ERROR("the SFR is set illegally\n");
+ if (rot->task) {
+ struct exynos_drm_ipp_task *task = rot->task;
+
+ rot->task = NULL;
+ pm_runtime_mark_last_busy(rot->dev);
+ pm_runtime_put_autosuspend(rot->dev);
+ exynos_drm_ipp_task_done(task,
+ irq_status == ROT_IRQ_STATUS_COMPLETE ? 0 : -EINVAL);
}
return IRQ_HANDLED;
}
-static void rotator_align_size(struct rot_context *rot, u32 fmt, u32 *hsize,
- u32 *vsize)
+static void rotator_src_set_fmt(struct rot_context *rot, u32 fmt)
{
- struct rot_limit_table *limit_tbl = rot->limit_tbl;
- struct rot_limit *limit;
- u32 mask, val;
-
- /* Get size limit */
- if (fmt == ROT_CONTROL_FMT_RGB888)
- limit = &limit_tbl->rgb888;
- else
- limit = &limit_tbl->ycbcr420_2p;
-
- /* Get mask for rounding to nearest aligned val */
- mask = ~((1 << limit->align) - 1);
-
- /* Set aligned width */
- val = ROT_ALIGN(*hsize, limit->align, mask);
- if (val < limit->min_w)
- *hsize = ROT_MIN(limit->min_w, mask);
- else if (val > limit->max_w)
- *hsize = ROT_MAX(limit->max_w, mask);
- else
- *hsize = val;
-
- /* Set aligned height */
- val = ROT_ALIGN(*vsize, limit->align, mask);
- if (val < limit->min_h)
- *vsize = ROT_MIN(limit->min_h, mask);
- else if (val > limit->max_h)
- *vsize = ROT_MAX(limit->max_h, mask);
- else
- *vsize = val;
-}
-
-static int rotator_src_set_fmt(struct device *dev, u32 fmt)
-{
- struct rot_context *rot = dev_get_drvdata(dev);
u32 val;
val = rot_read(ROT_CONTROL);
@@ -214,515 +131,176 @@ static int rotator_src_set_fmt(struct device *dev, u32 fmt)
case DRM_FORMAT_XRGB8888:
val |= ROT_CONTROL_FMT_RGB888;
break;
- default:
- DRM_ERROR("invalid image format\n");
- return -EINVAL;
}
rot_write(val, ROT_CONTROL);
-
- return 0;
}
-static inline bool rotator_check_reg_fmt(u32 fmt)
+static void rotator_src_set_buf(struct rot_context *rot,
+ struct exynos_drm_ipp_buffer *buf)
{
- if ((fmt == ROT_CONTROL_FMT_YCBCR420_2P) ||
- (fmt == ROT_CONTROL_FMT_RGB888))
- return true;
-
- return false;
-}
-
-static int rotator_src_set_size(struct device *dev, int swap,
- struct drm_exynos_pos *pos,
- struct drm_exynos_sz *sz)
-{
- struct rot_context *rot = dev_get_drvdata(dev);
- u32 fmt, hsize, vsize;
u32 val;
- /* Get format */
- fmt = rotator_reg_get_fmt(rot);
- if (!rotator_check_reg_fmt(fmt)) {
- DRM_ERROR("invalid format.\n");
- return -EINVAL;
- }
-
- /* Align buffer size */
- hsize = sz->hsize;
- vsize = sz->vsize;
- rotator_align_size(rot, fmt, &hsize, &vsize);
-
/* Set buffer size configuration */
- val = ROT_SET_BUF_SIZE_H(vsize) | ROT_SET_BUF_SIZE_W(hsize);
+ val = ROT_SET_BUF_SIZE_H(buf->buf.height) |
+ ROT_SET_BUF_SIZE_W(buf->buf.pitch[0] / buf->format->cpp[0]);
rot_write(val, ROT_SRC_BUF_SIZE);
/* Set crop image position configuration */
- val = ROT_CROP_POS_Y(pos->y) | ROT_CROP_POS_X(pos->x);
+ val = ROT_CROP_POS_Y(buf->rect.y) | ROT_CROP_POS_X(buf->rect.x);
rot_write(val, ROT_SRC_CROP_POS);
- val = ROT_SRC_CROP_SIZE_H(pos->h) | ROT_SRC_CROP_SIZE_W(pos->w);
+ val = ROT_SRC_CROP_SIZE_H(buf->rect.h) |
+ ROT_SRC_CROP_SIZE_W(buf->rect.w);
rot_write(val, ROT_SRC_CROP_SIZE);
- return 0;
+ /* Set buffer DMA address */
+ rot_write(buf->dma_addr[0], ROT_SRC_BUF_ADDR(0));
+ rot_write(buf->dma_addr[1], ROT_SRC_BUF_ADDR(1));
}
-static int rotator_src_set_addr(struct device *dev,
- struct drm_exynos_ipp_buf_info *buf_info,
- u32 buf_id, enum drm_exynos_ipp_buf_type buf_type)
+static void rotator_dst_set_transf(struct rot_context *rot,
+ unsigned int rotation)
{
- struct rot_context *rot = dev_get_drvdata(dev);
- dma_addr_t addr[EXYNOS_DRM_PLANAR_MAX];
- u32 val, fmt, hsize, vsize;
- int i;
-
- /* Set current buf_id */
- rot->cur_buf_id[EXYNOS_DRM_OPS_SRC] = buf_id;
-
- switch (buf_type) {
- case IPP_BUF_ENQUEUE:
- /* Set address configuration */
- for_each_ipp_planar(i)
- addr[i] = buf_info->base[i];
-
- /* Get format */
- fmt = rotator_reg_get_fmt(rot);
- if (!rotator_check_reg_fmt(fmt)) {
- DRM_ERROR("invalid format.\n");
- return -EINVAL;
- }
-
- /* Re-set cb planar for NV12 format */
- if ((fmt == ROT_CONTROL_FMT_YCBCR420_2P) &&
- !addr[EXYNOS_DRM_PLANAR_CB]) {
-
- val = rot_read(ROT_SRC_BUF_SIZE);
- hsize = ROT_GET_BUF_SIZE_W(val);
- vsize = ROT_GET_BUF_SIZE_H(val);
-
- /* Set cb planar */
- addr[EXYNOS_DRM_PLANAR_CB] =
- addr[EXYNOS_DRM_PLANAR_Y] + hsize * vsize;
- }
-
- for_each_ipp_planar(i)
- rot_write(addr[i], ROT_SRC_BUF_ADDR(i));
- break;
- case IPP_BUF_DEQUEUE:
- for_each_ipp_planar(i)
- rot_write(0x0, ROT_SRC_BUF_ADDR(i));
- break;
- default:
- /* Nothing to do */
- break;
- }
-
- return 0;
-}
-
-static int rotator_dst_set_transf(struct device *dev,
- enum drm_exynos_degree degree,
- enum drm_exynos_flip flip, bool *swap)
-{
- struct rot_context *rot = dev_get_drvdata(dev);
u32 val;
/* Set transform configuration */
val = rot_read(ROT_CONTROL);
val &= ~ROT_CONTROL_FLIP_MASK;
- switch (flip) {
- case EXYNOS_DRM_FLIP_VERTICAL:
- val |= ROT_CONTROL_FLIP_VERTICAL;
- break;
- case EXYNOS_DRM_FLIP_HORIZONTAL:
+ if (rotation & DRM_MODE_REFLECT_X)
val |= ROT_CONTROL_FLIP_HORIZONTAL;
- break;
- default:
- /* Flip None */
- break;
- }
+ if (rotation & DRM_MODE_REFLECT_Y)
+ val |= ROT_CONTROL_FLIP_VERTICAL;
val &= ~ROT_CONTROL_ROT_MASK;
- switch (degree) {
- case EXYNOS_DRM_DEGREE_90:
+ if (rotation & DRM_MODE_ROTATE_90)
val |= ROT_CONTROL_ROT_90;
- break;
- case EXYNOS_DRM_DEGREE_180:
+ else if (rotation & DRM_MODE_ROTATE_180)
val |= ROT_CONTROL_ROT_180;
- break;
- case EXYNOS_DRM_DEGREE_270:
+ else if (rotation & DRM_MODE_ROTATE_270)
val |= ROT_CONTROL_ROT_270;
- break;
- default:
- /* Rotation 0 Degree */
- break;
- }
rot_write(val, ROT_CONTROL);
-
- /* Check degree for setting buffer size swap */
- if ((degree == EXYNOS_DRM_DEGREE_90) ||
- (degree == EXYNOS_DRM_DEGREE_270))
- *swap = true;
- else
- *swap = false;
-
- return 0;
}
-static int rotator_dst_set_size(struct device *dev, int swap,
- struct drm_exynos_pos *pos,
- struct drm_exynos_sz *sz)
+static void rotator_dst_set_buf(struct rot_context *rot,
+ struct exynos_drm_ipp_buffer *buf)
{
- struct rot_context *rot = dev_get_drvdata(dev);
- u32 val, fmt, hsize, vsize;
-
- /* Get format */
- fmt = rotator_reg_get_fmt(rot);
- if (!rotator_check_reg_fmt(fmt)) {
- DRM_ERROR("invalid format.\n");
- return -EINVAL;
- }
-
- /* Align buffer size */
- hsize = sz->hsize;
- vsize = sz->vsize;
- rotator_align_size(rot, fmt, &hsize, &vsize);
+ u32 val;
/* Set buffer size configuration */
- val = ROT_SET_BUF_SIZE_H(vsize) | ROT_SET_BUF_SIZE_W(hsize);
+ val = ROT_SET_BUF_SIZE_H(buf->buf.height) |
+ ROT_SET_BUF_SIZE_W(buf->buf.pitch[0] / buf->format->cpp[0]);
rot_write(val, ROT_DST_BUF_SIZE);
/* Set crop image position configuration */
- val = ROT_CROP_POS_Y(pos->y) | ROT_CROP_POS_X(pos->x);
+ val = ROT_CROP_POS_Y(buf->rect.y) | ROT_CROP_POS_X(buf->rect.x);
rot_write(val, ROT_DST_CROP_POS);
- return 0;
+ /* Set buffer DMA address */
+ rot_write(buf->dma_addr[0], ROT_DST_BUF_ADDR(0));
+ rot_write(buf->dma_addr[1], ROT_DST_BUF_ADDR(1));
}
-static int rotator_dst_set_addr(struct device *dev,
- struct drm_exynos_ipp_buf_info *buf_info,
- u32 buf_id, enum drm_exynos_ipp_buf_type buf_type)
+static void rotator_start(struct rot_context *rot)
{
- struct rot_context *rot = dev_get_drvdata(dev);
- dma_addr_t addr[EXYNOS_DRM_PLANAR_MAX];
- u32 val, fmt, hsize, vsize;
- int i;
-
- /* Set current buf_id */
- rot->cur_buf_id[EXYNOS_DRM_OPS_DST] = buf_id;
-
- switch (buf_type) {
- case IPP_BUF_ENQUEUE:
- /* Set address configuration */
- for_each_ipp_planar(i)
- addr[i] = buf_info->base[i];
-
- /* Get format */
- fmt = rotator_reg_get_fmt(rot);
- if (!rotator_check_reg_fmt(fmt)) {
- DRM_ERROR("invalid format.\n");
- return -EINVAL;
- }
-
- /* Re-set cb planar for NV12 format */
- if ((fmt == ROT_CONTROL_FMT_YCBCR420_2P) &&
- !addr[EXYNOS_DRM_PLANAR_CB]) {
- /* Get buf size */
- val = rot_read(ROT_DST_BUF_SIZE);
-
- hsize = ROT_GET_BUF_SIZE_W(val);
- vsize = ROT_GET_BUF_SIZE_H(val);
-
- /* Set cb planar */
- addr[EXYNOS_DRM_PLANAR_CB] =
- addr[EXYNOS_DRM_PLANAR_Y] + hsize * vsize;
- }
-
- for_each_ipp_planar(i)
- rot_write(addr[i], ROT_DST_BUF_ADDR(i));
- break;
- case IPP_BUF_DEQUEUE:
- for_each_ipp_planar(i)
- rot_write(0x0, ROT_DST_BUF_ADDR(i));
- break;
- default:
- /* Nothing to do */
- break;
- }
+ u32 val;
- return 0;
+ /* Set interrupt enable */
+ rotator_reg_set_irq(rot, true);
+
+ val = rot_read(ROT_CONTROL);
+ val |= ROT_CONTROL_START;
+ rot_write(val, ROT_CONTROL);
}
-static struct exynos_drm_ipp_ops rot_src_ops = {
- .set_fmt = rotator_src_set_fmt,
- .set_size = rotator_src_set_size,
- .set_addr = rotator_src_set_addr,
-};
+static int rotator_commit(struct exynos_drm_ipp *ipp,
+ struct exynos_drm_ipp_task *task)
+{
+ struct rot_context *rot =
+ container_of(ipp, struct rot_context, ipp);
-static struct exynos_drm_ipp_ops rot_dst_ops = {
- .set_transf = rotator_dst_set_transf,
- .set_size = rotator_dst_set_size,
- .set_addr = rotator_dst_set_addr,
-};
+ pm_runtime_get_sync(rot->dev);
+ rot->task = task;
-static int rotator_init_prop_list(struct exynos_drm_ippdrv *ippdrv)
-{
- struct drm_exynos_ipp_prop_list *prop_list = &ippdrv->prop_list;
-
- prop_list->version = 1;
- prop_list->flip = (1 << EXYNOS_DRM_FLIP_VERTICAL) |
- (1 << EXYNOS_DRM_FLIP_HORIZONTAL);
- prop_list->degree = (1 << EXYNOS_DRM_DEGREE_0) |
- (1 << EXYNOS_DRM_DEGREE_90) |
- (1 << EXYNOS_DRM_DEGREE_180) |
- (1 << EXYNOS_DRM_DEGREE_270);
- prop_list->csc = 0;
- prop_list->crop = 0;
- prop_list->scale = 0;
+ rotator_src_set_fmt(rot, task->src.buf.fourcc);
+ rotator_src_set_buf(rot, &task->src);
+ rotator_dst_set_transf(rot, task->transform.rotation);
+ rotator_dst_set_buf(rot, &task->dst);
+ rotator_start(rot);
return 0;
}
-static inline bool rotator_check_drm_fmt(u32 fmt)
-{
- switch (fmt) {
- case DRM_FORMAT_XRGB8888:
- case DRM_FORMAT_NV12:
- return true;
- default:
- DRM_DEBUG_KMS("not support format\n");
- return false;
- }
-}
-
-static inline bool rotator_check_drm_flip(enum drm_exynos_flip flip)
-{
- switch (flip) {
- case EXYNOS_DRM_FLIP_NONE:
- case EXYNOS_DRM_FLIP_VERTICAL:
- case EXYNOS_DRM_FLIP_HORIZONTAL:
- case EXYNOS_DRM_FLIP_BOTH:
- return true;
- default:
- DRM_DEBUG_KMS("invalid flip\n");
- return false;
- }
-}
+static const struct exynos_drm_ipp_funcs ipp_funcs = {
+ .commit = rotator_commit,
+};
-static int rotator_ippdrv_check_property(struct device *dev,
- struct drm_exynos_ipp_property *property)
+static int rotator_bind(struct device *dev, struct device *master, void *data)
{
- struct drm_exynos_ipp_config *src_config =
- &property->config[EXYNOS_DRM_OPS_SRC];
- struct drm_exynos_ipp_config *dst_config =
- &property->config[EXYNOS_DRM_OPS_DST];
- struct drm_exynos_pos *src_pos = &src_config->pos;
- struct drm_exynos_pos *dst_pos = &dst_config->pos;
- struct drm_exynos_sz *src_sz = &src_config->sz;
- struct drm_exynos_sz *dst_sz = &dst_config->sz;
- bool swap = false;
-
- /* Check format configuration */
- if (src_config->fmt != dst_config->fmt) {
- DRM_DEBUG_KMS("not support csc feature\n");
- return -EINVAL;
- }
-
- if (!rotator_check_drm_fmt(dst_config->fmt)) {
- DRM_DEBUG_KMS("invalid format\n");
- return -EINVAL;
- }
-
- /* Check transform configuration */
- if (src_config->degree != EXYNOS_DRM_DEGREE_0) {
- DRM_DEBUG_KMS("not support source-side rotation\n");
- return -EINVAL;
- }
-
- switch (dst_config->degree) {
- case EXYNOS_DRM_DEGREE_90:
- case EXYNOS_DRM_DEGREE_270:
- swap = true;
- case EXYNOS_DRM_DEGREE_0:
- case EXYNOS_DRM_DEGREE_180:
- /* No problem */
- break;
- default:
- DRM_DEBUG_KMS("invalid degree\n");
- return -EINVAL;
- }
-
- if (src_config->flip != EXYNOS_DRM_FLIP_NONE) {
- DRM_DEBUG_KMS("not support source-side flip\n");
- return -EINVAL;
- }
+ struct rot_context *rot = dev_get_drvdata(dev);
+ struct drm_device *drm_dev = data;
+ struct exynos_drm_ipp *ipp = &rot->ipp;
- if (!rotator_check_drm_flip(dst_config->flip)) {
- DRM_DEBUG_KMS("invalid flip\n");
- return -EINVAL;
- }
+ rot->drm_dev = drm_dev;
+ drm_iommu_attach_device(drm_dev, dev);
- /* Check size configuration */
- if ((src_pos->x + src_pos->w > src_sz->hsize) ||
- (src_pos->y + src_pos->h > src_sz->vsize)) {
- DRM_DEBUG_KMS("out of source buffer bound\n");
- return -EINVAL;
- }
+ exynos_drm_ipp_register(drm_dev, ipp, &ipp_funcs,
+ DRM_EXYNOS_IPP_CAP_CROP | DRM_EXYNOS_IPP_CAP_ROTATE,
+ rot->formats, rot->num_formats, "rotator");
- if (swap) {
- if ((dst_pos->x + dst_pos->h > dst_sz->vsize) ||
- (dst_pos->y + dst_pos->w > dst_sz->hsize)) {
- DRM_DEBUG_KMS("out of destination buffer bound\n");
- return -EINVAL;
- }
-
- if ((src_pos->w != dst_pos->h) || (src_pos->h != dst_pos->w)) {
- DRM_DEBUG_KMS("not support scale feature\n");
- return -EINVAL;
- }
- } else {
- if ((dst_pos->x + dst_pos->w > dst_sz->hsize) ||
- (dst_pos->y + dst_pos->h > dst_sz->vsize)) {
- DRM_DEBUG_KMS("out of destination buffer bound\n");
- return -EINVAL;
- }
-
- if ((src_pos->w != dst_pos->w) || (src_pos->h != dst_pos->h)) {
- DRM_DEBUG_KMS("not support scale feature\n");
- return -EINVAL;
- }
- }
+ dev_info(dev, "The exynos rotator has been probed successfully\n");
return 0;
}
-static int rotator_ippdrv_start(struct device *dev, enum drm_exynos_ipp_cmd cmd)
+static void rotator_unbind(struct device *dev, struct device *master,
+ void *data)
{
struct rot_context *rot = dev_get_drvdata(dev);
- u32 val;
-
- if (rot->suspended) {
- DRM_ERROR("suspended state\n");
- return -EPERM;
- }
-
- if (cmd != IPP_CMD_M2M) {
- DRM_ERROR("not support cmd: %d\n", cmd);
- return -EINVAL;
- }
-
- /* Set interrupt enable */
- rotator_reg_set_irq(rot, true);
-
- val = rot_read(ROT_CONTROL);
- val |= ROT_CONTROL_START;
-
- rot_write(val, ROT_CONTROL);
+ struct drm_device *drm_dev = data;
+ struct exynos_drm_ipp *ipp = &rot->ipp;
- return 0;
+ exynos_drm_ipp_unregister(drm_dev, ipp);
+ drm_iommu_detach_device(rot->drm_dev, rot->dev);
}
-static struct rot_limit_table rot_limit_tbl_4210 = {
- .ycbcr420_2p = {
- .min_w = 32,
- .min_h = 32,
- .max_w = SZ_64K,
- .max_h = SZ_64K,
- .align = 3,
- },
- .rgb888 = {
- .min_w = 8,
- .min_h = 8,
- .max_w = SZ_16K,
- .max_h = SZ_16K,
- .align = 2,
- },
-};
-
-static struct rot_limit_table rot_limit_tbl_4x12 = {
- .ycbcr420_2p = {
- .min_w = 32,
- .min_h = 32,
- .max_w = SZ_32K,
- .max_h = SZ_32K,
- .align = 3,
- },
- .rgb888 = {
- .min_w = 8,
- .min_h = 8,
- .max_w = SZ_8K,
- .max_h = SZ_8K,
- .align = 2,
- },
+static const struct component_ops rotator_component_ops = {
+ .bind = rotator_bind,
+ .unbind = rotator_unbind,
};
-static struct rot_limit_table rot_limit_tbl_5250 = {
- .ycbcr420_2p = {
- .min_w = 32,
- .min_h = 32,
- .max_w = SZ_32K,
- .max_h = SZ_32K,
- .align = 3,
- },
- .rgb888 = {
- .min_w = 8,
- .min_h = 8,
- .max_w = SZ_8K,
- .max_h = SZ_8K,
- .align = 1,
- },
-};
-
-static const struct of_device_id exynos_rotator_match[] = {
- {
- .compatible = "samsung,exynos4210-rotator",
- .data = &rot_limit_tbl_4210,
- },
- {
- .compatible = "samsung,exynos4212-rotator",
- .data = &rot_limit_tbl_4x12,
- },
- {
- .compatible = "samsung,exynos5250-rotator",
- .data = &rot_limit_tbl_5250,
- },
- {},
-};
-MODULE_DEVICE_TABLE(of, exynos_rotator_match);
-
static int rotator_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
+ struct resource *regs_res;
struct rot_context *rot;
- struct exynos_drm_ippdrv *ippdrv;
+ const struct rot_variant *variant;
+ int irq;
int ret;
- if (!dev->of_node) {
- dev_err(dev, "cannot find of_node.\n");
- return -ENODEV;
- }
-
rot = devm_kzalloc(dev, sizeof(*rot), GFP_KERNEL);
if (!rot)
return -ENOMEM;
- rot->limit_tbl = (struct rot_limit_table *)
- of_device_get_match_data(dev);
- rot->regs_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- rot->regs = devm_ioremap_resource(dev, rot->regs_res);
+ variant = of_device_get_match_data(dev);
+ rot->formats = variant->formats;
+ rot->num_formats = variant->num_formats;
+ rot->dev = dev;
+ regs_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ rot->regs = devm_ioremap_resource(dev, regs_res);
if (IS_ERR(rot->regs))
return PTR_ERR(rot->regs);
- rot->irq = platform_get_irq(pdev, 0);
- if (rot->irq < 0) {
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0) {
dev_err(dev, "failed to get irq\n");
- return rot->irq;
+ return irq;
}
- ret = devm_request_threaded_irq(dev, rot->irq, NULL,
- rotator_irq_handler, IRQF_ONESHOT, "drm_rotator", rot);
+ ret = devm_request_irq(dev, irq, rotator_irq_handler, 0, dev_name(dev),
+ rot);
if (ret < 0) {
dev_err(dev, "failed to request irq\n");
return ret;
@@ -734,35 +312,19 @@ static int rotator_probe(struct platform_device *pdev)
return PTR_ERR(rot->clock);
}
+ pm_runtime_use_autosuspend(dev);
+ pm_runtime_set_autosuspend_delay(dev, ROTATOR_AUTOSUSPEND_DELAY);
pm_runtime_enable(dev);
-
- ippdrv = &rot->ippdrv;
- ippdrv->dev = dev;
- ippdrv->ops[EXYNOS_DRM_OPS_SRC] = &rot_src_ops;
- ippdrv->ops[EXYNOS_DRM_OPS_DST] = &rot_dst_ops;
- ippdrv->check_property = rotator_ippdrv_check_property;
- ippdrv->start = rotator_ippdrv_start;
- ret = rotator_init_prop_list(ippdrv);
- if (ret < 0) {
- dev_err(dev, "failed to init property list.\n");
- goto err_ippdrv_register;
- }
-
- DRM_DEBUG_KMS("ippdrv[%pK]\n", ippdrv);
-
platform_set_drvdata(pdev, rot);
- ret = exynos_drm_ippdrv_register(ippdrv);
- if (ret < 0) {
- dev_err(dev, "failed to register drm rotator device\n");
- goto err_ippdrv_register;
- }
-
- dev_info(dev, "The exynos rotator is probed successfully\n");
+ ret = component_add(dev, &rotator_component_ops);
+ if (ret)
+ goto err_component;
return 0;
-err_ippdrv_register:
+err_component:
+ pm_runtime_dont_use_autosuspend(dev);
pm_runtime_disable(dev);
return ret;
}
@@ -770,45 +332,101 @@ err_ippdrv_register:
static int rotator_remove(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
- struct rot_context *rot = dev_get_drvdata(dev);
- struct exynos_drm_ippdrv *ippdrv = &rot->ippdrv;
-
- exynos_drm_ippdrv_unregister(ippdrv);
+ component_del(dev, &rotator_component_ops);
+ pm_runtime_dont_use_autosuspend(dev);
pm_runtime_disable(dev);
return 0;
}
#ifdef CONFIG_PM
-static int rotator_clk_crtl(struct rot_context *rot, bool enable)
-{
- if (enable) {
- clk_prepare_enable(rot->clock);
- rot->suspended = false;
- } else {
- clk_disable_unprepare(rot->clock);
- rot->suspended = true;
- }
-
- return 0;
-}
-
static int rotator_runtime_suspend(struct device *dev)
{
struct rot_context *rot = dev_get_drvdata(dev);
- return rotator_clk_crtl(rot, false);
+ clk_disable_unprepare(rot->clock);
+ return 0;
}
static int rotator_runtime_resume(struct device *dev)
{
struct rot_context *rot = dev_get_drvdata(dev);
- return rotator_clk_crtl(rot, true);
+ return clk_prepare_enable(rot->clock);
}
#endif
+static const struct drm_exynos_ipp_limit rotator_4210_rgb888_limits[] = {
+ { IPP_SIZE_LIMIT(BUFFER, .h = { 8, SZ_16K }, .v = { 8, SZ_16K }) },
+ { IPP_SIZE_LIMIT(AREA, .h.align = 4, .v.align = 4) },
+};
+
+static const struct drm_exynos_ipp_limit rotator_4412_rgb888_limits[] = {
+ { IPP_SIZE_LIMIT(BUFFER, .h = { 8, SZ_8K }, .v = { 8, SZ_8K }) },
+ { IPP_SIZE_LIMIT(AREA, .h.align = 4, .v.align = 4) },
+};
+
+static const struct drm_exynos_ipp_limit rotator_5250_rgb888_limits[] = {
+ { IPP_SIZE_LIMIT(BUFFER, .h = { 8, SZ_8K }, .v = { 8, SZ_8K }) },
+ { IPP_SIZE_LIMIT(AREA, .h.align = 2, .v.align = 2) },
+};
+
+static const struct drm_exynos_ipp_limit rotator_4210_yuv_limits[] = {
+ { IPP_SIZE_LIMIT(BUFFER, .h = { 32, SZ_64K }, .v = { 32, SZ_64K }) },
+ { IPP_SIZE_LIMIT(AREA, .h.align = 8, .v.align = 8) },
+};
+
+static const struct drm_exynos_ipp_limit rotator_4412_yuv_limits[] = {
+ { IPP_SIZE_LIMIT(BUFFER, .h = { 32, SZ_32K }, .v = { 32, SZ_32K }) },
+ { IPP_SIZE_LIMIT(AREA, .h.align = 8, .v.align = 8) },
+};
+
+static const struct exynos_drm_ipp_formats rotator_4210_formats[] = {
+ { IPP_SRCDST_FORMAT(XRGB8888, rotator_4210_rgb888_limits) },
+ { IPP_SRCDST_FORMAT(NV12, rotator_4210_yuv_limits) },
+};
+
+static const struct exynos_drm_ipp_formats rotator_4412_formats[] = {
+ { IPP_SRCDST_FORMAT(XRGB8888, rotator_4412_rgb888_limits) },
+ { IPP_SRCDST_FORMAT(NV12, rotator_4412_yuv_limits) },
+};
+
+static const struct exynos_drm_ipp_formats rotator_5250_formats[] = {
+ { IPP_SRCDST_FORMAT(XRGB8888, rotator_5250_rgb888_limits) },
+ { IPP_SRCDST_FORMAT(NV12, rotator_4412_yuv_limits) },
+};
+
+static const struct rot_variant rotator_4210_data = {
+ .formats = rotator_4210_formats,
+ .num_formats = ARRAY_SIZE(rotator_4210_formats),
+};
+
+static const struct rot_variant rotator_4412_data = {
+ .formats = rotator_4412_formats,
+ .num_formats = ARRAY_SIZE(rotator_4412_formats),
+};
+
+static const struct rot_variant rotator_5250_data = {
+ .formats = rotator_5250_formats,
+ .num_formats = ARRAY_SIZE(rotator_5250_formats),
+};
+
+static const struct of_device_id exynos_rotator_match[] = {
+ {
+ .compatible = "samsung,exynos4210-rotator",
+ .data = &rotator_4210_data,
+ }, {
+ .compatible = "samsung,exynos4212-rotator",
+ .data = &rotator_4412_data,
+ }, {
+ .compatible = "samsung,exynos5250-rotator",
+ .data = &rotator_5250_data,
+ }, {
+ },
+};
+MODULE_DEVICE_TABLE(of, exynos_rotator_match);
+
static const struct dev_pm_ops rotator_pm_ops = {
SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
pm_runtime_force_resume)
@@ -820,7 +438,7 @@ struct platform_driver rotator_driver = {
.probe = rotator_probe,
.remove = rotator_remove,
.driver = {
- .name = "exynos-rot",
+ .name = "exynos-rotator",
.owner = THIS_MODULE,
.pm = &rotator_pm_ops,
.of_match_table = exynos_rotator_match,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_scaler.c b/drivers/gpu/drm/exynos/exynos_drm_scaler.c
new file mode 100644
index 0000000..63b05b7
--- /dev/null
+++ b/drivers/gpu/drm/exynos/exynos_drm_scaler.c
@@ -0,0 +1,694 @@
+/*
+ * Copyright (C) 2017 Samsung Electronics Co.Ltd
+ * Author:
+ * Andrzej Pietrasiewicz <andrzej.p@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/component.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+#include <linux/of_device.h>
+#include <linux/pm_runtime.h>
+
+#include <drm/drmP.h>
+#include <drm/exynos_drm.h>
+#include "regs-scaler.h"
+#include "exynos_drm_fb.h"
+#include "exynos_drm_drv.h"
+#include "exynos_drm_iommu.h"
+#include "exynos_drm_ipp.h"
+
+#define scaler_read(offset) readl(scaler->regs + (offset))
+#define scaler_write(cfg, offset) writel(cfg, scaler->regs + (offset))
+#define SCALER_MAX_CLK 4
+#define SCALER_AUTOSUSPEND_DELAY 2000
+
+struct scaler_data {
+ const char *clk_name[SCALER_MAX_CLK];
+ unsigned int num_clk;
+ const struct exynos_drm_ipp_formats *formats;
+ unsigned int num_formats;
+};
+
+struct scaler_context {
+ struct exynos_drm_ipp ipp;
+ struct drm_device *drm_dev;
+ struct device *dev;
+ void __iomem *regs;
+ struct clk *clock[SCALER_MAX_CLK];
+ struct exynos_drm_ipp_task *task;
+ const struct scaler_data *scaler_data;
+};
+
+static u32 scaler_get_format(u32 drm_fmt)
+{
+ switch (drm_fmt) {
+ case DRM_FORMAT_NV21:
+ return SCALER_YUV420_2P_UV;
+ case DRM_FORMAT_NV12:
+ return SCALER_YUV420_2P_VU;
+ case DRM_FORMAT_YUV420:
+ return SCALER_YUV420_3P;
+ case DRM_FORMAT_YUYV:
+ return SCALER_YUV422_1P_YUYV;
+ case DRM_FORMAT_UYVY:
+ return SCALER_YUV422_1P_UYVY;
+ case DRM_FORMAT_YVYU:
+ return SCALER_YUV422_1P_YVYU;
+ case DRM_FORMAT_NV61:
+ return SCALER_YUV422_2P_UV;
+ case DRM_FORMAT_NV16:
+ return SCALER_YUV422_2P_VU;
+ case DRM_FORMAT_YUV422:
+ return SCALER_YUV422_3P;
+ case DRM_FORMAT_NV42:
+ return SCALER_YUV444_2P_UV;
+ case DRM_FORMAT_NV24:
+ return SCALER_YUV444_2P_VU;
+ case DRM_FORMAT_YUV444:
+ return SCALER_YUV444_3P;
+ case DRM_FORMAT_RGB565:
+ return SCALER_RGB_565;
+ case DRM_FORMAT_XRGB1555:
+ return SCALER_ARGB1555;
+ case DRM_FORMAT_ARGB1555:
+ return SCALER_ARGB1555;
+ case DRM_FORMAT_XRGB4444:
+ return SCALER_ARGB4444;
+ case DRM_FORMAT_ARGB4444:
+ return SCALER_ARGB4444;
+ case DRM_FORMAT_XRGB8888:
+ return SCALER_ARGB8888;
+ case DRM_FORMAT_ARGB8888:
+ return SCALER_ARGB8888;
+ case DRM_FORMAT_RGBX8888:
+ return SCALER_RGBA8888;
+ case DRM_FORMAT_RGBA8888:
+ return SCALER_RGBA8888;
+ default:
+ break;
+ }
+
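+	/* note: 0 also names SCALER_YUV420_2P_UV; formats are assumed to have been validated by the IPP core before this point */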
+ return 0;
+}
+
+static inline void scaler_enable_int(struct scaler_context *scaler)
+{
+ u32 val;
+
+ val = SCALER_INT_EN_TIMEOUT |
+ SCALER_INT_EN_ILLEGAL_BLEND |
+ SCALER_INT_EN_ILLEGAL_RATIO |
+ SCALER_INT_EN_ILLEGAL_DST_HEIGHT |
+ SCALER_INT_EN_ILLEGAL_DST_WIDTH |
+ SCALER_INT_EN_ILLEGAL_DST_V_POS |
+ SCALER_INT_EN_ILLEGAL_DST_H_POS |
+ SCALER_INT_EN_ILLEGAL_DST_C_SPAN |
+ SCALER_INT_EN_ILLEGAL_DST_Y_SPAN |
+ SCALER_INT_EN_ILLEGAL_DST_CR_BASE |
+ SCALER_INT_EN_ILLEGAL_DST_CB_BASE |
+ SCALER_INT_EN_ILLEGAL_DST_Y_BASE |
+ SCALER_INT_EN_ILLEGAL_DST_COLOR |
+ SCALER_INT_EN_ILLEGAL_SRC_HEIGHT |
+ SCALER_INT_EN_ILLEGAL_SRC_WIDTH |
+ SCALER_INT_EN_ILLEGAL_SRC_CV_POS |
+ SCALER_INT_EN_ILLEGAL_SRC_CH_POS |
+ SCALER_INT_EN_ILLEGAL_SRC_YV_POS |
+ SCALER_INT_EN_ILLEGAL_SRC_YH_POS |
+ SCALER_INT_EN_ILLEGAL_DST_SPAN |
+ SCALER_INT_EN_ILLEGAL_SRC_Y_SPAN |
+ SCALER_INT_EN_ILLEGAL_SRC_CR_BASE |
+ SCALER_INT_EN_ILLEGAL_SRC_CB_BASE |
+ SCALER_INT_EN_ILLEGAL_SRC_Y_BASE |
+ SCALER_INT_EN_ILLEGAL_SRC_COLOR |
+ SCALER_INT_EN_FRAME_END;
+ scaler_write(val, SCALER_INT_EN);
+}
+
+static inline void scaler_set_src_fmt(struct scaler_context *scaler,
+ u32 src_fmt)
+{
+ u32 val;
+
+ val = SCALER_SRC_CFG_SET_COLOR_FORMAT(src_fmt);
+ scaler_write(val, SCALER_SRC_CFG);
+}
+
+static inline void scaler_set_src_base(struct scaler_context *scaler,
+ struct exynos_drm_ipp_buffer *src_buf)
+{
+ static const unsigned int bases[] = {
+ SCALER_SRC_Y_BASE,
+ SCALER_SRC_CB_BASE,
+ SCALER_SRC_CR_BASE,
+ };
+ int i;
+
+ for (i = 0; i < src_buf->format->num_planes; ++i)
+ scaler_write(src_buf->dma_addr[i], bases[i]);
+}
+
+static inline void scaler_set_src_span(struct scaler_context *scaler,
+ struct exynos_drm_ipp_buffer *src_buf)
+{
+ u32 val;
+
+ val = SCALER_SRC_SPAN_SET_Y_SPAN(src_buf->buf.pitch[0] /
+ src_buf->format->cpp[0]);
+
+ if (src_buf->format->num_planes > 1)
+ val |= SCALER_SRC_SPAN_SET_C_SPAN(src_buf->buf.pitch[1]);
+
+ scaler_write(val, SCALER_SRC_SPAN);
+}
+
+static inline void scaler_set_src_luma_pos(struct scaler_context *scaler,
+ struct drm_exynos_ipp_task_rect *src_pos)
+{
+ u32 val;
+
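+	/* the position registers take quarter-pixel units, hence the shift by 2 */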
+ val = SCALER_SRC_Y_POS_SET_YH_POS(src_pos->x << 2);
+ val |= SCALER_SRC_Y_POS_SET_YV_POS(src_pos->y << 2);
+ scaler_write(val, SCALER_SRC_Y_POS);
+ scaler_write(val, SCALER_SRC_C_POS); /* the chroma position register takes the same value as the luma one */
+}
+
+static inline void scaler_set_src_wh(struct scaler_context *scaler,
+ struct drm_exynos_ipp_task_rect *src_pos)
+{
+ u32 val;
+
+ val = SCALER_SRC_WH_SET_WIDTH(src_pos->w);
+ val |= SCALER_SRC_WH_SET_HEIGHT(src_pos->h);
+ scaler_write(val, SCALER_SRC_WH);
+}
+
+static inline void scaler_set_dst_fmt(struct scaler_context *scaler,
+ u32 dst_fmt)
+{
+ u32 val;
+
+ val = SCALER_DST_CFG_SET_COLOR_FORMAT(dst_fmt);
+ scaler_write(val, SCALER_DST_CFG);
+}
+
+static inline void scaler_set_dst_base(struct scaler_context *scaler,
+ struct exynos_drm_ipp_buffer *dst_buf)
+{
+ static const unsigned int bases[] = {
+ SCALER_DST_Y_BASE,
+ SCALER_DST_CB_BASE,
+ SCALER_DST_CR_BASE,
+ };
+ int i;
+
+ for (i = 0; i < dst_buf->format->num_planes; ++i)
+ scaler_write(dst_buf->dma_addr[i], bases[i]);
+}
+
+static inline void scaler_set_dst_span(struct scaler_context *scaler,
+ struct exynos_drm_ipp_buffer *dst_buf)
+{
+ u32 val;
+
+ val = SCALER_DST_SPAN_SET_Y_SPAN(dst_buf->buf.pitch[0] /
+ dst_buf->format->cpp[0]);
+
+ if (dst_buf->format->num_planes > 1)
+ val |= SCALER_DST_SPAN_SET_C_SPAN(dst_buf->buf.pitch[1]);
+
+ scaler_write(val, SCALER_DST_SPAN);
+}
+
+static inline void scaler_set_dst_wh(struct scaler_context *scaler,
+ struct drm_exynos_ipp_task_rect *dst_pos)
+{
+ u32 val;
+
+ val = SCALER_DST_WH_SET_WIDTH(dst_pos->w);
+ val |= SCALER_DST_WH_SET_HEIGHT(dst_pos->h);
+ scaler_write(val, SCALER_DST_WH);
+}
+
+static inline void scaler_set_dst_pos(struct scaler_context *scaler,
+ struct drm_exynos_ipp_task_rect *dst_pos)
+{
+ u32 val;
+
+ val = SCALER_DST_POS_SET_H_POS(dst_pos->x);
+ val |= SCALER_DST_POS_SET_V_POS(dst_pos->y);
+ scaler_write(val, SCALER_DST_POS);
+}
+
+static inline void scaler_set_hv_ratio(struct scaler_context *scaler,
+ unsigned int rotation,
+ struct drm_exynos_ipp_task_rect *src_pos,
+ struct drm_exynos_ipp_task_rect *dst_pos)
+{
+ u32 val, h_ratio, v_ratio;
+
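+	/* ratios are src/dst in 16.16 fixed point; the source axes swap for 90/270 rotation */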
+ if (drm_rotation_90_or_270(rotation)) {
+ h_ratio = (src_pos->h << 16) / dst_pos->w;
+ v_ratio = (src_pos->w << 16) / dst_pos->h;
+ } else {
+ h_ratio = (src_pos->w << 16) / dst_pos->w;
+ v_ratio = (src_pos->h << 16) / dst_pos->h;
+ }
+
+ val = SCALER_H_RATIO_SET(h_ratio);
+ scaler_write(val, SCALER_H_RATIO);
+
+ val = SCALER_V_RATIO_SET(v_ratio);
+ scaler_write(val, SCALER_V_RATIO);
+}
+
+static inline void scaler_set_rotation(struct scaler_context *scaler,
+ unsigned int rotation)
+{
+ u32 val = 0;
+
+ if (rotation & DRM_MODE_ROTATE_90)
+ val |= SCALER_ROT_CFG_SET_ROTMODE(SCALER_ROT_MODE_90);
+ else if (rotation & DRM_MODE_ROTATE_180)
+ val |= SCALER_ROT_CFG_SET_ROTMODE(SCALER_ROT_MODE_180);
+ else if (rotation & DRM_MODE_ROTATE_270)
+ val |= SCALER_ROT_CFG_SET_ROTMODE(SCALER_ROT_MODE_270);
+ if (rotation & DRM_MODE_REFLECT_X)
+ val |= SCALER_ROT_CFG_FLIP_X_EN;
+ if (rotation & DRM_MODE_REFLECT_Y)
+ val |= SCALER_ROT_CFG_FLIP_Y_EN;
+ scaler_write(val, SCALER_ROT_CFG);
+}
+
+static inline void scaler_set_csc(struct scaler_context *scaler,
+ const struct drm_format_info *fmt)
+{
+ static const u32 csc_mtx[2][3][3] = {
+ { /* YCbCr to RGB */
+ {0x254, 0x000, 0x331},
+ {0x254, 0xf38, 0xe60},
+ {0x254, 0x409, 0x000},
+ },
+ { /* RGB to YCbCr */
+ {0x084, 0x102, 0x032},
+ {0xfb4, 0xf6b, 0x0e1},
+ {0x0e1, 0xf44, 0xfdc},
+ },
+ };
+ int i, j, dir;
+
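+	/* pick the direction from the source format: RGB input uses RGB-to-YCbCr, anything else YCbCr-to-RGB */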
+ switch (fmt->format) {
+ case DRM_FORMAT_RGB565:
+ case DRM_FORMAT_XRGB1555:
+ case DRM_FORMAT_ARGB1555:
+ case DRM_FORMAT_XRGB4444:
+ case DRM_FORMAT_ARGB4444:
+ case DRM_FORMAT_XRGB8888:
+ case DRM_FORMAT_ARGB8888:
+ case DRM_FORMAT_RGBX8888:
+ case DRM_FORMAT_RGBA8888:
+ dir = 1;
+ break;
+ default:
+ dir = 0;
+ }
+
+ for (i = 0; i < 3; i++)
+ for (j = 0; j < 3; j++)
+ scaler_write(csc_mtx[dir][i][j], SCALER_CSC_COEF(j, i));
+}
+
+static inline void scaler_set_timer(struct scaler_context *scaler,
+ unsigned int timer, unsigned int divider)
+{
+ u32 val;
+
+ val = SCALER_TIMEOUT_CTRL_TIMER_ENABLE;
+ val |= SCALER_TIMEOUT_CTRL_SET_TIMER_VALUE(timer);
+ val |= SCALER_TIMEOUT_CTRL_SET_TIMER_DIV(divider);
+ scaler_write(val, SCALER_TIMEOUT_CTRL);
+}
+
+static inline void scaler_start_hw(struct scaler_context *scaler)
+{
+ scaler_write(SCALER_CFG_START_CMD, SCALER_CFG);
+}
+
+static int scaler_commit(struct exynos_drm_ipp *ipp,
+ struct exynos_drm_ipp_task *task)
+{
+ struct scaler_context *scaler =
+ container_of(ipp, struct scaler_context, ipp);
+
+ u32 src_fmt = scaler_get_format(task->src.buf.fourcc);
+ struct drm_exynos_ipp_task_rect *src_pos = &task->src.rect;
+
+ u32 dst_fmt = scaler_get_format(task->dst.buf.fourcc);
+ struct drm_exynos_ipp_task_rect *dst_pos = &task->dst.rect;
+
+ scaler->task = task;
+
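+	/* hold a runtime PM reference for the duration of the job; the irq handler drops it */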
+ pm_runtime_get_sync(scaler->dev);
+
+ scaler_set_src_fmt(scaler, src_fmt);
+ scaler_set_src_base(scaler, &task->src);
+ scaler_set_src_span(scaler, &task->src);
+ scaler_set_src_luma_pos(scaler, src_pos);
+ scaler_set_src_wh(scaler, src_pos);
+
+ scaler_set_dst_fmt(scaler, dst_fmt);
+ scaler_set_dst_base(scaler, &task->dst);
+ scaler_set_dst_span(scaler, &task->dst);
+ scaler_set_dst_wh(scaler, dst_pos);
+ scaler_set_dst_pos(scaler, dst_pos);
+
+ scaler_set_hv_ratio(scaler, task->transform.rotation, src_pos, dst_pos);
+ scaler_set_rotation(scaler, task->transform.rotation);
+
+ scaler_set_csc(scaler, task->src.format);
+
+ scaler_set_timer(scaler, 0xffff, 0xf);
+
+ scaler_enable_int(scaler);
+ scaler_start_hw(scaler);
+
+ return 0;
+}
+
+static struct exynos_drm_ipp_funcs ipp_funcs = {
+ .commit = scaler_commit,
+};
+
+static inline void scaler_disable_int(struct scaler_context *scaler)
+{
+ scaler_write(0, SCALER_INT_EN);
+}
+
+static inline u32 scaler_get_int_status(struct scaler_context *scaler)
+{
+ return scaler_read(SCALER_INT_STATUS);
+}
+
+static inline int scaler_task_done(u32 val)
+{
+ return val & SCALER_INT_STATUS_FRAME_END ? 0 : -EINVAL;
+}
+
+static irqreturn_t scaler_irq_handler(int irq, void *arg)
+{
+ struct scaler_context *scaler = arg;
+
+ u32 val = scaler_get_int_status(scaler);
+
+ scaler_disable_int(scaler);
+
+ if (scaler->task) {
+ struct exynos_drm_ipp_task *task = scaler->task;
+
+ scaler->task = NULL;
+ pm_runtime_mark_last_busy(scaler->dev);
+ pm_runtime_put_autosuspend(scaler->dev);
+ exynos_drm_ipp_task_done(task, scaler_task_done(val));
+ }
+
+ return IRQ_HANDLED;
+}
+
+static int scaler_bind(struct device *dev, struct device *master, void *data)
+{
+ struct scaler_context *scaler = dev_get_drvdata(dev);
+ struct drm_device *drm_dev = data;
+ struct exynos_drm_ipp *ipp = &scaler->ipp;
+
+ scaler->drm_dev = drm_dev;
+ drm_iommu_attach_device(drm_dev, dev);
+
+ exynos_drm_ipp_register(drm_dev, ipp, &ipp_funcs,
+ DRM_EXYNOS_IPP_CAP_CROP | DRM_EXYNOS_IPP_CAP_ROTATE |
+ DRM_EXYNOS_IPP_CAP_SCALE | DRM_EXYNOS_IPP_CAP_CONVERT,
+ scaler->scaler_data->formats,
+ scaler->scaler_data->num_formats, "scaler");
+
+ dev_info(dev, "The exynos scaler has been probed successfully\n");
+
+ return 0;
+}
+
+static void scaler_unbind(struct device *dev, struct device *master,
+ void *data)
+{
+ struct scaler_context *scaler = dev_get_drvdata(dev);
+ struct drm_device *drm_dev = data;
+ struct exynos_drm_ipp *ipp = &scaler->ipp;
+
+ exynos_drm_ipp_unregister(drm_dev, ipp);
+ drm_iommu_detach_device(scaler->drm_dev, scaler->dev);
+}
+
+static const struct component_ops scaler_component_ops = {
+ .bind = scaler_bind,
+ .unbind = scaler_unbind,
+};
+
+static int scaler_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct resource *regs_res;
+ struct scaler_context *scaler;
+ int irq;
+ int ret, i;
+
+ scaler = devm_kzalloc(dev, sizeof(*scaler), GFP_KERNEL);
+ if (!scaler)
+ return -ENOMEM;
+
+ scaler->scaler_data = of_device_get_match_data(dev);
+
+ scaler->dev = dev;
+ regs_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ scaler->regs = devm_ioremap_resource(dev, regs_res);
+ if (IS_ERR(scaler->regs))
+ return PTR_ERR(scaler->regs);
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0) {
+ dev_err(dev, "failed to get irq\n");
+ return irq;
+ }
+
+ ret = devm_request_threaded_irq(dev, irq, NULL, scaler_irq_handler,
+ IRQF_ONESHOT, "drm_scaler", scaler);
+ if (ret < 0) {
+ dev_err(dev, "failed to request irq\n");
+ return ret;
+ }
+
+ for (i = 0; i < scaler->scaler_data->num_clk; ++i) {
+ scaler->clock[i] = devm_clk_get(dev,
+ scaler->scaler_data->clk_name[i]);
+ if (IS_ERR(scaler->clock[i])) {
+ dev_err(dev, "failed to get clock\n");
+ return PTR_ERR(scaler->clock[i]);
+ }
+ }
+
+ pm_runtime_use_autosuspend(dev);
+ pm_runtime_set_autosuspend_delay(dev, SCALER_AUTOSUSPEND_DELAY);
+ pm_runtime_enable(dev);
+ platform_set_drvdata(pdev, scaler);
+
+ ret = component_add(dev, &scaler_component_ops);
+ if (ret)
+ goto err_ippdrv_register;
+
+ return 0;
+
+err_ippdrv_register:
+ pm_runtime_dont_use_autosuspend(dev);
+ pm_runtime_disable(dev);
+ return ret;
+}
+
+static int scaler_remove(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+
+ component_del(dev, &scaler_component_ops);
+ pm_runtime_dont_use_autosuspend(dev);
+ pm_runtime_disable(dev);
+
+ return 0;
+}
+
+#ifdef CONFIG_PM
+
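+/* clk_disable_unprepare() returns void, so wrap it to match clk_prepare_enable()'s signature */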
+static int clk_disable_unprepare_wrapper(struct clk *clk)
+{
+ clk_disable_unprepare(clk);
+
+ return 0;
+}
+
+static int scaler_clk_ctrl(struct scaler_context *scaler, bool enable)
+{
+ int (*clk_fun)(struct clk *clk);
+ int i;
+
+ clk_fun = enable ? clk_prepare_enable : clk_disable_unprepare_wrapper;
+
+ for (i = 0; i < scaler->scaler_data->num_clk; ++i)
+ clk_fun(scaler->clock[i]);
+
+ return 0;
+}
+
+static int scaler_runtime_suspend(struct device *dev)
+{
+ struct scaler_context *scaler = dev_get_drvdata(dev);
+
+ return scaler_clk_ctrl(scaler, false);
+}
+
+static int scaler_runtime_resume(struct device *dev)
+{
+ struct scaler_context *scaler = dev_get_drvdata(dev);
+
+ return scaler_clk_ctrl(scaler, true);
+}
+#endif
+
+static const struct dev_pm_ops scaler_pm_ops = {
+ SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+ pm_runtime_force_resume)
+ SET_RUNTIME_PM_OPS(scaler_runtime_suspend, scaler_runtime_resume, NULL)
+};
+
+static const struct drm_exynos_ipp_limit scaler_5420_two_pixel_hv_limits[] = {
+ { IPP_SIZE_LIMIT(BUFFER, .h = { 16, SZ_8K }, .v = { 16, SZ_8K }) },
+ { IPP_SIZE_LIMIT(AREA, .h.align = 2, .v.align = 2) },
+ { IPP_SCALE_LIMIT(.h = { 65536 * 1 / 4, 65536 * 16 },
+ .v = { 65536 * 1 / 4, 65536 * 16 }) },
+};
+
+static const struct drm_exynos_ipp_limit scaler_5420_two_pixel_h_limits[] = {
+ { IPP_SIZE_LIMIT(BUFFER, .h = { 16, SZ_8K }, .v = { 16, SZ_8K }) },
+ { IPP_SIZE_LIMIT(AREA, .h.align = 2, .v.align = 1) },
+ { IPP_SCALE_LIMIT(.h = { 65536 * 1 / 4, 65536 * 16 },
+ .v = { 65536 * 1 / 4, 65536 * 16 }) },
+};
+
+static const struct drm_exynos_ipp_limit scaler_5420_one_pixel_limits[] = {
+ { IPP_SIZE_LIMIT(BUFFER, .h = { 16, SZ_8K }, .v = { 16, SZ_8K }) },
+ { IPP_SCALE_LIMIT(.h = { 65536 * 1 / 4, 65536 * 16 },
+ .v = { 65536 * 1 / 4, 65536 * 16 }) },
+};
+
+static const struct exynos_drm_ipp_formats exynos5420_formats[] = {
+ /* SCALER_YUV420_2P_UV */
+ { IPP_SRCDST_FORMAT(NV21, scaler_5420_two_pixel_hv_limits) },
+
+ /* SCALER_YUV420_2P_VU */
+ { IPP_SRCDST_FORMAT(NV12, scaler_5420_two_pixel_hv_limits) },
+
+ /* SCALER_YUV420_3P */
+ { IPP_SRCDST_FORMAT(YUV420, scaler_5420_two_pixel_hv_limits) },
+
+ /* SCALER_YUV422_1P_YUYV */
+ { IPP_SRCDST_FORMAT(YUYV, scaler_5420_two_pixel_h_limits) },
+
+ /* SCALER_YUV422_1P_UYVY */
+ { IPP_SRCDST_FORMAT(UYVY, scaler_5420_two_pixel_h_limits) },
+
+ /* SCALER_YUV422_1P_YVYU */
+ { IPP_SRCDST_FORMAT(YVYU, scaler_5420_two_pixel_h_limits) },
+
+ /* SCALER_YUV422_2P_UV */
+ { IPP_SRCDST_FORMAT(NV61, scaler_5420_two_pixel_h_limits) },
+
+ /* SCALER_YUV422_2P_VU */
+ { IPP_SRCDST_FORMAT(NV16, scaler_5420_two_pixel_h_limits) },
+
+ /* SCALER_YUV422_3P */
+ { IPP_SRCDST_FORMAT(YUV422, scaler_5420_two_pixel_h_limits) },
+
+ /* SCALER_YUV444_2P_UV */
+ { IPP_SRCDST_FORMAT(NV42, scaler_5420_one_pixel_limits) },
+
+ /* SCALER_YUV444_2P_VU */
+ { IPP_SRCDST_FORMAT(NV24, scaler_5420_one_pixel_limits) },
+
+ /* SCALER_YUV444_3P */
+ { IPP_SRCDST_FORMAT(YUV444, scaler_5420_one_pixel_limits) },
+
+ /* SCALER_RGB_565 */
+ { IPP_SRCDST_FORMAT(RGB565, scaler_5420_one_pixel_limits) },
+
+ /* SCALER_ARGB1555 */
+ { IPP_SRCDST_FORMAT(XRGB1555, scaler_5420_one_pixel_limits) },
+
+ /* SCALER_ARGB1555 */
+ { IPP_SRCDST_FORMAT(ARGB1555, scaler_5420_one_pixel_limits) },
+
+ /* SCALER_ARGB4444 */
+ { IPP_SRCDST_FORMAT(XRGB4444, scaler_5420_one_pixel_limits) },
+
+ /* SCALER_ARGB4444 */
+ { IPP_SRCDST_FORMAT(ARGB4444, scaler_5420_one_pixel_limits) },
+
+ /* SCALER_ARGB8888 */
+ { IPP_SRCDST_FORMAT(XRGB8888, scaler_5420_one_pixel_limits) },
+
+ /* SCALER_ARGB8888 */
+ { IPP_SRCDST_FORMAT(ARGB8888, scaler_5420_one_pixel_limits) },
+
+ /* SCALER_RGBA8888 */
+ { IPP_SRCDST_FORMAT(RGBX8888, scaler_5420_one_pixel_limits) },
+
+ /* SCALER_RGBA8888 */
+ { IPP_SRCDST_FORMAT(RGBA8888, scaler_5420_one_pixel_limits) },
+};
+
+static const struct scaler_data exynos5420_data = {
+ .clk_name = {"mscl"},
+ .num_clk = 1,
+ .formats = exynos5420_formats,
+ .num_formats = ARRAY_SIZE(exynos5420_formats),
+};
+
+static const struct scaler_data exynos5433_data = {
+ .clk_name = {"pclk", "aclk", "aclk_xiu"},
+ .num_clk = 3,
+ .formats = exynos5420_formats, /* intentional */
+ .num_formats = ARRAY_SIZE(exynos5420_formats),
+};
+
+static const struct of_device_id exynos_scaler_match[] = {
+ {
+ .compatible = "samsung,exynos5420-scaler",
+ .data = &exynos5420_data,
+ }, {
+ .compatible = "samsung,exynos5433-scaler",
+ .data = &exynos5433_data,
+ }, {
+ },
+};
+MODULE_DEVICE_TABLE(of, exynos_scaler_match);
+
+struct platform_driver scaler_driver = {
+ .probe = scaler_probe,
+ .remove = scaler_remove,
+ .driver = {
+ .name = "exynos-scaler",
+ .owner = THIS_MODULE,
+ .pm = &scaler_pm_ops,
+ .of_match_table = exynos_scaler_match,
+ },
+};
diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c
index abd84cb..09c4bc0 100644
--- a/drivers/gpu/drm/exynos/exynos_hdmi.c
+++ b/drivers/gpu/drm/exynos/exynos_hdmi.c
@@ -954,8 +954,6 @@ static int hdmi_create_connector(struct drm_encoder *encoder)
drm_mode_connector_attach_encoder(connector, encoder);
if (hdata->bridge) {
- encoder->bridge = hdata->bridge;
- hdata->bridge->encoder = encoder;
ret = drm_bridge_attach(encoder, hdata->bridge, NULL);
if (ret)
DRM_ERROR("Failed to attach bridge\n");
diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c
index 257299e..272c79f 100644
--- a/drivers/gpu/drm/exynos/exynos_mixer.c
+++ b/drivers/gpu/drm/exynos/exynos_mixer.c
@@ -473,7 +473,7 @@ static void vp_video_buffer(struct mixer_context *ctx,
chroma_addr[1] = chroma_addr[0] + 0x40;
} else {
luma_addr[1] = luma_addr[0] + fb->pitches[0];
- chroma_addr[1] = chroma_addr[0] + fb->pitches[0];
+ chroma_addr[1] = chroma_addr[0] + fb->pitches[1];
}
} else {
luma_addr[1] = 0;
@@ -482,6 +482,7 @@ static void vp_video_buffer(struct mixer_context *ctx,
spin_lock_irqsave(&ctx->reg_slock, flags);
+ vp_reg_write(ctx, VP_SHADOW_UPDATE, 1);
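+ /* request a shadow-register update; the irq handler checks this bit before finishing a frame */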
/* interlace or progressive scan mode */
val = (test_bit(MXR_BIT_INTERLACE, &ctx->flags) ? ~0 : 0);
vp_reg_writemask(ctx, VP_MODE, val, VP_MODE_LINE_SKIP);
@@ -495,21 +496,23 @@ static void vp_video_buffer(struct mixer_context *ctx,
vp_reg_write(ctx, VP_IMG_SIZE_Y, VP_IMG_HSIZE(fb->pitches[0]) |
VP_IMG_VSIZE(fb->height));
/* chroma plane for NV12/NV21 is half the height of the luma plane */
- vp_reg_write(ctx, VP_IMG_SIZE_C, VP_IMG_HSIZE(fb->pitches[0]) |
+ vp_reg_write(ctx, VP_IMG_SIZE_C, VP_IMG_HSIZE(fb->pitches[1]) |
VP_IMG_VSIZE(fb->height / 2));
vp_reg_write(ctx, VP_SRC_WIDTH, state->src.w);
- vp_reg_write(ctx, VP_SRC_HEIGHT, state->src.h);
vp_reg_write(ctx, VP_SRC_H_POSITION,
VP_SRC_H_POSITION_VAL(state->src.x));
- vp_reg_write(ctx, VP_SRC_V_POSITION, state->src.y);
-
vp_reg_write(ctx, VP_DST_WIDTH, state->crtc.w);
vp_reg_write(ctx, VP_DST_H_POSITION, state->crtc.x);
+
if (test_bit(MXR_BIT_INTERLACE, &ctx->flags)) {
+ vp_reg_write(ctx, VP_SRC_HEIGHT, state->src.h / 2);
+ vp_reg_write(ctx, VP_SRC_V_POSITION, state->src.y / 2);
vp_reg_write(ctx, VP_DST_HEIGHT, state->crtc.h / 2);
vp_reg_write(ctx, VP_DST_V_POSITION, state->crtc.y / 2);
} else {
+ vp_reg_write(ctx, VP_SRC_HEIGHT, state->src.h);
+ vp_reg_write(ctx, VP_SRC_V_POSITION, state->src.y);
vp_reg_write(ctx, VP_DST_HEIGHT, state->crtc.h);
vp_reg_write(ctx, VP_DST_V_POSITION, state->crtc.y);
}
@@ -699,6 +702,15 @@ static irqreturn_t mixer_irq_handler(int irq, void *arg)
/* interlace scan need to check shadow register */
if (test_bit(MXR_BIT_INTERLACE, &ctx->flags)) {
+ if (test_bit(MXR_BIT_VP_ENABLED, &ctx->flags) &&
+ vp_reg_read(ctx, VP_SHADOW_UPDATE))
+ goto out;
+
+ base = mixer_reg_read(ctx, MXR_CFG);
+ shadow = mixer_reg_read(ctx, MXR_CFG_S);
+ if (base != shadow)
+ goto out;
+
base = mixer_reg_read(ctx, MXR_GRAPHIC_BASE(0));
shadow = mixer_reg_read(ctx, MXR_GRAPHIC_BASE_S(0));
if (base != shadow)
diff --git a/drivers/gpu/drm/exynos/regs-mixer.h b/drivers/gpu/drm/exynos/regs-mixer.h
index c311f57..189cfa2 100644
--- a/drivers/gpu/drm/exynos/regs-mixer.h
+++ b/drivers/gpu/drm/exynos/regs-mixer.h
@@ -47,6 +47,7 @@
#define MXR_MO 0x0304
#define MXR_RESOLUTION 0x0310
+#define MXR_CFG_S 0x2004
#define MXR_GRAPHIC0_BASE_S 0x2024
#define MXR_GRAPHIC1_BASE_S 0x2044
diff --git a/drivers/gpu/drm/exynos/regs-scaler.h b/drivers/gpu/drm/exynos/regs-scaler.h
new file mode 100644
index 0000000..fc7ccad
--- /dev/null
+++ b/drivers/gpu/drm/exynos/regs-scaler.h
@@ -0,0 +1,426 @@
+/* drivers/gpu/drm/exynos/regs-scaler.h
+ *
+ * Copyright (c) 2017 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com/
+ * Author: Andrzej Pietrasiewicz <andrzej.p@samsung.com>
+ *
+ * Register definition file for Samsung scaler driver
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef EXYNOS_REGS_SCALER_H
+#define EXYNOS_REGS_SCALER_H
+
+/* Register part */
+
+/* Global setting */
+#define SCALER_STATUS 0x0 /* no shadow */
+#define SCALER_CFG 0x4
+
+/* Interrupt */
+#define SCALER_INT_EN 0x8 /* no shadow */
+#define SCALER_INT_STATUS 0xc /* no shadow */
+
+/* SRC */
+#define SCALER_SRC_CFG 0x10
+#define SCALER_SRC_Y_BASE 0x14
+#define SCALER_SRC_CB_BASE 0x18
+#define SCALER_SRC_CR_BASE 0x294
+#define SCALER_SRC_SPAN 0x1c
+#define SCALER_SRC_Y_POS 0x20
+#define SCALER_SRC_WH 0x24
+#define SCALER_SRC_C_POS 0x28
+
+/* DST */
+#define SCALER_DST_CFG 0x30
+#define SCALER_DST_Y_BASE 0x34
+#define SCALER_DST_CB_BASE 0x38
+#define SCALER_DST_CR_BASE 0x298
+#define SCALER_DST_SPAN 0x3c
+#define SCALER_DST_WH 0x40
+#define SCALER_DST_POS 0x44
+
+/* Ratio */
+#define SCALER_H_RATIO 0x50
+#define SCALER_V_RATIO 0x54
+
+/* Rotation */
+#define SCALER_ROT_CFG 0x58
+
+/* Coefficient */
+/*
+ * YHCOEF_{x}{A|B|C|D} CHCOEF_{x}{A|B|C|D}
+ *
+ * A B C D A B C D
+ * 0 60 64 68 6c 140 144 148 14c
+ * 1 70 74 78 7c 150 154 158 15c
+ * 2 80 84 88 8c 160 164 168 16c
+ * 3 90 94 98 9c 170 174 178 17c
+ * 4 a0 a4 a8 ac 180 184 188 18c
+ * 5 b0 b4 b8 bc 190 194 198 19c
+ * 6 c0 c4 c8 cc 1a0 1a4 1a8 1ac
+ * 7 d0 d4 d8 dc 1b0 1b4 1b8 1bc
+ * 8 e0 e4 e8 ec 1c0 1c4 1c8 1cc
+ *
+ *
+ * YVCOEF_{x}{A|B} CVCOEF_{x}{A|B}
+ *
+ * A B A B
+ * 0 f0 f4 1d0 1d4
+ * 1 f8 fc 1d8 1dc
+ * 2 100 104 1e0 1e4
+ * 3 108 10c 1e8 1ec
+ * 4 110 114 1f0 1f4
+ * 5 118 11c 1f8 1fc
+ * 6 120 124 200 204
+ * 7 128 12c 208 20c
+ * 8 130 134 210 214
+ */
+#define _SCALER_HCOEF_DELTA(r, c) ((r) * 0x10 + (c) * 0x4)
+#define _SCALER_VCOEF_DELTA(r, c) ((r) * 0x8 + (c) * 0x4)
+
+#define SCALER_YHCOEF(r, c) (0x60 + _SCALER_HCOEF_DELTA((r), (c)))
+#define SCALER_YVCOEF(r, c) (0xf0 + _SCALER_VCOEF_DELTA((r), (c)))
+#define SCALER_CHCOEF(r, c) (0x140 + _SCALER_HCOEF_DELTA((r), (c)))
+#define SCALER_CVCOEF(r, c) (0x1d0 + _SCALER_VCOEF_DELTA((r), (c)))
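+/* e.g. SCALER_YHCOEF(1, 2) = 0x60 + 1 * 0x10 + 2 * 0x4 = 0x78, i.e. row 1, column C in the table above */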
+
+
+/* Color Space Conversion */
+#define SCALER_CSC_COEF(x, y) (0x220 + (y) * 0xc + (x) * 0x4)
+
+/* Dithering */
+#define SCALER_DITH_CFG 0x250
+
+/* Version Number */
+#define SCALER_VER 0x260 /* no shadow */
+
+/* Cycle count and Timeout */
+#define SCALER_CYCLE_COUNT 0x278 /* no shadow */
+#define SCALER_TIMEOUT_CTRL 0x2c0 /* no shadow */
+#define SCALER_TIMEOUT_CNT 0x2c4 /* no shadow */
+
+/* Blending */
+#define SCALER_SRC_BLEND_COLOR 0x280
+#define SCALER_SRC_BLEND_ALPHA 0x284
+#define SCALER_DST_BLEND_COLOR 0x288
+#define SCALER_DST_BLEND_ALPHA 0x28c
+
+/* Color Fill */
+#define SCALER_FILL_COLOR 0x290
+
+/* Multiple Command Queue */
+#define SCALER_ADDR_Q_CONFIG 0x2a0 /* no shadow */
+#define SCALER_SRC_ADDR_Q_STATUS 0x2a4 /* no shadow */
+#define SCALER_SRC_ADDR_Q 0x2a8 /* no shadow */
+
+/* CRC */
+#define SCALER_CRC_COLOR00_10 0x2b0 /* no shadow */
+#define SCALER_CRC_COLOR20_30 0x2b4 /* no shadow */
+#define SCALER_CRC_COLOR01_11 0x2b8 /* no shadow */
+#define SCALER_CRC_COLOR21_31 0x2bc /* no shadow */
+
+/* Shadow Registers */
+#define SCALER_SHADOW_OFFSET 0x1000
+
+
+/* Bit definition part */
+#define SCALER_MASK(hi_b, lo_b) ((1 << ((hi_b) - (lo_b) + 1)) - 1)
+#define SCALER_GET(reg, hi_b, lo_b) \
+ (((reg) >> (lo_b)) & SCALER_MASK(hi_b, lo_b))
+#define SCALER_SET(val, hi_b, lo_b) \
+ (((val) & SCALER_MASK(hi_b, lo_b)) << (lo_b))
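+/* e.g. SCALER_SET(v, 4, 0) masks v to bits [4:0] and leaves it at bit position 0 */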
+
+/* SCALER_STATUS */
+#define SCALER_STATUS_SCALER_RUNNING (1 << 1)
+#define SCALER_STATUS_SCALER_READY_CLK_DOWN (1 << 0)
+
+/* SCALER_CFG */
+#define SCALER_CFG_FILL_EN (1 << 24)
+#define SCALER_CFG_BLEND_COLOR_DIVIDE_ALPHA_EN (1 << 17)
+#define SCALER_CFG_BLEND_EN (1 << 16)
+#define SCALER_CFG_CSC_Y_OFFSET_SRC_EN (1 << 10)
+#define SCALER_CFG_CSC_Y_OFFSET_DST_EN (1 << 9)
+#define SCALER_CFG_16_BURST_MODE (1 << 8)
+#define SCALER_CFG_SOFT_RESET (1 << 1)
+#define SCALER_CFG_START_CMD (1 << 0)
+
+/* SCALER_INT_EN */
+#define SCALER_INT_EN_TIMEOUT (1 << 31)
+#define SCALER_INT_EN_ILLEGAL_BLEND (1 << 24)
+#define SCALER_INT_EN_ILLEGAL_RATIO (1 << 23)
+#define SCALER_INT_EN_ILLEGAL_DST_HEIGHT (1 << 22)
+#define SCALER_INT_EN_ILLEGAL_DST_WIDTH (1 << 21)
+#define SCALER_INT_EN_ILLEGAL_DST_V_POS (1 << 20)
+#define SCALER_INT_EN_ILLEGAL_DST_H_POS (1 << 19)
+#define SCALER_INT_EN_ILLEGAL_DST_C_SPAN (1 << 18)
+#define SCALER_INT_EN_ILLEGAL_DST_Y_SPAN (1 << 17)
+#define SCALER_INT_EN_ILLEGAL_DST_CR_BASE (1 << 16)
+#define SCALER_INT_EN_ILLEGAL_DST_CB_BASE (1 << 15)
+#define SCALER_INT_EN_ILLEGAL_DST_Y_BASE (1 << 14)
+#define SCALER_INT_EN_ILLEGAL_DST_COLOR (1 << 13)
+#define SCALER_INT_EN_ILLEGAL_SRC_HEIGHT (1 << 12)
+#define SCALER_INT_EN_ILLEGAL_SRC_WIDTH (1 << 11)
+#define SCALER_INT_EN_ILLEGAL_SRC_CV_POS (1 << 10)
+#define SCALER_INT_EN_ILLEGAL_SRC_CH_POS (1 << 9)
+#define SCALER_INT_EN_ILLEGAL_SRC_YV_POS (1 << 8)
+#define SCALER_INT_EN_ILLEGAL_SRC_YH_POS (1 << 7)
+#define SCALER_INT_EN_ILLEGAL_DST_SPAN (1 << 6)
+#define SCALER_INT_EN_ILLEGAL_SRC_Y_SPAN (1 << 5)
+#define SCALER_INT_EN_ILLEGAL_SRC_CR_BASE (1 << 4)
+#define SCALER_INT_EN_ILLEGAL_SRC_CB_BASE (1 << 3)
+#define SCALER_INT_EN_ILLEGAL_SRC_Y_BASE (1 << 2)
+#define SCALER_INT_EN_ILLEGAL_SRC_COLOR (1 << 1)
+#define SCALER_INT_EN_FRAME_END (1 << 0)
+
+/* SCALER_INT_STATUS */
+#define SCALER_INT_STATUS_TIMEOUT (1 << 31)
+#define SCALER_INT_STATUS_ILLEGAL_BLEND (1 << 24)
+#define SCALER_INT_STATUS_ILLEGAL_RATIO (1 << 23)
+#define SCALER_INT_STATUS_ILLEGAL_DST_HEIGHT (1 << 22)
+#define SCALER_INT_STATUS_ILLEGAL_DST_WIDTH (1 << 21)
+#define SCALER_INT_STATUS_ILLEGAL_DST_V_POS (1 << 20)
+#define SCALER_INT_STATUS_ILLEGAL_DST_H_POS (1 << 19)
+#define SCALER_INT_STATUS_ILLEGAL_DST_C_SPAN (1 << 18)
+#define SCALER_INT_STATUS_ILLEGAL_DST_Y_SPAN (1 << 17)
+#define SCALER_INT_STATUS_ILLEGAL_DST_CR_BASE (1 << 16)
+#define SCALER_INT_STATUS_ILLEGAL_DST_CB_BASE (1 << 15)
+#define SCALER_INT_STATUS_ILLEGAL_DST_Y_BASE (1 << 14)
+#define SCALER_INT_STATUS_ILLEGAL_DST_COLOR (1 << 13)
+#define SCALER_INT_STATUS_ILLEGAL_SRC_HEIGHT (1 << 12)
+#define SCALER_INT_STATUS_ILLEGAL_SRC_WIDTH (1 << 11)
+#define SCALER_INT_STATUS_ILLEGAL_SRC_CV_POS (1 << 10)
+#define SCALER_INT_STATUS_ILLEGAL_SRC_CH_POS (1 << 9)
+#define SCALER_INT_STATUS_ILLEGAL_SRC_YV_POS (1 << 8)
+#define SCALER_INT_STATUS_ILLEGAL_SRC_YH_POS (1 << 7)
+#define SCALER_INT_STATUS_ILLEGAL_DST_SPAN (1 << 6)
+#define SCALER_INT_STATUS_ILLEGAL_SRC_Y_SPAN (1 << 5)
+#define SCALER_INT_STATUS_ILLEGAL_SRC_CR_BASE (1 << 4)
+#define SCALER_INT_STATUS_ILLEGAL_SRC_CB_BASE (1 << 3)
+#define SCALER_INT_STATUS_ILLEGAL_SRC_Y_BASE (1 << 2)
+#define SCALER_INT_STATUS_ILLEGAL_SRC_COLOR (1 << 1)
+#define SCALER_INT_STATUS_FRAME_END (1 << 0)
+
+/* SCALER_SRC_CFG */
+#define SCALER_SRC_CFG_TILE_EN (1 << 10)
+#define SCALER_SRC_CFG_GET_BYTE_SWAP(r) SCALER_GET(r, 6, 5)
+#define SCALER_SRC_CFG_SET_BYTE_SWAP(v) SCALER_SET(v, 6, 5)
+#define SCALER_SRC_CFG_GET_COLOR_FORMAT(r) SCALER_GET(r, 4, 0)
+#define SCALER_SRC_CFG_SET_COLOR_FORMAT(v) SCALER_SET(v, 4, 0)
+#define SCALER_YUV420_2P_UV 0
+#define SCALER_YUV422_2P_UV 2
+#define SCALER_YUV444_2P_UV 3
+#define SCALER_RGB_565 4
+#define SCALER_ARGB1555 5
+#define SCALER_ARGB8888 6
+#define SCALER_ARGB8888_PRE 7
+#define SCALER_YUV422_1P_YVYU 9
+#define SCALER_YUV422_1P_YUYV 10
+#define SCALER_YUV422_1P_UYVY 11
+#define SCALER_ARGB4444 12
+#define SCALER_L8A8 13
+#define SCALER_RGBA8888 14
+#define SCALER_L8 15
+#define SCALER_YUV420_2P_VU 16
+#define SCALER_YUV422_2P_VU 18
+#define SCALER_YUV444_2P_VU 19
+#define SCALER_YUV420_3P 20
+#define SCALER_YUV422_3P 22
+#define SCALER_YUV444_3P 23
+
+/* SCALER_SRC_SPAN */
+#define SCALER_SRC_SPAN_GET_C_SPAN(r) SCALER_GET(r, 29, 16)
+#define SCALER_SRC_SPAN_SET_C_SPAN(v) SCALER_SET(v, 29, 16)
+#define SCALER_SRC_SPAN_GET_Y_SPAN(r) SCALER_GET(r, 13, 0)
+#define SCALER_SRC_SPAN_SET_Y_SPAN(v) SCALER_SET(v, 13, 0)
+
+/* SCALER_SRC_Y_POS */
+#define SCALER_SRC_Y_POS_GET_YH_POS(r) SCALER_GET(r, 31, 16)
+#define SCALER_SRC_Y_POS_SET_YH_POS(v) SCALER_SET(v, 31, 16)
+#define SCALER_SRC_Y_POS_GET_YV_POS(r) SCALER_GET(r, 15, 0)
+#define SCALER_SRC_Y_POS_SET_YV_POS(v) SCALER_SET(v, 15, 0)
+
+/* SCALER_SRC_WH */
+#define SCALER_SRC_WH_GET_WIDTH(r) SCALER_GET(r, 29, 16)
+#define SCALER_SRC_WH_SET_WIDTH(v) SCALER_SET(v, 29, 16)
+#define SCALER_SRC_WH_GET_HEIGHT(r) SCALER_GET(r, 13, 0)
+#define SCALER_SRC_WH_SET_HEIGHT(v) SCALER_SET(v, 13, 0)
+
+/* SCALER_SRC_C_POS */
+#define SCALER_SRC_C_POS_GET_CH_POS(r) SCALER_GET(r, 31, 16)
+#define SCALER_SRC_C_POS_SET_CH_POS(v) SCALER_SET(v, 31, 16)
+#define SCALER_SRC_C_POS_GET_CV_POS(r) SCALER_GET(r, 15, 0)
+#define SCALER_SRC_C_POS_SET_CV_POS(v) SCALER_SET(v, 15, 0)
+
+/* SCALER_DST_CFG */
+#define SCALER_DST_CFG_GET_BYTE_SWAP(r) SCALER_GET(r, 6, 5)
+#define SCALER_DST_CFG_SET_BYTE_SWAP(v) SCALER_SET(v, 6, 5)
+#define SCALER_DST_CFG_GET_COLOR_FORMAT(r) SCALER_GET(r, 4, 0)
+#define SCALER_DST_CFG_SET_COLOR_FORMAT(v) SCALER_SET(v, 4, 0)
+
+/* SCALER_DST_SPAN */
+#define SCALER_DST_SPAN_GET_C_SPAN(r) SCALER_GET(r, 29, 16)
+#define SCALER_DST_SPAN_SET_C_SPAN(v) SCALER_SET(v, 29, 16)
+#define SCALER_DST_SPAN_GET_Y_SPAN(r) SCALER_GET(r, 13, 0)
+#define SCALER_DST_SPAN_SET_Y_SPAN(v) SCALER_SET(v, 13, 0)
+
+/* SCALER_DST_WH */
+#define SCALER_DST_WH_GET_WIDTH(r) SCALER_GET(r, 29, 16)
+#define SCALER_DST_WH_SET_WIDTH(v) SCALER_SET(v, 29, 16)
+#define SCALER_DST_WH_GET_HEIGHT(r) SCALER_GET(r, 13, 0)
+#define SCALER_DST_WH_SET_HEIGHT(v) SCALER_SET(v, 13, 0)
+
+/* SCALER_DST_POS */
+#define SCALER_DST_POS_GET_H_POS(r) SCALER_GET(r, 29, 16)
+#define SCALER_DST_POS_SET_H_POS(v) SCALER_SET(v, 29, 16)
+#define SCALER_DST_POS_GET_V_POS(r) SCALER_GET(r, 13, 0)
+#define SCALER_DST_POS_SET_V_POS(v) SCALER_SET(v, 13, 0)
+
+/* SCALER_H_RATIO */
+#define SCALER_H_RATIO_GET(r) SCALER_GET(r, 18, 0)
+#define SCALER_H_RATIO_SET(v) SCALER_SET(v, 18, 0)
+
+/* SCALER_V_RATIO */
+#define SCALER_V_RATIO_GET(r) SCALER_GET(r, 18, 0)
+#define SCALER_V_RATIO_SET(v) SCALER_SET(v, 18, 0)
+
+/* SCALER_ROT_CFG */
+#define SCALER_ROT_CFG_FLIP_X_EN (1 << 3)
+#define SCALER_ROT_CFG_FLIP_Y_EN (1 << 2)
+#define SCALER_ROT_CFG_GET_ROTMODE(r) SCALER_GET(r, 1, 0)
+#define SCALER_ROT_CFG_SET_ROTMODE(v) SCALER_SET(v, 1, 0)
+#define SCALER_ROT_MODE_90 1
+#define SCALER_ROT_MODE_180 2
+#define SCALER_ROT_MODE_270 3
+
+/* SCALER_HCOEF, SCALER_VCOEF */
+#define SCALER_COEF_SHIFT(i) (16 * (1 - (i) % 2))
+#define SCALER_COEF_GET(r, i) \
+ (((r) >> SCALER_COEF_SHIFT(i)) & 0x1ff)
+#define SCALER_COEF_SET(v, i) \
+ (((v) & 0x1ff) << SCALER_COEF_SHIFT(i))
+
+/* SCALER_CSC_COEFxy */
+#define SCALER_CSC_COEF_GET(r) SCALER_GET(r, 11, 0)
+#define SCALER_CSC_COEF_SET(v) SCALER_SET(v, 11, 0)
+
+/* SCALER_DITH_CFG */
+#define SCALER_DITH_CFG_GET_R_TYPE(r) SCALER_GET(r, 8, 6)
+#define SCALER_DITH_CFG_SET_R_TYPE(v) SCALER_SET(v, 8, 6)
+#define SCALER_DITH_CFG_GET_G_TYPE(r) SCALER_GET(r, 5, 3)
+#define SCALER_DITH_CFG_SET_G_TYPE(v) SCALER_SET(v, 5, 3)
+#define SCALER_DITH_CFG_GET_B_TYPE(r) SCALER_GET(r, 2, 0)
+#define SCALER_DITH_CFG_SET_B_TYPE(v) SCALER_SET(v, 2, 0)
+
+/* SCALER_TIMEOUT_CTRL */
+#define SCALER_TIMEOUT_CTRL_GET_TIMER_VALUE(r) SCALER_GET(r, 31, 16)
+#define SCALER_TIMEOUT_CTRL_SET_TIMER_VALUE(v) SCALER_SET(v, 31, 16)
+#define SCALER_TIMEOUT_CTRL_GET_TIMER_DIV(r) SCALER_GET(r, 7, 4)
+#define SCALER_TIMEOUT_CTRL_SET_TIMER_DIV(v) SCALER_SET(v, 7, 4)
+#define SCALER_TIMEOUT_CTRL_TIMER_ENABLE (1 << 0)
+
+/* SCALER_TIMEOUT_CNT */
+#define SCALER_TIMEOUT_CTRL_GET_TIMER_COUNT(r) SCALER_GET(r, 31, 16)
+
+/* SCALER_SRC_BLEND_COLOR */
+#define SCALER_SRC_BLEND_COLOR_SEL_INV (1 << 31)
+#define SCALER_SRC_BLEND_COLOR_GET_SEL(r) SCALER_GET(r, 30, 29)
+#define SCALER_SRC_BLEND_COLOR_SET_SEL(v) SCALER_SET(v, 30, 29)
+#define SCALER_SRC_BLEND_COLOR_OP_SEL_INV (1 << 28)
+#define SCALER_SRC_BLEND_COLOR_GET_OP_SEL(r) SCALER_GET(r, 27, 24)
+#define SCALER_SRC_BLEND_COLOR_SET_OP_SEL(v) SCALER_SET(v, 27, 24)
+#define SCALER_SRC_BLEND_COLOR_GET_COLOR0(r) SCALER_GET(r, 23, 16)
+#define SCALER_SRC_BLEND_COLOR_SET_COLOR0(v) SCALER_SET(v, 23, 16)
+#define SCALER_SRC_BLEND_COLOR_GET_COLOR1(r) SCALER_GET(r, 15, 8)
+#define SCALER_SRC_BLEND_COLOR_SET_COLOR1(v) SCALER_SET(v, 15, 8)
+#define SCALER_SRC_BLEND_COLOR_GET_COLOR2(r) SCALER_GET(r, 7, 0)
+#define SCALER_SRC_BLEND_COLOR_SET_COLOR2(v) SCALER_SET(v, 7, 0)
+
+/* SCALER_SRC_BLEND_ALPHA */
+#define SCALER_SRC_BLEND_ALPHA_SEL_INV (1 << 31)
+#define SCALER_SRC_BLEND_ALPHA_GET_SEL(r) SCALER_GET(r, 30, 29)
+#define SCALER_SRC_BLEND_ALPHA_SET_SEL(v) SCALER_SET(v, 30, 29)
+#define SCALER_SRC_BLEND_ALPHA_OP_SEL_INV (1 << 28)
+#define SCALER_SRC_BLEND_ALPHA_GET_OP_SEL(r) SCALER_GET(r, 27, 24)
+#define SCALER_SRC_BLEND_ALPHA_SET_OP_SEL(v) SCALER_SET(v, 27, 24)
+#define SCALER_SRC_BLEND_ALPHA_GET_ALPHA(r) SCALER_GET(r, 7, 0)
+#define SCALER_SRC_BLEND_ALPHA_SET_ALPHA(v) SCALER_SET(v, 7, 0)
+
+/* SCALER_DST_BLEND_COLOR */
+#define SCALER_DST_BLEND_COLOR_SEL_INV (1 << 31)
+#define SCALER_DST_BLEND_COLOR_GET_SEL(r) SCALER_GET(r, 30, 29)
+#define SCALER_DST_BLEND_COLOR_SET_SEL(v) SCALER_SET(v, 30, 29)
+#define SCALER_DST_BLEND_COLOR_OP_SEL_INV (1 << 28)
+#define SCALER_DST_BLEND_COLOR_GET_OP_SEL(r) SCALER_GET(r, 27, 24)
+#define SCALER_DST_BLEND_COLOR_SET_OP_SEL(v) SCALER_SET(v, 27, 24)
+#define SCALER_DST_BLEND_COLOR_GET_COLOR0(r) SCALER_GET(r, 23, 16)
+#define SCALER_DST_BLEND_COLOR_SET_COLOR0(v) SCALER_SET(v, 23, 16)
+#define SCALER_DST_BLEND_COLOR_GET_COLOR1(r) SCALER_GET(r, 15, 8)
+#define SCALER_DST_BLEND_COLOR_SET_COLOR1(v) SCALER_SET(v, 15, 8)
+#define SCALER_DST_BLEND_COLOR_GET_COLOR2(r) SCALER_GET(r, 7, 0)
+#define SCALER_DST_BLEND_COLOR_SET_COLOR2(v) SCALER_SET(v, 7, 0)
+
+/* SCALER_DST_BLEND_ALPHA */
+#define SCALER_DST_BLEND_ALPHA_SEL_INV (1 << 31)
+#define SCALER_DST_BLEND_ALPHA_GET_SEL(r) SCALER_GET(r, 30, 29)
+#define SCALER_DST_BLEND_ALPHA_SET_SEL(v) SCALER_SET(v, 30, 29)
+#define SCALER_DST_BLEND_ALPHA_OP_SEL_INV (1 << 28)
+#define SCALER_DST_BLEND_ALPHA_GET_OP_SEL(r) SCALER_GET(r, 27, 24)
+#define SCALER_DST_BLEND_ALPHA_SET_OP_SEL(v) SCALER_SET(v, 27, 24)
+#define SCALER_DST_BLEND_ALPHA_GET_ALPHA(r) SCALER_GET(r, 7, 0)
+#define SCALER_DST_BLEND_ALPHA_SET_ALPHA(v) SCALER_SET(v, 7, 0)
+
+/* SCALER_FILL_COLOR */
+#define SCALER_FILL_COLOR_GET_ALPHA(r) SCALER_GET(r, 31, 24)
+#define SCALER_FILL_COLOR_SET_ALPHA(v) SCALER_SET(v, 31, 24)
+#define SCALER_FILL_COLOR_GET_FILL_COLOR0(r) SCALER_GET(r, 23, 16)
+#define SCALER_FILL_COLOR_SET_FILL_COLOR0(v) SCALER_SET(v, 23, 16)
+#define SCALER_FILL_COLOR_GET_FILL_COLOR1(r) SCALER_GET(r, 15, 8)
+#define SCALER_FILL_COLOR_SET_FILL_COLOR1(v) SCALER_SET(v, 15, 8)
+#define SCALER_FILL_COLOR_GET_FILL_COLOR2(r) SCALER_GET(r, 7, 0)
+#define SCALER_FILL_COLOR_SET_FILL_COLOR2(v) SCALER_SET(v, 7, 0)
+
+/* SCALER_ADDR_Q_CONFIG */
+#define SCALER_ADDR_Q_CONFIG_RST (1 << 0)
+
+/* SCALER_SRC_ADDR_Q_STATUS */
+#define SCALER_SRC_ADDR_Q_STATUS_Y_FULL (1 << 23)
+#define SCALER_SRC_ADDR_Q_STATUS_Y_EMPTY (1 << 22)
+#define SCALER_SRC_ADDR_Q_STATUS_GET_Y_WR_IDX(r) SCALER_GET(r, 21, 16)
+#define SCALER_SRC_ADDR_Q_STATUS_CB_FULL (1 << 15)
+#define SCALER_SRC_ADDR_Q_STATUS_CB_EMPTY (1 << 14)
+#define SCALER_SRC_ADDR_Q_STATUS_GET_CB_WR_IDX(r) SCALER_GET(r, 13, 8)
+#define SCALER_SRC_ADDR_Q_STATUS_CR_FULL (1 << 7)
+#define SCALER_SRC_ADDR_Q_STATUS_CR_EMPTY (1 << 6)
+#define SCALER_SRC_ADDR_Q_STATUS_GET_CR_WR_IDX(r) SCALER_GET(r, 5, 0)
+
+/* SCALER_DST_ADDR_Q_STATUS */
+#define SCALER_DST_ADDR_Q_STATUS_Y_FULL (1 << 23)
+#define SCALER_DST_ADDR_Q_STATUS_Y_EMPTY (1 << 22)
+#define SCALER_DST_ADDR_Q_STATUS_GET_Y_WR_IDX(r) SCALER_GET(r, 21, 16)
+#define SCALER_DST_ADDR_Q_STATUS_CB_FULL (1 << 15)
+#define SCALER_DST_ADDR_Q_STATUS_CB_EMPTY (1 << 14)
+#define SCALER_DST_ADDR_Q_STATUS_GET_CB_WR_IDX(r) SCALER_GET(r, 13, 8)
+#define SCALER_DST_ADDR_Q_STATUS_CR_FULL (1 << 7)
+#define SCALER_DST_ADDR_Q_STATUS_CR_EMPTY (1 << 6)
+#define SCALER_DST_ADDR_Q_STATUS_GET_CR_WR_IDX(r) SCALER_GET(r, 5, 0)
+
+/* SCALER_CRC_COLOR00_10 */
+#define SCALER_CRC_COLOR00_10_GET_00(r) SCALER_GET(r, 31, 16)
+#define SCALER_CRC_COLOR00_10_GET_10(r) SCALER_GET(r, 15, 0)
+
+/* SCALER_CRC_COLOR20_30 */
+#define SCALER_CRC_COLOR20_30_GET_20(r) SCALER_GET(r, 31, 16)
+#define SCALER_CRC_COLOR20_30_GET_30(r) SCALER_GET(r, 15, 0)
+
+/* SCALER_CRC_COLOR01_11 */
+#define SCALER_CRC_COLOR01_11_GET_01(r) SCALER_GET(r, 31, 16)
+#define SCALER_CRC_COLOR01_11_GET_11(r) SCALER_GET(r, 15, 0)
+
+/* SCALER_CRC_COLOR21_31 */
+#define SCALER_CRC_COLOR21_31_GET_21(r) SCALER_GET(r, 31, 16)
+#define SCALER_CRC_COLOR21_31_GET_31(r) SCALER_GET(r, 15, 0)
+
+#endif /* EXYNOS_REGS_SCALER_H */
diff --git a/drivers/gpu/drm/gma500/cdv_device.c b/drivers/gpu/drm/gma500/cdv_device.c
index 3a3bf75..34b8576 100644
--- a/drivers/gpu/drm/gma500/cdv_device.c
+++ b/drivers/gpu/drm/gma500/cdv_device.c
@@ -485,7 +485,7 @@ void cdv_intel_attach_force_audio_property(struct drm_connector *connector)
return;
for (i = 0; i < ARRAY_SIZE(force_audio_names); i++)
- drm_property_add_enum(prop, i, i-1, force_audio_names[i]);
+ drm_property_add_enum(prop, i-1, force_audio_names[i]);
dev_priv->force_audio_property = prop;
}
@@ -514,7 +514,7 @@ void cdv_intel_attach_broadcast_rgb_property(struct drm_connector *connector)
return;
for (i = 0; i < ARRAY_SIZE(broadcast_rgb_names); i++)
- drm_property_add_enum(prop, i, i, broadcast_rgb_names[i]);
+ drm_property_add_enum(prop, i, broadcast_rgb_names[i]);
dev_priv->broadcast_rgb_property = prop;
}
diff --git a/drivers/gpu/drm/gma500/psb_intel_sdvo.c b/drivers/gpu/drm/gma500/psb_intel_sdvo.c
index 8dc2b19..f2ee6aa 100644
--- a/drivers/gpu/drm/gma500/psb_intel_sdvo.c
+++ b/drivers/gpu/drm/gma500/psb_intel_sdvo.c
@@ -2281,7 +2281,7 @@ static bool psb_intel_sdvo_tv_create_property(struct psb_intel_sdvo *psb_intel_s
for (i = 0; i < psb_intel_sdvo_connector->format_supported_num; i++)
drm_property_add_enum(
- psb_intel_sdvo_connector->tv_format, i,
+ psb_intel_sdvo_connector->tv_format,
i, tv_format_names[psb_intel_sdvo_connector->tv_format_supported[i]]);
psb_intel_sdvo->tv_format_index = psb_intel_sdvo_connector->tv_format_supported[0];
diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug
index 80efee1..9de8b1c 100644
--- a/drivers/gpu/drm/i915/Kconfig.debug
+++ b/drivers/gpu/drm/i915/Kconfig.debug
@@ -26,7 +26,7 @@ config DRM_I915_DEBUG
select DRM_VGEM # used by igt/prime_vgem (dmabuf interop checks)
select DRM_DEBUG_MM if DRM=y
select STACKDEPOT if DRM=y # for DRM_DEBUG_MM
- select DRM_DEBUG_MM_SELFTEST
+ select DRM_DEBUG_SELFTEST
select SW_SYNC # signaling validation framework (igt/syncobj*)
select DRM_I915_SW_FENCE_DEBUG_OBJECTS
select DRM_I915_SELFTEST
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 2a96d08..9c449b8 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -2831,10 +2831,10 @@ static const struct drm_ioctl_desc i915_ioctls[] = {
DRM_IOCTL_DEF_DRV(I915_GEM_GET_APERTURE, i915_gem_get_aperture_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GET_PIPE_FROM_CRTC_ID, intel_get_pipe_from_crtc_id_ioctl, 0),
DRM_IOCTL_DEF_DRV(I915_GEM_MADVISE, i915_gem_madvise_ioctl, DRM_RENDER_ALLOW),
- DRM_IOCTL_DEF_DRV(I915_OVERLAY_PUT_IMAGE, intel_overlay_put_image_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW),
- DRM_IOCTL_DEF_DRV(I915_OVERLAY_ATTRS, intel_overlay_attrs_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW),
- DRM_IOCTL_DEF_DRV(I915_SET_SPRITE_COLORKEY, intel_sprite_set_colorkey_ioctl, DRM_MASTER|DRM_CONTROL_ALLOW),
- DRM_IOCTL_DEF_DRV(I915_GET_SPRITE_COLORKEY, drm_noop, DRM_MASTER|DRM_CONTROL_ALLOW),
+ DRM_IOCTL_DEF_DRV(I915_OVERLAY_PUT_IMAGE, intel_overlay_put_image_ioctl, DRM_MASTER),
+ DRM_IOCTL_DEF_DRV(I915_OVERLAY_ATTRS, intel_overlay_attrs_ioctl, DRM_MASTER),
+ DRM_IOCTL_DEF_DRV(I915_SET_SPRITE_COLORKEY, intel_sprite_set_colorkey_ioctl, DRM_MASTER),
+ DRM_IOCTL_DEF_DRV(I915_GET_SPRITE_COLORKEY, drm_noop, DRM_MASTER),
DRM_IOCTL_DEF_DRV(I915_GEM_WAIT, i915_gem_wait_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_CREATE, i915_gem_context_create_ioctl, DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(I915_GEM_CONTEXT_DESTROY, i915_gem_context_destroy_ioctl, DRM_RENDER_ALLOW),
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 7c6164d..15434ca 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -799,7 +799,6 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
port++;
}
- execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
execlists_user_end(execlists);
}
diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c
index 96e213e..2500502 100644
--- a/drivers/gpu/drm/i915/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/intel_sdvo.c
@@ -2779,9 +2779,8 @@ static bool intel_sdvo_tv_create_property(struct intel_sdvo *intel_sdvo,
return false;
for (i = 0; i < intel_sdvo_connector->format_supported_num; i++)
- drm_property_add_enum(
- intel_sdvo_connector->tv_format, i,
- i, tv_format_names[intel_sdvo_connector->tv_format_supported[i]]);
+ drm_property_add_enum(intel_sdvo_connector->tv_format, i,
+ tv_format_names[intel_sdvo_connector->tv_format_supported[i]]);
intel_sdvo_connector->base.base.state->tv.mode = intel_sdvo_connector->tv_format_supported[0];
drm_object_attach_property(&intel_sdvo_connector->base.base.base,
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index 1597938..ee23613f 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -936,22 +936,12 @@ intel_check_sprite_plane(struct intel_plane *plane,
struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
struct drm_framebuffer *fb = state->base.fb;
- int crtc_x, crtc_y;
- unsigned int crtc_w, crtc_h;
- uint32_t src_x, src_y, src_w, src_h;
- struct drm_rect *src = &state->base.src;
- struct drm_rect *dst = &state->base.dst;
- struct drm_rect clip = {};
int max_stride = INTEL_GEN(dev_priv) >= 9 ? 32768 : 16384;
- int hscale, vscale;
int max_scale, min_scale;
bool can_scale;
int ret;
uint32_t pixel_format = 0;
- *src = drm_plane_state_src(&state->base);
- *dst = drm_plane_state_dest(&state->base);
-
if (!fb) {
state->base.visible = false;
return 0;
@@ -990,64 +980,19 @@ intel_check_sprite_plane(struct intel_plane *plane,
min_scale = plane->can_scale ? 1 : (1 << 16);
}
- /*
- * FIXME the following code does a bunch of fuzzy adjustments to the
- * coordinates and sizes. We probably need some way to decide whether
- * more strict checking should be done instead.
- */
- drm_rect_rotate(src, fb->width << 16, fb->height << 16,
- state->base.rotation);
-
- hscale = drm_rect_calc_hscale_relaxed(src, dst, min_scale, max_scale);
- BUG_ON(hscale < 0);
-
- vscale = drm_rect_calc_vscale_relaxed(src, dst, min_scale, max_scale);
- BUG_ON(vscale < 0);
-
- if (crtc_state->base.enable)
- drm_mode_get_hv_timing(&crtc_state->base.mode,
- &clip.x2, &clip.y2);
-
- state->base.visible = drm_rect_clip_scaled(src, dst, &clip, hscale, vscale);
-
- crtc_x = dst->x1;
- crtc_y = dst->y1;
- crtc_w = drm_rect_width(dst);
- crtc_h = drm_rect_height(dst);
+ ret = drm_atomic_helper_check_plane_state(&state->base,
+ &crtc_state->base,
+ min_scale, max_scale,
+ true, true);
+ if (ret)
+ return ret;
if (state->base.visible) {
- /* check again in case clipping clamped the results */
- hscale = drm_rect_calc_hscale(src, dst, min_scale, max_scale);
- if (hscale < 0) {
- DRM_DEBUG_KMS("Horizontal scaling factor out of limits\n");
- drm_rect_debug_print("src: ", src, true);
- drm_rect_debug_print("dst: ", dst, false);
-
- return hscale;
- }
-
- vscale = drm_rect_calc_vscale(src, dst, min_scale, max_scale);
- if (vscale < 0) {
- DRM_DEBUG_KMS("Vertical scaling factor out of limits\n");
- drm_rect_debug_print("src: ", src, true);
- drm_rect_debug_print("dst: ", dst, false);
-
- return vscale;
- }
-
- /* Make the source viewport size an exact multiple of the scaling factors. */
- drm_rect_adjust_size(src,
- drm_rect_width(dst) * hscale - drm_rect_width(src),
- drm_rect_height(dst) * vscale - drm_rect_height(src));
-
- drm_rect_rotate_inv(src, fb->width << 16, fb->height << 16,
- state->base.rotation);
-
- /* sanity check to make sure the src viewport wasn't enlarged */
- WARN_ON(src->x1 < (int) state->base.src_x ||
- src->y1 < (int) state->base.src_y ||
- src->x2 > (int) state->base.src_x + state->base.src_w ||
- src->y2 > (int) state->base.src_y + state->base.src_h);
+ struct drm_rect *src = &state->base.src;
+ struct drm_rect *dst = &state->base.dst;
+ unsigned int crtc_w = drm_rect_width(dst);
+ unsigned int crtc_h = drm_rect_height(dst);
+ uint32_t src_x, src_y, src_w, src_h;
/*
* Hardware doesn't handle subpixel coordinates.
@@ -1060,59 +1005,40 @@ intel_check_sprite_plane(struct intel_plane *plane,
src_y = src->y1 >> 16;
src_h = drm_rect_height(src) >> 16;
+ src->x1 = src_x << 16;
+ src->x2 = (src_x + src_w) << 16;
+ src->y1 = src_y << 16;
+ src->y2 = (src_y + src_h) << 16;
+
if (intel_format_is_yuv(fb->format->format) &&
- fb->format->format != DRM_FORMAT_NV12) {
- src_x &= ~1;
- src_w &= ~1;
-
- /*
- * Must keep src and dst the
- * same if we can't scale.
- */
- if (!can_scale)
- crtc_w &= ~1;
-
- if (crtc_w == 0)
- state->base.visible = false;
+ fb->format->format != DRM_FORMAT_NV12 &&
+ (src_x % 2 || src_w % 2)) {
+ DRM_DEBUG_KMS("src x/w (%u, %u) must be a multiple of 2 for YUV planes\n",
+ src_x, src_w);
+ return -EINVAL;
}
- }
- /* Check size restrictions when scaling */
- if (state->base.visible && (src_w != crtc_w || src_h != crtc_h)) {
- unsigned int width_bytes;
- int cpp = fb->format->cpp[0];
+ /* Check size restrictions when scaling */
+ if (src_w != crtc_w || src_h != crtc_h) {
+ unsigned int width_bytes;
+ int cpp = fb->format->cpp[0];
- WARN_ON(!can_scale);
+ WARN_ON(!can_scale);
- /* FIXME interlacing min height is 6 */
+ width_bytes = ((src_x * cpp) & 63) + src_w * cpp;
- if (crtc_w < 3 || crtc_h < 3)
- state->base.visible = false;
-
- if (src_w < 3 || src_h < 3)
- state->base.visible = false;
-
- width_bytes = ((src_x * cpp) & 63) + src_w * cpp;
-
- if (INTEL_GEN(dev_priv) < 9 && (src_w > 2048 || src_h > 2048 ||
- width_bytes > 4096 || fb->pitches[0] > 4096)) {
- DRM_DEBUG_KMS("Source dimensions exceed hardware limits\n");
- return -EINVAL;
+ /* FIXME interlacing min height is 6 */
+ if (INTEL_GEN(dev_priv) < 9 && (
+ src_w < 3 || src_h < 3 ||
+ src_w > 2048 || src_h > 2048 ||
+ crtc_w < 3 || crtc_h < 3 ||
+ width_bytes > 4096 || fb->pitches[0] > 4096)) {
+ DRM_DEBUG_KMS("Source dimensions exceed hardware limits\n");
+ return -EINVAL;
+ }
}
}
- if (state->base.visible) {
- src->x1 = src_x << 16;
- src->x2 = (src_x + src_w) << 16;
- src->y1 = src_y << 16;
- src->y2 = (src_y + src_h) << 16;
- }
-
- dst->x1 = crtc_x;
- dst->x2 = crtc_x + crtc_w;
- dst->y1 = crtc_y;
- dst->y2 = crtc_y + crtc_h;
-
if (INTEL_GEN(dev_priv) >= 9) {
ret = skl_check_plane_surface(crtc_state, state);
if (ret)
diff --git a/drivers/gpu/drm/mediatek/Kconfig b/drivers/gpu/drm/mediatek/Kconfig
index 294de45..119ec0a 100644
--- a/drivers/gpu/drm/mediatek/Kconfig
+++ b/drivers/gpu/drm/mediatek/Kconfig
@@ -11,6 +11,7 @@ config DRM_MEDIATEK
select DRM_PANEL
select MEMORY
select MTK_SMI
+ select VIDEOMODE_HELPERS
help
Choose this option if you have a Mediatek SoC.
The module will be called mediatek-drm.
diff --git a/drivers/gpu/drm/mediatek/mtk_dpi.c b/drivers/gpu/drm/mediatek/mtk_dpi.c
index e80a603..6c0ea39 100644
--- a/drivers/gpu/drm/mediatek/mtk_dpi.c
+++ b/drivers/gpu/drm/mediatek/mtk_dpi.c
@@ -22,6 +22,7 @@
#include <linux/interrupt.h>
#include <linux/types.h>
#include <linux/clk.h>
+#include <video/videomode.h>
#include "mtk_dpi_regs.h"
#include "mtk_drm_ddp_comp.h"
@@ -429,34 +430,35 @@ static int mtk_dpi_set_display_mode(struct mtk_dpi *dpi,
struct mtk_dpi_sync_param vsync_leven = { 0 };
struct mtk_dpi_sync_param vsync_rodd = { 0 };
struct mtk_dpi_sync_param vsync_reven = { 0 };
- unsigned long pix_rate;
+ struct videomode vm = { 0 };
unsigned long pll_rate;
unsigned int factor;
/* keep pll_rate within the valid range of the tvdpll (1 GHz ~ 2 GHz) */
- pix_rate = 1000UL * mode->clock;
+
if (mode->clock <= 27000)
- factor = 16 * 3;
+ factor = 3 << 4;
else if (mode->clock <= 84000)
- factor = 8 * 3;
+ factor = 3 << 3;
else if (mode->clock <= 167000)
- factor = 4 * 3;
+ factor = 3 << 2;
else
- factor = 2 * 3;
- pll_rate = pix_rate * factor;
+ factor = 3 << 1;
+ drm_display_mode_to_videomode(mode, &vm);
+ pll_rate = vm.pixelclock * factor;
dev_dbg(dpi->dev, "Want PLL %lu Hz, pixel clock %lu Hz\n",
- pll_rate, pix_rate);
+ pll_rate, vm.pixelclock);
clk_set_rate(dpi->tvd_clk, pll_rate);
pll_rate = clk_get_rate(dpi->tvd_clk);
- pix_rate = pll_rate / factor;
- clk_set_rate(dpi->pixel_clk, pix_rate);
- pix_rate = clk_get_rate(dpi->pixel_clk);
+ vm.pixelclock = pll_rate / factor;
+ clk_set_rate(dpi->pixel_clk, vm.pixelclock);
+ vm.pixelclock = clk_get_rate(dpi->pixel_clk);
dev_dbg(dpi->dev, "Got PLL %lu Hz, pixel clock %lu Hz\n",
- pll_rate, pix_rate);
+ pll_rate, vm.pixelclock);
limit.c_bottom = 0x0010;
limit.c_top = 0x0FE0;
@@ -465,33 +467,31 @@ static int mtk_dpi_set_display_mode(struct mtk_dpi *dpi,
dpi_pol.ck_pol = MTK_DPI_POLARITY_FALLING;
dpi_pol.de_pol = MTK_DPI_POLARITY_RISING;
- dpi_pol.hsync_pol = mode->flags & DRM_MODE_FLAG_PHSYNC ?
+ dpi_pol.hsync_pol = vm.flags & DISPLAY_FLAGS_HSYNC_HIGH ?
MTK_DPI_POLARITY_FALLING : MTK_DPI_POLARITY_RISING;
- dpi_pol.vsync_pol = mode->flags & DRM_MODE_FLAG_PVSYNC ?
+ dpi_pol.vsync_pol = vm.flags & DISPLAY_FLAGS_VSYNC_HIGH ?
MTK_DPI_POLARITY_FALLING : MTK_DPI_POLARITY_RISING;
-
- hsync.sync_width = mode->hsync_end - mode->hsync_start;
- hsync.back_porch = mode->htotal - mode->hsync_end;
- hsync.front_porch = mode->hsync_start - mode->hdisplay;
+ hsync.sync_width = vm.hsync_len;
+ hsync.back_porch = vm.hback_porch;
+ hsync.front_porch = vm.hfront_porch;
hsync.shift_half_line = false;
-
- vsync_lodd.sync_width = mode->vsync_end - mode->vsync_start;
- vsync_lodd.back_porch = mode->vtotal - mode->vsync_end;
- vsync_lodd.front_porch = mode->vsync_start - mode->vdisplay;
+ vsync_lodd.sync_width = vm.vsync_len;
+ vsync_lodd.back_porch = vm.vback_porch;
+ vsync_lodd.front_porch = vm.vfront_porch;
vsync_lodd.shift_half_line = false;
- if (mode->flags & DRM_MODE_FLAG_INTERLACE &&
+ if (vm.flags & DISPLAY_FLAGS_INTERLACED &&
mode->flags & DRM_MODE_FLAG_3D_MASK) {
vsync_leven = vsync_lodd;
vsync_rodd = vsync_lodd;
vsync_reven = vsync_lodd;
vsync_leven.shift_half_line = true;
vsync_reven.shift_half_line = true;
- } else if (mode->flags & DRM_MODE_FLAG_INTERLACE &&
+ } else if (vm.flags & DISPLAY_FLAGS_INTERLACED &&
!(mode->flags & DRM_MODE_FLAG_3D_MASK)) {
vsync_leven = vsync_lodd;
vsync_leven.shift_half_line = true;
- } else if (!(mode->flags & DRM_MODE_FLAG_INTERLACE) &&
+ } else if (!(vm.flags & DISPLAY_FLAGS_INTERLACED) &&
mode->flags & DRM_MODE_FLAG_3D_MASK) {
vsync_rodd = vsync_lodd;
}
@@ -505,12 +505,12 @@ static int mtk_dpi_set_display_mode(struct mtk_dpi *dpi,
mtk_dpi_config_vsync_reven(dpi, &vsync_reven);
mtk_dpi_config_3d(dpi, !!(mode->flags & DRM_MODE_FLAG_3D_MASK));
- mtk_dpi_config_interface(dpi, !!(mode->flags &
- DRM_MODE_FLAG_INTERLACE));
- if (mode->flags & DRM_MODE_FLAG_INTERLACE)
- mtk_dpi_config_fb_size(dpi, mode->hdisplay, mode->vdisplay / 2);
+ mtk_dpi_config_interface(dpi, !!(vm.flags &
+ DISPLAY_FLAGS_INTERLACED));
+ if (vm.flags & DISPLAY_FLAGS_INTERLACED)
+ mtk_dpi_config_fb_size(dpi, vm.hactive, vm.vactive >> 1);
else
- mtk_dpi_config_fb_size(dpi, mode->hdisplay, mode->vdisplay);
+ mtk_dpi_config_fb_size(dpi, vm.hactive, vm.vactive);
mtk_dpi_config_channel_limit(dpi, &limit);
mtk_dpi_config_bit_num(dpi, dpi->bit_num);
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_gem.c b/drivers/gpu/drm/mediatek/mtk_drm_gem.c
index f595ac8..259b7b0 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_gem.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_gem.c
@@ -220,7 +220,7 @@ struct drm_gem_object *mtk_gem_prime_import_sg_table(struct drm_device *dev,
mtk_gem = mtk_drm_gem_init(dev, attach->dmabuf->size);
if (IS_ERR(mtk_gem))
- return ERR_PTR(PTR_ERR(mtk_gem));
+ return ERR_CAST(mtk_gem);
expected = sg_dma_address(sg->sgl);
for_each_sg(sg->sgl, s, sg->nents, i) {
diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c
index 7e5e24c..aa0943e 100644
--- a/drivers/gpu/drm/mediatek/mtk_dsi.c
+++ b/drivers/gpu/drm/mediatek/mtk_dsi.c
@@ -551,13 +551,12 @@ static int mtk_dsi_poweron(struct mtk_dsi *dsi)
}
/**
- * vm.pixelclock is in kHz, pixel_clock unit is Hz, so multiply by 1000
* htotal_time = htotal * byte_per_pixel / num_lanes
* overhead_time = lpx + hs_prepare + hs_zero + hs_trail + hs_exit
* mipi_ratio = (htotal_time + overhead_time) / htotal_time
* data_rate = pixel_clock * bit_per_pixel * mipi_ratio / num_lanes;
*/
- pixel_clock = dsi->vm.pixelclock * 1000;
+ pixel_clock = dsi->vm.pixelclock;
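+ /* vm.pixelclock is already in Hz here, since drm_display_mode_to_videomode() converts from the kHz mode->clock */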
htotal = dsi->vm.hactive + dsi->vm.hback_porch + dsi->vm.hfront_porch +
dsi->vm.hsync_len;
htotal_bits = htotal * bit_per_pixel;
@@ -725,16 +724,7 @@ static void mtk_dsi_encoder_mode_set(struct drm_encoder *encoder,
{
struct mtk_dsi *dsi = encoder_to_dsi(encoder);
- dsi->vm.pixelclock = adjusted->clock;
- dsi->vm.hactive = adjusted->hdisplay;
- dsi->vm.hback_porch = adjusted->htotal - adjusted->hsync_end;
- dsi->vm.hfront_porch = adjusted->hsync_start - adjusted->hdisplay;
- dsi->vm.hsync_len = adjusted->hsync_end - adjusted->hsync_start;
-
- dsi->vm.vactive = adjusted->vdisplay;
- dsi->vm.vback_porch = adjusted->vtotal - adjusted->vsync_end;
- dsi->vm.vfront_porch = adjusted->vsync_start - adjusted->vdisplay;
- dsi->vm.vsync_len = adjusted->vsync_end - adjusted->vsync_start;
+ drm_display_mode_to_videomode(adjusted, &dsi->vm);
}
static void mtk_dsi_encoder_disable(struct drm_encoder *encoder)
diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c
index 8e0cb16..0ae5ace 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_device.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
@@ -168,7 +168,6 @@ struct msm_gpu *adreno_load_gpu(struct drm_device *dev)
if (gpu->funcs->debugfs_init) {
gpu->funcs->debugfs_init(gpu, dev->primary);
gpu->funcs->debugfs_init(gpu, dev->render);
- gpu->funcs->debugfs_init(gpu, dev->control);
}
#endif
diff --git a/drivers/gpu/drm/msm/msm_debugfs.c b/drivers/gpu/drm/msm/msm_debugfs.c
index ba74cb4..1ff3fda 100644
--- a/drivers/gpu/drm/msm/msm_debugfs.c
+++ b/drivers/gpu/drm/msm/msm_debugfs.c
@@ -140,9 +140,6 @@ int msm_debugfs_late_init(struct drm_device *dev)
if (ret)
return ret;
ret = late_init_minor(dev->render);
- if (ret)
- return ret;
- ret = late_init_minor(dev->control);
return ret;
}
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index 0097134..7d0bec8d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -338,11 +338,9 @@ static struct nouveau_drm_prop_enum_list dither_depth[] = {
if (c) { \
p = drm_property_create(dev, DRM_MODE_PROP_ENUM, n, c); \
l = (list); \
- c = 0; \
while (p && l->gen_mask) { \
if (l->gen_mask & (1 << (gen))) { \
- drm_property_add_enum(p, c, l->type, l->name); \
- c++; \
+ drm_property_add_enum(p, l->type, l->name); \
} \
l++; \
} \
diff --git a/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c b/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c
index d964d45..2c9c972 100644
--- a/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c
+++ b/drivers/gpu/drm/panel/panel-raspberrypi-touchscreen.c
@@ -238,12 +238,6 @@ static void rpi_touchscreen_i2c_write(struct rpi_touchscreen *ts,
static int rpi_touchscreen_write(struct rpi_touchscreen *ts, u16 reg, u32 val)
{
-#if 0
- /* The firmware uses LP DSI transactions like this to bring up
- * the hardware, which should be faster than using I2C to then
- * pass to the Toshiba. However, I was unable to get it to
- * work.
- */
u8 msg[] = {
reg,
reg >> 8,
@@ -253,13 +247,7 @@ static int rpi_touchscreen_write(struct rpi_touchscreen *ts, u16 reg, u32 val)
val >> 24,
};
- mipi_dsi_dcs_write_buffer(ts->dsi, msg, sizeof(msg));
-#else
- rpi_touchscreen_i2c_write(ts, REG_WR_ADDRH, reg >> 8);
- rpi_touchscreen_i2c_write(ts, REG_WR_ADDRL, reg);
- rpi_touchscreen_i2c_write(ts, REG_WRITEH, val >> 8);
- rpi_touchscreen_i2c_write(ts, REG_WRITEL, val);
-#endif
+ mipi_dsi_generic_write(ts->dsi, msg, sizeof(msg));
return 0;
}
diff --git a/drivers/gpu/drm/pl111/Makefile b/drivers/gpu/drm/pl111/Makefile
index 9c5e8db..19a8189 100644
--- a/drivers/gpu/drm/pl111/Makefile
+++ b/drivers/gpu/drm/pl111/Makefile
@@ -3,6 +3,7 @@ pl111_drm-y += pl111_display.o \
pl111_versatile.o \
pl111_drv.o
+pl111_drm-$(CONFIG_ARCH_VEXPRESS) += pl111_vexpress.o
pl111_drm-$(CONFIG_DEBUG_FS) += pl111_debugfs.o
obj-$(CONFIG_DRM_PL111) += pl111_drm.o
diff --git a/drivers/gpu/drm/pl111/pl111_drm.h b/drivers/gpu/drm/pl111/pl111_drm.h
index 8639b2d..ce4501d 100644
--- a/drivers/gpu/drm/pl111/pl111_drm.h
+++ b/drivers/gpu/drm/pl111/pl111_drm.h
@@ -79,6 +79,7 @@ struct pl111_drm_dev_private {
const struct pl111_variant_data *variant;
void (*variant_display_enable) (struct drm_device *drm, u32 format);
void (*variant_display_disable) (struct drm_device *drm);
+ bool use_device_memory;
};
int pl111_display_init(struct drm_device *dev);
diff --git a/drivers/gpu/drm/pl111/pl111_drv.c b/drivers/gpu/drm/pl111/pl111_drv.c
index 4621259..454ff08 100644
--- a/drivers/gpu/drm/pl111/pl111_drv.c
+++ b/drivers/gpu/drm/pl111/pl111_drv.c
@@ -60,6 +60,7 @@
#include <linux/slab.h>
#include <linux/of.h>
#include <linux/of_graph.h>
+#include <linux/of_reserved_mem.h>
#include <drm/drmP.h>
#include <drm/drm_atomic_helper.h>
@@ -207,6 +208,24 @@ finish:
return ret;
}
+static struct drm_gem_object *
+pl111_gem_import_sg_table(struct drm_device *dev,
+ struct dma_buf_attachment *attach,
+ struct sg_table *sgt)
+{
+ struct pl111_drm_dev_private *priv = dev->dev_private;
+
+ /*
+ * When using device-specific reserved memory we can't import
+ * DMA buffers: imported buffers may live anywhere in system
+ * memory, while we can only handle a specific range of it.
+ */
+ if (priv->use_device_memory)
+ return ERR_PTR(-EINVAL);
+
+ return drm_gem_cma_prime_import_sg_table(dev, attach, sgt);
+}
+
DEFINE_DRM_GEM_CMA_FOPS(drm_fops);
static struct drm_driver pl111_drm_driver = {
@@ -227,7 +246,7 @@ static struct drm_driver pl111_drm_driver = {
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
.gem_prime_import = drm_gem_prime_import,
- .gem_prime_import_sg_table = drm_gem_cma_prime_import_sg_table,
+ .gem_prime_import_sg_table = pl111_gem_import_sg_table,
.gem_prime_export = drm_gem_prime_export,
.gem_prime_get_sg_table = drm_gem_cma_prime_get_sg_table,
@@ -257,6 +276,12 @@ static int pl111_amba_probe(struct amba_device *amba_dev,
drm->dev_private = priv;
priv->variant = variant;
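+ /* Use a dedicated reserved-memory region for DMA if the DT provides one. */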
+ ret = of_reserved_mem_device_init(dev);
+ if (!ret) {
+ dev_info(dev, "using device-specific reserved memory\n");
+ priv->use_device_memory = true;
+ }
+
if (of_property_read_u32(dev->of_node, "max-memory-bandwidth",
&priv->memory_bw)) {
dev_info(dev, "no max memory bandwidth specified, assume unlimited\n");
@@ -275,7 +300,8 @@ static int pl111_amba_probe(struct amba_device *amba_dev,
priv->regs = devm_ioremap_resource(dev, &amba_dev->res);
if (IS_ERR(priv->regs)) {
dev_err(dev, "%s failed mmio\n", __func__);
- return PTR_ERR(priv->regs);
+ ret = PTR_ERR(priv->regs);
+ goto dev_unref;
}
/* This may override some variant settings */
@@ -305,11 +331,14 @@ static int pl111_amba_probe(struct amba_device *amba_dev,
dev_unref:
drm_dev_unref(drm);
+ of_reserved_mem_device_release(dev);
+
return ret;
}
static int pl111_amba_remove(struct amba_device *amba_dev)
{
+ struct device *dev = &amba_dev->dev;
struct drm_device *drm = amba_get_drvdata(amba_dev);
struct pl111_drm_dev_private *priv = drm->dev_private;
@@ -319,6 +348,7 @@ static int pl111_amba_remove(struct amba_device *amba_dev)
drm_panel_bridge_remove(priv->bridge);
drm_mode_config_cleanup(drm);
drm_dev_unref(drm);
+ of_reserved_mem_device_release(dev);
return 0;
}
diff --git a/drivers/gpu/drm/pl111/pl111_versatile.c b/drivers/gpu/drm/pl111/pl111_versatile.c
index 9302f51..b9baefd 100644
--- a/drivers/gpu/drm/pl111/pl111_versatile.c
+++ b/drivers/gpu/drm/pl111/pl111_versatile.c
@@ -1,12 +1,14 @@
#include <linux/amba/clcd-regs.h>
#include <linux/device.h>
#include <linux/of.h>
+#include <linux/of_platform.h>
#include <linux/regmap.h>
#include <linux/mfd/syscon.h>
#include <linux/bitops.h>
#include <linux/module.h>
#include <drm/drmP.h>
#include "pl111_versatile.h"
+#include "pl111_vexpress.h"
#include "pl111_drm.h"
static struct regmap *versatile_syscon_map;
@@ -22,6 +24,7 @@ enum versatile_clcd {
REALVIEW_CLCD_PB11MP,
REALVIEW_CLCD_PBA8,
REALVIEW_CLCD_PBX,
+ VEXPRESS_CLCD_V2M,
};
static const struct of_device_id versatile_clcd_of_match[] = {
@@ -53,6 +56,10 @@ static const struct of_device_id versatile_clcd_of_match[] = {
.compatible = "arm,realview-pbx-syscon",
.data = (void *)REALVIEW_CLCD_PBX,
},
+ {
+ .compatible = "arm,vexpress-muxfpga",
+ .data = (void *)VEXPRESS_CLCD_V2M,
+ },
{},
};
@@ -286,12 +293,26 @@ static const struct pl111_variant_data pl111_realview = {
.fb_bpp = 16,
};
+/*
+ * Versatile Express PL111 variant, again we just push the maximum
+ * BPP to 16 to be able to get 1024x768 without saturating the memory
+ * bus. The clockdivider also seems broken on the Versatile Express.
+ */
+static const struct pl111_variant_data pl111_vexpress = {
+ .name = "PL111 Versatile Express",
+ .formats = pl111_realview_pixel_formats,
+ .nformats = ARRAY_SIZE(pl111_realview_pixel_formats),
+ .fb_bpp = 16,
+ .broken_clockdivider = true,
+};
+
int pl111_versatile_init(struct device *dev, struct pl111_drm_dev_private *priv)
{
const struct of_device_id *clcd_id;
enum versatile_clcd versatile_clcd_type;
struct device_node *np;
struct regmap *map;
+ int ret;
np = of_find_matching_node_and_match(NULL, versatile_clcd_of_match,
&clcd_id);
@@ -301,7 +322,33 @@ int pl111_versatile_init(struct device *dev, struct pl111_drm_dev_private *priv)
}
versatile_clcd_type = (enum versatile_clcd)clcd_id->data;
- map = syscon_node_to_regmap(np);
+ /* Versatile Express special handling */
+ if (versatile_clcd_type == VEXPRESS_CLCD_V2M) {
+ struct platform_device *pdev;
+
+ /* Registers a driver for the muxfpga */
+ ret = vexpress_muxfpga_init();
+ if (ret) {
+ dev_err(dev, "unable to initialize muxfpga driver\n");
+ return ret;
+ }
+
+ /* Call into deep Vexpress configuration API */
+ pdev = of_find_device_by_node(np);
+ if (!pdev) {
+ dev_err(dev, "can't find the sysreg device, deferring\n");
+ return -EPROBE_DEFER;
+ }
+ map = dev_get_drvdata(&pdev->dev);
+ if (!map) {
+ dev_err(dev, "sysreg has not yet probed\n");
+ platform_device_put(pdev);
+ return -EPROBE_DEFER;
+ }
+ } else {
+ map = syscon_node_to_regmap(np);
+ }
+
if (IS_ERR(map)) {
dev_err(dev, "no Versatile syscon regmap\n");
return PTR_ERR(map);
@@ -340,6 +387,13 @@ int pl111_versatile_init(struct device *dev, struct pl111_drm_dev_private *priv)
priv->variant_display_disable = pl111_realview_clcd_disable;
dev_info(dev, "set up callbacks for RealView PL111\n");
break;
+ case VEXPRESS_CLCD_V2M:
+ priv->variant = &pl111_vexpress;
+ dev_info(dev, "initializing Versatile Express PL111\n");
+ ret = pl111_vexpress_clcd_init(dev, priv, map);
+ if (ret)
+ return ret;
+ break;
default:
dev_info(dev, "unknown Versatile system controller\n");
break;
diff --git a/drivers/gpu/drm/pl111/pl111_vexpress.c b/drivers/gpu/drm/pl111/pl111_vexpress.c
new file mode 100644
index 0000000..a534b22
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_vexpress.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Versatile Express PL111 handling
+ * Copyright (C) 2018 Linus Walleij
+ *
+ * This module binds to the "arm,vexpress-muxfpga" device on the
+ * Versatile Express configuration bus and sets up which CLCD instance
+ * gets muxed out on the DVI bridge.
+ */
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/regmap.h>
+#include <linux/vexpress.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include "pl111_drm.h"
+#include "pl111_vexpress.h"
+
+#define VEXPRESS_FPGAMUX_MOTHERBOARD 0x00
+#define VEXPRESS_FPGAMUX_DAUGHTERBOARD_1 0x01
+#define VEXPRESS_FPGAMUX_DAUGHTERBOARD_2 0x02
+
+int pl111_vexpress_clcd_init(struct device *dev,
+ struct pl111_drm_dev_private *priv,
+ struct regmap *map)
+{
+ struct device_node *root;
+ struct device_node *child;
+ struct device_node *ct_clcd = NULL;
+ bool has_coretile_clcd = false;
+ bool has_coretile_hdlcd = false;
+ bool mux_motherboard = true;
+ u32 val;
+ int ret;
+
+ /*
+ * Check whether the core tile has a CLCD or HDLCD by looking for
+ * their compatible nodes in the root of the device tree.
+ */
+ root = of_find_node_by_path("/");
+ if (!root)
+ return -EINVAL;
+
+ for_each_available_child_of_node(root, child) {
+ if (of_device_is_compatible(child, "arm,pl111")) {
+ has_coretile_clcd = true;
+ ct_clcd = child;
+ break;
+ }
+ if (of_device_is_compatible(child, "arm,hdlcd")) {
+ has_coretile_hdlcd = true;
+ break;
+ }
+ }
+
+ /*
+ * If there is a coretile HDLCD and it has a driver,
+ * do not mux the CLCD on the motherboard to the DVI.
+ */
+ if (has_coretile_hdlcd && IS_ENABLED(CONFIG_DRM_HDLCD))
+ mux_motherboard = false;
+
+ /*
+ * On the Vexpress CA9 we let the CLCD on the coretile
+ * take precedence, so also in this case do not mux the
+ * motherboard to the DVI.
+ */
+ if (has_coretile_clcd)
+ mux_motherboard = false;
+
+ if (mux_motherboard) {
+ dev_info(dev, "DVI muxed to motherboard CLCD\n");
+ val = VEXPRESS_FPGAMUX_MOTHERBOARD;
+ } else if (ct_clcd == dev->of_node) {
+ dev_info(dev,
+ "DVI muxed to daughterboard 1 (core tile) CLCD\n");
+ val = VEXPRESS_FPGAMUX_DAUGHTERBOARD_1;
+ } else {
+ dev_info(dev, "core tile graphics present\n");
+ dev_info(dev, "this device will be deactivated\n");
+ return -ENODEV;
+ }
+
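+ /* Program the mux register to route the chosen CLCD to the DVI bridge. */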
+ ret = regmap_write(map, 0, val);
+ if (ret) {
+ dev_err(dev, "error setting DVI muxmode\n");
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+/*
+ * This sets up the regmap pointer that will then be retrieved by
+ * the detection code in pl111_versatile.c and passed in to the
+ * pl111_vexpress_clcd_init() function above.
+ */
+static int vexpress_muxfpga_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct regmap *map;
+
+ map = devm_regmap_init_vexpress_config(&pdev->dev);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
+ dev_set_drvdata(dev, map);
+
+ return 0;
+}
+
+static const struct of_device_id vexpress_muxfpga_match[] = {
+ { .compatible = "arm,vexpress-muxfpga", },
+ { /* sentinel */ }
+};
+
+static struct platform_driver vexpress_muxfpga_driver = {
+ .driver = {
+ .name = "vexpress-muxfpga",
+ .of_match_table = of_match_ptr(vexpress_muxfpga_match),
+ },
+ .probe = vexpress_muxfpga_probe,
+};
+
+int vexpress_muxfpga_init(void)
+{
+ int ret;
+
+ ret = platform_driver_register(&vexpress_muxfpga_driver);
+ /* -EBUSY just means this driver is already registered */
+ if (ret == -EBUSY)
+ ret = 0;
+ return ret;
+}
diff --git a/drivers/gpu/drm/pl111/pl111_vexpress.h b/drivers/gpu/drm/pl111/pl111_vexpress.h
new file mode 100644
index 0000000..5d3681b
--- /dev/null
+++ b/drivers/gpu/drm/pl111/pl111_vexpress.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+struct device;
+struct pl111_drm_dev_private;
+struct regmap;
+
+#ifdef CONFIG_ARCH_VEXPRESS
+
+int pl111_vexpress_clcd_init(struct device *dev,
+ struct pl111_drm_dev_private *priv,
+ struct regmap *map);
+
+int vexpress_muxfpga_init(void);
+
+#else
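+/* Stubs used when Versatile Express support is compiled out. */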
+
+static inline int pl111_vexpress_clcd_init(struct device *dev,
+ struct pl111_drm_dev_private *priv,
+ struct regmap *map)
+{
+ return -ENODEV;
+}
+
+static inline int vexpress_muxfpga_init(void)
+{
+ return 0;
+}
+
+#endif
diff --git a/drivers/gpu/drm/qxl/qxl_cmd.c b/drivers/gpu/drm/qxl/qxl_cmd.c
index 01665b9..208af9f 100644
--- a/drivers/gpu/drm/qxl/qxl_cmd.c
+++ b/drivers/gpu/drm/qxl/qxl_cmd.c
@@ -339,12 +339,9 @@ int qxl_io_update_area(struct qxl_device *qdev, struct qxl_bo *surf,
surface_height = surf->surf.height;
if (area->left < 0 || area->top < 0 ||
- area->right > surface_width || area->bottom > surface_height) {
- qxl_io_log(qdev, "%s: not doing area update for "
- "%d, (%d,%d,%d,%d) (%d,%d)\n", __func__, surface_id, area->left,
- area->top, area->right, area->bottom, surface_width, surface_height);
+ area->right > surface_width || area->bottom > surface_height)
return -EINVAL;
- }
+
mutex_lock(&qdev->update_area_mutex);
qdev->ram_header->update_area = *area;
qdev->ram_header->update_surface = surface_id;
@@ -372,6 +369,7 @@ void qxl_io_flush_surfaces(struct qxl_device *qdev)
void qxl_io_destroy_primary(struct qxl_device *qdev)
{
wait_for_io_cmd(qdev, 0, QXL_IO_DESTROY_PRIMARY_ASYNC);
+ qdev->primary_created = false;
}
void qxl_io_create_primary(struct qxl_device *qdev,
@@ -397,6 +395,7 @@ void qxl_io_create_primary(struct qxl_device *qdev,
create->type = QXL_SURF_TYPE_PRIMARY;
wait_for_io_cmd(qdev, 0, QXL_IO_CREATE_PRIMARY_ASYNC);
+ qdev->primary_created = true;
}
void qxl_io_memslot_add(struct qxl_device *qdev, uint8_t id)
@@ -405,20 +404,6 @@ void qxl_io_memslot_add(struct qxl_device *qdev, uint8_t id)
wait_for_io_cmd(qdev, id, QXL_IO_MEMSLOT_ADD_ASYNC);
}
-void qxl_io_log(struct qxl_device *qdev, const char *fmt, ...)
-{
- va_list args;
-
- va_start(args, fmt);
- vsnprintf(qdev->ram_header->log_buf, QXL_LOG_BUF_SIZE, fmt, args);
- va_end(args);
- /*
- * DO not do a DRM output here - this will call printk, which will
- * call back into qxl for rendering (qxl_fb)
- */
- outb(0, qdev->io_base + QXL_IO_LOG);
-}
-
void qxl_io_reset(struct qxl_device *qdev)
{
outb(0, qdev->io_base + QXL_IO_RESET);
@@ -426,19 +411,6 @@ void qxl_io_reset(struct qxl_device *qdev)
void qxl_io_monitors_config(struct qxl_device *qdev)
{
- qxl_io_log(qdev, "%s: %d [%dx%d+%d+%d]\n", __func__,
- qdev->monitors_config ?
- qdev->monitors_config->count : -1,
- qdev->monitors_config && qdev->monitors_config->count ?
- qdev->monitors_config->heads[0].width : -1,
- qdev->monitors_config && qdev->monitors_config->count ?
- qdev->monitors_config->heads[0].height : -1,
- qdev->monitors_config && qdev->monitors_config->count ?
- qdev->monitors_config->heads[0].x : -1,
- qdev->monitors_config && qdev->monitors_config->count ?
- qdev->monitors_config->heads[0].y : -1
- );
-
wait_for_io_cmd(qdev, 0, QXL_IO_MONITORS_CONFIG_ASYNC);
}
diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c
index 820cbca..b8cda94 100644
--- a/drivers/gpu/drm/qxl/qxl_display.c
+++ b/drivers/gpu/drm/qxl/qxl_display.c
@@ -48,12 +48,8 @@ static void qxl_alloc_client_monitors_config(struct qxl_device *qdev, unsigned c
qdev->client_monitors_config = kzalloc(
sizeof(struct qxl_monitors_config) +
sizeof(struct qxl_head) * count, GFP_KERNEL);
- if (!qdev->client_monitors_config) {
- qxl_io_log(qdev,
- "%s: allocation failure for %u heads\n",
- __func__, count);
+ if (!qdev->client_monitors_config)
return;
- }
}
qdev->client_monitors_config->count = count;
}
@@ -74,12 +70,8 @@ static int qxl_display_copy_rom_client_monitors_config(struct qxl_device *qdev)
num_monitors = qdev->rom->client_monitors_config.count;
crc = crc32(0, (const uint8_t *)&qdev->rom->client_monitors_config,
sizeof(qdev->rom->client_monitors_config));
- if (crc != qdev->rom->client_monitors_config_crc) {
- qxl_io_log(qdev, "crc mismatch: have %X (%zd) != %X\n", crc,
- sizeof(qdev->rom->client_monitors_config),
- qdev->rom->client_monitors_config_crc);
+ if (crc != qdev->rom->client_monitors_config_crc)
return MONITORS_CONFIG_BAD_CRC;
- }
if (!num_monitors) {
DRM_DEBUG_KMS("no client monitors configured\n");
return status;
@@ -170,12 +162,10 @@ void qxl_display_read_client_monitors_config(struct qxl_device *qdev)
udelay(5);
}
if (status == MONITORS_CONFIG_BAD_CRC) {
- qxl_io_log(qdev, "config: bad crc\n");
DRM_DEBUG_KMS("ignoring client monitors config: bad crc");
return;
}
if (status == MONITORS_CONFIG_UNCHANGED) {
- qxl_io_log(qdev, "config: unchanged\n");
DRM_DEBUG_KMS("ignoring client monitors config: unchanged");
return;
}
@@ -268,6 +258,89 @@ static int qxl_add_common_modes(struct drm_connector *connector,
return i - 1;
}
+static void qxl_send_monitors_config(struct qxl_device *qdev)
+{
+ int i;
+
+ BUG_ON(!qdev->ram_header->monitors_config);
+
+ if (qdev->monitors_config->count == 0)
+ return;
+
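+ /* Sanity-check every head before notifying the device. */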
+ for (i = 0 ; i < qdev->monitors_config->count ; ++i) {
+ struct qxl_head *head = &qdev->monitors_config->heads[i];
+
+ if (head->y > 8192 || head->x > 8192 ||
+ head->width > 8192 || head->height > 8192) {
+ DRM_ERROR("head %d wrong: %dx%d+%d+%d\n",
+ i, head->width, head->height,
+ head->x, head->y);
+ return;
+ }
+ }
+ qxl_io_monitors_config(qdev);
+}
+
+static void qxl_crtc_update_monitors_config(struct drm_crtc *crtc,
+ const char *reason)
+{
+ struct drm_device *dev = crtc->dev;
+ struct qxl_device *qdev = dev->dev_private;
+ struct qxl_crtc *qcrtc = to_qxl_crtc(crtc);
+ struct qxl_head head;
+ int oldcount, i = qcrtc->index;
+
+ if (!qdev->primary_created) {
+ DRM_DEBUG_KMS("no primary surface, skip (%s)\n", reason);
+ return;
+ }
+
+ if (!qdev->monitors_config ||
+ qdev->monitors_config->max_allowed <= i)
+ return;
+
+ head.id = i;
+ head.flags = 0;
+ oldcount = qdev->monitors_config->count;
+ if (crtc->state->active) {
+ struct drm_display_mode *mode = &crtc->mode;
+ head.width = mode->hdisplay;
+ head.height = mode->vdisplay;
+ head.x = crtc->x;
+ head.y = crtc->y;
+ if (qdev->monitors_config->count < i + 1)
+ qdev->monitors_config->count = i + 1;
+ } else if (i > 0) {
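+ /* Zero out disabled heads above 0; drop the count if it was the last. */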
+ head.width = 0;
+ head.height = 0;
+ head.x = 0;
+ head.y = 0;
+ if (qdev->monitors_config->count == i + 1)
+ qdev->monitors_config->count = i;
+ } else {
+ DRM_DEBUG_KMS("inactive head 0, skip (%s)\n", reason);
+ return;
+ }
+
+ if (head.width == qdev->monitors_config->heads[i].width &&
+ head.height == qdev->monitors_config->heads[i].height &&
+ head.x == qdev->monitors_config->heads[i].x &&
+ head.y == qdev->monitors_config->heads[i].y &&
+ oldcount == qdev->monitors_config->count)
+ return;
+
+ DRM_DEBUG_KMS("head %d, %dx%d, at +%d+%d, %s (%s)\n",
+ i, head.width, head.height, head.x, head.y,
+ crtc->state->active ? "on" : "off", reason);
+ if (oldcount != qdev->monitors_config->count)
+ DRM_DEBUG_KMS("active heads %d -> %d (%d total)\n",
+ oldcount, qdev->monitors_config->count,
+ qdev->monitors_config->max_allowed);
+
+ qdev->monitors_config->heads[i] = head;
+ qxl_send_monitors_config(qdev);
+}
+
static void qxl_crtc_atomic_flush(struct drm_crtc *crtc,
struct drm_crtc_state *old_crtc_state)
{
@@ -283,6 +356,8 @@ static void qxl_crtc_atomic_flush(struct drm_crtc *crtc,
drm_crtc_send_vblank_event(crtc, event);
spin_unlock_irqrestore(&dev->event_lock, flags);
}
+
+ qxl_crtc_update_monitors_config(crtc, "flush");
}
static void qxl_crtc_destroy(struct drm_crtc *crtc)
@@ -381,95 +456,19 @@ qxl_framebuffer_init(struct drm_device *dev,
return 0;
}
-static bool qxl_crtc_mode_fixup(struct drm_crtc *crtc,
- const struct drm_display_mode *mode,
- struct drm_display_mode *adjusted_mode)
-{
- struct drm_device *dev = crtc->dev;
- struct qxl_device *qdev = dev->dev_private;
-
- qxl_io_log(qdev, "%s: (%d,%d) => (%d,%d)\n",
- __func__,
- mode->hdisplay, mode->vdisplay,
- adjusted_mode->hdisplay,
- adjusted_mode->vdisplay);
- return true;
-}
-
-static void
-qxl_send_monitors_config(struct qxl_device *qdev)
-{
- int i;
-
- BUG_ON(!qdev->ram_header->monitors_config);
-
- if (qdev->monitors_config->count == 0) {
- qxl_io_log(qdev, "%s: 0 monitors??\n", __func__);
- return;
- }
- for (i = 0 ; i < qdev->monitors_config->count ; ++i) {
- struct qxl_head *head = &qdev->monitors_config->heads[i];
-
- if (head->y > 8192 || head->x > 8192 ||
- head->width > 8192 || head->height > 8192) {
- DRM_ERROR("head %d wrong: %dx%d+%d+%d\n",
- i, head->width, head->height,
- head->x, head->y);
- return;
- }
- }
- qxl_io_monitors_config(qdev);
-}
-
-static void qxl_monitors_config_set(struct qxl_device *qdev,
- int index,
- unsigned x, unsigned y,
- unsigned width, unsigned height,
- unsigned surf_id)
-{
- DRM_DEBUG_KMS("%d:%dx%d+%d+%d\n", index, width, height, x, y);
- qdev->monitors_config->heads[index].x = x;
- qdev->monitors_config->heads[index].y = y;
- qdev->monitors_config->heads[index].width = width;
- qdev->monitors_config->heads[index].height = height;
- qdev->monitors_config->heads[index].surface_id = surf_id;
-
-}
-
-static void qxl_mode_set_nofb(struct drm_crtc *crtc)
-{
- struct qxl_device *qdev = crtc->dev->dev_private;
- struct qxl_crtc *qcrtc = to_qxl_crtc(crtc);
- struct drm_display_mode *mode = &crtc->mode;
-
- DRM_DEBUG("Mode set (%d,%d)\n",
- mode->hdisplay, mode->vdisplay);
-
- qxl_monitors_config_set(qdev, qcrtc->index, 0, 0,
- mode->hdisplay, mode->vdisplay, 0);
-
-}
-
static void qxl_crtc_atomic_enable(struct drm_crtc *crtc,
struct drm_crtc_state *old_state)
{
- DRM_DEBUG("\n");
+ qxl_crtc_update_monitors_config(crtc, "enable");
}
static void qxl_crtc_atomic_disable(struct drm_crtc *crtc,
struct drm_crtc_state *old_state)
{
- struct qxl_crtc *qcrtc = to_qxl_crtc(crtc);
- struct qxl_device *qdev = crtc->dev->dev_private;
-
- qxl_monitors_config_set(qdev, qcrtc->index, 0, 0, 0, 0, 0);
-
- qxl_send_monitors_config(qdev);
+ qxl_crtc_update_monitors_config(crtc, "disable");
}
static const struct drm_crtc_helper_funcs qxl_crtc_helper_funcs = {
- .mode_fixup = qxl_crtc_mode_fixup,
- .mode_set_nofb = qxl_mode_set_nofb,
.atomic_flush = qxl_crtc_atomic_flush,
.atomic_enable = qxl_crtc_atomic_enable,
.atomic_disable = qxl_crtc_atomic_disable,
@@ -613,12 +612,6 @@ static void qxl_primary_atomic_disable(struct drm_plane *plane,
}
}
-static int qxl_plane_atomic_check(struct drm_plane *plane,
- struct drm_plane_state *state)
-{
- return 0;
-}
-
static void qxl_cursor_atomic_update(struct drm_plane *plane,
struct drm_plane_state *old_state)
{
@@ -824,7 +817,6 @@ static const uint32_t qxl_cursor_plane_formats[] = {
};
static const struct drm_plane_helper_funcs qxl_cursor_helper_funcs = {
- .atomic_check = qxl_plane_atomic_check,
.atomic_update = qxl_cursor_atomic_update,
.atomic_disable = qxl_cursor_atomic_disable,
.prepare_fb = qxl_plane_prepare_fb,
@@ -949,81 +941,6 @@ free_mem:
return r;
}
-static void qxl_enc_dpms(struct drm_encoder *encoder, int mode)
-{
- DRM_DEBUG("\n");
-}
-
-static void qxl_enc_prepare(struct drm_encoder *encoder)
-{
- DRM_DEBUG("\n");
-}
-
-static void qxl_write_monitors_config_for_encoder(struct qxl_device *qdev,
- struct drm_encoder *encoder)
-{
- int i;
- struct qxl_output *output = drm_encoder_to_qxl_output(encoder);
- struct qxl_head *head;
- struct drm_display_mode *mode;
-
- BUG_ON(!encoder);
- /* TODO: ugly, do better */
- i = output->index;
- if (!qdev->monitors_config ||
- qdev->monitors_config->max_allowed <= i) {
- DRM_ERROR(
- "head number too large or missing monitors config: %p, %d",
- qdev->monitors_config,
- qdev->monitors_config ?
- qdev->monitors_config->max_allowed : -1);
- return;
- }
- if (!encoder->crtc) {
- DRM_ERROR("missing crtc on encoder %p\n", encoder);
- return;
- }
- if (i != 0)
- DRM_DEBUG("missing for multiple monitors: no head holes\n");
- head = &qdev->monitors_config->heads[i];
- head->id = i;
- if (encoder->crtc->enabled) {
- mode = &encoder->crtc->mode;
- head->width = mode->hdisplay;
- head->height = mode->vdisplay;
- head->x = encoder->crtc->x;
- head->y = encoder->crtc->y;
- if (qdev->monitors_config->count < i + 1)
- qdev->monitors_config->count = i + 1;
- } else {
- head->width = 0;
- head->height = 0;
- head->x = 0;
- head->y = 0;
- }
- DRM_DEBUG_KMS("setting head %d to +%d+%d %dx%d out of %d\n",
- i, head->x, head->y, head->width, head->height, qdev->monitors_config->count);
- head->flags = 0;
- /* TODO - somewhere else to call this for multiple monitors
- * (config_commit?) */
- qxl_send_monitors_config(qdev);
-}
-
-static void qxl_enc_commit(struct drm_encoder *encoder)
-{
- struct qxl_device *qdev = encoder->dev->dev_private;
-
- qxl_write_monitors_config_for_encoder(qdev, encoder);
- DRM_DEBUG("\n");
-}
-
-static void qxl_enc_mode_set(struct drm_encoder *encoder,
- struct drm_display_mode *mode,
- struct drm_display_mode *adjusted_mode)
-{
- DRM_DEBUG("\n");
-}
-
static int qxl_conn_get_modes(struct drm_connector *connector)
{
unsigned pwidth = 1024;
@@ -1069,10 +986,6 @@ static struct drm_encoder *qxl_best_encoder(struct drm_connector *connector)
static const struct drm_encoder_helper_funcs qxl_enc_helper_funcs = {
- .dpms = qxl_enc_dpms,
- .prepare = qxl_enc_prepare,
- .mode_set = qxl_enc_mode_set,
- .commit = qxl_enc_commit,
};
static const struct drm_connector_helper_funcs qxl_connector_helper_funcs = {
@@ -1100,21 +1013,11 @@ static enum drm_connector_status qxl_conn_detect(
qxl_head_enabled(&qdev->client_monitors_config->heads[output->index]);
DRM_DEBUG("#%d connected: %d\n", output->index, connected);
- if (!connected)
- qxl_monitors_config_set(qdev, output->index, 0, 0, 0, 0, 0);
return connected ? connector_status_connected
: connector_status_disconnected;
}
-static int qxl_conn_set_property(struct drm_connector *connector,
- struct drm_property *property,
- uint64_t value)
-{
- DRM_DEBUG("\n");
- return 0;
-}
-
static void qxl_conn_destroy(struct drm_connector *connector)
{
struct qxl_output *qxl_output =
@@ -1129,7 +1032,6 @@ static const struct drm_connector_funcs qxl_connector_funcs = {
.dpms = drm_helper_connector_dpms,
.detect = qxl_conn_detect,
.fill_modes = drm_helper_probe_single_connector_modes,
- .set_property = qxl_conn_set_property,
.destroy = qxl_conn_destroy,
.reset = drm_atomic_helper_connector_reset,
.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h
index 864b456..01220d3 100644
--- a/drivers/gpu/drm/qxl/qxl_drv.h
+++ b/drivers/gpu/drm/qxl/qxl_drv.h
@@ -299,9 +299,6 @@ struct qxl_device {
int monitors_config_height;
};
-/* forward declaration for QXL_INFO_IO */
-__printf(2,3) void qxl_io_log(struct qxl_device *qdev, const char *fmt, ...);
-
extern const struct drm_ioctl_desc qxl_ioctls[];
extern int qxl_max_ioctl;
diff --git a/drivers/gpu/drm/qxl/qxl_fb.c b/drivers/gpu/drm/qxl/qxl_fb.c
index 3388914..9a67526 100644
--- a/drivers/gpu/drm/qxl/qxl_fb.c
+++ b/drivers/gpu/drm/qxl/qxl_fb.c
@@ -185,8 +185,6 @@ static int qxlfb_framebuffer_dirty(struct drm_framebuffer *fb,
/*
* we are using a shadow draw buffer, at qdev->surface0_shadow
*/
- qxl_io_log(qdev, "dirty x[%d, %d], y[%d, %d]\n", clips->x1, clips->x2,
- clips->y1, clips->y2);
image->dx = clips->x1;
image->dy = clips->y1;
image->width = clips->x2 - clips->x1;
diff --git a/drivers/gpu/drm/qxl/qxl_irq.c b/drivers/gpu/drm/qxl/qxl_irq.c
index 23a4010..3bb31ad 100644
--- a/drivers/gpu/drm/qxl/qxl_irq.c
+++ b/drivers/gpu/drm/qxl/qxl_irq.c
@@ -57,10 +57,9 @@ irqreturn_t qxl_irq_handler(int irq, void *arg)
* to avoid endless loops).
*/
qdev->irq_received_error++;
- qxl_io_log(qdev, "%s: driver is in bug mode.\n", __func__);
+ DRM_WARN("driver is in bug mode\n");
}
if (pending & QXL_INTERRUPT_CLIENT_MONITORS_CONFIG) {
- qxl_io_log(qdev, "QXL_INTERRUPT_CLIENT_MONITORS_CONFIG\n");
schedule_work(&qdev->client_monitors_config_work);
}
qdev->ram_header->int_mask = QXL_INTERRUPT_MASK;
diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c
index ee2340e3..86a1fb3 100644
--- a/drivers/gpu/drm/qxl/qxl_ttm.c
+++ b/drivers/gpu/drm/qxl/qxl_ttm.c
@@ -105,16 +105,16 @@ static void qxl_ttm_global_fini(struct qxl_device *qdev)
static struct vm_operations_struct qxl_ttm_vm_ops;
static const struct vm_operations_struct *ttm_vm_ops;
-static int qxl_ttm_fault(struct vm_fault *vmf)
+static vm_fault_t qxl_ttm_fault(struct vm_fault *vmf)
{
struct ttm_buffer_object *bo;
- int r;
+ vm_fault_t ret;
bo = (struct ttm_buffer_object *)vmf->vma->vm_private_data;
if (bo == NULL)
return VM_FAULT_NOPAGE;
- r = ttm_vm_ops->fault(vmf);
- return r;
+ ret = ttm_vm_ops->fault(vmf);
+ return ret;
}
int qxl_mmap(struct file *filp, struct vm_area_struct *vma)
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
index c442053..f2a0bd1 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c
@@ -767,7 +767,8 @@ static irqreturn_t rcar_du_crtc_irq(int irq, void *arg)
* Initialization
*/
-int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index)
+int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int swindex,
+ unsigned int hwindex)
{
static const unsigned int mmio_offsets[] = {
DU0_REG_OFFSET, DU1_REG_OFFSET, DU2_REG_OFFSET, DU3_REG_OFFSET
@@ -775,7 +776,7 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index)
struct rcar_du_device *rcdu = rgrp->dev;
struct platform_device *pdev = to_platform_device(rcdu->dev);
- struct rcar_du_crtc *rcrtc = &rcdu->crtcs[index];
+ struct rcar_du_crtc *rcrtc = &rcdu->crtcs[swindex];
struct drm_crtc *crtc = &rcrtc->crtc;
struct drm_plane *primary;
unsigned int irqflags;
@@ -787,7 +788,7 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index)
/* Get the CRTC clock and the optional external clock. */
if (rcar_du_has(rcdu, RCAR_DU_FEATURE_CRTC_IRQ_CLOCK)) {
- sprintf(clk_name, "du.%u", index);
+ sprintf(clk_name, "du.%u", hwindex);
name = clk_name;
} else {
name = NULL;
@@ -795,16 +796,16 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index)
rcrtc->clock = devm_clk_get(rcdu->dev, name);
if (IS_ERR(rcrtc->clock)) {
- dev_err(rcdu->dev, "no clock for CRTC %u\n", index);
+ dev_err(rcdu->dev, "no clock for DU channel %u\n", hwindex);
return PTR_ERR(rcrtc->clock);
}
- sprintf(clk_name, "dclkin.%u", index);
+ sprintf(clk_name, "dclkin.%u", hwindex);
clk = devm_clk_get(rcdu->dev, clk_name);
if (!IS_ERR(clk)) {
rcrtc->extclock = clk;
} else if (PTR_ERR(rcrtc->clock) == -EPROBE_DEFER) {
- dev_info(rcdu->dev, "can't get external clock %u\n", index);
+ dev_info(rcdu->dev, "can't get external clock %u\n", hwindex);
return -EPROBE_DEFER;
}
@@ -813,13 +814,13 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index)
spin_lock_init(&rcrtc->vblank_lock);
rcrtc->group = rgrp;
- rcrtc->mmio_offset = mmio_offsets[index];
- rcrtc->index = index;
+ rcrtc->mmio_offset = mmio_offsets[hwindex];
+ rcrtc->index = hwindex;
if (rcar_du_has(rcdu, RCAR_DU_FEATURE_VSP1_SOURCE))
primary = &rcrtc->vsp->planes[rcrtc->vsp_pipe].plane;
else
- primary = &rgrp->planes[index % 2].plane;
+ primary = &rgrp->planes[swindex % 2].plane;
ret = drm_crtc_init_with_planes(rcdu->ddev, crtc, primary,
NULL, &crtc_funcs, NULL);
@@ -833,7 +834,8 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index)
/* Register the interrupt handler. */
if (rcar_du_has(rcdu, RCAR_DU_FEATURE_CRTC_IRQ_CLOCK)) {
- irq = platform_get_irq(pdev, index);
+ /* The IRQs are associated with the CRTC (sw) index. */
+ irq = platform_get_irq(pdev, swindex);
irqflags = 0;
} else {
irq = platform_get_irq(pdev, 0);
@@ -841,7 +843,7 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index)
}
if (irq < 0) {
- dev_err(rcdu->dev, "no IRQ for CRTC %u\n", index);
+ dev_err(rcdu->dev, "no IRQ for CRTC %u\n", swindex);
return irq;
}
@@ -849,7 +851,7 @@ int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index)
dev_name(rcdu->dev), rcrtc);
if (ret < 0) {
dev_err(rcdu->dev,
- "failed to register IRQ for CRTC %u\n", index);
+ "failed to register IRQ for CRTC %u\n", swindex);
return ret;
}
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.h b/drivers/gpu/drm/rcar-du/rcar_du_crtc.h
index fdc2bf9..84b5e23 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.h
+++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.h
@@ -80,7 +80,8 @@ enum rcar_du_output {
RCAR_DU_OUTPUT_MAX,
};
-int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int index);
+int rcar_du_crtc_create(struct rcar_du_group *rgrp, unsigned int swindex,
+ unsigned int hwindex);
void rcar_du_crtc_suspend(struct rcar_du_crtc *rcrtc);
void rcar_du_crtc_resume(struct rcar_du_crtc *rcrtc);
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_drv.c b/drivers/gpu/drm/rcar-du/rcar_du_drv.c
index 3917d83..02aee6c 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_drv.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_drv.c
@@ -40,7 +40,7 @@ static const struct rcar_du_device_info rzg1_du_r8a7743_info = {
.gen = 2,
.features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK
| RCAR_DU_FEATURE_EXT_CTRL_REGS,
- .num_crtcs = 2,
+ .channels_mask = BIT(1) | BIT(0),
.routes = {
/*
* R8A7743 has one RGB output and one LVDS output
@@ -61,7 +61,7 @@ static const struct rcar_du_device_info rzg1_du_r8a7745_info = {
.gen = 2,
.features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK
| RCAR_DU_FEATURE_EXT_CTRL_REGS,
- .num_crtcs = 2,
+ .channels_mask = BIT(1) | BIT(0),
.routes = {
/*
* R8A7745 has two RGB outputs
@@ -80,7 +80,7 @@ static const struct rcar_du_device_info rzg1_du_r8a7745_info = {
static const struct rcar_du_device_info rcar_du_r8a7779_info = {
.gen = 2,
.features = 0,
- .num_crtcs = 2,
+ .channels_mask = BIT(1) | BIT(0),
.routes = {
/*
* R8A7779 has two RGB outputs and one (currently unsupported)
@@ -102,7 +102,7 @@ static const struct rcar_du_device_info rcar_du_r8a7790_info = {
.features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK
| RCAR_DU_FEATURE_EXT_CTRL_REGS,
.quirks = RCAR_DU_QUIRK_ALIGN_128B,
- .num_crtcs = 3,
+ .channels_mask = BIT(2) | BIT(1) | BIT(0),
.routes = {
/*
* R8A7790 has one RGB output, two LVDS outputs and one
@@ -129,7 +129,7 @@ static const struct rcar_du_device_info rcar_du_r8a7791_info = {
.gen = 2,
.features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK
| RCAR_DU_FEATURE_EXT_CTRL_REGS,
- .num_crtcs = 2,
+ .channels_mask = BIT(1) | BIT(0),
.routes = {
/*
* R8A779[13] has one RGB output, one LVDS output and one
@@ -151,7 +151,7 @@ static const struct rcar_du_device_info rcar_du_r8a7792_info = {
.gen = 2,
.features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK
| RCAR_DU_FEATURE_EXT_CTRL_REGS,
- .num_crtcs = 2,
+ .channels_mask = BIT(1) | BIT(0),
.routes = {
/* R8A7792 has two RGB outputs. */
[RCAR_DU_OUTPUT_DPAD0] = {
@@ -169,7 +169,7 @@ static const struct rcar_du_device_info rcar_du_r8a7794_info = {
.gen = 2,
.features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK
| RCAR_DU_FEATURE_EXT_CTRL_REGS,
- .num_crtcs = 2,
+ .channels_mask = BIT(1) | BIT(0),
.routes = {
/*
* R8A7794 has two RGB outputs and one (currently unsupported)
@@ -191,7 +191,7 @@ static const struct rcar_du_device_info rcar_du_r8a7795_info = {
.features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK
| RCAR_DU_FEATURE_EXT_CTRL_REGS
| RCAR_DU_FEATURE_VSP1_SOURCE,
- .num_crtcs = 4,
+ .channels_mask = BIT(3) | BIT(2) | BIT(1) | BIT(0),
.routes = {
/*
* R8A7795 has one RGB output, two HDMI outputs and one
@@ -215,7 +215,7 @@ static const struct rcar_du_device_info rcar_du_r8a7795_info = {
},
},
.num_lvds = 1,
- .dpll_ch = BIT(1) | BIT(2),
+ .dpll_ch = BIT(2) | BIT(1),
};
static const struct rcar_du_device_info rcar_du_r8a7796_info = {
@@ -223,7 +223,7 @@ static const struct rcar_du_device_info rcar_du_r8a7796_info = {
.features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK
| RCAR_DU_FEATURE_EXT_CTRL_REGS
| RCAR_DU_FEATURE_VSP1_SOURCE,
- .num_crtcs = 3,
+ .channels_mask = BIT(2) | BIT(1) | BIT(0),
.routes = {
/*
* R8A7796 has one RGB output, one LVDS output and one HDMI
@@ -246,12 +246,40 @@ static const struct rcar_du_device_info rcar_du_r8a7796_info = {
.dpll_ch = BIT(1),
};
+static const struct rcar_du_device_info rcar_du_r8a77965_info = {
+ .gen = 3,
+ .features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK
+ | RCAR_DU_FEATURE_EXT_CTRL_REGS
+ | RCAR_DU_FEATURE_VSP1_SOURCE,
+ .channels_mask = BIT(3) | BIT(1) | BIT(0),
+ .routes = {
+ /*
+ * R8A77965 has one RGB output, one LVDS output and one HDMI
+ * output.
+ */
+ [RCAR_DU_OUTPUT_DPAD0] = {
+ .possible_crtcs = BIT(2),
+ .port = 0,
+ },
+ [RCAR_DU_OUTPUT_HDMI0] = {
+ .possible_crtcs = BIT(1),
+ .port = 1,
+ },
+ [RCAR_DU_OUTPUT_LVDS0] = {
+ .possible_crtcs = BIT(0),
+ .port = 2,
+ },
+ },
+ .num_lvds = 1,
+ .dpll_ch = BIT(1),
+};
+
static const struct rcar_du_device_info rcar_du_r8a77970_info = {
.gen = 3,
.features = RCAR_DU_FEATURE_CRTC_IRQ_CLOCK
| RCAR_DU_FEATURE_EXT_CTRL_REGS
| RCAR_DU_FEATURE_VSP1_SOURCE,
- .num_crtcs = 1,
+ .channels_mask = BIT(0),
.routes = {
/* R8A77970 has one RGB output and one LVDS output. */
[RCAR_DU_OUTPUT_DPAD0] = {
@@ -277,6 +305,7 @@ static const struct of_device_id rcar_du_of_table[] = {
{ .compatible = "renesas,du-r8a7794", .data = &rcar_du_r8a7794_info },
{ .compatible = "renesas,du-r8a7795", .data = &rcar_du_r8a7795_info },
{ .compatible = "renesas,du-r8a7796", .data = &rcar_du_r8a7796_info },
+ { .compatible = "renesas,du-r8a77965", .data = &rcar_du_r8a77965_info },
{ .compatible = "renesas,du-r8a77970", .data = &rcar_du_r8a77970_info },
{ }
};
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_drv.h b/drivers/gpu/drm/rcar-du/rcar_du_drv.h
index 131d8e8..b3a25e8 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_drv.h
+++ b/drivers/gpu/drm/rcar-du/rcar_du_drv.h
@@ -52,7 +52,7 @@ struct rcar_du_output_routing {
* @gen: device generation (2 or 3)
* @features: device features (RCAR_DU_FEATURE_*)
* @quirks: device quirks (RCAR_DU_QUIRK_*)
- * @num_crtcs: total number of CRTCs
+ * @channels_mask: bit mask of available DU channels
* @routes: array of CRTC to output routes, indexed by output (RCAR_DU_OUTPUT_*)
* @num_lvds: number of internal LVDS encoders
*/
@@ -60,7 +60,7 @@ struct rcar_du_device_info {
unsigned int gen;
unsigned int features;
unsigned int quirks;
- unsigned int num_crtcs;
+ unsigned int channels_mask;
struct rcar_du_output_routing routes[RCAR_DU_OUTPUT_MAX];
unsigned int num_lvds;
unsigned int dpll_ch;
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_group.c b/drivers/gpu/drm/rcar-du/rcar_du_group.c
index 2f37ea9..d539cb2 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_group.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_group.c
@@ -46,10 +46,13 @@ void rcar_du_group_write(struct rcar_du_group *rgrp, u32 reg, u32 data)
static void rcar_du_group_setup_pins(struct rcar_du_group *rgrp)
{
- u32 defr6 = DEFR6_CODE | DEFR6_ODPM12_DISP;
+ u32 defr6 = DEFR6_CODE;
- if (rgrp->num_crtcs > 1)
- defr6 |= DEFR6_ODPM22_DISP;
+ if (rgrp->channels_mask & BIT(0))
+ defr6 |= DEFR6_ODPM02_DISP;
+
+ if (rgrp->channels_mask & BIT(1))
+ defr6 |= DEFR6_ODPM12_DISP;
rcar_du_group_write(rgrp, DEFR6, defr6);
}
@@ -80,10 +83,11 @@ static void rcar_du_group_setup_defr8(struct rcar_du_group *rgrp)
* On Gen3 VSPD routing can't be configured, but DPAD routing
* needs to be set despite having a single option available.
*/
- u32 crtc = ffs(possible_crtcs) - 1;
+ unsigned int rgb_crtc = ffs(possible_crtcs) - 1;
+ struct rcar_du_crtc *crtc = &rcdu->crtcs[rgb_crtc];
- if (crtc / 2 == rgrp->index)
- defr8 |= DEFR8_DRGBS_DU(crtc);
+ if (crtc->index / 2 == rgrp->index)
+ defr8 |= DEFR8_DRGBS_DU(crtc->index);
}
rcar_du_group_write(rgrp, DEFR8, defr8);
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_group.h b/drivers/gpu/drm/rcar-du/rcar_du_group.h
index 5e3adc6..42105ae 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_group.h
+++ b/drivers/gpu/drm/rcar-du/rcar_du_group.h
@@ -25,6 +25,7 @@ struct rcar_du_device;
* @dev: the DU device
* @mmio_offset: registers offset in the device memory map
* @index: group index
+ * @channels_mask: bitmask of populated DU channels in this group
* @num_crtcs: number of CRTCs in this group (1 or 2)
* @use_count: number of users of the group (rcar_du_group_(get|put))
* @used_crtcs: number of CRTCs currently in use
@@ -39,6 +40,7 @@ struct rcar_du_group {
unsigned int mmio_offset;
unsigned int index;
+ unsigned int channels_mask;
unsigned int num_crtcs;
unsigned int use_count;
unsigned int used_crtcs;
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_kms.c b/drivers/gpu/drm/rcar-du/rcar_du_kms.c
index f4ac0f8..f0bc7cc 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_kms.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_kms.c
@@ -428,7 +428,7 @@ static int rcar_du_vsps_init(struct rcar_du_device *rcdu)
struct {
struct device_node *np;
unsigned int crtcs_mask;
- } vsps[RCAR_DU_MAX_VSPS] = { { 0, }, };
+ } vsps[RCAR_DU_MAX_VSPS] = { { NULL, }, };
unsigned int vsps_count = 0;
unsigned int cells;
unsigned int i;
@@ -507,6 +507,8 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu)
struct drm_fbdev_cma *fbdev;
unsigned int num_encoders;
unsigned int num_groups;
+ unsigned int swindex;
+ unsigned int hwindex;
unsigned int i;
int ret;
@@ -520,7 +522,7 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu)
dev->mode_config.funcs = &rcar_du_mode_config_funcs;
dev->mode_config.helper_private = &rcar_du_mode_config_helper;
- rcdu->num_crtcs = rcdu->info->num_crtcs;
+ rcdu->num_crtcs = hweight8(rcdu->info->channels_mask);
ret = rcar_du_properties_init(rcdu);
if (ret < 0)
@@ -530,7 +532,7 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu)
* Initialize vertical blanking interrupts handling. Start with vblank
* disabled for all CRTCs.
*/
- ret = drm_vblank_init(dev, (1 << rcdu->info->num_crtcs) - 1);
+ ret = drm_vblank_init(dev, (1 << rcdu->num_crtcs) - 1);
if (ret < 0)
return ret;
@@ -545,7 +547,10 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu)
rgrp->dev = rcdu;
rgrp->mmio_offset = mmio_offsets[i];
rgrp->index = i;
- rgrp->num_crtcs = min(rcdu->num_crtcs - 2 * i, 2U);
+ /* Extract the channel mask for this group only. */
+ rgrp->channels_mask = (rcdu->info->channels_mask >> (2 * i))
+ & GENMASK(1, 0);
+ rgrp->num_crtcs = hweight8(rgrp->channels_mask);
/*
* If we have more than one CRTCs in this group pre-associate
@@ -572,10 +577,16 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu)
}
/* Create the CRTCs. */
- for (i = 0; i < rcdu->num_crtcs; ++i) {
- struct rcar_du_group *rgrp = &rcdu->groups[i / 2];
+ for (swindex = 0, hwindex = 0; swindex < rcdu->num_crtcs; ++hwindex) {
+ struct rcar_du_group *rgrp;
+
+ /* Skip unpopulated DU channels. */
+ if (!(rcdu->info->channels_mask & BIT(hwindex)))
+ continue;
+
+ rgrp = &rcdu->groups[hwindex / 2];
- ret = rcar_du_crtc_create(rgrp, i);
+ ret = rcar_du_crtc_create(rgrp, swindex++, hwindex);
if (ret < 0)
return ret;
}
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_of.c b/drivers/gpu/drm/rcar-du/rcar_du_of.c
index 68a0b82..afef696 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_of.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_of.c
@@ -18,6 +18,7 @@
#include "rcar_du_crtc.h"
#include "rcar_du_drv.h"
+#include "rcar_du_of.h"
/* -----------------------------------------------------------------------------
* Generic Overlay Handling
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_regs.h b/drivers/gpu/drm/rcar-du/rcar_du_regs.h
index d5bae99..9dfd220 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_regs.h
+++ b/drivers/gpu/drm/rcar-du/rcar_du_regs.h
@@ -187,14 +187,14 @@
#define DEFR6 0x000e8
#define DEFR6_CODE (0x7778 << 16)
-#define DEFR6_ODPM22_DSMR (0 << 10)
-#define DEFR6_ODPM22_DISP (2 << 10)
-#define DEFR6_ODPM22_CDE (3 << 10)
-#define DEFR6_ODPM22_MASK (3 << 10)
-#define DEFR6_ODPM12_DSMR (0 << 8)
-#define DEFR6_ODPM12_DISP (2 << 8)
-#define DEFR6_ODPM12_CDE (3 << 8)
-#define DEFR6_ODPM12_MASK (3 << 8)
+#define DEFR6_ODPM12_DSMR (0 << 10)
+#define DEFR6_ODPM12_DISP (2 << 10)
+#define DEFR6_ODPM12_CDE (3 << 10)
+#define DEFR6_ODPM12_MASK (3 << 10)
+#define DEFR6_ODPM02_DSMR (0 << 8)
+#define DEFR6_ODPM02_DISP (2 << 8)
+#define DEFR6_ODPM02_CDE (3 << 8)
+#define DEFR6_ODPM02_MASK (3 << 8)
#define DEFR6_TCNE1 (1 << 6)
#define DEFR6_TCNE0 (1 << 4)
#define DEFR6_MLOS1 (1 << 2)
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_vsp.c b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c
index b3bec01..27a4408 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_vsp.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_vsp.c
@@ -17,6 +17,7 @@
#include <drm/drm_crtc_helper.h>
#include <drm/drm_fb_cma_helper.h>
#include <drm/drm_gem_cma_helper.h>
+#include <drm/drm_gem_framebuffer_helper.h>
#include <drm/drm_plane_helper.h>
#include <linux/bitops.h>
@@ -237,6 +238,10 @@ static int rcar_du_vsp_plane_prepare_fb(struct drm_plane *plane,
}
}
+ ret = drm_gem_fb_prepare_fb(plane, state);
+ if (ret)
+ goto fail;
+
return 0;
fail:
@@ -299,18 +304,17 @@ static const struct drm_plane_helper_funcs rcar_du_vsp_plane_helper_funcs = {
static struct drm_plane_state *
rcar_du_vsp_plane_atomic_duplicate_state(struct drm_plane *plane)
{
- struct rcar_du_vsp_plane_state *state;
struct rcar_du_vsp_plane_state *copy;
if (WARN_ON(!plane->state))
return NULL;
- state = to_rcar_vsp_plane_state(plane->state);
- copy = kmemdup(state, sizeof(*state), GFP_KERNEL);
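+ /* Duplicate the base state with the helper; copy driver-private fields by hand. */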
+ copy = kzalloc(sizeof(*copy), GFP_KERNEL);
if (copy == NULL)
return NULL;
__drm_atomic_helper_plane_duplicate_state(plane, &copy->state);
+ copy->alpha = to_rcar_vsp_plane_state(plane->state)->alpha;
return &copy->state;
}
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
index fe3faa7..2121345 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
@@ -76,6 +76,9 @@
#define VOP_WIN_GET_YRGBADDR(vop, win) \
vop_readl(vop, win->base + win->phy->yrgb_mst.offset)
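+/* A vop_win's index is its offset within the vop->win array. */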
+#define VOP_WIN_TO_INDEX(vop_win) \
+ ((vop_win) - (vop_win)->vop->win)
+
#define to_vop(x) container_of(x, struct vop, crtc)
#define to_vop_win(x) container_of(x, struct vop_win, base)
@@ -708,6 +711,7 @@ static void vop_plane_atomic_update(struct drm_plane *plane,
dma_addr_t dma_addr;
uint32_t val;
bool rb_swap;
+ int win_index = VOP_WIN_TO_INDEX(vop_win);
int format;
/*
@@ -777,7 +781,14 @@ static void vop_plane_atomic_update(struct drm_plane *plane,
rb_swap = has_rb_swapped(fb->format->format);
VOP_WIN_SET(vop, win, rb_swap, rb_swap);
- if (fb->format->has_alpha) {
+ /*
+ * Blending win0 with the background color doesn't seem to work
+ * correctly. We only get the background color, no matter the contents
+ * of the win0 framebuffer. However, blending pre-multiplied color
+ * with the default opaque black default background color is a no-op,
+ * so we can just disable blending to get the correct result.
+ */
+ if (fb->format->has_alpha && win_index > 0) {
VOP_WIN_SET(vop, win, dst_alpha_ctl,
DST_FACTOR_M0(ALPHA_SRC_INVERSE));
val = SRC_ALPHA_EN(1) | SRC_COLOR_M0(ALPHA_SRC_PRE_MUL) |
diff --git a/drivers/gpu/drm/selftests/Makefile b/drivers/gpu/drm/selftests/Makefile
index 4aebfc7..9fc349f 100644
--- a/drivers/gpu/drm/selftests/Makefile
+++ b/drivers/gpu/drm/selftests/Makefile
@@ -1 +1 @@
-obj-$(CONFIG_DRM_DEBUG_MM_SELFTEST) += test-drm_mm.o
+obj-$(CONFIG_DRM_DEBUG_SELFTEST) += test-drm_mm.o test-drm-helper.o
diff --git a/drivers/gpu/drm/selftests/drm_helper_selftests.h b/drivers/gpu/drm/selftests/drm_helper_selftests.h
new file mode 100644
index 0000000..9771290
--- /dev/null
+++ b/drivers/gpu/drm/selftests/drm_helper_selftests.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* List each unit test as selftest(name, function)
+ *
+ * The name is used as both an enum and expanded as igt__name to create
+ * a module parameter. It must be unique and legal for a C identifier.
+ *
+ * Tests are executed in order by igt/drm_selftests_helper
+ */
+selftest(check_plane_state, igt_check_plane_state)
diff --git a/drivers/gpu/drm/selftests/test-drm-helper.c b/drivers/gpu/drm/selftests/test-drm-helper.c
new file mode 100644
index 0000000..a015712
--- /dev/null
+++ b/drivers/gpu/drm/selftests/test-drm-helper.c
@@ -0,0 +1,247 @@
+/*
+ * Test cases for the drm_kms_helper functions
+ */
+
+#define pr_fmt(fmt) "drm_kms_helper: " fmt
+
+#include <linux/module.h>
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_plane_helper.h>
+#include <drm/drm_modes.h>
+
+#define TESTS "drm_helper_selftests.h"
+#include "drm_selftest.h"
+
+#define FAIL(test, msg, ...) \
+ do { \
+ if (test) { \
+ pr_err("%s/%u: " msg, __FUNCTION__, __LINE__, ##__VA_ARGS__); \
+ return -EINVAL; \
+ } \
+ } while (0)
+
+#define FAIL_ON(x) FAIL((x), "%s", "FAIL_ON(" __stringify(x) ")\n")
+
+static void set_src(struct drm_plane_state *plane_state,
+ unsigned src_x, unsigned src_y,
+ unsigned src_w, unsigned src_h)
+{
+ plane_state->src_x = src_x;
+ plane_state->src_y = src_y;
+ plane_state->src_w = src_w;
+ plane_state->src_h = src_h;
+}
+
+static bool check_src_eq(struct drm_plane_state *plane_state,
+ unsigned src_x, unsigned src_y,
+ unsigned src_w, unsigned src_h)
+{
+ if (plane_state->src.x1 < 0) {
+ pr_err("src x coordinate %x should never be below 0.\n", plane_state->src.x1);
+ drm_rect_debug_print("src: ", &plane_state->src, true);
+ return false;
+ }
+ if (plane_state->src.y1 < 0) {
+ pr_err("src y coordinate %x should never be below 0.\n", plane_state->src.y1);
+ drm_rect_debug_print("src: ", &plane_state->src, true);
+ return false;
+ }
+
+ if (plane_state->src.x1 != src_x ||
+ plane_state->src.y1 != src_y ||
+ drm_rect_width(&plane_state->src) != src_w ||
+ drm_rect_height(&plane_state->src) != src_h) {
+ drm_rect_debug_print("src: ", &plane_state->src, true);
+ return false;
+ }
+
+ return true;
+}
+
+static void set_crtc(struct drm_plane_state *plane_state,
+ int crtc_x, int crtc_y,
+ unsigned crtc_w, unsigned crtc_h)
+{
+ plane_state->crtc_x = crtc_x;
+ plane_state->crtc_y = crtc_y;
+ plane_state->crtc_w = crtc_w;
+ plane_state->crtc_h = crtc_h;
+}
+
+static bool check_crtc_eq(struct drm_plane_state *plane_state,
+ int crtc_x, int crtc_y,
+ unsigned crtc_w, unsigned crtc_h)
+{
+ if (plane_state->dst.x1 != crtc_x ||
+ plane_state->dst.y1 != crtc_y ||
+ drm_rect_width(&plane_state->dst) != crtc_w ||
+ drm_rect_height(&plane_state->dst) != crtc_h) {
+ drm_rect_debug_print("dst: ", &plane_state->dst, false);
+
+ return false;
+ }
+
+ return true;
+}
+
+static int igt_check_plane_state(void *ignored)
+{
+ int ret;
+
+ const struct drm_crtc_state crtc_state = {
+ .crtc = ZERO_SIZE_PTR,
+ .enable = true,
+ .active = true,
+ .mode = {
+ DRM_MODE("1024x768", 0, 65000, 1024, 1048,
+ 1184, 1344, 0, 768, 771, 777, 806, 0,
+ DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC)
+ },
+ };
+ struct drm_framebuffer fb = {
+ .width = 2048,
+ .height = 2048
+ };
+ struct drm_plane_state plane_state = {
+ .crtc = ZERO_SIZE_PTR,
+ .fb = &fb,
+ .rotation = DRM_MODE_ROTATE_0
+ };
+
+ /* Simple clipping, no scaling. */
+ set_src(&plane_state, 0, 0, fb.width << 16, fb.height << 16);
+ set_crtc(&plane_state, 0, 0, fb.width, fb.height);
+ ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state,
+ DRM_PLANE_HELPER_NO_SCALING,
+ DRM_PLANE_HELPER_NO_SCALING,
+ false, false);
+ FAIL(ret < 0, "Simple clipping check should pass\n");
+ FAIL_ON(!plane_state.visible);
+ FAIL_ON(!check_src_eq(&plane_state, 0, 0, 1024 << 16, 768 << 16));
+ FAIL_ON(!check_crtc_eq(&plane_state, 0, 0, 1024, 768));
+
+ /* Rotated clipping + reflection, no scaling. */
+ plane_state.rotation = DRM_MODE_ROTATE_90 | DRM_MODE_REFLECT_X;
+ ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state,
+ DRM_PLANE_HELPER_NO_SCALING,
+ DRM_PLANE_HELPER_NO_SCALING,
+ false, false);
+ FAIL(ret < 0, "Rotated clipping check should pass\n");
+ FAIL_ON(!plane_state.visible);
+ FAIL_ON(!check_src_eq(&plane_state, 0, 0, 768 << 16, 1024 << 16));
+ FAIL_ON(!check_crtc_eq(&plane_state, 0, 0, 1024, 768));
+ plane_state.rotation = DRM_MODE_ROTATE_0;
+
+ /* Check whether positioning works correctly. */
+ set_src(&plane_state, 0, 0, 1023 << 16, 767 << 16);
+ set_crtc(&plane_state, 0, 0, 1023, 767);
+ ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state,
+ DRM_PLANE_HELPER_NO_SCALING,
+ DRM_PLANE_HELPER_NO_SCALING,
+ false, false);
+ FAIL(!ret, "Should not be able to position on the crtc with can_position=false\n");
+
+ ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state,
+ DRM_PLANE_HELPER_NO_SCALING,
+ DRM_PLANE_HELPER_NO_SCALING,
+ true, false);
+ FAIL(ret < 0, "Simple positioning should work\n");
+ FAIL_ON(!plane_state.visible);
+ FAIL_ON(!check_src_eq(&plane_state, 0, 0, 1023 << 16, 767 << 16));
+ FAIL_ON(!check_crtc_eq(&plane_state, 0, 0, 1023, 767));
+
+ /* Simple scaling tests. */
+ set_src(&plane_state, 0, 0, 512 << 16, 384 << 16);
+ set_crtc(&plane_state, 0, 0, 1024, 768);
+ ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state,
+ 0x8001,
+ DRM_PLANE_HELPER_NO_SCALING,
+ false, false);
+ FAIL(!ret, "Upscaling out of range should fail.\n");
+ ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state,
+ 0x8000,
+ DRM_PLANE_HELPER_NO_SCALING,
+ false, false);
+ FAIL(ret < 0, "Upscaling exactly 2x should work\n");
+ FAIL_ON(!plane_state.visible);
+ FAIL_ON(!check_src_eq(&plane_state, 0, 0, 512 << 16, 384 << 16));
+ FAIL_ON(!check_crtc_eq(&plane_state, 0, 0, 1024, 768));
+
+ set_src(&plane_state, 0, 0, 2048 << 16, 1536 << 16);
+ ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state,
+ DRM_PLANE_HELPER_NO_SCALING,
+ 0x1ffff, false, false);
+ FAIL(!ret, "Downscaling out of range should fail.\n");
+ ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state,
+ DRM_PLANE_HELPER_NO_SCALING,
+ 0x20000, false, false);
+ FAIL(ret < 0, "Should succeed with exact scaling limit\n");
+ FAIL_ON(!plane_state.visible);
+ FAIL_ON(!check_src_eq(&plane_state, 0, 0, 2048 << 16, 1536 << 16));
+ FAIL_ON(!check_crtc_eq(&plane_state, 0, 0, 1024, 768));
+
+ /* Testing rounding errors. */
+ set_src(&plane_state, 0, 0, 0x40001, 0x40001);
+ set_crtc(&plane_state, 1022, 766, 4, 4);
+ ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state,
+ DRM_PLANE_HELPER_NO_SCALING,
+ 0x10001,
+ true, false);
+ FAIL(ret < 0, "Should succeed by clipping to exact multiple");
+ FAIL_ON(!plane_state.visible);
+ FAIL_ON(!check_src_eq(&plane_state, 0, 0, 2 << 16, 2 << 16));
+ FAIL_ON(!check_crtc_eq(&plane_state, 1022, 766, 2, 2));
+
+ set_src(&plane_state, 0x20001, 0x20001, 0x4040001, 0x3040001);
+ set_crtc(&plane_state, -2, -2, 1028, 772);
+ ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state,
+ DRM_PLANE_HELPER_NO_SCALING,
+ 0x10001,
+ false, false);
+ FAIL(ret < 0, "Should succeed by clipping to exact multiple");
+ FAIL_ON(!plane_state.visible);
+ FAIL_ON(!check_src_eq(&plane_state, 0x40002, 0x40002, 1024 << 16, 768 << 16));
+ FAIL_ON(!check_crtc_eq(&plane_state, 0, 0, 1024, 768));
+
+ set_src(&plane_state, 0, 0, 0x3ffff, 0x3ffff);
+ set_crtc(&plane_state, 1022, 766, 4, 4);
+ ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state,
+ 0xffff,
+ DRM_PLANE_HELPER_NO_SCALING,
+ true, false);
+ FAIL(ret < 0, "Should succeed by clipping to exact multiple");
+ FAIL_ON(!plane_state.visible);
+ /* Should not be rounded to 0x20001, which would be upscaling. */
+ FAIL_ON(!check_src_eq(&plane_state, 0, 0, 2 << 16, 2 << 16));
+ FAIL_ON(!check_crtc_eq(&plane_state, 1022, 766, 2, 2));
+
+ set_src(&plane_state, 0x1ffff, 0x1ffff, 0x403ffff, 0x303ffff);
+ set_crtc(&plane_state, -2, -2, 1028, 772);
+ ret = drm_atomic_helper_check_plane_state(&plane_state, &crtc_state,
+ 0xffff,
+ DRM_PLANE_HELPER_NO_SCALING,
+ false, false);
+ FAIL(ret < 0, "Should succeed by clipping to exact multiple");
+ FAIL_ON(!plane_state.visible);
+ FAIL_ON(!check_src_eq(&plane_state, 0x3fffe, 0x3fffe, 1024 << 16, 768 << 16));
+ FAIL_ON(!check_crtc_eq(&plane_state, 0, 0, 1024, 768));
+
+ return 0;
+}
+
+#include "drm_selftest.c"
+
+static int __init test_drm_helper_init(void)
+{
+ int err;
+
+ err = run_selftests(selftests, ARRAY_SIZE(selftests), NULL);
+
+ return err > 0 ? 0 : err;
+}
+
+module_init(test_drm_helper_init);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/sti/sti_crtc.c b/drivers/gpu/drm/sti/sti_crtc.c
index 21e50d7..5824e6a 100644
--- a/drivers/gpu/drm/sti/sti_crtc.c
+++ b/drivers/gpu/drm/sti/sti_crtc.c
@@ -357,7 +357,7 @@ int sti_crtc_init(struct drm_device *drm_dev, struct sti_mixer *mixer,
res = drm_crtc_init_with_planes(drm_dev, crtc, primary, cursor,
&sti_crtc_funcs, NULL);
if (res) {
- DRM_ERROR("Can't initialze CRTC\n");
+ DRM_ERROR("Can't initialize CRTC\n");
return -EINVAL;
}
diff --git a/drivers/gpu/drm/stm/ltdc.c b/drivers/gpu/drm/stm/ltdc.c
index e3121d9..d997a60 100644
--- a/drivers/gpu/drm/stm/ltdc.c
+++ b/drivers/gpu/drm/stm/ltdc.c
@@ -445,6 +445,43 @@ static void ltdc_crtc_atomic_disable(struct drm_crtc *crtc,
reg_set(ldev->regs, LTDC_SRCR, SRCR_IMR);
}
+#define CLK_TOLERANCE_HZ 50
+
+static enum drm_mode_status
+ltdc_crtc_mode_valid(struct drm_crtc *crtc,
+ const struct drm_display_mode *mode)
+{
+ struct ltdc_device *ldev = crtc_to_ltdc(crtc);
+ int target = mode->clock * 1000;
+ int target_min = target - CLK_TOLERANCE_HZ;
+ int target_max = target + CLK_TOLERANCE_HZ;
+ int result;
+
+ /*
+ * Accept all "preferred" modes:
+ * - this is important for panels because panel clock tolerances are
+ * bigger than hdmi ones and there is no reason not to accept them
+ * (the fps may vary a little but it is not a problem).
+ * - the hdmi preferred mode will be accepted too, but userland will
+ * be able to use other hdmi "valid" modes if necessary.
+ */
+ if (mode->type & DRM_MODE_TYPE_PREFERRED)
+ return MODE_OK;
+
+ result = clk_round_rate(ldev->pixel_clk, target);
+
+ DRM_DEBUG_DRIVER("clk rate target %d, available %d\n", target, result);
+
+ /*
+ * Filter modes according to the clock value, particularly useful for
+ * hdmi modes that require precise pixel clocks.
+ */
+ if (result < target_min || result > target_max)
+ return MODE_CLOCK_RANGE;
+
+ return MODE_OK;
+}
+
static bool ltdc_crtc_mode_fixup(struct drm_crtc *crtc,
const struct drm_display_mode *mode,
struct drm_display_mode *adjusted_mode)
@@ -559,6 +596,7 @@ static void ltdc_crtc_atomic_flush(struct drm_crtc *crtc,
}
static const struct drm_crtc_helper_funcs ltdc_crtc_helper_funcs = {
+ .mode_valid = ltdc_crtc_mode_valid,
.mode_fixup = ltdc_crtc_mode_fixup,
.mode_set_nofb = ltdc_crtc_mode_set_nofb,
.atomic_flush = ltdc_crtc_atomic_flush,
@@ -822,13 +860,13 @@ static struct drm_plane *ltdc_plane_create(struct drm_device *ddev,
plane = devm_kzalloc(dev, sizeof(*plane), GFP_KERNEL);
if (!plane)
- return 0;
+ return NULL;
ret = drm_universal_plane_init(ddev, plane, possible_crtcs,
&ltdc_plane_funcs, formats, nb_fmt,
NULL, type, NULL);
if (ret < 0)
- return 0;
+ return NULL;
drm_plane_helper_add(plane, &ltdc_plane_helper_funcs);
@@ -987,14 +1025,13 @@ int ltdc_load(struct drm_device *ddev)
&bridge[i]);
/*
- * If at least one endpoint is ready, continue probing,
- * else if at least one endpoint is -EPROBE_DEFER and
- * there is no previous ready endpoints, defer probing.
+ * If at least one endpoint is -EPROBE_DEFER, defer probing,
+ * else if at least one endpoint is ready, continue probing.
*/
- if (!ret)
+ if (ret == -EPROBE_DEFER)
+ return ret;
+ else if (!ret)
endpoint_not_ready = 0;
- else if (ret == -EPROBE_DEFER && endpoint_not_ready)
- endpoint_not_ready = -EPROBE_DEFER;
}
if (endpoint_not_ready)
diff --git a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
index 1b278a2..1067e70 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
@@ -224,7 +224,7 @@ static void tilcdc_crtc_set_clk(struct drm_crtc *crtc)
ret = clk_set_rate(priv->clk, req_rate * clkdiv);
clk_rate = clk_get_rate(priv->clk);
- if (ret < 0) {
+ if (ret < 0 || tilcdc_pclk_diff(req_rate, clk_rate) > 5) {
/*
* If we fail to set the clock rate (some architectures don't
* use the common clock framework yet and may not implement
diff --git a/drivers/gpu/drm/tinydrm/mi0283qt.c b/drivers/gpu/drm/tinydrm/mi0283qt.c
index d5ef651..015d03f 100644
--- a/drivers/gpu/drm/tinydrm/mi0283qt.c
+++ b/drivers/gpu/drm/tinydrm/mi0283qt.c
@@ -85,24 +85,6 @@ static void mi0283qt_enable(struct drm_simple_display_pipe *pipe,
/* Memory Access Control */
mipi_dbi_command(mipi, MIPI_DCS_SET_PIXEL_FORMAT, MIPI_DCS_PIXEL_FMT_16BIT);
- switch (mipi->rotation) {
- default:
- addr_mode = ILI9341_MADCTL_MV | ILI9341_MADCTL_MY |
- ILI9341_MADCTL_MX;
- break;
- case 90:
- addr_mode = ILI9341_MADCTL_MY;
- break;
- case 180:
- addr_mode = ILI9341_MADCTL_MV;
- break;
- case 270:
- addr_mode = ILI9341_MADCTL_MX;
- break;
- }
- addr_mode |= ILI9341_MADCTL_BGR;
- mipi_dbi_command(mipi, MIPI_DCS_SET_ADDRESS_MODE, addr_mode);
-
/* Frame Rate */
mipi_dbi_command(mipi, ILI9341_FRMCTR1, 0x00, 0x1b);
@@ -128,6 +110,29 @@ static void mi0283qt_enable(struct drm_simple_display_pipe *pipe,
msleep(100);
out_enable:
+ /* The PiTFT (ili9340) has a hardware reset circuit that
+ * resets only on power-on and not on each reboot through
+ * a gpio like the rpi-display does.
+ * As a result, we need to always apply the rotation value
+ * regardless of the display "on/off" state.
+ */
+ switch (mipi->rotation) {
+ default:
+ addr_mode = ILI9341_MADCTL_MV | ILI9341_MADCTL_MY |
+ ILI9341_MADCTL_MX;
+ break;
+ case 90:
+ addr_mode = ILI9341_MADCTL_MY;
+ break;
+ case 180:
+ addr_mode = ILI9341_MADCTL_MV;
+ break;
+ case 270:
+ addr_mode = ILI9341_MADCTL_MX;
+ break;
+ }
+ addr_mode |= ILI9341_MADCTL_BGR;
+ mipi_dbi_command(mipi, MIPI_DCS_SET_ADDRESS_MODE, addr_mode);
mipi_dbi_enable_flush(mipi, crtc_state, plane_state);
}
diff --git a/drivers/gpu/drm/v3d/Kconfig b/drivers/gpu/drm/v3d/Kconfig
new file mode 100644
index 0000000..a0c0259
--- /dev/null
+++ b/drivers/gpu/drm/v3d/Kconfig
@@ -0,0 +1,9 @@
+config DRM_V3D
+ tristate "Broadcom V3D 3.x and newer"
+ depends on ARCH_BCM || ARCH_BCMSTB || COMPILE_TEST
+ depends on DRM
+ depends on COMMON_CLK
+ select DRM_SCHED
+ help
+ Choose this option if you have a system that has a Broadcom
+ V3D 3.x or newer GPU, such as BCM7268.
diff --git a/drivers/gpu/drm/v3d/Makefile b/drivers/gpu/drm/v3d/Makefile
new file mode 100644
index 0000000..34446e1
--- /dev/null
+++ b/drivers/gpu/drm/v3d/Makefile
@@ -0,0 +1,18 @@
+# Please keep these build lists sorted!
+
+# core driver code
+v3d-y := \
+ v3d_bo.o \
+ v3d_drv.o \
+ v3d_fence.o \
+ v3d_gem.o \
+ v3d_irq.o \
+ v3d_mmu.o \
+ v3d_sched.o \
+ v3d_trace_points.o
+
+v3d-$(CONFIG_DEBUG_FS) += v3d_debugfs.o
+
+obj-$(CONFIG_DRM_V3D) += v3d.o
+
+CFLAGS_v3d_trace_points.o := -I$(src)
diff --git a/drivers/gpu/drm/v3d/v3d_bo.c b/drivers/gpu/drm/v3d/v3d_bo.c
new file mode 100644
index 0000000..7b1e2a5
--- /dev/null
+++ b/drivers/gpu/drm/v3d/v3d_bo.c
@@ -0,0 +1,389 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (C) 2015-2018 Broadcom */
+
+/**
+ * DOC: V3D GEM BO management support
+ *
+ * Compared to VC4 (V3D 2.x), V3D 3.3 introduces an MMU between the
+ * GPU and the bus, allowing us to use shmem objects for our storage
+ * instead of CMA.
+ *
+ * Physically contiguous objects may still be imported to V3D, but the
+ * driver doesn't allocate physically contiguous objects on its own.
+ * Display engines requiring physically contiguous allocations should
+ * look into Mesa's "renderonly" support (as used by the Mesa pl111
+ * driver) for an example of how to integrate with V3D.
+ *
+ * Long term, we should support evicting pages from the MMU when under
+ * memory pressure (thus the v3d_bo_get_pages() refcounting), but
+ * that's not a high priority since our systems tend to not have swap.
+ */
+
+#include <linux/dma-buf.h>
+#include <linux/pfn_t.h>
+
+#include "v3d_drv.h"
+#include "uapi/drm/v3d_drm.h"
+
+/* Pins the shmem pages, fills in the .pages and .sgt fields of the BO, and maps
+ * it for DMA.
+ */
+static int
+v3d_bo_get_pages(struct v3d_bo *bo)
+{
+ struct drm_gem_object *obj = &bo->base;
+ struct drm_device *dev = obj->dev;
+ int npages = obj->size >> PAGE_SHIFT;
+ int ret = 0;
+
+ mutex_lock(&bo->lock);
+ if (bo->pages_refcount++ != 0)
+ goto unlock;
+
+ if (!obj->import_attach) {
+ bo->pages = drm_gem_get_pages(obj);
+ if (IS_ERR(bo->pages)) {
+ ret = PTR_ERR(bo->pages);
+ goto unlock;
+ }
+
+ bo->sgt = drm_prime_pages_to_sg(bo->pages, npages);
+ if (IS_ERR(bo->sgt)) {
+ ret = PTR_ERR(bo->sgt);
+ goto put_pages;
+ }
+
+ /* Map the pages for use by the GPU. */
+ dma_map_sg(dev->dev, bo->sgt->sgl,
+ bo->sgt->nents, DMA_BIDIRECTIONAL);
+ } else {
+ bo->pages = kcalloc(npages, sizeof(*bo->pages), GFP_KERNEL);
+ if (!bo->pages) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
+ drm_prime_sg_to_page_addr_arrays(bo->sgt, bo->pages,
+ NULL, npages);
+
+ /* Note that dma-bufs come in mapped. */
+ }
+
+ mutex_unlock(&bo->lock);
+
+ return 0;
+
+put_pages:
+ drm_gem_put_pages(obj, bo->pages, true, true);
+ bo->pages = NULL;
+unlock:
+ bo->pages_refcount--;
+ mutex_unlock(&bo->lock);
+ return ret;
+}
+
+static void
+v3d_bo_put_pages(struct v3d_bo *bo)
+{
+ struct drm_gem_object *obj = &bo->base;
+
+ mutex_lock(&bo->lock);
+ if (--bo->pages_refcount == 0) {
+ if (!obj->import_attach) {
+ dma_unmap_sg(obj->dev->dev, bo->sgt->sgl,
+ bo->sgt->nents, DMA_BIDIRECTIONAL);
+ sg_free_table(bo->sgt);
+ kfree(bo->sgt);
+ drm_gem_put_pages(obj, bo->pages, true, true);
+ } else {
+ kfree(bo->pages);
+ }
+ }
+ mutex_unlock(&bo->lock);
+}
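+
+/* Illustrative pairing of the two helpers above, mirroring what
+ * v3d_bo_create() and v3d_free_object() below do: pin the pages and
+ * map them into the GPU's MMU, then tear it all down again.
+ *
+ * ret = v3d_bo_get_pages(bo);
+ * if (ret)
+ * return ret;
+ * v3d_mmu_insert_ptes(bo);
+ * ...
+ * v3d_bo_put_pages(bo);
+ * v3d_mmu_remove_ptes(bo);
+ */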
+
+static struct v3d_bo *v3d_bo_create_struct(struct drm_device *dev,
+ size_t unaligned_size)
+{
+ struct v3d_dev *v3d = to_v3d_dev(dev);
+ struct drm_gem_object *obj;
+ struct v3d_bo *bo;
+ size_t size = roundup(unaligned_size, PAGE_SIZE);
+ int ret;
+
+ if (size == 0)
+ return ERR_PTR(-EINVAL);
+
+ bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+ if (!bo)
+ return ERR_PTR(-ENOMEM);
+ obj = &bo->base;
+
+ INIT_LIST_HEAD(&bo->vmas);
+ INIT_LIST_HEAD(&bo->unref_head);
+ mutex_init(&bo->lock);
+
+ ret = drm_gem_object_init(dev, obj, size);
+ if (ret)
+ goto free_bo;
+
+ spin_lock(&v3d->mm_lock);
+ ret = drm_mm_insert_node_generic(&v3d->mm, &bo->node,
+ obj->size >> PAGE_SHIFT,
+ GMP_GRANULARITY >> PAGE_SHIFT, 0, 0);
+ spin_unlock(&v3d->mm_lock);
+ if (ret)
+ goto free_obj;
+
+ return bo;
+
+free_obj:
+ drm_gem_object_release(obj);
+free_bo:
+ kfree(bo);
+ return ERR_PTR(ret);
+}
+
+struct v3d_bo *v3d_bo_create(struct drm_device *dev, struct drm_file *file_priv,
+ size_t unaligned_size)
+{
+ struct v3d_dev *v3d = to_v3d_dev(dev);
+ struct drm_gem_object *obj;
+ struct v3d_bo *bo;
+ int ret;
+
+ bo = v3d_bo_create_struct(dev, unaligned_size);
+ if (IS_ERR(bo))
+ return bo;
+ obj = &bo->base;
+
+ bo->resv = &bo->_resv;
+ reservation_object_init(bo->resv);
+
+ ret = v3d_bo_get_pages(bo);
+ if (ret)
+ goto free_mm;
+
+ v3d_mmu_insert_ptes(bo);
+
+ mutex_lock(&v3d->bo_lock);
+ v3d->bo_stats.num_allocated++;
+ v3d->bo_stats.pages_allocated += obj->size >> PAGE_SHIFT;
+ mutex_unlock(&v3d->bo_lock);
+
+ return bo;
+
+free_mm:
+ spin_lock(&v3d->mm_lock);
+ drm_mm_remove_node(&bo->node);
+ spin_unlock(&v3d->mm_lock);
+
+ drm_gem_object_release(obj);
+ kfree(bo);
+ return ERR_PTR(ret);
+}
+
+/* Called by the DRM core on the last userspace/kernel unreference of
+ * the BO.
+ */
+void v3d_free_object(struct drm_gem_object *obj)
+{
+ struct v3d_dev *v3d = to_v3d_dev(obj->dev);
+ struct v3d_bo *bo = to_v3d_bo(obj);
+
+ mutex_lock(&v3d->bo_lock);
+ v3d->bo_stats.num_allocated--;
+ v3d->bo_stats.pages_allocated -= obj->size >> PAGE_SHIFT;
+ mutex_unlock(&v3d->bo_lock);
+
+ reservation_object_fini(&bo->_resv);
+
+ v3d_bo_put_pages(bo);
+
+ if (obj->import_attach)
+ drm_prime_gem_destroy(obj, bo->sgt);
+
+ v3d_mmu_remove_ptes(bo);
+ spin_lock(&v3d->mm_lock);
+ drm_mm_remove_node(&bo->node);
+ spin_unlock(&v3d->mm_lock);
+
+ mutex_destroy(&bo->lock);
+
+ drm_gem_object_release(obj);
+ kfree(bo);
+}
+
+struct reservation_object *v3d_prime_res_obj(struct drm_gem_object *obj)
+{
+ struct v3d_bo *bo = to_v3d_bo(obj);
+
+ return bo->resv;
+}
+
+static void
+v3d_set_mmap_vma_flags(struct vm_area_struct *vma)
+{
+ vma->vm_flags &= ~VM_PFNMAP;
+ vma->vm_flags |= VM_MIXEDMAP;
+ vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+}
+
+int v3d_gem_fault(struct vm_fault *vmf)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ struct drm_gem_object *obj = vma->vm_private_data;
+ struct v3d_bo *bo = to_v3d_bo(obj);
+ unsigned long pfn;
+ pgoff_t pgoff;
+ int ret;
+
+ /* We don't use vmf->pgoff since that has the fake offset: */
+ pgoff = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
+ pfn = page_to_pfn(bo->pages[pgoff]);
+
+ ret = vm_insert_mixed(vma, vmf->address, __pfn_to_pfn_t(pfn, PFN_DEV));
+
+ switch (ret) {
+ case -EAGAIN:
+ case 0:
+ case -ERESTARTSYS:
+ case -EINTR:
+ case -EBUSY:
+ /*
+ * EBUSY is ok: this just means that another thread
+ * already did the job.
+ */
+ return VM_FAULT_NOPAGE;
+ case -ENOMEM:
+ return VM_FAULT_OOM;
+ default:
+ return VM_FAULT_SIGBUS;
+ }
+}
+
+int v3d_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ int ret;
+
+ ret = drm_gem_mmap(filp, vma);
+ if (ret)
+ return ret;
+
+ v3d_set_mmap_vma_flags(vma);
+
+ return ret;
+}
+
+int v3d_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
+{
+ int ret;
+
+ ret = drm_gem_mmap_obj(obj, obj->size, vma);
+ if (ret < 0)
+ return ret;
+
+ v3d_set_mmap_vma_flags(vma);
+
+ return 0;
+}
+
+struct sg_table *
+v3d_prime_get_sg_table(struct drm_gem_object *obj)
+{
+ struct v3d_bo *bo = to_v3d_bo(obj);
+ int npages = obj->size >> PAGE_SHIFT;
+
+ return drm_prime_pages_to_sg(bo->pages, npages);
+}
+
+struct drm_gem_object *
+v3d_prime_import_sg_table(struct drm_device *dev,
+ struct dma_buf_attachment *attach,
+ struct sg_table *sgt)
+{
+ struct drm_gem_object *obj;
+ struct v3d_bo *bo;
+
+ bo = v3d_bo_create_struct(dev, attach->dmabuf->size);
+ if (IS_ERR(bo))
+ return ERR_CAST(bo);
+ obj = &bo->base;
+
+ bo->resv = attach->dmabuf->resv;
+
+ bo->sgt = sgt;
+ v3d_bo_get_pages(bo);
+
+ v3d_mmu_insert_ptes(bo);
+
+ return obj;
+}
+
+int v3d_create_bo_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_v3d_create_bo *args = data;
+ struct v3d_bo *bo = NULL;
+ int ret;
+
+ if (args->flags != 0) {
+ DRM_INFO("unknown create_bo flags: %d\n", args->flags);
+ return -EINVAL;
+ }
+
+ bo = v3d_bo_create(dev, file_priv, PAGE_ALIGN(args->size));
+ if (IS_ERR(bo))
+ return PTR_ERR(bo);
+
+ args->offset = bo->node.start << PAGE_SHIFT;
+
+ ret = drm_gem_handle_create(file_priv, &bo->base, &args->handle);
+ drm_gem_object_put_unlocked(&bo->base);
+
+ return ret;
+}
+
+int v3d_mmap_bo_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_v3d_mmap_bo *args = data;
+ struct drm_gem_object *gem_obj;
+ int ret;
+
+ if (args->flags != 0) {
+ DRM_INFO("unknown mmap_bo flags: %d\n", args->flags);
+ return -EINVAL;
+ }
+
+ gem_obj = drm_gem_object_lookup(file_priv, args->handle);
+ if (!gem_obj) {
+ DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
+ return -ENOENT;
+ }
+
+ ret = drm_gem_create_mmap_offset(gem_obj);
+ if (ret == 0)
+ args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
+ drm_gem_object_put_unlocked(gem_obj);
+
+ return ret;
+}
+
+int v3d_get_bo_offset_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_v3d_get_bo_offset *args = data;
+ struct drm_gem_object *gem_obj;
+ struct v3d_bo *bo;
+
+ gem_obj = drm_gem_object_lookup(file_priv, args->handle);
+ if (!gem_obj) {
+ DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
+ return -ENOENT;
+ }
+ bo = to_v3d_bo(gem_obj);
+
+ args->offset = bo->node.start << PAGE_SHIFT;
+
+ drm_gem_object_put_unlocked(gem_obj);
+ return 0;
+}
diff --git a/drivers/gpu/drm/v3d/v3d_debugfs.c b/drivers/gpu/drm/v3d/v3d_debugfs.c
new file mode 100644
index 0000000..4db62c5
--- /dev/null
+++ b/drivers/gpu/drm/v3d/v3d_debugfs.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (C) 2014-2018 Broadcom */
+
+#include <linux/circ_buf.h>
+#include <linux/ctype.h>
+#include <linux/debugfs.h>
+#include <linux/pm_runtime.h>
+#include <linux/seq_file.h>
+#include <drm/drmP.h>
+
+#include "v3d_drv.h"
+#include "v3d_regs.h"
+
+#define REGDEF(reg) { reg, #reg }
+struct v3d_reg_def {
+ u32 reg;
+ const char *name;
+};
+
+static const struct v3d_reg_def v3d_hub_reg_defs[] = {
+ REGDEF(V3D_HUB_AXICFG),
+ REGDEF(V3D_HUB_UIFCFG),
+ REGDEF(V3D_HUB_IDENT0),
+ REGDEF(V3D_HUB_IDENT1),
+ REGDEF(V3D_HUB_IDENT2),
+ REGDEF(V3D_HUB_IDENT3),
+ REGDEF(V3D_HUB_INT_STS),
+ REGDEF(V3D_HUB_INT_MSK_STS),
+};
+
+static const struct v3d_reg_def v3d_gca_reg_defs[] = {
+ REGDEF(V3D_GCA_SAFE_SHUTDOWN),
+ REGDEF(V3D_GCA_SAFE_SHUTDOWN_ACK),
+};
+
+static const struct v3d_reg_def v3d_core_reg_defs[] = {
+ REGDEF(V3D_CTL_IDENT0),
+ REGDEF(V3D_CTL_IDENT1),
+ REGDEF(V3D_CTL_IDENT2),
+ REGDEF(V3D_CTL_MISCCFG),
+ REGDEF(V3D_CTL_INT_STS),
+ REGDEF(V3D_CTL_INT_MSK_STS),
+ REGDEF(V3D_CLE_CT0CS),
+ REGDEF(V3D_CLE_CT0CA),
+ REGDEF(V3D_CLE_CT0EA),
+ REGDEF(V3D_CLE_CT1CS),
+ REGDEF(V3D_CLE_CT1CA),
+ REGDEF(V3D_CLE_CT1EA),
+
+ REGDEF(V3D_PTB_BPCA),
+ REGDEF(V3D_PTB_BPCS),
+
+ REGDEF(V3D_MMU_CTL),
+ REGDEF(V3D_MMU_VIO_ADDR),
+
+ REGDEF(V3D_GMP_STATUS),
+ REGDEF(V3D_GMP_CFG),
+ REGDEF(V3D_GMP_VIO_ADDR),
+};
+
+static int v3d_v3d_debugfs_regs(struct seq_file *m, void *unused)
+{
+ struct drm_info_node *node = (struct drm_info_node *)m->private;
+ struct drm_device *dev = node->minor->dev;
+ struct v3d_dev *v3d = to_v3d_dev(dev);
+ int i, core;
+
+ for (i = 0; i < ARRAY_SIZE(v3d_hub_reg_defs); i++) {
+ seq_printf(m, "%s (0x%04x): 0x%08x\n",
+ v3d_hub_reg_defs[i].name, v3d_hub_reg_defs[i].reg,
+ V3D_READ(v3d_hub_reg_defs[i].reg));
+ }
+
+ for (i = 0; i < ARRAY_SIZE(v3d_gca_reg_defs); i++) {
+ seq_printf(m, "%s (0x%04x): 0x%08x\n",
+ v3d_gca_reg_defs[i].name, v3d_gca_reg_defs[i].reg,
+ V3D_GCA_READ(v3d_gca_reg_defs[i].reg));
+ }
+
+ for (core = 0; core < v3d->cores; core++) {
+ for (i = 0; i < ARRAY_SIZE(v3d_core_reg_defs); i++) {
+ seq_printf(m, "core %d %s (0x%04x): 0x%08x\n",
+ core,
+ v3d_core_reg_defs[i].name,
+ v3d_core_reg_defs[i].reg,
+ V3D_CORE_READ(core,
+ v3d_core_reg_defs[i].reg));
+ }
+ }
+
+ return 0;
+}
+
+static int v3d_v3d_debugfs_ident(struct seq_file *m, void *unused)
+{
+ struct drm_info_node *node = (struct drm_info_node *)m->private;
+ struct drm_device *dev = node->minor->dev;
+ struct v3d_dev *v3d = to_v3d_dev(dev);
+ u32 ident0, ident1, ident2, ident3, cores;
+ int ret, core;
+
+ ret = pm_runtime_get_sync(v3d->dev);
+ if (ret < 0)
+ return ret;
+
+ ident0 = V3D_READ(V3D_HUB_IDENT0);
+ ident1 = V3D_READ(V3D_HUB_IDENT1);
+ ident2 = V3D_READ(V3D_HUB_IDENT2);
+ ident3 = V3D_READ(V3D_HUB_IDENT3);
+ cores = V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_NCORES);
+
+ seq_printf(m, "Revision: %d.%d.%d.%d\n",
+ V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_TVER),
+ V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_REV),
+ V3D_GET_FIELD(ident3, V3D_HUB_IDENT3_IPREV),
+ V3D_GET_FIELD(ident3, V3D_HUB_IDENT3_IPIDX));
+ seq_printf(m, "MMU: %s\n",
+ (ident2 & V3D_HUB_IDENT2_WITH_MMU) ? "yes" : "no");
+ seq_printf(m, "TFU: %s\n",
+ (ident1 & V3D_HUB_IDENT1_WITH_TFU) ? "yes" : "no");
+ seq_printf(m, "TSY: %s\n",
+ (ident1 & V3D_HUB_IDENT1_WITH_TSY) ? "yes" : "no");
+ seq_printf(m, "MSO: %s\n",
+ (ident1 & V3D_HUB_IDENT1_WITH_MSO) ? "yes" : "no");
+ seq_printf(m, "L3C: %s (%dkb)\n",
+ (ident1 & V3D_HUB_IDENT1_WITH_L3C) ? "yes" : "no",
+ V3D_GET_FIELD(ident2, V3D_HUB_IDENT2_L3C_NKB));
+
+ for (core = 0; core < cores; core++) {
+ u32 misccfg;
+ u32 nslc, ntmu, qups;
+
+ ident0 = V3D_CORE_READ(core, V3D_CTL_IDENT0);
+ ident1 = V3D_CORE_READ(core, V3D_CTL_IDENT1);
+ ident2 = V3D_CORE_READ(core, V3D_CTL_IDENT2);
+ misccfg = V3D_CORE_READ(core, V3D_CTL_MISCCFG);
+
+ nslc = V3D_GET_FIELD(ident1, V3D_IDENT1_NSLC);
+ ntmu = V3D_GET_FIELD(ident1, V3D_IDENT1_NTMU);
+ qups = V3D_GET_FIELD(ident1, V3D_IDENT1_QUPS);
+
+ seq_printf(m, "Core %d:\n", core);
+ seq_printf(m, " Revision: %d.%d\n",
+ V3D_GET_FIELD(ident0, V3D_IDENT0_VER),
+ V3D_GET_FIELD(ident1, V3D_IDENT1_REV));
+ seq_printf(m, " Slices: %d\n", nslc);
+ seq_printf(m, " TMUs: %d\n", nslc * ntmu);
+ seq_printf(m, " QPUs: %d\n", nslc * qups);
+ seq_printf(m, " Semaphores: %d\n",
+ V3D_GET_FIELD(ident1, V3D_IDENT1_NSEM));
+ seq_printf(m, " BCG int: %d\n",
+ (ident2 & V3D_IDENT2_BCG_INT) != 0);
+ seq_printf(m, " Override TMU: %d\n",
+ (misccfg & V3D_MISCCFG_OVRTMUOUT) != 0);
+ }
+
+ pm_runtime_mark_last_busy(v3d->dev);
+ pm_runtime_put_autosuspend(v3d->dev);
+
+ return 0;
+}
+
+static int v3d_debugfs_bo_stats(struct seq_file *m, void *unused)
+{
+ struct drm_info_node *node = (struct drm_info_node *)m->private;
+ struct drm_device *dev = node->minor->dev;
+ struct v3d_dev *v3d = to_v3d_dev(dev);
+
+ mutex_lock(&v3d->bo_lock);
+ seq_printf(m, "allocated bos: %d\n",
+ v3d->bo_stats.num_allocated);
+ seq_printf(m, "allocated bo size (kb): %ld\n",
+ (long)v3d->bo_stats.pages_allocated << (PAGE_SHIFT - 10));
+ mutex_unlock(&v3d->bo_lock);
+
+ return 0;
+}
+
+static const struct drm_info_list v3d_debugfs_list[] = {
+ {"v3d_ident", v3d_v3d_debugfs_ident, 0},
+ {"v3d_regs", v3d_v3d_debugfs_regs, 0},
+ {"bo_stats", v3d_debugfs_bo_stats, 0},
+};
+
+int
+v3d_debugfs_init(struct drm_minor *minor)
+{
+ return drm_debugfs_create_files(v3d_debugfs_list,
+ ARRAY_SIZE(v3d_debugfs_list),
+ minor->debugfs_root, minor);
+}
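+
+/* With debugfs mounted, these files show up under the DRM minor's
+ * directory; e.g. (path illustrative, the minor number varies by
+ * system):
+ *
+ * # cat /sys/kernel/debug/dri/0/v3d_ident
+ */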
diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c
new file mode 100644
index 0000000..38e8041
--- /dev/null
+++ b/drivers/gpu/drm/v3d/v3d_drv.c
@@ -0,0 +1,371 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (C) 2014-2018 Broadcom */
+
+/**
+ * DOC: Broadcom V3D Graphics Driver
+ *
+ * This driver supports the Broadcom V3D 3.3 and 4.1 OpenGL ES GPUs.
+ * For V3D 2.x support, see the VC4 driver.
+ *
+ * Currently only single-core rendering using the binner and renderer
+ * is supported. The TFU (texture formatting unit) and V3D 4.x's CSD
+ * (compute shader dispatch) are not yet supported.
+ */
+
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <drm/drm_fb_cma_helper.h>
+#include <drm/drm_fb_helper.h>
+
+#include "uapi/drm/v3d_drm.h"
+#include "v3d_drv.h"
+#include "v3d_regs.h"
+
+#define DRIVER_NAME "v3d"
+#define DRIVER_DESC "Broadcom V3D graphics"
+#define DRIVER_DATE "20180419"
+#define DRIVER_MAJOR 1
+#define DRIVER_MINOR 0
+#define DRIVER_PATCHLEVEL 0
+
+#ifdef CONFIG_PM
+static int v3d_runtime_suspend(struct device *dev)
+{
+ struct drm_device *drm = dev_get_drvdata(dev);
+ struct v3d_dev *v3d = to_v3d_dev(drm);
+
+ v3d_irq_disable(v3d);
+
+ clk_disable_unprepare(v3d->clk);
+
+ return 0;
+}
+
+static int v3d_runtime_resume(struct device *dev)
+{
+ struct drm_device *drm = dev_get_drvdata(dev);
+ struct v3d_dev *v3d = to_v3d_dev(drm);
+ int ret;
+
+ ret = clk_prepare_enable(v3d->clk);
+ if (ret != 0)
+ return ret;
+
+ /* XXX: VPM base */
+
+ v3d_mmu_set_page_table(v3d);
+ v3d_irq_enable(v3d);
+
+ return 0;
+}
+#endif
+
+static const struct dev_pm_ops v3d_v3d_pm_ops = {
+ SET_RUNTIME_PM_OPS(v3d_runtime_suspend, v3d_runtime_resume, NULL)
+};
+
+static int v3d_get_param_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct v3d_dev *v3d = to_v3d_dev(dev);
+ struct drm_v3d_get_param *args = data;
+ int ret;
+ static const u32 reg_map[] = {
+ [DRM_V3D_PARAM_V3D_UIFCFG] = V3D_HUB_UIFCFG,
+ [DRM_V3D_PARAM_V3D_HUB_IDENT1] = V3D_HUB_IDENT1,
+ [DRM_V3D_PARAM_V3D_HUB_IDENT2] = V3D_HUB_IDENT2,
+ [DRM_V3D_PARAM_V3D_HUB_IDENT3] = V3D_HUB_IDENT3,
+ [DRM_V3D_PARAM_V3D_CORE0_IDENT0] = V3D_CTL_IDENT0,
+ [DRM_V3D_PARAM_V3D_CORE0_IDENT1] = V3D_CTL_IDENT1,
+ [DRM_V3D_PARAM_V3D_CORE0_IDENT2] = V3D_CTL_IDENT2,
+ };
+
+ if (args->pad != 0)
+ return -EINVAL;
+
+ /* Note that DRM_V3D_PARAM_V3D_CORE0_IDENT0 is 0, so we need
+ * to explicitly allow it in the "the register in our
+ * parameter map" check.
+ */
+ if (args->param < ARRAY_SIZE(reg_map) &&
+ (reg_map[args->param] ||
+ args->param == DRM_V3D_PARAM_V3D_CORE0_IDENT0)) {
+ u32 offset = reg_map[args->param];
+
+ if (args->value != 0)
+ return -EINVAL;
+
+ ret = pm_runtime_get_sync(v3d->dev);
+ if (args->param >= DRM_V3D_PARAM_V3D_CORE0_IDENT0 &&
+ args->param <= DRM_V3D_PARAM_V3D_CORE0_IDENT2) {
+ args->value = V3D_CORE_READ(0, offset);
+ } else {
+ args->value = V3D_READ(offset);
+ }
+ pm_runtime_mark_last_busy(v3d->dev);
+ pm_runtime_put_autosuspend(v3d->dev);
+ return 0;
+ }
+
+ /* Any params that aren't just register reads would go here. */
+
+ DRM_DEBUG("Unknown parameter %d\n", args->param);
+ return -EINVAL;
+}
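+
+/* Userspace-side sketch (illustrative, not part of this driver) of
+ * reading one of the identity registers through this ioctl:
+ *
+ * struct drm_v3d_get_param get = {
+ * .param = DRM_V3D_PARAM_V3D_HUB_IDENT1,
+ * };
+ * int err = drmIoctl(fd, DRM_IOCTL_V3D_GET_PARAM, &get);
+ *
+ * On success, get.value holds the register contents.
+ */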
+
+static int
+v3d_open(struct drm_device *dev, struct drm_file *file)
+{
+ struct v3d_dev *v3d = to_v3d_dev(dev);
+ struct v3d_file_priv *v3d_priv;
+ int i;
+
+ v3d_priv = kzalloc(sizeof(*v3d_priv), GFP_KERNEL);
+ if (!v3d_priv)
+ return -ENOMEM;
+
+ v3d_priv->v3d = v3d;
+
+ for (i = 0; i < V3D_MAX_QUEUES; i++) {
+ drm_sched_entity_init(&v3d->queue[i].sched,
+ &v3d_priv->sched_entity[i],
+ &v3d->queue[i].sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL],
+ 32, NULL);
+ }
+
+ file->driver_priv = v3d_priv;
+
+ return 0;
+}
+
+static void
+v3d_postclose(struct drm_device *dev, struct drm_file *file)
+{
+ struct v3d_dev *v3d = to_v3d_dev(dev);
+ struct v3d_file_priv *v3d_priv = file->driver_priv;
+ enum v3d_queue q;
+
+ for (q = 0; q < V3D_MAX_QUEUES; q++) {
+ drm_sched_entity_fini(&v3d->queue[q].sched,
+ &v3d_priv->sched_entity[q]);
+ }
+
+ kfree(v3d_priv);
+}
+
+static const struct file_operations v3d_drm_fops = {
+ .owner = THIS_MODULE,
+ .open = drm_open,
+ .release = drm_release,
+ .unlocked_ioctl = drm_ioctl,
+ .mmap = v3d_mmap,
+ .poll = drm_poll,
+ .read = drm_read,
+ .compat_ioctl = drm_compat_ioctl,
+ .llseek = noop_llseek,
+};
+
+/* DRM_AUTH is required on SUBMIT_CL for now, while we don't have GMP
+ * protection between clients. Note that render nodes would be
+ * able to submit CLs that could access BOs from clients authenticated
+ * with the master node.
+ */
+static const struct drm_ioctl_desc v3d_drm_ioctls[] = {
+ DRM_IOCTL_DEF_DRV(V3D_SUBMIT_CL, v3d_submit_cl_ioctl, DRM_RENDER_ALLOW | DRM_AUTH),
+ DRM_IOCTL_DEF_DRV(V3D_WAIT_BO, v3d_wait_bo_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(V3D_CREATE_BO, v3d_create_bo_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(V3D_MMAP_BO, v3d_mmap_bo_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(V3D_GET_PARAM, v3d_get_param_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(V3D_GET_BO_OFFSET, v3d_get_bo_offset_ioctl, DRM_RENDER_ALLOW),
+};
+
+static const struct vm_operations_struct v3d_vm_ops = {
+ .fault = v3d_gem_fault,
+ .open = drm_gem_vm_open,
+ .close = drm_gem_vm_close,
+};
+
+static struct drm_driver v3d_drm_driver = {
+ .driver_features = (DRIVER_GEM |
+ DRIVER_RENDER |
+ DRIVER_PRIME |
+ DRIVER_SYNCOBJ),
+
+ .open = v3d_open,
+ .postclose = v3d_postclose,
+
+#if defined(CONFIG_DEBUG_FS)
+ .debugfs_init = v3d_debugfs_init,
+#endif
+
+ .gem_free_object_unlocked = v3d_free_object,
+ .gem_vm_ops = &v3d_vm_ops,
+
+ .prime_handle_to_fd = drm_gem_prime_handle_to_fd,
+ .prime_fd_to_handle = drm_gem_prime_fd_to_handle,
+ .gem_prime_import = drm_gem_prime_import,
+ .gem_prime_export = drm_gem_prime_export,
+ .gem_prime_res_obj = v3d_prime_res_obj,
+ .gem_prime_get_sg_table = v3d_prime_get_sg_table,
+ .gem_prime_import_sg_table = v3d_prime_import_sg_table,
+ .gem_prime_mmap = v3d_prime_mmap,
+
+ .ioctls = v3d_drm_ioctls,
+ .num_ioctls = ARRAY_SIZE(v3d_drm_ioctls),
+ .fops = &v3d_drm_fops,
+
+ .name = DRIVER_NAME,
+ .desc = DRIVER_DESC,
+ .date = DRIVER_DATE,
+ .major = DRIVER_MAJOR,
+ .minor = DRIVER_MINOR,
+ .patchlevel = DRIVER_PATCHLEVEL,
+};
+
+static const struct of_device_id v3d_of_match[] = {
+ { .compatible = "brcm,7268-v3d" },
+ { .compatible = "brcm,7278-v3d" },
+ {},
+};
+MODULE_DEVICE_TABLE(of, v3d_of_match);
+
+static int
+map_regs(struct v3d_dev *v3d, void __iomem **regs, const char *name)
+{
+ struct resource *res =
+ platform_get_resource_byname(v3d->pdev, IORESOURCE_MEM, name);
+
+ *regs = devm_ioremap_resource(v3d->dev, res);
+ return PTR_ERR_OR_ZERO(*regs);
+}
+
+static int v3d_platform_drm_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct drm_device *drm;
+ struct v3d_dev *v3d;
+ int ret;
+ u32 ident1;
+
+ dev->coherent_dma_mask = DMA_BIT_MASK(36);
+
+ v3d = kzalloc(sizeof(*v3d), GFP_KERNEL);
+ if (!v3d)
+ return -ENOMEM;
+ v3d->dev = dev;
+ v3d->pdev = pdev;
+ drm = &v3d->drm;
+
+ ret = map_regs(v3d, &v3d->bridge_regs, "bridge");
+ if (ret)
+ goto dev_free;
+
+ ret = map_regs(v3d, &v3d->hub_regs, "hub");
+ if (ret)
+ goto dev_free;
+
+ ret = map_regs(v3d, &v3d->core_regs[0], "core0");
+ if (ret)
+ goto dev_free;
+
+ ident1 = V3D_READ(V3D_HUB_IDENT1);
+ v3d->ver = (V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_TVER) * 10 +
+ V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_REV));
+ v3d->cores = V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_NCORES);
+ WARN_ON(v3d->cores > 1); /* multicore not yet implemented */
+
+ if (v3d->ver < 41) {
+ ret = map_regs(v3d, &v3d->gca_regs, "gca");
+ if (ret)
+ goto dev_free;
+ }
+
+ v3d->mmu_scratch = dma_alloc_wc(dev, 4096, &v3d->mmu_scratch_paddr,
+ GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
+ if (!v3d->mmu_scratch) {
+ dev_err(dev, "Failed to allocate MMU scratch page\n");
+ ret = -ENOMEM;
+ goto dev_free;
+ }
+
+ pm_runtime_use_autosuspend(dev);
+ pm_runtime_set_autosuspend_delay(dev, 50);
+ pm_runtime_enable(dev);
+
+ ret = drm_dev_init(&v3d->drm, &v3d_drm_driver, dev);
+ if (ret)
+ goto dma_free;
+
+ platform_set_drvdata(pdev, drm);
+ drm->dev_private = v3d;
+
+ ret = v3d_gem_init(drm);
+ if (ret)
+ goto dev_destroy;
+
+ v3d_irq_init(v3d);
+
+ ret = drm_dev_register(drm, 0);
+ if (ret)
+ goto gem_destroy;
+
+ return 0;
+
+gem_destroy:
+ v3d_gem_destroy(drm);
+dev_destroy:
+ drm_dev_put(drm);
+dma_free:
+ dma_free_wc(dev, 4096, v3d->mmu_scratch, v3d->mmu_scratch_paddr);
+dev_free:
+ kfree(v3d);
+ return ret;
+}
+
+static int v3d_platform_drm_remove(struct platform_device *pdev)
+{
+ struct drm_device *drm = platform_get_drvdata(pdev);
+ struct v3d_dev *v3d = to_v3d_dev(drm);
+
+ drm_dev_unregister(drm);
+
+ v3d_gem_destroy(drm);
+
+ drm_dev_put(drm);
+
+ dma_free_wc(v3d->dev, 4096, v3d->mmu_scratch, v3d->mmu_scratch_paddr);
+
+ return 0;
+}
+
+static struct platform_driver v3d_platform_driver = {
+ .probe = v3d_platform_drm_probe,
+ .remove = v3d_platform_drm_remove,
+ .driver = {
+ .name = "v3d",
+ .of_match_table = v3d_of_match,
+ },
+};
+
+static int __init v3d_drm_register(void)
+{
+ return platform_driver_register(&v3d_platform_driver);
+}
+
+static void __exit v3d_drm_unregister(void)
+{
+ platform_driver_unregister(&v3d_platform_driver);
+}
+
+module_init(v3d_drm_register);
+module_exit(v3d_drm_unregister);
+
+MODULE_ALIAS("platform:v3d-drm");
+MODULE_DESCRIPTION("Broadcom V3D DRM Driver");
+MODULE_AUTHOR("Eric Anholt <eric@anholt.net>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
new file mode 100644
index 0000000..a043ac3
--- /dev/null
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -0,0 +1,294 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/* Copyright (C) 2015-2018 Broadcom */
+
+#include <linux/reservation.h>
+#include <drm/drmP.h>
+#include <drm/drm_encoder.h>
+#include <drm/drm_gem.h>
+#include <drm/gpu_scheduler.h>
+
+#define GMP_GRANULARITY (128 * 1024)
+
+/* Enum for each of the V3D queues. We maintain various queue
+ * tracking as an array because at some point we'll want to support
+ * the TFU (texture formatting unit) as another queue.
+ */
+enum v3d_queue {
+ V3D_BIN,
+ V3D_RENDER,
+};
+
+#define V3D_MAX_QUEUES (V3D_RENDER + 1)
+
+struct v3d_queue_state {
+ struct drm_gpu_scheduler sched;
+
+ u64 fence_context;
+ u64 emit_seqno;
+ u64 finished_seqno;
+};
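+
+/* The seqnos implement a simple "has the HW finished job N yet?"
+ * check: for a queue q, a fence emitted with seqno N is considered
+ * signaled once finished_seqno catches up (see v3d_fence_signaled()
+ * in v3d_fence.c):
+ *
+ * bool done = v3d->queue[q].finished_seqno >= seqno;
+ */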
+
+struct v3d_dev {
+ struct drm_device drm;
+
+ /* Short representation (e.g. 33, 41) of the V3D tech version
+ * and revision.
+ */
+ int ver;
+
+ struct device *dev;
+ struct platform_device *pdev;
+ void __iomem *hub_regs;
+ void __iomem *core_regs[3];
+ void __iomem *bridge_regs;
+ void __iomem *gca_regs;
+ struct clk *clk;
+
+ /* Virtual and DMA addresses of the single shared page table. */
+ volatile u32 *pt;
+ dma_addr_t pt_paddr;
+
+ /* Virtual and DMA addresses of the MMU's scratch page. When
+ * a read or write is invalid in the MMU, it will be
+ * redirected here.
+ */
+ void *mmu_scratch;
+ dma_addr_t mmu_scratch_paddr;
+
+ /* Number of V3D cores. */
+ u32 cores;
+
+ /* Allocator managing the address space. All units are in
+ * number of pages.
+ */
+ struct drm_mm mm;
+ spinlock_t mm_lock;
+
+ struct work_struct overflow_mem_work;
+
+ struct v3d_exec_info *bin_job;
+ struct v3d_exec_info *render_job;
+
+ struct v3d_queue_state queue[V3D_MAX_QUEUES];
+
+ /* Spinlock used to synchronize the overflow memory
+ * management against bin job submission.
+ */
+ spinlock_t job_lock;
+
+ /* Protects bo_stats */
+ struct mutex bo_lock;
+
+ /* Lock taken when resetting the GPU, to keep multiple
+ * processes from trying to park the scheduler threads and
+ * reset at once.
+ */
+ struct mutex reset_lock;
+
+ struct {
+ u32 num_allocated;
+ u32 pages_allocated;
+ } bo_stats;
+};
+
+static inline struct v3d_dev *
+to_v3d_dev(struct drm_device *dev)
+{
+ return (struct v3d_dev *)dev->dev_private;
+}
+
+/* The per-fd struct, which tracks the MMU mappings. */
+struct v3d_file_priv {
+ struct v3d_dev *v3d;
+
+ struct drm_sched_entity sched_entity[V3D_MAX_QUEUES];
+};
+
+/* Tracks a mapping of a BO into a per-fd address space */
+struct v3d_vma {
+ struct v3d_page_table *pt;
+ struct list_head list; /* entry in v3d_bo.vmas */
+};
+
+struct v3d_bo {
+ struct drm_gem_object base;
+
+ struct mutex lock;
+
+ struct drm_mm_node node;
+
+ u32 pages_refcount;
+ struct page **pages;
+ struct sg_table *sgt;
+ void *vaddr;
+
+ struct list_head vmas; /* list of v3d_vma */
+
+ /* List entry for the BO's position in
+ * v3d_exec_info->unref_list
+ */
+ struct list_head unref_head;
+
+ /* normally (resv == &_resv) except for imported BOs */
+ struct reservation_object *resv;
+ struct reservation_object _resv;
+};
+
+static inline struct v3d_bo *
+to_v3d_bo(struct drm_gem_object *bo)
+{
+ return (struct v3d_bo *)bo;
+}
+
+struct v3d_fence {
+ struct dma_fence base;
+ struct drm_device *dev;
+ /* v3d seqno for signaled() test */
+ u64 seqno;
+ enum v3d_queue queue;
+};
+
+static inline struct v3d_fence *
+to_v3d_fence(struct dma_fence *fence)
+{
+ return (struct v3d_fence *)fence;
+}
+
+#define V3D_READ(offset) readl(v3d->hub_regs + offset)
+#define V3D_WRITE(offset, val) writel(val, v3d->hub_regs + offset)
+
+#define V3D_BRIDGE_READ(offset) readl(v3d->bridge_regs + offset)
+#define V3D_BRIDGE_WRITE(offset, val) writel(val, v3d->bridge_regs + offset)
+
+#define V3D_GCA_READ(offset) readl(v3d->gca_regs + offset)
+#define V3D_GCA_WRITE(offset, val) writel(val, v3d->gca_regs + offset)
+
+#define V3D_CORE_READ(core, offset) readl(v3d->core_regs[core] + offset)
+#define V3D_CORE_WRITE(core, offset, val) writel(val, v3d->core_regs[core] + offset)
+
+struct v3d_job {
+ struct drm_sched_job base;
+
+ struct v3d_exec_info *exec;
+
+ /* An optional fence userspace can pass in for the job to depend on. */
+ struct dma_fence *in_fence;
+
+ /* v3d fence to be signaled by IRQ handler when the job is complete. */
+ struct dma_fence *done_fence;
+
+ /* GPU virtual addresses of the start/end of the CL job. */
+ u32 start, end;
+};
+
+struct v3d_exec_info {
+ struct v3d_dev *v3d;
+
+ struct v3d_job bin, render;
+
+ /* Fence for when the scheduler considers the binner to be
+ * done, for render to depend on.
+ */
+ struct dma_fence *bin_done_fence;
+
+ struct kref refcount;
+
+ /* This is the array of BOs that were looked up at the start of exec. */
+ struct v3d_bo **bo;
+ u32 bo_count;
+
+ /* List of overflow BOs used in the job that need to be
+ * released once the job is complete.
+ */
+ struct list_head unref_list;
+
+ /* Submitted tile memory allocation start/size, tile state. */
+ u32 qma, qms, qts;
+};
+
+/**
+ * _wait_for - magic (register) wait macro
+ *
+ * Does the right thing for modeset paths when run under kgdb or similar atomic
+ * contexts. Note that it's important that we check the condition again after
+ * having timed out, since the timeout could be due to preemption or similar and
+ * we've never had a chance to check the condition before the timeout.
+ */
+#define wait_for(COND, MS) ({ \
+ unsigned long timeout__ = jiffies + msecs_to_jiffies(MS) + 1; \
+ int ret__ = 0; \
+ while (!(COND)) { \
+ if (time_after(jiffies, timeout__)) { \
+ if (!(COND)) \
+ ret__ = -ETIMEDOUT; \
+ break; \
+ } \
+ msleep(1); \
+ } \
+ ret__; \
+})
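+
+/* Typical use (taken from the cache-maintenance paths in v3d_gem.c):
+ *
+ * if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
+ * V3D_L2TCACTL_L2TFLS), 100))
+ * DRM_ERROR("Timeout waiting for L2T flush\n");
+ */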
+
+static inline unsigned long nsecs_to_jiffies_timeout(const u64 n)
+{
+ /* nsecs_to_jiffies64() does not guard against overflow */
+ if (NSEC_PER_SEC % HZ &&
+ div_u64(n, NSEC_PER_SEC) >= MAX_JIFFY_OFFSET / HZ)
+ return MAX_JIFFY_OFFSET;
+
+ return min_t(u64, MAX_JIFFY_OFFSET, nsecs_to_jiffies64(n) + 1);
+}
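+
+/* Example: clamp a userspace-supplied nanosecond timeout before
+ * waiting on a reservation object, as v3d_wait_bo_ioctl() in
+ * v3d_gem.c does:
+ *
+ * unsigned long timeout_jiffies =
+ * nsecs_to_jiffies_timeout(args->timeout_ns);
+ */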
+
+/* v3d_bo.c */
+void v3d_free_object(struct drm_gem_object *gem_obj);
+struct v3d_bo *v3d_bo_create(struct drm_device *dev, struct drm_file *file_priv,
+ size_t size);
+int v3d_create_bo_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+int v3d_mmap_bo_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+int v3d_get_bo_offset_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+int v3d_gem_fault(struct vm_fault *vmf);
+int v3d_mmap(struct file *filp, struct vm_area_struct *vma);
+struct reservation_object *v3d_prime_res_obj(struct drm_gem_object *obj);
+int v3d_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
+struct sg_table *v3d_prime_get_sg_table(struct drm_gem_object *obj);
+struct drm_gem_object *v3d_prime_import_sg_table(struct drm_device *dev,
+ struct dma_buf_attachment *attach,
+ struct sg_table *sgt);
+
+/* v3d_debugfs.c */
+int v3d_debugfs_init(struct drm_minor *minor);
+
+/* v3d_fence.c */
+extern const struct dma_fence_ops v3d_fence_ops;
+struct dma_fence *v3d_fence_create(struct v3d_dev *v3d, enum v3d_queue queue);
+
+/* v3d_gem.c */
+int v3d_gem_init(struct drm_device *dev);
+void v3d_gem_destroy(struct drm_device *dev);
+int v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+int v3d_wait_bo_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+void v3d_exec_put(struct v3d_exec_info *exec);
+void v3d_reset(struct v3d_dev *v3d);
+void v3d_invalidate_caches(struct v3d_dev *v3d);
+void v3d_flush_caches(struct v3d_dev *v3d);
+
+/* v3d_irq.c */
+void v3d_irq_init(struct v3d_dev *v3d);
+void v3d_irq_enable(struct v3d_dev *v3d);
+void v3d_irq_disable(struct v3d_dev *v3d);
+void v3d_irq_reset(struct v3d_dev *v3d);
+
+/* v3d_mmu.c */
+int v3d_mmu_get_offset(struct drm_file *file_priv, struct v3d_bo *bo,
+ u32 *offset);
+int v3d_mmu_set_page_table(struct v3d_dev *v3d);
+void v3d_mmu_insert_ptes(struct v3d_bo *bo);
+void v3d_mmu_remove_ptes(struct v3d_bo *bo);
+
+/* v3d_sched.c */
+int v3d_sched_init(struct v3d_dev *v3d);
+void v3d_sched_fini(struct v3d_dev *v3d);
diff --git a/drivers/gpu/drm/v3d/v3d_fence.c b/drivers/gpu/drm/v3d/v3d_fence.c
new file mode 100644
index 0000000..087d49c
--- /dev/null
+++ b/drivers/gpu/drm/v3d/v3d_fence.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (C) 2017-2018 Broadcom */
+
+#include "v3d_drv.h"
+
+struct dma_fence *v3d_fence_create(struct v3d_dev *v3d, enum v3d_queue queue)
+{
+ struct v3d_fence *fence;
+
+ fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+ if (!fence)
+ return ERR_PTR(-ENOMEM);
+
+ fence->dev = &v3d->drm;
+ fence->queue = queue;
+ fence->seqno = ++v3d->queue[queue].emit_seqno;
+ dma_fence_init(&fence->base, &v3d_fence_ops, &v3d->job_lock,
+ v3d->queue[queue].fence_context, fence->seqno);
+
+ return &fence->base;
+}
+
+static const char *v3d_fence_get_driver_name(struct dma_fence *fence)
+{
+ return "v3d";
+}
+
+static const char *v3d_fence_get_timeline_name(struct dma_fence *fence)
+{
+ struct v3d_fence *f = to_v3d_fence(fence);
+
+ if (f->queue == V3D_BIN)
+ return "v3d-bin";
+ else
+ return "v3d-render";
+}
+
+static bool v3d_fence_enable_signaling(struct dma_fence *fence)
+{
+ return true;
+}
+
+static bool v3d_fence_signaled(struct dma_fence *fence)
+{
+ struct v3d_fence *f = to_v3d_fence(fence);
+ struct v3d_dev *v3d = to_v3d_dev(f->dev);
+
+ return v3d->queue[f->queue].finished_seqno >= f->seqno;
+}
+
+const struct dma_fence_ops v3d_fence_ops = {
+ .get_driver_name = v3d_fence_get_driver_name,
+ .get_timeline_name = v3d_fence_get_timeline_name,
+ .enable_signaling = v3d_fence_enable_signaling,
+ .signaled = v3d_fence_signaled,
+ .wait = dma_fence_default_wait,
+ .release = dma_fence_free,
+};
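+
+/* Illustrative fence lifecycle, matching the IRQ handler in
+ * v3d_irq.c: the scheduler backend creates a fence when a job is
+ * started, and the flush-done interrupt bumps the seqno and signals
+ * it.
+ *
+ * struct dma_fence *fence = v3d_fence_create(v3d, V3D_BIN);
+ * ...
+ * v3d->queue[V3D_BIN].finished_seqno++;
+ * dma_fence_signal(fence);
+ */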
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
new file mode 100644
index 0000000..b513f91
--- /dev/null
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -0,0 +1,668 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (C) 2014-2018 Broadcom */
+
+#include <drm/drmP.h>
+#include <drm/drm_syncobj.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/sched/signal.h>
+
+#include "uapi/drm/v3d_drm.h"
+#include "v3d_drv.h"
+#include "v3d_regs.h"
+#include "v3d_trace.h"
+
+static void
+v3d_init_core(struct v3d_dev *v3d, int core)
+{
+ /* Set OVRTMUOUT, which means that the texture sampler uniform
+ * configuration's tmu output type field is used, instead of
+ * using the hardware default behavior based on the texture
+ * type. If you want the default behavior, you can still put
+ * "2" in the indirect texture state's output_type field.
+ */
+ V3D_CORE_WRITE(core, V3D_CTL_MISCCFG, V3D_MISCCFG_OVRTMUOUT);
+
+ /* Whenever we flush the L2T cache, we always want to flush
+ * the whole thing.
+ */
+ V3D_CORE_WRITE(core, V3D_CTL_L2TFLSTA, 0);
+ V3D_CORE_WRITE(core, V3D_CTL_L2TFLEND, ~0);
+}
+
+/* Sets invariant state for the HW. */
+static void
+v3d_init_hw_state(struct v3d_dev *v3d)
+{
+ v3d_init_core(v3d, 0);
+}
+
+static void
+v3d_idle_axi(struct v3d_dev *v3d, int core)
+{
+ V3D_CORE_WRITE(core, V3D_GMP_CFG, V3D_GMP_CFG_STOP_REQ);
+
+ if (wait_for((V3D_CORE_READ(core, V3D_GMP_STATUS) &
+ (V3D_GMP_STATUS_RD_COUNT_MASK |
+ V3D_GMP_STATUS_WR_COUNT_MASK |
+ V3D_GMP_STATUS_CFG_BUSY)) == 0, 100)) {
+ DRM_ERROR("Failed to wait for safe GMP shutdown\n");
+ }
+}
+
+static void
+v3d_idle_gca(struct v3d_dev *v3d)
+{
+ if (v3d->ver >= 41)
+ return;
+
+ V3D_GCA_WRITE(V3D_GCA_SAFE_SHUTDOWN, V3D_GCA_SAFE_SHUTDOWN_EN);
+
+ if (wait_for((V3D_GCA_READ(V3D_GCA_SAFE_SHUTDOWN_ACK) &
+ V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED) ==
+ V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED, 100)) {
+ DRM_ERROR("Failed to wait for safe GCA shutdown\n");
+ }
+}
+
+static void
+v3d_reset_v3d(struct v3d_dev *v3d)
+{
+ int version = V3D_BRIDGE_READ(V3D_TOP_GR_BRIDGE_REVISION);
+
+ if (V3D_GET_FIELD(version, V3D_TOP_GR_BRIDGE_MAJOR) == 2) {
+ V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_0,
+ V3D_TOP_GR_BRIDGE_SW_INIT_0_V3D_CLK_108_SW_INIT);
+ V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_0, 0);
+
+ /* GFXH-1383: The SW_INIT may cause a stray write to address 0
+ * of the unit, so reset it to its power-on value here.
+ */
+ V3D_WRITE(V3D_HUB_AXICFG, V3D_HUB_AXICFG_MAX_LEN_MASK);
+ } else {
+ WARN_ON_ONCE(V3D_GET_FIELD(version,
+ V3D_TOP_GR_BRIDGE_MAJOR) != 7);
+ V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_1,
+ V3D_TOP_GR_BRIDGE_SW_INIT_1_V3D_CLK_108_SW_INIT);
+ V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_1, 0);
+ }
+
+ v3d_init_hw_state(v3d);
+}
+
+void
+v3d_reset(struct v3d_dev *v3d)
+{
+ struct drm_device *dev = &v3d->drm;
+
+ DRM_ERROR("Resetting GPU.\n");
+ trace_v3d_reset_begin(dev);
+
+ /* XXX: only needed for safe powerdown, not reset. */
+ if (false)
+ v3d_idle_axi(v3d, 0);
+
+ v3d_idle_gca(v3d);
+ v3d_reset_v3d(v3d);
+
+ v3d_mmu_set_page_table(v3d);
+ v3d_irq_reset(v3d);
+
+ trace_v3d_reset_end(dev);
+}
+
+static void
+v3d_flush_l3(struct v3d_dev *v3d)
+{
+ if (v3d->ver < 41) {
+ u32 gca_ctrl = V3D_GCA_READ(V3D_GCA_CACHE_CTRL);
+
+ V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL,
+ gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH);
+
+ if (v3d->ver < 33) {
+ V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL,
+ gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH);
+ }
+ }
+}
+
+/* Invalidates the (read-only) L2 cache. */
+static void
+v3d_invalidate_l2(struct v3d_dev *v3d, int core)
+{
+ V3D_CORE_WRITE(core, V3D_CTL_L2CACTL,
+ V3D_L2CACTL_L2CCLR |
+ V3D_L2CACTL_L2CENA);
+}
+
+static void
+v3d_invalidate_l1td(struct v3d_dev *v3d, int core)
+{
+ V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, V3D_L2TCACTL_TMUWCF);
+ if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
+ V3D_L2TCACTL_L2TFLS), 100)) {
+ DRM_ERROR("Timeout waiting for L1T write combiner flush\n");
+ }
+}
+
+/* Invalidates texture L2 cachelines */
+static void
+v3d_flush_l2t(struct v3d_dev *v3d, int core)
+{
+ v3d_invalidate_l1td(v3d, core);
+
+ V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL,
+ V3D_L2TCACTL_L2TFLS |
+ V3D_SET_FIELD(V3D_L2TCACTL_FLM_FLUSH, V3D_L2TCACTL_FLM));
+ if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
+ V3D_L2TCACTL_L2TFLS), 100)) {
+ DRM_ERROR("Timeout waiting for L2T flush\n");
+ }
+}
+
+/* Invalidates the slice caches. These are read-only caches. */
+static void
+v3d_invalidate_slices(struct v3d_dev *v3d, int core)
+{
+ V3D_CORE_WRITE(core, V3D_CTL_SLCACTL,
+ V3D_SET_FIELD(0xf, V3D_SLCACTL_TVCCS) |
+ V3D_SET_FIELD(0xf, V3D_SLCACTL_TDCCS) |
+ V3D_SET_FIELD(0xf, V3D_SLCACTL_UCC) |
+ V3D_SET_FIELD(0xf, V3D_SLCACTL_ICC));
+}
+
+/* Invalidates texture L2 cachelines */
+static void
+v3d_invalidate_l2t(struct v3d_dev *v3d, int core)
+{
+ V3D_CORE_WRITE(core,
+ V3D_CTL_L2TCACTL,
+ V3D_L2TCACTL_L2TFLS |
+ V3D_SET_FIELD(V3D_L2TCACTL_FLM_CLEAR, V3D_L2TCACTL_FLM));
+ if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
+ V3D_L2TCACTL_L2TFLS), 100)) {
+ DRM_ERROR("Timeout waiting for L2T invalidate\n");
+ }
+}
+
+void
+v3d_invalidate_caches(struct v3d_dev *v3d)
+{
+ v3d_flush_l3(v3d);
+
+ v3d_invalidate_l2(v3d, 0);
+ v3d_invalidate_slices(v3d, 0);
+ v3d_flush_l2t(v3d, 0);
+}
+
+void
+v3d_flush_caches(struct v3d_dev *v3d)
+{
+ v3d_invalidate_l1td(v3d, 0);
+ v3d_invalidate_l2t(v3d, 0);
+}
+
+static void
+v3d_attach_object_fences(struct v3d_exec_info *exec)
+{
+ struct dma_fence *out_fence = &exec->render.base.s_fence->finished;
+ struct v3d_bo *bo;
+ int i;
+
+ for (i = 0; i < exec->bo_count; i++) {
+ bo = to_v3d_bo(&exec->bo[i]->base);
+
+ /* XXX: Use shared fences for read-only objects. */
+ reservation_object_add_excl_fence(bo->resv, out_fence);
+ }
+}
+
+static void
+v3d_unlock_bo_reservations(struct drm_device *dev,
+ struct v3d_exec_info *exec,
+ struct ww_acquire_ctx *acquire_ctx)
+{
+ int i;
+
+ for (i = 0; i < exec->bo_count; i++) {
+ struct v3d_bo *bo = to_v3d_bo(&exec->bo[i]->base);
+
+ ww_mutex_unlock(&bo->resv->lock);
+ }
+
+ ww_acquire_fini(acquire_ctx);
+}
+
+/* Takes the reservation lock on all the BOs being referenced, so that
+ * at queue submit time we can update the reservations.
+ *
+ * We don't lock the RCL, the tile alloc/state BOs, or overflow memory
+ * (all of which are on exec->unref_list). They're entirely private
+ * to v3d, so we don't attach dma-buf fences to them.
+ */
+static int
+v3d_lock_bo_reservations(struct drm_device *dev,
+ struct v3d_exec_info *exec,
+ struct ww_acquire_ctx *acquire_ctx)
+{
+ int contended_lock = -1;
+ int i, ret;
+ struct v3d_bo *bo;
+
+ ww_acquire_init(acquire_ctx, &reservation_ww_class);
+
+retry:
+ if (contended_lock != -1) {
+ bo = to_v3d_bo(&exec->bo[contended_lock]->base);
+ ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock,
+ acquire_ctx);
+ if (ret) {
+ ww_acquire_done(acquire_ctx);
+ return ret;
+ }
+ }
+
+ for (i = 0; i < exec->bo_count; i++) {
+ if (i == contended_lock)
+ continue;
+
+ bo = to_v3d_bo(&exec->bo[i]->base);
+
+ ret = ww_mutex_lock_interruptible(&bo->resv->lock, acquire_ctx);
+ if (ret) {
+ int j;
+
+ for (j = 0; j < i; j++) {
+ bo = to_v3d_bo(&exec->bo[j]->base);
+ ww_mutex_unlock(&bo->resv->lock);
+ }
+
+ if (contended_lock != -1 && contended_lock >= i) {
+ bo = to_v3d_bo(&exec->bo[contended_lock]->base);
+
+ ww_mutex_unlock(&bo->resv->lock);
+ }
+
+ if (ret == -EDEADLK) {
+ contended_lock = i;
+ goto retry;
+ }
+
+ ww_acquire_done(acquire_ctx);
+ return ret;
+ }
+ }
+
+ ww_acquire_done(acquire_ctx);
+
+ /* Reserve space for our shared (read-only) fence references,
+ * before we commit the CL to the hardware.
+ */
+ for (i = 0; i < exec->bo_count; i++) {
+ bo = to_v3d_bo(&exec->bo[i]->base);
+
+ ret = reservation_object_reserve_shared(bo->resv);
+ if (ret) {
+ v3d_unlock_bo_reservations(dev, exec, acquire_ctx);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * v3d_cl_lookup_bos() - Sets up exec->bo[] with the GEM objects
+ * referenced by the job.
+ * @dev: DRM device
+ * @file_priv: DRM file for this fd
+ * @exec: V3D job being set up
+ *
+ * The command validator needs to reference BOs by their index within
+ * the submitted job's BO list. This does the validation of the job's
+ * BO list and reference counting for the lifetime of the job.
+ *
+ * Note that this function doesn't need to unreference the BOs on
+ * failure, because that will happen at v3d_exec_cleanup() time.
+ */
+static int
+v3d_cl_lookup_bos(struct drm_device *dev,
+ struct drm_file *file_priv,
+ struct drm_v3d_submit_cl *args,
+ struct v3d_exec_info *exec)
+{
+ u32 *handles;
+ int ret = 0;
+ int i;
+
+ exec->bo_count = args->bo_handle_count;
+
+ if (!exec->bo_count) {
+ /* A submitted job must reference at least one BO (the
+ * buffers it renders to), so reject an empty list outright.
+ */
+ DRM_DEBUG("Rendering requires BOs\n");
+ return -EINVAL;
+ }
+
+ exec->bo = kvmalloc_array(exec->bo_count,
+ sizeof(struct drm_gem_cma_object *),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!exec->bo) {
+ DRM_DEBUG("Failed to allocate validated BO pointers\n");
+ return -ENOMEM;
+ }
+
+ handles = kvmalloc_array(exec->bo_count, sizeof(u32), GFP_KERNEL);
+ if (!handles) {
+ ret = -ENOMEM;
+ DRM_DEBUG("Failed to allocate incoming GEM handles\n");
+ goto fail;
+ }
+
+ if (copy_from_user(handles,
+ (void __user *)(uintptr_t)args->bo_handles,
+ exec->bo_count * sizeof(u32))) {
+ ret = -EFAULT;
+ DRM_DEBUG("Failed to copy in GEM handles\n");
+ goto fail;
+ }
+
+ spin_lock(&file_priv->table_lock);
+ for (i = 0; i < exec->bo_count; i++) {
+ struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
+ handles[i]);
+ if (!bo) {
+ DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
+ i, handles[i]);
+ ret = -ENOENT;
+ spin_unlock(&file_priv->table_lock);
+ goto fail;
+ }
+ drm_gem_object_get(bo);
+ exec->bo[i] = to_v3d_bo(bo);
+ }
+ spin_unlock(&file_priv->table_lock);
+
+fail:
+ kvfree(handles);
+ return ret;
+}
+
+static void
+v3d_exec_cleanup(struct kref *ref)
+{
+ struct v3d_exec_info *exec = container_of(ref, struct v3d_exec_info,
+ refcount);
+ struct v3d_dev *v3d = exec->v3d;
+ unsigned int i;
+ struct v3d_bo *bo, *save;
+
+ dma_fence_put(exec->bin.in_fence);
+ dma_fence_put(exec->render.in_fence);
+
+ dma_fence_put(exec->bin.done_fence);
+ dma_fence_put(exec->render.done_fence);
+
+ dma_fence_put(exec->bin_done_fence);
+
+ for (i = 0; i < exec->bo_count; i++)
+ drm_gem_object_put_unlocked(&exec->bo[i]->base);
+ kvfree(exec->bo);
+
+ list_for_each_entry_safe(bo, save, &exec->unref_list, unref_head) {
+ drm_gem_object_put_unlocked(&bo->base);
+ }
+
+ pm_runtime_mark_last_busy(v3d->dev);
+ pm_runtime_put_autosuspend(v3d->dev);
+
+ kfree(exec);
+}
+
+void v3d_exec_put(struct v3d_exec_info *exec)
+{
+ kref_put(&exec->refcount, v3d_exec_cleanup);
+}
+
+int
+v3d_wait_bo_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ int ret;
+ struct drm_v3d_wait_bo *args = data;
+ struct drm_gem_object *gem_obj;
+ struct v3d_bo *bo;
+ ktime_t start = ktime_get();
+ u64 delta_ns;
+ unsigned long timeout_jiffies =
+ nsecs_to_jiffies_timeout(args->timeout_ns);
+
+ if (args->pad != 0)
+ return -EINVAL;
+
+ gem_obj = drm_gem_object_lookup(file_priv, args->handle);
+ if (!gem_obj) {
+ DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
+ return -EINVAL;
+ }
+ bo = to_v3d_bo(gem_obj);
+
+ ret = reservation_object_wait_timeout_rcu(bo->resv,
+ true, true,
+ timeout_jiffies);
+
+ if (ret == 0)
+ ret = -ETIME;
+ else if (ret > 0)
+ ret = 0;
+
+ /* Decrement the user's timeout, in case we got interrupted
+ * such that the ioctl will be restarted.
+ */
+ delta_ns = ktime_to_ns(ktime_sub(ktime_get(), start));
+ if (delta_ns < args->timeout_ns)
+ args->timeout_ns -= delta_ns;
+ else
+ args->timeout_ns = 0;
+
+ /* Asked to wait beyond the jiffie/scheduler precision? */
+ if (ret == -ETIME && args->timeout_ns)
+ ret = -EAGAIN;
+
+ drm_gem_object_put_unlocked(gem_obj);
+
+ return ret;
+}
+
+/**
+ * v3d_submit_cl_ioctl() - Submits a job (frame) to the V3D.
+ * @dev: DRM device
+ * @data: ioctl argument
+ * @file_priv: DRM file for this fd
+ *
+ * This is the main entrypoint for userspace to submit a 3D frame to
+ * the GPU. Userspace provides the binner command list (if
+ * applicable), and the kernel sets up the render command list to draw
+ * to the framebuffer described in the ioctl, using the command lists
+ * that the 3D engine's binner will produce.
+ */
+int
+v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct v3d_dev *v3d = to_v3d_dev(dev);
+ struct v3d_file_priv *v3d_priv = file_priv->driver_priv;
+ struct drm_v3d_submit_cl *args = data;
+ struct v3d_exec_info *exec;
+ struct ww_acquire_ctx acquire_ctx;
+ struct drm_syncobj *sync_out;
+ int ret = 0;
+
+ if (args->pad != 0) {
+ DRM_INFO("pad must be zero: %d\n", args->pad);
+ return -EINVAL;
+ }
+
+ exec = kzalloc(sizeof(*exec), GFP_KERNEL);
+ if (!exec)
+ return -ENOMEM;
+
+ ret = pm_runtime_get_sync(v3d->dev);
+ if (ret < 0) {
+ kfree(exec);
+ return ret;
+ }
+
+ kref_init(&exec->refcount);
+
+ ret = drm_syncobj_find_fence(file_priv, args->in_sync_bcl,
+ &exec->bin.in_fence);
+ if (ret == -EINVAL)
+ goto fail;
+
+ ret = drm_syncobj_find_fence(file_priv, args->in_sync_rcl,
+ &exec->render.in_fence);
+ if (ret == -EINVAL)
+ goto fail;
+
+ exec->qma = args->qma;
+ exec->qms = args->qms;
+ exec->qts = args->qts;
+ exec->bin.exec = exec;
+ exec->bin.start = args->bcl_start;
+ exec->bin.end = args->bcl_end;
+ exec->render.exec = exec;
+ exec->render.start = args->rcl_start;
+ exec->render.end = args->rcl_end;
+ exec->v3d = v3d;
+ INIT_LIST_HEAD(&exec->unref_list);
+
+ ret = v3d_cl_lookup_bos(dev, file_priv, args, exec);
+ if (ret)
+ goto fail;
+
+ ret = v3d_lock_bo_reservations(dev, exec, &acquire_ctx);
+ if (ret)
+ goto fail;
+
+ if (exec->bin.start != exec->bin.end) {
+ ret = drm_sched_job_init(&exec->bin.base,
+ &v3d->queue[V3D_BIN].sched,
+ &v3d_priv->sched_entity[V3D_BIN],
+ v3d_priv);
+ if (ret)
+ goto fail_unreserve;
+
+ exec->bin_done_fence =
+ dma_fence_get(&exec->bin.base.s_fence->finished);
+
+ kref_get(&exec->refcount); /* put by scheduler job completion */
+ drm_sched_entity_push_job(&exec->bin.base,
+ &v3d_priv->sched_entity[V3D_BIN]);
+ }
+
+ ret = drm_sched_job_init(&exec->render.base,
+ &v3d->queue[V3D_RENDER].sched,
+ &v3d_priv->sched_entity[V3D_RENDER],
+ v3d_priv);
+ if (ret)
+ goto fail_unreserve;
+
+ kref_get(&exec->refcount); /* put by scheduler job completion */
+ drm_sched_entity_push_job(&exec->render.base,
+ &v3d_priv->sched_entity[V3D_RENDER]);
+
+ v3d_attach_object_fences(exec);
+
+ v3d_unlock_bo_reservations(dev, exec, &acquire_ctx);
+
+ /* Update the return sync object for the job. */
+ sync_out = drm_syncobj_find(file_priv, args->out_sync);
+ if (sync_out) {
+ drm_syncobj_replace_fence(sync_out,
+ &exec->render.base.s_fence->finished);
+ drm_syncobj_put(sync_out);
+ }
+
+ v3d_exec_put(exec);
+
+ return 0;
+
+fail_unreserve:
+ v3d_unlock_bo_reservations(dev, exec, &acquire_ctx);
+fail:
+ v3d_exec_put(exec);
+
+ return ret;
+}
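+
+/* Userspace-side sketch (illustrative only) of driving this ioctl;
+ * handles[] would list every BO the command lists reference, and the
+ * start/end/tile values come from the userspace driver's CL setup:
+ *
+ * struct drm_v3d_submit_cl submit = {
+ * .bo_handles = (uintptr_t)handles,
+ * .bo_handle_count = n,
+ * .bcl_start = bcl_start, .bcl_end = bcl_end,
+ * .rcl_start = rcl_start, .rcl_end = rcl_end,
+ * .qma = tile_alloc_addr, .qms = tile_alloc_size,
+ * .qts = tile_state_addr,
+ * };
+ * drmIoctl(fd, DRM_IOCTL_V3D_SUBMIT_CL, &submit);
+ */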
+
+int
+v3d_gem_init(struct drm_device *dev)
+{
+ struct v3d_dev *v3d = to_v3d_dev(dev);
+ u32 pt_size = 4096 * 1024;
+ int ret, i;
+
+ for (i = 0; i < V3D_MAX_QUEUES; i++)
+ v3d->queue[i].fence_context = dma_fence_context_alloc(1);
+
+ spin_lock_init(&v3d->mm_lock);
+ spin_lock_init(&v3d->job_lock);
+ mutex_init(&v3d->bo_lock);
+ mutex_init(&v3d->reset_lock);
+
+ /* Note: We don't allocate address 0. Various bits of HW
+ * treat 0 as special, such as the occlusion query counters
+ * where 0 means "disabled".
+ */
+ drm_mm_init(&v3d->mm, 1, pt_size / sizeof(u32) - 1);
+
+ v3d->pt = dma_alloc_wc(v3d->dev, pt_size,
+ &v3d->pt_paddr,
+ GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
+ if (!v3d->pt) {
+ drm_mm_takedown(&v3d->mm);
+ dev_err(v3d->dev,
+ "Failed to allocate page tables. "
+ "Please ensure you have CMA enabled.\n");
+ return -ENOMEM;
+ }
+
+ v3d_init_hw_state(v3d);
+ v3d_mmu_set_page_table(v3d);
+
+ ret = v3d_sched_init(v3d);
+ if (ret) {
+ drm_mm_takedown(&v3d->mm);
+ dma_free_coherent(v3d->dev, pt_size, (void *)v3d->pt,
+ v3d->pt_paddr);
+ return ret;
+ }
+
+ return 0;
+}
+
+void
+v3d_gem_destroy(struct drm_device *dev)
+{
+ struct v3d_dev *v3d = to_v3d_dev(dev);
+ enum v3d_queue q;
+
+ v3d_sched_fini(v3d);
+
+ /* Waiting for exec to finish would need to be done before
+ * unregistering V3D.
+ */
+ for (q = 0; q < V3D_MAX_QUEUES; q++) {
+ WARN_ON(v3d->queue[q].emit_seqno !=
+ v3d->queue[q].finished_seqno);
+ }
+
+ drm_mm_takedown(&v3d->mm);
+
+ dma_free_coherent(v3d->dev, 4096 * 1024, (void *)v3d->pt, v3d->pt_paddr);
+}
diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c
new file mode 100644
index 0000000..77e1fa0
--- /dev/null
+++ b/drivers/gpu/drm/v3d/v3d_irq.c
@@ -0,0 +1,206 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (C) 2014-2018 Broadcom */
+
+/**
+ * DOC: Interrupt management for the V3D engine
+ *
+ * When we take a binning or rendering flush done interrupt, we need
+ * to signal the fence for that job so that the scheduler can queue up
+ * the next one and unblock any waiters.
+ *
+ * When we take the binner out of memory interrupt, we need to
+ * allocate some new memory and pass it to the binner so that the
+ * current job can make progress.
+ */
+
+#include "v3d_drv.h"
+#include "v3d_regs.h"
+
+#define V3D_CORE_IRQS ((u32)(V3D_INT_OUTOMEM | \
+ V3D_INT_FLDONE | \
+ V3D_INT_FRDONE | \
+ V3D_INT_GMPV))
+
+#define V3D_HUB_IRQS ((u32)(V3D_HUB_INT_MMU_WRV | \
+ V3D_HUB_INT_MMU_PTI | \
+ V3D_HUB_INT_MMU_CAP))
+
+static void
+v3d_overflow_mem_work(struct work_struct *work)
+{
+ struct v3d_dev *v3d =
+ container_of(work, struct v3d_dev, overflow_mem_work);
+ struct drm_device *dev = &v3d->drm;
+ struct v3d_bo *bo = v3d_bo_create(dev, NULL /* XXX: GMP */, 256 * 1024);
+ unsigned long irqflags;
+
+ if (IS_ERR(bo)) {
+ DRM_ERROR("Couldn't allocate binner overflow mem\n");
+ return;
+ }
+
+ /* We lost a race, and our work task came in after the bin job
+ * completed and exited. This can happen because the HW
+ * signals OOM before it's fully OOM, so the binner might just
+ * barely complete.
+ *
+ * If we lose the race and our work task comes in after a new
+ * bin job got scheduled, that's fine. We'll just give them
+ * some binner pool anyway.
+ */
+ spin_lock_irqsave(&v3d->job_lock, irqflags);
+ if (!v3d->bin_job) {
+ spin_unlock_irqrestore(&v3d->job_lock, irqflags);
+ goto out;
+ }
+
+ drm_gem_object_get(&bo->base);
+ list_add_tail(&bo->unref_head, &v3d->bin_job->unref_list);
+ spin_unlock_irqrestore(&v3d->job_lock, irqflags);
+
+ V3D_CORE_WRITE(0, V3D_PTB_BPOA, bo->node.start << PAGE_SHIFT);
+ V3D_CORE_WRITE(0, V3D_PTB_BPOS, bo->base.size);
+
+out:
+ drm_gem_object_put_unlocked(&bo->base);
+}
+
+static irqreturn_t
+v3d_irq(int irq, void *arg)
+{
+ struct v3d_dev *v3d = arg;
+ u32 intsts;
+ irqreturn_t status = IRQ_NONE;
+
+ intsts = V3D_CORE_READ(0, V3D_CTL_INT_STS);
+
+ /* Acknowledge the interrupts we're handling here. */
+ V3D_CORE_WRITE(0, V3D_CTL_INT_CLR, intsts);
+
+ if (intsts & V3D_INT_OUTOMEM) {
+ /* Note that the OOM status is edge signaled, so the
+ * interrupt won't happen again until we actually
+ * add more memory.
+ */
+ schedule_work(&v3d->overflow_mem_work);
+ status = IRQ_HANDLED;
+ }
+
+ if (intsts & V3D_INT_FLDONE) {
+ v3d->queue[V3D_BIN].finished_seqno++;
+ dma_fence_signal(v3d->bin_job->bin.done_fence);
+ status = IRQ_HANDLED;
+ }
+
+ if (intsts & V3D_INT_FRDONE) {
+ v3d->queue[V3D_RENDER].finished_seqno++;
+ dma_fence_signal(v3d->render_job->render.done_fence);
+
+ status = IRQ_HANDLED;
+ }
+
+ /* We shouldn't be triggering these if we have GMP in
+ * always-allowed mode.
+ */
+ if (intsts & V3D_INT_GMPV)
+ dev_err(v3d->dev, "GMP violation\n");
+
+ return status;
+}
+
+static irqreturn_t
+v3d_hub_irq(int irq, void *arg)
+{
+ struct v3d_dev *v3d = arg;
+ u32 intsts;
+ irqreturn_t status = IRQ_NONE;
+
+ intsts = V3D_READ(V3D_HUB_INT_STS);
+
+ /* Acknowledge the interrupts we're handling here. */
+ V3D_WRITE(V3D_HUB_INT_CLR, intsts);
+
+ if (intsts & (V3D_HUB_INT_MMU_WRV |
+ V3D_HUB_INT_MMU_PTI |
+ V3D_HUB_INT_MMU_CAP)) {
+ u32 axi_id = V3D_READ(V3D_MMU_VIO_ID);
+ u64 vio_addr = (u64)V3D_READ(V3D_MMU_VIO_ADDR) << 8;
+
+ dev_err(v3d->dev, "MMU error from client %d at 0x%08llx%s%s%s\n",
+ axi_id, (long long)vio_addr,
+ ((intsts & V3D_HUB_INT_MMU_WRV) ?
+ ", write violation" : ""),
+ ((intsts & V3D_HUB_INT_MMU_PTI) ?
+ ", pte invalid" : ""),
+ ((intsts & V3D_HUB_INT_MMU_CAP) ?
+ ", cap exceeded" : ""));
+ status = IRQ_HANDLED;
+ }
+
+ return status;
+}
+
+void
+v3d_irq_init(struct v3d_dev *v3d)
+{
+ int ret, core;
+
+ INIT_WORK(&v3d->overflow_mem_work, v3d_overflow_mem_work);
+
+ /* Clear any pending interrupts someone might have left around
+ * for us.
+ */
+ for (core = 0; core < v3d->cores; core++)
+ V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS);
+ V3D_WRITE(V3D_HUB_INT_CLR, V3D_HUB_IRQS);
+
+ ret = devm_request_irq(v3d->dev, platform_get_irq(v3d->pdev, 0),
+ v3d_hub_irq, IRQF_SHARED,
+ "v3d_hub", v3d);
+ if (ret)
+ dev_err(v3d->dev, "hub IRQ setup failed: %d\n", ret);
+
+ ret = devm_request_irq(v3d->dev, platform_get_irq(v3d->pdev, 1),
+ v3d_irq, IRQF_SHARED,
+ "v3d_core0", v3d);
+ if (ret)
+ dev_err(v3d->dev, "core IRQ setup failed: %d\n", ret);
+
+ v3d_irq_enable(v3d);
+}
+
+void
+v3d_irq_enable(struct v3d_dev *v3d)
+{
+ int core;
+
+ /* Enable our set of interrupts, masking out any others. */
+ for (core = 0; core < v3d->cores; core++) {
+ V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_SET, ~V3D_CORE_IRQS);
+ V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_CLR, V3D_CORE_IRQS);
+ }
+
+ V3D_WRITE(V3D_HUB_INT_MSK_SET, ~V3D_HUB_IRQS);
+ V3D_WRITE(V3D_HUB_INT_MSK_CLR, V3D_HUB_IRQS);
+}
+
+void
+v3d_irq_disable(struct v3d_dev *v3d)
+{
+ int core;
+
+ /* Disable all interrupts. */
+ for (core = 0; core < v3d->cores; core++)
+ V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_SET, ~0);
+ V3D_WRITE(V3D_HUB_INT_MSK_SET, ~0);
+
+ /* Clear any pending interrupts we might have left. */
+ for (core = 0; core < v3d->cores; core++)
+ V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS);
+ V3D_WRITE(V3D_HUB_INT_CLR, V3D_HUB_IRQS);
+
+ cancel_work_sync(&v3d->overflow_mem_work);
+}
+
+/** Reinitializes interrupt registers when a GPU reset is performed. */
+void v3d_irq_reset(struct v3d_dev *v3d)
+{
+ v3d_irq_enable(v3d);
+}
diff --git a/drivers/gpu/drm/v3d/v3d_mmu.c b/drivers/gpu/drm/v3d/v3d_mmu.c
new file mode 100644
index 0000000..b00f97c
--- /dev/null
+++ b/drivers/gpu/drm/v3d/v3d_mmu.c
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (C) 2017-2018 Broadcom */
+
+/**
+ * DOC: Broadcom V3D MMU
+ *
+ * The V3D 3.x hardware (compared to VC4) now includes an MMU. It has
+ * a single level of page tables for the V3D's 4GB address space to
+ * map to AXI bus addresses, thus it could need up to 4MB of
+ * physically contiguous memory to store the PTEs.
+ *
+ * Because the 4MB of contiguous memory for page tables is precious,
+ * and switching between them is expensive, we load all BOs into the
+ * same 4GB address space.
+ *
+ * To protect clients from each other, we should use the GMP to
+ * quickly mask out (at 128kb granularity) what pages are available to
+ * each client. This is not yet implemented.
+ */
+
+#include "v3d_drv.h"
+#include "v3d_regs.h"
+
+#define V3D_MMU_PAGE_SHIFT 12
+
+/* Note: All PTEs for the 1MB superpage must be filled with the
+ * superpage bit set.
+ */
+#define V3D_PTE_SUPERPAGE BIT(31)
+#define V3D_PTE_WRITEABLE BIT(29)
+#define V3D_PTE_VALID BIT(28)
+
+static int v3d_mmu_flush_all(struct v3d_dev *v3d)
+{
+ int ret;
+
+ /* Make sure that another flush isn't already running when we
+ * start this one.
+ */
+ ret = wait_for(!(V3D_READ(V3D_MMU_CTL) &
+ V3D_MMU_CTL_TLB_CLEARING), 100);
+ if (ret)
+ dev_err(v3d->dev, "TLB clear wait idle pre-wait failed\n");
+
+ V3D_WRITE(V3D_MMU_CTL, V3D_READ(V3D_MMU_CTL) |
+ V3D_MMU_CTL_TLB_CLEAR);
+
+ V3D_WRITE(V3D_MMUC_CONTROL,
+ V3D_MMUC_CONTROL_FLUSH |
+ V3D_MMUC_CONTROL_ENABLE);
+
+ ret = wait_for(!(V3D_READ(V3D_MMU_CTL) &
+ V3D_MMU_CTL_TLB_CLEARING), 100);
+ if (ret) {
+ dev_err(v3d->dev, "TLB clear wait idle failed\n");
+ return ret;
+ }
+
+ ret = wait_for(!(V3D_READ(V3D_MMUC_CONTROL) &
+ V3D_MMUC_CONTROL_FLUSHING), 100);
+ if (ret)
+ dev_err(v3d->dev, "MMUC flush wait idle failed\n");
+
+ return ret;
+}
+
+int v3d_mmu_set_page_table(struct v3d_dev *v3d)
+{
+ V3D_WRITE(V3D_MMU_PT_PA_BASE, v3d->pt_paddr >> V3D_MMU_PAGE_SHIFT);
+ V3D_WRITE(V3D_MMU_CTL,
+ V3D_MMU_CTL_ENABLE |
+ V3D_MMU_CTL_PT_INVALID |
+ V3D_MMU_CTL_PT_INVALID_ABORT |
+ V3D_MMU_CTL_WRITE_VIOLATION_ABORT |
+ V3D_MMU_CTL_CAP_EXCEEDED_ABORT);
+ V3D_WRITE(V3D_MMU_ILLEGAL_ADDR,
+ (v3d->mmu_scratch_paddr >> V3D_MMU_PAGE_SHIFT) |
+ V3D_MMU_ILLEGAL_ADDR_ENABLE);
+ V3D_WRITE(V3D_MMUC_CONTROL, V3D_MMUC_CONTROL_ENABLE);
+
+ return v3d_mmu_flush_all(v3d);
+}
+
+void v3d_mmu_insert_ptes(struct v3d_bo *bo)
+{
+ struct v3d_dev *v3d = to_v3d_dev(bo->base.dev);
+ u32 page = bo->node.start;
+ u32 page_prot = V3D_PTE_WRITEABLE | V3D_PTE_VALID;
+ unsigned int count;
+ struct scatterlist *sgl;
+
+ for_each_sg(bo->sgt->sgl, sgl, bo->sgt->nents, count) {
+ u32 page_address = sg_dma_address(sgl) >> V3D_MMU_PAGE_SHIFT;
+ u32 pte = page_prot | page_address;
+ u32 i;
+
+ BUG_ON(page_address + (sg_dma_len(sgl) >> V3D_MMU_PAGE_SHIFT) >=
+ BIT(24));
+
+ for (i = 0; i < sg_dma_len(sgl) >> V3D_MMU_PAGE_SHIFT; i++)
+ v3d->pt[page++] = pte + i;
+ }
+
+ WARN_ON_ONCE(page - bo->node.start !=
+ bo->base.size >> V3D_MMU_PAGE_SHIFT);
+
+ if (v3d_mmu_flush_all(v3d))
+ dev_err(v3d->dev, "MMU flush timeout\n");
+}
+
+void v3d_mmu_remove_ptes(struct v3d_bo *bo)
+{
+ struct v3d_dev *v3d = to_v3d_dev(bo->base.dev);
+ u32 npages = bo->base.size >> V3D_MMU_PAGE_SHIFT;
+ u32 page;
+
+ for (page = bo->node.start; page < bo->node.start + npages; page++)
+ v3d->pt[page] = 0;
+
+ if (v3d_mmu_flush_all(v3d))
+ dev_err(v3d->dev, "MMU flush timeout\n");
+}
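Each PTE written by v3d_mmu_insert_ptes() is just the protection flags OR'd with the page frame number; because the page-address field sits in the low bits, the "pte + i" increment in the loop walks the mapping one 4KB page at a time. A sketch of the encoding for a single page, reusing the V3D_PTE_* definitions above:

    /* Encode one V3D PTE for a writeable 4KB page at dma_addr (sketch). */
    static u32 v3d_encode_pte(dma_addr_t dma_addr)
    {
        u32 page_address = dma_addr >> V3D_MMU_PAGE_SHIFT;

        return V3D_PTE_WRITEABLE | V3D_PTE_VALID | page_address;
    }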
diff --git a/drivers/gpu/drm/v3d/v3d_regs.h b/drivers/gpu/drm/v3d/v3d_regs.h
new file mode 100644
index 0000000..fc13282
--- /dev/null
+++ b/drivers/gpu/drm/v3d/v3d_regs.h
@@ -0,0 +1,295 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (C) 2017-2018 Broadcom */
+
+#ifndef V3D_REGS_H
+#define V3D_REGS_H
+
+#include <linux/bitops.h>
+
+#define V3D_MASK(high, low) ((u32)GENMASK(high, low))
+/* Using the GNU statement expression extension */
+#define V3D_SET_FIELD(value, field) \
+ ({ \
+ u32 fieldval = (value) << field##_SHIFT; \
+ WARN_ON((fieldval & ~field##_MASK) != 0); \
+ fieldval & field##_MASK; \
+ })
+
+#define V3D_GET_FIELD(word, field) (((word) & field##_MASK) >> \
+ field##_SHIFT)
+
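+/* Illustrative use of the field helpers above (example only), with the
+ * IDENT1 fields defined below:
+ *
+ *   u32 reg = V3D_SET_FIELD(2, V3D_HUB_IDENT1_NCORES);
+ *   int ncores = V3D_GET_FIELD(reg, V3D_HUB_IDENT1_NCORES);
+ */
+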
+/* Hub registers for shared hardware between V3D cores. */
+
+#define V3D_HUB_AXICFG 0x00000
+# define V3D_HUB_AXICFG_MAX_LEN_MASK V3D_MASK(3, 0)
+# define V3D_HUB_AXICFG_MAX_LEN_SHIFT 0
+#define V3D_HUB_UIFCFG 0x00004
+#define V3D_HUB_IDENT0 0x00008
+
+#define V3D_HUB_IDENT1 0x0000c
+# define V3D_HUB_IDENT1_WITH_MSO BIT(19)
+# define V3D_HUB_IDENT1_WITH_TSY BIT(18)
+# define V3D_HUB_IDENT1_WITH_TFU BIT(17)
+# define V3D_HUB_IDENT1_WITH_L3C BIT(16)
+# define V3D_HUB_IDENT1_NHOSTS_MASK V3D_MASK(15, 12)
+# define V3D_HUB_IDENT1_NHOSTS_SHIFT 12
+# define V3D_HUB_IDENT1_NCORES_MASK V3D_MASK(11, 8)
+# define V3D_HUB_IDENT1_NCORES_SHIFT 8
+# define V3D_HUB_IDENT1_REV_MASK V3D_MASK(7, 4)
+# define V3D_HUB_IDENT1_REV_SHIFT 4
+# define V3D_HUB_IDENT1_TVER_MASK V3D_MASK(3, 0)
+# define V3D_HUB_IDENT1_TVER_SHIFT 0
+
+#define V3D_HUB_IDENT2 0x00010
+# define V3D_HUB_IDENT2_WITH_MMU BIT(8)
+# define V3D_HUB_IDENT2_L3C_NKB_MASK V3D_MASK(7, 0)
+# define V3D_HUB_IDENT2_L3C_NKB_SHIFT 0
+
+#define V3D_HUB_IDENT3 0x00014
+# define V3D_HUB_IDENT3_IPREV_MASK V3D_MASK(15, 8)
+# define V3D_HUB_IDENT3_IPREV_SHIFT 8
+# define V3D_HUB_IDENT3_IPIDX_MASK V3D_MASK(7, 0)
+# define V3D_HUB_IDENT3_IPIDX_SHIFT 0
+
+#define V3D_HUB_INT_STS 0x00050
+#define V3D_HUB_INT_SET 0x00054
+#define V3D_HUB_INT_CLR 0x00058
+#define V3D_HUB_INT_MSK_STS 0x0005c
+#define V3D_HUB_INT_MSK_SET 0x00060
+#define V3D_HUB_INT_MSK_CLR 0x00064
+# define V3D_HUB_INT_MMU_WRV BIT(5)
+# define V3D_HUB_INT_MMU_PTI BIT(4)
+# define V3D_HUB_INT_MMU_CAP BIT(3)
+# define V3D_HUB_INT_MSO BIT(2)
+# define V3D_HUB_INT_TFUC BIT(1)
+# define V3D_HUB_INT_TFUF BIT(0)
+
+#define V3D_GCA_CACHE_CTRL 0x0000c
+# define V3D_GCA_CACHE_CTRL_FLUSH BIT(0)
+
+#define V3D_GCA_SAFE_SHUTDOWN 0x000b0
+# define V3D_GCA_SAFE_SHUTDOWN_EN BIT(0)
+
+#define V3D_GCA_SAFE_SHUTDOWN_ACK 0x000b4
+# define V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED 3
+
+# define V3D_TOP_GR_BRIDGE_REVISION 0x00000
+# define V3D_TOP_GR_BRIDGE_MAJOR_MASK V3D_MASK(15, 8)
+# define V3D_TOP_GR_BRIDGE_MAJOR_SHIFT 8
+# define V3D_TOP_GR_BRIDGE_MINOR_MASK V3D_MASK(7, 0)
+# define V3D_TOP_GR_BRIDGE_MINOR_SHIFT 0
+
+/* 7268 reset reg */
+# define V3D_TOP_GR_BRIDGE_SW_INIT_0 0x00008
+# define V3D_TOP_GR_BRIDGE_SW_INIT_0_V3D_CLK_108_SW_INIT BIT(0)
+/* 7278 reset reg */
+# define V3D_TOP_GR_BRIDGE_SW_INIT_1 0x0000c
+# define V3D_TOP_GR_BRIDGE_SW_INIT_1_V3D_CLK_108_SW_INIT BIT(0)
+
+/* Per-MMU registers. */
+
+#define V3D_MMUC_CONTROL 0x01000
+# define V3D_MMUC_CONTROL_CLEAR BIT(3)
+# define V3D_MMUC_CONTROL_FLUSHING BIT(2)
+# define V3D_MMUC_CONTROL_FLUSH BIT(1)
+# define V3D_MMUC_CONTROL_ENABLE BIT(0)
+
+#define V3D_MMU_CTL 0x01200
+# define V3D_MMU_CTL_CAP_EXCEEDED BIT(27)
+# define V3D_MMU_CTL_CAP_EXCEEDED_ABORT BIT(26)
+# define V3D_MMU_CTL_CAP_EXCEEDED_INT BIT(25)
+# define V3D_MMU_CTL_CAP_EXCEEDED_EXCEPTION BIT(24)
+# define V3D_MMU_CTL_PT_INVALID BIT(20)
+# define V3D_MMU_CTL_PT_INVALID_ABORT BIT(19)
+# define V3D_MMU_CTL_PT_INVALID_INT BIT(18)
+# define V3D_MMU_CTL_PT_INVALID_EXCEPTION BIT(17)
+# define V3D_MMU_CTL_WRITE_VIOLATION BIT(16)
+# define V3D_MMU_CTL_WRITE_VIOLATION_ABORT BIT(11)
+# define V3D_MMU_CTL_WRITE_VIOLATION_INT BIT(10)
+# define V3D_MMU_CTL_WRITE_VIOLATION_EXCEPTION BIT(9)
+# define V3D_MMU_CTL_TLB_CLEARING BIT(7)
+# define V3D_MMU_CTL_TLB_STATS_CLEAR BIT(3)
+# define V3D_MMU_CTL_TLB_CLEAR BIT(2)
+# define V3D_MMU_CTL_TLB_STATS_ENABLE BIT(1)
+# define V3D_MMU_CTL_ENABLE BIT(0)
+
+#define V3D_MMU_PT_PA_BASE 0x01204
+#define V3D_MMU_HIT 0x01208
+#define V3D_MMU_MISSES 0x0120c
+#define V3D_MMU_STALLS 0x01210
+
+#define V3D_MMU_ADDR_CAP 0x01214
+# define V3D_MMU_ADDR_CAP_ENABLE BIT(31)
+# define V3D_MMU_ADDR_CAP_MPAGE_MASK V3D_MASK(11, 0)
+# define V3D_MMU_ADDR_CAP_MPAGE_SHIFT 0
+
+#define V3D_MMU_SHOOT_DOWN 0x01218
+# define V3D_MMU_SHOOT_DOWN_SHOOTING BIT(29)
+# define V3D_MMU_SHOOT_DOWN_SHOOT BIT(28)
+# define V3D_MMU_SHOOT_DOWN_PAGE_MASK V3D_MASK(27, 0)
+# define V3D_MMU_SHOOT_DOWN_PAGE_SHIFT 0
+
+#define V3D_MMU_BYPASS_START 0x0121c
+#define V3D_MMU_BYPASS_END 0x01220
+
+/* AXI ID of the access that faulted */
+#define V3D_MMU_VIO_ID 0x0122c
+
+/* Address for illegal PTEs to return */
+#define V3D_MMU_ILLEGAL_ADDR 0x01230
+# define V3D_MMU_ILLEGAL_ADDR_ENABLE BIT(31)
+
+/* Address that faulted */
+#define V3D_MMU_VIO_ADDR 0x01234
+
+/* Per-V3D-core registers */
+
+#define V3D_CTL_IDENT0 0x00000
+# define V3D_IDENT0_VER_MASK V3D_MASK(31, 24)
+# define V3D_IDENT0_VER_SHIFT 24
+
+#define V3D_CTL_IDENT1 0x00004
+/* Multiples of 1kb */
+# define V3D_IDENT1_VPM_SIZE_MASK V3D_MASK(31, 28)
+# define V3D_IDENT1_VPM_SIZE_SHIFT 28
+# define V3D_IDENT1_NSEM_MASK V3D_MASK(23, 16)
+# define V3D_IDENT1_NSEM_SHIFT 16
+# define V3D_IDENT1_NTMU_MASK V3D_MASK(15, 12)
+# define V3D_IDENT1_NTMU_SHIFT 12
+# define V3D_IDENT1_QUPS_MASK V3D_MASK(11, 8)
+# define V3D_IDENT1_QUPS_SHIFT 8
+# define V3D_IDENT1_NSLC_MASK V3D_MASK(7, 4)
+# define V3D_IDENT1_NSLC_SHIFT 4
+# define V3D_IDENT1_REV_MASK V3D_MASK(3, 0)
+# define V3D_IDENT1_REV_SHIFT 0
+
+#define V3D_CTL_IDENT2 0x00008
+# define V3D_IDENT2_BCG_INT BIT(28)
+
+#define V3D_CTL_MISCCFG 0x00018
+# define V3D_MISCCFG_OVRTMUOUT BIT(0)
+
+#define V3D_CTL_L2CACTL 0x00020
+# define V3D_L2CACTL_L2CCLR BIT(2)
+# define V3D_L2CACTL_L2CDIS BIT(1)
+# define V3D_L2CACTL_L2CENA BIT(0)
+
+#define V3D_CTL_SLCACTL 0x00024
+# define V3D_SLCACTL_TVCCS_MASK V3D_MASK(27, 24)
+# define V3D_SLCACTL_TVCCS_SHIFT 24
+# define V3D_SLCACTL_TDCCS_MASK V3D_MASK(19, 16)
+# define V3D_SLCACTL_TDCCS_SHIFT 16
+# define V3D_SLCACTL_UCC_MASK V3D_MASK(11, 8)
+# define V3D_SLCACTL_UCC_SHIFT 8
+# define V3D_SLCACTL_ICC_MASK V3D_MASK(3, 0)
+# define V3D_SLCACTL_ICC_SHIFT 0
+
+#define V3D_CTL_L2TCACTL 0x00030
+# define V3D_L2TCACTL_TMUWCF BIT(8)
+# define V3D_L2TCACTL_L2T_NO_WM BIT(4)
+# define V3D_L2TCACTL_FLM_FLUSH 0
+# define V3D_L2TCACTL_FLM_CLEAR 1
+# define V3D_L2TCACTL_FLM_CLEAN 2
+# define V3D_L2TCACTL_FLM_MASK V3D_MASK(2, 1)
+# define V3D_L2TCACTL_FLM_SHIFT 1
+# define V3D_L2TCACTL_L2TFLS BIT(0)
+#define V3D_CTL_L2TFLSTA 0x00034
+#define V3D_CTL_L2TFLEND 0x00038
+
+#define V3D_CTL_INT_STS 0x00050
+#define V3D_CTL_INT_SET 0x00054
+#define V3D_CTL_INT_CLR 0x00058
+#define V3D_CTL_INT_MSK_STS 0x0005c
+#define V3D_CTL_INT_MSK_SET 0x00060
+#define V3D_CTL_INT_MSK_CLR 0x00064
+# define V3D_INT_QPU_MASK V3D_MASK(27, 16)
+# define V3D_INT_QPU_SHIFT 16
+# define V3D_INT_GMPV BIT(5)
+# define V3D_INT_TRFB BIT(4)
+# define V3D_INT_SPILLUSE BIT(3)
+# define V3D_INT_OUTOMEM BIT(2)
+# define V3D_INT_FLDONE BIT(1)
+# define V3D_INT_FRDONE BIT(0)
+
+#define V3D_CLE_CT0CS 0x00100
+#define V3D_CLE_CT1CS 0x00104
+#define V3D_CLE_CTNCS(n) (V3D_CLE_CT0CS + 4 * n)
+#define V3D_CLE_CT0EA 0x00108
+#define V3D_CLE_CT1EA 0x0010c
+#define V3D_CLE_CTNEA(n) (V3D_CLE_CT0EA + 4 * n)
+#define V3D_CLE_CT0CA 0x00110
+#define V3D_CLE_CT1CA 0x00114
+#define V3D_CLE_CTNCA(n) (V3D_CLE_CT0CA + 4 * n)
+#define V3D_CLE_CT0RA 0x00118
+#define V3D_CLE_CT1RA 0x0011c
+#define V3D_CLE_CT0LC 0x00120
+#define V3D_CLE_CT1LC 0x00124
+#define V3D_CLE_CT0PC 0x00128
+#define V3D_CLE_CT1PC 0x0012c
+#define V3D_CLE_PCS 0x00130
+#define V3D_CLE_BFC 0x00134
+#define V3D_CLE_RFC 0x00138
+#define V3D_CLE_TFBC 0x0013c
+#define V3D_CLE_TFIT 0x00140
+#define V3D_CLE_CT1CFG 0x00144
+#define V3D_CLE_CT1TILECT 0x00148
+#define V3D_CLE_CT1TSKIP 0x0014c
+#define V3D_CLE_CT1PTCT 0x00150
+#define V3D_CLE_CT0SYNC 0x00154
+#define V3D_CLE_CT1SYNC 0x00158
+#define V3D_CLE_CT0QTS 0x0015c
+# define V3D_CLE_CT0QTS_ENABLE BIT(1)
+#define V3D_CLE_CT0QBA 0x00160
+#define V3D_CLE_CT1QBA 0x00164
+#define V3D_CLE_CTNQBA(n) (V3D_CLE_CT0QBA + 4 * n)
+#define V3D_CLE_CT0QEA 0x00168
+#define V3D_CLE_CT1QEA 0x0016c
+#define V3D_CLE_CTNQEA(n) (V3D_CLE_CT0QEA + 4 * n)
+#define V3D_CLE_CT0QMA 0x00170
+#define V3D_CLE_CT0QMS 0x00174
+#define V3D_CLE_CT1QCFG 0x00178
+/* If set without ETPROC, entirely skip tiles with no primitives. */
+# define V3D_CLE_QCFG_ETFILT BIT(7)
+/* If set with ETFILT, just write the clear color to tiles with no
+ * primitives.
+ */
+# define V3D_CLE_QCFG_ETPROC BIT(6)
+# define V3D_CLE_QCFG_ETSFLUSH BIT(1)
+# define V3D_CLE_QCFG_MCDIS BIT(0)
+
+#define V3D_PTB_BPCA 0x00300
+#define V3D_PTB_BPCS 0x00304
+#define V3D_PTB_BPOA 0x00308
+#define V3D_PTB_BPOS 0x0030c
+
+#define V3D_PTB_BXCF 0x00310
+# define V3D_PTB_BXCF_RWORDERDISA BIT(1)
+# define V3D_PTB_BXCF_CLIPDISA BIT(0)
+
+#define V3D_GMP_STATUS 0x00800
+# define V3D_GMP_STATUS_GMPRST BIT(31)
+# define V3D_GMP_STATUS_WR_COUNT_MASK V3D_MASK(30, 24)
+# define V3D_GMP_STATUS_WR_COUNT_SHIFT 24
+# define V3D_GMP_STATUS_RD_COUNT_MASK V3D_MASK(22, 16)
+# define V3D_GMP_STATUS_RD_COUNT_SHIFT 16
+# define V3D_GMP_STATUS_WR_ACTIVE BIT(5)
+# define V3D_GMP_STATUS_RD_ACTIVE BIT(4)
+# define V3D_GMP_STATUS_CFG_BUSY BIT(3)
+# define V3D_GMP_STATUS_CNTOVF BIT(2)
+# define V3D_GMP_STATUS_INVPROT BIT(1)
+# define V3D_GMP_STATUS_VIO BIT(0)
+
+#define V3D_GMP_CFG 0x00804
+# define V3D_GMP_CFG_LBURSTEN BIT(3)
# define V3D_GMP_CFG_PGCRSEN BIT(2) /* bit position inferred from layout */
+# define V3D_GMP_CFG_STOP_REQ BIT(1)
+# define V3D_GMP_CFG_PROT_ENABLE BIT(0)
+
+#define V3D_GMP_VIO_ADDR 0x00808
+#define V3D_GMP_VIO_TYPE 0x0080c
+#define V3D_GMP_TABLE_ADDR 0x00810
+#define V3D_GMP_CLEAR_LOAD 0x00814
+#define V3D_GMP_PRESERVE_LOAD 0x00818
+#define V3D_GMP_VALID_LINES 0x00820
+
+#endif /* V3D_REGS_H */
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
new file mode 100644
index 0000000..b07bece
--- /dev/null
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (C) 2018 Broadcom */
+
+/**
+ * DOC: Broadcom V3D scheduling
+ *
+ * The shared DRM GPU scheduler is used to coordinate submitting jobs
+ * to the hardware. Each DRM fd (roughly a client process) gets its
+ * own scheduler entity, which will process jobs in order. The GPU
+ * scheduler will round-robin between clients to submit the next job.
+ *
+ * For simplicity, and in order to keep latency low for interactive
+ * jobs when bulk background jobs are queued up, we submit a new job
+ * to the HW only when it has completed the last one, instead of
+ * filling up the CT[01]Q FIFOs with jobs. Similarly, we use
+ * v3d_job_dependency() to manage the dependency between bin and
+ * render, instead of having the clients submit jobs using the
+ * HW's semaphores to interlock between them.
+ */
+
+#include <linux/kthread.h>
+
+#include "v3d_drv.h"
+#include "v3d_regs.h"
+#include "v3d_trace.h"
+
+static struct v3d_job *
+to_v3d_job(struct drm_sched_job *sched_job)
+{
+ return container_of(sched_job, struct v3d_job, base);
+}
+
+static void
+v3d_job_free(struct drm_sched_job *sched_job)
+{
+ struct v3d_job *job = to_v3d_job(sched_job);
+
+ v3d_exec_put(job->exec);
+}
+
+/**
+ * Returns the fences that the bin job depends on, one by one.
+ * v3d_job_run() won't be called until all of them have been signaled.
+ */
+static struct dma_fence *
+v3d_job_dependency(struct drm_sched_job *sched_job,
+ struct drm_sched_entity *s_entity)
+{
+ struct v3d_job *job = to_v3d_job(sched_job);
+ struct v3d_exec_info *exec = job->exec;
+ enum v3d_queue q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
+ struct dma_fence *fence;
+
+ fence = job->in_fence;
+ if (fence) {
+ job->in_fence = NULL;
+ return fence;
+ }
+
+ if (q == V3D_RENDER) {
+ /* If we had a bin job, the render job definitely depends on
+ * it. We first have to wait for bin to be scheduled, so that
+ * its done_fence is created.
+ */
+ fence = exec->bin_done_fence;
+ if (fence) {
+ exec->bin_done_fence = NULL;
+ return fence;
+ }
+ }
+
+ /* XXX: Wait on a fence for switching the GMP if necessary,
+ * and then do so.
+ */
+
+ return fence;
+}
+
+static struct dma_fence *v3d_job_run(struct drm_sched_job *sched_job)
+{
+ struct v3d_job *job = to_v3d_job(sched_job);
+ struct v3d_exec_info *exec = job->exec;
+ enum v3d_queue q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
+ struct v3d_dev *v3d = exec->v3d;
+ struct drm_device *dev = &v3d->drm;
+ struct dma_fence *fence;
+ unsigned long irqflags;
+
+ if (unlikely(job->base.s_fence->finished.error))
+ return NULL;
+
+ /* Lock required around bin_job update vs
+ * v3d_overflow_mem_work().
+ */
+ spin_lock_irqsave(&v3d->job_lock, irqflags);
+ if (q == V3D_BIN) {
+ v3d->bin_job = job->exec;
+
+ /* Clear out the overflow allocation, so we don't
+ * reuse the overflow attached to a previous job.
+ */
+ V3D_CORE_WRITE(0, V3D_PTB_BPOS, 0);
+ } else {
+ v3d->render_job = job->exec;
+ }
+ spin_unlock_irqrestore(&v3d->job_lock, irqflags);
+
+ /* Can we avoid this flush when q==RENDER? We need to be
+ * careful of scheduling, though -- imagine job0 rendering to
+ * texture and job1 reading, and them being executed as bin0,
+ * bin1, render0, render1, so that render1's flush at bin time
+ * wasn't enough.
+ */
+ v3d_invalidate_caches(v3d);
+
+ fence = v3d_fence_create(v3d, q);
+ if (!fence)
+ return fence;
+
+ if (job->done_fence)
+ dma_fence_put(job->done_fence);
+ job->done_fence = dma_fence_get(fence);
+
+ trace_v3d_submit_cl(dev, q == V3D_RENDER, to_v3d_fence(fence)->seqno,
+ job->start, job->end);
+
+ if (q == V3D_BIN) {
+ if (exec->qma) {
+ V3D_CORE_WRITE(0, V3D_CLE_CT0QMA, exec->qma);
+ V3D_CORE_WRITE(0, V3D_CLE_CT0QMS, exec->qms);
+ }
+ if (exec->qts) {
+ V3D_CORE_WRITE(0, V3D_CLE_CT0QTS,
+ V3D_CLE_CT0QTS_ENABLE |
+ exec->qts);
+ }
+ } else {
+ /* XXX: Set the QCFG */
+ }
+
+ /* Set the current and end address of the control list.
+ * Writing the end register is what starts the job.
+ */
+ V3D_CORE_WRITE(0, V3D_CLE_CTNQBA(q), job->start);
+ V3D_CORE_WRITE(0, V3D_CLE_CTNQEA(q), job->end);
+
+ return fence;
+}
+
+static void
+v3d_job_timedout(struct drm_sched_job *sched_job)
+{
+ struct v3d_job *job = to_v3d_job(sched_job);
+ struct v3d_exec_info *exec = job->exec;
+ struct v3d_dev *v3d = exec->v3d;
+ enum v3d_queue q;
+
+ mutex_lock(&v3d->reset_lock);
+
+ /* block scheduler */
+ for (q = 0; q < V3D_MAX_QUEUES; q++) {
+ struct drm_gpu_scheduler *sched = &v3d->queue[q].sched;
+
+ kthread_park(sched->thread);
+ drm_sched_hw_job_reset(sched, (sched_job->sched == sched ?
+ sched_job : NULL));
+ }
+
+ /* get the GPU back into the init state */
+ v3d_reset(v3d);
+
+ /* Unblock schedulers and restart their jobs. */
+ for (q = 0; q < V3D_MAX_QUEUES; q++) {
+ drm_sched_job_recovery(&v3d->queue[q].sched);
+ kthread_unpark(v3d->queue[q].sched.thread);
+ }
+
+ mutex_unlock(&v3d->reset_lock);
+}
+
+static const struct drm_sched_backend_ops v3d_sched_ops = {
+ .dependency = v3d_job_dependency,
+ .run_job = v3d_job_run,
+ .timedout_job = v3d_job_timedout,
+ .free_job = v3d_job_free
+};
+
+int
+v3d_sched_init(struct v3d_dev *v3d)
+{
+ int hw_jobs_limit = 1;
+ int job_hang_limit = 0;
+ int hang_limit_ms = 500;
+ int ret;
+
+ ret = drm_sched_init(&v3d->queue[V3D_BIN].sched,
+ &v3d_sched_ops,
+ hw_jobs_limit, job_hang_limit,
+ msecs_to_jiffies(hang_limit_ms),
+ "v3d_bin");
+ if (ret) {
+ dev_err(v3d->dev, "Failed to create bin scheduler: %d.", ret);
+ return ret;
+ }
+
+ ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched,
+ &v3d_sched_ops,
+ hw_jobs_limit, job_hang_limit,
+ msecs_to_jiffies(hang_limit_ms),
+ "v3d_render");
+ if (ret) {
+ dev_err(v3d->dev, "Failed to create render scheduler: %d.",
+ ret);
+ drm_sched_fini(&v3d->queue[V3D_BIN].sched);
+ return ret;
+ }
+
+ return 0;
+}
+
+void
+v3d_sched_fini(struct v3d_dev *v3d)
+{
+ enum v3d_queue q;
+
+ for (q = 0; q < V3D_MAX_QUEUES; q++)
+ drm_sched_fini(&v3d->queue[q].sched);
+}
diff --git a/drivers/gpu/drm/v3d/v3d_trace.h b/drivers/gpu/drm/v3d/v3d_trace.h
new file mode 100644
index 0000000..85dd351
--- /dev/null
+++ b/drivers/gpu/drm/v3d/v3d_trace.h
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (C) 2015-2018 Broadcom */
+
+#if !defined(_V3D_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _V3D_TRACE_H_
+
+#include <linux/stringify.h>
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM v3d
+#define TRACE_INCLUDE_FILE v3d_trace
+
+TRACE_EVENT(v3d_submit_cl,
+ TP_PROTO(struct drm_device *dev, bool is_render,
+ uint64_t seqno,
+ u32 ctnqba, u32 ctnqea),
+ TP_ARGS(dev, is_render, seqno, ctnqba, ctnqea),
+
+ TP_STRUCT__entry(
+ __field(u32, dev)
+ __field(bool, is_render)
+ __field(u64, seqno)
+ __field(u32, ctnqba)
+ __field(u32, ctnqea)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dev->primary->index;
+ __entry->is_render = is_render;
+ __entry->seqno = seqno;
+ __entry->ctnqba = ctnqba;
+ __entry->ctnqea = ctnqea;
+ ),
+
+ TP_printk("dev=%u, %s, seqno=%llu, 0x%08x..0x%08x",
+ __entry->dev,
+ __entry->is_render ? "RCL" : "BCL",
+ __entry->seqno,
+ __entry->ctnqba,
+ __entry->ctnqea)
+);
+
+TRACE_EVENT(v3d_reset_begin,
+ TP_PROTO(struct drm_device *dev),
+ TP_ARGS(dev),
+
+ TP_STRUCT__entry(
+ __field(u32, dev)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dev->primary->index;
+ ),
+
+ TP_printk("dev=%u",
+ __entry->dev)
+);
+
+TRACE_EVENT(v3d_reset_end,
+ TP_PROTO(struct drm_device *dev),
+ TP_ARGS(dev),
+
+ TP_STRUCT__entry(
+ __field(u32, dev)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = dev->primary->index;
+ ),
+
+ TP_printk("dev=%u",
+ __entry->dev)
+);
+
+#endif /* _V3D_TRACE_H_ */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#include <trace/define_trace.h>
diff --git a/drivers/gpu/drm/v3d/v3d_trace_points.c b/drivers/gpu/drm/v3d/v3d_trace_points.c
new file mode 100644
index 0000000..482922d
--- /dev/null
+++ b/drivers/gpu/drm/v3d/v3d_trace_points.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (C) 2015 Broadcom */
+
+#include "v3d_drv.h"
+
+#ifndef __CHECKER__
+#define CREATE_TRACE_POINTS
+#include "v3d_trace.h"
+#endif
diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
index 83d3b79..c8650bb 100644
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -741,6 +741,7 @@ static irqreturn_t vc4_crtc_irq_handler(int irq, void *data)
struct vc4_async_flip_state {
struct drm_crtc *crtc;
struct drm_framebuffer *fb;
+ struct drm_framebuffer *old_fb;
struct drm_pending_vblank_event *event;
struct vc4_seqno_cb cb;
@@ -770,6 +771,23 @@ vc4_async_page_flip_complete(struct vc4_seqno_cb *cb)
drm_crtc_vblank_put(crtc);
drm_framebuffer_put(flip_state->fb);
+
+ /* Decrement the BO usecnt in order to keep the inc/dec calls balanced
+ * when the planes are updated through the async update path.
+ * FIXME: we should move to generic async-page-flip when it's
+ * available, so that we can get rid of this hand-made cleanup_fb()
+ * logic.
+ */
+ if (flip_state->old_fb) {
+ struct drm_gem_cma_object *cma_bo;
+ struct vc4_bo *bo;
+
+ cma_bo = drm_fb_cma_get_gem_obj(flip_state->old_fb, 0);
+ bo = to_vc4_bo(&cma_bo->base);
+ vc4_bo_dec_usecnt(bo);
+ drm_framebuffer_put(flip_state->old_fb);
+ }
+
kfree(flip_state);
up(&vc4->async_modeset);
@@ -794,9 +812,22 @@ static int vc4_async_page_flip(struct drm_crtc *crtc,
struct drm_gem_cma_object *cma_bo = drm_fb_cma_get_gem_obj(fb, 0);
struct vc4_bo *bo = to_vc4_bo(&cma_bo->base);
+ /* Increment the BO usecnt here, so that we never end up with an
+ * unbalanced number of vc4_bo_{dec,inc}_usecnt() calls when the
+ * plane is later updated through the non-async path.
+ * FIXME: we should move to generic async-page-flip when it's
+ * available, so that we can get rid of this hand-made prepare_fb()
+ * logic.
+ */
+ ret = vc4_bo_inc_usecnt(bo);
+ if (ret)
+ return ret;
+
flip_state = kzalloc(sizeof(*flip_state), GFP_KERNEL);
- if (!flip_state)
+ if (!flip_state) {
+ vc4_bo_dec_usecnt(bo);
return -ENOMEM;
+ }
drm_framebuffer_get(fb);
flip_state->fb = fb;
@@ -807,10 +838,23 @@ static int vc4_async_page_flip(struct drm_crtc *crtc,
ret = down_interruptible(&vc4->async_modeset);
if (ret) {
drm_framebuffer_put(fb);
+ vc4_bo_dec_usecnt(bo);
kfree(flip_state);
return ret;
}
+ /* Save the current FB before it's replaced by the new one in
+ * drm_atomic_set_fb_for_plane(). We'll need the old FB in
+ * vc4_async_page_flip_complete() to decrement the BO usecnt and keep
+ * it consistent.
+ * FIXME: we should move to generic async-page-flip when it's
+ * available, so that we can get rid of this hand-made cleanup_fb()
+ * logic.
+ */
+ flip_state->old_fb = plane->state->fb;
+ if (flip_state->old_fb)
+ drm_framebuffer_get(flip_state->old_fb);
+
WARN_ON(drm_crtc_vblank_get(crtc) != 0);
/* Immediately update the plane's legacy fb pointer, so that later
diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
index 40ddeaa..d9b8b70 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.c
+++ b/drivers/gpu/drm/vc4/vc4_drv.c
@@ -175,7 +175,8 @@ static struct drm_driver vc4_drm_driver = {
DRIVER_GEM |
DRIVER_HAVE_IRQ |
DRIVER_RENDER |
- DRIVER_PRIME),
+ DRIVER_PRIME |
+ DRIVER_SYNCOBJ),
.lastclose = drm_fb_helper_lastclose,
.open = vc4_open,
.postclose = vc4_close,
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
index 22589d3..554a4e8 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -11,6 +11,7 @@
#include <drm/drm_encoder.h>
#include <drm/drm_gem_cma_helper.h>
#include <drm/drm_atomic.h>
+#include <drm/drm_syncobj.h>
#include "uapi/drm/vc4_drm.h"
diff --git a/drivers/gpu/drm/vc4/vc4_dsi.c b/drivers/gpu/drm/vc4/vc4_dsi.c
index 94085f8..8aa8978 100644
--- a/drivers/gpu/drm/vc4/vc4_dsi.c
+++ b/drivers/gpu/drm/vc4/vc4_dsi.c
@@ -753,6 +753,11 @@ static void vc4_dsi_ulps(struct vc4_dsi *dsi, bool ulps)
(dsi->lanes > 2 ? DSI1_STAT_PHY_D2_STOP : 0) |
(dsi->lanes > 3 ? DSI1_STAT_PHY_D3_STOP : 0));
int ret;
+ bool ulps_currently_enabled = (DSI_PORT_READ(PHY_AFEC0) &
+ DSI_PORT_BIT(PHY_AFEC0_LATCH_ULPS));
+
+ if (ulps == ulps_currently_enabled)
+ return;
DSI_PORT_WRITE(STAT, stat_ulps);
DSI_PORT_WRITE(PHYC, DSI_PORT_READ(PHYC) | phyc_ulps);
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
index 2107b0d..7910b9a 100644
--- a/drivers/gpu/drm/vc4/vc4_gem.c
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -27,6 +27,7 @@
#include <linux/device.h>
#include <linux/io.h>
#include <linux/sched/signal.h>
+#include <linux/dma-fence-array.h>
#include "uapi/drm/vc4_drm.h"
#include "vc4_drv.h"
@@ -655,7 +656,8 @@ retry:
*/
static int
vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec,
- struct ww_acquire_ctx *acquire_ctx)
+ struct ww_acquire_ctx *acquire_ctx,
+ struct drm_syncobj *out_sync)
{
struct vc4_dev *vc4 = to_vc4_dev(dev);
struct vc4_exec_info *renderjob;
@@ -678,6 +680,9 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec,
fence->seqno = exec->seqno;
exec->fence = &fence->base;
+ if (out_sync)
+ drm_syncobj_replace_fence(out_sync, exec->fence);
+
vc4_update_bo_seqnos(exec, seqno);
vc4_unlock_bo_reservations(dev, exec, acquire_ctx);
@@ -1113,8 +1118,10 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
struct vc4_dev *vc4 = to_vc4_dev(dev);
struct vc4_file *vc4file = file_priv->driver_priv;
struct drm_vc4_submit_cl *args = data;
+ struct drm_syncobj *out_sync = NULL;
struct vc4_exec_info *exec;
struct ww_acquire_ctx acquire_ctx;
+ struct dma_fence *in_fence;
int ret = 0;
if ((args->flags & ~(VC4_SUBMIT_CL_USE_CLEAR_COLOR |
@@ -1126,7 +1133,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
}
if (args->pad2 != 0) {
- DRM_DEBUG("->pad2 must be set to zero\n");
+ DRM_DEBUG("Invalid pad: 0x%08x\n", args->pad2);
return -EINVAL;
}
@@ -1164,6 +1171,29 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
}
}
+ if (args->in_sync) {
+ ret = drm_syncobj_find_fence(file_priv, args->in_sync,
+ &in_fence);
+ if (ret)
+ goto fail;
+
+ /* When the fence (or fence array) is exclusively from our
+ * context we can skip the wait since jobs are executed in
+ * order of their submission through this ioctl and this can
+ * only have fences from a prior job.
+ */
+ if (!dma_fence_match_context(in_fence,
+ vc4->dma_fence_context)) {
+ ret = dma_fence_wait(in_fence, true);
+ if (ret) {
+ dma_fence_put(in_fence);
+ goto fail;
+ }
+ }
+
+ dma_fence_put(in_fence);
+ }
+
if (exec->args->bin_cl_size != 0) {
ret = vc4_get_bcl(dev, exec);
if (ret)
@@ -1181,12 +1211,33 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
if (ret)
goto fail;
+ if (args->out_sync) {
+ out_sync = drm_syncobj_find(file_priv, args->out_sync);
+ if (!out_sync) {
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ /* We replace the fence in out_sync in vc4_queue_submit since
+ * the render job could execute immediately after that call.
+ * If it finishes before our ioctl processing resumes the
+ * render job fence could already have been freed.
+ */
+ }
+
/* Clear this out of the struct we'll be putting in the queue,
* since it's part of our stack.
*/
exec->args = NULL;
- ret = vc4_queue_submit(dev, exec, &acquire_ctx);
+ ret = vc4_queue_submit(dev, exec, &acquire_ctx, out_sync);
+
+ /* The syncobj isn't part of the exec data and we need to free our
+ * reference even if job submission failed.
+ */
+ if (out_sync)
+ drm_syncobj_put(out_sync);
+
if (ret)
goto fail;
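The in_sync wait above is skipped when dma_fence_match_context() reports that every fence involved comes from vc4's own fence context, since jobs submitted through this ioctl already execute in order. The newly included dma-fence-array.h is what lets that check look inside fence arrays; a simplified model of the rule (illustrative, not the kernel implementation):

    /* A fence array only "matches" when all member fences share the context. */
    static bool fences_all_from(struct dma_fence **fences, int n, u64 ctx)
    {
        int i;

        for (i = 0; i < n; i++)
            if (fences[i]->context != ctx)
                return false;
        return true;
    }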
diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c
index bfc2fa7..e47e294 100644
--- a/drivers/gpu/drm/vc4/vc4_v3d.c
+++ b/drivers/gpu/drm/vc4/vc4_v3d.c
@@ -218,8 +218,7 @@ try_again:
* overall CMA pool before they make scenes complicated enough to run
* out of bin space.
*/
-int
-vc4_allocate_bin_bo(struct drm_device *drm)
+static int vc4_allocate_bin_bo(struct drm_device *drm)
{
struct vc4_dev *vc4 = to_vc4_dev(drm);
struct vc4_v3d *v3d = vc4->v3d;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 70e1a88..97f37c3 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -159,14 +159,14 @@ static const struct drm_ioctl_desc vmw_ioctls[] = {
DRM_RENDER_ALLOW),
VMW_IOCTL_DEF(VMW_CURSOR_BYPASS,
vmw_kms_cursor_bypass_ioctl,
- DRM_MASTER | DRM_CONTROL_ALLOW),
+ DRM_MASTER),
VMW_IOCTL_DEF(VMW_CONTROL_STREAM, vmw_overlay_ioctl,
- DRM_MASTER | DRM_CONTROL_ALLOW),
+ DRM_MASTER),
VMW_IOCTL_DEF(VMW_CLAIM_STREAM, vmw_stream_claim_ioctl,
- DRM_MASTER | DRM_CONTROL_ALLOW),
+ DRM_MASTER),
VMW_IOCTL_DEF(VMW_UNREF_STREAM, vmw_stream_unref_ioctl,
- DRM_MASTER | DRM_CONTROL_ALLOW),
+ DRM_MASTER),
VMW_IOCTL_DEF(VMW_CREATE_CONTEXT, vmw_context_define_ioctl,
DRM_AUTH | DRM_RENDER_ALLOW),
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
index 2582ffd..ba0cdb7 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
@@ -441,11 +441,11 @@ static int vmwgfx_set_config_internal(struct drm_mode_set *set)
struct drm_crtc *crtc = set->crtc;
struct drm_framebuffer *fb;
struct drm_crtc *tmp;
- struct drm_modeset_acquire_ctx *ctx;
struct drm_device *dev = set->crtc->dev;
+ struct drm_modeset_acquire_ctx ctx;
int ret;
- ctx = dev->mode_config.acquire_ctx;
+ drm_modeset_acquire_init(&ctx, 0);
restart:
/*
@@ -458,7 +458,7 @@ restart:
fb = set->fb;
- ret = crtc->funcs->set_config(set, ctx);
+ ret = crtc->funcs->set_config(set, &ctx);
if (ret == 0) {
crtc->primary->crtc = crtc;
crtc->primary->fb = fb;
@@ -473,20 +473,13 @@ restart:
}
if (ret == -EDEADLK) {
- dev->mode_config.acquire_ctx = NULL;
-
-retry_locking:
- drm_modeset_backoff(ctx);
-
- ret = drm_modeset_lock_all_ctx(dev, ctx);
- if (ret)
- goto retry_locking;
-
- dev->mode_config.acquire_ctx = ctx;
-
+ drm_modeset_backoff(&ctx);
goto restart;
}
+ drm_modeset_drop_locks(&ctx);
+ drm_modeset_acquire_fini(&ctx);
+
return ret;
}
@@ -624,7 +617,6 @@ static int vmw_fb_set_par(struct fb_info *info)
}
mutex_lock(&par->bo_mutex);
- drm_modeset_lock_all(vmw_priv->dev);
ret = vmw_fb_kms_framebuffer(info);
if (ret)
goto out_unlock;
@@ -657,7 +649,6 @@ out_unlock:
drm_mode_destroy(vmw_priv->dev, old_mode);
par->set_mode = mode;
- drm_modeset_unlock_all(vmw_priv->dev);
mutex_unlock(&par->bo_mutex);
return ret;
@@ -713,18 +704,14 @@ int vmw_fb_init(struct vmw_private *vmw_priv)
par->max_width = fb_width;
par->max_height = fb_height;
- drm_modeset_lock_all(vmw_priv->dev);
ret = vmw_kms_fbdev_init_data(vmw_priv, 0, par->max_width,
par->max_height, &par->con,
&par->crtc, &init_mode);
- if (ret) {
- drm_modeset_unlock_all(vmw_priv->dev);
+ if (ret)
goto err_kms;
- }
info->var.xres = init_mode->hdisplay;
info->var.yres = init_mode->vdisplay;
- drm_modeset_unlock_all(vmw_priv->dev);
/*
* Create buffers and alloc memory
@@ -832,7 +819,9 @@ int vmw_fb_close(struct vmw_private *vmw_priv)
cancel_delayed_work_sync(&par->local_work);
unregister_framebuffer(info);
+ mutex_lock(&par->bo_mutex);
(void) vmw_fb_kms_detach(par, true, true);
+ mutex_unlock(&par->bo_mutex);
vfree(par->vmalloc);
framebuffer_release(info);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index 6728c62..01f2dc9 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -2595,6 +2595,7 @@ void vmw_kms_helper_resource_finish(struct vmw_validation_ctx *ctx,
vmw_kms_helper_buffer_finish(res->dev_priv, NULL, ctx->buf,
out_fence, NULL);
+ vmw_dmabuf_unreference(&ctx->buf);
vmw_resource_unreserve(res, false, NULL, 0);
mutex_unlock(&res->dev_priv->cmdbuf_mutex);
}
@@ -2680,7 +2681,9 @@ int vmw_kms_fbdev_init_data(struct vmw_private *dev_priv,
struct vmw_display_unit *du;
struct drm_display_mode *mode;
int i = 0;
+ int ret = 0;
+ mutex_lock(&dev_priv->dev->mode_config.mutex);
list_for_each_entry(con, &dev_priv->dev->mode_config.connector_list,
head) {
if (i == unit)
@@ -2691,7 +2694,8 @@ int vmw_kms_fbdev_init_data(struct vmw_private *dev_priv,
if (i != unit) {
DRM_ERROR("Could not find initial display unit.\n");
- return -EINVAL;
+ ret = -EINVAL;
+ goto out_unlock;
}
if (list_empty(&con->modes))
@@ -2699,7 +2703,8 @@ int vmw_kms_fbdev_init_data(struct vmw_private *dev_priv,
if (list_empty(&con->modes)) {
DRM_ERROR("Could not find initial display mode.\n");
- return -EINVAL;
+ ret = -EINVAL;
+ goto out_unlock;
}
du = vmw_connector_to_du(con);
@@ -2720,7 +2725,10 @@ int vmw_kms_fbdev_init_data(struct vmw_private *dev_priv,
head);
}
- return 0;
+ out_unlock:
+ mutex_unlock(&dev_priv->dev->mode_config.mutex);
+
+ return ret;
}
/**
diff --git a/drivers/gpu/drm/xen/xen_drm_front.c b/drivers/gpu/drm/xen/xen_drm_front.c
index 1b0ea9a..3345ac7 100644
--- a/drivers/gpu/drm/xen/xen_drm_front.c
+++ b/drivers/gpu/drm/xen/xen_drm_front.c
@@ -188,8 +188,8 @@ int xen_drm_front_dbuf_create(struct xen_drm_front_info *front_info,
buf_cfg.be_alloc = front_info->cfg.be_alloc;
shbuf = xen_drm_front_shbuf_alloc(&buf_cfg);
- if (!shbuf)
- return -ENOMEM;
+ if (IS_ERR(shbuf))
+ return PTR_ERR(shbuf);
ret = dbuf_add_to_list(front_info, shbuf, dbuf_cookie);
if (ret < 0) {
@@ -543,8 +543,8 @@ static int xen_drm_drv_init(struct xen_drm_front_info *front_info)
front_info->drm_info = drm_info;
drm_dev = drm_dev_alloc(&xen_drm_driver, dev);
- if (!drm_dev) {
- ret = -ENOMEM;
+ if (IS_ERR(drm_dev)) {
+ ret = PTR_ERR(drm_dev);
goto fail;
}
@@ -778,7 +778,7 @@ static int xen_drv_remove(struct xenbus_device *dev)
*/
while ((xenbus_read_unsigned(front_info->xb_dev->otherend, "state",
XenbusStateUnknown) != XenbusStateInitWait) &&
- to--)
+ --to)
msleep(10);
if (!to) {
diff --git a/drivers/gpu/drm/xen/xen_drm_front_shbuf.c b/drivers/gpu/drm/xen/xen_drm_front_shbuf.c
index d570525..8099cb3 100644
--- a/drivers/gpu/drm/xen/xen_drm_front_shbuf.c
+++ b/drivers/gpu/drm/xen/xen_drm_front_shbuf.c
@@ -383,7 +383,7 @@ xen_drm_front_shbuf_alloc(struct xen_drm_front_shbuf_cfg *cfg)
buf = kzalloc(sizeof(*buf), GFP_KERNEL);
if (!buf)
- return NULL;
+ return ERR_PTR(-ENOMEM);
if (cfg->be_alloc)
buf->ops = &backend_ops;
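The conversion above moves the allocator to the kernel's ERR_PTR convention, letting callers propagate a real errno instead of assuming -ENOMEM. The idiom in miniature (hypothetical foo type, for illustration):

    struct foo *foo_alloc(void)
    {
        struct foo *f = kzalloc(sizeof(*f), GFP_KERNEL);

        return f ?: ERR_PTR(-ENOMEM);
    }

    /* Caller side: */
    struct foo *f = foo_alloc();
    if (IS_ERR(f))
        return PTR_ERR(f);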
diff --git a/drivers/video/hdmi.c b/drivers/video/hdmi.c
index 111a0ab..38716eb5 100644
--- a/drivers/video/hdmi.c
+++ b/drivers/video/hdmi.c
@@ -93,6 +93,9 @@ ssize_t hdmi_avi_infoframe_pack(struct hdmi_avi_infoframe *frame, void *buffer,
if (size < length)
return -ENOSPC;
+ if (frame->picture_aspect > HDMI_PICTURE_ASPECT_16_9)
+ return -EINVAL;
+
memset(buffer, 0, size);
ptr[0] = frame->type;