author		Dave Airlie <airlied@redhat.com>	2016-02-19 11:13:01 +1000
committer	Dave Airlie <airlied@redhat.com>	2016-02-19 11:13:01 +1000
commit		5263925c092d137a0830ca4afe692366127dca4e
tree		49ce726b058d36f5b5d21156716ab0153f443243
parent		08244c00859f25036417ea7b790cfa73e43443fc
parent		390be2824fa4211c2e973c69b72e04000559bba3
Merge branch 'drm-next-4.6' of git://people.freedesktop.org/~agd5f/linux into drm-next
First radeon and amdgpu pull request for 4.6. Highlights:
- ACP support for APUs with i2s audio
- CS ioctl optimizations
- GPU scheduler optimizations
- GPUVM optimizations
- Initial GPU reset support (not enabled yet)
- New powerplay sysfs interface for manually selecting clocks (a usage sketch follows this list)
- Powerplay fixes
- Virtualization fixes
- Removal of hw semaphore support
- Lots of other misc fixes and cleanups
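
For readers unfamiliar with the new powerplay interface: it is exposed as sysfs files under the DRM device node. A minimal user-space sketch, assuming a GPU at /sys/class/drm/card0/device and the pp_dpm_sclk and power_dpm_force_performance_level file names added by this series (exact names and accepted values may differ per kernel version, so treat the paths and the "write a level index" convention as assumptions):

    /* Sketch: list sclk levels, then force level 0 via the new powerplay
     * sysfs files.  Assumes card0; error handling is minimal on purpose. */
    #include <stdio.h>

    static int write_str(const char *path, const char *val)
    {
            FILE *f = fopen(path, "w");

            if (!f)
                    return -1;
            fputs(val, f);
            return fclose(f);
    }

    int main(void)
    {
            char buf[256];
            FILE *f = fopen("/sys/class/drm/card0/device/pp_dpm_sclk", "r");

            if (!f)
                    return 1;
            /* Each line is "<level>: <clock>", a '*' marking the active level. */
            while (fgets(buf, sizeof(buf), f))
                    fputs(buf, stdout);
            fclose(f);

            /* Manual clock selection only takes effect in "manual" mode. */
            if (write_str("/sys/class/drm/card0/device/power_dpm_force_performance_level",
                          "manual"))
                    return 1;
            return write_str("/sys/class/drm/card0/device/pp_dpm_sclk", "0") ? 1 : 0;
    }

The kernel side of this interface is visible further down in the amdgpu.h hunks (amdgpu_dpm_print_clock_levels, amdgpu_dpm_force_clock_level and the new AMDGPU_DPM_FORCED_LEVEL_MANUAL value).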
* 'drm-next-4.6' of git://people.freedesktop.org/~agd5f/linux: (118 commits)
drm/amdgpu: Don't call interval_tree_remove in amdgpu_mn_destroy
drm/amdgpu: Fix race condition in amdgpu_mn_unregister
drm/amdgpu: cleanup gem init/finit
drm/amdgpu: rework GEM info printing
drm/amdgpu: print the GPU offset as well in gem_info
drm/amdgpu: optionally print the pin count in gem_info as well
drm/amdgpu: print the BO size only once in amdgpu_gem_info
drm/amdgpu: print pid as integer
drm/amdgpu: remove page flip work queue v3
drm/amdgpu: stop blocking for page filp fences
drm/amdgpu: stop calling amdgpu_gpu_reset from the flip code
drm/amdgpu: remove fence reset detection leftovers
drm/amdgpu: Fix race condition in MMU notifier release
drm/radeon: Fix WARN_ON if DRM_DP_AUX_CHARDEV is enabled
drm/amdgpu/vi: move uvd tiling config setup into uvd code
drm/amdgpu/vi: move sdma tiling config setup into sdma code
drm/amdgpu/cik: move uvd tiling config setup into uvd code
drm/amdgpu/cik: move sdma tiling config setup into sdma code
drm/amdgpu/gfx7: rework gpu_init()
drm/amdgpu/gfx: clean up harvest configuration (v2)
...
86 files changed, 5486 insertions(+), 4365 deletions(-)
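
One structural change worth calling out before the patch body: amdgpu_sched_ib_submit_kernel_helper() is replaced by a small job API (amdgpu_job_alloc, amdgpu_job_alloc_with_ib, amdgpu_job_submit, amdgpu_job_free), declared in the amdgpu.h hunks below. A hedged sketch of the resulting in-kernel submission pattern, using only names that appear in this diff; the wrapping function, the 256-byte IB size and the use of the mman.entity scheduler entity are illustrative assumptions, not code from the series:

    /* Sketch only: allocate a one-IB job, fill it, hand it to the
     * GPU scheduler, and keep the returned fence. */
    static int example_submit(struct amdgpu_device *adev,
                              struct amdgpu_ring *ring)
    {
            struct amdgpu_job *job;
            struct fence *f;
            int r;

            r = amdgpu_job_alloc_with_ib(adev, 256, &job);
            if (r)
                    return r;

            /* ... write packets into job->ibs[0].ptr, bump length_dw ... */

            r = amdgpu_job_submit(job, ring, &adev->mman.entity,
                                  AMDGPU_FENCE_OWNER_UNDEFINED, &f);
            if (r)
                    amdgpu_job_free(job);   /* job is only consumed on success */
            else
                    fence_put(f);
            return r;
    }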
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 08706f0..f2a74d0 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -172,6 +172,8 @@ config DRM_AMDGPU source "drivers/gpu/drm/amd/amdgpu/Kconfig" source "drivers/gpu/drm/amd/powerplay/Kconfig" +source "drivers/gpu/drm/amd/acp/Kconfig" + source "drivers/gpu/drm/nouveau/Kconfig" config DRM_I810 diff --git a/drivers/gpu/drm/amd/acp/Kconfig b/drivers/gpu/drm/amd/acp/Kconfig new file mode 100644 index 0000000..2b07813 --- /dev/null +++ b/drivers/gpu/drm/amd/acp/Kconfig @@ -0,0 +1,11 @@ +menu "ACP Configuration" + +config DRM_AMD_ACP + bool "Enable ACP IP support" + default y + select MFD_CORE + select PM_GENERIC_DOMAINS if PM + help + Choose this option to enable ACP IP support for AMD SOCs. + +endmenu diff --git a/drivers/gpu/drm/amd/acp/Makefile b/drivers/gpu/drm/amd/acp/Makefile new file mode 100644 index 0000000..8363cb5 --- /dev/null +++ b/drivers/gpu/drm/amd/acp/Makefile @@ -0,0 +1,8 @@ +# +# Makefile for the ACP, which is a sub-component +# of AMDSOC/AMDGPU drm driver. +# It provides the HW control for ACP related functionalities. + +subdir-ccflags-y += -I$(AMDACPPATH)/ -I$(AMDACPPATH)/include + +AMD_ACP_FILES := $(AMDACPPATH)/acp_hw.o diff --git a/drivers/gpu/drm/amd/acp/acp_hw.c b/drivers/gpu/drm/amd/acp/acp_hw.c new file mode 100644 index 0000000..7af83f1 --- /dev/null +++ b/drivers/gpu/drm/amd/acp/acp_hw.c @@ -0,0 +1,50 @@ +/* + * Copyright 2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/device.h> +#include <linux/delay.h> +#include <linux/errno.h> + +#include "acp_gfx_if.h" + +#define ACP_MODE_I2S 0 +#define ACP_MODE_AZ 1 + +#define mmACP_AZALIA_I2S_SELECT 0x51d4 + +int amd_acp_hw_init(void *cgs_device, + unsigned acp_version_major, unsigned acp_version_minor) +{ + unsigned int acp_mode = ACP_MODE_I2S; + + if ((acp_version_major == 2) && (acp_version_minor == 2)) + acp_mode = cgs_read_register(cgs_device, + mmACP_AZALIA_I2S_SELECT); + + if (acp_mode != ACP_MODE_I2S) + return -ENODEV; + + return 0; +} diff --git a/drivers/gpu/drm/amd/acp/include/acp_gfx_if.h b/drivers/gpu/drm/amd/acp/include/acp_gfx_if.h new file mode 100644 index 0000000..bccf47b --- /dev/null +++ b/drivers/gpu/drm/amd/acp/include/acp_gfx_if.h @@ -0,0 +1,34 @@ +/* + * Copyright 2015 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * +*/ + +#ifndef _ACP_GFX_IF_H +#define _ACP_GFX_IF_H + +#include <linux/types.h> +#include "cgs_linux.h" +#include "cgs_common.h" + +int amd_acp_hw_init(void *cgs_device, + unsigned acp_version_major, unsigned acp_version_minor); + +#endif /* _ACP_GFX_IF_H */ diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 20c9539..c7fcdce 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -8,7 +8,8 @@ ccflags-y := -Iinclude/drm -I$(FULL_AMD_PATH)/include/asic_reg \ -I$(FULL_AMD_PATH)/include \ -I$(FULL_AMD_PATH)/amdgpu \ -I$(FULL_AMD_PATH)/scheduler \ - -I$(FULL_AMD_PATH)/powerplay/inc + -I$(FULL_AMD_PATH)/powerplay/inc \ + -I$(FULL_AMD_PATH)/acp/include amdgpu-y := amdgpu_drv.o @@ -20,7 +21,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_fb.o amdgpu_gem.o amdgpu_ring.o \ amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o amdgpu_test.o \ amdgpu_pm.o atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \ - atombios_encoders.o amdgpu_semaphore.o amdgpu_sa.o atombios_i2c.o \ + atombios_encoders.o amdgpu_sa.o atombios_i2c.o \ amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o @@ -92,7 +93,17 @@ amdgpu-y += amdgpu_cgs.o amdgpu-y += \ ../scheduler/gpu_scheduler.o \ ../scheduler/sched_fence.o \ - amdgpu_sched.o + amdgpu_job.o + +# ACP componet +ifneq ($(CONFIG_DRM_AMD_ACP),) +amdgpu-y += amdgpu_acp.o + +AMDACPPATH := ../acp +include $(FULL_AMD_PATH)/acp/Makefile + +amdgpu-y += $(AMD_ACP_FILES) +endif amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 82edf95..f5bac97 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -53,6 +53,7 @@ #include "amdgpu_ucode.h" #include "amdgpu_gds.h" #include "amd_powerplay.h" +#include "amdgpu_acp.h" #include "gpu_scheduler.h" @@ -74,7 +75,6 @@ extern int amdgpu_dpm; extern int amdgpu_smc_load_fw; extern int amdgpu_aspm; extern int amdgpu_runtime_pm; -extern int amdgpu_hard_reset; extern unsigned amdgpu_ip_block_mask; extern int amdgpu_bapm; extern int amdgpu_deep_color; @@ -82,10 +82,8 @@ extern int amdgpu_vm_size; extern int amdgpu_vm_block_size; extern int amdgpu_vm_fault_stop; extern int amdgpu_vm_debug; -extern int amdgpu_enable_scheduler; extern int amdgpu_sched_jobs; 
extern int amdgpu_sched_hw_submission; -extern int amdgpu_enable_semaphores; extern int amdgpu_powerplay; #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 @@ -106,9 +104,6 @@ extern int amdgpu_powerplay; /* max number of IP instances */ #define AMDGPU_MAX_SDMA_INSTANCES 2 -/* number of hw syncs before falling back on blocking */ -#define AMDGPU_NUM_SYNCS 4 - /* hardcode that limit for now */ #define AMDGPU_VA_RESERVED_SIZE (8 << 20) @@ -189,7 +184,6 @@ struct amdgpu_fence; struct amdgpu_ib; struct amdgpu_vm; struct amdgpu_ring; -struct amdgpu_semaphore; struct amdgpu_cs_parser; struct amdgpu_job; struct amdgpu_irq_src; @@ -287,7 +281,7 @@ struct amdgpu_vm_pte_funcs { unsigned count); /* write pte one entry at a time with addr mapping */ void (*write_pte)(struct amdgpu_ib *ib, - uint64_t pe, + const dma_addr_t *pages_addr, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint32_t flags); /* for linear pte/pde updates without addr mapping */ @@ -295,8 +289,6 @@ struct amdgpu_vm_pte_funcs { uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint32_t flags); - /* pad the indirect buffer to the necessary number of dw */ - void (*pad_ib)(struct amdgpu_ib *ib); }; /* provided by the gmc block */ @@ -334,9 +326,6 @@ struct amdgpu_ring_funcs { struct amdgpu_ib *ib); void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr, uint64_t seq, unsigned flags); - bool (*emit_semaphore)(struct amdgpu_ring *ring, - struct amdgpu_semaphore *semaphore, - bool emit_wait); void (*emit_vm_flush)(struct amdgpu_ring *ring, unsigned vm_id, uint64_t pd_addr); void (*emit_hdp_flush)(struct amdgpu_ring *ring); @@ -349,6 +338,8 @@ struct amdgpu_ring_funcs { int (*test_ib)(struct amdgpu_ring *ring); /* insert NOP packets */ void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count); + /* pad the indirect buffer to the necessary number of dw */ + void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib); }; /* @@ -394,7 +385,7 @@ struct amdgpu_fence_driver { uint64_t gpu_addr; volatile uint32_t *cpu_addr; /* sync_seq is protected by ring emission lock */ - uint64_t sync_seq[AMDGPU_MAX_RINGS]; + uint64_t sync_seq; atomic64_t last_seq; bool initialized; struct amdgpu_irq_src *irq_src; @@ -447,11 +438,6 @@ int amdgpu_fence_wait_next(struct amdgpu_ring *ring); int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring); -bool amdgpu_fence_need_sync(struct amdgpu_fence *fence, - struct amdgpu_ring *ring); -void amdgpu_fence_note_sync(struct amdgpu_fence *fence, - struct amdgpu_ring *ring); - /* * TTM. 
*/ @@ -470,6 +456,8 @@ struct amdgpu_mman { /* buffer handling */ const struct amdgpu_buffer_funcs *buffer_funcs; struct amdgpu_ring *buffer_funcs_ring; + /* Scheduler entity for buffer moves */ + struct amd_sched_entity entity; }; int amdgpu_copy_buffer(struct amdgpu_ring *ring, @@ -484,8 +472,6 @@ struct amdgpu_bo_list_entry { struct amdgpu_bo *robj; struct ttm_validate_buffer tv; struct amdgpu_bo_va *bo_va; - unsigned prefered_domains; - unsigned allowed_domains; uint32_t priority; }; @@ -522,7 +508,8 @@ struct amdgpu_bo { /* Protected by gem.mutex */ struct list_head list; /* Protected by tbo.reserved */ - u32 initial_domain; + u32 prefered_domains; + u32 allowed_domains; struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1]; struct ttm_placement placement; struct ttm_buffer_object tbo; @@ -544,7 +531,6 @@ struct amdgpu_bo { struct amdgpu_bo *parent; struct ttm_bo_kmap_obj dma_buf_vmap; - pid_t pid; struct amdgpu_mn *mn; struct list_head mn_list; }; @@ -621,13 +607,7 @@ struct amdgpu_sa_bo { /* * GEM objects. */ -struct amdgpu_gem { - struct mutex mutex; - struct list_head objects; -}; - -int amdgpu_gem_init(struct amdgpu_device *adev); -void amdgpu_gem_fini(struct amdgpu_device *adev); +void amdgpu_gem_force_release(struct amdgpu_device *adev); int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, int alignment, u32 initial_domain, u64 flags, bool kernel, @@ -639,32 +619,10 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv, int amdgpu_mode_dumb_mmap(struct drm_file *filp, struct drm_device *dev, uint32_t handle, uint64_t *offset_p); - -/* - * Semaphores. - */ -struct amdgpu_semaphore { - struct amdgpu_sa_bo *sa_bo; - signed waiters; - uint64_t gpu_addr; -}; - -int amdgpu_semaphore_create(struct amdgpu_device *adev, - struct amdgpu_semaphore **semaphore); -bool amdgpu_semaphore_emit_signal(struct amdgpu_ring *ring, - struct amdgpu_semaphore *semaphore); -bool amdgpu_semaphore_emit_wait(struct amdgpu_ring *ring, - struct amdgpu_semaphore *semaphore); -void amdgpu_semaphore_free(struct amdgpu_device *adev, - struct amdgpu_semaphore **semaphore, - struct fence *fence); - /* * Synchronization */ struct amdgpu_sync { - struct amdgpu_semaphore *semaphores[AMDGPU_NUM_SYNCS]; - struct fence *sync_to[AMDGPU_MAX_RINGS]; DECLARE_HASHTABLE(fences, 4); struct fence *last_vm_update; }; @@ -676,12 +634,9 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct reservation_object *resv, void *owner); -int amdgpu_sync_rings(struct amdgpu_sync *sync, - struct amdgpu_ring *ring); struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync); int amdgpu_sync_wait(struct amdgpu_sync *sync); -void amdgpu_sync_free(struct amdgpu_device *adev, struct amdgpu_sync *sync, - struct fence *fence); +void amdgpu_sync_free(struct amdgpu_sync *sync); /* * GART structures, functions & helpers @@ -799,6 +754,7 @@ struct amdgpu_flip_work { struct fence *excl; unsigned shared_count; struct fence **shared; + struct fence_cb cb; }; @@ -811,12 +767,11 @@ struct amdgpu_ib { uint32_t length_dw; uint64_t gpu_addr; uint32_t *ptr; - struct amdgpu_ring *ring; struct amdgpu_fence *fence; struct amdgpu_user_fence *user; + bool grabbed_vmid; struct amdgpu_vm *vm; struct amdgpu_ctx *ctx; - struct amdgpu_sync sync; uint32_t gds_base, gds_size; uint32_t gws_base, gws_size; uint32_t oa_base, oa_size; @@ -835,13 +790,14 @@ enum amdgpu_ring_type { extern struct amd_sched_backend_ops amdgpu_sched_ops; -int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, - struct 
amdgpu_ring *ring, - struct amdgpu_ib *ibs, - unsigned num_ibs, - int (*free_job)(struct amdgpu_job *), - void *owner, - struct fence **fence); +int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, + struct amdgpu_job **job); +int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size, + struct amdgpu_job **job); +void amdgpu_job_free(struct amdgpu_job *job); +int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, + struct amd_sched_entity *entity, void *owner, + struct fence **f); struct amdgpu_ring { struct amdgpu_device *adev; @@ -850,7 +806,6 @@ struct amdgpu_ring { struct amd_gpu_scheduler sched; spinlock_t fence_lock; - struct mutex *ring_lock; struct amdgpu_bo *ring_obj; volatile uint32_t *ring; unsigned rptr_offs; @@ -859,7 +814,7 @@ struct amdgpu_ring { unsigned wptr; unsigned wptr_old; unsigned ring_size; - unsigned ring_free_dw; + unsigned max_dw; int count_dw; uint64_t gpu_addr; uint32_t align_mask; @@ -867,8 +822,6 @@ struct amdgpu_ring { bool ready; u32 nop; u32 idx; - u64 last_semaphore_signal_addr; - u64 last_semaphore_wait_addr; u32 me; u32 pipe; u32 queue; @@ -881,7 +834,6 @@ struct amdgpu_ring { struct amdgpu_ctx *current_ctx; enum amdgpu_ring_type type; char name[16]; - bool is_pte_ring; }; /* @@ -932,6 +884,8 @@ struct amdgpu_vm_id { }; struct amdgpu_vm { + /* tree of virtual addresses mapped */ + spinlock_t it_lock; struct rb_root va; /* protecting invalidated */ @@ -956,30 +910,40 @@ struct amdgpu_vm { /* for id and flush management per ring */ struct amdgpu_vm_id ids[AMDGPU_MAX_RINGS]; - /* for interval tree */ - spinlock_t it_lock; + /* protecting freed */ spinlock_t freed_lock; + + /* Scheduler entity for page table updates */ + struct amd_sched_entity entity; +}; + +struct amdgpu_vm_manager_id { + struct list_head list; + struct fence *active; + atomic_long_t owner; }; struct amdgpu_vm_manager { - struct { - struct fence *active; - atomic_long_t owner; - } ids[AMDGPU_NUM_VM]; + /* Handling of VMIDs */ + struct mutex lock; + unsigned num_ids; + struct list_head ids_lru; + struct amdgpu_vm_manager_id ids[AMDGPU_NUM_VM]; uint32_t max_pfn; - /* number of VMIDs */ - unsigned nvm; /* vram base address for page table entry */ u64 vram_base_offset; /* is vm enabled? 
*/ bool enabled; /* vm pte handling */ const struct amdgpu_vm_pte_funcs *vm_pte_funcs; - struct amdgpu_ring *vm_pte_funcs_ring; + struct amdgpu_ring *vm_pte_rings[AMDGPU_MAX_RINGS]; + unsigned vm_pte_num_rings; + atomic_t vm_pte_next_ring; }; +void amdgpu_vm_manager_init(struct amdgpu_device *adev); void amdgpu_vm_manager_fini(struct amdgpu_device *adev); int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm); void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm); @@ -990,14 +954,11 @@ void amdgpu_vm_get_pt_bos(struct amdgpu_vm *vm, struct list_head *duplicates); void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev, struct amdgpu_vm *vm); int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, - struct amdgpu_sync *sync); + struct amdgpu_sync *sync, struct fence *fence); void amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_vm *vm, struct fence *updates); -void amdgpu_vm_fence(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct fence *fence); -uint64_t amdgpu_vm_map_gart(struct amdgpu_device *adev, uint64_t addr); +uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr); int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, struct amdgpu_vm *vm); int amdgpu_vm_clear_freed(struct amdgpu_device *adev, @@ -1023,7 +984,6 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, uint64_t addr); void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va); -int amdgpu_vm_free_job(struct amdgpu_job *job); /* * context related structures @@ -1051,10 +1011,6 @@ struct amdgpu_ctx_mgr { struct idr ctx_handles; }; -int amdgpu_ctx_init(struct amdgpu_device *adev, enum amd_sched_priority pri, - struct amdgpu_ctx *ctx); -void amdgpu_ctx_fini(struct amdgpu_ctx *ctx); - struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id); int amdgpu_ctx_put(struct amdgpu_ctx *ctx); @@ -1096,6 +1052,8 @@ struct amdgpu_bo_list { struct amdgpu_bo_list * amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id); +void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list, + struct list_head *validated); void amdgpu_bo_list_put(struct amdgpu_bo_list *list); void amdgpu_bo_list_free(struct amdgpu_bo_list *list); @@ -1169,6 +1127,7 @@ struct amdgpu_gca_config { unsigned multi_gpu_tile_size; unsigned mc_arb_ramcfg; unsigned gb_addr_config; + unsigned num_rbs; uint32_t tile_mode_array[32]; uint32_t macrotile_mode_array[16]; @@ -1211,23 +1170,21 @@ struct amdgpu_gfx { unsigned ce_ram_size; }; -int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm, +int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned size, struct amdgpu_ib *ib); void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib); -int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, - struct amdgpu_ib *ib, void *owner); +int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, + struct amdgpu_ib *ib, void *owner, + struct fence *last_vm_update, + struct fence **f); int amdgpu_ib_pool_init(struct amdgpu_device *adev); void amdgpu_ib_pool_fini(struct amdgpu_device *adev); int amdgpu_ib_ring_tests(struct amdgpu_device *adev); -/* Ring access between begin & end cannot sleep */ -void amdgpu_ring_free_size(struct amdgpu_ring *ring); int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw); -int amdgpu_ring_lock(struct amdgpu_ring *ring, unsigned ndw); void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count); +void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, 
struct amdgpu_ib *ib); void amdgpu_ring_commit(struct amdgpu_ring *ring); -void amdgpu_ring_unlock_commit(struct amdgpu_ring *ring); void amdgpu_ring_undo(struct amdgpu_ring *ring); -void amdgpu_ring_unlock_undo(struct amdgpu_ring *ring); unsigned amdgpu_ring_backup(struct amdgpu_ring *ring, uint32_t **data); int amdgpu_ring_restore(struct amdgpu_ring *ring, @@ -1246,47 +1203,57 @@ struct amdgpu_cs_chunk { uint32_t chunk_id; uint32_t length_dw; uint32_t *kdata; - void __user *user_ptr; }; struct amdgpu_cs_parser { struct amdgpu_device *adev; struct drm_file *filp; struct amdgpu_ctx *ctx; - struct amdgpu_bo_list *bo_list; + /* chunks */ unsigned nchunks; struct amdgpu_cs_chunk *chunks; - /* relocations */ - struct amdgpu_bo_list_entry vm_pd; - struct list_head validated; - struct fence *fence; - struct amdgpu_ib *ibs; - uint32_t num_ibs; + /* scheduler job object */ + struct amdgpu_job *job; - struct ww_acquire_ctx ticket; + /* buffer objects */ + struct ww_acquire_ctx ticket; + struct amdgpu_bo_list *bo_list; + struct amdgpu_bo_list_entry vm_pd; + struct list_head validated; + struct fence *fence; + uint64_t bytes_moved_threshold; + uint64_t bytes_moved; /* user fence */ - struct amdgpu_user_fence uf; struct amdgpu_bo_list_entry uf_entry; }; struct amdgpu_job { struct amd_sched_job base; struct amdgpu_device *adev; + struct amdgpu_ring *ring; + struct amdgpu_sync sync; struct amdgpu_ib *ibs; uint32_t num_ibs; void *owner; struct amdgpu_user_fence uf; - int (*free_job)(struct amdgpu_job *job); }; #define to_amdgpu_job(sched_job) \ container_of((sched_job), struct amdgpu_job, base) -static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, uint32_t ib_idx, int idx) +static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, + uint32_t ib_idx, int idx) { - return p->ibs[ib_idx].ptr[idx]; + return p->job->ibs[ib_idx].ptr[idx]; +} + +static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p, + uint32_t ib_idx, int idx, + uint32_t value) +{ + p->job->ibs[ib_idx].ptr[idx] = value; } /* @@ -1538,6 +1505,7 @@ enum amdgpu_dpm_forced_level { AMDGPU_DPM_FORCED_LEVEL_AUTO = 0, AMDGPU_DPM_FORCED_LEVEL_LOW = 1, AMDGPU_DPM_FORCED_LEVEL_HIGH = 2, + AMDGPU_DPM_FORCED_LEVEL_MANUAL = 3, }; struct amdgpu_vce_state { @@ -1667,6 +1635,7 @@ struct amdgpu_uvd { struct amdgpu_ring ring; struct amdgpu_irq_src irq; bool address_64_bit; + struct amd_sched_entity entity; }; /* @@ -1691,6 +1660,7 @@ struct amdgpu_vce { struct amdgpu_ring ring[AMDGPU_MAX_VCE_RINGS]; struct amdgpu_irq_src irq; unsigned harvest_config; + struct amd_sched_entity entity; }; /* @@ -1925,6 +1895,18 @@ void amdgpu_cgs_destroy_device(void *cgs_device); /* + * CGS + */ +void *amdgpu_cgs_create_device(struct amdgpu_device *adev); +void amdgpu_cgs_destroy_device(void *cgs_device); + + +/* GPU virtualization */ +struct amdgpu_virtualization { + bool supports_sr_iov; +}; + +/* * Core structure, functions and helpers. 
*/ typedef uint32_t (*amdgpu_rreg_t)(struct amdgpu_device*, uint32_t); @@ -1944,6 +1926,10 @@ struct amdgpu_device { struct drm_device *ddev; struct pci_dev *pdev; +#ifdef CONFIG_DRM_AMD_ACP + struct amdgpu_acp acp; +#endif + /* ASIC */ enum amd_asic_type asic_type; uint32_t family; @@ -2020,7 +2006,6 @@ struct amdgpu_device { /* memory management */ struct amdgpu_mman mman; - struct amdgpu_gem gem; struct amdgpu_vram_scratch vram_scratch; struct amdgpu_wb wb; atomic64_t vram_usage; @@ -2038,7 +2023,6 @@ struct amdgpu_device { /* rings */ unsigned fence_context; - struct mutex ring_lock; unsigned num_rings; struct amdgpu_ring *rings[AMDGPU_MAX_RINGS]; bool ib_pool_ready; @@ -2050,6 +2034,7 @@ struct amdgpu_device { /* powerplay */ struct amd_powerplay powerplay; bool pp_enabled; + bool pp_force_state_enabled; /* dpm */ struct amdgpu_pm pm; @@ -2091,8 +2076,7 @@ struct amdgpu_device { /* amdkfd interface */ struct kfd_dev *kfd; - /* kernel conext for IB submission */ - struct amdgpu_ctx kernel_ctx; + struct amdgpu_virtualization virtualization; }; bool amdgpu_device_is_px(struct drm_device *dev); @@ -2197,7 +2181,6 @@ static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v) ring->ring[ring->wptr++] = v; ring->wptr &= ring->ptr_mask; ring->count_dw--; - ring->ring_free_dw--; } static inline struct amdgpu_sdma_instance * @@ -2233,9 +2216,8 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid)) #define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags)) #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count))) -#define amdgpu_vm_write_pte(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (addr), (count), (incr), (flags))) +#define amdgpu_vm_write_pte(adev, ib, pa, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pa), (pe), (addr), (count), (incr), (flags))) #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags))) -#define amdgpu_vm_pad_ib(adev, ib) ((adev)->vm_manager.vm_pte_funcs->pad_ib((ib))) #define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib))) #define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r)) #define amdgpu_ring_test_ib(r) (r)->funcs->test_ib((r)) @@ -2245,9 +2227,9 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_ring_emit_ib(r, ib) (r)->funcs->emit_ib((r), (ib)) #define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr)) #define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags)) -#define amdgpu_ring_emit_semaphore(r, semaphore, emit_wait) (r)->funcs->emit_semaphore((r), (semaphore), (emit_wait)) #define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as)) #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r)) +#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) #define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev)) #define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv)) #define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev)) @@ -2339,6 +2321,21 @@ 
amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_dpm_get_performance_level(adev) \ (adev)->powerplay.pp_funcs->get_performance_level((adev)->powerplay.pp_handle) +#define amdgpu_dpm_get_pp_num_states(adev, data) \ + (adev)->powerplay.pp_funcs->get_pp_num_states((adev)->powerplay.pp_handle, data) + +#define amdgpu_dpm_get_pp_table(adev, table) \ + (adev)->powerplay.pp_funcs->get_pp_table((adev)->powerplay.pp_handle, table) + +#define amdgpu_dpm_set_pp_table(adev, buf, size) \ + (adev)->powerplay.pp_funcs->set_pp_table((adev)->powerplay.pp_handle, buf, size) + +#define amdgpu_dpm_print_clock_levels(adev, type, buf) \ + (adev)->powerplay.pp_funcs->print_clock_levels((adev)->powerplay.pp_handle, type, buf) + +#define amdgpu_dpm_force_clock_level(adev, type, level) \ + (adev)->powerplay.pp_funcs->force_clock_level((adev)->powerplay.pp_handle, type, level) + #define amdgpu_dpm_dispatch_task(adev, event_id, input, output) \ (adev)->powerplay.pp_funcs->dispatch_tasks((adev)->powerplay.pp_handle, (event_id), (input), (output)) @@ -2349,7 +2346,6 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev); void amdgpu_pci_config_reset(struct amdgpu_device *adev); bool amdgpu_card_posted(struct amdgpu_device *adev); void amdgpu_update_display_priority(struct amdgpu_device *adev); -bool amdgpu_boot_test_post_card(struct amdgpu_device *adev); int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data); int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type, @@ -2359,7 +2355,9 @@ void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *rbo, u32 domain); bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo); int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, uint32_t flags); -bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm); +struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm); +bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, + unsigned long end); bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm); uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, struct ttm_mem_reg *mem); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c new file mode 100644 index 0000000..9f8cfaa --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -0,0 +1,502 @@ +/* + * Copyright 2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#include <linux/irqdomain.h> +#include <linux/pm_domain.h> +#include <linux/platform_device.h> +#include <sound/designware_i2s.h> +#include <sound/pcm.h> + +#include "amdgpu.h" +#include "atom.h" +#include "amdgpu_acp.h" + +#include "acp_gfx_if.h" + +#define ACP_TILE_ON_MASK 0x03 +#define ACP_TILE_OFF_MASK 0x02 +#define ACP_TILE_ON_RETAIN_REG_MASK 0x1f +#define ACP_TILE_OFF_RETAIN_REG_MASK 0x20 + +#define ACP_TILE_P1_MASK 0x3e +#define ACP_TILE_P2_MASK 0x3d +#define ACP_TILE_DSP0_MASK 0x3b +#define ACP_TILE_DSP1_MASK 0x37 + +#define ACP_TILE_DSP2_MASK 0x2f + +#define ACP_DMA_REGS_END 0x146c0 +#define ACP_I2S_PLAY_REGS_START 0x14840 +#define ACP_I2S_PLAY_REGS_END 0x148b4 +#define ACP_I2S_CAP_REGS_START 0x148b8 +#define ACP_I2S_CAP_REGS_END 0x1496c + +#define ACP_I2S_COMP1_CAP_REG_OFFSET 0xac +#define ACP_I2S_COMP2_CAP_REG_OFFSET 0xa8 +#define ACP_I2S_COMP1_PLAY_REG_OFFSET 0x6c +#define ACP_I2S_COMP2_PLAY_REG_OFFSET 0x68 + +#define mmACP_PGFSM_RETAIN_REG 0x51c9 +#define mmACP_PGFSM_CONFIG_REG 0x51ca +#define mmACP_PGFSM_READ_REG_0 0x51cc + +#define mmACP_MEM_SHUT_DOWN_REQ_LO 0x51f8 +#define mmACP_MEM_SHUT_DOWN_REQ_HI 0x51f9 +#define mmACP_MEM_SHUT_DOWN_STS_LO 0x51fa +#define mmACP_MEM_SHUT_DOWN_STS_HI 0x51fb + +#define ACP_TIMEOUT_LOOP 0x000000FF +#define ACP_DEVS 3 +#define ACP_SRC_ID 162 + +enum { + ACP_TILE_P1 = 0, + ACP_TILE_P2, + ACP_TILE_DSP0, + ACP_TILE_DSP1, + ACP_TILE_DSP2, +}; + +static int acp_sw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + adev->acp.parent = adev->dev; + + adev->acp.cgs_device = + amdgpu_cgs_create_device(adev); + if (!adev->acp.cgs_device) + return -EINVAL; + + return 0; +} + +static int acp_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (adev->acp.cgs_device) + amdgpu_cgs_destroy_device(adev->acp.cgs_device); + + return 0; +} + +/* power off a tile/block within ACP */ +static int acp_suspend_tile(void *cgs_dev, int tile) +{ + u32 val = 0; + u32 count = 0; + + if ((tile < ACP_TILE_P1) || (tile > ACP_TILE_DSP2)) { + pr_err("Invalid ACP tile : %d to suspend\n", tile); + return -1; + } + + val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0 + tile); + val &= ACP_TILE_ON_MASK; + + if (val == 0x0) { + val = cgs_read_register(cgs_dev, mmACP_PGFSM_RETAIN_REG); + val = val | (1 << tile); + cgs_write_register(cgs_dev, mmACP_PGFSM_RETAIN_REG, val); + cgs_write_register(cgs_dev, mmACP_PGFSM_CONFIG_REG, + 0x500 + tile); + + count = ACP_TIMEOUT_LOOP; + while (true) { + val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0 + + tile); + val = val & ACP_TILE_ON_MASK; + if (val == ACP_TILE_OFF_MASK) + break; + if (--count == 0) { + pr_err("Timeout reading ACP PGFSM status\n"); + return -ETIMEDOUT; + } + udelay(100); + } + + val = cgs_read_register(cgs_dev, mmACP_PGFSM_RETAIN_REG); + + val |= ACP_TILE_OFF_RETAIN_REG_MASK; + cgs_write_register(cgs_dev, mmACP_PGFSM_RETAIN_REG, val); + } + return 0; +} + +/* power on a tile/block within ACP */ +static int acp_resume_tile(void *cgs_dev, int tile) +{ + u32 val = 0; + u32 count = 0; + + if ((tile < ACP_TILE_P1) || (tile > ACP_TILE_DSP2)) { + pr_err("Invalid ACP tile to resume\n"); + return -1; + } + + val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0 + tile); + val = val & ACP_TILE_ON_MASK; + + if (val != 0x0) { + cgs_write_register(cgs_dev, mmACP_PGFSM_CONFIG_REG, + 0x600 + tile); + count = ACP_TIMEOUT_LOOP; + while (true) { + val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0 + + tile); + val = 
val & ACP_TILE_ON_MASK; + if (val == 0x0) + break; + if (--count == 0) { + pr_err("Timeout reading ACP PGFSM status\n"); + return -ETIMEDOUT; + } + udelay(100); + } + val = cgs_read_register(cgs_dev, mmACP_PGFSM_RETAIN_REG); + if (tile == ACP_TILE_P1) + val = val & (ACP_TILE_P1_MASK); + else if (tile == ACP_TILE_P2) + val = val & (ACP_TILE_P2_MASK); + + cgs_write_register(cgs_dev, mmACP_PGFSM_RETAIN_REG, val); + } + return 0; +} + +struct acp_pm_domain { + void *cgs_dev; + struct generic_pm_domain gpd; +}; + +static int acp_poweroff(struct generic_pm_domain *genpd) +{ + int i, ret; + struct acp_pm_domain *apd; + + apd = container_of(genpd, struct acp_pm_domain, gpd); + if (apd != NULL) { + /* Donot return abruptly if any of power tile fails to suspend. + * Log it and continue powering off other tile + */ + for (i = 4; i >= 0 ; i--) { + ret = acp_suspend_tile(apd->cgs_dev, ACP_TILE_P1 + i); + if (ret) + pr_err("ACP tile %d tile suspend failed\n", i); + } + } + return 0; +} + +static int acp_poweron(struct generic_pm_domain *genpd) +{ + int i, ret; + struct acp_pm_domain *apd; + + apd = container_of(genpd, struct acp_pm_domain, gpd); + if (apd != NULL) { + for (i = 0; i < 2; i++) { + ret = acp_resume_tile(apd->cgs_dev, ACP_TILE_P1 + i); + if (ret) { + pr_err("ACP tile %d resume failed\n", i); + break; + } + } + + /* Disable DSPs which are not going to be used */ + for (i = 0; i < 3; i++) { + ret = acp_suspend_tile(apd->cgs_dev, ACP_TILE_DSP0 + i); + /* Continue suspending other DSP, even if one fails */ + if (ret) + pr_err("ACP DSP %d suspend failed\n", i); + } + } + return 0; +} + +static struct device *get_mfd_cell_dev(const char *device_name, int r) +{ + char auto_dev_name[25]; + char buf[8]; + struct device *dev; + + sprintf(buf, ".%d.auto", r); + strcpy(auto_dev_name, device_name); + strcat(auto_dev_name, buf); + dev = bus_find_device_by_name(&platform_bus_type, NULL, auto_dev_name); + dev_info(dev, "device %s added to pm domain\n", auto_dev_name); + + return dev; +} + +/** + * acp_hw_init - start and test ACP block + * + * @adev: amdgpu_device pointer + * + */ +static int acp_hw_init(void *handle) +{ + int r, i; + uint64_t acp_base; + struct device *dev; + struct i2s_platform_data *i2s_pdata; + + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + const struct amdgpu_ip_block_version *ip_version = + amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_ACP); + + if (!ip_version) + return -EINVAL; + + r = amd_acp_hw_init(adev->acp.cgs_device, + ip_version->major, ip_version->minor); + /* -ENODEV means board uses AZ rather than ACP */ + if (r == -ENODEV) + return 0; + else if (r) + return r; + + r = cgs_get_pci_resource(adev->acp.cgs_device, CGS_RESOURCE_TYPE_MMIO, + 0x5289, 0, &acp_base); + if (r == -ENODEV) + return 0; + else if (r) + return r; + + adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL); + if (adev->acp.acp_genpd == NULL) + return -ENOMEM; + + adev->acp.acp_genpd->gpd.name = "ACP_AUDIO"; + adev->acp.acp_genpd->gpd.power_off = acp_poweroff; + adev->acp.acp_genpd->gpd.power_on = acp_poweron; + + + adev->acp.acp_genpd->cgs_dev = adev->acp.cgs_device; + + pm_genpd_init(&adev->acp.acp_genpd->gpd, NULL, false); + + adev->acp.acp_cell = kzalloc(sizeof(struct mfd_cell) * ACP_DEVS, + GFP_KERNEL); + + if (adev->acp.acp_cell == NULL) + return -ENOMEM; + + adev->acp.acp_res = kzalloc(sizeof(struct resource) * 4, GFP_KERNEL); + + if (adev->acp.acp_res == NULL) { + kfree(adev->acp.acp_cell); + return -ENOMEM; + } + + i2s_pdata = kzalloc(sizeof(struct 
i2s_platform_data) * 2, GFP_KERNEL); + if (i2s_pdata == NULL) { + kfree(adev->acp.acp_res); + kfree(adev->acp.acp_cell); + return -ENOMEM; + } + + i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET; + i2s_pdata[0].cap = DWC_I2S_PLAY; + i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000; + i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_PLAY_REG_OFFSET; + i2s_pdata[0].i2s_reg_comp2 = ACP_I2S_COMP2_PLAY_REG_OFFSET; + + i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET | + DW_I2S_QUIRK_COMP_PARAM1; + i2s_pdata[1].cap = DWC_I2S_RECORD; + i2s_pdata[1].snd_rates = SNDRV_PCM_RATE_8000_96000; + i2s_pdata[1].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET; + i2s_pdata[1].i2s_reg_comp2 = ACP_I2S_COMP2_CAP_REG_OFFSET; + + adev->acp.acp_res[0].name = "acp2x_dma"; + adev->acp.acp_res[0].flags = IORESOURCE_MEM; + adev->acp.acp_res[0].start = acp_base; + adev->acp.acp_res[0].end = acp_base + ACP_DMA_REGS_END; + + adev->acp.acp_res[1].name = "acp2x_dw_i2s_play"; + adev->acp.acp_res[1].flags = IORESOURCE_MEM; + adev->acp.acp_res[1].start = acp_base + ACP_I2S_PLAY_REGS_START; + adev->acp.acp_res[1].end = acp_base + ACP_I2S_PLAY_REGS_END; + + adev->acp.acp_res[2].name = "acp2x_dw_i2s_cap"; + adev->acp.acp_res[2].flags = IORESOURCE_MEM; + adev->acp.acp_res[2].start = acp_base + ACP_I2S_CAP_REGS_START; + adev->acp.acp_res[2].end = acp_base + ACP_I2S_CAP_REGS_END; + + adev->acp.acp_res[3].name = "acp2x_dma_irq"; + adev->acp.acp_res[3].flags = IORESOURCE_IRQ; + adev->acp.acp_res[3].start = amdgpu_irq_create_mapping(adev, 162); + adev->acp.acp_res[3].end = adev->acp.acp_res[3].start; + + adev->acp.acp_cell[0].name = "acp_audio_dma"; + adev->acp.acp_cell[0].num_resources = 4; + adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0]; + + adev->acp.acp_cell[1].name = "designware-i2s"; + adev->acp.acp_cell[1].num_resources = 1; + adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1]; + adev->acp.acp_cell[1].platform_data = &i2s_pdata[0]; + adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data); + + adev->acp.acp_cell[2].name = "designware-i2s"; + adev->acp.acp_cell[2].num_resources = 1; + adev->acp.acp_cell[2].resources = &adev->acp.acp_res[2]; + adev->acp.acp_cell[2].platform_data = &i2s_pdata[1]; + adev->acp.acp_cell[2].pdata_size = sizeof(struct i2s_platform_data); + + r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell, + ACP_DEVS); + if (r) + return r; + + for (i = 0; i < ACP_DEVS ; i++) { + dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i); + r = pm_genpd_add_device(&adev->acp.acp_genpd->gpd, dev); + if (r) { + dev_err(dev, "Failed to add dev to genpd\n"); + return r; + } + } + + return 0; +} + +/** + * acp_hw_fini - stop the hardware block + * + * @adev: amdgpu_device pointer + * + */ +static int acp_hw_fini(void *handle) +{ + int i, ret; + struct device *dev; + + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + for (i = 0; i < ACP_DEVS ; i++) { + dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i); + ret = pm_genpd_remove_device(&adev->acp.acp_genpd->gpd, dev); + /* If removal fails, dont giveup and try rest */ + if (ret) + dev_err(dev, "remove dev from genpd failed\n"); + } + + mfd_remove_devices(adev->acp.parent); + kfree(adev->acp.acp_res); + kfree(adev->acp.acp_genpd); + kfree(adev->acp.acp_cell); + + return 0; +} + +static int acp_suspend(void *handle) +{ + return 0; +} + +static int acp_resume(void *handle) +{ + int i, ret; + struct acp_pm_domain *apd; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* SMU block will power on ACP 
irrespective of ACP runtime status. + * Power off explicitly based on genpd ACP runtime status so that ACP + * hw and ACP-genpd status are in sync. + * 'suspend_power_off' represents "Power status before system suspend" + */ + if (adev->acp.acp_genpd->gpd.suspend_power_off == true) { + apd = container_of(&adev->acp.acp_genpd->gpd, + struct acp_pm_domain, gpd); + + for (i = 4; i >= 0 ; i--) { + ret = acp_suspend_tile(apd->cgs_dev, ACP_TILE_P1 + i); + if (ret) + pr_err("ACP tile %d tile suspend failed\n", i); + } + } + return 0; +} + +static int acp_early_init(void *handle) +{ + return 0; +} + +static bool acp_is_idle(void *handle) +{ + return true; +} + +static int acp_wait_for_idle(void *handle) +{ + return 0; +} + +static int acp_soft_reset(void *handle) +{ + return 0; +} + +static void acp_print_status(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + dev_info(adev->dev, "ACP STATUS\n"); +} + +static int acp_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + return 0; +} + +static int acp_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + return 0; +} + +const struct amd_ip_funcs acp_ip_funcs = { + .early_init = acp_early_init, + .late_init = NULL, + .sw_init = acp_sw_init, + .sw_fini = acp_sw_fini, + .hw_init = acp_hw_init, + .hw_fini = acp_hw_fini, + .suspend = acp_suspend, + .resume = acp_resume, + .is_idle = acp_is_idle, + .wait_for_idle = acp_wait_for_idle, + .soft_reset = acp_soft_reset, + .print_status = acp_print_status, + .set_clockgating_state = acp_set_clockgating_state, + .set_powergating_state = acp_set_powergating_state, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.h new file mode 100644 index 0000000..f6e32a6 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.h @@ -0,0 +1,42 @@ +/* + * Copyright 2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * Authors: AMD + * + */ + +#ifndef __AMDGPU_ACP_H__ +#define __AMDGPU_ACP_H__ + +#include <linux/mfd/core.h> + +struct amdgpu_acp { + struct device *parent; + void *cgs_device; + struct amd_acp_private *private; + struct mfd_cell *acp_cell; + struct resource *acp_res; + struct acp_pm_domain *acp_genpd; +}; + +extern const struct amd_ip_funcs acp_ip_funcs; + +#endif /* __AMDGPU_ACP_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index 9416e0f..84b0ce3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -1514,6 +1514,19 @@ int amdgpu_atombios_init_mc_reg_table(struct amdgpu_device *adev, return -EINVAL; } +bool amdgpu_atombios_has_gpu_virtualization_table(struct amdgpu_device *adev) +{ + int index = GetIndexIntoMasterTable(DATA, GPUVirtualizationInfo); + u8 frev, crev; + u16 data_offset, size; + + if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, &size, + &frev, &crev, &data_offset)) + return true; + + return false; +} + void amdgpu_atombios_scratch_regs_lock(struct amdgpu_device *adev, bool lock) { uint32_t bios_6_scratch; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h index 0ebb959..9e14420 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h @@ -196,6 +196,8 @@ int amdgpu_atombios_init_mc_reg_table(struct amdgpu_device *adev, u8 module_index, struct atom_mc_reg_table *reg_table); +bool amdgpu_atombios_has_gpu_virtualization_table(struct amdgpu_device *adev); + void amdgpu_atombios_scratch_regs_lock(struct amdgpu_device *adev, bool lock); void amdgpu_atombios_scratch_regs_init(struct amdgpu_device *adev); void amdgpu_atombios_scratch_regs_save(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index f82a2dd..90d6fc1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -32,6 +32,9 @@ #include "amdgpu.h" #include "amdgpu_trace.h" +#define AMDGPU_BO_LIST_MAX_PRIORITY 32u +#define AMDGPU_BO_LIST_NUM_BUCKETS (AMDGPU_BO_LIST_MAX_PRIORITY + 1) + static int amdgpu_bo_list_create(struct amdgpu_fpriv *fpriv, struct amdgpu_bo_list **result, int *id) @@ -90,6 +93,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev, bool has_userptr = false; unsigned i; + int r; array = drm_malloc_ab(num_entries, sizeof(struct amdgpu_bo_list_entry)); if (!array) @@ -99,31 +103,34 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev, for (i = 0; i < num_entries; ++i) { struct amdgpu_bo_list_entry *entry = &array[i]; struct drm_gem_object *gobj; + struct mm_struct *usermm; gobj = drm_gem_object_lookup(adev->ddev, filp, info[i].bo_handle); - if (!gobj) + if (!gobj) { + r = -ENOENT; goto error_free; + } entry->robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); drm_gem_object_unreference_unlocked(gobj); - entry->priority = info[i].bo_priority; - entry->prefered_domains = entry->robj->initial_domain; - entry->allowed_domains = entry->prefered_domains; - if (entry->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM) - entry->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT; - if (amdgpu_ttm_tt_has_userptr(entry->robj->tbo.ttm)) { + entry->priority = min(info[i].bo_priority, + AMDGPU_BO_LIST_MAX_PRIORITY); + usermm = amdgpu_ttm_tt_get_usermm(entry->robj->tbo.ttm); + if (usermm) { + if (usermm != current->mm) { + r = -EPERM; + goto 
error_free; + } has_userptr = true; - entry->prefered_domains = AMDGPU_GEM_DOMAIN_GTT; - entry->allowed_domains = AMDGPU_GEM_DOMAIN_GTT; } entry->tv.bo = &entry->robj->tbo; entry->tv.shared = true; - if (entry->prefered_domains == AMDGPU_GEM_DOMAIN_GDS) + if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_GDS) gds_obj = entry->robj; - if (entry->prefered_domains == AMDGPU_GEM_DOMAIN_GWS) + if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_GWS) gws_obj = entry->robj; - if (entry->prefered_domains == AMDGPU_GEM_DOMAIN_OA) + if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_OA) oa_obj = entry->robj; trace_amdgpu_bo_list_set(list, entry->robj); @@ -145,7 +152,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev, error_free: drm_free_large(array); - return -ENOENT; + return r; } struct amdgpu_bo_list * @@ -161,6 +168,36 @@ amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id) return result; } +void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list, + struct list_head *validated) +{ + /* This is based on the bucket sort with O(n) time complexity. + * An item with priority "i" is added to bucket[i]. The lists are then + * concatenated in descending order. + */ + struct list_head bucket[AMDGPU_BO_LIST_NUM_BUCKETS]; + unsigned i; + + for (i = 0; i < AMDGPU_BO_LIST_NUM_BUCKETS; i++) + INIT_LIST_HEAD(&bucket[i]); + + /* Since buffers which appear sooner in the relocation list are + * likely to be used more often than buffers which appear later + * in the list, the sort mustn't change the ordering of buffers + * with the same priority, i.e. it must be stable. + */ + for (i = 0; i < list->num_entries; i++) { + unsigned priority = list->array[i].priority; + + list_add_tail(&list->array[i].tv.head, + &bucket[priority]); + } + + /* Connect the sorted buckets in the output list. */ + for (i = 0; i < AMDGPU_BO_LIST_NUM_BUCKETS; i++) + list_splice(&bucket[i], validated); +} + void amdgpu_bo_list_put(struct amdgpu_bo_list *list) { mutex_unlock(&list->lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index b882e81..52c3eb9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -30,47 +30,6 @@ #include "amdgpu.h" #include "amdgpu_trace.h" -#define AMDGPU_CS_MAX_PRIORITY 32u -#define AMDGPU_CS_NUM_BUCKETS (AMDGPU_CS_MAX_PRIORITY + 1) - -/* This is based on the bucket sort with O(n) time complexity. - * An item with priority "i" is added to bucket[i]. The lists are then - * concatenated in descending order. - */ -struct amdgpu_cs_buckets { - struct list_head bucket[AMDGPU_CS_NUM_BUCKETS]; -}; - -static void amdgpu_cs_buckets_init(struct amdgpu_cs_buckets *b) -{ - unsigned i; - - for (i = 0; i < AMDGPU_CS_NUM_BUCKETS; i++) - INIT_LIST_HEAD(&b->bucket[i]); -} - -static void amdgpu_cs_buckets_add(struct amdgpu_cs_buckets *b, - struct list_head *item, unsigned priority) -{ - /* Since buffers which appear sooner in the relocation list are - * likely to be used more often than buffers which appear later - * in the list, the sort mustn't change the ordering of buffers - * with the same priority, i.e. it must be stable. - */ - list_add_tail(item, &b->bucket[min(priority, AMDGPU_CS_MAX_PRIORITY)]); -} - -static void amdgpu_cs_buckets_get_list(struct amdgpu_cs_buckets *b, - struct list_head *out_list) -{ - unsigned i; - - /* Connect the sorted buckets in the output list. 
*/ - for (i = 0; i < AMDGPU_CS_NUM_BUCKETS; i++) { - list_splice(&b->bucket[i], out_list); - } -} - int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type, u32 ip_instance, u32 ring, struct amdgpu_ring **out_ring) @@ -128,6 +87,7 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type, } static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, + struct amdgpu_user_fence *uf, struct drm_amdgpu_cs_chunk_fence *fence_data) { struct drm_gem_object *gobj; @@ -139,17 +99,15 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, if (gobj == NULL) return -EINVAL; - p->uf.bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); - p->uf.offset = fence_data->offset; + uf->bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); + uf->offset = fence_data->offset; - if (amdgpu_ttm_tt_has_userptr(p->uf.bo->tbo.ttm)) { + if (amdgpu_ttm_tt_get_usermm(uf->bo->tbo.ttm)) { drm_gem_object_unreference_unlocked(gobj); return -EINVAL; } - p->uf_entry.robj = amdgpu_bo_ref(p->uf.bo); - p->uf_entry.prefered_domains = AMDGPU_GEM_DOMAIN_GTT; - p->uf_entry.allowed_domains = AMDGPU_GEM_DOMAIN_GTT; + p->uf_entry.robj = amdgpu_bo_ref(uf->bo); p->uf_entry.priority = 0; p->uf_entry.tv.bo = &p->uf_entry.robj->tbo; p->uf_entry.tv.shared = true; @@ -160,11 +118,12 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) { + struct amdgpu_fpriv *fpriv = p->filp->driver_priv; union drm_amdgpu_cs *cs = data; uint64_t *chunk_array_user; uint64_t *chunk_array; - struct amdgpu_fpriv *fpriv = p->filp->driver_priv; - unsigned size; + struct amdgpu_user_fence uf = {}; + unsigned size, num_ibs = 0; int i; int ret; @@ -181,15 +140,12 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) goto free_chunk; } - p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle); - /* get chunks */ - INIT_LIST_HEAD(&p->validated); chunk_array_user = (uint64_t __user *)(unsigned long)(cs->in.chunks); if (copy_from_user(chunk_array, chunk_array_user, sizeof(uint64_t)*cs->in.num_chunks)) { ret = -EFAULT; - goto put_bo_list; + goto put_ctx; } p->nchunks = cs->in.num_chunks; @@ -197,7 +153,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) GFP_KERNEL); if (!p->chunks) { ret = -ENOMEM; - goto put_bo_list; + goto put_ctx; } for (i = 0; i < p->nchunks; i++) { @@ -217,7 +173,6 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) size = p->chunks[i].length_dw; cdata = (void __user *)(unsigned long)user_chunk.chunk_data; - p->chunks[i].user_ptr = cdata; p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t)); if (p->chunks[i].kdata == NULL) { @@ -233,7 +188,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) switch (p->chunks[i].chunk_id) { case AMDGPU_CHUNK_ID_IB: - p->num_ibs++; + ++num_ibs; break; case AMDGPU_CHUNK_ID_FENCE: @@ -243,7 +198,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) goto free_partial_kdata; } - ret = amdgpu_cs_user_fence_chunk(p, (void *)p->chunks[i].kdata); + ret = amdgpu_cs_user_fence_chunk(p, &uf, (void *)p->chunks[i].kdata); if (ret) goto free_partial_kdata; @@ -258,12 +213,11 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) } } - - p->ibs = kcalloc(p->num_ibs, sizeof(struct amdgpu_ib), GFP_KERNEL); - if (!p->ibs) { - ret = -ENOMEM; + ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job); + if (ret) goto free_all_kdata; - } + + p->job->uf = uf; kfree(chunk_array); return 0; @@ -274,9 +228,7 @@ free_partial_kdata: for (; i >= 0; 
i--) drm_free_large(p->chunks[i].kdata); kfree(p->chunks); -put_bo_list: - if (p->bo_list) - amdgpu_bo_list_put(p->bo_list); +put_ctx: amdgpu_ctx_put(p->ctx); free_chunk: kfree(chunk_array); @@ -336,80 +288,76 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev) return max(bytes_moved_threshold, 1024*1024ull); } -int amdgpu_cs_list_validate(struct amdgpu_device *adev, - struct amdgpu_vm *vm, +int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, struct list_head *validated) { struct amdgpu_bo_list_entry *lobj; - struct amdgpu_bo *bo; - u64 bytes_moved = 0, initial_bytes_moved; - u64 bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(adev); + u64 initial_bytes_moved; int r; list_for_each_entry(lobj, validated, tv.head) { - bo = lobj->robj; - if (!bo->pin_count) { - u32 domain = lobj->prefered_domains; - u32 current_domain = - amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); - - /* Check if this buffer will be moved and don't move it - * if we have moved too many buffers for this IB already. - * - * Note that this allows moving at least one buffer of - * any size, because it doesn't take the current "bo" - * into account. We don't want to disallow buffer moves - * completely. - */ - if ((lobj->allowed_domains & current_domain) != 0 && - (domain & current_domain) == 0 && /* will be moved */ - bytes_moved > bytes_moved_threshold) { - /* don't move it */ - domain = current_domain; - } + struct amdgpu_bo *bo = lobj->robj; + struct mm_struct *usermm; + uint32_t domain; - retry: - amdgpu_ttm_placement_from_domain(bo, domain); - initial_bytes_moved = atomic64_read(&adev->num_bytes_moved); - r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); - bytes_moved += atomic64_read(&adev->num_bytes_moved) - - initial_bytes_moved; - - if (unlikely(r)) { - if (r != -ERESTARTSYS && domain != lobj->allowed_domains) { - domain = lobj->allowed_domains; - goto retry; - } - return r; + usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm); + if (usermm && usermm != current->mm) + return -EPERM; + + if (bo->pin_count) + continue; + + /* Avoid moving this one if we have moved too many buffers + * for this IB already. + * + * Note that this allows moving at least one buffer of + * any size, because it doesn't take the current "bo" + * into account. We don't want to disallow buffer moves + * completely. 
+ */ + if (p->bytes_moved <= p->bytes_moved_threshold) + domain = bo->prefered_domains; + else + domain = bo->allowed_domains; + + retry: + amdgpu_ttm_placement_from_domain(bo, domain); + initial_bytes_moved = atomic64_read(&bo->adev->num_bytes_moved); + r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); + p->bytes_moved += atomic64_read(&bo->adev->num_bytes_moved) - + initial_bytes_moved; + + if (unlikely(r)) { + if (r != -ERESTARTSYS && domain != bo->allowed_domains) { + domain = bo->allowed_domains; + goto retry; } + return r; } - lobj->bo_va = amdgpu_vm_bo_find(vm, bo); } return 0; } -static int amdgpu_cs_parser_relocs(struct amdgpu_cs_parser *p) +static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, + union drm_amdgpu_cs *cs) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; - struct amdgpu_cs_buckets buckets; struct list_head duplicates; bool need_mmap_lock = false; - int i, r; + int r; + INIT_LIST_HEAD(&p->validated); + + p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle); if (p->bo_list) { need_mmap_lock = p->bo_list->has_userptr; - amdgpu_cs_buckets_init(&buckets); - for (i = 0; i < p->bo_list->num_entries; i++) - amdgpu_cs_buckets_add(&buckets, &p->bo_list->array[i].tv.head, - p->bo_list->array[i].priority); - - amdgpu_cs_buckets_get_list(&buckets, &p->validated); + amdgpu_bo_list_get_list(p->bo_list, &p->validated); } INIT_LIST_HEAD(&duplicates); amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd); - if (p->uf.bo) + if (p->job->uf.bo) list_add(&p->uf_entry.tv.head, &p->validated); if (need_mmap_lock) @@ -421,11 +369,27 @@ static int amdgpu_cs_parser_relocs(struct amdgpu_cs_parser *p) amdgpu_vm_get_pt_bos(&fpriv->vm, &duplicates); - r = amdgpu_cs_list_validate(p->adev, &fpriv->vm, &duplicates); + p->bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(p->adev); + p->bytes_moved = 0; + + r = amdgpu_cs_list_validate(p, &duplicates); + if (r) + goto error_validate; + + r = amdgpu_cs_list_validate(p, &p->validated); if (r) goto error_validate; - r = amdgpu_cs_list_validate(p->adev, &fpriv->vm, &p->validated); + if (p->bo_list) { + struct amdgpu_vm *vm = &fpriv->vm; + unsigned i; + + for (i = 0; i < p->bo_list->num_entries; i++) { + struct amdgpu_bo *bo = p->bo_list->array[i].robj; + + p->bo_list->array[i].bo_va = amdgpu_vm_bo_find(vm, bo); + } + } error_validate: if (r) { @@ -447,7 +411,7 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) list_for_each_entry(e, &p->validated, tv.head) { struct reservation_object *resv = e->robj->tbo.resv; - r = amdgpu_sync_resv(p->adev, &p->ibs[0].sync, resv, p->filp); + r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp); if (r) return r; @@ -510,11 +474,8 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo for (i = 0; i < parser->nchunks; i++) drm_free_large(parser->chunks[i].kdata); kfree(parser->chunks); - if (parser->ibs) - for (i = 0; i < parser->num_ibs; i++) - amdgpu_ib_free(parser->adev, &parser->ibs[i]); - kfree(parser->ibs); - amdgpu_bo_unref(&parser->uf.bo); + if (parser->job) + amdgpu_job_free(parser->job); amdgpu_bo_unref(&parser->uf_entry.robj); } @@ -530,7 +491,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p, if (r) return r; - r = amdgpu_sync_fence(adev, &p->ibs[0].sync, vm->page_directory_fence); + r = amdgpu_sync_fence(adev, &p->job->sync, vm->page_directory_fence); if (r) return r; @@ -556,14 +517,14 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p, return r; f = bo_va->last_pt_update; - r = 
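The throttle introduced above is worth isolating: each submission gets a byte budget from amdgpu_cs_get_threshold_for_moves(), accumulated in p->bytes_moved as ttm_bo_validate() migrates buffers. While the budget lasts a BO is validated against its preferred placement; afterwards only its allowed placements are tried, so one submission cannot force unbounded VRAM thrashing, yet at least one move always goes through. A minimal sketch of the decision, with a hypothetical struct standing in for the parser fields:

struct move_budget {
	u64 moved;		/* bytes migrated so far in this CS */
	u64 threshold;		/* per-submission allowance */
};

/* Mirror of the domain choice in amdgpu_cs_list_validate(): aim for
 * the ideal placement while the budget lasts, otherwise settle for
 * wherever the buffer may already live. The caller still retries once
 * with the allowed set if validation in the preferred one fails. */
static u32 pick_validation_domain(const struct move_budget *b,
				  u32 prefered, u32 allowed)
{
	return (b->moved <= b->threshold) ? prefered : allowed;
}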
amdgpu_sync_fence(adev, &p->ibs[0].sync, f); + r = amdgpu_sync_fence(adev, &p->job->sync, f); if (r) return r; } } - r = amdgpu_vm_clear_invalids(adev, vm, &p->ibs[0].sync); + r = amdgpu_vm_clear_invalids(adev, vm, &p->job->sync); if (amdgpu_vm_debug && p->bo_list) { /* Invalidate all BOs to test for userspace bugs */ @@ -581,29 +542,25 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p, } static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev, - struct amdgpu_cs_parser *parser) + struct amdgpu_cs_parser *p) { - struct amdgpu_fpriv *fpriv = parser->filp->driver_priv; + struct amdgpu_fpriv *fpriv = p->filp->driver_priv; struct amdgpu_vm *vm = &fpriv->vm; - struct amdgpu_ring *ring; + struct amdgpu_ring *ring = p->job->ring; int i, r; - if (parser->num_ibs == 0) - return 0; - /* Only for UVD/VCE VM emulation */ - for (i = 0; i < parser->num_ibs; i++) { - ring = parser->ibs[i].ring; - if (ring->funcs->parse_cs) { - r = amdgpu_ring_parse_cs(ring, parser, i); + if (ring->funcs->parse_cs) { + for (i = 0; i < p->job->num_ibs; i++) { + r = amdgpu_ring_parse_cs(ring, p, i); if (r) return r; } } - r = amdgpu_bo_vm_update_pte(parser, vm); + r = amdgpu_bo_vm_update_pte(p, vm); if (!r) - amdgpu_cs_sync_rings(parser); + amdgpu_cs_sync_rings(p); return r; } @@ -626,14 +583,14 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, int i, j; int r; - for (i = 0, j = 0; i < parser->nchunks && j < parser->num_ibs; i++) { + for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) { struct amdgpu_cs_chunk *chunk; struct amdgpu_ib *ib; struct drm_amdgpu_cs_chunk_ib *chunk_ib; struct amdgpu_ring *ring; chunk = &parser->chunks[i]; - ib = &parser->ibs[j]; + ib = &parser->job->ibs[j]; chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata; if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB) @@ -645,6 +602,11 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, if (r) return r; + if (parser->job->ring && parser->job->ring != ring) + return -EINVAL; + + parser->job->ring = ring; + if (ring->funcs->parse_cs) { struct amdgpu_bo_va_mapping *m; struct amdgpu_bo *aobj = NULL; @@ -673,7 +635,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, offset = ((uint64_t)m->it.start) * AMDGPU_GPU_PAGE_SIZE; kptr += chunk_ib->va_start - offset; - r = amdgpu_ib_get(ring, NULL, chunk_ib->ib_bytes, ib); + r = amdgpu_ib_get(adev, NULL, chunk_ib->ib_bytes, ib); if (r) { DRM_ERROR("Failed to get ib !\n"); return r; @@ -682,7 +644,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, memcpy(ib->ptr, kptr, chunk_ib->ib_bytes); amdgpu_bo_kunmap(aobj); } else { - r = amdgpu_ib_get(ring, vm, 0, ib); + r = amdgpu_ib_get(adev, vm, 0, ib); if (r) { DRM_ERROR("Failed to get ib !\n"); return r; @@ -697,15 +659,12 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, j++; } - if (!parser->num_ibs) - return 0; - /* add GDS resources to first IB */ if (parser->bo_list) { struct amdgpu_bo *gds = parser->bo_list->gds_obj; struct amdgpu_bo *gws = parser->bo_list->gws_obj; struct amdgpu_bo *oa = parser->bo_list->oa_obj; - struct amdgpu_ib *ib = &parser->ibs[0]; + struct amdgpu_ib *ib = &parser->job->ibs[0]; if (gds) { ib->gds_base = amdgpu_bo_gpu_offset(gds); @@ -721,15 +680,15 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, } } /* wrap the last IB with user fence */ - if (parser->uf.bo) { - struct amdgpu_ib *ib = &parser->ibs[parser->num_ibs - 1]; + if (parser->job->uf.bo) { + struct amdgpu_ib *ib = &parser->job->ibs[parser->job->num_ibs - 1]; /* UVD & VCE fw doesn't support user 
fences */ - if (ib->ring->type == AMDGPU_RING_TYPE_UVD || - ib->ring->type == AMDGPU_RING_TYPE_VCE) + if (parser->job->ring->type == AMDGPU_RING_TYPE_UVD || + parser->job->ring->type == AMDGPU_RING_TYPE_VCE) return -EINVAL; - ib->user = &parser->uf; + ib->user = &parser->job->uf; } return 0; @@ -739,14 +698,8 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, struct amdgpu_cs_parser *p) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; - struct amdgpu_ib *ib; int i, j, r; - if (!p->num_ibs) - return 0; - - /* Add dependencies to first IB */ - ib = &p->ibs[0]; for (i = 0; i < p->nchunks; ++i) { struct drm_amdgpu_cs_chunk_dep *deps; struct amdgpu_cs_chunk *chunk; @@ -784,7 +737,8 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, return r; } else if (fence) { - r = amdgpu_sync_fence(adev, &ib->sync, fence); + r = amdgpu_sync_fence(adev, &p->job->sync, + fence); fence_put(fence); amdgpu_ctx_put(ctx); if (r) @@ -796,15 +750,36 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, return 0; } -static int amdgpu_cs_free_job(struct amdgpu_job *job) +static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, + union drm_amdgpu_cs *cs) { - int i; - if (job->ibs) - for (i = 0; i < job->num_ibs; i++) - amdgpu_ib_free(job->adev, &job->ibs[i]); - kfree(job->ibs); - if (job->uf.bo) - amdgpu_bo_unref(&job->uf.bo); + struct amdgpu_ring *ring = p->job->ring; + struct amd_sched_fence *fence; + struct amdgpu_job *job; + + job = p->job; + p->job = NULL; + + job->base.sched = &ring->sched; + job->base.s_entity = &p->ctx->rings[ring->idx].entity; + job->owner = p->filp; + + fence = amd_sched_fence_create(job->base.s_entity, p->filp); + if (!fence) { + amdgpu_job_free(job); + return -ENOMEM; + } + + job->base.s_fence = fence; + p->fence = fence_get(&fence->base); + + cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, + &fence->base); + job->ibs[job->num_ibs - 1].sequence = cs->out.handle; + + trace_amdgpu_cs_ioctl(job); + amd_sched_entity_push_job(&job->base); + return 0; } @@ -829,7 +804,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) r = amdgpu_cs_handle_lockup(adev, r); return r; } - r = amdgpu_cs_parser_relocs(&parser); + r = amdgpu_cs_parser_bos(&parser, data); if (r == -ENOMEM) DRM_ERROR("Not enough memory for command submission!\n"); else if (r && r != -ERESTARTSYS) @@ -848,68 +823,14 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (r) goto out; - for (i = 0; i < parser.num_ibs; i++) + for (i = 0; i < parser.job->num_ibs; i++) trace_amdgpu_cs(&parser, i); r = amdgpu_cs_ib_vm_chunk(adev, &parser); if (r) goto out; - if (amdgpu_enable_scheduler && parser.num_ibs) { - struct amdgpu_ring * ring = parser.ibs->ring; - struct amd_sched_fence *fence; - struct amdgpu_job *job; - - job = kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL); - if (!job) { - r = -ENOMEM; - goto out; - } - - job->base.sched = &ring->sched; - job->base.s_entity = &parser.ctx->rings[ring->idx].entity; - job->adev = parser.adev; - job->owner = parser.filp; - job->free_job = amdgpu_cs_free_job; - - job->ibs = parser.ibs; - job->num_ibs = parser.num_ibs; - parser.ibs = NULL; - parser.num_ibs = 0; - - if (job->ibs[job->num_ibs - 1].user) { - job->uf = parser.uf; - job->ibs[job->num_ibs - 1].user = &job->uf; - parser.uf.bo = NULL; - } - - fence = amd_sched_fence_create(job->base.s_entity, - parser.filp); - if (!fence) { - r = -ENOMEM; - amdgpu_cs_free_job(job); - kfree(job); - goto out; - } - job->base.s_fence = fence; - parser.fence = 
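/* Ownership note for amdgpu_cs_submit() above: the parser gives up its
 * job pointer (p->job = NULL) before handing it to the scheduler, the
 * handle returned to userspace is the scheduler fence registered via
 * amdgpu_ctx_add_fence(), and once amd_sched_entity_push_job() is
 * called the job can run and be freed at any time, so only the fence
 * reference taken beforehand is touched afterwards. */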
fence_get(&fence->base); - - cs->out.handle = amdgpu_ctx_add_fence(parser.ctx, ring, - &fence->base); - job->ibs[job->num_ibs - 1].sequence = cs->out.handle; - - trace_amdgpu_cs_ioctl(job); - amd_sched_entity_push_job(&job->base); - - } else { - struct amdgpu_fence *fence; - - r = amdgpu_ib_schedule(adev, parser.num_ibs, parser.ibs, - parser.filp); - fence = parser.ibs[parser.num_ibs - 1].fence; - parser.fence = fence_get(&fence->base); - cs->out.handle = parser.ibs[parser.num_ibs - 1].sequence; - } + r = amdgpu_cs_submit(&parser, cs); out: amdgpu_cs_parser_fini(&parser, r, reserved_buffers); @@ -980,30 +901,36 @@ struct amdgpu_bo_va_mapping * amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, uint64_t addr, struct amdgpu_bo **bo) { - struct amdgpu_bo_list_entry *reloc; struct amdgpu_bo_va_mapping *mapping; + unsigned i; + + if (!parser->bo_list) + return NULL; addr /= AMDGPU_GPU_PAGE_SIZE; - list_for_each_entry(reloc, &parser->validated, tv.head) { - if (!reloc->bo_va) + for (i = 0; i < parser->bo_list->num_entries; i++) { + struct amdgpu_bo_list_entry *lobj; + + lobj = &parser->bo_list->array[i]; + if (!lobj->bo_va) continue; - list_for_each_entry(mapping, &reloc->bo_va->valids, list) { + list_for_each_entry(mapping, &lobj->bo_va->valids, list) { if (mapping->it.start > addr || addr > mapping->it.last) continue; - *bo = reloc->bo_va->bo; + *bo = lobj->bo_va->bo; return mapping; } - list_for_each_entry(mapping, &reloc->bo_va->invalids, list) { + list_for_each_entry(mapping, &lobj->bo_va->invalids, list) { if (mapping->it.start > addr || addr > mapping->it.last) continue; - *bo = reloc->bo_va->bo; + *bo = lobj->bo_va->bo; return mapping; } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 17d1fb1..17e1362 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -25,8 +25,7 @@ #include <drm/drmP.h> #include "amdgpu.h" -int amdgpu_ctx_init(struct amdgpu_device *adev, enum amd_sched_priority pri, - struct amdgpu_ctx *ctx) +static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx) { unsigned i, j; int r; @@ -35,44 +34,38 @@ int amdgpu_ctx_init(struct amdgpu_device *adev, enum amd_sched_priority pri, ctx->adev = adev; kref_init(&ctx->refcount); spin_lock_init(&ctx->ring_lock); - ctx->fences = kzalloc(sizeof(struct fence *) * amdgpu_sched_jobs * - AMDGPU_MAX_RINGS, GFP_KERNEL); + ctx->fences = kcalloc(amdgpu_sched_jobs * AMDGPU_MAX_RINGS, + sizeof(struct fence*), GFP_KERNEL); if (!ctx->fences) return -ENOMEM; for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { ctx->rings[i].sequence = 1; - ctx->rings[i].fences = (void *)ctx->fences + sizeof(struct fence *) * - amdgpu_sched_jobs * i; + ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i]; } - if (amdgpu_enable_scheduler) { - /* create context entity for each ring */ - for (i = 0; i < adev->num_rings; i++) { - struct amd_sched_rq *rq; - if (pri >= AMD_SCHED_MAX_PRIORITY) { - kfree(ctx->fences); - return -EINVAL; - } - rq = &adev->rings[i]->sched.sched_rq[pri]; - r = amd_sched_entity_init(&adev->rings[i]->sched, - &ctx->rings[i].entity, - rq, amdgpu_sched_jobs); - if (r) - break; - } - - if (i < adev->num_rings) { - for (j = 0; j < i; j++) - amd_sched_entity_fini(&adev->rings[j]->sched, - &ctx->rings[j].entity); - kfree(ctx->fences); - return r; - } + /* create context entity for each ring */ + for (i = 0; i < adev->num_rings; i++) { + struct amdgpu_ring *ring = adev->rings[i]; + struct amd_sched_rq *rq; + + rq = 
&ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; + r = amd_sched_entity_init(&ring->sched, &ctx->rings[i].entity, + rq, amdgpu_sched_jobs); + if (r) + break; + } + + if (i < adev->num_rings) { + for (j = 0; j < i; j++) + amd_sched_entity_fini(&adev->rings[j]->sched, + &ctx->rings[j].entity); + kfree(ctx->fences); + return r; } return 0; } -void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) +static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) { struct amdgpu_device *adev = ctx->adev; unsigned i, j; @@ -85,11 +78,9 @@ void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) fence_put(ctx->rings[i].fences[j]); kfree(ctx->fences); - if (amdgpu_enable_scheduler) { - for (i = 0; i < adev->num_rings; i++) - amd_sched_entity_fini(&adev->rings[i]->sched, - &ctx->rings[i].entity); - } + for (i = 0; i < adev->num_rings; i++) + amd_sched_entity_fini(&adev->rings[i]->sched, + &ctx->rings[i].entity); } static int amdgpu_ctx_alloc(struct amdgpu_device *adev, @@ -112,7 +103,7 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev, return r; } *id = (uint32_t)r; - r = amdgpu_ctx_init(adev, AMD_SCHED_PRIORITY_NORMAL, ctx); + r = amdgpu_ctx_init(adev, ctx); if (r) { idr_remove(&mgr->ctx_handles, *id); *id = 0; @@ -200,18 +191,18 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, id = args->in.ctx_id; switch (args->in.op) { - case AMDGPU_CTX_OP_ALLOC_CTX: - r = amdgpu_ctx_alloc(adev, fpriv, &id); - args->out.alloc.ctx_id = id; - break; - case AMDGPU_CTX_OP_FREE_CTX: - r = amdgpu_ctx_free(fpriv, id); - break; - case AMDGPU_CTX_OP_QUERY_STATE: - r = amdgpu_ctx_query(adev, fpriv, id, &args->out); - break; - default: - return -EINVAL; + case AMDGPU_CTX_OP_ALLOC_CTX: + r = amdgpu_ctx_alloc(adev, fpriv, &id); + args->out.alloc.ctx_id = id; + break; + case AMDGPU_CTX_OP_FREE_CTX: + r = amdgpu_ctx_free(fpriv, id); + break; + case AMDGPU_CTX_OP_QUERY_STATE: + r = amdgpu_ctx_query(adev, fpriv, id, &args->out); + break; + default: + return -EINVAL; } return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 6553146..db20d27 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -636,31 +636,6 @@ bool amdgpu_card_posted(struct amdgpu_device *adev) } /** - * amdgpu_boot_test_post_card - check and possibly initialize the hw - * - * @adev: amdgpu_device pointer - * - * Check if the asic is initialized and if not, attempt to initialize - * it (all asics). - * Returns true if initialized or false if not. - */ -bool amdgpu_boot_test_post_card(struct amdgpu_device *adev) -{ - if (amdgpu_card_posted(adev)) - return true; - - if (adev->bios) { - DRM_INFO("GPU not posted. 
posting now...\n"); - if (adev->is_atom_bios) - amdgpu_atom_asic_init(adev->mode_info.atom_context); - return true; - } else { - dev_err(adev->dev, "Card not posted and no BIOS - ignoring\n"); - return false; - } -} - -/** * amdgpu_dummy_page_init - init dummy page used by the driver * * @adev: amdgpu_device pointer @@ -959,12 +934,6 @@ static void amdgpu_check_arguments(struct amdgpu_device *adev) amdgpu_sched_jobs); amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs); } - /* vramlimit must be a power of two */ - if (!amdgpu_check_pot_argument(amdgpu_vram_limit)) { - dev_warn(adev->dev, "vram limit (%d) must be a power of 2\n", - amdgpu_vram_limit); - amdgpu_vram_limit = 0; - } if (amdgpu_gart_size != -1) { /* gtt size must be power of two and greater or equal to 32M */ @@ -1434,7 +1403,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->mman.buffer_funcs = NULL; adev->mman.buffer_funcs_ring = NULL; adev->vm_manager.vm_pte_funcs = NULL; - adev->vm_manager.vm_pte_funcs_ring = NULL; + adev->vm_manager.vm_pte_num_rings = 0; adev->gart.gart_funcs = NULL; adev->fence_context = fence_context_alloc(AMDGPU_MAX_RINGS); @@ -1455,9 +1424,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, /* mutex initialization are all done here so we * can recall function without having locking issues */ - mutex_init(&adev->ring_lock); + mutex_init(&adev->vm_manager.lock); atomic_set(&adev->irq.ih.lock, 0); - mutex_init(&adev->gem.mutex); mutex_init(&adev->pm.mutex); mutex_init(&adev->gfx.gpu_clock_mutex); mutex_init(&adev->srbm_mutex); @@ -1531,8 +1499,13 @@ int amdgpu_device_init(struct amdgpu_device *adev, return r; } + /* See if the asic supports SR-IOV */ + adev->virtualization.supports_sr_iov = + amdgpu_atombios_has_gpu_virtualization_table(adev); + /* Post card if necessary */ - if (!amdgpu_card_posted(adev)) { + if (!amdgpu_card_posted(adev) || + adev->virtualization.supports_sr_iov) { if (!adev->bios) { dev_err(adev->dev, "Card not posted and no BIOS - ignoring\n"); return -EINVAL; @@ -1577,11 +1550,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, return r; } - r = amdgpu_ctx_init(adev, AMD_SCHED_PRIORITY_KERNEL, &adev->kernel_ctx); - if (r) { - dev_err(adev->dev, "failed to create kernel context (%d).\n", r); - return r; - } r = amdgpu_ib_ring_tests(adev); if (r) DRM_ERROR("ib ring test failed (%d).\n", r); @@ -1645,7 +1613,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev) adev->shutdown = true; /* evict vram memory */ amdgpu_bo_evict_vram(adev); - amdgpu_ctx_fini(&adev->kernel_ctx); amdgpu_ib_pool_fini(adev); amdgpu_fence_driver_fini(adev); amdgpu_fbdev_fini(adev); @@ -1889,6 +1856,9 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev) retry: r = amdgpu_asic_reset(adev); + /* post card */ + amdgpu_atom_asic_init(adev->mode_info.atom_context); + if (!r) { dev_info(adev->dev, "GPU reset succeeded, trying to resume\n"); r = amdgpu_resume(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index acd066d0..2cb53c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -35,32 +35,30 @@ #include <drm/drm_crtc_helper.h> #include <drm/drm_edid.h> -static void amdgpu_flip_wait_fence(struct amdgpu_device *adev, - struct fence **f) +static void amdgpu_flip_callback(struct fence *f, struct fence_cb *cb) { - struct amdgpu_fence *fence; - long r; + struct amdgpu_flip_work *work = + container_of(cb, struct amdgpu_flip_work, cb); - if (*f == NULL) - return; + fence_put(f); + 
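/* This callback runs in fence-signal context: it drops the reference
 * that was transferred when the callback was armed, then kicks the
 * flip back onto its work item. The work function re-checks the
 * remaining fences, so a flip with several dependencies bounces
 * through here once per unsignaled fence instead of blocking a
 * worker. */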
schedule_work(&work->flip_work); +} - fence = to_amdgpu_fence(*f); - if (fence) { - r = fence_wait(&fence->base, false); - if (r == -EDEADLK) - r = amdgpu_gpu_reset(adev); - } else - r = fence_wait(*f, false); +static bool amdgpu_flip_handle_fence(struct amdgpu_flip_work *work, + struct fence **f) +{ + struct fence *fence= *f; - if (r) - DRM_ERROR("failed to wait on page flip fence (%ld)!\n", r); + if (fence == NULL) + return false; - /* We continue with the page flip even if we failed to wait on - * the fence, otherwise the DRM core and userspace will be - * confused about which BO the CRTC is scanning out - */ - fence_put(*f); *f = NULL; + + if (!fence_add_callback(fence, &work->cb, amdgpu_flip_callback)) + return true; + + fence_put(*f); + return false; } static void amdgpu_flip_work_func(struct work_struct *__work) @@ -76,9 +74,12 @@ static void amdgpu_flip_work_func(struct work_struct *__work) int vpos, hpos, stat, min_udelay; struct drm_vblank_crtc *vblank = &crtc->dev->vblank[work->crtc_id]; - amdgpu_flip_wait_fence(adev, &work->excl); + if (amdgpu_flip_handle_fence(work, &work->excl)) + return; + for (i = 0; i < work->shared_count; ++i) - amdgpu_flip_wait_fence(adev, &work->shared[i]); + if (amdgpu_flip_handle_fence(work, &work->shared[i])) + return; /* We borrow the event spin lock for protecting flip_status */ spin_lock_irqsave(&crtc->dev->event_lock, flags); @@ -118,12 +119,12 @@ static void amdgpu_flip_work_func(struct work_struct *__work) spin_lock_irqsave(&crtc->dev->event_lock, flags); }; - /* do the flip (mmio) */ - adev->mode_info.funcs->page_flip(adev, work->crtc_id, work->base); /* set the flip status */ amdgpuCrtc->pflip_status = AMDGPU_FLIP_SUBMITTED; - spin_unlock_irqrestore(&crtc->dev->event_lock, flags); + + /* Do the flip (mmio) */ + adev->mode_info.funcs->page_flip(adev, work->crtc_id, work->base); } /* @@ -242,7 +243,7 @@ int amdgpu_crtc_page_flip(struct drm_crtc *crtc, /* update crtc fb */ crtc->primary->fb = fb; spin_unlock_irqrestore(&crtc->dev->event_lock, flags); - queue_work(amdgpu_crtc->pflip_queue, &work->flip_work); + amdgpu_flip_work_func(&work->flip_work); return 0; vblank_cleanup: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 9c1af89..ce79a8b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -69,7 +69,6 @@ int amdgpu_dpm = -1; int amdgpu_smc_load_fw = 1; int amdgpu_aspm = -1; int amdgpu_runtime_pm = -1; -int amdgpu_hard_reset = 0; unsigned amdgpu_ip_block_mask = 0xffffffff; int amdgpu_bapm = -1; int amdgpu_deep_color = 0; @@ -78,10 +77,8 @@ int amdgpu_vm_block_size = -1; int amdgpu_vm_fault_stop = 0; int amdgpu_vm_debug = 0; int amdgpu_exp_hw_support = 0; -int amdgpu_enable_scheduler = 1; int amdgpu_sched_jobs = 32; int amdgpu_sched_hw_submission = 2; -int amdgpu_enable_semaphores = 0; int amdgpu_powerplay = -1; MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); @@ -126,9 +123,6 @@ module_param_named(aspm, amdgpu_aspm, int, 0444); MODULE_PARM_DESC(runpm, "PX runtime pm (1 = force enable, 0 = disable, -1 = PX only default)"); module_param_named(runpm, amdgpu_runtime_pm, int, 0444); -MODULE_PARM_DESC(hard_reset, "PCI config reset (1 = force enable, 0 = disable (default))"); -module_param_named(hard_reset, amdgpu_hard_reset, int, 0444); - MODULE_PARM_DESC(ip_block_mask, "IP Block Mask (all blocks enabled (default))"); module_param_named(ip_block_mask, amdgpu_ip_block_mask, uint, 0444); @@ -153,18 +147,12 @@ 
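The rework above removes the blocking waits from the flip path by arming fence callbacks. The core contract, sketched with a hypothetical work-item wrapper (fence_add_callback() returns 0 when the callback was armed and -ENOENT when the fence had already signaled):

#include <linux/fence.h>
#include <linux/workqueue.h>

struct flip_state {
	struct work_struct work;
	struct fence_cb cb;
};

static void flip_fence_cb(struct fence *f, struct fence_cb *cb)
{
	struct flip_state *s = container_of(cb, struct flip_state, cb);

	fence_put(f);			/* reference handed over on arming */
	schedule_work(&s->work);	/* resume the flip asynchronously */
}

/* Returns true when the caller must bail out and wait for the
 * callback, false when the fence was absent or already signaled. */
static bool flip_wait_async(struct flip_state *s, struct fence **f)
{
	struct fence *fence = *f;

	if (!fence)
		return false;
	*f = NULL;

	if (!fence_add_callback(fence, &s->cb, flip_fence_cb))
		return true;	/* armed; flip_fence_cb owns the ref now */

	fence_put(fence);	/* already signaled; drop it ourselves */
	return false;
}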
module_param_named(vm_debug, amdgpu_vm_debug, int, 0644); MODULE_PARM_DESC(exp_hw_support, "experimental hw support (1 = enable, 0 = disable (default))"); module_param_named(exp_hw_support, amdgpu_exp_hw_support, int, 0444); -MODULE_PARM_DESC(enable_scheduler, "enable SW GPU scheduler (1 = enable (default), 0 = disable)"); -module_param_named(enable_scheduler, amdgpu_enable_scheduler, int, 0444); - MODULE_PARM_DESC(sched_jobs, "the max number of jobs supported in the sw queue (default 32)"); module_param_named(sched_jobs, amdgpu_sched_jobs, int, 0444); MODULE_PARM_DESC(sched_hw_submission, "the max number of HW submissions (default 2)"); module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444); -MODULE_PARM_DESC(enable_semaphores, "Enable semaphores (1 = enable, 0 = disable (default))"); -module_param_named(enable_semaphores, amdgpu_enable_semaphores, int, 0644); - #ifdef CONFIG_DRM_AMD_POWERPLAY MODULE_PARM_DESC(powerplay, "Powerplay component (1 = enable, 0 = disable, -1 = auto (default))"); module_param_named(powerplay, amdgpu_powerplay, int, 0444); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 3671f9f..97db196 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -107,7 +107,7 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner, if ((*fence) == NULL) { return -ENOMEM; } - (*fence)->seq = ++ring->fence_drv.sync_seq[ring->idx]; + (*fence)->seq = ++ring->fence_drv.sync_seq; (*fence)->ring = ring; (*fence)->owner = owner; fence_init(&(*fence)->base, &amdgpu_fence_ops, @@ -171,7 +171,7 @@ static bool amdgpu_fence_activity(struct amdgpu_ring *ring) */ last_seq = atomic64_read(&ring->fence_drv.last_seq); do { - last_emitted = ring->fence_drv.sync_seq[ring->idx]; + last_emitted = ring->fence_drv.sync_seq; seq = amdgpu_fence_read(ring); seq |= last_seq & 0xffffffff00000000LL; if (seq < last_seq) { @@ -260,34 +260,28 @@ static bool amdgpu_fence_seq_signaled(struct amdgpu_ring *ring, u64 seq) } /* - * amdgpu_ring_wait_seq_timeout - wait for seq of the specific ring to signal + * amdgpu_ring_wait_seq - wait for seq of the specific ring to signal * @ring: ring to wait on for the seq number * @seq: seq number wait for * * return value: * 0: seq signaled, and gpu not hang - * -EDEADL: GPU hang detected * -EINVAL: some paramter is not valid */ static int amdgpu_fence_ring_wait_seq(struct amdgpu_ring *ring, uint64_t seq) { - bool signaled = false; - BUG_ON(!ring); - if (seq > ring->fence_drv.sync_seq[ring->idx]) + if (seq > ring->fence_drv.sync_seq) return -EINVAL; if (atomic64_read(&ring->fence_drv.last_seq) >= seq) return 0; amdgpu_fence_schedule_fallback(ring); - wait_event(ring->fence_drv.fence_queue, ( - (signaled = amdgpu_fence_seq_signaled(ring, seq)))); + wait_event(ring->fence_drv.fence_queue, + amdgpu_fence_seq_signaled(ring, seq)); - if (signaled) - return 0; - else - return -EDEADLK; + return 0; } /** @@ -304,7 +298,7 @@ int amdgpu_fence_wait_next(struct amdgpu_ring *ring) { uint64_t seq = atomic64_read(&ring->fence_drv.last_seq) + 1ULL; - if (seq >= ring->fence_drv.sync_seq[ring->idx]) + if (seq >= ring->fence_drv.sync_seq) return -ENOENT; return amdgpu_fence_ring_wait_seq(ring, seq); @@ -322,7 +316,7 @@ int amdgpu_fence_wait_next(struct amdgpu_ring *ring) */ int amdgpu_fence_wait_empty(struct amdgpu_ring *ring) { - uint64_t seq = ring->fence_drv.sync_seq[ring->idx]; + uint64_t seq = ring->fence_drv.sync_seq; if (!seq) return 0; @@ -347,7 +341,7 @@ 
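With hardware semaphores gone a ring no longer has to remember, per peer ring, the last sequence number it synchronized against, so the fence driver's per-ring array collapses into one counter. Illustrative before/after (struct names shortened, not the full kernel definitions):

/* before: consulted by the removed amdgpu_fence_need_sync() and
 * amdgpu_fence_note_sync() semaphore helpers */
struct fence_drv_old {
	u64 sync_seq[AMDGPU_MAX_RINGS];
};

/* after: only "highest sequence emitted on this ring" remains, and
 * the outstanding-fence count is a single subtraction */
struct fence_drv_new {
	u64 sync_seq;
};

static u64 fences_outstanding(const struct fence_drv_new *drv,
			      u64 last_signaled)
{
	return drv->sync_seq - last_signaled;
}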
unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring) * but it's ok to report slightly wrong fence count here. */ amdgpu_fence_process(ring); - emitted = ring->fence_drv.sync_seq[ring->idx] + emitted = ring->fence_drv.sync_seq - atomic64_read(&ring->fence_drv.last_seq); /* to avoid 32bits warp around */ if (emitted > 0x10000000) @@ -357,68 +351,6 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring) } /** - * amdgpu_fence_need_sync - do we need a semaphore - * - * @fence: amdgpu fence object - * @dst_ring: which ring to check against - * - * Check if the fence needs to be synced against another ring - * (all asics). If so, we need to emit a semaphore. - * Returns true if we need to sync with another ring, false if - * not. - */ -bool amdgpu_fence_need_sync(struct amdgpu_fence *fence, - struct amdgpu_ring *dst_ring) -{ - struct amdgpu_fence_driver *fdrv; - - if (!fence) - return false; - - if (fence->ring == dst_ring) - return false; - - /* we are protected by the ring mutex */ - fdrv = &dst_ring->fence_drv; - if (fence->seq <= fdrv->sync_seq[fence->ring->idx]) - return false; - - return true; -} - -/** - * amdgpu_fence_note_sync - record the sync point - * - * @fence: amdgpu fence object - * @dst_ring: which ring to check against - * - * Note the sequence number at which point the fence will - * be synced with the requested ring (all asics). - */ -void amdgpu_fence_note_sync(struct amdgpu_fence *fence, - struct amdgpu_ring *dst_ring) -{ - struct amdgpu_fence_driver *dst, *src; - unsigned i; - - if (!fence) - return; - - if (fence->ring == dst_ring) - return; - - /* we are protected by the ring mutex */ - src = &fence->ring->fence_drv; - dst = &dst_ring->fence_drv; - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - if (i == dst_ring->idx) - continue; - - dst->sync_seq[i] = max(dst->sync_seq[i], src->sync_seq[i]); - } -} - -/** * amdgpu_fence_driver_start_ring - make the fence driver * ready for use on the requested ring. * @@ -471,13 +403,12 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, */ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring) { - int i, r; + long timeout; + int r; ring->fence_drv.cpu_addr = NULL; ring->fence_drv.gpu_addr = 0; - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) - ring->fence_drv.sync_seq[i] = 0; - + ring->fence_drv.sync_seq = 0; atomic64_set(&ring->fence_drv.last_seq, 0); ring->fence_drv.initialized = false; @@ -486,26 +417,24 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring) init_waitqueue_head(&ring->fence_drv.fence_queue); - if (amdgpu_enable_scheduler) { - long timeout = msecs_to_jiffies(amdgpu_lockup_timeout); - if (timeout == 0) { - /* - * FIXME: - * Delayed workqueue cannot use it directly, - * so the scheduler will not use delayed workqueue if - * MAX_SCHEDULE_TIMEOUT is set. - * Currently keep it simple and silly. - */ - timeout = MAX_SCHEDULE_TIMEOUT; - } - r = amd_sched_init(&ring->sched, &amdgpu_sched_ops, - amdgpu_sched_hw_submission, - timeout, ring->name); - if (r) { - DRM_ERROR("Failed to create scheduler on ring %s.\n", - ring->name); - return r; - } + timeout = msecs_to_jiffies(amdgpu_lockup_timeout); + if (timeout == 0) { + /* + * FIXME: + * Delayed workqueue cannot use it directly, + * so the scheduler will not use delayed workqueue if + * MAX_SCHEDULE_TIMEOUT is set. + * Currently keep it simple and silly. 
+ */ + timeout = MAX_SCHEDULE_TIMEOUT; + } + r = amd_sched_init(&ring->sched, &amdgpu_sched_ops, + amdgpu_sched_hw_submission, + timeout, ring->name); + if (r) { + DRM_ERROR("Failed to create scheduler on ring %s.\n", + ring->name); + return r; } return 0; @@ -552,7 +481,6 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev) if (atomic_dec_and_test(&amdgpu_fence_slab_ref)) kmem_cache_destroy(amdgpu_fence_slab); - mutex_lock(&adev->ring_lock); for (i = 0; i < AMDGPU_MAX_RINGS; i++) { struct amdgpu_ring *ring = adev->rings[i]; @@ -570,7 +498,6 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev) del_timer_sync(&ring->fence_drv.fallback_timer); ring->fence_drv.initialized = false; } - mutex_unlock(&adev->ring_lock); } /** @@ -585,7 +512,6 @@ void amdgpu_fence_driver_suspend(struct amdgpu_device *adev) { int i, r; - mutex_lock(&adev->ring_lock); for (i = 0; i < AMDGPU_MAX_RINGS; i++) { struct amdgpu_ring *ring = adev->rings[i]; if (!ring || !ring->fence_drv.initialized) @@ -602,7 +528,6 @@ void amdgpu_fence_driver_suspend(struct amdgpu_device *adev) amdgpu_irq_put(adev, ring->fence_drv.irq_src, ring->fence_drv.irq_type); } - mutex_unlock(&adev->ring_lock); } /** @@ -621,7 +546,6 @@ void amdgpu_fence_driver_resume(struct amdgpu_device *adev) { int i; - mutex_lock(&adev->ring_lock); for (i = 0; i < AMDGPU_MAX_RINGS; i++) { struct amdgpu_ring *ring = adev->rings[i]; if (!ring || !ring->fence_drv.initialized) @@ -631,7 +555,6 @@ void amdgpu_fence_driver_resume(struct amdgpu_device *adev) amdgpu_irq_get(adev, ring->fence_drv.irq_src, ring->fence_drv.irq_type); } - mutex_unlock(&adev->ring_lock); } /** @@ -651,7 +574,7 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev) if (!ring || !ring->fence_drv.initialized) continue; - amdgpu_fence_write(ring, ring->fence_drv.sync_seq[i]); + amdgpu_fence_write(ring, ring->fence_drv.sync_seq); } } @@ -781,7 +704,7 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data) struct drm_info_node *node = (struct drm_info_node *)m->private; struct drm_device *dev = node->minor->dev; struct amdgpu_device *adev = dev->dev_private; - int i, j; + int i; for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; @@ -794,28 +717,38 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data) seq_printf(m, "Last signaled fence 0x%016llx\n", (unsigned long long)atomic64_read(&ring->fence_drv.last_seq)); seq_printf(m, "Last emitted 0x%016llx\n", - ring->fence_drv.sync_seq[i]); - - for (j = 0; j < AMDGPU_MAX_RINGS; ++j) { - struct amdgpu_ring *other = adev->rings[j]; - if (i != j && other && other->fence_drv.initialized && - ring->fence_drv.sync_seq[j]) - seq_printf(m, "Last sync to ring %d 0x%016llx\n", - j, ring->fence_drv.sync_seq[j]); - } + ring->fence_drv.sync_seq); } return 0; } +/** + * amdgpu_debugfs_gpu_reset - manually trigger a gpu reset + * + * Manually trigger a gpu reset at the next fence wait. 
+ */ +static int amdgpu_debugfs_gpu_reset(struct seq_file *m, void *data) +{ + struct drm_info_node *node = (struct drm_info_node *) m->private; + struct drm_device *dev = node->minor->dev; + struct amdgpu_device *adev = dev->dev_private; + + seq_printf(m, "gpu reset\n"); + amdgpu_gpu_reset(adev); + + return 0; +} + static struct drm_info_list amdgpu_debugfs_fence_list[] = { {"amdgpu_fence_info", &amdgpu_debugfs_fence_info, 0, NULL}, + {"amdgpu_gpu_reset", &amdgpu_debugfs_gpu_reset, 0, NULL} }; #endif int amdgpu_debugfs_fence_init(struct amdgpu_device *adev) { #if defined(CONFIG_DEBUG_FS) - return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list, 1); + return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list, 2); #else return 0; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 7380f78..2e26a51 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -83,24 +83,32 @@ retry: return r; } *obj = &robj->gem_base; - robj->pid = task_pid_nr(current); - - mutex_lock(&adev->gem.mutex); - list_add_tail(&robj->list, &adev->gem.objects); - mutex_unlock(&adev->gem.mutex); return 0; } -int amdgpu_gem_init(struct amdgpu_device *adev) +void amdgpu_gem_force_release(struct amdgpu_device *adev) { - INIT_LIST_HEAD(&adev->gem.objects); - return 0; -} + struct drm_device *ddev = adev->ddev; + struct drm_file *file; -void amdgpu_gem_fini(struct amdgpu_device *adev) -{ - amdgpu_bo_force_delete(adev); + mutex_lock(&ddev->struct_mutex); + + list_for_each_entry(file, &ddev->filelist, lhead) { + struct drm_gem_object *gobj; + int handle; + + WARN_ONCE(1, "Still active user space clients!\n"); + spin_lock(&file->table_lock); + idr_for_each_entry(&file->object_idr, gobj, handle) { + WARN_ONCE(1, "And also active allocations!\n"); + drm_gem_object_unreference(gobj); + } + idr_destroy(&file->object_idr); + spin_unlock(&file->table_lock); + } + + mutex_unlock(&ddev->struct_mutex); } /* @@ -252,6 +260,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, goto handle_lockup; bo = gem_to_amdgpu_bo(gobj); + bo->prefered_domains = AMDGPU_GEM_DOMAIN_GTT; + bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT; r = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, args->addr, args->flags); if (r) goto release_object; @@ -308,7 +318,7 @@ int amdgpu_mode_dumb_mmap(struct drm_file *filp, return -ENOENT; } robj = gem_to_amdgpu_bo(gobj); - if (amdgpu_ttm_tt_has_userptr(robj->tbo.ttm) || + if (amdgpu_ttm_tt_get_usermm(robj->tbo.ttm) || (robj->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) { drm_gem_object_unreference_unlocked(gobj); return -EPERM; @@ -628,7 +638,7 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, info.bo_size = robj->gem_base.size; info.alignment = robj->tbo.mem.page_alignment << PAGE_SHIFT; - info.domains = robj->initial_domain; + info.domains = robj->prefered_domains; info.domain_flags = robj->flags; amdgpu_bo_unreserve(robj); if (copy_to_user(out, &info, sizeof(info))) @@ -636,14 +646,18 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, break; } case AMDGPU_GEM_OP_SET_PLACEMENT: - if (amdgpu_ttm_tt_has_userptr(robj->tbo.ttm)) { + if (amdgpu_ttm_tt_get_usermm(robj->tbo.ttm)) { r = -EPERM; amdgpu_bo_unreserve(robj); break; } - robj->initial_domain = args->value & (AMDGPU_GEM_DOMAIN_VRAM | - AMDGPU_GEM_DOMAIN_GTT | - AMDGPU_GEM_DOMAIN_CPU); + robj->prefered_domains = args->value & (AMDGPU_GEM_DOMAIN_VRAM | + AMDGPU_GEM_DOMAIN_GTT | + AMDGPU_GEM_DOMAIN_CPU); + robj->allowed_domains = 
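/* SET_PLACEMENT now records both sets that CS validation consumes:
 * prefered_domains is the target while the per-submission move budget
 * lasts, allowed_domains the fallback. A VRAM-only request is widened
 * with GTT below, matching amdgpu_bo_create_restricted(), so the
 * buffer can still be moved out when VRAM is contended. */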
robj->prefered_domains; + if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM) + robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT; + amdgpu_bo_unreserve(robj); break; default: @@ -688,38 +702,73 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv, } #if defined(CONFIG_DEBUG_FS) +static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data) +{ + struct drm_gem_object *gobj = ptr; + struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj); + struct seq_file *m = data; + + unsigned domain; + const char *placement; + unsigned pin_count; + + domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); + switch (domain) { + case AMDGPU_GEM_DOMAIN_VRAM: + placement = "VRAM"; + break; + case AMDGPU_GEM_DOMAIN_GTT: + placement = " GTT"; + break; + case AMDGPU_GEM_DOMAIN_CPU: + default: + placement = " CPU"; + break; + } + seq_printf(m, "\t0x%08x: %12ld byte %s @ 0x%010Lx", + id, amdgpu_bo_size(bo), placement, + amdgpu_bo_gpu_offset(bo)); + + pin_count = ACCESS_ONCE(bo->pin_count); + if (pin_count) + seq_printf(m, " pin count %d", pin_count); + seq_printf(m, "\n"); + + return 0; +} + static int amdgpu_debugfs_gem_info(struct seq_file *m, void *data) { struct drm_info_node *node = (struct drm_info_node *)m->private; struct drm_device *dev = node->minor->dev; - struct amdgpu_device *adev = dev->dev_private; - struct amdgpu_bo *rbo; - unsigned i = 0; + struct drm_file *file; + int r; - mutex_lock(&adev->gem.mutex); - list_for_each_entry(rbo, &adev->gem.objects, list) { - unsigned domain; - const char *placement; + r = mutex_lock_interruptible(&dev->struct_mutex); + if (r) + return r; - domain = amdgpu_mem_type_to_domain(rbo->tbo.mem.mem_type); - switch (domain) { - case AMDGPU_GEM_DOMAIN_VRAM: - placement = "VRAM"; - break; - case AMDGPU_GEM_DOMAIN_GTT: - placement = " GTT"; - break; - case AMDGPU_GEM_DOMAIN_CPU: - default: - placement = " CPU"; - break; - } - seq_printf(m, "bo[0x%08x] %8ldkB %8ldMB %s pid %8ld\n", - i, amdgpu_bo_size(rbo) >> 10, amdgpu_bo_size(rbo) >> 20, - placement, (unsigned long)rbo->pid); - i++; + list_for_each_entry(file, &dev->filelist, lhead) { + struct task_struct *task; + + /* + * Although we have a valid reference on file->pid, that does + * not guarantee that the task_struct who called get_pid() is + * still alive (e.g. get_pid(current) => fork() => exit()). + * Therefore, we need to protect this ->comm access using RCU. + */ + rcu_read_lock(); + task = pid_task(file->pid, PIDTYPE_PID); + seq_printf(m, "pid %8d command %s:\n", pid_nr(file->pid), + task ? task->comm : "<unknown>"); + rcu_read_unlock(); + + spin_lock(&file->table_lock); + idr_for_each(&file->object_idr, amdgpu_debugfs_gem_bo_info, m); + spin_unlock(&file->table_lock); } - mutex_unlock(&adev->gem.mutex); + + mutex_unlock(&dev->struct_mutex); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 9e25eda..b5bdd5d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -55,10 +55,9 @@ static int amdgpu_debugfs_sa_init(struct amdgpu_device *adev); * suballocator. * Returns 0 on success, error on failure. 
*/ -int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm, +int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned size, struct amdgpu_ib *ib) { - struct amdgpu_device *adev = ring->adev; int r; if (size) { @@ -75,9 +74,6 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm, ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); } - amdgpu_sync_create(&ib->sync); - - ib->ring = ring; ib->vm = vm; return 0; @@ -93,7 +89,6 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm, */ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib) { - amdgpu_sync_free(adev, &ib->sync, &ib->fence->base); amdgpu_sa_bo_free(adev, &ib->sa_bo, &ib->fence->base); if (ib->fence) fence_put(&ib->fence->base); @@ -106,6 +101,7 @@ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib) * @num_ibs: number of IBs to schedule * @ibs: IB objects to schedule * @owner: owner for creating the fences + * @f: fence created during this submission * * Schedule an IB on the associated ring (all asics). * Returns 0 on success, error on failure. @@ -120,11 +116,13 @@ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib) * a CONST_IB), it will be put on the ring prior to the DE IB. Prior * to SI there was just a DE IB. */ -int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, - struct amdgpu_ib *ibs, void *owner) +int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, + struct amdgpu_ib *ibs, void *owner, + struct fence *last_vm_update, + struct fence **f) { + struct amdgpu_device *adev = ring->adev; struct amdgpu_ib *ib = &ibs[0]; - struct amdgpu_ring *ring; struct amdgpu_ctx *ctx, *old_ctx; struct amdgpu_vm *vm; unsigned i; @@ -133,7 +131,6 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, if (num_ibs == 0) return -EINVAL; - ring = ibs->ring; ctx = ibs->ctx; vm = ibs->vm; @@ -141,36 +138,21 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, dev_err(adev->dev, "couldn't schedule ib\n"); return -EINVAL; } - r = amdgpu_sync_wait(&ibs->sync); - if (r) { - dev_err(adev->dev, "IB sync failed (%d).\n", r); - return r; - } - r = amdgpu_ring_lock(ring, (256 + AMDGPU_NUM_SYNCS * 8) * num_ibs); - if (r) { - dev_err(adev->dev, "scheduling IB failed (%d).\n", r); - return r; - } - if (vm) { - /* grab a vm id if necessary */ - r = amdgpu_vm_grab_id(ibs->vm, ibs->ring, &ibs->sync); - if (r) { - amdgpu_ring_unlock_undo(ring); - return r; - } + if (vm && !ibs->grabbed_vmid) { + dev_err(adev->dev, "VM IB without ID\n"); + return -EINVAL; } - r = amdgpu_sync_rings(&ibs->sync, ring); + r = amdgpu_ring_alloc(ring, 256 * num_ibs); if (r) { - amdgpu_ring_unlock_undo(ring); - dev_err(adev->dev, "failed to sync rings (%d)\n", r); + dev_err(adev->dev, "scheduling IB failed (%d).\n", r); return r; } if (vm) { /* do context switch */ - amdgpu_vm_flush(ring, vm, ib->sync.last_vm_update); + amdgpu_vm_flush(ring, vm, last_vm_update); if (ring->funcs->emit_gds_switch) amdgpu_ring_emit_gds_switch(ring, ib->vm->ids[ring->idx].id, @@ -186,9 +168,9 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, for (i = 0; i < num_ibs; ++i) { ib = &ibs[i]; - if (ib->ring != ring || ib->ctx != ctx || ib->vm != vm) { + if (ib->ctx != ctx || ib->vm != vm) { ring->current_ctx = old_ctx; - amdgpu_ring_unlock_undo(ring); + amdgpu_ring_undo(ring); return -EINVAL; } amdgpu_ring_emit_ib(ring, ib); @@ -199,14 +181,10 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, if (r) { 
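/* Failure after commands were already written has to rewind: the ring
 * write pointer is restored by amdgpu_ring_undo() and current_ctx is
 * reset, leaving the ring as amdgpu_ring_alloc() found it. The old
 * lock/unlock variants disappear because submissions now funnel
 * through the scheduler, which serializes writers per ring. */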
dev_err(adev->dev, "failed to emit fence (%d)\n", r); ring->current_ctx = old_ctx; - amdgpu_ring_unlock_undo(ring); + amdgpu_ring_undo(ring); return r; } - if (!amdgpu_enable_scheduler && ib->ctx) - ib->sequence = amdgpu_ctx_add_fence(ib->ctx, ring, - &ib->fence->base); - /* wrap the last IB with fence */ if (ib->user) { uint64_t addr = amdgpu_bo_gpu_offset(ib->user->bo); @@ -215,10 +193,10 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs, AMDGPU_FENCE_FLAG_64BIT); } - if (ib->vm) - amdgpu_vm_fence(adev, ib->vm, &ib->fence->base); + if (f) + *f = fence_get(&ib->fence->base); - amdgpu_ring_unlock_commit(ring); + amdgpu_ring_commit(ring); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c new file mode 100644 index 0000000..f29bbb9 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -0,0 +1,159 @@ +/* + * Copyright 2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + * + */ +#include <linux/kthread.h> +#include <linux/wait.h> +#include <linux/sched.h> +#include <drm/drmP.h> +#include "amdgpu.h" +#include "amdgpu_trace.h" + +int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, + struct amdgpu_job **job) +{ + size_t size = sizeof(struct amdgpu_job); + + if (num_ibs == 0) + return -EINVAL; + + size += sizeof(struct amdgpu_ib) * num_ibs; + + *job = kzalloc(size, GFP_KERNEL); + if (!*job) + return -ENOMEM; + + (*job)->adev = adev; + (*job)->ibs = (void *)&(*job)[1]; + (*job)->num_ibs = num_ibs; + + amdgpu_sync_create(&(*job)->sync); + + return 0; +} + +int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size, + struct amdgpu_job **job) +{ + int r; + + r = amdgpu_job_alloc(adev, 1, job); + if (r) + return r; + + r = amdgpu_ib_get(adev, NULL, size, &(*job)->ibs[0]); + if (r) + kfree(*job); + + return r; +} + +void amdgpu_job_free(struct amdgpu_job *job) +{ + unsigned i; + + for (i = 0; i < job->num_ibs; ++i) + amdgpu_ib_free(job->adev, &job->ibs[i]); + + amdgpu_bo_unref(&job->uf.bo); + amdgpu_sync_free(&job->sync); + kfree(job); +} + +int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, + struct amd_sched_entity *entity, void *owner, + struct fence **f) +{ + job->ring = ring; + job->base.sched = &ring->sched; + job->base.s_entity = entity; + job->base.s_fence = amd_sched_fence_create(job->base.s_entity, owner); + if (!job->base.s_fence) + return -ENOMEM; + + *f = fence_get(&job->base.s_fence->base); + + job->owner = owner; + amd_sched_entity_push_job(&job->base); + + return 0; +} + +static struct fence *amdgpu_job_dependency(struct amd_sched_job *sched_job) +{ + struct amdgpu_job *job = to_amdgpu_job(sched_job); + struct amdgpu_vm *vm = job->ibs->vm; + + struct fence *fence = amdgpu_sync_get_fence(&job->sync); + + if (fence == NULL && vm && !job->ibs->grabbed_vmid) { + struct amdgpu_ring *ring = job->ring; + int r; + + r = amdgpu_vm_grab_id(vm, ring, &job->sync, + &job->base.s_fence->base); + if (r) + DRM_ERROR("Error getting VM ID (%d)\n", r); + else + job->ibs->grabbed_vmid = true; + + fence = amdgpu_sync_get_fence(&job->sync); + } + + return fence; +} + +static struct fence *amdgpu_job_run(struct amd_sched_job *sched_job) +{ + struct fence *fence = NULL; + struct amdgpu_job *job; + int r; + + if (!sched_job) { + DRM_ERROR("job is null\n"); + return NULL; + } + job = to_amdgpu_job(sched_job); + + r = amdgpu_sync_wait(&job->sync); + if (r) { + DRM_ERROR("failed to sync wait (%d)\n", r); + return NULL; + } + + trace_amdgpu_sched_run_job(job); + r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job->owner, + job->sync.last_vm_update, &fence); + if (r) { + DRM_ERROR("Error scheduling IBs (%d)\n", r); + goto err; + } + +err: + amdgpu_job_free(job); + return fence; +} + +struct amd_sched_backend_ops amdgpu_sched_ops = { + .dependency = amdgpu_job_dependency, + .run_job = amdgpu_job_run, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index e23843f..7805a87 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -447,8 +447,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file dev_info.max_memory_clock = adev->pm.default_mclk * 10; } dev_info.enabled_rb_pipes_mask = adev->gfx.config.backend_enable_mask; - dev_info.num_rb_pipes = adev->gfx.config.max_backends_per_se * - adev->gfx.config.max_shader_engines; + dev_info.num_rb_pipes = adev->gfx.config.num_rbs; 
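amdgpu_job_alloc() above sizes a single allocation for the job header plus its trailing IB array and points (*job)->ibs just past the header, so the free path is one kfree(). The same layout as a self-contained sketch (names are illustrative, not the kernel's):

#include <linux/slab.h>
#include <linux/types.h>

struct item {
	u32 payload;
};

struct container {
	unsigned int num_items;
	struct item *items;	/* points into the same allocation */
};

static struct container *container_alloc(unsigned int num_items)
{
	struct container *c;

	if (num_items == 0)
		return NULL;

	/* header and array come from one kzalloc(), so releasing the
	 * container is a single kfree(c) with no per-member cleanup */
	c = kzalloc(sizeof(*c) + num_items * sizeof(struct item),
		    GFP_KERNEL);
	if (!c)
		return NULL;

	c->items = (void *)&c[1];
	c->num_items = num_items;
	return c;
}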
dev_info.num_hw_gfx_contexts = adev->gfx.config.max_hw_contexts; dev_info._pad = 0; dev_info.ids_flags = 0; @@ -727,6 +726,12 @@ int amdgpu_get_vblank_timestamp_kms(struct drm_device *dev, unsigned int pipe, /* Get associated drm_crtc: */ crtc = &adev->mode_info.crtcs[pipe]->base; + if (!crtc) { + /* This can occur on driver load if some component fails to + * initialize completely and driver is unloaded */ + DRM_ERROR("Uninitialized crtc %d\n", pipe); + return -EINVAL; + } /* Helper routine in DRM core does all the work: */ return drm_calc_vbltimestamp_from_scanoutpos(dev, pipe, max_error, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index b1969f2..d7ec9bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -48,8 +48,7 @@ struct amdgpu_mn { /* protected by adev->mn_lock */ struct hlist_node node; - /* objects protected by lock */ - struct mutex lock; + /* objects protected by mm->mmap_sem */ struct rb_root objects; }; @@ -73,21 +72,19 @@ static void amdgpu_mn_destroy(struct work_struct *work) struct amdgpu_bo *bo, *next_bo; mutex_lock(&adev->mn_lock); - mutex_lock(&rmn->lock); + down_write(&rmn->mm->mmap_sem); hash_del(&rmn->node); rbtree_postorder_for_each_entry_safe(node, next_node, &rmn->objects, it.rb) { - - interval_tree_remove(&node->it, &rmn->objects); list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) { bo->mn = NULL; list_del_init(&bo->mn_list); } kfree(node); } - mutex_unlock(&rmn->lock); + up_write(&rmn->mm->mmap_sem); mutex_unlock(&adev->mn_lock); - mmu_notifier_unregister(&rmn->mn, rmn->mm); + mmu_notifier_unregister_no_release(&rmn->mn, rmn->mm); kfree(rmn); } @@ -129,8 +126,6 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn, /* notification is exclusive, but interval is inclusive */ end -= 1; - mutex_lock(&rmn->lock); - it = interval_tree_iter_first(&rmn->objects, start, end); while (it) { struct amdgpu_mn_node *node; @@ -142,7 +137,8 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn, list_for_each_entry(bo, &node->bos, mn_list) { - if (!bo->tbo.ttm || bo->tbo.ttm->state != tt_bound) + if (!amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, start, + end)) continue; r = amdgpu_bo_reserve(bo, true); @@ -164,8 +160,6 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn, amdgpu_bo_unreserve(bo); } } - - mutex_unlock(&rmn->lock); } static const struct mmu_notifier_ops amdgpu_mn_ops = { @@ -186,8 +180,8 @@ static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) struct amdgpu_mn *rmn; int r; - down_write(&mm->mmap_sem); mutex_lock(&adev->mn_lock); + down_write(&mm->mmap_sem); hash_for_each_possible(adev->mn_hash, rmn, node, (unsigned long)mm) if (rmn->mm == mm) @@ -202,7 +196,6 @@ static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) rmn->adev = adev; rmn->mm = mm; rmn->mn.ops = &amdgpu_mn_ops; - mutex_init(&rmn->lock); rmn->objects = RB_ROOT; r = __mmu_notifier_register(&rmn->mn, mm); @@ -212,14 +205,14 @@ static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev) hash_add(adev->mn_hash, &rmn->node, (unsigned long)mm); release_locks: - mutex_unlock(&adev->mn_lock); up_write(&mm->mmap_sem); + mutex_unlock(&adev->mn_lock); return rmn; free_rmn: - mutex_unlock(&adev->mn_lock); up_write(&mm->mmap_sem); + mutex_unlock(&adev->mn_lock); kfree(rmn); return ERR_PTR(r); @@ -249,7 +242,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) INIT_LIST_HEAD(&bos); - mutex_lock(&rmn->lock); + 
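/* Lock swap in amdgpu_mn.c: the interval tree of tracked ranges is now
 * protected by mm->mmap_sem instead of a private mutex, with all
 * writers (register, unregister, destroy) taking it down_write, and
 * adev->mn_lock is consistently acquired before the semaphore, as the
 * reordered amdgpu_mn_get() above shows. */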
down_write(&rmn->mm->mmap_sem); while ((it = interval_tree_iter_first(&rmn->objects, addr, end))) { kfree(node); @@ -263,7 +256,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) if (!node) { node = kmalloc(sizeof(struct amdgpu_mn_node), GFP_KERNEL); if (!node) { - mutex_unlock(&rmn->lock); + up_write(&rmn->mm->mmap_sem); return -ENOMEM; } } @@ -278,7 +271,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) interval_tree_insert(&node->it, &rmn->objects); - mutex_unlock(&rmn->lock); + up_write(&rmn->mm->mmap_sem); return 0; } @@ -297,13 +290,15 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo) struct list_head *head; mutex_lock(&adev->mn_lock); + rmn = bo->mn; if (rmn == NULL) { mutex_unlock(&adev->mn_lock); return; } - mutex_lock(&rmn->lock); + down_write(&rmn->mm->mmap_sem); + /* save the next list entry for later */ head = bo->mn_list.next; @@ -317,6 +312,6 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo) kfree(node); } - mutex_unlock(&rmn->lock); + up_write(&rmn->mm->mmap_sem); mutex_unlock(&adev->mn_lock); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index fdc1be8..8d432e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -390,7 +390,6 @@ struct amdgpu_crtc { struct drm_display_mode native_mode; u32 pll_id; /* page flipping */ - struct workqueue_struct *pflip_queue; struct amdgpu_flip_work *pflip_works; enum amdgpu_flip_status pflip_status; int deferred_flip_completion; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index b8fbbd7..9a025a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -97,9 +97,6 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) amdgpu_update_memory_usage(bo->adev, &bo->tbo.mem, NULL); - mutex_lock(&bo->adev->gem.mutex); - list_del_init(&bo->list); - mutex_unlock(&bo->adev->gem.mutex); drm_gem_object_release(&bo->gem_base); amdgpu_bo_unref(&bo->parent); kfree(bo->metadata); @@ -254,12 +251,15 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, bo->adev = adev; INIT_LIST_HEAD(&bo->list); INIT_LIST_HEAD(&bo->va); - bo->initial_domain = domain & (AMDGPU_GEM_DOMAIN_VRAM | - AMDGPU_GEM_DOMAIN_GTT | - AMDGPU_GEM_DOMAIN_CPU | - AMDGPU_GEM_DOMAIN_GDS | - AMDGPU_GEM_DOMAIN_GWS | - AMDGPU_GEM_DOMAIN_OA); + bo->prefered_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM | + AMDGPU_GEM_DOMAIN_GTT | + AMDGPU_GEM_DOMAIN_CPU | + AMDGPU_GEM_DOMAIN_GDS | + AMDGPU_GEM_DOMAIN_GWS | + AMDGPU_GEM_DOMAIN_OA); + bo->allowed_domains = bo->prefered_domains; + if (!kernel && bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM) + bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT; bo->flags = flags; @@ -367,7 +367,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, int r, i; unsigned fpfn, lpfn; - if (amdgpu_ttm_tt_has_userptr(bo->tbo.ttm)) + if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) return -EPERM; if (WARN_ON_ONCE(min_offset > max_offset)) @@ -470,26 +470,6 @@ int amdgpu_bo_evict_vram(struct amdgpu_device *adev) return ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_VRAM); } -void amdgpu_bo_force_delete(struct amdgpu_device *adev) -{ - struct amdgpu_bo *bo, *n; - - if (list_empty(&adev->gem.objects)) { - return; - } - dev_err(adev->dev, "Userspace still has active objects !\n"); - list_for_each_entry_safe(bo, n, &adev->gem.objects, list) { - dev_err(adev->dev, "%p %p %lu %lu force free\n", - &bo->gem_base, bo, 
(unsigned long)bo->gem_base.size, - *((unsigned long *)&bo->gem_base.refcount)); - mutex_lock(&bo->adev->gem.mutex); - list_del_init(&bo->list); - mutex_unlock(&bo->adev->gem.mutex); - /* this should unref the ttm bo */ - drm_gem_object_unreference_unlocked(&bo->gem_base); - } -} - int amdgpu_bo_init(struct amdgpu_device *adev) { /* Add an MTRR for the VRAM */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 5107fb2..acc0801 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -149,7 +149,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, u64 *gpu_addr); int amdgpu_bo_unpin(struct amdgpu_bo *bo); int amdgpu_bo_evict_vram(struct amdgpu_device *adev); -void amdgpu_bo_force_delete(struct amdgpu_device *adev); int amdgpu_bo_init(struct amdgpu_device *adev); void amdgpu_bo_fini(struct amdgpu_device *adev); int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 7d8d84e..d77b2bd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -119,7 +119,9 @@ static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev, level = amdgpu_dpm_get_performance_level(adev); return snprintf(buf, PAGE_SIZE, "%s\n", (level == AMD_DPM_FORCED_LEVEL_AUTO) ? "auto" : - (level == AMD_DPM_FORCED_LEVEL_LOW) ? "low" : "high"); + (level == AMD_DPM_FORCED_LEVEL_LOW) ? "low" : + (level == AMD_DPM_FORCED_LEVEL_HIGH) ? "high" : + (level == AMD_DPM_FORCED_LEVEL_MANUAL) ? "manual" : "unknown"); } else { enum amdgpu_dpm_forced_level level; @@ -146,6 +148,8 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev, level = AMDGPU_DPM_FORCED_LEVEL_HIGH; } else if (strncmp("auto", buf, strlen("auto")) == 0) { level = AMDGPU_DPM_FORCED_LEVEL_AUTO; + } else if (strncmp("manual", buf, strlen("manual")) == 0) { + level = AMDGPU_DPM_FORCED_LEVEL_MANUAL; } else { count = -EINVAL; goto fail; @@ -172,10 +176,293 @@ fail: return count; } +static ssize_t amdgpu_get_pp_num_states(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + struct pp_states_info data; + int i, buf_len; + + if (adev->pp_enabled) + amdgpu_dpm_get_pp_num_states(adev, &data); + + buf_len = snprintf(buf, PAGE_SIZE, "states: %d\n", data.nums); + for (i = 0; i < data.nums; i++) + buf_len += snprintf(buf + buf_len, PAGE_SIZE, "%d %s\n", i, + (data.states[i] == POWER_STATE_TYPE_INTERNAL_BOOT) ? "boot" : + (data.states[i] == POWER_STATE_TYPE_BATTERY) ? "battery" : + (data.states[i] == POWER_STATE_TYPE_BALANCED) ? "balanced" : + (data.states[i] == POWER_STATE_TYPE_PERFORMANCE) ? 
"performance" : "default"); + + return buf_len; +} + +static ssize_t amdgpu_get_pp_cur_state(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + struct pp_states_info data; + enum amd_pm_state_type pm = 0; + int i = 0; + + if (adev->pp_enabled) { + + pm = amdgpu_dpm_get_current_power_state(adev); + amdgpu_dpm_get_pp_num_states(adev, &data); + + for (i = 0; i < data.nums; i++) { + if (pm == data.states[i]) + break; + } + + if (i == data.nums) + i = -EINVAL; + } + + return snprintf(buf, PAGE_SIZE, "%d\n", i); +} + +static ssize_t amdgpu_get_pp_force_state(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + struct pp_states_info data; + enum amd_pm_state_type pm = 0; + int i; + + if (adev->pp_force_state_enabled && adev->pp_enabled) { + pm = amdgpu_dpm_get_current_power_state(adev); + amdgpu_dpm_get_pp_num_states(adev, &data); + + for (i = 0; i < data.nums; i++) { + if (pm == data.states[i]) + break; + } + + if (i == data.nums) + i = -EINVAL; + + return snprintf(buf, PAGE_SIZE, "%d\n", i); + + } else + return snprintf(buf, PAGE_SIZE, "\n"); +} + +static ssize_t amdgpu_set_pp_force_state(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + enum amd_pm_state_type state = 0; + long idx; + int ret; + + if (strlen(buf) == 1) + adev->pp_force_state_enabled = false; + else { + ret = kstrtol(buf, 0, &idx); + + if (ret) { + count = -EINVAL; + goto fail; + } + + if (adev->pp_enabled) { + struct pp_states_info data; + amdgpu_dpm_get_pp_num_states(adev, &data); + state = data.states[idx]; + /* only set user selected power states */ + if (state != POWER_STATE_TYPE_INTERNAL_BOOT && + state != POWER_STATE_TYPE_DEFAULT) { + amdgpu_dpm_dispatch_task(adev, + AMD_PP_EVENT_ENABLE_USER_STATE, &state, NULL); + adev->pp_force_state_enabled = true; + } + } + } +fail: + return count; +} + +static ssize_t amdgpu_get_pp_table(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + char *table = NULL; + int size, i; + + if (adev->pp_enabled) + size = amdgpu_dpm_get_pp_table(adev, &table); + else + return 0; + + if (size >= PAGE_SIZE) + size = PAGE_SIZE - 1; + + for (i = 0; i < size; i++) { + sprintf(buf + i, "%02x", table[i]); + } + sprintf(buf + i, "\n"); + + return size; +} + +static ssize_t amdgpu_set_pp_table(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + + if (adev->pp_enabled) + amdgpu_dpm_set_pp_table(adev, buf, count); + + return count; +} + +static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + ssize_t size = 0; + + if (adev->pp_enabled) + size = amdgpu_dpm_print_clock_levels(adev, PP_SCLK, buf); + + return size; +} + +static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = 
ddev->dev_private; + int ret; + long level; + + ret = kstrtol(buf, 0, &level); + + if (ret) { + count = -EINVAL; + goto fail; + } + + if (adev->pp_enabled) + amdgpu_dpm_force_clock_level(adev, PP_SCLK, level); +fail: + return count; +} + +static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + ssize_t size = 0; + + if (adev->pp_enabled) + size = amdgpu_dpm_print_clock_levels(adev, PP_MCLK, buf); + + return size; +} + +static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + int ret; + long level; + + ret = kstrtol(buf, 0, &level); + + if (ret) { + count = -EINVAL; + goto fail; + } + + if (adev->pp_enabled) + amdgpu_dpm_force_clock_level(adev, PP_MCLK, level); +fail: + return count; +} + +static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + ssize_t size = 0; + + if (adev->pp_enabled) + size = amdgpu_dpm_print_clock_levels(adev, PP_PCIE, buf); + + return size; +} + +static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = ddev->dev_private; + int ret; + long level; + + ret = kstrtol(buf, 0, &level); + + if (ret) { + count = -EINVAL; + goto fail; + } + + if (adev->pp_enabled) + amdgpu_dpm_force_clock_level(adev, PP_PCIE, level); +fail: + return count; +} + static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state); static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR, amdgpu_get_dpm_forced_performance_level, amdgpu_set_dpm_forced_performance_level); +static DEVICE_ATTR(pp_num_states, S_IRUGO, amdgpu_get_pp_num_states, NULL); +static DEVICE_ATTR(pp_cur_state, S_IRUGO, amdgpu_get_pp_cur_state, NULL); +static DEVICE_ATTR(pp_force_state, S_IRUGO | S_IWUSR, + amdgpu_get_pp_force_state, + amdgpu_set_pp_force_state); +static DEVICE_ATTR(pp_table, S_IRUGO | S_IWUSR, + amdgpu_get_pp_table, + amdgpu_set_pp_table); +static DEVICE_ATTR(pp_dpm_sclk, S_IRUGO | S_IWUSR, + amdgpu_get_pp_dpm_sclk, + amdgpu_set_pp_dpm_sclk); +static DEVICE_ATTR(pp_dpm_mclk, S_IRUGO | S_IWUSR, + amdgpu_get_pp_dpm_mclk, + amdgpu_set_pp_dpm_mclk); +static DEVICE_ATTR(pp_dpm_pcie, S_IRUGO | S_IWUSR, + amdgpu_get_pp_dpm_pcie, + amdgpu_set_pp_dpm_pcie); static ssize_t amdgpu_hwmon_show_temp(struct device *dev, struct device_attribute *attr, @@ -623,14 +910,12 @@ force: amdgpu_dpm_print_power_state(adev, adev->pm.dpm.requested_ps); } - mutex_lock(&adev->ring_lock); - /* update whether vce is active */ ps->vce_active = adev->pm.dpm.vce_active; ret = amdgpu_dpm_pre_set_power_state(adev); if (ret) - goto done; + return; /* update display watermarks based on new power state */ amdgpu_display_bandwidth_update(adev); @@ -667,9 +952,6 @@ force: amdgpu_dpm_force_performance_level(adev, adev->pm.dpm.forced_level); } } - -done: - mutex_unlock(&adev->ring_lock); } void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable) @@ -770,6 +1052,44 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev) DRM_ERROR("failed to create device file for dpm state\n"); 
return ret; } + + if (adev->pp_enabled) { + ret = device_create_file(adev->dev, &dev_attr_pp_num_states); + if (ret) { + DRM_ERROR("failed to create device file pp_num_states\n"); + return ret; + } + ret = device_create_file(adev->dev, &dev_attr_pp_cur_state); + if (ret) { + DRM_ERROR("failed to create device file pp_cur_state\n"); + return ret; + } + ret = device_create_file(adev->dev, &dev_attr_pp_force_state); + if (ret) { + DRM_ERROR("failed to create device file pp_force_state\n"); + return ret; + } + ret = device_create_file(adev->dev, &dev_attr_pp_table); + if (ret) { + DRM_ERROR("failed to create device file pp_table\n"); + return ret; + } + ret = device_create_file(adev->dev, &dev_attr_pp_dpm_sclk); + if (ret) { + DRM_ERROR("failed to create device file pp_dpm_sclk\n"); + return ret; + } + ret = device_create_file(adev->dev, &dev_attr_pp_dpm_mclk); + if (ret) { + DRM_ERROR("failed to create device file pp_dpm_mclk\n"); + return ret; + } + ret = device_create_file(adev->dev, &dev_attr_pp_dpm_pcie); + if (ret) { + DRM_ERROR("failed to create device file pp_dpm_pcie\n"); + return ret; + } + } ret = amdgpu_debugfs_pm_init(adev); if (ret) { DRM_ERROR("Failed to register debugfs file for dpm!\n"); @@ -787,6 +1107,15 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev) hwmon_device_unregister(adev->pm.int_hwmon_dev); device_remove_file(adev->dev, &dev_attr_power_dpm_state); device_remove_file(adev->dev, &dev_attr_power_dpm_force_performance_level); + if (adev->pp_enabled) { + device_remove_file(adev->dev, &dev_attr_pp_num_states); + device_remove_file(adev->dev, &dev_attr_pp_cur_state); + device_remove_file(adev->dev, &dev_attr_pp_force_state); + device_remove_file(adev->dev, &dev_attr_pp_table); + device_remove_file(adev->dev, &dev_attr_pp_dpm_sclk); + device_remove_file(adev->dev, &dev_attr_pp_dpm_mclk); + device_remove_file(adev->dev, &dev_attr_pp_dpm_pcie); + } } void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) @@ -802,13 +1131,11 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) int i = 0; amdgpu_display_bandwidth_update(adev); - mutex_lock(&adev->ring_lock); - for (i = 0; i < AMDGPU_MAX_RINGS; i++) { - struct amdgpu_ring *ring = adev->rings[i]; - if (ring && ring->ready) - amdgpu_fence_wait_empty(ring); - } - mutex_unlock(&adev->ring_lock); + for (i = 0; i < AMDGPU_MAX_RINGS; i++) { + struct amdgpu_ring *ring = adev->rings[i]; + if (ring && ring->ready) + amdgpu_fence_wait_empty(ring); + } amdgpu_dpm_dispatch_task(adev, AMD_PP_EVENT_DISPLAY_CONFIG_CHANGE, NULL, NULL); } else { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c index 59f735a..be6388f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c @@ -73,10 +73,6 @@ struct drm_gem_object *amdgpu_gem_prime_import_sg_table(struct drm_device *dev, if (ret) return ERR_PTR(ret); - mutex_lock(&adev->gem.mutex); - list_add_tail(&bo->list, &adev->gem.objects); - mutex_unlock(&adev->gem.mutex); - return &bo->gem_base; } @@ -121,7 +117,7 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, { struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj); - if (amdgpu_ttm_tt_has_userptr(bo->tbo.ttm)) + if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) return ERR_PTR(-EPERM); return drm_gem_prime_export(dev, gobj, flags); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index d1f234d..56c07e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -49,28 +49,6 @@ static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring); /** - * amdgpu_ring_free_size - update the free size - * - * @adev: amdgpu_device pointer - * @ring: amdgpu_ring structure holding ring information - * - * Update the free dw slots in the ring buffer (all asics). - */ -void amdgpu_ring_free_size(struct amdgpu_ring *ring) -{ - uint32_t rptr = amdgpu_ring_get_rptr(ring); - - /* This works because ring_size is a power of 2 */ - ring->ring_free_dw = rptr + (ring->ring_size / 4); - ring->ring_free_dw -= ring->wptr; - ring->ring_free_dw &= ring->ptr_mask; - if (!ring->ring_free_dw) { - /* this is an empty ring */ - ring->ring_free_dw = ring->ring_size / 4; - } -} - -/** * amdgpu_ring_alloc - allocate space on the ring buffer * * @adev: amdgpu_device pointer @@ -82,50 +60,18 @@ void amdgpu_ring_free_size(struct amdgpu_ring *ring) */ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw) { - int r; - - /* make sure we aren't trying to allocate more space than there is on the ring */ - if (ndw > (ring->ring_size / 4)) - return -ENOMEM; /* Align requested size with padding so unlock_commit can * pad safely */ - amdgpu_ring_free_size(ring); ndw = (ndw + ring->align_mask) & ~ring->align_mask; - while (ndw > (ring->ring_free_dw - 1)) { - amdgpu_ring_free_size(ring); - if (ndw < ring->ring_free_dw) { - break; - } - r = amdgpu_fence_wait_next(ring); - if (r) - return r; - } - ring->count_dw = ndw; - ring->wptr_old = ring->wptr; - return 0; -} -/** - * amdgpu_ring_lock - lock the ring and allocate space on it - * - * @adev: amdgpu_device pointer - * @ring: amdgpu_ring structure holding ring information - * @ndw: number of dwords to allocate in the ring buffer - * - * Lock the ring and allocate @ndw dwords in the ring buffer - * (all asics). - * Returns 0 on success, error on failure. - */ -int amdgpu_ring_lock(struct amdgpu_ring *ring, unsigned ndw) -{ - int r; + /* Make sure we aren't trying to allocate more space + * than the maximum for one submission + */ + if (WARN_ON_ONCE(ndw > ring->max_dw)) + return -ENOMEM; - mutex_lock(ring->ring_lock); - r = amdgpu_ring_alloc(ring, ndw); - if (r) { - mutex_unlock(ring->ring_lock); - return r; - } + ring->count_dw = ndw; + ring->wptr_old = ring->wptr; return 0; } @@ -144,6 +90,19 @@ void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) amdgpu_ring_write(ring, ring->nop); } +/** amdgpu_ring_generic_pad_ib - pad IB with NOP packets + * + * @ring: amdgpu_ring structure holding ring information + * @ib: IB to add NOP packets to + * + * This is the generic pad_ib function for rings except SDMA + */ +void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) +{ + while (ib->length_dw & ring->align_mask) + ib->ptr[ib->length_dw++] = ring->nop; +} + /** * amdgpu_ring_commit - tell the GPU to execute the new * commands on the ring buffer @@ -168,20 +127,6 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring) } /** - * amdgpu_ring_unlock_commit - tell the GPU to execute the new - * commands on the ring buffer and unlock it - * - * @ring: amdgpu_ring structure holding ring information - * - * Call amdgpu_ring_commit() then unlock the ring (all asics). 
- */ -void amdgpu_ring_unlock_commit(struct amdgpu_ring *ring) -{ - amdgpu_ring_commit(ring); - mutex_unlock(ring->ring_lock); -} - -/** * amdgpu_ring_undo - reset the wptr * * @ring: amdgpu_ring structure holding ring information @@ -194,19 +139,6 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring) } /** - * amdgpu_ring_unlock_undo - reset the wptr and unlock the ring - * - * @ring: amdgpu_ring structure holding ring information - * - * Call amdgpu_ring_undo() then unlock the ring (all asics). - */ -void amdgpu_ring_unlock_undo(struct amdgpu_ring *ring) -{ - amdgpu_ring_undo(ring); - mutex_unlock(ring->ring_lock); -} - -/** * amdgpu_ring_backup - Back up the content of a ring * * @ring: the ring we want to back up @@ -218,43 +150,32 @@ unsigned amdgpu_ring_backup(struct amdgpu_ring *ring, { unsigned size, ptr, i; - /* just in case lock the ring */ - mutex_lock(ring->ring_lock); *data = NULL; - if (ring->ring_obj == NULL) { - mutex_unlock(ring->ring_lock); + if (ring->ring_obj == NULL) return 0; - } /* it doesn't make sense to save anything if all fences are signaled */ - if (!amdgpu_fence_count_emitted(ring)) { - mutex_unlock(ring->ring_lock); + if (!amdgpu_fence_count_emitted(ring)) return 0; - } ptr = le32_to_cpu(*ring->next_rptr_cpu_addr); size = ring->wptr + (ring->ring_size / 4); size -= ptr; size &= ring->ptr_mask; - if (size == 0) { - mutex_unlock(ring->ring_lock); + if (size == 0) return 0; - } /* and then save the content of the ring */ *data = kmalloc_array(size, sizeof(uint32_t), GFP_KERNEL); - if (!*data) { - mutex_unlock(ring->ring_lock); + if (!*data) return 0; - } for (i = 0; i < size; ++i) { (*data)[i] = ring->ring[ptr++]; ptr &= ring->ptr_mask; } - mutex_unlock(ring->ring_lock); return size; } @@ -276,7 +197,7 @@ int amdgpu_ring_restore(struct amdgpu_ring *ring, return 0; /* restore the saved ring content */ - r = amdgpu_ring_lock(ring, size); + r = amdgpu_ring_alloc(ring, size); if (r) return r; @@ -284,7 +205,7 @@ int amdgpu_ring_restore(struct amdgpu_ring *ring, amdgpu_ring_write(ring, data[i]); } - amdgpu_ring_unlock_commit(ring); + amdgpu_ring_commit(ring); kfree(data); return 0; } @@ -352,7 +273,6 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, return r; } - ring->ring_lock = &adev->ring_lock; /* Align ring size */ rb_bufsz = order_base_2(ring_size / 8); ring_size = (1 << (rb_bufsz + 1)) * 4; @@ -389,7 +309,8 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, } } ring->ptr_mask = (ring->ring_size / 4) - 1; - ring->ring_free_dw = ring->ring_size / 4; + ring->max_dw = DIV_ROUND_UP(ring->ring_size / 4, + amdgpu_sched_hw_submission); if (amdgpu_debugfs_ring_init(adev, ring)) { DRM_ERROR("Failed to register debugfs file for rings !\n"); @@ -410,15 +331,10 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring) int r; struct amdgpu_bo *ring_obj; - if (ring->ring_lock == NULL) - return; - - mutex_lock(ring->ring_lock); ring_obj = ring->ring_obj; ring->ready = false; ring->ring = NULL; ring->ring_obj = NULL; - mutex_unlock(ring->ring_lock); amdgpu_wb_free(ring->adev, ring->fence_offs); amdgpu_wb_free(ring->adev, ring->rptr_offs); @@ -474,29 +390,18 @@ static int amdgpu_debugfs_ring_info(struct seq_file *m, void *data) struct amdgpu_ring *ring = (void *)(((uint8_t*)adev) + roffset); uint32_t rptr, wptr, rptr_next; - unsigned count, i, j; - - amdgpu_ring_free_size(ring); - count = (ring->ring_size / 4) - ring->ring_free_dw; + unsigned i; wptr = amdgpu_ring_get_wptr(ring); - seq_printf(m, "wptr: 0x%08x [%5d]\n", - wptr, wptr); 
+ seq_printf(m, "wptr: 0x%08x [%5d]\n", wptr, wptr); rptr = amdgpu_ring_get_rptr(ring); - seq_printf(m, "rptr: 0x%08x [%5d]\n", - rptr, rptr); - rptr_next = le32_to_cpu(*ring->next_rptr_cpu_addr); + seq_printf(m, "rptr: 0x%08x [%5d]\n", rptr, rptr); + seq_printf(m, "driver's copy of the wptr: 0x%08x [%5d]\n", ring->wptr, ring->wptr); - seq_printf(m, "last semaphore signal addr : 0x%016llx\n", - ring->last_semaphore_signal_addr); - seq_printf(m, "last semaphore wait addr : 0x%016llx\n", - ring->last_semaphore_wait_addr); - seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw); - seq_printf(m, "%u dwords in ring\n", count); if (!ring->ready) return 0; @@ -505,11 +410,20 @@ static int amdgpu_debugfs_ring_info(struct seq_file *m, void *data) * packet that is the root issue */ i = (rptr + ring->ptr_mask + 1 - 32) & ring->ptr_mask; - for (j = 0; j <= (count + 32); j++) { + while (i != rptr) { + seq_printf(m, "r[%5d]=0x%08x", i, ring->ring[i]); + if (i == rptr) + seq_puts(m, " *"); + if (i == rptr_next) + seq_puts(m, " #"); + seq_puts(m, "\n"); + i = (i + 1) & ring->ptr_mask; + } + while (i != wptr) { seq_printf(m, "r[%5d]=0x%08x", i, ring->ring[i]); - if (rptr == i) + if (i == rptr) seq_puts(m, " *"); - if (rptr_next == i) + if (i == rptr_next) seq_puts(m, " #"); seq_puts(m, "\n"); i = (i + 1) & ring->ptr_mask; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c index 8b88edb..7d8f8f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c @@ -321,8 +321,11 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager, int i, r; signed long t; - BUG_ON(align > sa_manager->align); - BUG_ON(size > sa_manager->size); + if (WARN_ON_ONCE(align > sa_manager->align)) + return -EINVAL; + + if (WARN_ON_ONCE(size > sa_manager->size)) + return -EINVAL; *sa_bo = kmalloc(sizeof(struct amdgpu_sa_bo), GFP_KERNEL); if ((*sa_bo) == NULL) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c deleted file mode 100644 index 438c052..0000000 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright 2015 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - * - */ -#include <linux/kthread.h> -#include <linux/wait.h> -#include <linux/sched.h> -#include <drm/drmP.h> -#include "amdgpu.h" -#include "amdgpu_trace.h" - -static struct fence *amdgpu_sched_dependency(struct amd_sched_job *sched_job) -{ - struct amdgpu_job *job = to_amdgpu_job(sched_job); - return amdgpu_sync_get_fence(&job->ibs->sync); -} - -static struct fence *amdgpu_sched_run_job(struct amd_sched_job *sched_job) -{ - struct amdgpu_fence *fence = NULL; - struct amdgpu_job *job; - int r; - - if (!sched_job) { - DRM_ERROR("job is null\n"); - return NULL; - } - job = to_amdgpu_job(sched_job); - trace_amdgpu_sched_run_job(job); - r = amdgpu_ib_schedule(job->adev, job->num_ibs, job->ibs, job->owner); - if (r) { - DRM_ERROR("Error scheduling IBs (%d)\n", r); - goto err; - } - - fence = job->ibs[job->num_ibs - 1].fence; - fence_get(&fence->base); - -err: - if (job->free_job) - job->free_job(job); - - kfree(job); - return fence ? &fence->base : NULL; -} - -struct amd_sched_backend_ops amdgpu_sched_ops = { - .dependency = amdgpu_sched_dependency, - .run_job = amdgpu_sched_run_job, -}; - -int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, - struct amdgpu_ring *ring, - struct amdgpu_ib *ibs, - unsigned num_ibs, - int (*free_job)(struct amdgpu_job *), - void *owner, - struct fence **f) -{ - int r = 0; - if (amdgpu_enable_scheduler) { - struct amdgpu_job *job = - kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL); - if (!job) - return -ENOMEM; - job->base.sched = &ring->sched; - job->base.s_entity = &adev->kernel_ctx.rings[ring->idx].entity; - job->base.s_fence = amd_sched_fence_create(job->base.s_entity, owner); - if (!job->base.s_fence) { - kfree(job); - return -ENOMEM; - } - *f = fence_get(&job->base.s_fence->base); - - job->adev = adev; - job->ibs = ibs; - job->num_ibs = num_ibs; - job->owner = owner; - job->free_job = free_job; - amd_sched_entity_push_job(&job->base); - } else { - r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner); - if (r) - return r; - *f = fence_get(&ibs[num_ibs - 1].fence->base); - } - - return 0; -} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c deleted file mode 100644 index 1caaf20..0000000 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright 2011 Christian König. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - */ -/* - * Authors: - * Christian König <deathsimple@vodafone.de> - */ -#include <drm/drmP.h> -#include "amdgpu.h" -#include "amdgpu_trace.h" - -int amdgpu_semaphore_create(struct amdgpu_device *adev, - struct amdgpu_semaphore **semaphore) -{ - int r; - - *semaphore = kmalloc(sizeof(struct amdgpu_semaphore), GFP_KERNEL); - if (*semaphore == NULL) { - return -ENOMEM; - } - r = amdgpu_sa_bo_new(&adev->ring_tmp_bo, - &(*semaphore)->sa_bo, 8, 8); - if (r) { - kfree(*semaphore); - *semaphore = NULL; - return r; - } - (*semaphore)->waiters = 0; - (*semaphore)->gpu_addr = amdgpu_sa_bo_gpu_addr((*semaphore)->sa_bo); - - *((uint64_t *)amdgpu_sa_bo_cpu_addr((*semaphore)->sa_bo)) = 0; - - return 0; -} - -bool amdgpu_semaphore_emit_signal(struct amdgpu_ring *ring, - struct amdgpu_semaphore *semaphore) -{ - trace_amdgpu_semaphore_signale(ring->idx, semaphore); - - if (amdgpu_ring_emit_semaphore(ring, semaphore, false)) { - --semaphore->waiters; - - /* for debugging lockup only, used by sysfs debug files */ - ring->last_semaphore_signal_addr = semaphore->gpu_addr; - return true; - } - return false; -} - -bool amdgpu_semaphore_emit_wait(struct amdgpu_ring *ring, - struct amdgpu_semaphore *semaphore) -{ - trace_amdgpu_semaphore_wait(ring->idx, semaphore); - - if (amdgpu_ring_emit_semaphore(ring, semaphore, true)) { - ++semaphore->waiters; - - /* for debugging lockup only, used by sysfs debug files */ - ring->last_semaphore_wait_addr = semaphore->gpu_addr; - return true; - } - return false; -} - -void amdgpu_semaphore_free(struct amdgpu_device *adev, - struct amdgpu_semaphore **semaphore, - struct fence *fence) -{ - if (semaphore == NULL || *semaphore == NULL) { - return; - } - if ((*semaphore)->waiters > 0) { - dev_err(adev->dev, "semaphore %p has more waiters than signalers," - " hardware lockup imminent!\n", *semaphore); - } - amdgpu_sa_bo_free(adev, &(*semaphore)->sa_bo, fence); - kfree(*semaphore); - *semaphore = NULL; -} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 181ce39..c15be00 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -46,14 +46,6 @@ struct amdgpu_sync_entry { */ void amdgpu_sync_create(struct amdgpu_sync *sync) { - unsigned i; - - for (i = 0; i < AMDGPU_NUM_SYNCS; ++i) - sync->semaphores[i] = NULL; - - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) - sync->sync_to[i] = NULL; - hash_init(sync->fences); sync->last_vm_update = NULL; } @@ -107,7 +99,6 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct fence *f) { struct amdgpu_sync_entry *e; - struct amdgpu_fence *fence; if (!f) return 0; @@ -116,27 +107,20 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, amdgpu_sync_test_owner(f, AMDGPU_FENCE_OWNER_VM)) amdgpu_sync_keep_later(&sync->last_vm_update, f); - fence = to_amdgpu_fence(f); - if (!fence || fence->ring->adev != adev) { - hash_for_each_possible(sync->fences, e, node, f->context) { - if (unlikely(e->fence->context != f->context)) - continue; - - amdgpu_sync_keep_later(&e->fence, f); - return 0; - } - - e = kmalloc(sizeof(struct amdgpu_sync_entry), GFP_KERNEL); - if (!e) - return -ENOMEM; + hash_for_each_possible(sync->fences, e, node, f->context) { + if (unlikely(e->fence->context != f->context)) + continue; - hash_add(sync->fences, &e->node, f->context); - e->fence = fence_get(f); + amdgpu_sync_keep_later(&e->fence, f); return 0; } - amdgpu_sync_keep_later(&sync->sync_to[fence->ring->idx], f); + e = 
kmalloc(sizeof(struct amdgpu_sync_entry), GFP_KERNEL); + if (!e) + return -ENOMEM; + hash_add(sync->fences, &e->node, f->context); + e->fence = fence_get(f); return 0; } @@ -153,13 +137,13 @@ static void *amdgpu_sync_get_owner(struct fence *f) } /** - * amdgpu_sync_resv - use the semaphores to sync to a reservation object + * amdgpu_sync_resv - sync to a reservation object * * @sync: sync object to add fences from reservation object to * @resv: reservation object with embedded fence * @shared: true if we should only sync to the exclusive fence * - * Sync to the fence using the semaphore objects + * Sync to the fence */ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, @@ -250,123 +234,17 @@ int amdgpu_sync_wait(struct amdgpu_sync *sync) kfree(e); } - if (amdgpu_enable_semaphores) - return 0; - - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - struct fence *fence = sync->sync_to[i]; - if (!fence) - continue; - - r = fence_wait(fence, false); - if (r) - return r; - } - - return 0; -} - -/** - * amdgpu_sync_rings - sync ring to all registered fences - * - * @sync: sync object to use - * @ring: ring that needs sync - * - * Ensure that all registered fences are signaled before letting - * the ring continue. The caller must hold the ring lock. - */ -int amdgpu_sync_rings(struct amdgpu_sync *sync, - struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - unsigned count = 0; - int i, r; - - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - struct amdgpu_ring *other = adev->rings[i]; - struct amdgpu_semaphore *semaphore; - struct amdgpu_fence *fence; - - if (!sync->sync_to[i]) - continue; - - fence = to_amdgpu_fence(sync->sync_to[i]); - - /* check if we really need to sync */ - if (!amdgpu_enable_scheduler && - !amdgpu_fence_need_sync(fence, ring)) - continue; - - /* prevent GPU deadlocks */ - if (!other->ready) { - dev_err(adev->dev, "Syncing to a disabled ring!"); - return -EINVAL; - } - - if (amdgpu_enable_scheduler || !amdgpu_enable_semaphores) { - r = fence_wait(sync->sync_to[i], true); - if (r) - return r; - continue; - } - - if (count >= AMDGPU_NUM_SYNCS) { - /* not enough room, wait manually */ - r = fence_wait(&fence->base, false); - if (r) - return r; - continue; - } - r = amdgpu_semaphore_create(adev, &semaphore); - if (r) - return r; - - sync->semaphores[count++] = semaphore; - - /* allocate enough space for sync command */ - r = amdgpu_ring_alloc(other, 16); - if (r) - return r; - - /* emit the signal semaphore */ - if (!amdgpu_semaphore_emit_signal(other, semaphore)) { - /* signaling wasn't successful wait manually */ - amdgpu_ring_undo(other); - r = fence_wait(&fence->base, false); - if (r) - return r; - continue; - } - - /* we assume caller has already allocated space on waiters ring */ - if (!amdgpu_semaphore_emit_wait(ring, semaphore)) { - /* waiting wasn't successful wait manually */ - amdgpu_ring_undo(other); - r = fence_wait(&fence->base, false); - if (r) - return r; - continue; - } - - amdgpu_ring_commit(other); - amdgpu_fence_note_sync(fence, ring); - } - return 0; } /** * amdgpu_sync_free - free the sync object * - * @adev: amdgpu_device pointer * @sync: sync object to use - * @fence: fence to use for the free * - * Free the sync object by freeing all semaphores in it. + * Free the sync object. 
*/ -void amdgpu_sync_free(struct amdgpu_device *adev, - struct amdgpu_sync *sync, - struct fence *fence) +void amdgpu_sync_free(struct amdgpu_sync *sync) { struct amdgpu_sync_entry *e; struct hlist_node *tmp; @@ -378,11 +256,5 @@ void amdgpu_sync_free(struct amdgpu_device *adev, kfree(e); } - for (i = 0; i < AMDGPU_NUM_SYNCS; ++i) - amdgpu_semaphore_free(adev, &sync->semaphores[i], fence); - - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) - fence_put(sync->sync_to[i]); - fence_put(sync->last_vm_update); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c index 4865615..05a53f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c @@ -238,144 +238,10 @@ void amdgpu_test_moves(struct amdgpu_device *adev) amdgpu_do_test_moves(adev); } -static int amdgpu_test_create_and_emit_fence(struct amdgpu_device *adev, - struct amdgpu_ring *ring, - struct fence **fence) -{ - uint32_t handle = ring->idx ^ 0xdeafbeef; - int r; - - if (ring == &adev->uvd.ring) { - r = amdgpu_uvd_get_create_msg(ring, handle, NULL); - if (r) { - DRM_ERROR("Failed to get dummy create msg\n"); - return r; - } - - r = amdgpu_uvd_get_destroy_msg(ring, handle, fence); - if (r) { - DRM_ERROR("Failed to get dummy destroy msg\n"); - return r; - } - - } else if (ring == &adev->vce.ring[0] || - ring == &adev->vce.ring[1]) { - r = amdgpu_vce_get_create_msg(ring, handle, NULL); - if (r) { - DRM_ERROR("Failed to get dummy create msg\n"); - return r; - } - - r = amdgpu_vce_get_destroy_msg(ring, handle, fence); - if (r) { - DRM_ERROR("Failed to get dummy destroy msg\n"); - return r; - } - } else { - struct amdgpu_fence *a_fence = NULL; - r = amdgpu_ring_lock(ring, 64); - if (r) { - DRM_ERROR("Failed to lock ring A %d\n", ring->idx); - return r; - } - amdgpu_fence_emit(ring, AMDGPU_FENCE_OWNER_UNDEFINED, &a_fence); - amdgpu_ring_unlock_commit(ring); - *fence = &a_fence->base; - } - return 0; -} - void amdgpu_test_ring_sync(struct amdgpu_device *adev, struct amdgpu_ring *ringA, struct amdgpu_ring *ringB) { - struct fence *fence1 = NULL, *fence2 = NULL; - struct amdgpu_semaphore *semaphore = NULL; - int r; - - r = amdgpu_semaphore_create(adev, &semaphore); - if (r) { - DRM_ERROR("Failed to create semaphore\n"); - goto out_cleanup; - } - - r = amdgpu_ring_lock(ringA, 64); - if (r) { - DRM_ERROR("Failed to lock ring A %d\n", ringA->idx); - goto out_cleanup; - } - amdgpu_semaphore_emit_wait(ringA, semaphore); - amdgpu_ring_unlock_commit(ringA); - - r = amdgpu_test_create_and_emit_fence(adev, ringA, &fence1); - if (r) - goto out_cleanup; - - r = amdgpu_ring_lock(ringA, 64); - if (r) { - DRM_ERROR("Failed to lock ring A %d\n", ringA->idx); - goto out_cleanup; - } - amdgpu_semaphore_emit_wait(ringA, semaphore); - amdgpu_ring_unlock_commit(ringA); - - r = amdgpu_test_create_and_emit_fence(adev, ringA, &fence2); - if (r) - goto out_cleanup; - - mdelay(1000); - - if (fence_is_signaled(fence1)) { - DRM_ERROR("Fence 1 signaled without waiting for semaphore.\n"); - goto out_cleanup; - } - - r = amdgpu_ring_lock(ringB, 64); - if (r) { - DRM_ERROR("Failed to lock ring B %p\n", ringB); - goto out_cleanup; - } - amdgpu_semaphore_emit_signal(ringB, semaphore); - amdgpu_ring_unlock_commit(ringB); - - r = fence_wait(fence1, false); - if (r) { - DRM_ERROR("Failed to wait for sync fence 1\n"); - goto out_cleanup; - } - - mdelay(1000); - - if (fence_is_signaled(fence2)) { - DRM_ERROR("Fence 2 signaled without waiting for semaphore.\n"); - goto out_cleanup; - } - - r = 
amdgpu_ring_lock(ringB, 64); - if (r) { - DRM_ERROR("Failed to lock ring B %p\n", ringB); - goto out_cleanup; - } - amdgpu_semaphore_emit_signal(ringB, semaphore); - amdgpu_ring_unlock_commit(ringB); - - r = fence_wait(fence2, false); - if (r) { - DRM_ERROR("Failed to wait for sync fence 1\n"); - goto out_cleanup; - } - -out_cleanup: - amdgpu_semaphore_free(adev, &semaphore, NULL); - - if (fence1) - fence_put(fence1); - - if (fence2) - fence_put(fence2); - - if (r) - printk(KERN_WARNING "Error while testing ring sync (%d).\n", r); } static void amdgpu_test_ring_sync2(struct amdgpu_device *adev, @@ -383,109 +249,6 @@ static void amdgpu_test_ring_sync2(struct amdgpu_device *adev, struct amdgpu_ring *ringB, struct amdgpu_ring *ringC) { - struct fence *fenceA = NULL, *fenceB = NULL; - struct amdgpu_semaphore *semaphore = NULL; - bool sigA, sigB; - int i, r; - - r = amdgpu_semaphore_create(adev, &semaphore); - if (r) { - DRM_ERROR("Failed to create semaphore\n"); - goto out_cleanup; - } - - r = amdgpu_ring_lock(ringA, 64); - if (r) { - DRM_ERROR("Failed to lock ring A %d\n", ringA->idx); - goto out_cleanup; - } - amdgpu_semaphore_emit_wait(ringA, semaphore); - amdgpu_ring_unlock_commit(ringA); - - r = amdgpu_test_create_and_emit_fence(adev, ringA, &fenceA); - if (r) - goto out_cleanup; - - r = amdgpu_ring_lock(ringB, 64); - if (r) { - DRM_ERROR("Failed to lock ring B %d\n", ringB->idx); - goto out_cleanup; - } - amdgpu_semaphore_emit_wait(ringB, semaphore); - amdgpu_ring_unlock_commit(ringB); - r = amdgpu_test_create_and_emit_fence(adev, ringB, &fenceB); - if (r) - goto out_cleanup; - - mdelay(1000); - - if (fence_is_signaled(fenceA)) { - DRM_ERROR("Fence A signaled without waiting for semaphore.\n"); - goto out_cleanup; - } - if (fence_is_signaled(fenceB)) { - DRM_ERROR("Fence B signaled without waiting for semaphore.\n"); - goto out_cleanup; - } - - r = amdgpu_ring_lock(ringC, 64); - if (r) { - DRM_ERROR("Failed to lock ring B %p\n", ringC); - goto out_cleanup; - } - amdgpu_semaphore_emit_signal(ringC, semaphore); - amdgpu_ring_unlock_commit(ringC); - - for (i = 0; i < 30; ++i) { - mdelay(100); - sigA = fence_is_signaled(fenceA); - sigB = fence_is_signaled(fenceB); - if (sigA || sigB) - break; - } - - if (!sigA && !sigB) { - DRM_ERROR("Neither fence A nor B has been signaled\n"); - goto out_cleanup; - } else if (sigA && sigB) { - DRM_ERROR("Both fence A and B has been signaled\n"); - goto out_cleanup; - } - - DRM_INFO("Fence %c was first signaled\n", sigA ? 
'A' : 'B'); - - r = amdgpu_ring_lock(ringC, 64); - if (r) { - DRM_ERROR("Failed to lock ring B %p\n", ringC); - goto out_cleanup; - } - amdgpu_semaphore_emit_signal(ringC, semaphore); - amdgpu_ring_unlock_commit(ringC); - - mdelay(1000); - - r = fence_wait(fenceA, false); - if (r) { - DRM_ERROR("Failed to wait for sync fence A\n"); - goto out_cleanup; - } - r = fence_wait(fenceB, false); - if (r) { - DRM_ERROR("Failed to wait for sync fence B\n"); - goto out_cleanup; - } - -out_cleanup: - amdgpu_semaphore_free(adev, &semaphore, NULL); - - if (fenceA) - fence_put(fenceA); - - if (fenceB) - fence_put(fenceB); - - if (r) - printk(KERN_WARNING "Error while testing ring sync (%d).\n", r); } static bool amdgpu_test_sync_possible(struct amdgpu_ring *ringA, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 8f9834ab..9ca3735 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -38,10 +38,10 @@ TRACE_EVENT(amdgpu_cs, TP_fast_assign( __entry->bo_list = p->bo_list; - __entry->ring = p->ibs[i].ring->idx; - __entry->dw = p->ibs[i].length_dw; + __entry->ring = p->job->ring->idx; + __entry->dw = p->job->ibs[i].length_dw; __entry->fences = amdgpu_fence_count_emitted( - p->ibs[i].ring); + p->job->ring); ), TP_printk("bo_list=%p, ring=%u, dw=%u, fences=%u", __entry->bo_list, __entry->ring, __entry->dw, @@ -65,7 +65,7 @@ TRACE_EVENT(amdgpu_cs_ioctl, __entry->sched_job = &job->base; __entry->ib = job->ibs; __entry->fence = &job->base.s_fence->base; - __entry->ring_name = job->ibs[0].ring->name; + __entry->ring_name = job->ring->name; __entry->num_ibs = job->num_ibs; ), TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u", @@ -90,7 +90,7 @@ TRACE_EVENT(amdgpu_sched_run_job, __entry->sched_job = &job->base; __entry->ib = job->ibs; __entry->fence = &job->base.s_fence->base; - __entry->ring_name = job->ibs[0].ring->name; + __entry->ring_name = job->ring->name; __entry->num_ibs = job->num_ibs; ), TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u", @@ -100,18 +100,21 @@ TRACE_EVENT(amdgpu_sched_run_job, TRACE_EVENT(amdgpu_vm_grab_id, - TP_PROTO(unsigned vmid, int ring), - TP_ARGS(vmid, ring), + TP_PROTO(struct amdgpu_vm *vm, unsigned vmid, int ring), + TP_ARGS(vm, vmid, ring), TP_STRUCT__entry( + __field(struct amdgpu_vm *, vm) __field(u32, vmid) __field(u32, ring) ), TP_fast_assign( + __entry->vm = vm; __entry->vmid = vmid; __entry->ring = ring; ), - TP_printk("vmid=%u, ring=%u", __entry->vmid, __entry->ring) + TP_printk("vm=%p, id=%u, ring=%u", __entry->vm, __entry->vmid, + __entry->ring) ); TRACE_EVENT(amdgpu_vm_bo_map, @@ -247,42 +250,6 @@ TRACE_EVENT(amdgpu_bo_list_set, TP_printk("list=%p, bo=%p", __entry->list, __entry->bo) ); -DECLARE_EVENT_CLASS(amdgpu_semaphore_request, - - TP_PROTO(int ring, struct amdgpu_semaphore *sem), - - TP_ARGS(ring, sem), - - TP_STRUCT__entry( - __field(int, ring) - __field(signed, waiters) - __field(uint64_t, gpu_addr) - ), - - TP_fast_assign( - __entry->ring = ring; - __entry->waiters = sem->waiters; - __entry->gpu_addr = sem->gpu_addr; - ), - - TP_printk("ring=%u, waiters=%d, addr=%010Lx", __entry->ring, - __entry->waiters, __entry->gpu_addr) -); - -DEFINE_EVENT(amdgpu_semaphore_request, amdgpu_semaphore_signale, - - TP_PROTO(int ring, struct amdgpu_semaphore *sem), - - TP_ARGS(ring, sem) -); - -DEFINE_EVENT(amdgpu_semaphore_request, amdgpu_semaphore_wait, - - TP_PROTO(int ring, struct 
amdgpu_semaphore *sem), - - TP_ARGS(ring, sem) -); - #endif /* This part must be outside protection */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 55cf05e..e52fc64 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -77,6 +77,8 @@ static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref) static int amdgpu_ttm_global_init(struct amdgpu_device *adev) { struct drm_global_reference *global_ref; + struct amdgpu_ring *ring; + struct amd_sched_rq *rq; int r; adev->mman.mem_global_referenced = false; @@ -106,13 +108,27 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev) return r; } + ring = adev->mman.buffer_funcs_ring; + rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL]; + r = amd_sched_entity_init(&ring->sched, &adev->mman.entity, + rq, amdgpu_sched_jobs); + if (r != 0) { + DRM_ERROR("Failed setting up TTM BO move run queue.\n"); + drm_global_item_unref(&adev->mman.mem_global_ref); + drm_global_item_unref(&adev->mman.bo_global_ref.ref); + return r; + } + adev->mman.mem_global_referenced = true; + return 0; } static void amdgpu_ttm_global_fini(struct amdgpu_device *adev) { if (adev->mman.mem_global_referenced) { + amd_sched_entity_fini(adev->mman.entity.sched, + &adev->mman.entity); drm_global_item_unref(&adev->mman.bo_global_ref.ref); drm_global_item_unref(&adev->mman.mem_global_ref); adev->mman.mem_global_referenced = false; @@ -499,9 +515,6 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) enum dma_data_direction direction = write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE; - if (current->mm != gtt->usermm) - return -EPERM; - if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) { /* check that we only pin down anonymous memory to prevent problems with writeback */ @@ -773,14 +786,33 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr, return 0; } -bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm) +struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm) +{ + struct amdgpu_ttm_tt *gtt = (void *)ttm; + + if (gtt == NULL) + return NULL; + + return gtt->usermm; +} + +bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start, + unsigned long end) { struct amdgpu_ttm_tt *gtt = (void *)ttm; + unsigned long size; if (gtt == NULL) return false; - return !!gtt->userptr; + if (gtt->ttm.ttm.state != tt_bound || !gtt->userptr) + return false; + + size = (unsigned long)gtt->ttm.ttm.num_pages * PAGE_SIZE; + if (gtt->userptr > end || gtt->userptr + size <= start) + return false; + + return true; } bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm) @@ -996,9 +1028,10 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, struct fence **fence) { struct amdgpu_device *adev = ring->adev; + struct amdgpu_job *job; + uint32_t max_bytes; unsigned num_loops, num_dw; - struct amdgpu_ib *ib; unsigned i; int r; @@ -1010,20 +1043,12 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, while (num_dw & 0x7) num_dw++; - ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL); - if (!ib) - return -ENOMEM; - - r = amdgpu_ib_get(ring, NULL, num_dw * 4, ib); - if (r) { - kfree(ib); + r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job); + if (r) return r; - } - - ib->length_dw = 0; if (resv) { - r = amdgpu_sync_resv(adev, &ib->sync, resv, + r = amdgpu_sync_resv(adev, &job->sync, resv, AMDGPU_FENCE_OWNER_UNDEFINED); if (r) { DRM_ERROR("sync failed (%d).\n", r); @@ -1034,31 +1059,25 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, for (i = 0; i < num_loops; 
i++) { uint32_t cur_size_in_bytes = min(byte_count, max_bytes); - amdgpu_emit_copy_buffer(adev, ib, src_offset, dst_offset, - cur_size_in_bytes); + amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset, + dst_offset, cur_size_in_bytes); src_offset += cur_size_in_bytes; dst_offset += cur_size_in_bytes; byte_count -= cur_size_in_bytes; } - amdgpu_vm_pad_ib(adev, ib); - WARN_ON(ib->length_dw > num_dw); - r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1, - &amdgpu_vm_free_job, - AMDGPU_FENCE_OWNER_UNDEFINED, - fence); + amdgpu_ring_pad_ib(ring, &job->ibs[0]); + WARN_ON(job->ibs[0].length_dw > num_dw); + r = amdgpu_job_submit(job, ring, &adev->mman.entity, + AMDGPU_FENCE_OWNER_UNDEFINED, fence); if (r) goto error_free; - if (!amdgpu_enable_scheduler) { - amdgpu_ib_free(adev, ib); - kfree(ib); - } return 0; + error_free: - amdgpu_ib_free(adev, ib); - kfree(ib); + amdgpu_job_free(job); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 53f987a..1de82bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -91,6 +91,8 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work); int amdgpu_uvd_sw_init(struct amdgpu_device *adev) { + struct amdgpu_ring *ring; + struct amd_sched_rq *rq; unsigned long bo_size; const char *fw_name; const struct common_firmware_header *hdr; @@ -191,6 +193,15 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) amdgpu_bo_unreserve(adev->uvd.vcpu_bo); + ring = &adev->uvd.ring; + rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; + r = amd_sched_entity_init(&ring->sched, &adev->uvd.entity, + rq, amdgpu_sched_jobs); + if (r != 0) { + DRM_ERROR("Failed setting up UVD run queue.\n"); + return r; + } + for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) { atomic_set(&adev->uvd.handles[i], 0); adev->uvd.filp[i] = NULL; @@ -210,6 +221,8 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev) if (adev->uvd.vcpu_bo == NULL) return 0; + amd_sched_entity_fini(&adev->uvd.ring.sched, &adev->uvd.entity); + r = amdgpu_bo_reserve(adev->uvd.vcpu_bo, false); if (!r) { amdgpu_bo_kunmap(adev->uvd.vcpu_bo); @@ -241,7 +254,7 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev) amdgpu_uvd_note_usage(adev); - r = amdgpu_uvd_get_destroy_msg(ring, handle, &fence); + r = amdgpu_uvd_get_destroy_msg(ring, handle, false, &fence); if (r) { DRM_ERROR("Error destroying UVD (%d)!\n", r); continue; @@ -295,7 +308,8 @@ void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp) amdgpu_uvd_note_usage(adev); - r = amdgpu_uvd_get_destroy_msg(ring, handle, &fence); + r = amdgpu_uvd_get_destroy_msg(ring, handle, + false, &fence); if (r) { DRM_ERROR("Error destroying UVD (%d)!\n", r); continue; @@ -616,7 +630,6 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx) { struct amdgpu_bo_va_mapping *mapping; struct amdgpu_bo *bo; - struct amdgpu_ib *ib; uint32_t cmd, lo, hi; uint64_t start, end; uint64_t addr; @@ -638,9 +651,10 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx) addr -= ((uint64_t)mapping->it.start) * AMDGPU_GPU_PAGE_SIZE; start += addr; - ib = &ctx->parser->ibs[ctx->ib_idx]; - ib->ptr[ctx->data0] = start & 0xFFFFFFFF; - ib->ptr[ctx->data1] = start >> 32; + amdgpu_set_ib_value(ctx->parser, ctx->ib_idx, ctx->data0, + lower_32_bits(start)); + amdgpu_set_ib_value(ctx->parser, ctx->ib_idx, ctx->data1, + upper_32_bits(start)); cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx) >> 1; if (cmd < 0x4) { @@ -702,7 +716,7 @@ static 
int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx) static int amdgpu_uvd_cs_reg(struct amdgpu_uvd_cs_ctx *ctx, int (*cb)(struct amdgpu_uvd_cs_ctx *ctx)) { - struct amdgpu_ib *ib = &ctx->parser->ibs[ctx->ib_idx]; + struct amdgpu_ib *ib = &ctx->parser->job->ibs[ctx->ib_idx]; int i, r; ctx->idx++; @@ -748,7 +762,7 @@ static int amdgpu_uvd_cs_reg(struct amdgpu_uvd_cs_ctx *ctx, static int amdgpu_uvd_cs_packets(struct amdgpu_uvd_cs_ctx *ctx, int (*cb)(struct amdgpu_uvd_cs_ctx *ctx)) { - struct amdgpu_ib *ib = &ctx->parser->ibs[ctx->ib_idx]; + struct amdgpu_ib *ib = &ctx->parser->job->ibs[ctx->ib_idx]; int r; for (ctx->idx = 0 ; ctx->idx < ib->length_dw; ) { @@ -790,7 +804,7 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx) [0x00000003] = 2048, [0x00000004] = 0xFFFFFFFF, }; - struct amdgpu_ib *ib = &parser->ibs[ib_idx]; + struct amdgpu_ib *ib = &parser->job->ibs[ib_idx]; int r; if (ib->length_dw % 16) { @@ -823,22 +837,14 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx) return 0; } -static int amdgpu_uvd_free_job( - struct amdgpu_job *job) -{ - amdgpu_ib_free(job->adev, job->ibs); - kfree(job->ibs); - return 0; -} - -static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, - struct amdgpu_bo *bo, - struct fence **fence) +static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo, + bool direct, struct fence **fence) { struct ttm_validate_buffer tv; struct ww_acquire_ctx ticket; struct list_head head; - struct amdgpu_ib *ib = NULL; + struct amdgpu_job *job; + struct amdgpu_ib *ib; struct fence *f = NULL; struct amdgpu_device *adev = ring->adev; uint64_t addr; @@ -862,15 +868,12 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); if (r) goto err; - ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL); - if (!ib) { - r = -ENOMEM; - goto err; - } - r = amdgpu_ib_get(ring, NULL, 64, ib); + + r = amdgpu_job_alloc_with_ib(adev, 64, &job); if (r) - goto err1; + goto err; + ib = &job->ibs[0]; addr = amdgpu_bo_gpu_offset(bo); ib->ptr[0] = PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0); ib->ptr[1] = addr; @@ -882,12 +885,19 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, ib->ptr[i] = PACKET2(0); ib->length_dw = 16; - r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1, - &amdgpu_uvd_free_job, - AMDGPU_FENCE_OWNER_UNDEFINED, - &f); - if (r) - goto err2; + if (direct) { + r = amdgpu_ib_schedule(ring, 1, ib, + AMDGPU_FENCE_OWNER_UNDEFINED, NULL, &f); + if (r) + goto err_free; + + amdgpu_job_free(job); + } else { + r = amdgpu_job_submit(job, ring, &adev->uvd.entity, + AMDGPU_FENCE_OWNER_UNDEFINED, &f); + if (r) + goto err_free; + } ttm_eu_fence_buffer_objects(&ticket, &head, f); @@ -895,16 +905,12 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, *fence = fence_get(f); amdgpu_bo_unref(&bo); fence_put(f); - if (amdgpu_enable_scheduler) - return 0; - amdgpu_ib_free(ring->adev, ib); - kfree(ib); return 0; -err2: - amdgpu_ib_free(ring->adev, ib); -err1: - kfree(ib); + +err_free: + amdgpu_job_free(job); + err: ttm_eu_backoff_reservation(&ticket, &head); return r; @@ -959,11 +965,11 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, amdgpu_bo_kunmap(bo); amdgpu_bo_unreserve(bo); - return amdgpu_uvd_send_msg(ring, bo, fence); + return amdgpu_uvd_send_msg(ring, bo, true, fence); } int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, - struct fence **fence) + bool direct, struct fence **fence) { struct 
amdgpu_device *adev = ring->adev; struct amdgpu_bo *bo; @@ -1001,7 +1007,7 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, amdgpu_bo_kunmap(bo); amdgpu_bo_unreserve(bo); - return amdgpu_uvd_send_msg(ring, bo, fence); + return amdgpu_uvd_send_msg(ring, bo, direct, fence); } static void amdgpu_uvd_idle_work_handler(struct work_struct *work) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h index 1724c2c..9a3b449 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h @@ -31,7 +31,7 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev); int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, struct fence **fence); int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, - struct fence **fence); + bool direct, struct fence **fence); void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp); int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index a745eee..39c3aa6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -74,6 +74,8 @@ static void amdgpu_vce_idle_work_handler(struct work_struct *work); */ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) { + struct amdgpu_ring *ring; + struct amd_sched_rq *rq; const char *fw_name; const struct common_firmware_header *hdr; unsigned ucode_version, version_major, version_minor, binary_id; @@ -170,6 +172,16 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) return r; } + + ring = &adev->vce.ring[0]; + rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; + r = amd_sched_entity_init(&ring->sched, &adev->vce.entity, + rq, amdgpu_sched_jobs); + if (r != 0) { + DRM_ERROR("Failed setting up VCE run queue.\n"); + return r; + } + for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) { atomic_set(&adev->vce.handles[i], 0); adev->vce.filp[i] = NULL; @@ -190,6 +202,8 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev) if (adev->vce.vcpu_bo == NULL) return 0; + amd_sched_entity_fini(&adev->vce.ring[0].sched, &adev->vce.entity); + amdgpu_bo_unref(&adev->vce.vcpu_bo); amdgpu_ring_fini(&adev->vce.ring[0]); @@ -337,7 +351,7 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp) amdgpu_vce_note_usage(adev); - r = amdgpu_vce_get_destroy_msg(ring, handle, NULL); + r = amdgpu_vce_get_destroy_msg(ring, handle, false, NULL); if (r) DRM_ERROR("Error destroying VCE handle (%d)!\n", r); @@ -346,14 +360,6 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp) } } -static int amdgpu_vce_free_job( - struct amdgpu_job *job) -{ - amdgpu_ib_free(job->adev, job->ibs); - kfree(job->ibs); - return 0; -} - /** * amdgpu_vce_get_create_msg - generate a VCE create msg * @@ -368,21 +374,17 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, struct fence **fence) { const unsigned ib_size_dw = 1024; - struct amdgpu_ib *ib = NULL; + struct amdgpu_job *job; + struct amdgpu_ib *ib; struct fence *f = NULL; - struct amdgpu_device *adev = ring->adev; uint64_t dummy; int i, r; - ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL); - if (!ib) - return -ENOMEM; - r = amdgpu_ib_get(ring, NULL, ib_size_dw * 4, ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); - kfree(ib); + r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, 
&job); + if (r) return r; - } + + ib = &job->ibs[0]; dummy = ib->gpu_addr + 1024; @@ -423,20 +425,19 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; - r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1, - &amdgpu_vce_free_job, - AMDGPU_FENCE_OWNER_UNDEFINED, - &f); + r = amdgpu_ib_schedule(ring, 1, ib, AMDGPU_FENCE_OWNER_UNDEFINED, + NULL, &f); if (r) goto err; + + amdgpu_job_free(job); if (fence) *fence = fence_get(f); fence_put(f); - if (amdgpu_enable_scheduler) - return 0; + return 0; + err: - amdgpu_ib_free(adev, ib); - kfree(ib); + amdgpu_job_free(job); return r; } @@ -451,26 +452,20 @@ err: * Close up a stream for HW test or if userspace failed to do so */ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, - struct fence **fence) + bool direct, struct fence **fence) { const unsigned ib_size_dw = 1024; - struct amdgpu_ib *ib = NULL; + struct amdgpu_job *job; + struct amdgpu_ib *ib; struct fence *f = NULL; - struct amdgpu_device *adev = ring->adev; uint64_t dummy; int i, r; - ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL); - if (!ib) - return -ENOMEM; - - r = amdgpu_ib_get(ring, NULL, ib_size_dw * 4, ib); - if (r) { - kfree(ib); - DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); + r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job); + if (r) return r; - } + ib = &job->ibs[0]; dummy = ib->gpu_addr + 1024; /* stitch together an VCE destroy msg */ @@ -490,20 +485,29 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; - r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1, - &amdgpu_vce_free_job, - AMDGPU_FENCE_OWNER_UNDEFINED, - &f); - if (r) - goto err; + + if (direct) { + r = amdgpu_ib_schedule(ring, 1, ib, + AMDGPU_FENCE_OWNER_UNDEFINED, + NULL, &f); + if (r) + goto err; + + amdgpu_job_free(job); + } else { + r = amdgpu_job_submit(job, ring, &ring->adev->vce.entity, + AMDGPU_FENCE_OWNER_UNDEFINED, &f); + if (r) + goto err; + } + if (fence) *fence = fence_get(f); fence_put(f); - if (amdgpu_enable_scheduler) - return 0; + return 0; + err: - amdgpu_ib_free(adev, ib); - kfree(ib); + amdgpu_job_free(job); return r; } @@ -521,7 +525,6 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, int lo, int hi, unsigned size, uint32_t index) { struct amdgpu_bo_va_mapping *mapping; - struct amdgpu_ib *ib = &p->ibs[ib_idx]; struct amdgpu_bo *bo; uint64_t addr; @@ -550,8 +553,8 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx, addr += amdgpu_bo_gpu_offset(bo); addr -= ((uint64_t)size) * ((uint64_t)index); - ib->ptr[lo] = addr & 0xFFFFFFFF; - ib->ptr[hi] = addr >> 32; + amdgpu_set_ib_value(p, ib_idx, lo, lower_32_bits(addr)); + amdgpu_set_ib_value(p, ib_idx, hi, upper_32_bits(addr)); return 0; } @@ -606,7 +609,7 @@ static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p, */ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx) { - struct amdgpu_ib *ib = &p->ibs[ib_idx]; + struct amdgpu_ib *ib = &p->job->ibs[ib_idx]; unsigned fb_idx = 0, bs_idx = 0; int session_idx = -1; bool destroyed = false; @@ -743,30 +746,6 @@ out: } /** - * amdgpu_vce_ring_emit_semaphore - emit a semaphore command - * - * @ring: engine to use - * @semaphore: address of semaphore - * @emit_wait: true=emit wait, false=emit signal - * - */ -bool amdgpu_vce_ring_emit_semaphore(struct amdgpu_ring *ring, - struct 
amdgpu_semaphore *semaphore, - bool emit_wait) -{ - uint64_t addr = semaphore->gpu_addr; - - amdgpu_ring_write(ring, VCE_CMD_SEMAPHORE); - amdgpu_ring_write(ring, (addr >> 3) & 0x000FFFFF); - amdgpu_ring_write(ring, (addr >> 23) & 0x000FFFFF); - amdgpu_ring_write(ring, 0x01003000 | (emit_wait ? 1 : 0)); - if (!emit_wait) - amdgpu_ring_write(ring, VCE_CMD_END); - - return true; -} - -/** * amdgpu_vce_ring_emit_ib - execute indirect buffer * * @ring: engine to use @@ -814,14 +793,14 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring) unsigned i; int r; - r = amdgpu_ring_lock(ring, 16); + r = amdgpu_ring_alloc(ring, 16); if (r) { DRM_ERROR("amdgpu: vce failed to lock ring %d (%d).\n", ring->idx, r); return r; } amdgpu_ring_write(ring, VCE_CMD_END); - amdgpu_ring_unlock_commit(ring); + amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { if (amdgpu_ring_get_rptr(ring) != rptr) @@ -862,7 +841,7 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring) goto error; } - r = amdgpu_vce_get_destroy_msg(ring, 1, &fence); + r = amdgpu_vce_get_destroy_msg(ring, 1, true, &fence); if (r) { DRM_ERROR("amdgpu: failed to get destroy ib (%d).\n", r); goto error; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index ba2da8e..ef99d23 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h @@ -31,12 +31,9 @@ int amdgpu_vce_resume(struct amdgpu_device *adev); int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, struct fence **fence); int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, - struct fence **fence); + bool direct, struct fence **fence); void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp); int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx); -bool amdgpu_vce_ring_emit_semaphore(struct amdgpu_ring *ring, - struct amdgpu_semaphore *semaphore, - bool emit_wait); void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib); void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, unsigned flags); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 9599f75..264c596 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -55,7 +55,7 @@ * * @adev: amdgpu_device pointer * - * Calculate the number of page directory entries (cayman+). + * Calculate the number of page directory entries. */ static unsigned amdgpu_vm_num_pdes(struct amdgpu_device *adev) { @@ -67,7 +67,7 @@ static unsigned amdgpu_vm_num_pdes(struct amdgpu_device *adev) * * @adev: amdgpu_device pointer * - * Calculate the size of the page directory in bytes (cayman+). + * Calculate the size of the page directory in bytes. */ static unsigned amdgpu_vm_directory_size(struct amdgpu_device *adev) { @@ -89,8 +89,6 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, struct amdgpu_bo_list_entry *entry) { entry->robj = vm->page_directory; - entry->prefered_domains = AMDGPU_GEM_DOMAIN_VRAM; - entry->allowed_domains = AMDGPU_GEM_DOMAIN_VRAM; entry->priority = 0; entry->tv.bo = &vm->page_directory->tbo; entry->tv.shared = true; @@ -154,29 +152,34 @@ void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev, * @vm: vm to allocate id for * @ring: ring we want to submit job to * @sync: sync object where we add dependencies + * @fence: fence protecting ID from reuse * * Allocate an id for the vm, adding fences to the sync obj as necessary. 
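
The VCE conversion above replaces the old kzalloc()/amdgpu_ib_get()/amdgpu_sched_ib_submit_kernel_helper() sequence with amdgpu_job_alloc_with_ib() plus either direct or scheduled submission. A condensed sketch of the resulting ownership rules, using only calls that appear in this diff (the wrapper function itself is hypothetical, for illustration):

    static int submit_one_ib(struct amdgpu_ring *ring,
                             struct amd_sched_entity *entity,
                             bool direct, struct fence **f)
    {
        struct amdgpu_job *job;
        int r;

        r = amdgpu_job_alloc_with_ib(ring->adev, 1024 * 4, &job);
        if (r)
            return r;

        /* ... fill job->ibs[0] and set job->ibs[0].length_dw ... */

        if (direct) {
            /* bypass the GPU scheduler, e.g. for ring/IB tests */
            r = amdgpu_ib_schedule(ring, 1, &job->ibs[0],
                                   AMDGPU_FENCE_OWNER_UNDEFINED, NULL, f);
            if (r)
                goto err_free;
            amdgpu_job_free(job);   /* direct path: caller frees the job */
        } else {
            /* scheduled path: the scheduler owns and frees the job */
            r = amdgpu_job_submit(job, ring, entity,
                                  AMDGPU_FENCE_OWNER_UNDEFINED, f);
            if (r)
                goto err_free;
        }
        return 0;

    err_free:
        amdgpu_job_free(job);
        return r;
    }
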
- * - * Global mutex must be locked! */ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, - struct amdgpu_sync *sync) + struct amdgpu_sync *sync, struct fence *fence) { - struct fence *best[AMDGPU_MAX_RINGS] = {}; struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx]; struct amdgpu_device *adev = ring->adev; + struct amdgpu_vm_manager_id *id; + int r; - unsigned choices[2] = {}; - unsigned i; + mutex_lock(&adev->vm_manager.lock); /* check if the id is still valid */ if (vm_id->id) { - unsigned id = vm_id->id; long owner; - owner = atomic_long_read(&adev->vm_manager.ids[id].owner); + id = &adev->vm_manager.ids[vm_id->id]; + owner = atomic_long_read(&id->owner); if (owner == (long)vm) { - trace_amdgpu_vm_grab_id(vm_id->id, ring->idx); + list_move_tail(&id->list, &adev->vm_manager.ids_lru); + trace_amdgpu_vm_grab_id(vm, vm_id->id, ring->idx); + + fence_put(id->active); + id->active = fence_get(fence); + + mutex_unlock(&adev->vm_manager.lock); return 0; } } @@ -184,41 +187,24 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, /* we definitely need to flush */ vm_id->pd_gpu_addr = ~0ll; - /* skip over VMID 0, since it is the system VM */ - for (i = 1; i < adev->vm_manager.nvm; ++i) { - struct fence *fence = adev->vm_manager.ids[i].active; - struct amdgpu_ring *fring; - - if (fence == NULL) { - /* found a free one */ - vm_id->id = i; - trace_amdgpu_vm_grab_id(i, ring->idx); - return 0; - } - - fring = amdgpu_ring_from_fence(fence); - if (best[fring->idx] == NULL || - fence_is_later(best[fring->idx], fence)) { - best[fring->idx] = fence; - choices[fring == ring ? 0 : 1] = i; - } - } + id = list_first_entry(&adev->vm_manager.ids_lru, + struct amdgpu_vm_manager_id, + list); + list_move_tail(&id->list, &adev->vm_manager.ids_lru); + atomic_long_set(&id->owner, (long)vm); - for (i = 0; i < 2; ++i) { - if (choices[i]) { - struct fence *fence; + vm_id->id = id - adev->vm_manager.ids; + trace_amdgpu_vm_grab_id(vm, vm_id->id, ring->idx); - fence = adev->vm_manager.ids[choices[i]].active; - vm_id->id = choices[i]; + r = amdgpu_sync_fence(ring->adev, sync, id->active); - trace_amdgpu_vm_grab_id(choices[i], ring->idx); - return amdgpu_sync_fence(ring->adev, sync, fence); - } + if (!r) { + fence_put(id->active); + id->active = fence_get(fence); } - /* should never happen */ - BUG(); - return -EINVAL; + mutex_unlock(&adev->vm_manager.lock); + return r; } /** @@ -228,9 +214,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, * @vm: vm we want to flush * @updates: last vm update that we waited for * - * Flush the vm (cayman+). - * - * Global and local mutex must be locked! + * Flush the vm. */ void amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_vm *vm, @@ -260,36 +244,12 @@ void amdgpu_vm_flush(struct amdgpu_ring *ring, } /** - * amdgpu_vm_fence - remember fence for vm - * - * @adev: amdgpu_device pointer - * @vm: vm we want to fence - * @fence: fence to remember - * - * Fence the vm (cayman+). - * Set the fence used to protect page table and id. - * - * Global and local mutex must be locked! 
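
The rewritten amdgpu_vm_grab_id() above drops the per-ring fence scan in favor of a plain least-recently-used list protected by vm_manager.lock: reusing a still-owned id just moves it to the tail, otherwise the head (the least recently used id) is taken. A minimal standalone sketch of the same idea with <linux/list.h> semantics (the types here are illustrative, not the driver's):

    struct toy_vmid {
        struct list_head list;
        void *owner;
    };

    /* take the least recently used id and mark it most recently used */
    static struct toy_vmid *toy_grab_id(struct list_head *lru, void *owner)
    {
        struct toy_vmid *id;

        id = list_first_entry(lru, struct toy_vmid, list);
        list_move_tail(&id->list, lru);
        id->owner = owner;
        return id;
    }
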
- */ -void amdgpu_vm_fence(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct fence *fence) -{ - struct amdgpu_ring *ring = amdgpu_ring_from_fence(fence); - unsigned vm_id = vm->ids[ring->idx].id; - - fence_put(adev->vm_manager.ids[vm_id].active); - adev->vm_manager.ids[vm_id].active = fence_get(fence); - atomic_long_set(&adev->vm_manager.ids[vm_id].owner, (long)vm); -} - -/** * amdgpu_vm_bo_find - find the bo_va for a specific vm & bo * * @vm: requested vm * @bo: requested buffer object * - * Find @bo inside the requested vm (cayman+). + * Find @bo inside the requested vm. * Search inside the @bos vm list for the requested vm * Returns the found bo_va or NULL if none is found * @@ -312,32 +272,40 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, * amdgpu_vm_update_pages - helper to call the right asic function * * @adev: amdgpu_device pointer + * @gtt: GART instance to use for mapping + * @gtt_flags: GTT hw access flags * @ib: indirect buffer to fill with commands * @pe: addr of the page entry * @addr: dst addr to write into pe * @count: number of page entries to update * @incr: increase next addr by incr bytes * @flags: hw access flags - * @gtt_flags: GTT hw access flags * * Traces the parameters and calls the right asic functions * to setup the page table using the DMA. */ static void amdgpu_vm_update_pages(struct amdgpu_device *adev, + struct amdgpu_gart *gtt, + uint32_t gtt_flags, struct amdgpu_ib *ib, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, - uint32_t flags, uint32_t gtt_flags) + uint32_t flags) { trace_amdgpu_vm_set_page(pe, addr, count, incr, flags); - if ((flags & AMDGPU_PTE_SYSTEM) && (flags == gtt_flags)) { - uint64_t src = adev->gart.table_addr + (addr >> 12) * 8; + if ((gtt == &adev->gart) && (flags == gtt_flags)) { + uint64_t src = gtt->table_addr + (addr >> 12) * 8; amdgpu_vm_copy_pte(adev, ib, pe, src, count); - } else if ((flags & AMDGPU_PTE_SYSTEM) || (count < 3)) { - amdgpu_vm_write_pte(adev, ib, pe, addr, - count, incr, flags); + } else if (gtt) { + dma_addr_t *pages_addr = gtt->pages_addr; + amdgpu_vm_write_pte(adev, ib, pages_addr, pe, addr, + count, incr, flags); + + } else if (count < 3) { + amdgpu_vm_write_pte(adev, ib, NULL, pe, addr, + count, incr, flags); } else { amdgpu_vm_set_pte_pde(adev, ib, pe, addr, @@ -345,15 +313,6 @@ static void amdgpu_vm_update_pages(struct amdgpu_device *adev, } } -int amdgpu_vm_free_job(struct amdgpu_job *job) -{ - int i; - for (i = 0; i < job->num_ibs; i++) - amdgpu_ib_free(job->adev, &job->ibs[i]); - kfree(job->ibs); - return 0; -} - /** * amdgpu_vm_clear_bo - initially clear the page dir/table * @@ -363,15 +322,18 @@ int amdgpu_vm_free_job(struct amdgpu_job *job) * need to reserve bo first before calling it. 
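
Several functions in this patch (amdgpu_vm_clear_bo() below, and the page directory/PTE updates further down) recover their ring from the scheduler embedded in it via container_of(vm->entity.sched, struct amdgpu_ring, sched). For reference, a simplified version of that macro (the kernel's real definition adds type checking):

    #include <stddef.h>

    /* map a pointer to a member back to its containing structure */
    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))
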
*/ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, + struct amdgpu_vm *vm, struct amdgpu_bo *bo) { - struct amdgpu_ring *ring = adev->vm_manager.vm_pte_funcs_ring; + struct amdgpu_ring *ring; struct fence *fence = NULL; - struct amdgpu_ib *ib; + struct amdgpu_job *job; unsigned entries; uint64_t addr; int r; + ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); + r = reservation_object_reserve_shared(bo->tbo.resv); if (r) return r; @@ -383,56 +345,57 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, addr = amdgpu_bo_gpu_offset(bo); entries = amdgpu_bo_size(bo) / 8; - ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL); - if (!ib) + r = amdgpu_job_alloc_with_ib(adev, 64, &job); + if (r) goto error; - r = amdgpu_ib_get(ring, NULL, entries * 2 + 64, ib); + amdgpu_vm_update_pages(adev, NULL, 0, &job->ibs[0], addr, 0, entries, + 0, 0); + amdgpu_ring_pad_ib(ring, &job->ibs[0]); + + WARN_ON(job->ibs[0].length_dw > 64); + r = amdgpu_job_submit(job, ring, &vm->entity, + AMDGPU_FENCE_OWNER_VM, &fence); if (r) goto error_free; - ib->length_dw = 0; - - amdgpu_vm_update_pages(adev, ib, addr, 0, entries, 0, 0, 0); - amdgpu_vm_pad_ib(adev, ib); - WARN_ON(ib->length_dw > 64); - r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1, - &amdgpu_vm_free_job, - AMDGPU_FENCE_OWNER_VM, - &fence); - if (!r) - amdgpu_bo_fence(bo, fence, true); + amdgpu_bo_fence(bo, fence, true); fence_put(fence); - if (amdgpu_enable_scheduler) - return 0; + return 0; error_free: - amdgpu_ib_free(adev, ib); - kfree(ib); + amdgpu_job_free(job); error: return r; } /** - * amdgpu_vm_map_gart - get the physical address of a gart page + * amdgpu_vm_map_gart - Resolve gart mapping of addr * - * @adev: amdgpu_device pointer + * @pages_addr: optional DMA address to use for lookup * @addr: the unmapped addr * * Look up the physical address of the page that the pte resolves - * to (cayman+). - * Returns the physical address of the page. + * to and return the pointer for the page table entry. */ -uint64_t amdgpu_vm_map_gart(struct amdgpu_device *adev, uint64_t addr) +uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) { uint64_t result; - /* page table offset */ - result = adev->gart.pages_addr[addr >> PAGE_SHIFT]; + if (pages_addr) { + /* page table offset */ + result = pages_addr[addr >> PAGE_SHIFT]; - /* in case cpu page size != gpu page size*/ - result |= addr & (~PAGE_MASK); + /* in case cpu page size != gpu page size*/ + result |= addr & (~PAGE_MASK); + + } else { + /* No mapping required */ + result = addr; + } + + result &= 0xFFFFFFFFFFFFF000ULL; return result; } @@ -446,45 +409,37 @@ uint64_t amdgpu_vm_map_gart(struct amdgpu_device *adev, uint64_t addr) * @end: end of GPU address range * * Allocates new page tables if necessary - * and updates the page directory (cayman+). + * and updates the page directory. * Returns 0 for success, error for failure. - * - * Global and local mutex must be locked! */ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, struct amdgpu_vm *vm) { - struct amdgpu_ring *ring = adev->vm_manager.vm_pte_funcs_ring; + struct amdgpu_ring *ring; struct amdgpu_bo *pd = vm->page_directory; uint64_t pd_addr = amdgpu_bo_gpu_offset(pd); uint32_t incr = AMDGPU_VM_PTE_COUNT * 8; uint64_t last_pde = ~0, last_pt = ~0; unsigned count = 0, pt_idx, ndw; + struct amdgpu_job *job; struct amdgpu_ib *ib; struct fence *fence = NULL; int r; + ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); + /* padding, etc. 
*/ ndw = 64; /* assume the worst case */ ndw += vm->max_pde_used * 6; - /* update too big for an IB */ - if (ndw > 0xfffff) - return -ENOMEM; - - ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL); - if (!ib) - return -ENOMEM; - - r = amdgpu_ib_get(ring, NULL, ndw * 4, ib); - if (r) { - kfree(ib); + r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job); + if (r) return r; - } - ib->length_dw = 0; + + ib = &job->ibs[0]; /* walk over the address space and update the page directory */ for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) { @@ -504,9 +459,10 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, ((last_pt + incr * count) != pt)) { if (count) { - amdgpu_vm_update_pages(adev, ib, last_pde, - last_pt, count, incr, - AMDGPU_PTE_VALID, 0); + amdgpu_vm_update_pages(adev, NULL, 0, ib, + last_pde, last_pt, + count, incr, + AMDGPU_PTE_VALID); } count = 1; @@ -518,17 +474,16 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, } if (count) - amdgpu_vm_update_pages(adev, ib, last_pde, last_pt, count, - incr, AMDGPU_PTE_VALID, 0); + amdgpu_vm_update_pages(adev, NULL, 0, ib, last_pde, last_pt, + count, incr, AMDGPU_PTE_VALID); if (ib->length_dw != 0) { - amdgpu_vm_pad_ib(adev, ib); - amdgpu_sync_resv(adev, &ib->sync, pd->tbo.resv, AMDGPU_FENCE_OWNER_VM); + amdgpu_ring_pad_ib(ring, ib); + amdgpu_sync_resv(adev, &job->sync, pd->tbo.resv, + AMDGPU_FENCE_OWNER_VM); WARN_ON(ib->length_dw > ndw); - r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1, - &amdgpu_vm_free_job, - AMDGPU_FENCE_OWNER_VM, - &fence); + r = amdgpu_job_submit(job, ring, &vm->entity, + AMDGPU_FENCE_OWNER_VM, &fence); if (r) goto error_free; @@ -536,18 +491,15 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, fence_put(vm->page_directory_fence); vm->page_directory_fence = fence_get(fence); fence_put(fence); - } - if (!amdgpu_enable_scheduler || ib->length_dw == 0) { - amdgpu_ib_free(adev, ib); - kfree(ib); + } else { + amdgpu_job_free(job); } return 0; error_free: - amdgpu_ib_free(adev, ib); - kfree(ib); + amdgpu_job_free(job); return r; } @@ -555,20 +507,20 @@ error_free: * amdgpu_vm_frag_ptes - add fragment information to PTEs * * @adev: amdgpu_device pointer + * @gtt: GART instance to use for mapping + * @gtt_flags: GTT hw mapping flags * @ib: IB for the update * @pe_start: first PTE to handle * @pe_end: last PTE to handle * @addr: addr those PTEs should point to * @flags: hw mapping flags - * @gtt_flags: GTT hw mapping flags - * - * Global and local mutex must be locked! 
*/ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev, + struct amdgpu_gart *gtt, + uint32_t gtt_flags, struct amdgpu_ib *ib, uint64_t pe_start, uint64_t pe_end, - uint64_t addr, uint32_t flags, - uint32_t gtt_flags) + uint64_t addr, uint32_t flags) { /** * The MC L1 TLB supports variable sized pages, based on a fragment @@ -598,36 +550,39 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev, unsigned count; + /* Abort early if there isn't anything to do */ + if (pe_start == pe_end) + return; + /* system pages are not contiguous */ - if ((flags & AMDGPU_PTE_SYSTEM) || !(flags & AMDGPU_PTE_VALID) || - (frag_start >= frag_end)) { + if (gtt || !(flags & AMDGPU_PTE_VALID) || (frag_start >= frag_end)) { count = (pe_end - pe_start) / 8; - amdgpu_vm_update_pages(adev, ib, pe_start, addr, count, - AMDGPU_GPU_PAGE_SIZE, flags, gtt_flags); + amdgpu_vm_update_pages(adev, gtt, gtt_flags, ib, pe_start, + addr, count, AMDGPU_GPU_PAGE_SIZE, + flags); return; } /* handle the 4K area at the beginning */ if (pe_start != frag_start) { count = (frag_start - pe_start) / 8; - amdgpu_vm_update_pages(adev, ib, pe_start, addr, count, - AMDGPU_GPU_PAGE_SIZE, flags, gtt_flags); + amdgpu_vm_update_pages(adev, NULL, 0, ib, pe_start, addr, + count, AMDGPU_GPU_PAGE_SIZE, flags); addr += AMDGPU_GPU_PAGE_SIZE * count; } /* handle the area in the middle */ count = (frag_end - frag_start) / 8; - amdgpu_vm_update_pages(adev, ib, frag_start, addr, count, - AMDGPU_GPU_PAGE_SIZE, flags | frag_flags, - gtt_flags); + amdgpu_vm_update_pages(adev, NULL, 0, ib, frag_start, addr, count, + AMDGPU_GPU_PAGE_SIZE, flags | frag_flags); /* handle the 4K area at the end */ if (frag_end != pe_end) { addr += AMDGPU_GPU_PAGE_SIZE * count; count = (pe_end - frag_end) / 8; - amdgpu_vm_update_pages(adev, ib, frag_end, addr, count, - AMDGPU_GPU_PAGE_SIZE, flags, gtt_flags); + amdgpu_vm_update_pages(adev, NULL, 0, ib, frag_end, addr, + count, AMDGPU_GPU_PAGE_SIZE, flags); } } @@ -635,122 +590,105 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev, * amdgpu_vm_update_ptes - make sure that page tables are valid * * @adev: amdgpu_device pointer + * @gtt: GART instance to use for mapping + * @gtt_flags: GTT hw mapping flags * @vm: requested vm * @start: start of GPU address range * @end: end of GPU address range * @dst: destination address to map to * @flags: mapping flags * - * Update the page tables in the range @start - @end (cayman+). - * - * Global and local mutex must be locked! + * Update the page tables in the range @start - @end. 
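
The head/middle/tail split in amdgpu_vm_frag_ptes() above exists because the MC L1 TLB can map a whole fragment with one entry, but only for fragment-aligned runs of PTEs. Assuming the 64KB fragments these parts are optimized for and 4KB GPU pages (one fragment = 16 PTEs of 8 bytes = 0x80 bytes of page table), hypothetical numbers work out as:

    /* pe_start = 0x1008, pe_end = 0x1230, frag_align = 0x80
     * frag_start = ALIGN(0x1008, 0x80)  = 0x1080
     * frag_end   = 0x1230 & ~(0x80 - 1) = 0x1200
     *
     * head:   [0x1008, 0x1080)  plain 4KB PTEs
     * middle: [0x1080, 0x1200)  PTEs carrying the fragment flag
     * tail:   [0x1200, 0x1230)  plain 4KB PTEs
     */
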
*/ -static int amdgpu_vm_update_ptes(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_ib *ib, - uint64_t start, uint64_t end, - uint64_t dst, uint32_t flags, - uint32_t gtt_flags) +static void amdgpu_vm_update_ptes(struct amdgpu_device *adev, + struct amdgpu_gart *gtt, + uint32_t gtt_flags, + struct amdgpu_vm *vm, + struct amdgpu_ib *ib, + uint64_t start, uint64_t end, + uint64_t dst, uint32_t flags) { - uint64_t mask = AMDGPU_VM_PTE_COUNT - 1; - uint64_t last_pte = ~0, last_dst = ~0; - void *owner = AMDGPU_FENCE_OWNER_VM; - unsigned count = 0; - uint64_t addr; + const uint64_t mask = AMDGPU_VM_PTE_COUNT - 1; - /* sync to everything on unmapping */ - if (!(flags & AMDGPU_PTE_VALID)) - owner = AMDGPU_FENCE_OWNER_UNDEFINED; + uint64_t last_pe_start = ~0, last_pe_end = ~0, last_dst = ~0; + uint64_t addr; /* walk over the address space and update the page tables */ for (addr = start; addr < end; ) { uint64_t pt_idx = addr >> amdgpu_vm_block_size; struct amdgpu_bo *pt = vm->page_tables[pt_idx].entry.robj; unsigned nptes; - uint64_t pte; - int r; - - amdgpu_sync_resv(adev, &ib->sync, pt->tbo.resv, owner); - r = reservation_object_reserve_shared(pt->tbo.resv); - if (r) - return r; + uint64_t pe_start; if ((addr & ~mask) == (end & ~mask)) nptes = end - addr; else nptes = AMDGPU_VM_PTE_COUNT - (addr & mask); - pte = amdgpu_bo_gpu_offset(pt); - pte += (addr & mask) * 8; + pe_start = amdgpu_bo_gpu_offset(pt); + pe_start += (addr & mask) * 8; - if ((last_pte + 8 * count) != pte) { + if (last_pe_end != pe_start) { - if (count) { - amdgpu_vm_frag_ptes(adev, ib, last_pte, - last_pte + 8 * count, - last_dst, flags, - gtt_flags); - } + amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib, + last_pe_start, last_pe_end, + last_dst, flags); - count = nptes; - last_pte = pte; + last_pe_start = pe_start; + last_pe_end = pe_start + 8 * nptes; last_dst = dst; } else { - count += nptes; + last_pe_end += 8 * nptes; } addr += nptes; dst += nptes * AMDGPU_GPU_PAGE_SIZE; } - if (count) { - amdgpu_vm_frag_ptes(adev, ib, last_pte, - last_pte + 8 * count, - last_dst, flags, gtt_flags); - } - - return 0; + amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib, + last_pe_start, last_pe_end, + last_dst, flags); } /** * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table * * @adev: amdgpu_device pointer + * @gtt: GART instance to use for mapping + * @gtt_flags: flags as they are used for GTT * @vm: requested vm - * @mapping: mapped range and flags to use for the update + * @start: start of mapped range + * @last: last mapped entry + * @flags: flags for the entries * @addr: addr to set the area to - * @gtt_flags: flags as they are used for GTT * @fence: optional resulting fence * - * Fill in the page table entries for @mapping. + * Fill in the page table entries between @start and @last. * Returns 0 for success, -EINVAL for failure. - * - * Object have to be reserved and mutex must be locked! 
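
The reworked amdgpu_vm_update_ptes() above now tracks a single open run [last_pe_start, last_pe_end) and only hands it to amdgpu_vm_frag_ptes() when the next page table window is not adjacent; the "abort early" check added to frag_ptes makes the first, empty flush a no-op. A standalone toy of that coalescing logic (not driver code):

    #include <stdint.h>

    static void flush_run(uint64_t start, uint64_t end)
    {
        if (start == end)       /* mirrors the "abort early" check */
            return;
        /* stand-in for amdgpu_vm_frag_ptes() */
    }

    static void walk_windows(const uint64_t *pe, const unsigned *nptes,
                             int n)
    {
        uint64_t run_start = ~0ull, run_end = ~0ull;
        int i;

        for (i = 0; i < n; i++) {
            if (run_end != pe[i]) {         /* gap: flush previous run */
                flush_run(run_start, run_end);
                run_start = pe[i];
                run_end = pe[i] + 8ull * nptes[i];
            } else {                        /* adjacent: extend the run */
                run_end += 8ull * nptes[i];
            }
        }
        flush_run(run_start, run_end);      /* final run */
    }
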
*/ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, + struct amdgpu_gart *gtt, + uint32_t gtt_flags, struct amdgpu_vm *vm, - struct amdgpu_bo_va_mapping *mapping, - uint64_t addr, uint32_t gtt_flags, + uint64_t start, uint64_t last, + uint32_t flags, uint64_t addr, struct fence **fence) { - struct amdgpu_ring *ring = adev->vm_manager.vm_pte_funcs_ring; + struct amdgpu_ring *ring; + void *owner = AMDGPU_FENCE_OWNER_VM; unsigned nptes, ncmds, ndw; - uint32_t flags = gtt_flags; + struct amdgpu_job *job; struct amdgpu_ib *ib; struct fence *f = NULL; int r; - /* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here - * but in case of something, we filter the flags in first place - */ - if (!(mapping->flags & AMDGPU_PTE_READABLE)) - flags &= ~AMDGPU_PTE_READABLE; - if (!(mapping->flags & AMDGPU_PTE_WRITEABLE)) - flags &= ~AMDGPU_PTE_WRITEABLE; + ring = container_of(vm->entity.sched, struct amdgpu_ring, sched); - trace_amdgpu_vm_bo_update(mapping); + /* sync to everything on unmapping */ + if (!(flags & AMDGPU_PTE_VALID)) + owner = AMDGPU_FENCE_OWNER_UNDEFINED; - nptes = mapping->it.last - mapping->it.start + 1; + nptes = last - start + 1; /* * reserve space for one command every (1 << BLOCK_SIZE) @@ -761,11 +699,11 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, /* padding, etc. */ ndw = 64; - if ((flags & AMDGPU_PTE_SYSTEM) && (flags == gtt_flags)) { + if ((gtt == &adev->gart) && (flags == gtt_flags)) { /* only copy commands needed */ ndw += ncmds * 7; - } else if (flags & AMDGPU_PTE_SYSTEM) { + } else if (gtt) { /* header for write data commands */ ndw += ncmds * 4; @@ -780,38 +718,28 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, ndw += 2 * 10; } - /* update too big for an IB */ - if (ndw > 0xfffff) - return -ENOMEM; - - ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL); - if (!ib) - return -ENOMEM; - - r = amdgpu_ib_get(ring, NULL, ndw * 4, ib); - if (r) { - kfree(ib); + r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job); + if (r) return r; - } - ib->length_dw = 0; + ib = &job->ibs[0]; - r = amdgpu_vm_update_ptes(adev, vm, ib, mapping->it.start, - mapping->it.last + 1, addr + mapping->offset, - flags, gtt_flags); + r = amdgpu_sync_resv(adev, &job->sync, vm->page_directory->tbo.resv, + owner); + if (r) + goto error_free; - if (r) { - amdgpu_ib_free(adev, ib); - kfree(ib); - return r; - } + r = reservation_object_reserve_shared(vm->page_directory->tbo.resv); + if (r) + goto error_free; + + amdgpu_vm_update_ptes(adev, gtt, gtt_flags, vm, ib, start, last + 1, + addr, flags); - amdgpu_vm_pad_ib(adev, ib); + amdgpu_ring_pad_ib(ring, ib); WARN_ON(ib->length_dw > ndw); - r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1, - &amdgpu_vm_free_job, - AMDGPU_FENCE_OWNER_VM, - &f); + r = amdgpu_job_submit(job, ring, &vm->entity, + AMDGPU_FENCE_OWNER_VM, &f); if (r) goto error_free; @@ -821,19 +749,76 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, *fence = fence_get(f); } fence_put(f); - if (!amdgpu_enable_scheduler) { - amdgpu_ib_free(adev, ib); - kfree(ib); - } return 0; error_free: - amdgpu_ib_free(adev, ib); - kfree(ib); + amdgpu_job_free(job); return r; } /** + * amdgpu_vm_bo_split_mapping - split a mapping into smaller chunks + * + * @adev: amdgpu_device pointer + * @gtt: GART instance to use for mapping + * @vm: requested vm + * @mapping: mapped range and flags to use for the update + * @addr: addr to set the area to + * @gtt_flags: flags as they are used for GTT + * @fence: optional 
resulting fence + * + * Split the mapping into smaller chunks so that each update fits + * into an SDMA IB. + * Returns 0 for success, -EINVAL for failure. + */ +static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, + struct amdgpu_gart *gtt, + uint32_t gtt_flags, + struct amdgpu_vm *vm, + struct amdgpu_bo_va_mapping *mapping, + uint64_t addr, struct fence **fence) +{ + const uint64_t max_size = 64ULL * 1024ULL * 1024ULL / AMDGPU_GPU_PAGE_SIZE; + + uint64_t start = mapping->it.start; + uint32_t flags = gtt_flags; + int r; + + /* normally, bo_va->flags only contains the READABLE and WRITEABLE bits, + * but we filter the flags here anyway, just in case + */ + if (!(mapping->flags & AMDGPU_PTE_READABLE)) + flags &= ~AMDGPU_PTE_READABLE; + if (!(mapping->flags & AMDGPU_PTE_WRITEABLE)) + flags &= ~AMDGPU_PTE_WRITEABLE; + + trace_amdgpu_vm_bo_update(mapping); + + addr += mapping->offset; + + if (!gtt || ((gtt == &adev->gart) && (flags == gtt_flags))) + return amdgpu_vm_bo_update_mapping(adev, gtt, gtt_flags, vm, + start, mapping->it.last, + flags, addr, fence); + + while (start != mapping->it.last + 1) { + uint64_t last; + + last = min((uint64_t)mapping->it.last, start + max_size); + r = amdgpu_vm_bo_update_mapping(adev, gtt, gtt_flags, vm, + start, last, flags, addr, + fence); + if (r) + return r; + + start = last + 1; + addr += max_size; + } + + return 0; +} + +/** * amdgpu_vm_bo_update - update all BO mappings in the vm page table * * @adev: amdgpu_device pointer @@ -851,14 +836,25 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, { struct amdgpu_vm *vm = bo_va->vm; struct amdgpu_bo_va_mapping *mapping; + struct amdgpu_gart *gtt = NULL; uint32_t flags; uint64_t addr; int r; if (mem) { addr = (u64)mem->start << PAGE_SHIFT; - if (mem->mem_type != TTM_PL_TT) + switch (mem->mem_type) { + case TTM_PL_TT: + gtt = &bo_va->bo->adev->gart; + break; + + case TTM_PL_VRAM: addr += adev->vm_manager.vram_base_offset; + break; + + default: + break; + } } else { addr = 0; } @@ -871,8 +867,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, spin_unlock(&vm->status_lock); list_for_each_entry(mapping, &bo_va->invalids, list) { - r = amdgpu_vm_bo_update_mapping(adev, vm, mapping, addr, - flags, &bo_va->last_pt_update); + r = amdgpu_vm_bo_split_mapping(adev, gtt, flags, vm, mapping, addr, + &bo_va->last_pt_update); if (r) return r; } @@ -918,7 +914,8 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping, list); list_del(&mapping->list); spin_unlock(&vm->freed_lock); - r = amdgpu_vm_bo_update_mapping(adev, vm, mapping, 0, 0, NULL); + r = amdgpu_vm_bo_split_mapping(adev, NULL, 0, vm, mapping, + 0, NULL); kfree(mapping); if (r) return r; @@ -976,7 +973,7 @@ int amdgpu_vm_clear_invalids(struct amdgpu_device *adev, * @vm: requested vm * @bo: amdgpu buffer object * - * Add @bo into the requested vm (cayman+). + * Add @bo into the requested vm. 
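
amdgpu_vm_bo_split_mapping() above caps each page table update at 64MB worth of GPU pages, replacing the old "update too big for an IB" -ENOMEM failure with iteration. Worked sizing, assuming 4KB GPU pages (AMDGPU_GPU_PAGE_SIZE = 4096):

    /* max_size = 64MB / 4KB = 16384 PTEs per SDMA IB
     * a 1GB mapping = 262144 PTEs -> 262144 / 16384 = 16 submissions
     */
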
* Add @bo to the list of bos associated with the vm * Returns newly added bo_va or NULL for failure * @@ -1117,15 +1114,13 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, */ pt->parent = amdgpu_bo_ref(vm->page_directory); - r = amdgpu_vm_clear_bo(adev, pt); + r = amdgpu_vm_clear_bo(adev, vm, pt); if (r) { amdgpu_bo_unref(&pt); goto error_free; } entry->robj = pt; - entry->prefered_domains = AMDGPU_GEM_DOMAIN_VRAM; - entry->allowed_domains = AMDGPU_GEM_DOMAIN_VRAM; entry->priority = 0; entry->tv.bo = &entry->robj->tbo; entry->tv.shared = true; @@ -1210,7 +1205,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, * @adev: amdgpu_device pointer * @bo_va: requested bo_va * - * Remove @bo_va->bo from the requested vm (cayman+). + * Remove @bo_va->bo from the requested vm. * * Object have to be reserved! */ @@ -1255,7 +1250,7 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, * @vm: requested vm * @bo: amdgpu buffer object * - * Mark @bo as invalid (cayman+). + * Mark @bo as invalid. */ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, struct amdgpu_bo *bo) @@ -1276,13 +1271,16 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, * @adev: amdgpu_device pointer * @vm: requested vm * - * Init @vm fields (cayman+). + * Init @vm fields. */ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) { const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE, AMDGPU_VM_PTE_COUNT * 8); unsigned pd_size, pd_entries; + unsigned ring_instance; + struct amdgpu_ring *ring; + struct amd_sched_rq *rq; int i, r; for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { @@ -1306,6 +1304,17 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) return -ENOMEM; } + /* create scheduler entity for page table updates */ + + ring_instance = atomic_inc_return(&adev->vm_manager.vm_pte_next_ring); + ring_instance %= adev->vm_manager.vm_pte_num_rings; + ring = adev->vm_manager.vm_pte_rings[ring_instance]; + rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL]; + r = amd_sched_entity_init(&ring->sched, &vm->entity, + rq, amdgpu_sched_jobs); + if (r) + return r; + vm->page_directory_fence = NULL; r = amdgpu_bo_create(adev, pd_size, align, true, @@ -1313,22 +1322,27 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) AMDGPU_GEM_CREATE_NO_CPU_ACCESS, NULL, NULL, &vm->page_directory); if (r) - return r; + goto error_free_sched_entity; + r = amdgpu_bo_reserve(vm->page_directory, false); - if (r) { - amdgpu_bo_unref(&vm->page_directory); - vm->page_directory = NULL; - return r; - } - r = amdgpu_vm_clear_bo(adev, vm->page_directory); + if (r) + goto error_free_page_directory; + + r = amdgpu_vm_clear_bo(adev, vm, vm->page_directory); amdgpu_bo_unreserve(vm->page_directory); - if (r) { - amdgpu_bo_unref(&vm->page_directory); - vm->page_directory = NULL; - return r; - } + if (r) + goto error_free_page_directory; return 0; + +error_free_page_directory: + amdgpu_bo_unref(&vm->page_directory); + vm->page_directory = NULL; + +error_free_sched_entity: + amd_sched_entity_fini(&ring->sched, &vm->entity); + + return r; } /** @@ -1337,7 +1351,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) * @adev: amdgpu_device pointer * @vm: requested vm * - * Tear down @vm (cayman+). + * Tear down @vm. 
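
amdgpu_vm_init() above distributes VMs across the available page table update rings with an atomic round-robin counter (see cik_sdma_set_vm_pte_funcs() later in this diff, which now registers every SDMA instance instead of just the first). Since each VM keeps a single scheduler entity, its own updates stay ordered, while different VMs spread their PDE/PTE traffic over all instances. The pick reduces to:

    unsigned inst = atomic_inc_return(&adev->vm_manager.vm_pte_next_ring) %
                    adev->vm_manager.vm_pte_num_rings;
    struct amdgpu_ring *ring = adev->vm_manager.vm_pte_rings[inst];

The entity is torn down again in amdgpu_vm_fini() below and on the new vm_init error paths.
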
* Unbind the VM and remove all bos from the vm bo list */ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) @@ -1345,6 +1359,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) struct amdgpu_bo_va_mapping *mapping, *tmp; int i; + amd_sched_entity_fini(vm->entity.sched, &vm->entity); + if (!RB_EMPTY_ROOT(&vm->va)) { dev_err(adev->dev, "still active bo inside vm\n"); } @@ -1375,6 +1391,27 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) } /** + * amdgpu_vm_manager_init - init the VM manager + * + * @adev: amdgpu_device pointer + * + * Initialize the VM manager structures + */ +void amdgpu_vm_manager_init(struct amdgpu_device *adev) +{ + unsigned i; + + INIT_LIST_HEAD(&adev->vm_manager.ids_lru); + + /* skip over VMID 0, since it is the system VM */ + for (i = 1; i < adev->vm_manager.num_ids; ++i) + list_add_tail(&adev->vm_manager.ids[i].list, + &adev->vm_manager.ids_lru); + + atomic_set(&adev->vm_manager.vm_pte_next_ring, 0); +} + +/** * amdgpu_vm_manager_fini - cleanup VM manager * * @adev: amdgpu_device pointer diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index fd9c958..6b1f053 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1059,257 +1059,6 @@ static int cik_read_register(struct amdgpu_device *adev, u32 se_num, return -EINVAL; } -static void cik_print_gpu_status_regs(struct amdgpu_device *adev) -{ - dev_info(adev->dev, " GRBM_STATUS=0x%08X\n", - RREG32(mmGRBM_STATUS)); - dev_info(adev->dev, " GRBM_STATUS2=0x%08X\n", - RREG32(mmGRBM_STATUS2)); - dev_info(adev->dev, " GRBM_STATUS_SE0=0x%08X\n", - RREG32(mmGRBM_STATUS_SE0)); - dev_info(adev->dev, " GRBM_STATUS_SE1=0x%08X\n", - RREG32(mmGRBM_STATUS_SE1)); - dev_info(adev->dev, " GRBM_STATUS_SE2=0x%08X\n", - RREG32(mmGRBM_STATUS_SE2)); - dev_info(adev->dev, " GRBM_STATUS_SE3=0x%08X\n", - RREG32(mmGRBM_STATUS_SE3)); - dev_info(adev->dev, " SRBM_STATUS=0x%08X\n", - RREG32(mmSRBM_STATUS)); - dev_info(adev->dev, " SRBM_STATUS2=0x%08X\n", - RREG32(mmSRBM_STATUS2)); - dev_info(adev->dev, " SDMA0_STATUS_REG = 0x%08X\n", - RREG32(mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET)); - dev_info(adev->dev, " SDMA1_STATUS_REG = 0x%08X\n", - RREG32(mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET)); - dev_info(adev->dev, " CP_STAT = 0x%08x\n", RREG32(mmCP_STAT)); - dev_info(adev->dev, " CP_STALLED_STAT1 = 0x%08x\n", - RREG32(mmCP_STALLED_STAT1)); - dev_info(adev->dev, " CP_STALLED_STAT2 = 0x%08x\n", - RREG32(mmCP_STALLED_STAT2)); - dev_info(adev->dev, " CP_STALLED_STAT3 = 0x%08x\n", - RREG32(mmCP_STALLED_STAT3)); - dev_info(adev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n", - RREG32(mmCP_CPF_BUSY_STAT)); - dev_info(adev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n", - RREG32(mmCP_CPF_STALLED_STAT1)); - dev_info(adev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS)); - dev_info(adev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT)); - dev_info(adev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n", - RREG32(mmCP_CPC_STALLED_STAT1)); - dev_info(adev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS)); -} - -/** - * cik_gpu_check_soft_reset - check which blocks are busy - * - * @adev: amdgpu_device pointer - * - * Check which blocks are busy and return the relevant reset - * mask to be used by cik_gpu_soft_reset(). - * Returns a mask of the blocks to be reset. 
- */ -u32 amdgpu_cik_gpu_check_soft_reset(struct amdgpu_device *adev) -{ - u32 reset_mask = 0; - u32 tmp; - - /* GRBM_STATUS */ - tmp = RREG32(mmGRBM_STATUS); - if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | - GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | - GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | - GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | - GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | - GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) - reset_mask |= AMDGPU_RESET_GFX; - - if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) - reset_mask |= AMDGPU_RESET_CP; - - /* GRBM_STATUS2 */ - tmp = RREG32(mmGRBM_STATUS2); - if (tmp & GRBM_STATUS2__RLC_BUSY_MASK) - reset_mask |= AMDGPU_RESET_RLC; - - /* SDMA0_STATUS_REG */ - tmp = RREG32(mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET); - if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK)) - reset_mask |= AMDGPU_RESET_DMA; - - /* SDMA1_STATUS_REG */ - tmp = RREG32(mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET); - if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK)) - reset_mask |= AMDGPU_RESET_DMA1; - - /* SRBM_STATUS2 */ - tmp = RREG32(mmSRBM_STATUS2); - if (tmp & SRBM_STATUS2__SDMA_BUSY_MASK) - reset_mask |= AMDGPU_RESET_DMA; - - if (tmp & SRBM_STATUS2__SDMA1_BUSY_MASK) - reset_mask |= AMDGPU_RESET_DMA1; - - /* SRBM_STATUS */ - tmp = RREG32(mmSRBM_STATUS); - - if (tmp & SRBM_STATUS__IH_BUSY_MASK) - reset_mask |= AMDGPU_RESET_IH; - - if (tmp & SRBM_STATUS__SEM_BUSY_MASK) - reset_mask |= AMDGPU_RESET_SEM; - - if (tmp & SRBM_STATUS__GRBM_RQ_PENDING_MASK) - reset_mask |= AMDGPU_RESET_GRBM; - - if (tmp & SRBM_STATUS__VMC_BUSY_MASK) - reset_mask |= AMDGPU_RESET_VMC; - - if (tmp & (SRBM_STATUS__MCB_BUSY_MASK | SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK | - SRBM_STATUS__MCC_BUSY_MASK | SRBM_STATUS__MCD_BUSY_MASK)) - reset_mask |= AMDGPU_RESET_MC; - - if (amdgpu_display_is_display_hung(adev)) - reset_mask |= AMDGPU_RESET_DISPLAY; - - /* Skip MC reset as it's mostly likely not hung, just busy */ - if (reset_mask & AMDGPU_RESET_MC) { - DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask); - reset_mask &= ~AMDGPU_RESET_MC; - } - - return reset_mask; -} - -/** - * cik_gpu_soft_reset - soft reset GPU - * - * @adev: amdgpu_device pointer - * @reset_mask: mask of which blocks to reset - * - * Soft reset the blocks specified in @reset_mask. 
- */ -static void cik_gpu_soft_reset(struct amdgpu_device *adev, u32 reset_mask) -{ - struct amdgpu_mode_mc_save save; - u32 grbm_soft_reset = 0, srbm_soft_reset = 0; - u32 tmp; - - if (reset_mask == 0) - return; - - dev_info(adev->dev, "GPU softreset: 0x%08X\n", reset_mask); - - cik_print_gpu_status_regs(adev); - dev_info(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", - RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR)); - dev_info(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", - RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS)); - - /* disable CG/PG */ - - /* stop the rlc */ - gfx_v7_0_rlc_stop(adev); - - /* Disable GFX parsing/prefetching */ - WREG32(mmCP_ME_CNTL, CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK); - - /* Disable MEC parsing/prefetching */ - WREG32(mmCP_MEC_CNTL, CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK); - - if (reset_mask & AMDGPU_RESET_DMA) { - /* sdma0 */ - tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET); - tmp |= SDMA0_F32_CNTL__HALT_MASK; - WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp); - } - if (reset_mask & AMDGPU_RESET_DMA1) { - /* sdma1 */ - tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET); - tmp |= SDMA0_F32_CNTL__HALT_MASK; - WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp); - } - - gmc_v7_0_mc_stop(adev, &save); - if (amdgpu_asic_wait_for_mc_idle(adev)) { - dev_warn(adev->dev, "Wait for MC idle timedout !\n"); - } - - if (reset_mask & (AMDGPU_RESET_GFX | AMDGPU_RESET_COMPUTE | AMDGPU_RESET_CP)) - grbm_soft_reset = GRBM_SOFT_RESET__SOFT_RESET_CP_MASK | - GRBM_SOFT_RESET__SOFT_RESET_GFX_MASK; - - if (reset_mask & AMDGPU_RESET_CP) { - grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_CP_MASK; - - srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_GRBM_MASK; - } - - if (reset_mask & AMDGPU_RESET_DMA) - srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA_MASK; - - if (reset_mask & AMDGPU_RESET_DMA1) - srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA1_MASK; - - if (reset_mask & AMDGPU_RESET_DISPLAY) - srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK; - - if (reset_mask & AMDGPU_RESET_RLC) - grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_RLC_MASK; - - if (reset_mask & AMDGPU_RESET_SEM) - srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SEM_MASK; - - if (reset_mask & AMDGPU_RESET_IH) - srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_IH_MASK; - - if (reset_mask & AMDGPU_RESET_GRBM) - srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_GRBM_MASK; - - if (reset_mask & AMDGPU_RESET_VMC) - srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_VMC_MASK; - - if (!(adev->flags & AMD_IS_APU)) { - if (reset_mask & AMDGPU_RESET_MC) - srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_MC_MASK; - } - - if (grbm_soft_reset) { - tmp = RREG32(mmGRBM_SOFT_RESET); - tmp |= grbm_soft_reset; - dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); - WREG32(mmGRBM_SOFT_RESET, tmp); - tmp = RREG32(mmGRBM_SOFT_RESET); - - udelay(50); - - tmp &= ~grbm_soft_reset; - WREG32(mmGRBM_SOFT_RESET, tmp); - tmp = RREG32(mmGRBM_SOFT_RESET); - } - - if (srbm_soft_reset) { - tmp = RREG32(mmSRBM_SOFT_RESET); - tmp |= srbm_soft_reset; - dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); - WREG32(mmSRBM_SOFT_RESET, tmp); - tmp = RREG32(mmSRBM_SOFT_RESET); - - udelay(50); - - tmp &= ~srbm_soft_reset; - WREG32(mmSRBM_SOFT_RESET, tmp); - tmp = RREG32(mmSRBM_SOFT_RESET); - } - - /* Wait a little for things to settle down */ - udelay(50); - - gmc_v7_0_mc_resume(adev, &save); - udelay(50); - - cik_print_gpu_status_regs(adev); -} - 
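
The deleted cik_gpu_soft_reset() pulsed the GRBM/SRBM soft reset bits with the usual set/delay/clear pattern; cik_asic_reset() below now always takes the PCI config reset path instead. For reference, the pattern being removed (condensed from the deleted code; the read after each write posts the write before the delay):

    tmp = RREG32(mmGRBM_SOFT_RESET);
    tmp |= grbm_soft_reset;
    WREG32(mmGRBM_SOFT_RESET, tmp);
    tmp = RREG32(mmGRBM_SOFT_RESET);    /* posting read */

    udelay(50);

    tmp &= ~grbm_soft_reset;
    WREG32(mmGRBM_SOFT_RESET, tmp);
    tmp = RREG32(mmGRBM_SOFT_RESET);    /* posting read */
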
struct kv_reset_save_regs { u32 gmcon_reng_execute; u32 gmcon_misc; @@ -1405,45 +1154,11 @@ static void kv_restore_regs_for_reset(struct amdgpu_device *adev, static void cik_gpu_pci_config_reset(struct amdgpu_device *adev) { - struct amdgpu_mode_mc_save save; struct kv_reset_save_regs kv_save = { 0 }; - u32 tmp, i; + u32 i; dev_info(adev->dev, "GPU pci config reset\n"); - /* disable dpm? */ - - /* disable cg/pg */ - - /* Disable GFX parsing/prefetching */ - WREG32(mmCP_ME_CNTL, CP_ME_CNTL__ME_HALT_MASK | - CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK); - - /* Disable MEC parsing/prefetching */ - WREG32(mmCP_MEC_CNTL, - CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK); - - /* sdma0 */ - tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET); - tmp |= SDMA0_F32_CNTL__HALT_MASK; - WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp); - /* sdma1 */ - tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET); - tmp |= SDMA0_F32_CNTL__HALT_MASK; - WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp); - /* XXX other engines? */ - - /* halt the rlc, disable cp internal ints */ - gfx_v7_0_rlc_stop(adev); - - udelay(50); - - /* disable mem access */ - gmc_v7_0_mc_stop(adev, &save); - if (amdgpu_asic_wait_for_mc_idle(adev)) { - dev_warn(adev->dev, "Wait for MC idle timed out !\n"); - } - if (adev->flags & AMD_IS_APU) kv_save_regs_for_reset(adev, &kv_save); @@ -1489,26 +1204,11 @@ static void cik_set_bios_scratch_engine_hung(struct amdgpu_device *adev, bool hu */ static int cik_asic_reset(struct amdgpu_device *adev) { - u32 reset_mask; - - reset_mask = amdgpu_cik_gpu_check_soft_reset(adev); - - if (reset_mask) - cik_set_bios_scratch_engine_hung(adev, true); - - /* try soft reset */ - cik_gpu_soft_reset(adev, reset_mask); - - reset_mask = amdgpu_cik_gpu_check_soft_reset(adev); - - /* try pci config reset */ - if (reset_mask && amdgpu_hard_reset) - cik_gpu_pci_config_reset(adev); + cik_set_bios_scratch_engine_hung(adev, true); - reset_mask = amdgpu_cik_gpu_check_soft_reset(adev); + cik_gpu_pci_config_reset(adev); - if (!reset_mask) - cik_set_bios_scratch_engine_hung(adev, false); + cik_set_bios_scratch_engine_hung(adev, false); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 5f712ce..675f349 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -295,30 +295,6 @@ static void cik_sdma_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq } /** - * cik_sdma_ring_emit_semaphore - emit a semaphore on the dma ring - * - * @ring: amdgpu_ring structure holding ring information - * @semaphore: amdgpu semaphore object - * @emit_wait: wait or signal semaphore - * - * Add a DMA semaphore packet to the ring wait on or signal - * other rings (CIK). - */ -static bool cik_sdma_ring_emit_semaphore(struct amdgpu_ring *ring, - struct amdgpu_semaphore *semaphore, - bool emit_wait) -{ - u64 addr = semaphore->gpu_addr; - u32 extra_bits = emit_wait ? 
0 : SDMA_SEMAPHORE_EXTRA_S; - - amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits)); - amdgpu_ring_write(ring, addr & 0xfffffff8); - amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff); - - return true; -} - -/** * cik_sdma_gfx_stop - stop the gfx async dma engines * * @adev: amdgpu_device pointer @@ -417,6 +393,9 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev) cik_srbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); + WREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i], + adev->gfx.config.gb_addr_config & 0x70); + WREG32(mmSDMA0_SEM_INCOMPLETE_TIMER_CNTL + sdma_offsets[i], 0); WREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0); @@ -584,7 +563,7 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring) tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); - r = amdgpu_ring_lock(ring, 5); + r = amdgpu_ring_alloc(ring, 5); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); amdgpu_wb_free(adev, index); @@ -595,7 +574,7 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring) amdgpu_ring_write(ring, upper_32_bits(gpu_addr)); amdgpu_ring_write(ring, 1); /* number of DWs to follow */ amdgpu_ring_write(ring, 0xDEADBEEF); - amdgpu_ring_unlock_commit(ring); + amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { tmp = le32_to_cpu(adev->wb.wb[index]); @@ -645,7 +624,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring) tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); memset(&ib, 0, sizeof(ib)); - r = amdgpu_ib_get(ring, NULL, 256, &ib); + r = amdgpu_ib_get(adev, NULL, 256, &ib); if (r) { DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); goto err0; @@ -657,9 +636,8 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring) ib.ptr[3] = 1; ib.ptr[4] = 0xDEADBEEF; ib.length_dw = 5; - r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL, - AMDGPU_FENCE_OWNER_UNDEFINED, - &f); + r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED, + NULL, &f); if (r) goto err1; @@ -738,7 +716,7 @@ static void cik_sdma_vm_copy_pte(struct amdgpu_ib *ib, * Update PTEs by writing them manually using sDMA (CIK). 
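
cik_sdma_vm_write_pte() below no longer branches on AMDGPU_PTE_SYSTEM per entry; the reworked amdgpu_vm_map_gart() (earlier in this diff) handles both cases in one call: given a pages_addr table it resolves the DMA address and keeps the sub-page offset, given NULL it passes the address through, and either way it masks the low 12 bits so the flags can be OR'ed in. The offset OR matters when CPU pages are larger than 4KB GPU pages, e.g. hypothetically with 64KB CPU pages:

    /* PAGE_SHIFT = 16 (64KB CPU pages), addr = 0x15345
     *   pages_addr[0x15345 >> 16] = pages_addr[1] = 0xabcd0000
     *   result = 0xabcd0000 | (0x15345 & 0xffff) = 0xabcd5345
     *   result &= 0xFFFFFFFFFFFFF000            = 0xabcd5000
     * i.e. the right 4KB GPU page inside the 64KB CPU page is kept.
     */
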
*/ static void cik_sdma_vm_write_pte(struct amdgpu_ib *ib, - uint64_t pe, + const dma_addr_t *pages_addr, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint32_t flags) { @@ -757,14 +735,7 @@ static void cik_sdma_vm_write_pte(struct amdgpu_ib *ib, ib->ptr[ib->length_dw++] = upper_32_bits(pe); ib->ptr[ib->length_dw++] = ndw; for (; ndw > 0; ndw -= 2, --count, pe += 8) { - if (flags & AMDGPU_PTE_SYSTEM) { - value = amdgpu_vm_map_gart(ib->ring->adev, addr); - value &= 0xFFFFFFFFFFFFF000ULL; - } else if (flags & AMDGPU_PTE_VALID) { - value = addr; - } else { - value = 0; - } + value = amdgpu_vm_map_gart(pages_addr, addr); addr += incr; value |= flags; ib->ptr[ib->length_dw++] = value; @@ -827,9 +798,9 @@ static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib, * @ib: indirect buffer to fill with padding * */ -static void cik_sdma_vm_pad_ib(struct amdgpu_ib *ib) +static void cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) { - struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ib->ring); + struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); u32 pad_count; int i; @@ -1097,6 +1068,8 @@ static void cik_sdma_print_status(void *handle) i, RREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i])); dev_info(adev->dev, " SDMA%d_GFX_RB_BASE_HI=0x%08X\n", i, RREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i])); + dev_info(adev->dev, " SDMA%d_TILING_CONFIG=0x%08X\n", + i, RREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i])); mutex_lock(&adev->srbm_mutex); for (j = 0; j < 16; j++) { cik_srbm_select(adev, 0, 0, 0, j); @@ -1297,12 +1270,12 @@ static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = { .parse_cs = NULL, .emit_ib = cik_sdma_ring_emit_ib, .emit_fence = cik_sdma_ring_emit_fence, - .emit_semaphore = cik_sdma_ring_emit_semaphore, .emit_vm_flush = cik_sdma_ring_emit_vm_flush, .emit_hdp_flush = cik_sdma_ring_emit_hdp_flush, .test_ring = cik_sdma_ring_test_ring, .test_ib = cik_sdma_ring_test_ib, .insert_nop = cik_sdma_ring_insert_nop, + .pad_ib = cik_sdma_ring_pad_ib, }; static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev) @@ -1399,14 +1372,18 @@ static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = { .copy_pte = cik_sdma_vm_copy_pte, .write_pte = cik_sdma_vm_write_pte, .set_pte_pde = cik_sdma_vm_set_pte_pde, - .pad_ib = cik_sdma_vm_pad_ib, }; static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev) { + unsigned i; + if (adev->vm_manager.vm_pte_funcs == NULL) { adev->vm_manager.vm_pte_funcs = &cik_sdma_vm_pte_funcs; - adev->vm_manager.vm_pte_funcs_ring = &adev->sdma.instance[0].ring; - adev->vm_manager.vm_pte_funcs_ring->is_pte_ring = true; + for (i = 0; i < adev->sdma.num_instances; i++) + adev->vm_manager.vm_pte_rings[i] = + &adev->sdma.instance[i].ring; + + adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances; } } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 3483018..e3ff809 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -2670,7 +2670,6 @@ static void dce_v10_0_crtc_destroy(struct drm_crtc *crtc) struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); drm_crtc_cleanup(crtc); - destroy_workqueue(amdgpu_crtc->pflip_queue); kfree(amdgpu_crtc); } @@ -2890,7 +2889,6 @@ static int dce_v10_0_crtc_init(struct amdgpu_device *adev, int index) drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256); amdgpu_crtc->crtc_id = index; - amdgpu_crtc->pflip_queue = create_singlethread_workqueue("amdgpu-pageflip-queue"); 
adev->mode_info.crtcs[index] = amdgpu_crtc; amdgpu_crtc->max_cursor_width = 128; @@ -3366,7 +3364,7 @@ static int dce_v10_0_pageflip_irq(struct amdgpu_device *adev, spin_unlock_irqrestore(&adev->ddev->event_lock, flags); drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id); - queue_work(amdgpu_crtc->pflip_queue, &works->unpin_work); + schedule_work(&works->unpin_work); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 36deea1..6b6c9b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -2661,7 +2661,6 @@ static void dce_v11_0_crtc_destroy(struct drm_crtc *crtc) struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); drm_crtc_cleanup(crtc); - destroy_workqueue(amdgpu_crtc->pflip_queue); kfree(amdgpu_crtc); } @@ -2881,7 +2880,6 @@ static int dce_v11_0_crtc_init(struct amdgpu_device *adev, int index) drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256); amdgpu_crtc->crtc_id = index; - amdgpu_crtc->pflip_queue = create_singlethread_workqueue("amdgpu-pageflip-queue"); adev->mode_info.crtcs[index] = amdgpu_crtc; amdgpu_crtc->max_cursor_width = 128; @@ -3361,7 +3359,7 @@ static int dce_v11_0_pageflip_irq(struct amdgpu_device *adev, spin_unlock_irqrestore(&adev->ddev->event_lock, flags); drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id); - queue_work(amdgpu_crtc->pflip_queue, &works->unpin_work); + schedule_work(&works->unpin_work); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 25dd8b6..56bea36 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -2582,7 +2582,6 @@ static void dce_v8_0_crtc_destroy(struct drm_crtc *crtc) struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); drm_crtc_cleanup(crtc); - destroy_workqueue(amdgpu_crtc->pflip_queue); kfree(amdgpu_crtc); } @@ -2809,7 +2808,6 @@ static int dce_v8_0_crtc_init(struct amdgpu_device *adev, int index) drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256); amdgpu_crtc->crtc_id = index; - amdgpu_crtc->pflip_queue = create_singlethread_workqueue("amdgpu-pageflip-queue"); adev->mode_info.crtcs[index] = amdgpu_crtc; amdgpu_crtc->max_cursor_width = CIK_CURSOR_WIDTH; @@ -3375,7 +3373,7 @@ static int dce_v8_0_pageflip_irq(struct amdgpu_device *adev, spin_unlock_irqrestore(&adev->ddev->event_lock, flags); drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id); - queue_work(amdgpu_crtc->pflip_queue, &works->unpin_work); + schedule_work(&works->unpin_work); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/fiji_smc.c b/drivers/gpu/drm/amd/amdgpu/fiji_smc.c index e35340a..b336c91 100644 --- a/drivers/gpu/drm/amd/amdgpu/fiji_smc.c +++ b/drivers/gpu/drm/amd/amdgpu/fiji_smc.c @@ -272,6 +272,12 @@ static int fiji_smu_upload_firmware_image(struct amdgpu_device *adev) if (!adev->pm.fw) return -EINVAL; + /* Skip SMC ucode loading on SR-IOV capable boards. + * vbios does this for us in asic_init in that case. 
+ */ + if (adev->virtualization.supports_sr_iov) + return 0; + hdr = (const struct smc_firmware_header_v1_0 *)adev->pm.fw->data; amdgpu_ucode_print_smc_hdr(&hdr->header); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 6c76139..250bcbc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -31,8 +31,6 @@ #include "amdgpu_ucode.h" #include "clearstate_ci.h" -#include "uvd/uvd_4_2_d.h" - #include "dce/dce_8_0_d.h" #include "dce/dce_8_0_sh_mask.h" @@ -1006,9 +1004,15 @@ out: */ static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev) { - const u32 num_tile_mode_states = 32; - const u32 num_secondary_tile_mode_states = 16; - u32 reg_offset, gb_tile_moden, split_equal_to_row_size; + const u32 num_tile_mode_states = + ARRAY_SIZE(adev->gfx.config.tile_mode_array); + const u32 num_secondary_tile_mode_states = + ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); + u32 reg_offset, split_equal_to_row_size; + uint32_t *tile, *macrotile; + + tile = adev->gfx.config.tile_mode_array; + macrotile = adev->gfx.config.macrotile_mode_array; switch (adev->gfx.config.mem_row_size_in_kb) { case 1: @@ -1023,832 +1027,531 @@ static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev) break; } + for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) + tile[reg_offset] = 0; + for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) + macrotile[reg_offset] = 0; + switch (adev->asic_type) { case CHIP_BONAIRE: - for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { - switch (reg_offset) { - case 0: - gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_16x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | - MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); - break; - case 1: - gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_16x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | - MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); - break; - case 2: - gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_16x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); - break; - case 3: - gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_16x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | - MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); - break; - case 4: - gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | - TILE_SPLIT(split_equal_to_row_size)); - break; - case 5: - gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); - break; - case 6: - gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | - TILE_SPLIT(split_equal_to_row_size)); - break; - case 7: - gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size)); - break; - - case 8: - gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | - PIPE_CONFIG(ADDR_SURF_P4_16x16)); - break; - case 9: - gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); - break; - case 10: - gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) 
| - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); - break; - case 11: - gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); - break; - case 12: - gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size)); - break; - case 13: - gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); - break; - case 14: - gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); - break; - case 15: - gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); - break; - case 16: - gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); - break; - case 17: - gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size)); - break; - case 18: - gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | - PIPE_CONFIG(ADDR_SURF_P4_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); - break; - case 19: - gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | - PIPE_CONFIG(ADDR_SURF_P4_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); - break; - case 20: - gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | - PIPE_CONFIG(ADDR_SURF_P4_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); - break; - case 21: - gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | - PIPE_CONFIG(ADDR_SURF_P4_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); - break; - case 22: - gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | - PIPE_CONFIG(ADDR_SURF_P4_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); - break; - case 23: - gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size)); - break; - case 24: - gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | - PIPE_CONFIG(ADDR_SURF_P4_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); - break; - case 25: - gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | - PIPE_CONFIG(ADDR_SURF_P4_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); - break; - case 26: - gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | - PIPE_CONFIG(ADDR_SURF_P4_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); - break; - case 27: - gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); - break; - case 28: - gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); - break; - case 29: - gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); - break; - case 30: - gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size)); - break; - 
default: - gb_tile_moden = 0; - break; - } - adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden; - WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden); - } - for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) { - switch (reg_offset) { - case 0: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 1: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 2: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 3: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 4: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 5: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_8_BANK)); - break; - case 6: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - case 8: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 9: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 10: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 11: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 12: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 13: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_8_BANK)); - break; - case 14: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - default: - gb_tile_moden = 0; - break; - } - adev->gfx.config.macrotile_mode_array[reg_offset] = gb_tile_moden; - WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden); - } + tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | + 
MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | + TILE_SPLIT(split_equal_to_row_size)); + tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | + TILE_SPLIT(split_equal_to_row_size)); + tile[7] = (TILE_SPLIT(split_equal_to_row_size)); + tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | + PIPE_CONFIG(ADDR_SURF_P4_16x16)); + tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); + tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + tile[12] = (TILE_SPLIT(split_equal_to_row_size)); + tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); + tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + tile[17] = (TILE_SPLIT(split_equal_to_row_size)); + tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); + tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + tile[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + tile[23] = (TILE_SPLIT(split_equal_to_row_size)); + tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + 
MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); + tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + tile[30] = (TILE_SPLIT(split_equal_to_row_size)); + + macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK)); + macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK)); + macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK)); + macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK)); + macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK)); + macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_8_BANK)); + macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK)); + macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK)); + macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK)); + macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK)); + macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK)); + macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK)); + macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_8_BANK)); + macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK)); + + for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) + WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]); + for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) + if (reg_offset != 
7) + WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]); break; case CHIP_HAWAII: - for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { - switch (reg_offset) { - case 0: - gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | - MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); - break; - case 1: - gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | - MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); - break; - case 2: - gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); - break; - case 3: - gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | - MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); - break; - case 4: - gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | - TILE_SPLIT(split_equal_to_row_size)); - break; - case 5: - gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | - TILE_SPLIT(split_equal_to_row_size)); - break; - case 6: - gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | - TILE_SPLIT(split_equal_to_row_size)); - break; - case 7: - gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | - TILE_SPLIT(split_equal_to_row_size)); - break; - - case 8: - gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | - PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16)); - break; - case 9: - gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); - break; - case 10: - gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); - break; - case 11: - gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); - break; - case 12: - gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); - break; - case 13: - gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); - break; - case 14: - gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); - break; - case 15: - gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); - break; - case 16: - gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | - 
MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); - break; - case 17: - gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); - break; - case 18: - gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | - PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); - break; - case 19: - gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | - PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING)); - break; - case 20: - gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | - PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); - break; - case 21: - gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | - PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); - break; - case 22: - gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | - PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); - break; - case 23: - gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | - PIPE_CONFIG(ADDR_SURF_P4_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); - break; - case 24: - gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | - PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); - break; - case 25: - gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | - PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); - break; - case 26: - gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | - PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); - break; - case 27: - gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); - break; - case 28: - gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); - break; - case 29: - gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); - break; - case 30: - gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P4_16x16) | - MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); - break; - default: - gb_tile_moden = 0; - break; - } - adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden; - WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden); - } - for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) { - switch (reg_offset) { - case 0: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 1: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - 
BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 2: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 3: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 4: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_8_BANK)); - break; - case 5: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - case 6: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - case 8: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 9: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 10: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 11: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_8_BANK)); - break; - case 12: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 13: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_8_BANK)); - break; - case 14: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | - NUM_BANKS(ADDR_SURF_4_BANK)); - break; - default: - gb_tile_moden = 0; - break; - } - adev->gfx.config.macrotile_mode_array[reg_offset] = gb_tile_moden; - WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden); - } + tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + 
MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | + TILE_SPLIT(split_equal_to_row_size)); + tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | + TILE_SPLIT(split_equal_to_row_size)); + tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | + TILE_SPLIT(split_equal_to_row_size)); + tile[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | + TILE_SPLIT(split_equal_to_row_size)); + tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16)); + tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); + tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); + tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + tile[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING)); + tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + tile[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + tile[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + 
MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); + tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + tile[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P4_16x16) | + MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + + macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK)); + macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK)); + macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_16_BANK)); + macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_16_BANK)); + macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_8_BANK)); + macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK)); + macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK)); + macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK)); + macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK)); + macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_16_BANK)); + macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_8_BANK)); + macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_16_BANK)); + macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_8_BANK)); + macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + 
BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | + NUM_BANKS(ADDR_SURF_4_BANK)); + + for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) + WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]); + for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) + if (reg_offset != 7) + WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]); break; case CHIP_KABINI: case CHIP_KAVERI: case CHIP_MULLINS: default: - for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) { - switch (reg_offset) { - case 0: - gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | - MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); - break; - case 1: - gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | - MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); - break; - case 2: - gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | - MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); - break; - case 3: - gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | - MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); - break; - case 4: - gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | - TILE_SPLIT(split_equal_to_row_size)); - break; - case 5: - gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); - break; - case 6: - gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | - TILE_SPLIT(split_equal_to_row_size)); - break; - case 7: - gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size)); - break; - - case 8: - gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | - PIPE_CONFIG(ADDR_SURF_P2)); - break; - case 9: - gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); - break; - case 10: - gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); - break; - case 11: - gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); - break; - case 12: - gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size)); - break; - case 13: - gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); - break; - case 14: - gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); - break; - case 15: - gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); - break; - case 16: - gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); - break; - case 17: - gb_tile_moden = 
(TILE_SPLIT(split_equal_to_row_size)); - break; - case 18: - gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | - PIPE_CONFIG(ADDR_SURF_P2) | - MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); - break; - case 19: - gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | - PIPE_CONFIG(ADDR_SURF_P2) | - MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING)); - break; - case 20: - gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | - PIPE_CONFIG(ADDR_SURF_P2) | - MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); - break; - case 21: - gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | - PIPE_CONFIG(ADDR_SURF_P2) | - MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); - break; - case 22: - gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | - PIPE_CONFIG(ADDR_SURF_P2) | - MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); - break; - case 23: - gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size)); - break; - case 24: - gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | - PIPE_CONFIG(ADDR_SURF_P2) | - MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); - break; - case 25: - gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | - PIPE_CONFIG(ADDR_SURF_P2) | - MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); - break; - case 26: - gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | - PIPE_CONFIG(ADDR_SURF_P2) | - MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); - break; - case 27: - gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); - break; - case 28: - gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); - break; - case 29: - gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | - PIPE_CONFIG(ADDR_SURF_P2) | - MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | - SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); - break; - case 30: - gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size)); - break; - default: - gb_tile_moden = 0; - break; - } - adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden; - WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden); - } - for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) { - switch (reg_offset) { - case 0: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | - NUM_BANKS(ADDR_SURF_8_BANK)); - break; - case 1: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | - NUM_BANKS(ADDR_SURF_8_BANK)); - break; - case 2: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_8_BANK)); - break; - case 3: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_8_BANK)); - break; - case 4: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - 
NUM_BANKS(ADDR_SURF_8_BANK)); - break; - case 5: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_8_BANK)); - break; - case 6: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_8_BANK)); - break; - case 8: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 9: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 10: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 11: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 12: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 13: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | - NUM_BANKS(ADDR_SURF_16_BANK)); - break; - case 14: - gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | - BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | - MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | - NUM_BANKS(ADDR_SURF_8_BANK)); - break; - default: - gb_tile_moden = 0; - break; - } - adev->gfx.config.macrotile_mode_array[reg_offset] = gb_tile_moden; - WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden); - } + tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | + TILE_SPLIT(split_equal_to_row_size)); + tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING)); + tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | + TILE_SPLIT(split_equal_to_row_size)); + tile[7] = (TILE_SPLIT(split_equal_to_row_size)); + tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | + PIPE_CONFIG(ADDR_SURF_P2)); + tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING)); + tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + 
MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + tile[12] = (TILE_SPLIT(split_equal_to_row_size)); + tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING)); + tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + tile[17] = (TILE_SPLIT(split_equal_to_row_size)); + tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P2) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P2) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING)); + tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P2) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + tile[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P2) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P2) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + tile[23] = (TILE_SPLIT(split_equal_to_row_size)); + tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) | + PIPE_CONFIG(ADDR_SURF_P2) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) | + PIPE_CONFIG(ADDR_SURF_P2) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) | + PIPE_CONFIG(ADDR_SURF_P2) | + MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1)); + tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING)); + tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2)); + tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | + PIPE_CONFIG(ADDR_SURF_P2) | + MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | + SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8)); + tile[30] = (TILE_SPLIT(split_equal_to_row_size)); + + macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_8_BANK)); + macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_8_BANK)); + macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_8_BANK)); + macrotile[3] = 
(BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_8_BANK)); + macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_8_BANK)); + macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_8_BANK)); + macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_8_BANK)); + macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK)); + macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK)); + macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK)); + macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK)); + macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK)); + macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | + NUM_BANKS(ADDR_SURF_16_BANK)); + macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | + BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | + MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | + NUM_BANKS(ADDR_SURF_8_BANK)); + + for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) + WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]); + for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) + if (reg_offset != 7) + WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]); break; } } @@ -1893,45 +1596,31 @@ void gfx_v7_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num) */ static u32 gfx_v7_0_create_bitmask(u32 bit_width) { - u32 i, mask = 0; - - for (i = 0; i < bit_width; i++) { - mask <<= 1; - mask |= 1; - } - return mask; + return (u32)((1ULL << bit_width) - 1); } /** - * gfx_v7_0_get_rb_disabled - computes the mask of disabled RBs + * gfx_v7_0_get_rb_active_bitmap - computes the mask of enabled RBs * * @adev: amdgpu_device pointer - * @max_rb_num: max RBs (render backends) for the asic - * @se_num: number of SEs (shader engines) for the asic - * @sh_per_se: number of SH blocks per SE for the asic * - * Calculates the bitmask of disabled RBs (CIK). - * Returns the disabled RB bitmask. + * Calculates the bitmask of enabled RBs (CIK). + * Returns the enabled RB bitmask. 
*/ -static u32 gfx_v7_0_get_rb_disabled(struct amdgpu_device *adev, - u32 max_rb_num_per_se, - u32 sh_per_se) +static u32 gfx_v7_0_get_rb_active_bitmap(struct amdgpu_device *adev) { u32 data, mask; data = RREG32(mmCC_RB_BACKEND_DISABLE); - if (data & 1) - data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; - else - data = 0; - data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE); + data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; - mask = gfx_v7_0_create_bitmask(max_rb_num_per_se / sh_per_se); + mask = gfx_v7_0_create_bitmask(adev->gfx.config.max_backends_per_se / + adev->gfx.config.max_sh_per_se); - return data & mask; + return (~data) & mask; } /** @@ -1940,73 +1629,36 @@ static u32 gfx_v7_0_get_rb_disabled(struct amdgpu_device *adev, * @adev: amdgpu_device pointer * @se_num: number of SEs (shader engines) for the asic * @sh_per_se: number of SH blocks per SE for the asic - * @max_rb_num: max RBs (render backends) for the asic * * Configures per-SE/SH RB registers (CIK). */ -static void gfx_v7_0_setup_rb(struct amdgpu_device *adev, - u32 se_num, u32 sh_per_se, - u32 max_rb_num_per_se) +static void gfx_v7_0_setup_rb(struct amdgpu_device *adev) { int i, j; - u32 data, mask; - u32 disabled_rbs = 0; - u32 enabled_rbs = 0; + u32 data, tmp, num_rbs = 0; + u32 active_rbs = 0; mutex_lock(&adev->grbm_idx_mutex); - for (i = 0; i < se_num; i++) { - for (j = 0; j < sh_per_se; j++) { + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { + for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { gfx_v7_0_select_se_sh(adev, i, j); - data = gfx_v7_0_get_rb_disabled(adev, max_rb_num_per_se, sh_per_se); + data = gfx_v7_0_get_rb_active_bitmap(adev); if (adev->asic_type == CHIP_HAWAII) - disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH); + active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * + HAWAII_RB_BITMAP_WIDTH_PER_SH); else - disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH); + active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * + CIK_RB_BITMAP_WIDTH_PER_SH); } } gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff); mutex_unlock(&adev->grbm_idx_mutex); - mask = 1; - for (i = 0; i < max_rb_num_per_se * se_num; i++) { - if (!(disabled_rbs & mask)) - enabled_rbs |= mask; - mask <<= 1; - } - - adev->gfx.config.backend_enable_mask = enabled_rbs; - - mutex_lock(&adev->grbm_idx_mutex); - for (i = 0; i < se_num; i++) { - gfx_v7_0_select_se_sh(adev, i, 0xffffffff); - data = 0; - for (j = 0; j < sh_per_se; j++) { - switch (enabled_rbs & 3) { - case 0: - if (j == 0) - data |= (RASTER_CONFIG_RB_MAP_3 << - PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT); - else - data |= (RASTER_CONFIG_RB_MAP_0 << - PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT); - break; - case 1: - data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2); - break; - case 2: - data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2); - break; - case 3: - default: - data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2); - break; - } - enabled_rbs >>= 2; - } - WREG32(mmPA_SC_RASTER_CONFIG, data); - } - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff); - mutex_unlock(&adev->grbm_idx_mutex); + adev->gfx.config.backend_enable_mask = active_rbs; + tmp = active_rbs; + while (tmp >>= 1) + num_rbs++; + adev->gfx.config.num_rbs = num_rbs; } /** @@ -2059,192 +1711,23 @@ static void gmc_v7_0_init_compute_vmid(struct amdgpu_device *adev) */ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) { - u32 gb_addr_config; - 
u32 mc_shared_chmap, mc_arb_ramcfg; - u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map; - u32 sh_mem_cfg; - u32 tmp; + u32 tmp, sh_mem_cfg; int i; - switch (adev->asic_type) { - case CHIP_BONAIRE: - adev->gfx.config.max_shader_engines = 2; - adev->gfx.config.max_tile_pipes = 4; - adev->gfx.config.max_cu_per_sh = 7; - adev->gfx.config.max_sh_per_se = 1; - adev->gfx.config.max_backends_per_se = 2; - adev->gfx.config.max_texture_channel_caches = 4; - adev->gfx.config.max_gprs = 256; - adev->gfx.config.max_gs_threads = 32; - adev->gfx.config.max_hw_contexts = 8; - - adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; - adev->gfx.config.sc_prim_fifo_size_backend = 0x100; - adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; - adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; - gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN; - break; - case CHIP_HAWAII: - adev->gfx.config.max_shader_engines = 4; - adev->gfx.config.max_tile_pipes = 16; - adev->gfx.config.max_cu_per_sh = 11; - adev->gfx.config.max_sh_per_se = 1; - adev->gfx.config.max_backends_per_se = 4; - adev->gfx.config.max_texture_channel_caches = 16; - adev->gfx.config.max_gprs = 256; - adev->gfx.config.max_gs_threads = 32; - adev->gfx.config.max_hw_contexts = 8; - - adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; - adev->gfx.config.sc_prim_fifo_size_backend = 0x100; - adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; - adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; - gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN; - break; - case CHIP_KAVERI: - adev->gfx.config.max_shader_engines = 1; - adev->gfx.config.max_tile_pipes = 4; - if ((adev->pdev->device == 0x1304) || - (adev->pdev->device == 0x1305) || - (adev->pdev->device == 0x130C) || - (adev->pdev->device == 0x130F) || - (adev->pdev->device == 0x1310) || - (adev->pdev->device == 0x1311) || - (adev->pdev->device == 0x131C)) { - adev->gfx.config.max_cu_per_sh = 8; - adev->gfx.config.max_backends_per_se = 2; - } else if ((adev->pdev->device == 0x1309) || - (adev->pdev->device == 0x130A) || - (adev->pdev->device == 0x130D) || - (adev->pdev->device == 0x1313) || - (adev->pdev->device == 0x131D)) { - adev->gfx.config.max_cu_per_sh = 6; - adev->gfx.config.max_backends_per_se = 2; - } else if ((adev->pdev->device == 0x1306) || - (adev->pdev->device == 0x1307) || - (adev->pdev->device == 0x130B) || - (adev->pdev->device == 0x130E) || - (adev->pdev->device == 0x1315) || - (adev->pdev->device == 0x131B)) { - adev->gfx.config.max_cu_per_sh = 4; - adev->gfx.config.max_backends_per_se = 1; - } else { - adev->gfx.config.max_cu_per_sh = 3; - adev->gfx.config.max_backends_per_se = 1; - } - adev->gfx.config.max_sh_per_se = 1; - adev->gfx.config.max_texture_channel_caches = 4; - adev->gfx.config.max_gprs = 256; - adev->gfx.config.max_gs_threads = 16; - adev->gfx.config.max_hw_contexts = 8; - - adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; - adev->gfx.config.sc_prim_fifo_size_backend = 0x100; - adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; - adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; - gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN; - break; - case CHIP_KABINI: - case CHIP_MULLINS: - default: - adev->gfx.config.max_shader_engines = 1; - adev->gfx.config.max_tile_pipes = 2; - adev->gfx.config.max_cu_per_sh = 2; - adev->gfx.config.max_sh_per_se = 1; - adev->gfx.config.max_backends_per_se = 1; - adev->gfx.config.max_texture_channel_caches = 2; - adev->gfx.config.max_gprs = 256; - adev->gfx.config.max_gs_threads = 16; - adev->gfx.config.max_hw_contexts = 8; - - 
adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; - adev->gfx.config.sc_prim_fifo_size_backend = 0x100; - adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; - adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; - gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN; - break; - } - WREG32(mmGRBM_CNTL, (0xff << GRBM_CNTL__READ_TIMEOUT__SHIFT)); - mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP); - adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG); - mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg; - - adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes; - adev->gfx.config.mem_max_burst_length_bytes = 256; - if (adev->flags & AMD_IS_APU) { - /* Get memory bank mapping mode. */ - tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING); - dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP); - dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP); - - tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING); - dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP); - dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP); - - /* Validate settings in case only one DIMM installed. */ - if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12)) - dimm00_addr_map = 0; - if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12)) - dimm01_addr_map = 0; - if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12)) - dimm10_addr_map = 0; - if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12)) - dimm11_addr_map = 0; - - /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */ - /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */ - if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11)) - adev->gfx.config.mem_row_size_in_kb = 2; - else - adev->gfx.config.mem_row_size_in_kb = 1; - } else { - tmp = (mc_arb_ramcfg & MC_ARB_RAMCFG__NOOFCOLS_MASK) >> MC_ARB_RAMCFG__NOOFCOLS__SHIFT; - adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024; - if (adev->gfx.config.mem_row_size_in_kb > 4) - adev->gfx.config.mem_row_size_in_kb = 4; - } - /* XXX use MC settings? 
*/ - adev->gfx.config.shader_engine_tile_size = 32; - adev->gfx.config.num_gpus = 1; - adev->gfx.config.multi_gpu_tile_size = 64; - - /* fix up row size */ - gb_addr_config &= ~GB_ADDR_CONFIG__ROW_SIZE_MASK; - switch (adev->gfx.config.mem_row_size_in_kb) { - case 1: - default: - gb_addr_config |= (0 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT); - break; - case 2: - gb_addr_config |= (1 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT); - break; - case 4: - gb_addr_config |= (2 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT); - break; - } - adev->gfx.config.gb_addr_config = gb_addr_config; - - WREG32(mmGB_ADDR_CONFIG, gb_addr_config); - WREG32(mmHDP_ADDR_CONFIG, gb_addr_config); - WREG32(mmDMIF_ADDR_CALC, gb_addr_config); - WREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70); - WREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70); - WREG32(mmUVD_UDEC_ADDR_CONFIG, gb_addr_config); - WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, gb_addr_config); - WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config); + WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config); + WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config); + WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config); gfx_v7_0_tiling_mode_table_init(adev); - gfx_v7_0_setup_rb(adev, adev->gfx.config.max_shader_engines, - adev->gfx.config.max_sh_per_se, - adev->gfx.config.max_backends_per_se); + gfx_v7_0_setup_rb(adev); /* set HW defaults for 3D engine */ WREG32(mmCP_MEQ_THRESHOLDS, - (0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) | - (0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT)); + (0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) | + (0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT)); mutex_lock(&adev->grbm_idx_mutex); /* @@ -2255,7 +1738,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) /* XXX SH_MEM regs */ /* where to put LDS, scratch, GPUVM in FSA64 space */ - sh_mem_cfg = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, + sh_mem_cfg = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, SH_MEM_ALIGNMENT_MODE_UNALIGNED); mutex_lock(&adev->srbm_mutex); @@ -2379,7 +1862,7 @@ static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring) return r; } WREG32(scratch, 0xCAFEDEAD); - r = amdgpu_ring_lock(ring, 3); + r = amdgpu_ring_alloc(ring, 3); if (r) { DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", ring->idx, r); amdgpu_gfx_scratch_free(adev, scratch); @@ -2388,7 +1871,7 @@ static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring) amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); amdgpu_ring_write(ring, 0xDEADBEEF); - amdgpu_ring_unlock_commit(ring); + amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { tmp = RREG32(scratch); @@ -2516,36 +1999,6 @@ static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring, amdgpu_ring_write(ring, upper_32_bits(seq)); } -/** - * gfx_v7_0_ring_emit_semaphore - emit a semaphore on the CP ring - * - * @ring: amdgpu ring buffer object - * @semaphore: amdgpu semaphore object - * @emit_wait: Is this a sempahore wait? - * - * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP - * from running ahead of semaphore waits. - */ -static bool gfx_v7_0_ring_emit_semaphore(struct amdgpu_ring *ring, - struct amdgpu_semaphore *semaphore, - bool emit_wait) -{ - uint64_t addr = semaphore->gpu_addr; - unsigned sel = emit_wait ? 
PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL; - - amdgpu_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1)); - amdgpu_ring_write(ring, addr & 0xffffffff); - amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel); - - if (emit_wait && (ring->type == AMDGPU_RING_TYPE_GFX)) { - /* Prevent the PFP from running ahead of the semaphore wait */ - amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); - amdgpu_ring_write(ring, 0x0); - } - - return true; -} - /* * IB stuff */ @@ -2661,7 +2114,7 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring) } WREG32(scratch, 0xCAFEDEAD); memset(&ib, 0, sizeof(ib)); - r = amdgpu_ib_get(ring, NULL, 256, &ib); + r = amdgpu_ib_get(adev, NULL, 256, &ib); if (r) { DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); goto err1; @@ -2671,9 +2124,8 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring) ib.ptr[2] = 0xDEADBEEF; ib.length_dw = 3; - r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL, - AMDGPU_FENCE_OWNER_UNDEFINED, - &f); + r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED, + NULL, &f); if (r) goto err2; @@ -2842,7 +2294,7 @@ static int gfx_v7_0_cp_gfx_start(struct amdgpu_device *adev) gfx_v7_0_cp_gfx_enable(adev, true); - r = amdgpu_ring_lock(ring, gfx_v7_0_get_csb_size(adev) + 8); + r = amdgpu_ring_alloc(ring, gfx_v7_0_get_csb_size(adev) + 8); if (r) { DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); return r; @@ -2911,7 +2363,7 @@ static int gfx_v7_0_cp_gfx_start(struct amdgpu_device *adev) amdgpu_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */ amdgpu_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */ - amdgpu_ring_unlock_commit(ring); + amdgpu_ring_commit(ring); return 0; } @@ -2989,21 +2441,14 @@ static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev) static u32 gfx_v7_0_ring_get_rptr_gfx(struct amdgpu_ring *ring) { - u32 rptr; - - rptr = ring->adev->wb.wb[ring->rptr_offs]; - - return rptr; + return ring->adev->wb.wb[ring->rptr_offs]; } static u32 gfx_v7_0_ring_get_wptr_gfx(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - u32 wptr; - wptr = RREG32(mmCP_RB0_WPTR); - - return wptr; + return RREG32(mmCP_RB0_WPTR); } static void gfx_v7_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) @@ -3016,21 +2461,13 @@ static void gfx_v7_0_ring_set_wptr_gfx(struct amdgpu_ring *ring) static u32 gfx_v7_0_ring_get_rptr_compute(struct amdgpu_ring *ring) { - u32 rptr; - - rptr = ring->adev->wb.wb[ring->rptr_offs]; - - return rptr; + return ring->adev->wb.wb[ring->rptr_offs]; } static u32 gfx_v7_0_ring_get_wptr_compute(struct amdgpu_ring *ring) { - u32 wptr; - /* XXX check if swapping is necessary on BE */ - wptr = ring->adev->wb.wb[ring->wptr_offs]; - - return wptr; + return ring->adev->wb.wb[ring->wptr_offs]; } static void gfx_v7_0_ring_set_wptr_compute(struct amdgpu_ring *ring) @@ -3126,21 +2563,6 @@ static int gfx_v7_0_cp_compute_load_microcode(struct amdgpu_device *adev) } /** - * gfx_v7_0_cp_compute_start - start the compute queues - * - * @adev: amdgpu_device pointer - * - * Enable the compute queues. - * Returns 0 for success, error for failure. 
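The ring-test hunks above replace amdgpu_ring_lock()/amdgpu_ring_unlock_commit() with amdgpu_ring_alloc()/amdgpu_ring_commit(), and the test-IB path loses the scheduler helper. Condensed from those hunks, the new submission flow looks roughly like this (error handling omitted; ring, adev and scratch come from the surrounding test code):

struct amdgpu_ib ib;
struct fence *f = NULL;
int r;

memset(&ib, 0, sizeof(ib));
r = amdgpu_ib_get(adev, NULL, 256, &ib);	/* now takes the device, not the ring */
ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
ib.ptr[1] = scratch - PACKET3_SET_UCONFIG_REG_START;
ib.ptr[2] = 0xDEADBEEF;
ib.length_dw = 3;
r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED,
		       NULL, &f);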
- */ -static int gfx_v7_0_cp_compute_start(struct amdgpu_device *adev) -{ - gfx_v7_0_cp_compute_enable(adev, true); - - return 0; -} - -/** * gfx_v7_0_cp_compute_fini - stop the compute queues * * @adev: amdgpu_device pointer @@ -3330,9 +2752,7 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev) u32 *buf; struct bonaire_mqd *mqd; - r = gfx_v7_0_cp_compute_start(adev); - if (r) - return r; + gfx_v7_0_cp_compute_enable(adev, true); /* fix up chicken bits */ tmp = RREG32(mmCP_CPF_DEBUG); @@ -4395,28 +3815,20 @@ static void gfx_v7_0_enable_gfx_cgpg(struct amdgpu_device *adev, } } -static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev, - u32 se, u32 sh) +static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev) { - u32 mask = 0, tmp, tmp1; - int i; - - gfx_v7_0_select_se_sh(adev, se, sh); - tmp = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG); - tmp1 = RREG32(mmGC_USER_SHADER_ARRAY_CONFIG); - gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff); + u32 data, mask; - tmp &= 0xffff0000; + data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG); + data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG); - tmp |= tmp1; - tmp >>= 16; + data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; + data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; - for (i = 0; i < adev->gfx.config.max_cu_per_sh; i ++) { - mask <<= 1; - mask |= 1; - } + mask = gfx_v7_0_create_bitmask(adev->gfx.config.max_backends_per_se / + adev->gfx.config.max_sh_per_se); - return (~tmp) & mask; + return (~data) & mask; } static void gfx_v7_0_init_ao_cu_mask(struct amdgpu_device *adev) @@ -4754,6 +4166,172 @@ static int gfx_v7_0_late_init(void *handle) return 0; } +static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev) +{ + u32 gb_addr_config; + u32 mc_shared_chmap, mc_arb_ramcfg; + u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map; + u32 tmp; + + switch (adev->asic_type) { + case CHIP_BONAIRE: + adev->gfx.config.max_shader_engines = 2; + adev->gfx.config.max_tile_pipes = 4; + adev->gfx.config.max_cu_per_sh = 7; + adev->gfx.config.max_sh_per_se = 1; + adev->gfx.config.max_backends_per_se = 2; + adev->gfx.config.max_texture_channel_caches = 4; + adev->gfx.config.max_gprs = 256; + adev->gfx.config.max_gs_threads = 32; + adev->gfx.config.max_hw_contexts = 8; + + adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; + adev->gfx.config.sc_prim_fifo_size_backend = 0x100; + adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; + adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; + gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN; + break; + case CHIP_HAWAII: + adev->gfx.config.max_shader_engines = 4; + adev->gfx.config.max_tile_pipes = 16; + adev->gfx.config.max_cu_per_sh = 11; + adev->gfx.config.max_sh_per_se = 1; + adev->gfx.config.max_backends_per_se = 4; + adev->gfx.config.max_texture_channel_caches = 16; + adev->gfx.config.max_gprs = 256; + adev->gfx.config.max_gs_threads = 32; + adev->gfx.config.max_hw_contexts = 8; + + adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; + adev->gfx.config.sc_prim_fifo_size_backend = 0x100; + adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; + adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; + gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN; + break; + case CHIP_KAVERI: + adev->gfx.config.max_shader_engines = 1; + adev->gfx.config.max_tile_pipes = 4; + if ((adev->pdev->device == 0x1304) || + (adev->pdev->device == 0x1305) || + (adev->pdev->device == 0x130C) || + (adev->pdev->device == 0x130F) || + (adev->pdev->device == 0x1310) || + (adev->pdev->device == 0x1311) || + 
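Note that the reworked gfx_v7_0_get_cu_active_bitmap() above no longer takes se/sh arguments; the caller is expected to select the window first. A usage sketch condensed from the get_cu_info() hunk further down (se and sh are loop indices there):

mutex_lock(&adev->grbm_idx_mutex);
gfx_v7_0_select_se_sh(adev, se, sh);		/* focus GRBM on one SE/SH */
bitmap = gfx_v7_0_get_cu_active_bitmap(adev);	/* ~INACTIVE_CUS & bitmask */
gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);	/* back to broadcast */
mutex_unlock(&adev->grbm_idx_mutex);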
(adev->pdev->device == 0x131C)) { + adev->gfx.config.max_cu_per_sh = 8; + adev->gfx.config.max_backends_per_se = 2; + } else if ((adev->pdev->device == 0x1309) || + (adev->pdev->device == 0x130A) || + (adev->pdev->device == 0x130D) || + (adev->pdev->device == 0x1313) || + (adev->pdev->device == 0x131D)) { + adev->gfx.config.max_cu_per_sh = 6; + adev->gfx.config.max_backends_per_se = 2; + } else if ((adev->pdev->device == 0x1306) || + (adev->pdev->device == 0x1307) || + (adev->pdev->device == 0x130B) || + (adev->pdev->device == 0x130E) || + (adev->pdev->device == 0x1315) || + (adev->pdev->device == 0x131B)) { + adev->gfx.config.max_cu_per_sh = 4; + adev->gfx.config.max_backends_per_se = 1; + } else { + adev->gfx.config.max_cu_per_sh = 3; + adev->gfx.config.max_backends_per_se = 1; + } + adev->gfx.config.max_sh_per_se = 1; + adev->gfx.config.max_texture_channel_caches = 4; + adev->gfx.config.max_gprs = 256; + adev->gfx.config.max_gs_threads = 16; + adev->gfx.config.max_hw_contexts = 8; + + adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; + adev->gfx.config.sc_prim_fifo_size_backend = 0x100; + adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; + adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; + gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN; + break; + case CHIP_KABINI: + case CHIP_MULLINS: + default: + adev->gfx.config.max_shader_engines = 1; + adev->gfx.config.max_tile_pipes = 2; + adev->gfx.config.max_cu_per_sh = 2; + adev->gfx.config.max_sh_per_se = 1; + adev->gfx.config.max_backends_per_se = 1; + adev->gfx.config.max_texture_channel_caches = 2; + adev->gfx.config.max_gprs = 256; + adev->gfx.config.max_gs_threads = 16; + adev->gfx.config.max_hw_contexts = 8; + + adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; + adev->gfx.config.sc_prim_fifo_size_backend = 0x100; + adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; + adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130; + gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN; + break; + } + + mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP); + adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG); + mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg; + + adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes; + adev->gfx.config.mem_max_burst_length_bytes = 256; + if (adev->flags & AMD_IS_APU) { + /* Get memory bank mapping mode. */ + tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING); + dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP); + dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP); + + tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING); + dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP); + dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP); + + /* Validate settings in case only one DIMM installed. */ + if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12)) + dimm00_addr_map = 0; + if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12)) + dimm01_addr_map = 0; + if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12)) + dimm10_addr_map = 0; + if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12)) + dimm11_addr_map = 0; + + /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */ + /* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be the larger one.
*/ + if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11)) + adev->gfx.config.mem_row_size_in_kb = 2; + else + adev->gfx.config.mem_row_size_in_kb = 1; + } else { + tmp = (mc_arb_ramcfg & MC_ARB_RAMCFG__NOOFCOLS_MASK) >> MC_ARB_RAMCFG__NOOFCOLS__SHIFT; + adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024; + if (adev->gfx.config.mem_row_size_in_kb > 4) + adev->gfx.config.mem_row_size_in_kb = 4; + } + /* XXX use MC settings? */ + adev->gfx.config.shader_engine_tile_size = 32; + adev->gfx.config.num_gpus = 1; + adev->gfx.config.multi_gpu_tile_size = 64; + + /* fix up row size */ + gb_addr_config &= ~GB_ADDR_CONFIG__ROW_SIZE_MASK; + switch (adev->gfx.config.mem_row_size_in_kb) { + case 1: + default: + gb_addr_config |= (0 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT); + break; + case 2: + gb_addr_config |= (1 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT); + break; + case 4: + gb_addr_config |= (2 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT); + break; + } + adev->gfx.config.gb_addr_config = gb_addr_config; +} + static int gfx_v7_0_sw_init(void *handle) { struct amdgpu_ring *ring; @@ -4857,6 +4435,10 @@ static int gfx_v7_0_sw_init(void *handle) if (r) return r; + adev->gfx.ce_ram_size = 0x8000; + + gfx_v7_0_gpu_early_init(adev); + return r; } @@ -4897,8 +4479,6 @@ static int gfx_v7_0_hw_init(void *handle) if (r) return r; - adev->gfx.ce_ram_size = 0x8000; - return r; } @@ -5015,16 +4595,6 @@ static void gfx_v7_0_print_status(void *handle) RREG32(mmHDP_ADDR_CONFIG)); dev_info(adev->dev, " DMIF_ADDR_CALC=0x%08X\n", RREG32(mmDMIF_ADDR_CALC)); - dev_info(adev->dev, " SDMA0_TILING_CONFIG=0x%08X\n", - RREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET)); - dev_info(adev->dev, " SDMA1_TILING_CONFIG=0x%08X\n", - RREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET)); - dev_info(adev->dev, " UVD_UDEC_ADDR_CONFIG=0x%08X\n", - RREG32(mmUVD_UDEC_ADDR_CONFIG)); - dev_info(adev->dev, " UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n", - RREG32(mmUVD_UDEC_DB_ADDR_CONFIG)); - dev_info(adev->dev, " UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n", - RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG)); dev_info(adev->dev, " CP_MEQ_THRESHOLDS=0x%08X\n", RREG32(mmCP_MEQ_THRESHOLDS)); @@ -5567,13 +5137,13 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = { .parse_cs = NULL, .emit_ib = gfx_v7_0_ring_emit_ib_gfx, .emit_fence = gfx_v7_0_ring_emit_fence_gfx, - .emit_semaphore = gfx_v7_0_ring_emit_semaphore, .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush, .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch, .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush, .test_ring = gfx_v7_0_ring_test_ring, .test_ib = gfx_v7_0_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, + .pad_ib = amdgpu_ring_generic_pad_ib, }; static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { @@ -5583,13 +5153,13 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { .parse_cs = NULL, .emit_ib = gfx_v7_0_ring_emit_ib_compute, .emit_fence = gfx_v7_0_ring_emit_fence_compute, - .emit_semaphore = gfx_v7_0_ring_emit_semaphore, .emit_vm_flush = gfx_v7_0_ring_emit_vm_flush, .emit_gds_switch = gfx_v7_0_ring_emit_gds_switch, .emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush, .test_ring = gfx_v7_0_ring_test_ring, .test_ib = gfx_v7_0_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, + .pad_ib = amdgpu_ring_generic_pad_ib, }; static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev) @@ -5659,7 +5229,7 @@ static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev) int gfx_v7_0_get_cu_info(struct amdgpu_device *adev, 
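The row-size fixup above maps the 1/2/4 KB row sizes onto the 0/1/2 encoding of the ROW_SIZE field. For the three legal values the switch is equivalent to this shift (illustrative only, not how the patch writes it):

gb_addr_config &= ~GB_ADDR_CONFIG__ROW_SIZE_MASK;
gb_addr_config |= (adev->gfx.config.mem_row_size_in_kb >> 1)
		  << GB_ADDR_CONFIG__ROW_SIZE__SHIFT;	/* 1 -> 0, 2 -> 1, 4 -> 2 */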
- struct amdgpu_cu_info *cu_info) + struct amdgpu_cu_info *cu_info) { int i, j, k, counter, active_cu_number = 0; u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; @@ -5673,10 +5243,11 @@ int gfx_v7_0_get_cu_info(struct amdgpu_device *adev, mask = 1; ao_bitmap = 0; counter = 0; - bitmap = gfx_v7_0_get_cu_active_bitmap(adev, i, j); + gfx_v7_0_select_se_sh(adev, i, j); + bitmap = gfx_v7_0_get_cu_active_bitmap(adev); cu_info->bitmap[i][j] = bitmap; - for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { + for (k = 0; k < 16; k ++) { if (bitmap & mask) { if (counter < 2) ao_bitmap |= mask; @@ -5688,9 +5259,11 @@ int gfx_v7_0_get_cu_info(struct amdgpu_device *adev, ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); } } + gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); cu_info->number = active_cu_number; cu_info->ao_cu_mask = ao_cu_mask; - mutex_unlock(&adev->grbm_idx_mutex); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 8f8ec37..10c8650 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -43,9 +43,6 @@ #include "gca/gfx_8_0_sh_mask.h" #include "gca/gfx_8_0_enum.h" -#include "uvd/uvd_5_0_d.h" -#include "uvd/uvd_5_0_sh_mask.h" - #include "dce/dce_10_0_d.h" #include "dce/dce_10_0_sh_mask.h" @@ -652,7 +649,7 @@ static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring) return r; } WREG32(scratch, 0xCAFEDEAD); - r = amdgpu_ring_lock(ring, 3); + r = amdgpu_ring_alloc(ring, 3); if (r) { DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", ring->idx, r); @@ -662,7 +659,7 @@ static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring) amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); amdgpu_ring_write(ring, 0xDEADBEEF); - amdgpu_ring_unlock_commit(ring); + amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { tmp = RREG32(scratch); @@ -699,7 +696,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring) } WREG32(scratch, 0xCAFEDEAD); memset(&ib, 0, sizeof(ib)); - r = amdgpu_ib_get(ring, NULL, 256, &ib); + r = amdgpu_ib_get(adev, NULL, 256, &ib); if (r) { DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); goto err1; @@ -709,9 +706,8 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring) ib.ptr[2] = 0xDEADBEEF; ib.length_dw = 3; - r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL, - AMDGPU_FENCE_OWNER_UNDEFINED, - &f); + r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED, + NULL, &f); if (r) goto err2; @@ -1171,7 +1167,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) /* allocate an indirect buffer to put the commands in */ memset(&ib, 0, sizeof(ib)); - r = amdgpu_ib_get(ring, NULL, total_size, &ib); + r = amdgpu_ib_get(adev, NULL, total_size, &ib); if (r) { DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); return r; @@ -1266,9 +1262,8 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4); /* schedule the ib on the ring */ - r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL, - AMDGPU_FENCE_OWNER_UNDEFINED, - &f); + r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED, + NULL, &f); if (r) { DRM_ERROR("amdgpu: ib submit failed (%d).\n", r); goto fail; @@ -2574,11 +2569,6 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev) } } -static u32 gfx_v8_0_create_bitmask(u32
bit_width) -{ - return (u32)((1ULL << bit_width) - 1); -} - void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num) { u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1); @@ -2599,89 +2589,50 @@ void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num) WREG32(mmGRBM_GFX_INDEX, data); } -static u32 gfx_v8_0_get_rb_disabled(struct amdgpu_device *adev, - u32 max_rb_num_per_se, - u32 sh_per_se) +static u32 gfx_v8_0_create_bitmask(u32 bit_width) +{ + return (u32)((1ULL << bit_width) - 1); +} + +static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev) { u32 data, mask; data = RREG32(mmCC_RB_BACKEND_DISABLE); - data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; - data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE); + data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK; data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT; - mask = gfx_v8_0_create_bitmask(max_rb_num_per_se / sh_per_se); + mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se / + adev->gfx.config.max_sh_per_se); - return data & mask; + return (~data) & mask; } -static void gfx_v8_0_setup_rb(struct amdgpu_device *adev, - u32 se_num, u32 sh_per_se, - u32 max_rb_num_per_se) +static void gfx_v8_0_setup_rb(struct amdgpu_device *adev) { int i, j; - u32 data, mask; - u32 disabled_rbs = 0; - u32 enabled_rbs = 0; + u32 data, tmp, num_rbs = 0; + u32 active_rbs = 0; mutex_lock(&adev->grbm_idx_mutex); - for (i = 0; i < se_num; i++) { - for (j = 0; j < sh_per_se; j++) { + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { + for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { gfx_v8_0_select_se_sh(adev, i, j); - data = gfx_v8_0_get_rb_disabled(adev, - max_rb_num_per_se, sh_per_se); - disabled_rbs |= data << ((i * sh_per_se + j) * - RB_BITMAP_WIDTH_PER_SH); + data = gfx_v8_0_get_rb_active_bitmap(adev); + active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * + RB_BITMAP_WIDTH_PER_SH); } } gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); mutex_unlock(&adev->grbm_idx_mutex); - mask = 1; - for (i = 0; i < max_rb_num_per_se * se_num; i++) { - if (!(disabled_rbs & mask)) - enabled_rbs |= mask; - mask <<= 1; - } - - adev->gfx.config.backend_enable_mask = enabled_rbs; - - mutex_lock(&adev->grbm_idx_mutex); - for (i = 0; i < se_num; i++) { - gfx_v8_0_select_se_sh(adev, i, 0xffffffff); - data = RREG32(mmPA_SC_RASTER_CONFIG); - for (j = 0; j < sh_per_se; j++) { - switch (enabled_rbs & 3) { - case 0: - if (j == 0) - data |= (RASTER_CONFIG_RB_MAP_3 << - PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT); - else - data |= (RASTER_CONFIG_RB_MAP_0 << - PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT); - break; - case 1: - data |= (RASTER_CONFIG_RB_MAP_0 << - (i * sh_per_se + j) * 2); - break; - case 2: - data |= (RASTER_CONFIG_RB_MAP_3 << - (i * sh_per_se + j) * 2); - break; - case 3: - default: - data |= (RASTER_CONFIG_RB_MAP_2 << - (i * sh_per_se + j) * 2); - break; - } - enabled_rbs >>= 2; - } - WREG32(mmPA_SC_RASTER_CONFIG, data); - } - gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); - mutex_unlock(&adev->grbm_idx_mutex); + adev->gfx.config.backend_enable_mask = active_rbs; + tmp = active_rbs; + while (tmp >>= 1) + num_rbs++; + adev->gfx.config.num_rbs = num_rbs; } /** @@ -2741,19 +2692,10 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config); WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config); WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config); - WREG32(mmSDMA0_TILING_CONFIG + 
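gfx_v8_0_create_bitmask(n) simply builds a mask of the n low bits; the new gfx_v8_0_get_rb_active_bitmap() above inverts the harvest state and applies that mask. A worked example with made-up register values:

u32 mask = gfx_v8_0_create_bitmask(4);	/* 4 RBs per SH -> 0xf */
u32 data = 0x1 & mask;			/* pretend the harvest fuses disable RB0 */
u32 active_rbs = (~data) & mask;	/* 0xe: RB1..RB3 are usable */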
SDMA0_REGISTER_OFFSET, - adev->gfx.config.gb_addr_config & 0x70); - WREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, - adev->gfx.config.gb_addr_config & 0x70); - WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config); - WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config); - WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config); gfx_v8_0_tiling_mode_table_init(adev); - gfx_v8_0_setup_rb(adev, adev->gfx.config.max_shader_engines, - adev->gfx.config.max_sh_per_se, - adev->gfx.config.max_backends_per_se); + gfx_v8_0_setup_rb(adev); /* XXX SH_MEM regs */ /* where to put LDS, scratch, GPUVM in FSA64 space */ @@ -3062,7 +3004,7 @@ static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev) gfx_v8_0_cp_gfx_enable(adev, true); - r = amdgpu_ring_lock(ring, gfx_v8_0_get_csb_size(adev) + 4); + r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4); if (r) { DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r); return r; @@ -3126,7 +3068,7 @@ static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev) amdgpu_ring_write(ring, 0x8000); amdgpu_ring_write(ring, 0x8000); - amdgpu_ring_unlock_commit(ring); + amdgpu_ring_commit(ring); return 0; } @@ -3226,13 +3168,6 @@ static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) udelay(50); } -static int gfx_v8_0_cp_compute_start(struct amdgpu_device *adev) -{ - gfx_v8_0_cp_compute_enable(adev, true); - - return 0; -} - static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev) { const struct gfx_firmware_header_v1_0 *mec_hdr; @@ -3802,9 +3737,7 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev) WREG32(mmCP_PQ_STATUS, tmp); } - r = gfx_v8_0_cp_compute_start(adev); - if (r) - return r; + gfx_v8_0_cp_compute_enable(adev, true); for (i = 0; i < adev->gfx.num_compute_rings; i++) { struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; @@ -4016,16 +3949,6 @@ static void gfx_v8_0_print_status(void *handle) RREG32(mmHDP_ADDR_CONFIG)); dev_info(adev->dev, " DMIF_ADDR_CALC=0x%08X\n", RREG32(mmDMIF_ADDR_CALC)); - dev_info(adev->dev, " SDMA0_TILING_CONFIG=0x%08X\n", - RREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET)); - dev_info(adev->dev, " SDMA1_TILING_CONFIG=0x%08X\n", - RREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET)); - dev_info(adev->dev, " UVD_UDEC_ADDR_CONFIG=0x%08X\n", - RREG32(mmUVD_UDEC_ADDR_CONFIG)); - dev_info(adev->dev, " UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n", - RREG32(mmUVD_UDEC_DB_ADDR_CONFIG)); - dev_info(adev->dev, " UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n", - RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG)); dev_info(adev->dev, " CP_MEQ_THRESHOLDS=0x%08X\n", RREG32(mmCP_MEQ_THRESHOLDS)); @@ -4762,49 +4685,11 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr, } -/** - * gfx_v8_0_ring_emit_semaphore - emit a semaphore on the CP ring - * - * @ring: amdgpu ring buffer object - * @semaphore: amdgpu semaphore object - * @emit_wait: Is this a semaphore wait? - * - * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP - * from running ahead of semaphore waits. - */ -static bool gfx_v8_0_ring_emit_semaphore(struct amdgpu_ring *ring, - struct amdgpu_semaphore *semaphore, - bool emit_wait) -{ - uint64_t addr = semaphore->gpu_addr; - unsigned sel = emit_wait ?
PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL; - - if (ring->adev->asic_type == CHIP_TOPAZ || - ring->adev->asic_type == CHIP_TONGA || - ring->adev->asic_type == CHIP_FIJI) - /* we got a hw semaphore bug in VI TONGA, return false to switch back to sw fence wait */ - return false; - else { - amdgpu_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 2)); - amdgpu_ring_write(ring, lower_32_bits(addr)); - amdgpu_ring_write(ring, upper_32_bits(addr)); - amdgpu_ring_write(ring, sel); - } - - if (emit_wait && (ring->type == AMDGPU_RING_TYPE_GFX)) { - /* Prevent the PFP from running ahead of the semaphore wait */ - amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); - amdgpu_ring_write(ring, 0x0); - } - - return true; -} - static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned vm_id, uint64_t pd_addr) { int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX); - uint32_t seq = ring->fence_drv.sync_seq[ring->idx]; + uint32_t seq = ring->fence_drv.sync_seq; uint64_t addr = ring->fence_drv.gpu_addr; amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5)); @@ -5145,13 +5030,13 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { .parse_cs = NULL, .emit_ib = gfx_v8_0_ring_emit_ib_gfx, .emit_fence = gfx_v8_0_ring_emit_fence_gfx, - .emit_semaphore = gfx_v8_0_ring_emit_semaphore, .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch, .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, .test_ring = gfx_v8_0_ring_test_ring, .test_ib = gfx_v8_0_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, + .pad_ib = amdgpu_ring_generic_pad_ib, }; static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { @@ -5161,13 +5046,13 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { .parse_cs = NULL, .emit_ib = gfx_v8_0_ring_emit_ib_compute, .emit_fence = gfx_v8_0_ring_emit_fence_compute, - .emit_semaphore = gfx_v8_0_ring_emit_semaphore, .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush, .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch, .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush, .test_ring = gfx_v8_0_ring_test_ring, .test_ib = gfx_v8_0_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, + .pad_ib = amdgpu_ring_generic_pad_ib, }; static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev) @@ -5236,32 +5121,24 @@ static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev) } } -static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev, - u32 se, u32 sh) +static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev) { - u32 mask = 0, tmp, tmp1; - int i; - - gfx_v8_0_select_se_sh(adev, se, sh); - tmp = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG); - tmp1 = RREG32(mmGC_USER_SHADER_ARRAY_CONFIG); - gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); + u32 data, mask; - tmp &= 0xffff0000; + data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG); + data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG); - tmp |= tmp1; - tmp >>= 16; + data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK; + data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT; - for (i = 0; i < adev->gfx.config.max_cu_per_sh; i ++) { - mask <<= 1; - mask |= 1; - } + mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se / + adev->gfx.config.max_sh_per_se); - return (~tmp) & mask; + return (~data) & mask; } int gfx_v8_0_get_cu_info(struct amdgpu_device *adev, - struct amdgpu_cu_info *cu_info) + struct amdgpu_cu_info *cu_info) { int i, j, k, counter, active_cu_number = 0; u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0; @@ -5275,10 +5152,11 @@ int 
gfx_v8_0_get_cu_info(struct amdgpu_device *adev, mask = 1; ao_bitmap = 0; counter = 0; - bitmap = gfx_v8_0_get_cu_active_bitmap(adev, i, j); + gfx_v8_0_select_se_sh(adev, i, j); + bitmap = gfx_v8_0_get_cu_active_bitmap(adev); cu_info->bitmap[i][j] = bitmap; - for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { + for (k = 0; k < 16; k ++) { if (bitmap & mask) { if (counter < 2) ao_bitmap |= mask; @@ -5290,9 +5168,11 @@ int gfx_v8_0_get_cu_info(struct amdgpu_device *adev, ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8)); } } + gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); cu_info->number = active_cu_number; cu_info->ao_cu_mask = ao_cu_mask; - mutex_unlock(&adev->grbm_idx_mutex); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 8aa2991..68ee66b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -694,7 +694,8 @@ static int gmc_v7_0_vm_init(struct amdgpu_device *adev) * amdgpu graphics/compute will use VMIDs 1-7 * amdkfd will use VMIDs 8-15 */ - adev->vm_manager.nvm = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS; + amdgpu_vm_manager_init(adev); /* base offset of vram pages */ if (adev->flags & AMD_IS_APU) { @@ -926,10 +927,6 @@ static int gmc_v7_0_sw_init(void *handle) int dma_bits; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = amdgpu_gem_init(adev); - if (r) - return r; - r = amdgpu_irq_add_id(adev, 146, &adev->mc.vm_fault); if (r) return r; @@ -1010,7 +1007,7 @@ static int gmc_v7_0_sw_fini(void *handle) adev->vm_manager.enabled = false; } gmc_v7_0_gart_fini(adev); - amdgpu_gem_fini(adev); + amdgpu_gem_force_release(adev); amdgpu_bo_fini(adev); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 3efd455..757803a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -252,6 +252,12 @@ static int gmc_v8_0_mc_load_microcode(struct amdgpu_device *adev) if (!adev->mc.fw) return -EINVAL; + /* Skip MC ucode loading on SR-IOV capable boards. + * vbios does this for us in asic_init in that case. 
+ */ + if (adev->virtualization.supports_sr_iov) + return 0; + hdr = (const struct mc_firmware_header_v1_0 *)adev->mc.fw->data; amdgpu_ucode_print_mc_hdr(&hdr->header); @@ -774,7 +780,8 @@ static int gmc_v8_0_vm_init(struct amdgpu_device *adev) * amdgpu graphics/compute will use VMIDs 1-7 * amdkfd will use VMIDs 8-15 */ - adev->vm_manager.nvm = AMDGPU_NUM_OF_VMIDS; + adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS; + amdgpu_vm_manager_init(adev); /* base offset of vram pages */ if (adev->flags & AMD_IS_APU) { @@ -880,10 +887,6 @@ static int gmc_v8_0_sw_init(void *handle) int dma_bits; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = amdgpu_gem_init(adev); - if (r) - return r; - r = amdgpu_irq_add_id(adev, 146, &adev->mc.vm_fault); if (r) return r; @@ -964,7 +967,7 @@ static int gmc_v8_0_sw_fini(void *handle) adev->vm_manager.enabled = false; } gmc_v8_0_gart_fini(adev); - amdgpu_gem_fini(adev); + amdgpu_gem_force_release(adev); amdgpu_bo_fini(adev); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_smc.c b/drivers/gpu/drm/amd/amdgpu/iceland_smc.c index 090486c..52ee081 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_smc.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_smc.c @@ -279,6 +279,12 @@ static int iceland_smu_upload_firmware_image(struct amdgpu_device *adev) if (!adev->pm.fw) return -EINVAL; + /* Skip SMC ucode loading on SR-IOV capable boards. + * vbios does this for us in asic_init in that case. + */ + if (adev->virtualization.supports_sr_iov) + return 0; + hdr = (const struct smc_firmware_header_v1_0 *)adev->pm.fw->data; amdgpu_ucode_print_smc_hdr(&hdr->header); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 2cf5018..29ec986 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -335,31 +335,6 @@ static void sdma_v2_4_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se } /** - * sdma_v2_4_ring_emit_semaphore - emit a semaphore on the dma ring - * - * @ring: amdgpu_ring structure holding ring information - * @semaphore: amdgpu semaphore object - * @emit_wait: wait or signal semaphore - * - * Add a DMA semaphore packet to the ring to wait on or signal - * other rings (VI). - */ -static bool sdma_v2_4_ring_emit_semaphore(struct amdgpu_ring *ring, - struct amdgpu_semaphore *semaphore, - bool emit_wait) -{ - u64 addr = semaphore->gpu_addr; - u32 sig = emit_wait ?
0 : 1; - - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SEM) | - SDMA_PKT_SEMAPHORE_HEADER_SIGNAL(sig)); - amdgpu_ring_write(ring, lower_32_bits(addr) & 0xfffffff8); - amdgpu_ring_write(ring, upper_32_bits(addr)); - - return true; -} - -/** * sdma_v2_4_gfx_stop - stop the gfx async dma engines * * @adev: amdgpu_device pointer @@ -459,6 +434,9 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev) vi_srbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); + WREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i], + adev->gfx.config.gb_addr_config & 0x70); + WREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0); /* Set ring buffer size in dwords */ @@ -636,7 +614,7 @@ static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring) tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); - r = amdgpu_ring_lock(ring, 5); + r = amdgpu_ring_alloc(ring, 5); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); amdgpu_wb_free(adev, index); @@ -649,7 +627,7 @@ static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring) amdgpu_ring_write(ring, upper_32_bits(gpu_addr)); amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1)); amdgpu_ring_write(ring, 0xDEADBEEF); - amdgpu_ring_unlock_commit(ring); + amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { tmp = le32_to_cpu(adev->wb.wb[index]); @@ -699,7 +677,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring) tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); memset(&ib, 0, sizeof(ib)); - r = amdgpu_ib_get(ring, NULL, 256, &ib); + r = amdgpu_ib_get(adev, NULL, 256, &ib); if (r) { DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); goto err0; @@ -716,9 +694,8 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring) ib.ptr[7] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP); ib.length_dw = 8; - r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL, - AMDGPU_FENCE_OWNER_UNDEFINED, - &f); + r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED, + NULL, &f); if (r) goto err1; @@ -797,7 +774,7 @@ static void sdma_v2_4_vm_copy_pte(struct amdgpu_ib *ib, * Update PTEs by writing them manually using sDMA (CIK). 
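The write_pte hunk that follows drops the per-flag branching: the caller now passes a pages_addr array and amdgpu_vm_map_gart() resolves the DMA address, so the inner loop reduces to (condensed from the hunk below):

for (; ndw > 0; ndw -= 2, --count, pe += 8) {
	value = amdgpu_vm_map_gart(pages_addr, addr);	/* look up via the page array */
	addr += incr;
	value |= flags;
	ib->ptr[ib->length_dw++] = value;
	/* ... */
}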
*/ static void sdma_v2_4_vm_write_pte(struct amdgpu_ib *ib, - uint64_t pe, + const dma_addr_t *pages_addr, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint32_t flags) { @@ -816,14 +793,7 @@ static void sdma_v2_4_vm_write_pte(struct amdgpu_ib *ib, ib->ptr[ib->length_dw++] = upper_32_bits(pe); ib->ptr[ib->length_dw++] = ndw; for (; ndw > 0; ndw -= 2, --count, pe += 8) { - if (flags & AMDGPU_PTE_SYSTEM) { - value = amdgpu_vm_map_gart(ib->ring->adev, addr); - value &= 0xFFFFFFFFFFFFF000ULL; - } else if (flags & AMDGPU_PTE_VALID) { - value = addr; - } else { - value = 0; - } + value = amdgpu_vm_map_gart(pages_addr, addr); addr += incr; value |= flags; ib->ptr[ib->length_dw++] = value; @@ -881,14 +851,14 @@ static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib, } /** - * sdma_v2_4_vm_pad_ib - pad the IB to the required number of dw + * sdma_v2_4_ring_pad_ib - pad the IB to the required number of dw * * @ib: indirect buffer to fill with padding * */ -static void sdma_v2_4_vm_pad_ib(struct amdgpu_ib *ib) +static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) { - struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ib->ring); + struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); u32 pad_count; int i; @@ -1111,6 +1081,8 @@ static void sdma_v2_4_print_status(void *handle) i, RREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i])); dev_info(adev->dev, " SDMA%d_GFX_RB_BASE_HI=0x%08X\n", i, RREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i])); + dev_info(adev->dev, " SDMA%d_TILING_CONFIG=0x%08X\n", + i, RREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i])); mutex_lock(&adev->srbm_mutex); for (j = 0; j < 16; j++) { vi_srbm_select(adev, 0, 0, 0, j); @@ -1302,12 +1274,12 @@ static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = { .parse_cs = NULL, .emit_ib = sdma_v2_4_ring_emit_ib, .emit_fence = sdma_v2_4_ring_emit_fence, - .emit_semaphore = sdma_v2_4_ring_emit_semaphore, .emit_vm_flush = sdma_v2_4_ring_emit_vm_flush, .emit_hdp_flush = sdma_v2_4_ring_emit_hdp_flush, .test_ring = sdma_v2_4_ring_test_ring, .test_ib = sdma_v2_4_ring_test_ib, .insert_nop = sdma_v2_4_ring_insert_nop, + .pad_ib = sdma_v2_4_ring_pad_ib, }; static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev) @@ -1405,14 +1377,18 @@ static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = { .copy_pte = sdma_v2_4_vm_copy_pte, .write_pte = sdma_v2_4_vm_write_pte, .set_pte_pde = sdma_v2_4_vm_set_pte_pde, - .pad_ib = sdma_v2_4_vm_pad_ib, }; static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev) { + unsigned i; + if (adev->vm_manager.vm_pte_funcs == NULL) { adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs; - adev->vm_manager.vm_pte_funcs_ring = &adev->sdma.instance[0].ring; - adev->vm_manager.vm_pte_funcs_ring->is_pte_ring = true; + for (i = 0; i < adev->sdma.num_instances; i++) + adev->vm_manager.vm_pte_rings[i] = + &adev->sdma.instance[i].ring; + + adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances; } } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index ad54c46..6f064d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -444,32 +444,6 @@ static void sdma_v3_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0)); } - -/** - * sdma_v3_0_ring_emit_semaphore - emit a semaphore on the dma ring - * - * @ring: amdgpu_ring structure holding ring information - * @semaphore: amdgpu 
semaphore object - * @emit_wait: wait or signal semaphore - * - * Add a DMA semaphore packet to the ring to wait on or signal - * other rings (VI). - */ -static bool sdma_v3_0_ring_emit_semaphore(struct amdgpu_ring *ring, - struct amdgpu_semaphore *semaphore, - bool emit_wait) -{ - u64 addr = semaphore->gpu_addr; - u32 sig = emit_wait ? 0 : 1; - - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SEM) | - SDMA_PKT_SEMAPHORE_HEADER_SIGNAL(sig)); - amdgpu_ring_write(ring, lower_32_bits(addr) & 0xfffffff8); - amdgpu_ring_write(ring, upper_32_bits(addr)); - - return true; -} - /** * sdma_v3_0_gfx_stop - stop the gfx async dma engines * @@ -596,6 +570,9 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) vi_srbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); + WREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i], + adev->gfx.config.gb_addr_config & 0x70); + WREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0); /* Set ring buffer size in dwords */ @@ -788,7 +765,7 @@ static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring) tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); - r = amdgpu_ring_lock(ring, 5); + r = amdgpu_ring_alloc(ring, 5); if (r) { DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); amdgpu_wb_free(adev, index); @@ -801,7 +778,7 @@ static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring) amdgpu_ring_write(ring, upper_32_bits(gpu_addr)); amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1)); amdgpu_ring_write(ring, 0xDEADBEEF); - amdgpu_ring_unlock_commit(ring); + amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { tmp = le32_to_cpu(adev->wb.wb[index]); @@ -851,7 +828,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring) tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); memset(&ib, 0, sizeof(ib)); - r = amdgpu_ib_get(ring, NULL, 256, &ib); + r = amdgpu_ib_get(adev, NULL, 256, &ib); if (r) { DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); goto err0; @@ -868,9 +845,8 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring) ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); ib.length_dw = 8; - r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL, - AMDGPU_FENCE_OWNER_UNDEFINED, - &f); + r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED, + NULL, &f); if (r) goto err1; @@ -948,7 +924,7 @@ static void sdma_v3_0_vm_copy_pte(struct amdgpu_ib *ib, * Update PTEs by writing them manually using sDMA (CIK).
*/ static void sdma_v3_0_vm_write_pte(struct amdgpu_ib *ib, - uint64_t pe, + const dma_addr_t *pages_addr, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, uint32_t flags) { @@ -967,14 +943,7 @@ static void sdma_v3_0_vm_write_pte(struct amdgpu_ib *ib, ib->ptr[ib->length_dw++] = upper_32_bits(pe); ib->ptr[ib->length_dw++] = ndw; for (; ndw > 0; ndw -= 2, --count, pe += 8) { - if (flags & AMDGPU_PTE_SYSTEM) { - value = amdgpu_vm_map_gart(ib->ring->adev, addr); - value &= 0xFFFFFFFFFFFFF000ULL; - } else if (flags & AMDGPU_PTE_VALID) { - value = addr; - } else { - value = 0; - } + value = amdgpu_vm_map_gart(pages_addr, addr); addr += incr; value |= flags; ib->ptr[ib->length_dw++] = value; @@ -1032,14 +1001,14 @@ static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib, } /** - * sdma_v3_0_vm_pad_ib - pad the IB to the required number of dw + * sdma_v3_0_ring_pad_ib - pad the IB to the required number of dw * * @ib: indirect buffer to fill with padding * */ -static void sdma_v3_0_vm_pad_ib(struct amdgpu_ib *ib) +static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) { - struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ib->ring); + struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); u32 pad_count; int i; @@ -1275,6 +1244,8 @@ static void sdma_v3_0_print_status(void *handle) i, RREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i])); dev_info(adev->dev, " SDMA%d_GFX_DOORBELL=0x%08X\n", i, RREG32(mmSDMA0_GFX_DOORBELL + sdma_offsets[i])); + dev_info(adev->dev, " SDMA%d_TILING_CONFIG=0x%08X\n", + i, RREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i])); mutex_lock(&adev->srbm_mutex); for (j = 0; j < 16; j++) { vi_srbm_select(adev, 0, 0, 0, j); @@ -1570,12 +1541,12 @@ static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = { .parse_cs = NULL, .emit_ib = sdma_v3_0_ring_emit_ib, .emit_fence = sdma_v3_0_ring_emit_fence, - .emit_semaphore = sdma_v3_0_ring_emit_semaphore, .emit_vm_flush = sdma_v3_0_ring_emit_vm_flush, .emit_hdp_flush = sdma_v3_0_ring_emit_hdp_flush, .test_ring = sdma_v3_0_ring_test_ring, .test_ib = sdma_v3_0_ring_test_ib, .insert_nop = sdma_v3_0_ring_insert_nop, + .pad_ib = sdma_v3_0_ring_pad_ib, }; static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev) @@ -1673,14 +1644,18 @@ static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = { .copy_pte = sdma_v3_0_vm_copy_pte, .write_pte = sdma_v3_0_vm_write_pte, .set_pte_pde = sdma_v3_0_vm_set_pte_pde, - .pad_ib = sdma_v3_0_vm_pad_ib, }; static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev) { + unsigned i; + if (adev->vm_manager.vm_pte_funcs == NULL) { adev->vm_manager.vm_pte_funcs = &sdma_v3_0_vm_pte_funcs; - adev->vm_manager.vm_pte_funcs_ring = &adev->sdma.instance[0].ring; - adev->vm_manager.vm_pte_funcs_ring->is_pte_ring = true; + for (i = 0; i < adev->sdma.num_instances; i++) + adev->vm_manager.vm_pte_rings[i] = + &adev->sdma.instance[i].ring; + + adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances; } } diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_smc.c b/drivers/gpu/drm/amd/amdgpu/tonga_smc.c index 361c49a..083893d 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_smc.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_smc.c @@ -272,6 +272,12 @@ static int tonga_smu_upload_firmware_image(struct amdgpu_device *adev) if (!adev->pm.fw) return -EINVAL; + /* Skip SMC ucode loading on SR-IOV capable boards. + * vbios does this for us in asic_init in that case. 
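The guard being added here to tonga_smc is the third instance of the same SR-IOV pattern in this series (gmc_v8_0 and iceland_smc got it above): an early return before any ucode is touched, i.e.

if (adev->virtualization.supports_sr_iov)
	return 0;	/* vbios already loaded this ucode during asic_init */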
+ */ + if (adev->virtualization.supports_sr_iov) + return 0; + hdr = (const struct smc_firmware_header_v1_0 *)adev->pm.fw->data; amdgpu_ucode_print_smc_hdr(&hdr->header); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index 5e9f73a..70ed73f 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -164,7 +164,7 @@ static int uvd_v4_2_hw_init(void *handle) goto done; } - r = amdgpu_ring_lock(ring, 10); + r = amdgpu_ring_alloc(ring, 10); if (r) { DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r); goto done; @@ -189,7 +189,7 @@ static int uvd_v4_2_hw_init(void *handle) amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CNTL, 0)); amdgpu_ring_write(ring, 3); - amdgpu_ring_unlock_commit(ring); + amdgpu_ring_commit(ring); done: /* lower clocks again */ @@ -439,33 +439,6 @@ static void uvd_v4_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq } /** - * uvd_v4_2_ring_emit_semaphore - emit semaphore command - * - * @ring: amdgpu_ring pointer - * @semaphore: semaphore to emit commands for - * @emit_wait: true if we should emit a wait command - * - * Emit a semaphore command (either wait or signal) to the UVD ring. - */ -static bool uvd_v4_2_ring_emit_semaphore(struct amdgpu_ring *ring, - struct amdgpu_semaphore *semaphore, - bool emit_wait) -{ - uint64_t addr = semaphore->gpu_addr; - - amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_ADDR_LOW, 0)); - amdgpu_ring_write(ring, (addr >> 3) & 0x000FFFFF); - - amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_ADDR_HIGH, 0)); - amdgpu_ring_write(ring, (addr >> 23) & 0x000FFFFF); - - amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CMD, 0)); - amdgpu_ring_write(ring, 0x80 | (emit_wait ? 1 : 0)); - - return true; -} - -/** * uvd_v4_2_ring_test_ring - register write test * * @ring: amdgpu_ring pointer @@ -480,7 +453,7 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring) int r; WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD); - r = amdgpu_ring_lock(ring, 3); + r = amdgpu_ring_alloc(ring, 3); if (r) { DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", ring->idx, r); @@ -488,7 +461,7 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring) } amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0)); amdgpu_ring_write(ring, 0xDEADBEEF); - amdgpu_ring_unlock_commit(ring); + amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { tmp = RREG32(mmUVD_CONTEXT_ID); if (tmp == 0xDEADBEEF) @@ -549,7 +522,7 @@ static int uvd_v4_2_ring_test_ib(struct amdgpu_ring *ring) goto error; } - r = amdgpu_uvd_get_destroy_msg(ring, 1, &fence); + r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence); if (r) { DRM_ERROR("amdgpu: failed to get destroy ib (%d).\n", r); goto error; @@ -603,6 +576,10 @@ static void uvd_v4_2_mc_resume(struct amdgpu_device *adev) addr = (adev->uvd.gpu_addr >> 32) & 0xFF; WREG32(mmUVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31)); + WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config); + WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config); + WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config); + uvd_v4_2_init_cg(adev); } @@ -804,6 +781,13 @@ static void uvd_v4_2_print_status(void *handle) RREG32(mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL)); dev_info(adev->dev, " UVD_CONTEXT_ID=0x%08X\n", RREG32(mmUVD_CONTEXT_ID)); + dev_info(adev->dev, " UVD_UDEC_ADDR_CONFIG=0x%08X\n", + RREG32(mmUVD_UDEC_ADDR_CONFIG)); + dev_info(adev->dev, " UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n", + RREG32(mmUVD_UDEC_DB_ADDR_CONFIG)); + 
dev_info(adev->dev, " UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n", + RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG)); + } static int uvd_v4_2_set_interrupt_state(struct amdgpu_device *adev, @@ -882,10 +866,10 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = { .parse_cs = amdgpu_uvd_ring_parse_cs, .emit_ib = uvd_v4_2_ring_emit_ib, .emit_fence = uvd_v4_2_ring_emit_fence, - .emit_semaphore = uvd_v4_2_ring_emit_semaphore, .test_ring = uvd_v4_2_ring_test_ring, .test_ib = uvd_v4_2_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, + .pad_ib = amdgpu_ring_generic_pad_ib, }; static void uvd_v4_2_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index 38864f56..578ffb6 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -160,7 +160,7 @@ static int uvd_v5_0_hw_init(void *handle) goto done; } - r = amdgpu_ring_lock(ring, 10); + r = amdgpu_ring_alloc(ring, 10); if (r) { DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r); goto done; @@ -185,7 +185,7 @@ static int uvd_v5_0_hw_init(void *handle) amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CNTL, 0)); amdgpu_ring_write(ring, 3); - amdgpu_ring_unlock_commit(ring); + amdgpu_ring_commit(ring); done: /* lower clocks again */ @@ -279,6 +279,10 @@ static void uvd_v5_0_mc_resume(struct amdgpu_device *adev) size = AMDGPU_UVD_HEAP_SIZE; WREG32(mmUVD_VCPU_CACHE_OFFSET2, offset >> 3); WREG32(mmUVD_VCPU_CACHE_SIZE2, size); + + WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config); + WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config); + WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config); } /** @@ -483,33 +487,6 @@ static void uvd_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq } /** - * uvd_v5_0_ring_emit_semaphore - emit semaphore command - * - * @ring: amdgpu_ring pointer - * @semaphore: semaphore to emit commands for - * @emit_wait: true if we should emit a wait command - * - * Emit a semaphore command (either wait or signal) to the UVD ring. - */ -static bool uvd_v5_0_ring_emit_semaphore(struct amdgpu_ring *ring, - struct amdgpu_semaphore *semaphore, - bool emit_wait) -{ - uint64_t addr = semaphore->gpu_addr; - - amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_ADDR_LOW, 0)); - amdgpu_ring_write(ring, (addr >> 3) & 0x000FFFFF); - - amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_ADDR_HIGH, 0)); - amdgpu_ring_write(ring, (addr >> 23) & 0x000FFFFF); - - amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CMD, 0)); - amdgpu_ring_write(ring, 0x80 | (emit_wait ? 
1 : 0)); - - return true; -} - -/** * uvd_v5_0_ring_test_ring - register write test * * @ring: amdgpu_ring pointer @@ -524,7 +501,7 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring) int r; WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD); - r = amdgpu_ring_lock(ring, 3); + r = amdgpu_ring_alloc(ring, 3); if (r) { DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", ring->idx, r); @@ -532,7 +509,7 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring) } amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0)); amdgpu_ring_write(ring, 0xDEADBEEF); - amdgpu_ring_unlock_commit(ring); + amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { tmp = RREG32(mmUVD_CONTEXT_ID); if (tmp == 0xDEADBEEF) @@ -595,7 +572,7 @@ static int uvd_v5_0_ring_test_ib(struct amdgpu_ring *ring) goto error; } - r = amdgpu_uvd_get_destroy_msg(ring, 1, &fence); + r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence); if (r) { DRM_ERROR("amdgpu: failed to get destroy ib (%d).\n", r); goto error; @@ -751,6 +728,12 @@ static void uvd_v5_0_print_status(void *handle) RREG32(mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL)); dev_info(adev->dev, " UVD_CONTEXT_ID=0x%08X\n", RREG32(mmUVD_CONTEXT_ID)); + dev_info(adev->dev, " UVD_UDEC_ADDR_CONFIG=0x%08X\n", + RREG32(mmUVD_UDEC_ADDR_CONFIG)); + dev_info(adev->dev, " UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n", + RREG32(mmUVD_UDEC_DB_ADDR_CONFIG)); + dev_info(adev->dev, " UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n", + RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG)); } static int uvd_v5_0_set_interrupt_state(struct amdgpu_device *adev, @@ -821,10 +804,10 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = { .parse_cs = amdgpu_uvd_ring_parse_cs, .emit_ib = uvd_v5_0_ring_emit_ib, .emit_fence = uvd_v5_0_ring_emit_fence, - .emit_semaphore = uvd_v5_0_ring_emit_semaphore, .test_ring = uvd_v5_0_ring_test_ring, .test_ib = uvd_v5_0_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, + .pad_ib = amdgpu_ring_generic_pad_ib, }; static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 3d59139..d4da1f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -157,7 +157,7 @@ static int uvd_v6_0_hw_init(void *handle) goto done; } - r = amdgpu_ring_lock(ring, 10); + r = amdgpu_ring_alloc(ring, 10); if (r) { DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r); goto done; @@ -182,7 +182,7 @@ static int uvd_v6_0_hw_init(void *handle) amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CNTL, 0)); amdgpu_ring_write(ring, 3); - amdgpu_ring_unlock_commit(ring); + amdgpu_ring_commit(ring); done: if (!r) @@ -277,6 +277,10 @@ static void uvd_v6_0_mc_resume(struct amdgpu_device *adev) size = AMDGPU_UVD_HEAP_SIZE; WREG32(mmUVD_VCPU_CACHE_OFFSET2, offset >> 3); WREG32(mmUVD_VCPU_CACHE_SIZE2, size); + + WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config); + WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config); + WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config); } static void cz_set_uvd_clock_gating_branches(struct amdgpu_device *adev, @@ -722,33 +726,6 @@ static void uvd_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq } /** - * uvd_v6_0_ring_emit_semaphore - emit semaphore command - * - * @ring: amdgpu_ring pointer - * @semaphore: semaphore to emit commands for - * @emit_wait: true if we should emit a wait command - * - * Emit a semaphore command (either wait or signal) to the UVD ring. 
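With the gfx code no longer programming UDEC registers, each UVD generation now writes them from its own mc_resume(); the same three writes recur for uvd_v4_2, uvd_v5_0 and uvd_v6_0 in the hunks above:

WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config);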
- */ -static bool uvd_v6_0_ring_emit_semaphore(struct amdgpu_ring *ring, - struct amdgpu_semaphore *semaphore, - bool emit_wait) -{ - uint64_t addr = semaphore->gpu_addr; - - amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_ADDR_LOW, 0)); - amdgpu_ring_write(ring, (addr >> 3) & 0x000FFFFF); - - amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_ADDR_HIGH, 0)); - amdgpu_ring_write(ring, (addr >> 23) & 0x000FFFFF); - - amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CMD, 0)); - amdgpu_ring_write(ring, 0x80 | (emit_wait ? 1 : 0)); - - return true; -} - -/** * uvd_v6_0_ring_test_ring - register write test * * @ring: amdgpu_ring pointer @@ -763,7 +740,7 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring) int r; WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD); - r = amdgpu_ring_lock(ring, 3); + r = amdgpu_ring_alloc(ring, 3); if (r) { DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", ring->idx, r); @@ -771,7 +748,7 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring) } amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0)); amdgpu_ring_write(ring, 0xDEADBEEF); - amdgpu_ring_unlock_commit(ring); + amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { tmp = RREG32(mmUVD_CONTEXT_ID); if (tmp == 0xDEADBEEF) @@ -827,7 +804,7 @@ static int uvd_v6_0_ring_test_ib(struct amdgpu_ring *ring) goto error; } - r = amdgpu_uvd_get_destroy_msg(ring, 1, &fence); + r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence); if (r) { DRM_ERROR("amdgpu: failed to get destroy ib (%d).\n", r); goto error; @@ -974,6 +951,12 @@ static void uvd_v6_0_print_status(void *handle) RREG32(mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL)); dev_info(adev->dev, " UVD_CONTEXT_ID=0x%08X\n", RREG32(mmUVD_CONTEXT_ID)); + dev_info(adev->dev, " UVD_UDEC_ADDR_CONFIG=0x%08X\n", + RREG32(mmUVD_UDEC_ADDR_CONFIG)); + dev_info(adev->dev, " UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n", + RREG32(mmUVD_UDEC_DB_ADDR_CONFIG)); + dev_info(adev->dev, " UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n", + RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG)); } static int uvd_v6_0_set_interrupt_state(struct amdgpu_device *adev, @@ -1062,10 +1045,10 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_funcs = { .parse_cs = amdgpu_uvd_ring_parse_cs, .emit_ib = uvd_v6_0_ring_emit_ib, .emit_fence = uvd_v6_0_ring_emit_fence, - .emit_semaphore = uvd_v6_0_ring_emit_semaphore, .test_ring = uvd_v6_0_ring_test_ring, .test_ib = uvd_v6_0_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, + .pad_ib = amdgpu_ring_generic_pad_ib, }; static void uvd_v6_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index 52ac7a8..9c804f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -639,10 +639,10 @@ static const struct amdgpu_ring_funcs vce_v2_0_ring_funcs = { .parse_cs = amdgpu_vce_ring_parse_cs, .emit_ib = amdgpu_vce_ring_emit_ib, .emit_fence = amdgpu_vce_ring_emit_fence, - .emit_semaphore = amdgpu_vce_ring_emit_semaphore, .test_ring = amdgpu_vce_ring_test_ring, .test_ib = amdgpu_vce_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, + .pad_ib = amdgpu_ring_generic_pad_ib, }; static void vce_v2_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index e99af81..8f8d479 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -759,10 +759,10 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_funcs = { .parse_cs = amdgpu_vce_ring_parse_cs, .emit_ib = 
amdgpu_vce_ring_emit_ib, .emit_fence = amdgpu_vce_ring_emit_fence, - .emit_semaphore = amdgpu_vce_ring_emit_semaphore, .test_ring = amdgpu_vce_ring_test_ring, .test_ib = amdgpu_vce_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, + .pad_ib = amdgpu_ring_generic_pad_ib, }; static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 89f5a1f..1250035 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -74,6 +74,9 @@ #include "uvd_v6_0.h" #include "vce_v3_0.h" #include "amdgpu_powerplay.h" +#if defined(CONFIG_DRM_AMD_ACP) +#include "amdgpu_acp.h" +#endif /* * Indirect registers accessor @@ -571,374 +574,12 @@ static int vi_read_register(struct amdgpu_device *adev, u32 se_num, return -EINVAL; } -static void vi_print_gpu_status_regs(struct amdgpu_device *adev) -{ - dev_info(adev->dev, " GRBM_STATUS=0x%08X\n", - RREG32(mmGRBM_STATUS)); - dev_info(adev->dev, " GRBM_STATUS2=0x%08X\n", - RREG32(mmGRBM_STATUS2)); - dev_info(adev->dev, " GRBM_STATUS_SE0=0x%08X\n", - RREG32(mmGRBM_STATUS_SE0)); - dev_info(adev->dev, " GRBM_STATUS_SE1=0x%08X\n", - RREG32(mmGRBM_STATUS_SE1)); - dev_info(adev->dev, " GRBM_STATUS_SE2=0x%08X\n", - RREG32(mmGRBM_STATUS_SE2)); - dev_info(adev->dev, " GRBM_STATUS_SE3=0x%08X\n", - RREG32(mmGRBM_STATUS_SE3)); - dev_info(adev->dev, " SRBM_STATUS=0x%08X\n", - RREG32(mmSRBM_STATUS)); - dev_info(adev->dev, " SRBM_STATUS2=0x%08X\n", - RREG32(mmSRBM_STATUS2)); - dev_info(adev->dev, " SDMA0_STATUS_REG = 0x%08X\n", - RREG32(mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET)); - if (adev->sdma.num_instances > 1) { - dev_info(adev->dev, " SDMA1_STATUS_REG = 0x%08X\n", - RREG32(mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET)); - } - dev_info(adev->dev, " CP_STAT = 0x%08x\n", RREG32(mmCP_STAT)); - dev_info(adev->dev, " CP_STALLED_STAT1 = 0x%08x\n", - RREG32(mmCP_STALLED_STAT1)); - dev_info(adev->dev, " CP_STALLED_STAT2 = 0x%08x\n", - RREG32(mmCP_STALLED_STAT2)); - dev_info(adev->dev, " CP_STALLED_STAT3 = 0x%08x\n", - RREG32(mmCP_STALLED_STAT3)); - dev_info(adev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n", - RREG32(mmCP_CPF_BUSY_STAT)); - dev_info(adev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n", - RREG32(mmCP_CPF_STALLED_STAT1)); - dev_info(adev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS)); - dev_info(adev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT)); - dev_info(adev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n", - RREG32(mmCP_CPC_STALLED_STAT1)); - dev_info(adev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS)); -} - -/** - * vi_gpu_check_soft_reset - check which blocks are busy - * - * @adev: amdgpu_device pointer - * - * Check which blocks are busy and return the relevant reset - * mask to be used by vi_gpu_soft_reset(). - * Returns a mask of the blocks to be reset. 
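Every amdgpu_ring_funcs table touched by this series changes the same way: the .emit_semaphore hook disappears along with hardware semaphore support, and a new .pad_ib callback is added. A sketch of the resulting table shape, using the VCE callbacks named in the hunks above (example_ring_funcs is a hypothetical consolidated table; fields not shown in the diff are elided):

static const struct amdgpu_ring_funcs example_ring_funcs = {
	.parse_cs = amdgpu_vce_ring_parse_cs,
	.emit_ib = amdgpu_vce_ring_emit_ib,
	.emit_fence = amdgpu_vce_ring_emit_fence,
	/* .emit_semaphore is gone: hw semaphore support was removed */
	.test_ring = amdgpu_vce_ring_test_ring,
	.test_ib = amdgpu_vce_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,	/* new: pads IBs to ring alignment */
};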
- */ -u32 vi_gpu_check_soft_reset(struct amdgpu_device *adev) -{ - u32 reset_mask = 0; - u32 tmp; - - /* GRBM_STATUS */ - tmp = RREG32(mmGRBM_STATUS); - if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK | - GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK | - GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK | - GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK | - GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK | - GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) - reset_mask |= AMDGPU_RESET_GFX; - - if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) - reset_mask |= AMDGPU_RESET_CP; - - /* GRBM_STATUS2 */ - tmp = RREG32(mmGRBM_STATUS2); - if (tmp & GRBM_STATUS2__RLC_BUSY_MASK) - reset_mask |= AMDGPU_RESET_RLC; - - if (tmp & (GRBM_STATUS2__CPF_BUSY_MASK | - GRBM_STATUS2__CPC_BUSY_MASK | - GRBM_STATUS2__CPG_BUSY_MASK)) - reset_mask |= AMDGPU_RESET_CP; - - /* SRBM_STATUS2 */ - tmp = RREG32(mmSRBM_STATUS2); - if (tmp & SRBM_STATUS2__SDMA_BUSY_MASK) - reset_mask |= AMDGPU_RESET_DMA; - - if (tmp & SRBM_STATUS2__SDMA1_BUSY_MASK) - reset_mask |= AMDGPU_RESET_DMA1; - - /* SRBM_STATUS */ - tmp = RREG32(mmSRBM_STATUS); - - if (tmp & SRBM_STATUS__IH_BUSY_MASK) - reset_mask |= AMDGPU_RESET_IH; - - if (tmp & SRBM_STATUS__SEM_BUSY_MASK) - reset_mask |= AMDGPU_RESET_SEM; - - if (tmp & SRBM_STATUS__GRBM_RQ_PENDING_MASK) - reset_mask |= AMDGPU_RESET_GRBM; - - if (adev->asic_type != CHIP_TOPAZ) { - if (tmp & (SRBM_STATUS__UVD_RQ_PENDING_MASK | - SRBM_STATUS__UVD_BUSY_MASK)) - reset_mask |= AMDGPU_RESET_UVD; - } - - if (tmp & SRBM_STATUS__VMC_BUSY_MASK) - reset_mask |= AMDGPU_RESET_VMC; - - if (tmp & (SRBM_STATUS__MCB_BUSY_MASK | SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK | - SRBM_STATUS__MCC_BUSY_MASK | SRBM_STATUS__MCD_BUSY_MASK)) - reset_mask |= AMDGPU_RESET_MC; - - /* SDMA0_STATUS_REG */ - tmp = RREG32(mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET); - if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK)) - reset_mask |= AMDGPU_RESET_DMA; - - /* SDMA1_STATUS_REG */ - if (adev->sdma.num_instances > 1) { - tmp = RREG32(mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET); - if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK)) - reset_mask |= AMDGPU_RESET_DMA1; - } -#if 0 - /* VCE_STATUS */ - if (adev->asic_type != CHIP_TOPAZ) { - tmp = RREG32(mmVCE_STATUS); - if (tmp & VCE_STATUS__VCPU_REPORT_RB0_BUSY_MASK) - reset_mask |= AMDGPU_RESET_VCE; - if (tmp & VCE_STATUS__VCPU_REPORT_RB1_BUSY_MASK) - reset_mask |= AMDGPU_RESET_VCE1; - - } - - if (adev->asic_type != CHIP_TOPAZ) { - if (amdgpu_display_is_display_hung(adev)) - reset_mask |= AMDGPU_RESET_DISPLAY; - } -#endif - - /* Skip MC reset as it's mostly likely not hung, just busy */ - if (reset_mask & AMDGPU_RESET_MC) { - DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask); - reset_mask &= ~AMDGPU_RESET_MC; - } - - return reset_mask; -} - -/** - * vi_gpu_soft_reset - soft reset GPU - * - * @adev: amdgpu_device pointer - * @reset_mask: mask of which blocks to reset - * - * Soft reset the blocks specified in @reset_mask. 
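Both removed reset helpers lean on the REG_SET_FIELD() macro for register updates. For readers skimming the deletions, a short sketch of the read-modify-write idiom as it appears in the removed code (here halting the CP front ends via CP_ME_CNTL):

	u32 tmp;

	/* read, rewrite individual bitfields by name, write back */
	tmp = RREG32(mmCP_ME_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
	WREG32(mmCP_ME_CNTL, tmp);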
- */ -static void vi_gpu_soft_reset(struct amdgpu_device *adev, u32 reset_mask) -{ - struct amdgpu_mode_mc_save save; - u32 grbm_soft_reset = 0, srbm_soft_reset = 0; - u32 tmp; - - if (reset_mask == 0) - return; - - dev_info(adev->dev, "GPU softreset: 0x%08X\n", reset_mask); - - vi_print_gpu_status_regs(adev); - dev_info(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n", - RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR)); - dev_info(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n", - RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS)); - - /* disable CG/PG */ - - /* stop the rlc */ - //XXX - //gfx_v8_0_rlc_stop(adev); - - /* Disable GFX parsing/prefetching */ - tmp = RREG32(mmCP_ME_CNTL); - tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1); - tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1); - tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1); - WREG32(mmCP_ME_CNTL, tmp); - - /* Disable MEC parsing/prefetching */ - tmp = RREG32(mmCP_MEC_CNTL); - tmp = REG_SET_FIELD(tmp, CP_MEC_CNTL, MEC_ME1_HALT, 1); - tmp = REG_SET_FIELD(tmp, CP_MEC_CNTL, MEC_ME2_HALT, 1); - WREG32(mmCP_MEC_CNTL, tmp); - - if (reset_mask & AMDGPU_RESET_DMA) { - /* sdma0 */ - tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET); - tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 1); - WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp); - } - if (reset_mask & AMDGPU_RESET_DMA1) { - /* sdma1 */ - tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET); - tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 1); - WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp); - } - - gmc_v8_0_mc_stop(adev, &save); - if (amdgpu_asic_wait_for_mc_idle(adev)) { - dev_warn(adev->dev, "Wait for MC idle timedout !\n"); - } - - if (reset_mask & (AMDGPU_RESET_GFX | AMDGPU_RESET_COMPUTE | AMDGPU_RESET_CP)) { - grbm_soft_reset = - REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP, 1); - grbm_soft_reset = - REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX, 1); - } - - if (reset_mask & AMDGPU_RESET_CP) { - grbm_soft_reset = - REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP, 1); - srbm_soft_reset = - REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); - } - - if (reset_mask & AMDGPU_RESET_DMA) - srbm_soft_reset = - REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_SDMA, 1); - - if (reset_mask & AMDGPU_RESET_DMA1) - srbm_soft_reset = - REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_SDMA1, 1); - - if (reset_mask & AMDGPU_RESET_DISPLAY) - srbm_soft_reset = - REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_DC, 1); - - if (reset_mask & AMDGPU_RESET_RLC) - grbm_soft_reset = - REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); - - if (reset_mask & AMDGPU_RESET_SEM) - srbm_soft_reset = - REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_SEM, 1); - - if (reset_mask & AMDGPU_RESET_IH) - srbm_soft_reset = - REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_IH, 1); - - if (reset_mask & AMDGPU_RESET_GRBM) - srbm_soft_reset = - REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1); - - if (reset_mask & AMDGPU_RESET_VMC) - srbm_soft_reset = - REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VMC, 1); - - if (reset_mask & AMDGPU_RESET_UVD) - srbm_soft_reset = - REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_UVD, 1); - - if (reset_mask & AMDGPU_RESET_VCE) - srbm_soft_reset = - REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1); - - if (reset_mask & AMDGPU_RESET_VCE) - srbm_soft_reset = - 
REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1); - - if (!(adev->flags & AMD_IS_APU)) { - if (reset_mask & AMDGPU_RESET_MC) - srbm_soft_reset = - REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_MC, 1); - } - - if (grbm_soft_reset) { - tmp = RREG32(mmGRBM_SOFT_RESET); - tmp |= grbm_soft_reset; - dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); - WREG32(mmGRBM_SOFT_RESET, tmp); - tmp = RREG32(mmGRBM_SOFT_RESET); - - udelay(50); - - tmp &= ~grbm_soft_reset; - WREG32(mmGRBM_SOFT_RESET, tmp); - tmp = RREG32(mmGRBM_SOFT_RESET); - } - - if (srbm_soft_reset) { - tmp = RREG32(mmSRBM_SOFT_RESET); - tmp |= srbm_soft_reset; - dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); - WREG32(mmSRBM_SOFT_RESET, tmp); - tmp = RREG32(mmSRBM_SOFT_RESET); - - udelay(50); - - tmp &= ~srbm_soft_reset; - WREG32(mmSRBM_SOFT_RESET, tmp); - tmp = RREG32(mmSRBM_SOFT_RESET); - } - - /* Wait a little for things to settle down */ - udelay(50); - - gmc_v8_0_mc_resume(adev, &save); - udelay(50); - - vi_print_gpu_status_regs(adev); -} - static void vi_gpu_pci_config_reset(struct amdgpu_device *adev) { - struct amdgpu_mode_mc_save save; - u32 tmp, i; + u32 i; dev_info(adev->dev, "GPU pci config reset\n"); - /* disable dpm? */ - - /* disable cg/pg */ - - /* Disable GFX parsing/prefetching */ - tmp = RREG32(mmCP_ME_CNTL); - tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1); - tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1); - tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1); - WREG32(mmCP_ME_CNTL, tmp); - - /* Disable MEC parsing/prefetching */ - tmp = RREG32(mmCP_MEC_CNTL); - tmp = REG_SET_FIELD(tmp, CP_MEC_CNTL, MEC_ME1_HALT, 1); - tmp = REG_SET_FIELD(tmp, CP_MEC_CNTL, MEC_ME2_HALT, 1); - WREG32(mmCP_MEC_CNTL, tmp); - - /* Disable GFX parsing/prefetching */ - WREG32(mmCP_ME_CNTL, CP_ME_CNTL__ME_HALT_MASK | - CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK); - - /* Disable MEC parsing/prefetching */ - WREG32(mmCP_MEC_CNTL, - CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK); - - /* sdma0 */ - tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET); - tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 1); - WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp); - - /* sdma1 */ - tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET); - tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 1); - WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp); - - /* XXX other engines? 
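The reset path being deleted above pulsed the accumulated GRBM/SRBM reset bits with interleaved read-backs to post the writes. A sketch of that pulse sequence, condensed from the removed vi_gpu_soft_reset() body (grbm_soft_reset is the mask assembled earlier in that function):

	u32 tmp;

	tmp = RREG32(mmGRBM_SOFT_RESET);
	tmp |= grbm_soft_reset;
	WREG32(mmGRBM_SOFT_RESET, tmp);		/* assert the reset bits */
	tmp = RREG32(mmGRBM_SOFT_RESET);	/* read back to post the write */

	udelay(50);

	tmp &= ~grbm_soft_reset;
	WREG32(mmGRBM_SOFT_RESET, tmp);		/* deassert */
	tmp = RREG32(mmGRBM_SOFT_RESET);

With this whole path removed, vi_asic_reset() falls through directly to the PCI config reset, as the later hunk shows.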
*/ - - /* halt the rlc, disable cp internal ints */ - //XXX - //gfx_v8_0_rlc_stop(adev); - - udelay(50); - - /* disable mem access */ - gmc_v8_0_mc_stop(adev, &save); - if (amdgpu_asic_wait_for_mc_idle(adev)) { - dev_warn(adev->dev, "Wait for MC idle timed out !\n"); - } - /* disable BM */ pci_clear_master(adev->pdev); /* reset */ @@ -978,26 +619,11 @@ static void vi_set_bios_scratch_engine_hung(struct amdgpu_device *adev, bool hun */ static int vi_asic_reset(struct amdgpu_device *adev) { - u32 reset_mask; - - reset_mask = vi_gpu_check_soft_reset(adev); - - if (reset_mask) - vi_set_bios_scratch_engine_hung(adev, true); - - /* try soft reset */ - vi_gpu_soft_reset(adev, reset_mask); - - reset_mask = vi_gpu_check_soft_reset(adev); + vi_set_bios_scratch_engine_hung(adev, true); - /* try pci config reset */ - if (reset_mask && amdgpu_hard_reset) - vi_gpu_pci_config_reset(adev); + vi_gpu_pci_config_reset(adev); - reset_mask = vi_gpu_check_soft_reset(adev); - - if (!reset_mask) - vi_set_bios_scratch_engine_hung(adev, false); + vi_set_bios_scratch_engine_hung(adev, false); return 0; } @@ -1347,6 +973,15 @@ static const struct amdgpu_ip_block_version cz_ip_blocks[] = .rev = 0, .funcs = &vce_v3_0_ip_funcs, }, +#if defined(CONFIG_DRM_AMD_ACP) + { + .type = AMD_IP_BLOCK_TYPE_ACP, + .major = 2, + .minor = 2, + .rev = 0, + .funcs = &acp_ip_funcs, + }, +#endif }; int vi_set_ip_blocks(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h index 1195d06f..15ff8b2 100644 --- a/drivers/gpu/drm/amd/include/amd_shared.h +++ b/drivers/gpu/drm/amd/include/amd_shared.h @@ -73,6 +73,7 @@ enum amd_ip_block_type { AMD_IP_BLOCK_TYPE_SDMA, AMD_IP_BLOCK_TYPE_UVD, AMD_IP_BLOCK_TYPE_VCE, + AMD_IP_BLOCK_TYPE_ACP, }; enum amd_clockgating_state { diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_d.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_d.h index dc52ea0..d3ccf5a 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_d.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_d.h @@ -1379,6 +1379,7 @@ #define mmDC_GPIO_PAD_STRENGTH_1 0x1978 #define mmDC_GPIO_PAD_STRENGTH_2 0x1979 #define mmPHY_AUX_CNTL 0x197f +#define mmDC_GPIO_I2CPAD_MASK 0x1974 #define mmDC_GPIO_I2CPAD_A 0x1975 #define mmDC_GPIO_I2CPAD_EN 0x1976 #define mmDC_GPIO_I2CPAD_Y 0x1977 diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_enum.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_enum.h new file mode 100644 index 0000000..6bea30e --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_enum.h @@ -0,0 +1,1117 @@ +/* + * DCE_8_0 Register documentation + * + * Copyright (C) 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef DCE_8_0_ENUM_H +#define DCE_8_0_ENUM_H + +typedef enum SurfaceEndian { + ENDIAN_NONE = 0x0, + ENDIAN_8IN16 = 0x1, + ENDIAN_8IN32 = 0x2, + ENDIAN_8IN64 = 0x3, +} SurfaceEndian; +typedef enum ArrayMode { + ARRAY_LINEAR_GENERAL = 0x0, + ARRAY_LINEAR_ALIGNED = 0x1, + ARRAY_1D_TILED_THIN1 = 0x2, + ARRAY_1D_TILED_THICK = 0x3, + ARRAY_2D_TILED_THIN1 = 0x4, + ARRAY_PRT_TILED_THIN1 = 0x5, + ARRAY_PRT_2D_TILED_THIN1 = 0x6, + ARRAY_2D_TILED_THICK = 0x7, + ARRAY_2D_TILED_XTHICK = 0x8, + ARRAY_PRT_TILED_THICK = 0x9, + ARRAY_PRT_2D_TILED_THICK = 0xa, + ARRAY_PRT_3D_TILED_THIN1 = 0xb, + ARRAY_3D_TILED_THIN1 = 0xc, + ARRAY_3D_TILED_THICK = 0xd, + ARRAY_3D_TILED_XTHICK = 0xe, + ARRAY_PRT_3D_TILED_THICK = 0xf, +} ArrayMode; +typedef enum PipeTiling { + CONFIG_1_PIPE = 0x0, + CONFIG_2_PIPE = 0x1, + CONFIG_4_PIPE = 0x2, + CONFIG_8_PIPE = 0x3, +} PipeTiling; +typedef enum BankTiling { + CONFIG_4_BANK = 0x0, + CONFIG_8_BANK = 0x1, +} BankTiling; +typedef enum GroupInterleave { + CONFIG_256B_GROUP = 0x0, + CONFIG_512B_GROUP = 0x1, +} GroupInterleave; +typedef enum RowTiling { + CONFIG_1KB_ROW = 0x0, + CONFIG_2KB_ROW = 0x1, + CONFIG_4KB_ROW = 0x2, + CONFIG_8KB_ROW = 0x3, + CONFIG_1KB_ROW_OPT = 0x4, + CONFIG_2KB_ROW_OPT = 0x5, + CONFIG_4KB_ROW_OPT = 0x6, + CONFIG_8KB_ROW_OPT = 0x7, +} RowTiling; +typedef enum BankSwapBytes { + CONFIG_128B_SWAPS = 0x0, + CONFIG_256B_SWAPS = 0x1, + CONFIG_512B_SWAPS = 0x2, + CONFIG_1KB_SWAPS = 0x3, +} BankSwapBytes; +typedef enum SampleSplitBytes { + CONFIG_1KB_SPLIT = 0x0, + CONFIG_2KB_SPLIT = 0x1, + CONFIG_4KB_SPLIT = 0x2, + CONFIG_8KB_SPLIT = 0x3, +} SampleSplitBytes; +typedef enum NumPipes { + ADDR_CONFIG_1_PIPE = 0x0, + ADDR_CONFIG_2_PIPE = 0x1, + ADDR_CONFIG_4_PIPE = 0x2, + ADDR_CONFIG_8_PIPE = 0x3, +} NumPipes; +typedef enum PipeInterleaveSize { + ADDR_CONFIG_PIPE_INTERLEAVE_256B = 0x0, + ADDR_CONFIG_PIPE_INTERLEAVE_512B = 0x1, +} PipeInterleaveSize; +typedef enum BankInterleaveSize { + ADDR_CONFIG_BANK_INTERLEAVE_1 = 0x0, + ADDR_CONFIG_BANK_INTERLEAVE_2 = 0x1, + ADDR_CONFIG_BANK_INTERLEAVE_4 = 0x2, + ADDR_CONFIG_BANK_INTERLEAVE_8 = 0x3, +} BankInterleaveSize; +typedef enum NumShaderEngines { + ADDR_CONFIG_1_SHADER_ENGINE = 0x0, + ADDR_CONFIG_2_SHADER_ENGINE = 0x1, +} NumShaderEngines; +typedef enum ShaderEngineTileSize { + ADDR_CONFIG_SE_TILE_16 = 0x0, + ADDR_CONFIG_SE_TILE_32 = 0x1, +} ShaderEngineTileSize; +typedef enum NumGPUs { + ADDR_CONFIG_1_GPU = 0x0, + ADDR_CONFIG_2_GPU = 0x1, + ADDR_CONFIG_4_GPU = 0x2, +} NumGPUs; +typedef enum MultiGPUTileSize { + ADDR_CONFIG_GPU_TILE_16 = 0x0, + ADDR_CONFIG_GPU_TILE_32 = 0x1, + ADDR_CONFIG_GPU_TILE_64 = 0x2, + ADDR_CONFIG_GPU_TILE_128 = 0x3, +} MultiGPUTileSize; +typedef enum RowSize { + ADDR_CONFIG_1KB_ROW = 0x0, + ADDR_CONFIG_2KB_ROW = 0x1, + ADDR_CONFIG_4KB_ROW = 0x2, +} RowSize; +typedef enum NumLowerPipes { + ADDR_CONFIG_1_LOWER_PIPES = 0x0, + ADDR_CONFIG_2_LOWER_PIPES = 0x1, +} NumLowerPipes; +typedef enum DebugBlockId { + DBG_CLIENT_BLKID_RESERVED = 0x0, + DBG_CLIENT_BLKID_dbg = 0x1, + DBG_CLIENT_BLKID_uvdu_0 = 0x2, + DBG_CLIENT_BLKID_uvdu_1 = 0x3, + DBG_CLIENT_BLKID_uvdu_2 = 0x4, + DBG_CLIENT_BLKID_uvdu_3 = 0x5, + DBG_CLIENT_BLKID_uvdu_4 = 0x6, + DBG_CLIENT_BLKID_uvdu_5 = 0x7, + DBG_CLIENT_BLKID_uvdu_6 = 0x8, + DBG_CLIENT_BLKID_uvdm_0 = 0x9, + 
DBG_CLIENT_BLKID_uvdm_1 = 0xa, + DBG_CLIENT_BLKID_uvdm_2 = 0xb, + DBG_CLIENT_BLKID_uvdm_3 = 0xc, + DBG_CLIENT_BLKID_vcea_0 = 0xd, + DBG_CLIENT_BLKID_vcea_1 = 0xe, + DBG_CLIENT_BLKID_vcea_2 = 0xf, + DBG_CLIENT_BLKID_vcea_3 = 0x10, + DBG_CLIENT_BLKID_vcea_4 = 0x11, + DBG_CLIENT_BLKID_vcea_5 = 0x12, + DBG_CLIENT_BLKID_vcea_6 = 0x13, + DBG_CLIENT_BLKID_vceb_0 = 0x14, + DBG_CLIENT_BLKID_vceb_1 = 0x15, + DBG_CLIENT_BLKID_vceb_2 = 0x16, + DBG_CLIENT_BLKID_dco = 0x17, + DBG_CLIENT_BLKID_xdma = 0x18, + DBG_CLIENT_BLKID_smu_0 = 0x19, + DBG_CLIENT_BLKID_smu_1 = 0x1a, + DBG_CLIENT_BLKID_smu_2 = 0x1b, + DBG_CLIENT_BLKID_gck = 0x1c, + DBG_CLIENT_BLKID_tmonw0 = 0x1d, + DBG_CLIENT_BLKID_tmonw1 = 0x1e, + DBG_CLIENT_BLKID_grbm = 0x1f, + DBG_CLIENT_BLKID_rlc = 0x20, + DBG_CLIENT_BLKID_ds0 = 0x21, + DBG_CLIENT_BLKID_cpg_0 = 0x22, + DBG_CLIENT_BLKID_cpg_1 = 0x23, + DBG_CLIENT_BLKID_cpc_0 = 0x24, + DBG_CLIENT_BLKID_cpc_1 = 0x25, + DBG_CLIENT_BLKID_cpf = 0x26, + DBG_CLIENT_BLKID_scf0 = 0x27, + DBG_CLIENT_BLKID_scf1 = 0x28, + DBG_CLIENT_BLKID_scf2 = 0x29, + DBG_CLIENT_BLKID_scf3 = 0x2a, + DBG_CLIENT_BLKID_pc0 = 0x2b, + DBG_CLIENT_BLKID_pc1 = 0x2c, + DBG_CLIENT_BLKID_pc2 = 0x2d, + DBG_CLIENT_BLKID_pc3 = 0x2e, + DBG_CLIENT_BLKID_vgt0 = 0x2f, + DBG_CLIENT_BLKID_vgt1 = 0x30, + DBG_CLIENT_BLKID_vgt2 = 0x31, + DBG_CLIENT_BLKID_vgt3 = 0x32, + DBG_CLIENT_BLKID_sx00 = 0x33, + DBG_CLIENT_BLKID_sx10 = 0x34, + DBG_CLIENT_BLKID_sx20 = 0x35, + DBG_CLIENT_BLKID_sx30 = 0x36, + DBG_CLIENT_BLKID_cb001 = 0x37, + DBG_CLIENT_BLKID_cb200 = 0x38, + DBG_CLIENT_BLKID_cb201 = 0x39, + DBG_CLIENT_BLKID_cbr0 = 0x3a, + DBG_CLIENT_BLKID_cb000 = 0x3b, + DBG_CLIENT_BLKID_cb101 = 0x3c, + DBG_CLIENT_BLKID_cb300 = 0x3d, + DBG_CLIENT_BLKID_cb301 = 0x3e, + DBG_CLIENT_BLKID_cbr1 = 0x3f, + DBG_CLIENT_BLKID_cb100 = 0x40, + DBG_CLIENT_BLKID_ia0 = 0x41, + DBG_CLIENT_BLKID_ia1 = 0x42, + DBG_CLIENT_BLKID_bci0 = 0x43, + DBG_CLIENT_BLKID_bci1 = 0x44, + DBG_CLIENT_BLKID_bci2 = 0x45, + DBG_CLIENT_BLKID_bci3 = 0x46, + DBG_CLIENT_BLKID_pa0 = 0x47, + DBG_CLIENT_BLKID_pa1 = 0x48, + DBG_CLIENT_BLKID_spim0 = 0x49, + DBG_CLIENT_BLKID_spim1 = 0x4a, + DBG_CLIENT_BLKID_spim2 = 0x4b, + DBG_CLIENT_BLKID_spim3 = 0x4c, + DBG_CLIENT_BLKID_sdma = 0x4d, + DBG_CLIENT_BLKID_ih = 0x4e, + DBG_CLIENT_BLKID_sem = 0x4f, + DBG_CLIENT_BLKID_srbm = 0x50, + DBG_CLIENT_BLKID_hdp = 0x51, + DBG_CLIENT_BLKID_acp_0 = 0x52, + DBG_CLIENT_BLKID_acp_1 = 0x53, + DBG_CLIENT_BLKID_sam = 0x54, + DBG_CLIENT_BLKID_mcc0 = 0x55, + DBG_CLIENT_BLKID_mcc1 = 0x56, + DBG_CLIENT_BLKID_mcc2 = 0x57, + DBG_CLIENT_BLKID_mcc3 = 0x58, + DBG_CLIENT_BLKID_mcd0 = 0x59, + DBG_CLIENT_BLKID_mcd1 = 0x5a, + DBG_CLIENT_BLKID_mcd2 = 0x5b, + DBG_CLIENT_BLKID_mcd3 = 0x5c, + DBG_CLIENT_BLKID_mcb = 0x5d, + DBG_CLIENT_BLKID_vmc = 0x5e, + DBG_CLIENT_BLKID_gmcon = 0x5f, + DBG_CLIENT_BLKID_gdc_0 = 0x60, + DBG_CLIENT_BLKID_gdc_1 = 0x61, + DBG_CLIENT_BLKID_gdc_2 = 0x62, + DBG_CLIENT_BLKID_gdc_3 = 0x63, + DBG_CLIENT_BLKID_gdc_4 = 0x64, + DBG_CLIENT_BLKID_gdc_5 = 0x65, + DBG_CLIENT_BLKID_gdc_6 = 0x66, + DBG_CLIENT_BLKID_gdc_7 = 0x67, + DBG_CLIENT_BLKID_gdc_8 = 0x68, + DBG_CLIENT_BLKID_gdc_9 = 0x69, + DBG_CLIENT_BLKID_gdc_10 = 0x6a, + DBG_CLIENT_BLKID_gdc_11 = 0x6b, + DBG_CLIENT_BLKID_gdc_12 = 0x6c, + DBG_CLIENT_BLKID_gdc_13 = 0x6d, + DBG_CLIENT_BLKID_gdc_14 = 0x6e, + DBG_CLIENT_BLKID_gdc_15 = 0x6f, + DBG_CLIENT_BLKID_gdc_16 = 0x70, + DBG_CLIENT_BLKID_gdc_17 = 0x71, + DBG_CLIENT_BLKID_gdc_18 = 0x72, + DBG_CLIENT_BLKID_gdc_19 = 0x73, + DBG_CLIENT_BLKID_gdc_20 = 0x74, + DBG_CLIENT_BLKID_gdc_21 = 0x75, + DBG_CLIENT_BLKID_gdc_22 = 0x76, + 
DBG_CLIENT_BLKID_wd = 0x77, + DBG_CLIENT_BLKID_sdma_0 = 0x78, + DBG_CLIENT_BLKID_sdma_1 = 0x79, +} DebugBlockId; +typedef enum DebugBlockId_OLD { + DBG_BLOCK_ID_RESERVED = 0x0, + DBG_BLOCK_ID_DBG = 0x1, + DBG_BLOCK_ID_VMC = 0x2, + DBG_BLOCK_ID_PDMA = 0x3, + DBG_BLOCK_ID_CG = 0x4, + DBG_BLOCK_ID_SRBM = 0x5, + DBG_BLOCK_ID_GRBM = 0x6, + DBG_BLOCK_ID_RLC = 0x7, + DBG_BLOCK_ID_CSC = 0x8, + DBG_BLOCK_ID_SEM = 0x9, + DBG_BLOCK_ID_IH = 0xa, + DBG_BLOCK_ID_SC = 0xb, + DBG_BLOCK_ID_SQ = 0xc, + DBG_BLOCK_ID_AVP = 0xd, + DBG_BLOCK_ID_GMCON = 0xe, + DBG_BLOCK_ID_SMU = 0xf, + DBG_BLOCK_ID_DMA0 = 0x10, + DBG_BLOCK_ID_DMA1 = 0x11, + DBG_BLOCK_ID_SPIM = 0x12, + DBG_BLOCK_ID_GDS = 0x13, + DBG_BLOCK_ID_SPIS = 0x14, + DBG_BLOCK_ID_UNUSED0 = 0x15, + DBG_BLOCK_ID_PA0 = 0x16, + DBG_BLOCK_ID_PA1 = 0x17, + DBG_BLOCK_ID_CP0 = 0x18, + DBG_BLOCK_ID_CP1 = 0x19, + DBG_BLOCK_ID_CP2 = 0x1a, + DBG_BLOCK_ID_UNUSED1 = 0x1b, + DBG_BLOCK_ID_UVDU = 0x1c, + DBG_BLOCK_ID_UVDM = 0x1d, + DBG_BLOCK_ID_VCE = 0x1e, + DBG_BLOCK_ID_UNUSED2 = 0x1f, + DBG_BLOCK_ID_VGT0 = 0x20, + DBG_BLOCK_ID_VGT1 = 0x21, + DBG_BLOCK_ID_IA = 0x22, + DBG_BLOCK_ID_UNUSED3 = 0x23, + DBG_BLOCK_ID_SCT0 = 0x24, + DBG_BLOCK_ID_SCT1 = 0x25, + DBG_BLOCK_ID_SPM0 = 0x26, + DBG_BLOCK_ID_SPM1 = 0x27, + DBG_BLOCK_ID_TCAA = 0x28, + DBG_BLOCK_ID_TCAB = 0x29, + DBG_BLOCK_ID_TCCA = 0x2a, + DBG_BLOCK_ID_TCCB = 0x2b, + DBG_BLOCK_ID_MCC0 = 0x2c, + DBG_BLOCK_ID_MCC1 = 0x2d, + DBG_BLOCK_ID_MCC2 = 0x2e, + DBG_BLOCK_ID_MCC3 = 0x2f, + DBG_BLOCK_ID_SX0 = 0x30, + DBG_BLOCK_ID_SX1 = 0x31, + DBG_BLOCK_ID_SX2 = 0x32, + DBG_BLOCK_ID_SX3 = 0x33, + DBG_BLOCK_ID_UNUSED4 = 0x34, + DBG_BLOCK_ID_UNUSED5 = 0x35, + DBG_BLOCK_ID_UNUSED6 = 0x36, + DBG_BLOCK_ID_UNUSED7 = 0x37, + DBG_BLOCK_ID_PC0 = 0x38, + DBG_BLOCK_ID_PC1 = 0x39, + DBG_BLOCK_ID_UNUSED8 = 0x3a, + DBG_BLOCK_ID_UNUSED9 = 0x3b, + DBG_BLOCK_ID_UNUSED10 = 0x3c, + DBG_BLOCK_ID_UNUSED11 = 0x3d, + DBG_BLOCK_ID_MCB = 0x3e, + DBG_BLOCK_ID_UNUSED12 = 0x3f, + DBG_BLOCK_ID_SCB0 = 0x40, + DBG_BLOCK_ID_SCB1 = 0x41, + DBG_BLOCK_ID_UNUSED13 = 0x42, + DBG_BLOCK_ID_UNUSED14 = 0x43, + DBG_BLOCK_ID_SCF0 = 0x44, + DBG_BLOCK_ID_SCF1 = 0x45, + DBG_BLOCK_ID_UNUSED15 = 0x46, + DBG_BLOCK_ID_UNUSED16 = 0x47, + DBG_BLOCK_ID_BCI0 = 0x48, + DBG_BLOCK_ID_BCI1 = 0x49, + DBG_BLOCK_ID_BCI2 = 0x4a, + DBG_BLOCK_ID_BCI3 = 0x4b, + DBG_BLOCK_ID_UNUSED17 = 0x4c, + DBG_BLOCK_ID_UNUSED18 = 0x4d, + DBG_BLOCK_ID_UNUSED19 = 0x4e, + DBG_BLOCK_ID_UNUSED20 = 0x4f, + DBG_BLOCK_ID_CB00 = 0x50, + DBG_BLOCK_ID_CB01 = 0x51, + DBG_BLOCK_ID_CB02 = 0x52, + DBG_BLOCK_ID_CB03 = 0x53, + DBG_BLOCK_ID_CB04 = 0x54, + DBG_BLOCK_ID_UNUSED21 = 0x55, + DBG_BLOCK_ID_UNUSED22 = 0x56, + DBG_BLOCK_ID_UNUSED23 = 0x57, + DBG_BLOCK_ID_CB10 = 0x58, + DBG_BLOCK_ID_CB11 = 0x59, + DBG_BLOCK_ID_CB12 = 0x5a, + DBG_BLOCK_ID_CB13 = 0x5b, + DBG_BLOCK_ID_CB14 = 0x5c, + DBG_BLOCK_ID_UNUSED24 = 0x5d, + DBG_BLOCK_ID_UNUSED25 = 0x5e, + DBG_BLOCK_ID_UNUSED26 = 0x5f, + DBG_BLOCK_ID_TCP0 = 0x60, + DBG_BLOCK_ID_TCP1 = 0x61, + DBG_BLOCK_ID_TCP2 = 0x62, + DBG_BLOCK_ID_TCP3 = 0x63, + DBG_BLOCK_ID_TCP4 = 0x64, + DBG_BLOCK_ID_TCP5 = 0x65, + DBG_BLOCK_ID_TCP6 = 0x66, + DBG_BLOCK_ID_TCP7 = 0x67, + DBG_BLOCK_ID_TCP8 = 0x68, + DBG_BLOCK_ID_TCP9 = 0x69, + DBG_BLOCK_ID_TCP10 = 0x6a, + DBG_BLOCK_ID_TCP11 = 0x6b, + DBG_BLOCK_ID_TCP12 = 0x6c, + DBG_BLOCK_ID_TCP13 = 0x6d, + DBG_BLOCK_ID_TCP14 = 0x6e, + DBG_BLOCK_ID_TCP15 = 0x6f, + DBG_BLOCK_ID_TCP16 = 0x70, + DBG_BLOCK_ID_TCP17 = 0x71, + DBG_BLOCK_ID_TCP18 = 0x72, + DBG_BLOCK_ID_TCP19 = 0x73, + DBG_BLOCK_ID_TCP20 = 0x74, + DBG_BLOCK_ID_TCP21 = 0x75, + DBG_BLOCK_ID_TCP22 = 0x76, + 
DBG_BLOCK_ID_TCP23 = 0x77, + DBG_BLOCK_ID_TCP_RESERVED0 = 0x78, + DBG_BLOCK_ID_TCP_RESERVED1 = 0x79, + DBG_BLOCK_ID_TCP_RESERVED2 = 0x7a, + DBG_BLOCK_ID_TCP_RESERVED3 = 0x7b, + DBG_BLOCK_ID_TCP_RESERVED4 = 0x7c, + DBG_BLOCK_ID_TCP_RESERVED5 = 0x7d, + DBG_BLOCK_ID_TCP_RESERVED6 = 0x7e, + DBG_BLOCK_ID_TCP_RESERVED7 = 0x7f, + DBG_BLOCK_ID_DB00 = 0x80, + DBG_BLOCK_ID_DB01 = 0x81, + DBG_BLOCK_ID_DB02 = 0x82, + DBG_BLOCK_ID_DB03 = 0x83, + DBG_BLOCK_ID_DB04 = 0x84, + DBG_BLOCK_ID_UNUSED27 = 0x85, + DBG_BLOCK_ID_UNUSED28 = 0x86, + DBG_BLOCK_ID_UNUSED29 = 0x87, + DBG_BLOCK_ID_DB10 = 0x88, + DBG_BLOCK_ID_DB11 = 0x89, + DBG_BLOCK_ID_DB12 = 0x8a, + DBG_BLOCK_ID_DB13 = 0x8b, + DBG_BLOCK_ID_DB14 = 0x8c, + DBG_BLOCK_ID_UNUSED30 = 0x8d, + DBG_BLOCK_ID_UNUSED31 = 0x8e, + DBG_BLOCK_ID_UNUSED32 = 0x8f, + DBG_BLOCK_ID_TCC0 = 0x90, + DBG_BLOCK_ID_TCC1 = 0x91, + DBG_BLOCK_ID_TCC2 = 0x92, + DBG_BLOCK_ID_TCC3 = 0x93, + DBG_BLOCK_ID_TCC4 = 0x94, + DBG_BLOCK_ID_TCC5 = 0x95, + DBG_BLOCK_ID_TCC6 = 0x96, + DBG_BLOCK_ID_TCC7 = 0x97, + DBG_BLOCK_ID_SPS00 = 0x98, + DBG_BLOCK_ID_SPS01 = 0x99, + DBG_BLOCK_ID_SPS02 = 0x9a, + DBG_BLOCK_ID_SPS10 = 0x9b, + DBG_BLOCK_ID_SPS11 = 0x9c, + DBG_BLOCK_ID_SPS12 = 0x9d, + DBG_BLOCK_ID_UNUSED33 = 0x9e, + DBG_BLOCK_ID_UNUSED34 = 0x9f, + DBG_BLOCK_ID_TA00 = 0xa0, + DBG_BLOCK_ID_TA01 = 0xa1, + DBG_BLOCK_ID_TA02 = 0xa2, + DBG_BLOCK_ID_TA03 = 0xa3, + DBG_BLOCK_ID_TA04 = 0xa4, + DBG_BLOCK_ID_TA05 = 0xa5, + DBG_BLOCK_ID_TA06 = 0xa6, + DBG_BLOCK_ID_TA07 = 0xa7, + DBG_BLOCK_ID_TA08 = 0xa8, + DBG_BLOCK_ID_TA09 = 0xa9, + DBG_BLOCK_ID_TA0A = 0xaa, + DBG_BLOCK_ID_TA0B = 0xab, + DBG_BLOCK_ID_UNUSED35 = 0xac, + DBG_BLOCK_ID_UNUSED36 = 0xad, + DBG_BLOCK_ID_UNUSED37 = 0xae, + DBG_BLOCK_ID_UNUSED38 = 0xaf, + DBG_BLOCK_ID_TA10 = 0xb0, + DBG_BLOCK_ID_TA11 = 0xb1, + DBG_BLOCK_ID_TA12 = 0xb2, + DBG_BLOCK_ID_TA13 = 0xb3, + DBG_BLOCK_ID_TA14 = 0xb4, + DBG_BLOCK_ID_TA15 = 0xb5, + DBG_BLOCK_ID_TA16 = 0xb6, + DBG_BLOCK_ID_TA17 = 0xb7, + DBG_BLOCK_ID_TA18 = 0xb8, + DBG_BLOCK_ID_TA19 = 0xb9, + DBG_BLOCK_ID_TA1A = 0xba, + DBG_BLOCK_ID_TA1B = 0xbb, + DBG_BLOCK_ID_UNUSED39 = 0xbc, + DBG_BLOCK_ID_UNUSED40 = 0xbd, + DBG_BLOCK_ID_UNUSED41 = 0xbe, + DBG_BLOCK_ID_UNUSED42 = 0xbf, + DBG_BLOCK_ID_TD00 = 0xc0, + DBG_BLOCK_ID_TD01 = 0xc1, + DBG_BLOCK_ID_TD02 = 0xc2, + DBG_BLOCK_ID_TD03 = 0xc3, + DBG_BLOCK_ID_TD04 = 0xc4, + DBG_BLOCK_ID_TD05 = 0xc5, + DBG_BLOCK_ID_TD06 = 0xc6, + DBG_BLOCK_ID_TD07 = 0xc7, + DBG_BLOCK_ID_TD08 = 0xc8, + DBG_BLOCK_ID_TD09 = 0xc9, + DBG_BLOCK_ID_TD0A = 0xca, + DBG_BLOCK_ID_TD0B = 0xcb, + DBG_BLOCK_ID_UNUSED43 = 0xcc, + DBG_BLOCK_ID_UNUSED44 = 0xcd, + DBG_BLOCK_ID_UNUSED45 = 0xce, + DBG_BLOCK_ID_UNUSED46 = 0xcf, + DBG_BLOCK_ID_TD10 = 0xd0, + DBG_BLOCK_ID_TD11 = 0xd1, + DBG_BLOCK_ID_TD12 = 0xd2, + DBG_BLOCK_ID_TD13 = 0xd3, + DBG_BLOCK_ID_TD14 = 0xd4, + DBG_BLOCK_ID_TD15 = 0xd5, + DBG_BLOCK_ID_TD16 = 0xd6, + DBG_BLOCK_ID_TD17 = 0xd7, + DBG_BLOCK_ID_TD18 = 0xd8, + DBG_BLOCK_ID_TD19 = 0xd9, + DBG_BLOCK_ID_TD1A = 0xda, + DBG_BLOCK_ID_TD1B = 0xdb, + DBG_BLOCK_ID_UNUSED47 = 0xdc, + DBG_BLOCK_ID_UNUSED48 = 0xdd, + DBG_BLOCK_ID_UNUSED49 = 0xde, + DBG_BLOCK_ID_UNUSED50 = 0xdf, + DBG_BLOCK_ID_MCD0 = 0xe0, + DBG_BLOCK_ID_MCD1 = 0xe1, + DBG_BLOCK_ID_MCD2 = 0xe2, + DBG_BLOCK_ID_MCD3 = 0xe3, + DBG_BLOCK_ID_MCD4 = 0xe4, + DBG_BLOCK_ID_MCD5 = 0xe5, + DBG_BLOCK_ID_UNUSED51 = 0xe6, + DBG_BLOCK_ID_UNUSED52 = 0xe7, +} DebugBlockId_OLD; +typedef enum DebugBlockId_BY2 { + DBG_BLOCK_ID_RESERVED_BY2 = 0x0, + DBG_BLOCK_ID_VMC_BY2 = 0x1, + DBG_BLOCK_ID_CG_BY2 = 0x2, + DBG_BLOCK_ID_GRBM_BY2 = 0x3, + DBG_BLOCK_ID_CSC_BY2 = 0x4, 
+ DBG_BLOCK_ID_IH_BY2 = 0x5, + DBG_BLOCK_ID_SQ_BY2 = 0x6, + DBG_BLOCK_ID_GMCON_BY2 = 0x7, + DBG_BLOCK_ID_DMA0_BY2 = 0x8, + DBG_BLOCK_ID_SPIM_BY2 = 0x9, + DBG_BLOCK_ID_SPIS_BY2 = 0xa, + DBG_BLOCK_ID_PA0_BY2 = 0xb, + DBG_BLOCK_ID_CP0_BY2 = 0xc, + DBG_BLOCK_ID_CP2_BY2 = 0xd, + DBG_BLOCK_ID_UVDU_BY2 = 0xe, + DBG_BLOCK_ID_VCE_BY2 = 0xf, + DBG_BLOCK_ID_VGT0_BY2 = 0x10, + DBG_BLOCK_ID_IA_BY2 = 0x11, + DBG_BLOCK_ID_SCT0_BY2 = 0x12, + DBG_BLOCK_ID_SPM0_BY2 = 0x13, + DBG_BLOCK_ID_TCAA_BY2 = 0x14, + DBG_BLOCK_ID_TCCA_BY2 = 0x15, + DBG_BLOCK_ID_MCC0_BY2 = 0x16, + DBG_BLOCK_ID_MCC2_BY2 = 0x17, + DBG_BLOCK_ID_SX0_BY2 = 0x18, + DBG_BLOCK_ID_SX2_BY2 = 0x19, + DBG_BLOCK_ID_UNUSED4_BY2 = 0x1a, + DBG_BLOCK_ID_UNUSED6_BY2 = 0x1b, + DBG_BLOCK_ID_PC0_BY2 = 0x1c, + DBG_BLOCK_ID_UNUSED8_BY2 = 0x1d, + DBG_BLOCK_ID_UNUSED10_BY2 = 0x1e, + DBG_BLOCK_ID_MCB_BY2 = 0x1f, + DBG_BLOCK_ID_SCB0_BY2 = 0x20, + DBG_BLOCK_ID_UNUSED13_BY2 = 0x21, + DBG_BLOCK_ID_SCF0_BY2 = 0x22, + DBG_BLOCK_ID_UNUSED15_BY2 = 0x23, + DBG_BLOCK_ID_BCI0_BY2 = 0x24, + DBG_BLOCK_ID_BCI2_BY2 = 0x25, + DBG_BLOCK_ID_UNUSED17_BY2 = 0x26, + DBG_BLOCK_ID_UNUSED19_BY2 = 0x27, + DBG_BLOCK_ID_CB00_BY2 = 0x28, + DBG_BLOCK_ID_CB02_BY2 = 0x29, + DBG_BLOCK_ID_CB04_BY2 = 0x2a, + DBG_BLOCK_ID_UNUSED22_BY2 = 0x2b, + DBG_BLOCK_ID_CB10_BY2 = 0x2c, + DBG_BLOCK_ID_CB12_BY2 = 0x2d, + DBG_BLOCK_ID_CB14_BY2 = 0x2e, + DBG_BLOCK_ID_UNUSED25_BY2 = 0x2f, + DBG_BLOCK_ID_TCP0_BY2 = 0x30, + DBG_BLOCK_ID_TCP2_BY2 = 0x31, + DBG_BLOCK_ID_TCP4_BY2 = 0x32, + DBG_BLOCK_ID_TCP6_BY2 = 0x33, + DBG_BLOCK_ID_TCP8_BY2 = 0x34, + DBG_BLOCK_ID_TCP10_BY2 = 0x35, + DBG_BLOCK_ID_TCP12_BY2 = 0x36, + DBG_BLOCK_ID_TCP14_BY2 = 0x37, + DBG_BLOCK_ID_TCP16_BY2 = 0x38, + DBG_BLOCK_ID_TCP18_BY2 = 0x39, + DBG_BLOCK_ID_TCP20_BY2 = 0x3a, + DBG_BLOCK_ID_TCP22_BY2 = 0x3b, + DBG_BLOCK_ID_TCP_RESERVED0_BY2 = 0x3c, + DBG_BLOCK_ID_TCP_RESERVED2_BY2 = 0x3d, + DBG_BLOCK_ID_TCP_RESERVED4_BY2 = 0x3e, + DBG_BLOCK_ID_TCP_RESERVED6_BY2 = 0x3f, + DBG_BLOCK_ID_DB00_BY2 = 0x40, + DBG_BLOCK_ID_DB02_BY2 = 0x41, + DBG_BLOCK_ID_DB04_BY2 = 0x42, + DBG_BLOCK_ID_UNUSED28_BY2 = 0x43, + DBG_BLOCK_ID_DB10_BY2 = 0x44, + DBG_BLOCK_ID_DB12_BY2 = 0x45, + DBG_BLOCK_ID_DB14_BY2 = 0x46, + DBG_BLOCK_ID_UNUSED31_BY2 = 0x47, + DBG_BLOCK_ID_TCC0_BY2 = 0x48, + DBG_BLOCK_ID_TCC2_BY2 = 0x49, + DBG_BLOCK_ID_TCC4_BY2 = 0x4a, + DBG_BLOCK_ID_TCC6_BY2 = 0x4b, + DBG_BLOCK_ID_SPS00_BY2 = 0x4c, + DBG_BLOCK_ID_SPS02_BY2 = 0x4d, + DBG_BLOCK_ID_SPS11_BY2 = 0x4e, + DBG_BLOCK_ID_UNUSED33_BY2 = 0x4f, + DBG_BLOCK_ID_TA00_BY2 = 0x50, + DBG_BLOCK_ID_TA02_BY2 = 0x51, + DBG_BLOCK_ID_TA04_BY2 = 0x52, + DBG_BLOCK_ID_TA06_BY2 = 0x53, + DBG_BLOCK_ID_TA08_BY2 = 0x54, + DBG_BLOCK_ID_TA0A_BY2 = 0x55, + DBG_BLOCK_ID_UNUSED35_BY2 = 0x56, + DBG_BLOCK_ID_UNUSED37_BY2 = 0x57, + DBG_BLOCK_ID_TA10_BY2 = 0x58, + DBG_BLOCK_ID_TA12_BY2 = 0x59, + DBG_BLOCK_ID_TA14_BY2 = 0x5a, + DBG_BLOCK_ID_TA16_BY2 = 0x5b, + DBG_BLOCK_ID_TA18_BY2 = 0x5c, + DBG_BLOCK_ID_TA1A_BY2 = 0x5d, + DBG_BLOCK_ID_UNUSED39_BY2 = 0x5e, + DBG_BLOCK_ID_UNUSED41_BY2 = 0x5f, + DBG_BLOCK_ID_TD00_BY2 = 0x60, + DBG_BLOCK_ID_TD02_BY2 = 0x61, + DBG_BLOCK_ID_TD04_BY2 = 0x62, + DBG_BLOCK_ID_TD06_BY2 = 0x63, + DBG_BLOCK_ID_TD08_BY2 = 0x64, + DBG_BLOCK_ID_TD0A_BY2 = 0x65, + DBG_BLOCK_ID_UNUSED43_BY2 = 0x66, + DBG_BLOCK_ID_UNUSED45_BY2 = 0x67, + DBG_BLOCK_ID_TD10_BY2 = 0x68, + DBG_BLOCK_ID_TD12_BY2 = 0x69, + DBG_BLOCK_ID_TD14_BY2 = 0x6a, + DBG_BLOCK_ID_TD16_BY2 = 0x6b, + DBG_BLOCK_ID_TD18_BY2 = 0x6c, + DBG_BLOCK_ID_TD1A_BY2 = 0x6d, + DBG_BLOCK_ID_UNUSED47_BY2 = 0x6e, + DBG_BLOCK_ID_UNUSED49_BY2 = 0x6f, + DBG_BLOCK_ID_MCD0_BY2 = 
0x70, + DBG_BLOCK_ID_MCD2_BY2 = 0x71, + DBG_BLOCK_ID_MCD4_BY2 = 0x72, + DBG_BLOCK_ID_UNUSED51_BY2 = 0x73, +} DebugBlockId_BY2; +typedef enum DebugBlockId_BY4 { + DBG_BLOCK_ID_RESERVED_BY4 = 0x0, + DBG_BLOCK_ID_CG_BY4 = 0x1, + DBG_BLOCK_ID_CSC_BY4 = 0x2, + DBG_BLOCK_ID_SQ_BY4 = 0x3, + DBG_BLOCK_ID_DMA0_BY4 = 0x4, + DBG_BLOCK_ID_SPIS_BY4 = 0x5, + DBG_BLOCK_ID_CP0_BY4 = 0x6, + DBG_BLOCK_ID_UVDU_BY4 = 0x7, + DBG_BLOCK_ID_VGT0_BY4 = 0x8, + DBG_BLOCK_ID_SCT0_BY4 = 0x9, + DBG_BLOCK_ID_TCAA_BY4 = 0xa, + DBG_BLOCK_ID_MCC0_BY4 = 0xb, + DBG_BLOCK_ID_SX0_BY4 = 0xc, + DBG_BLOCK_ID_UNUSED4_BY4 = 0xd, + DBG_BLOCK_ID_PC0_BY4 = 0xe, + DBG_BLOCK_ID_UNUSED10_BY4 = 0xf, + DBG_BLOCK_ID_SCB0_BY4 = 0x10, + DBG_BLOCK_ID_SCF0_BY4 = 0x11, + DBG_BLOCK_ID_BCI0_BY4 = 0x12, + DBG_BLOCK_ID_UNUSED17_BY4 = 0x13, + DBG_BLOCK_ID_CB00_BY4 = 0x14, + DBG_BLOCK_ID_CB04_BY4 = 0x15, + DBG_BLOCK_ID_CB10_BY4 = 0x16, + DBG_BLOCK_ID_CB14_BY4 = 0x17, + DBG_BLOCK_ID_TCP0_BY4 = 0x18, + DBG_BLOCK_ID_TCP4_BY4 = 0x19, + DBG_BLOCK_ID_TCP8_BY4 = 0x1a, + DBG_BLOCK_ID_TCP12_BY4 = 0x1b, + DBG_BLOCK_ID_TCP16_BY4 = 0x1c, + DBG_BLOCK_ID_TCP20_BY4 = 0x1d, + DBG_BLOCK_ID_TCP_RESERVED0_BY4 = 0x1e, + DBG_BLOCK_ID_TCP_RESERVED4_BY4 = 0x1f, + DBG_BLOCK_ID_DB_BY4 = 0x20, + DBG_BLOCK_ID_DB04_BY4 = 0x21, + DBG_BLOCK_ID_DB10_BY4 = 0x22, + DBG_BLOCK_ID_DB14_BY4 = 0x23, + DBG_BLOCK_ID_TCC0_BY4 = 0x24, + DBG_BLOCK_ID_TCC4_BY4 = 0x25, + DBG_BLOCK_ID_SPS00_BY4 = 0x26, + DBG_BLOCK_ID_SPS11_BY4 = 0x27, + DBG_BLOCK_ID_TA00_BY4 = 0x28, + DBG_BLOCK_ID_TA04_BY4 = 0x29, + DBG_BLOCK_ID_TA08_BY4 = 0x2a, + DBG_BLOCK_ID_UNUSED35_BY4 = 0x2b, + DBG_BLOCK_ID_TA10_BY4 = 0x2c, + DBG_BLOCK_ID_TA14_BY4 = 0x2d, + DBG_BLOCK_ID_TA18_BY4 = 0x2e, + DBG_BLOCK_ID_UNUSED39_BY4 = 0x2f, + DBG_BLOCK_ID_TD00_BY4 = 0x30, + DBG_BLOCK_ID_TD04_BY4 = 0x31, + DBG_BLOCK_ID_TD08_BY4 = 0x32, + DBG_BLOCK_ID_UNUSED43_BY4 = 0x33, + DBG_BLOCK_ID_TD10_BY4 = 0x34, + DBG_BLOCK_ID_TD14_BY4 = 0x35, + DBG_BLOCK_ID_TD18_BY4 = 0x36, + DBG_BLOCK_ID_UNUSED47_BY4 = 0x37, + DBG_BLOCK_ID_MCD0_BY4 = 0x38, + DBG_BLOCK_ID_MCD4_BY4 = 0x39, +} DebugBlockId_BY4; +typedef enum DebugBlockId_BY8 { + DBG_BLOCK_ID_RESERVED_BY8 = 0x0, + DBG_BLOCK_ID_CSC_BY8 = 0x1, + DBG_BLOCK_ID_DMA0_BY8 = 0x2, + DBG_BLOCK_ID_CP0_BY8 = 0x3, + DBG_BLOCK_ID_VGT0_BY8 = 0x4, + DBG_BLOCK_ID_TCAA_BY8 = 0x5, + DBG_BLOCK_ID_SX0_BY8 = 0x6, + DBG_BLOCK_ID_PC0_BY8 = 0x7, + DBG_BLOCK_ID_SCB0_BY8 = 0x8, + DBG_BLOCK_ID_BCI0_BY8 = 0x9, + DBG_BLOCK_ID_CB00_BY8 = 0xa, + DBG_BLOCK_ID_CB10_BY8 = 0xb, + DBG_BLOCK_ID_TCP0_BY8 = 0xc, + DBG_BLOCK_ID_TCP8_BY8 = 0xd, + DBG_BLOCK_ID_TCP16_BY8 = 0xe, + DBG_BLOCK_ID_TCP_RESERVED0_BY8 = 0xf, + DBG_BLOCK_ID_DB00_BY8 = 0x10, + DBG_BLOCK_ID_DB10_BY8 = 0x11, + DBG_BLOCK_ID_TCC0_BY8 = 0x12, + DBG_BLOCK_ID_SPS00_BY8 = 0x13, + DBG_BLOCK_ID_TA00_BY8 = 0x14, + DBG_BLOCK_ID_TA08_BY8 = 0x15, + DBG_BLOCK_ID_TA10_BY8 = 0x16, + DBG_BLOCK_ID_TA18_BY8 = 0x17, + DBG_BLOCK_ID_TD00_BY8 = 0x18, + DBG_BLOCK_ID_TD08_BY8 = 0x19, + DBG_BLOCK_ID_TD10_BY8 = 0x1a, + DBG_BLOCK_ID_TD18_BY8 = 0x1b, + DBG_BLOCK_ID_MCD0_BY8 = 0x1c, +} DebugBlockId_BY8; +typedef enum DebugBlockId_BY16 { + DBG_BLOCK_ID_RESERVED_BY16 = 0x0, + DBG_BLOCK_ID_DMA0_BY16 = 0x1, + DBG_BLOCK_ID_VGT0_BY16 = 0x2, + DBG_BLOCK_ID_SX0_BY16 = 0x3, + DBG_BLOCK_ID_SCB0_BY16 = 0x4, + DBG_BLOCK_ID_CB00_BY16 = 0x5, + DBG_BLOCK_ID_TCP0_BY16 = 0x6, + DBG_BLOCK_ID_TCP16_BY16 = 0x7, + DBG_BLOCK_ID_DB00_BY16 = 0x8, + DBG_BLOCK_ID_TCC0_BY16 = 0x9, + DBG_BLOCK_ID_TA00_BY16 = 0xa, + DBG_BLOCK_ID_TA10_BY16 = 0xb, + DBG_BLOCK_ID_TD00_BY16 = 0xc, + DBG_BLOCK_ID_TD10_BY16 = 0xd, + DBG_BLOCK_ID_MCD0_BY16 = 
0xe, +} DebugBlockId_BY16; +typedef enum CompareRef { + REF_NEVER = 0x0, + REF_LESS = 0x1, + REF_EQUAL = 0x2, + REF_LEQUAL = 0x3, + REF_GREATER = 0x4, + REF_NOTEQUAL = 0x5, + REF_GEQUAL = 0x6, + REF_ALWAYS = 0x7, +} CompareRef; +typedef enum ReadSize { + READ_256_BITS = 0x0, + READ_512_BITS = 0x1, +} ReadSize; +typedef enum DepthFormat { + DEPTH_INVALID = 0x0, + DEPTH_16 = 0x1, + DEPTH_X8_24 = 0x2, + DEPTH_8_24 = 0x3, + DEPTH_X8_24_FLOAT = 0x4, + DEPTH_8_24_FLOAT = 0x5, + DEPTH_32_FLOAT = 0x6, + DEPTH_X24_8_32_FLOAT = 0x7, +} DepthFormat; +typedef enum ZFormat { + Z_INVALID = 0x0, + Z_16 = 0x1, + Z_24 = 0x2, + Z_32_FLOAT = 0x3, +} ZFormat; +typedef enum StencilFormat { + STENCIL_INVALID = 0x0, + STENCIL_8 = 0x1, +} StencilFormat; +typedef enum CmaskMode { + CMASK_CLEAR_NONE = 0x0, + CMASK_CLEAR_ONE = 0x1, + CMASK_CLEAR_ALL = 0x2, + CMASK_ANY_EXPANDED = 0x3, + CMASK_ALPHA0_FRAG1 = 0x4, + CMASK_ALPHA0_FRAG2 = 0x5, + CMASK_ALPHA0_FRAG4 = 0x6, + CMASK_ALPHA0_FRAGS = 0x7, + CMASK_ALPHA1_FRAG1 = 0x8, + CMASK_ALPHA1_FRAG2 = 0x9, + CMASK_ALPHA1_FRAG4 = 0xa, + CMASK_ALPHA1_FRAGS = 0xb, + CMASK_ALPHAX_FRAG1 = 0xc, + CMASK_ALPHAX_FRAG2 = 0xd, + CMASK_ALPHAX_FRAG4 = 0xe, + CMASK_ALPHAX_FRAGS = 0xf, +} CmaskMode; +typedef enum QuadExportFormat { + EXPORT_UNUSED = 0x0, + EXPORT_32_R = 0x1, + EXPORT_32_GR = 0x2, + EXPORT_32_AR = 0x3, + EXPORT_FP16_ABGR = 0x4, + EXPORT_UNSIGNED16_ABGR = 0x5, + EXPORT_SIGNED16_ABGR = 0x6, + EXPORT_32_ABGR = 0x7, +} QuadExportFormat; +typedef enum QuadExportFormatOld { + EXPORT_4P_32BPC_ABGR = 0x0, + EXPORT_4P_16BPC_ABGR = 0x1, + EXPORT_4P_32BPC_GR = 0x2, + EXPORT_4P_32BPC_AR = 0x3, + EXPORT_2P_32BPC_ABGR = 0x4, + EXPORT_8P_32BPC_R = 0x5, +} QuadExportFormatOld; +typedef enum ColorFormat { + COLOR_INVALID = 0x0, + COLOR_8 = 0x1, + COLOR_16 = 0x2, + COLOR_8_8 = 0x3, + COLOR_32 = 0x4, + COLOR_16_16 = 0x5, + COLOR_10_11_11 = 0x6, + COLOR_11_11_10 = 0x7, + COLOR_10_10_10_2 = 0x8, + COLOR_2_10_10_10 = 0x9, + COLOR_8_8_8_8 = 0xa, + COLOR_32_32 = 0xb, + COLOR_16_16_16_16 = 0xc, + COLOR_RESERVED_13 = 0xd, + COLOR_32_32_32_32 = 0xe, + COLOR_RESERVED_15 = 0xf, + COLOR_5_6_5 = 0x10, + COLOR_1_5_5_5 = 0x11, + COLOR_5_5_5_1 = 0x12, + COLOR_4_4_4_4 = 0x13, + COLOR_8_24 = 0x14, + COLOR_24_8 = 0x15, + COLOR_X24_8_32_FLOAT = 0x16, + COLOR_RESERVED_23 = 0x17, +} ColorFormat; +typedef enum SurfaceFormat { + FMT_INVALID = 0x0, + FMT_8 = 0x1, + FMT_16 = 0x2, + FMT_8_8 = 0x3, + FMT_32 = 0x4, + FMT_16_16 = 0x5, + FMT_10_11_11 = 0x6, + FMT_11_11_10 = 0x7, + FMT_10_10_10_2 = 0x8, + FMT_2_10_10_10 = 0x9, + FMT_8_8_8_8 = 0xa, + FMT_32_32 = 0xb, + FMT_16_16_16_16 = 0xc, + FMT_32_32_32 = 0xd, + FMT_32_32_32_32 = 0xe, + FMT_RESERVED_4 = 0xf, + FMT_5_6_5 = 0x10, + FMT_1_5_5_5 = 0x11, + FMT_5_5_5_1 = 0x12, + FMT_4_4_4_4 = 0x13, + FMT_8_24 = 0x14, + FMT_24_8 = 0x15, + FMT_X24_8_32_FLOAT = 0x16, + FMT_RESERVED_33 = 0x17, + FMT_11_11_10_FLOAT = 0x18, + FMT_16_FLOAT = 0x19, + FMT_32_FLOAT = 0x1a, + FMT_16_16_FLOAT = 0x1b, + FMT_8_24_FLOAT = 0x1c, + FMT_24_8_FLOAT = 0x1d, + FMT_32_32_FLOAT = 0x1e, + FMT_10_11_11_FLOAT = 0x1f, + FMT_16_16_16_16_FLOAT = 0x20, + FMT_3_3_2 = 0x21, + FMT_6_5_5 = 0x22, + FMT_32_32_32_32_FLOAT = 0x23, + FMT_RESERVED_36 = 0x24, + FMT_1 = 0x25, + FMT_1_REVERSED = 0x26, + FMT_GB_GR = 0x27, + FMT_BG_RG = 0x28, + FMT_32_AS_8 = 0x29, + FMT_32_AS_8_8 = 0x2a, + FMT_5_9_9_9_SHAREDEXP = 0x2b, + FMT_8_8_8 = 0x2c, + FMT_16_16_16 = 0x2d, + FMT_16_16_16_FLOAT = 0x2e, + FMT_4_4 = 0x2f, + FMT_32_32_32_FLOAT = 0x30, + FMT_BC1 = 0x31, + FMT_BC2 = 0x32, + FMT_BC3 = 0x33, + FMT_BC4 = 0x34, + FMT_BC5 = 
0x35, + FMT_BC6 = 0x36, + FMT_BC7 = 0x37, + FMT_32_AS_32_32_32_32 = 0x38, + FMT_APC3 = 0x39, + FMT_APC4 = 0x3a, + FMT_APC5 = 0x3b, + FMT_APC6 = 0x3c, + FMT_APC7 = 0x3d, + FMT_CTX1 = 0x3e, + FMT_RESERVED_63 = 0x3f, +} SurfaceFormat; +typedef enum BUF_DATA_FORMAT { + BUF_DATA_FORMAT_INVALID = 0x0, + BUF_DATA_FORMAT_8 = 0x1, + BUF_DATA_FORMAT_16 = 0x2, + BUF_DATA_FORMAT_8_8 = 0x3, + BUF_DATA_FORMAT_32 = 0x4, + BUF_DATA_FORMAT_16_16 = 0x5, + BUF_DATA_FORMAT_10_11_11 = 0x6, + BUF_DATA_FORMAT_11_11_10 = 0x7, + BUF_DATA_FORMAT_10_10_10_2 = 0x8, + BUF_DATA_FORMAT_2_10_10_10 = 0x9, + BUF_DATA_FORMAT_8_8_8_8 = 0xa, + BUF_DATA_FORMAT_32_32 = 0xb, + BUF_DATA_FORMAT_16_16_16_16 = 0xc, + BUF_DATA_FORMAT_32_32_32 = 0xd, + BUF_DATA_FORMAT_32_32_32_32 = 0xe, + BUF_DATA_FORMAT_RESERVED_15 = 0xf, +} BUF_DATA_FORMAT; +typedef enum IMG_DATA_FORMAT { + IMG_DATA_FORMAT_INVALID = 0x0, + IMG_DATA_FORMAT_8 = 0x1, + IMG_DATA_FORMAT_16 = 0x2, + IMG_DATA_FORMAT_8_8 = 0x3, + IMG_DATA_FORMAT_32 = 0x4, + IMG_DATA_FORMAT_16_16 = 0x5, + IMG_DATA_FORMAT_10_11_11 = 0x6, + IMG_DATA_FORMAT_11_11_10 = 0x7, + IMG_DATA_FORMAT_10_10_10_2 = 0x8, + IMG_DATA_FORMAT_2_10_10_10 = 0x9, + IMG_DATA_FORMAT_8_8_8_8 = 0xa, + IMG_DATA_FORMAT_32_32 = 0xb, + IMG_DATA_FORMAT_16_16_16_16 = 0xc, + IMG_DATA_FORMAT_32_32_32 = 0xd, + IMG_DATA_FORMAT_32_32_32_32 = 0xe, + IMG_DATA_FORMAT_RESERVED_15 = 0xf, + IMG_DATA_FORMAT_5_6_5 = 0x10, + IMG_DATA_FORMAT_1_5_5_5 = 0x11, + IMG_DATA_FORMAT_5_5_5_1 = 0x12, + IMG_DATA_FORMAT_4_4_4_4 = 0x13, + IMG_DATA_FORMAT_8_24 = 0x14, + IMG_DATA_FORMAT_24_8 = 0x15, + IMG_DATA_FORMAT_X24_8_32 = 0x16, + IMG_DATA_FORMAT_RESERVED_23 = 0x17, + IMG_DATA_FORMAT_RESERVED_24 = 0x18, + IMG_DATA_FORMAT_RESERVED_25 = 0x19, + IMG_DATA_FORMAT_RESERVED_26 = 0x1a, + IMG_DATA_FORMAT_RESERVED_27 = 0x1b, + IMG_DATA_FORMAT_RESERVED_28 = 0x1c, + IMG_DATA_FORMAT_RESERVED_29 = 0x1d, + IMG_DATA_FORMAT_RESERVED_30 = 0x1e, + IMG_DATA_FORMAT_RESERVED_31 = 0x1f, + IMG_DATA_FORMAT_GB_GR = 0x20, + IMG_DATA_FORMAT_BG_RG = 0x21, + IMG_DATA_FORMAT_5_9_9_9 = 0x22, + IMG_DATA_FORMAT_BC1 = 0x23, + IMG_DATA_FORMAT_BC2 = 0x24, + IMG_DATA_FORMAT_BC3 = 0x25, + IMG_DATA_FORMAT_BC4 = 0x26, + IMG_DATA_FORMAT_BC5 = 0x27, + IMG_DATA_FORMAT_BC6 = 0x28, + IMG_DATA_FORMAT_BC7 = 0x29, + IMG_DATA_FORMAT_RESERVED_42 = 0x2a, + IMG_DATA_FORMAT_RESERVED_43 = 0x2b, + IMG_DATA_FORMAT_FMASK8_S2_F1 = 0x2c, + IMG_DATA_FORMAT_FMASK8_S4_F1 = 0x2d, + IMG_DATA_FORMAT_FMASK8_S8_F1 = 0x2e, + IMG_DATA_FORMAT_FMASK8_S2_F2 = 0x2f, + IMG_DATA_FORMAT_FMASK8_S4_F2 = 0x30, + IMG_DATA_FORMAT_FMASK8_S4_F4 = 0x31, + IMG_DATA_FORMAT_FMASK16_S16_F1 = 0x32, + IMG_DATA_FORMAT_FMASK16_S8_F2 = 0x33, + IMG_DATA_FORMAT_FMASK32_S16_F2 = 0x34, + IMG_DATA_FORMAT_FMASK32_S8_F4 = 0x35, + IMG_DATA_FORMAT_FMASK32_S8_F8 = 0x36, + IMG_DATA_FORMAT_FMASK64_S16_F4 = 0x37, + IMG_DATA_FORMAT_FMASK64_S16_F8 = 0x38, + IMG_DATA_FORMAT_4_4 = 0x39, + IMG_DATA_FORMAT_6_5_5 = 0x3a, + IMG_DATA_FORMAT_1 = 0x3b, + IMG_DATA_FORMAT_1_REVERSED = 0x3c, + IMG_DATA_FORMAT_32_AS_8 = 0x3d, + IMG_DATA_FORMAT_32_AS_8_8 = 0x3e, + IMG_DATA_FORMAT_32_AS_32_32_32_32 = 0x3f, +} IMG_DATA_FORMAT; +typedef enum BUF_NUM_FORMAT { + BUF_NUM_FORMAT_UNORM = 0x0, + BUF_NUM_FORMAT_SNORM = 0x1, + BUF_NUM_FORMAT_USCALED = 0x2, + BUF_NUM_FORMAT_SSCALED = 0x3, + BUF_NUM_FORMAT_UINT = 0x4, + BUF_NUM_FORMAT_SINT = 0x5, + BUF_NUM_FORMAT_SNORM_OGL = 0x6, + BUF_NUM_FORMAT_FLOAT = 0x7, +} BUF_NUM_FORMAT; +typedef enum IMG_NUM_FORMAT { + IMG_NUM_FORMAT_UNORM = 0x0, + IMG_NUM_FORMAT_SNORM = 0x1, + IMG_NUM_FORMAT_USCALED = 0x2, + IMG_NUM_FORMAT_SSCALED = 0x3, + 
IMG_NUM_FORMAT_UINT = 0x4, + IMG_NUM_FORMAT_SINT = 0x5, + IMG_NUM_FORMAT_SNORM_OGL = 0x6, + IMG_NUM_FORMAT_FLOAT = 0x7, + IMG_NUM_FORMAT_RESERVED_8 = 0x8, + IMG_NUM_FORMAT_SRGB = 0x9, + IMG_NUM_FORMAT_UBNORM = 0xa, + IMG_NUM_FORMAT_UBNORM_OGL = 0xb, + IMG_NUM_FORMAT_UBINT = 0xc, + IMG_NUM_FORMAT_UBSCALED = 0xd, + IMG_NUM_FORMAT_RESERVED_14 = 0xe, + IMG_NUM_FORMAT_RESERVED_15 = 0xf, +} IMG_NUM_FORMAT; +typedef enum TileType { + ARRAY_COLOR_TILE = 0x0, + ARRAY_DEPTH_TILE = 0x1, +} TileType; +typedef enum NonDispTilingOrder { + ADDR_SURF_MICRO_TILING_DISPLAY = 0x0, + ADDR_SURF_MICRO_TILING_NON_DISPLAY = 0x1, +} NonDispTilingOrder; +typedef enum MicroTileMode { + ADDR_SURF_DISPLAY_MICRO_TILING = 0x0, + ADDR_SURF_THIN_MICRO_TILING = 0x1, + ADDR_SURF_DEPTH_MICRO_TILING = 0x2, + ADDR_SURF_ROTATED_MICRO_TILING = 0x3, + ADDR_SURF_THICK_MICRO_TILING = 0x4, +} MicroTileMode; +typedef enum TileSplit { + ADDR_SURF_TILE_SPLIT_64B = 0x0, + ADDR_SURF_TILE_SPLIT_128B = 0x1, + ADDR_SURF_TILE_SPLIT_256B = 0x2, + ADDR_SURF_TILE_SPLIT_512B = 0x3, + ADDR_SURF_TILE_SPLIT_1KB = 0x4, + ADDR_SURF_TILE_SPLIT_2KB = 0x5, + ADDR_SURF_TILE_SPLIT_4KB = 0x6, +} TileSplit; +typedef enum SampleSplit { + ADDR_SURF_SAMPLE_SPLIT_1 = 0x0, + ADDR_SURF_SAMPLE_SPLIT_2 = 0x1, + ADDR_SURF_SAMPLE_SPLIT_4 = 0x2, + ADDR_SURF_SAMPLE_SPLIT_8 = 0x3, +} SampleSplit; +typedef enum PipeConfig { + ADDR_SURF_P2 = 0x0, + ADDR_SURF_P2_RESERVED0 = 0x1, + ADDR_SURF_P2_RESERVED1 = 0x2, + ADDR_SURF_P2_RESERVED2 = 0x3, + ADDR_SURF_P4_8x16 = 0x4, + ADDR_SURF_P4_16x16 = 0x5, + ADDR_SURF_P4_16x32 = 0x6, + ADDR_SURF_P4_32x32 = 0x7, + ADDR_SURF_P8_16x16_8x16 = 0x8, + ADDR_SURF_P8_16x32_8x16 = 0x9, + ADDR_SURF_P8_32x32_8x16 = 0xa, + ADDR_SURF_P8_16x32_16x16 = 0xb, + ADDR_SURF_P8_32x32_16x16 = 0xc, + ADDR_SURF_P8_32x32_16x32 = 0xd, + ADDR_SURF_P8_32x64_32x32 = 0xe, +} PipeConfig; +typedef enum NumBanks { + ADDR_SURF_2_BANK = 0x0, + ADDR_SURF_4_BANK = 0x1, + ADDR_SURF_8_BANK = 0x2, + ADDR_SURF_16_BANK = 0x3, +} NumBanks; +typedef enum BankWidth { + ADDR_SURF_BANK_WIDTH_1 = 0x0, + ADDR_SURF_BANK_WIDTH_2 = 0x1, + ADDR_SURF_BANK_WIDTH_4 = 0x2, + ADDR_SURF_BANK_WIDTH_8 = 0x3, +} BankWidth; +typedef enum BankHeight { + ADDR_SURF_BANK_HEIGHT_1 = 0x0, + ADDR_SURF_BANK_HEIGHT_2 = 0x1, + ADDR_SURF_BANK_HEIGHT_4 = 0x2, + ADDR_SURF_BANK_HEIGHT_8 = 0x3, +} BankHeight; +typedef enum BankWidthHeight { + ADDR_SURF_BANK_WH_1 = 0x0, + ADDR_SURF_BANK_WH_2 = 0x1, + ADDR_SURF_BANK_WH_4 = 0x2, + ADDR_SURF_BANK_WH_8 = 0x3, +} BankWidthHeight; +typedef enum MacroTileAspect { + ADDR_SURF_MACRO_ASPECT_1 = 0x0, + ADDR_SURF_MACRO_ASPECT_2 = 0x1, + ADDR_SURF_MACRO_ASPECT_4 = 0x2, + ADDR_SURF_MACRO_ASPECT_8 = 0x3, +} MacroTileAspect; +typedef enum TCC_CACHE_POLICIES { + TCC_CACHE_POLICY_LRU = 0x0, + TCC_CACHE_POLICY_STREAM = 0x1, + TCC_CACHE_POLICY_BYPASS = 0x2, +} TCC_CACHE_POLICIES; +typedef enum PERFMON_COUNTER_MODE { + PERFMON_COUNTER_MODE_ACCUM = 0x0, + PERFMON_COUNTER_MODE_ACTIVE_CYCLES = 0x1, + PERFMON_COUNTER_MODE_MAX = 0x2, + PERFMON_COUNTER_MODE_DIRTY = 0x3, + PERFMON_COUNTER_MODE_SAMPLE = 0x4, + PERFMON_COUNTER_MODE_CYCLES_SINCE_FIRST_EVENT = 0x5, + PERFMON_COUNTER_MODE_CYCLES_SINCE_LAST_EVENT = 0x6, + PERFMON_COUNTER_MODE_CYCLES_GE_HI = 0x7, + PERFMON_COUNTER_MODE_CYCLES_EQ_HI = 0x8, + PERFMON_COUNTER_MODE_INACTIVE_CYCLES = 0x9, + PERFMON_COUNTER_MODE_RESERVED = 0xf, +} PERFMON_COUNTER_MODE; +typedef enum PERFMON_SPM_MODE { + PERFMON_SPM_MODE_OFF = 0x0, + PERFMON_SPM_MODE_16BIT_CLAMP = 0x1, + PERFMON_SPM_MODE_16BIT_NO_CLAMP = 0x2, + PERFMON_SPM_MODE_32BIT_CLAMP = 0x3, + 
PERFMON_SPM_MODE_32BIT_NO_CLAMP = 0x4, + PERFMON_SPM_MODE_RESERVED_5 = 0x5, + PERFMON_SPM_MODE_RESERVED_6 = 0x6, + PERFMON_SPM_MODE_RESERVED_7 = 0x7, + PERFMON_SPM_MODE_TEST_MODE_0 = 0x8, + PERFMON_SPM_MODE_TEST_MODE_1 = 0x9, + PERFMON_SPM_MODE_TEST_MODE_2 = 0xa, +} PERFMON_SPM_MODE; +typedef enum SurfaceTiling { + ARRAY_LINEAR = 0x0, + ARRAY_TILED = 0x1, +} SurfaceTiling; +typedef enum SurfaceArray { + ARRAY_1D = 0x0, + ARRAY_2D = 0x1, + ARRAY_3D = 0x2, + ARRAY_3D_SLICE = 0x3, +} SurfaceArray; +typedef enum ColorArray { + ARRAY_2D_ALT_COLOR = 0x0, + ARRAY_2D_COLOR = 0x1, + ARRAY_3D_SLICE_COLOR = 0x3, +} ColorArray; +typedef enum DepthArray { + ARRAY_2D_ALT_DEPTH = 0x0, + ARRAY_2D_DEPTH = 0x1, +} DepthArray; + +#endif /* DCE_8_0_ENUM_H */ diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_sh_mask.h index 8a29307..c331c9f 100644 --- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_sh_mask.h +++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_sh_mask.h @@ -4130,6 +4130,18 @@ #define PHY_AUX_CNTL__AUX_PAD_WAKE__SHIFT 0xe #define PHY_AUX_CNTL__AUX_PAD_RXSEL_MASK 0x10000 #define PHY_AUX_CNTL__AUX_PAD_RXSEL__SHIFT 0x10 +#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SCL_MASK_MASK 0x1 +#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SCL_MASK__SHIFT 0x0 +#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SCL_PD_DIS_MASK 0x2 +#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SCL_PD_DIS__SHIFT 0x1 +#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SCL_RECV_MASK 0x4 +#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SCL_RECV__SHIFT 0x2 +#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SDA_MASK_MASK 0x10 +#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SDA_MASK__SHIFT 0x4 +#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SDA_PD_DIS_MASK 0x20 +#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SDA_PD_DIS__SHIFT 0x5 +#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SDA_RECV_MASK 0x40 +#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SDA_RECV__SHIFT 0x6 #define DC_GPIO_I2CPAD_A__DC_GPIO_SCL_A_MASK 0x1 #define DC_GPIO_I2CPAD_A__DC_GPIO_SCL_A__SHIFT 0x0 #define DC_GPIO_I2CPAD_A__DC_GPIO_SDA_A_MASK 0x2 diff --git a/drivers/gpu/drm/amd/include/ivsrcid/ivsrcid_vislands30.h b/drivers/gpu/drm/amd/include/ivsrcid/ivsrcid_vislands30.h new file mode 100644 index 0000000..d21c6b1 --- /dev/null +++ b/drivers/gpu/drm/amd/include/ivsrcid/ivsrcid_vislands30.h @@ -0,0 +1,102 @@ +/* + * Volcanic Islands IV SRC Register documentation + * + * Copyright (C) 2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef _IVSRCID_VISLANDS30_H_ +#define _IVSRCID_VISLANDS30_H_ + + +// IV Source IDs + +#define VISLANDS30_IV_SRCID_D1_V_UPDATE_INT 7 // 0x07 +#define VISLANDS30_IV_EXTID_D1_V_UPDATE_INT 0 + +#define VISLANDS30_IV_SRCID_D1_GRPH_PFLIP 8 // 0x08 +#define VISLANDS30_IV_EXTID_D1_GRPH_PFLIP 0 + +#define VISLANDS30_IV_SRCID_D2_V_UPDATE_INT 9 // 0x09 +#define VISLANDS30_IV_EXTID_D2_V_UPDATE_INT 0 + +#define VISLANDS30_IV_SRCID_D2_GRPH_PFLIP 10 // 0x0a +#define VISLANDS30_IV_EXTID_D2_GRPH_PFLIP 0 + +#define VISLANDS30_IV_SRCID_D3_V_UPDATE_INT 11 // 0x0b +#define VISLANDS30_IV_EXTID_D3_V_UPDATE_INT 0 + +#define VISLANDS30_IV_SRCID_D3_GRPH_PFLIP 12 // 0x0c +#define VISLANDS30_IV_EXTID_D3_GRPH_PFLIP 0 + +#define VISLANDS30_IV_SRCID_D4_V_UPDATE_INT 13 // 0x0d +#define VISLANDS30_IV_EXTID_D4_V_UPDATE_INT 0 + +#define VISLANDS30_IV_SRCID_D4_GRPH_PFLIP 14 // 0x0e +#define VISLANDS30_IV_EXTID_D4_GRPH_PFLIP 0 + +#define VISLANDS30_IV_SRCID_D5_V_UPDATE_INT 15 // 0x0f +#define VISLANDS30_IV_EXTID_D5_V_UPDATE_INT 0 + +#define VISLANDS30_IV_SRCID_D5_GRPH_PFLIP 16 // 0x10 +#define VISLANDS30_IV_EXTID_D5_GRPH_PFLIP 0 + +#define VISLANDS30_IV_SRCID_D6_V_UPDATE_INT 17 // 0x11 +#define VISLANDS30_IV_EXTID_D6_V_UPDATE_INT 0 + +#define VISLANDS30_IV_SRCID_D6_GRPH_PFLIP 18 // 0x12 +#define VISLANDS30_IV_EXTID_D6_GRPH_PFLIP 0 + +#define VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A 42 // 0x2a +#define VISLANDS30_IV_EXTID_HOTPLUG_DETECT_A 0 + +#define VISLANDS30_IV_SRCID_HOTPLUG_DETECT_B 42 // 0x2a +#define VISLANDS30_IV_EXTID_HOTPLUG_DETECT_B 1 + +#define VISLANDS30_IV_SRCID_HOTPLUG_DETECT_C 42 // 0x2a +#define VISLANDS30_IV_EXTID_HOTPLUG_DETECT_C 2 + +#define VISLANDS30_IV_SRCID_HOTPLUG_DETECT_D 42 // 0x2a +#define VISLANDS30_IV_EXTID_HOTPLUG_DETECT_D 3 + +#define VISLANDS30_IV_SRCID_HOTPLUG_DETECT_E 42 // 0x2a +#define VISLANDS30_IV_EXTID_HOTPLUG_DETECT_E 4 + +#define VISLANDS30_IV_SRCID_HOTPLUG_DETECT_F 42 // 0x2a +#define VISLANDS30_IV_EXTID_HOTPLUG_DETECT_F 5 + +#define VISLANDS30_IV_SRCID_HPD_RX_A 42 // 0x2a +#define VISLANDS30_IV_EXTID_HPD_RX_A 6 + +#define VISLANDS30_IV_SRCID_HPD_RX_B 42 // 0x2a +#define VISLANDS30_IV_EXTID_HPD_RX_B 7 + +#define VISLANDS30_IV_SRCID_HPD_RX_C 42 // 0x2a +#define VISLANDS30_IV_EXTID_HPD_RX_C 8 + +#define VISLANDS30_IV_SRCID_HPD_RX_D 42 // 0x2a +#define VISLANDS30_IV_EXTID_HPD_RX_D 9 + +#define VISLANDS30_IV_SRCID_HPD_RX_E 42 // 0x2a +#define VISLANDS30_IV_EXTID_HPD_RX_E 10 + +#define VISLANDS30_IV_SRCID_HPD_RX_F 42 // 0x2a +#define VISLANDS30_IV_EXTID_HPD_RX_F 11 + +#endif // _IVSRCID_VISLANDS30_H_ diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c index aa67244..2ee4190f 100644 --- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c +++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c @@ -29,6 +29,7 @@ #include "pp_instance.h" #include "power_state.h" #include "eventmanager.h" +#include "pp_debug.h" #define PP_CHECK(handle) \ do { \ @@ -433,7 +434,10 @@ enum amd_pm_state_type pp_dpm_get_current_power_state(void *handle) case PP_StateUILabel_Performance: return POWER_STATE_TYPE_PERFORMANCE; default: - return POWER_STATE_TYPE_DEFAULT; + if (state->classification.flags & PP_StateClassificationFlag_Boot) + return POWER_STATE_TYPE_INTERNAL_BOOT; + else + return POWER_STATE_TYPE_DEFAULT; } } @@ -535,6 +539,112 @@ static int pp_dpm_get_temperature(void *handle) return hwmgr->hwmgr_func->get_temperature(hwmgr); } +static int pp_dpm_get_pp_num_states(void *handle, + struct pp_states_info *data) +{ + struct pp_hwmgr *hwmgr; + int i; + + if 
(!handle) + return -EINVAL; + + hwmgr = ((struct pp_instance *)handle)->hwmgr; + + if (hwmgr == NULL || hwmgr->ps == NULL) + return -EINVAL; + + data->nums = hwmgr->num_ps; + + for (i = 0; i < hwmgr->num_ps; i++) { + struct pp_power_state *state = (struct pp_power_state *) + ((unsigned long)hwmgr->ps + i * hwmgr->ps_size); + switch (state->classification.ui_label) { + case PP_StateUILabel_Battery: + data->states[i] = POWER_STATE_TYPE_BATTERY; + break; + case PP_StateUILabel_Balanced: + data->states[i] = POWER_STATE_TYPE_BALANCED; + break; + case PP_StateUILabel_Performance: + data->states[i] = POWER_STATE_TYPE_PERFORMANCE; + break; + default: + if (state->classification.flags & PP_StateClassificationFlag_Boot) + data->states[i] = POWER_STATE_TYPE_INTERNAL_BOOT; + else + data->states[i] = POWER_STATE_TYPE_DEFAULT; + } + } + + return 0; +} + +static int pp_dpm_get_pp_table(void *handle, char **table) +{ + struct pp_hwmgr *hwmgr; + + if (!handle) + return -EINVAL; + + hwmgr = ((struct pp_instance *)handle)->hwmgr; + + if (hwmgr == NULL || hwmgr->hwmgr_func == NULL || + hwmgr->hwmgr_func->get_pp_table == NULL) + return -EINVAL; + + return hwmgr->hwmgr_func->get_pp_table(hwmgr, table); +} + +static int pp_dpm_set_pp_table(void *handle, const char *buf, size_t size) +{ + struct pp_hwmgr *hwmgr; + + if (!handle) + return -EINVAL; + + hwmgr = ((struct pp_instance *)handle)->hwmgr; + + if (hwmgr == NULL || hwmgr->hwmgr_func == NULL || + hwmgr->hwmgr_func->set_pp_table == NULL) + return -EINVAL; + + return hwmgr->hwmgr_func->set_pp_table(hwmgr, buf, size); +} + +static int pp_dpm_force_clock_level(void *handle, + enum pp_clock_type type, int level) +{ + struct pp_hwmgr *hwmgr; + + if (!handle) + return -EINVAL; + + hwmgr = ((struct pp_instance *)handle)->hwmgr; + + if (hwmgr == NULL || hwmgr->hwmgr_func == NULL || + hwmgr->hwmgr_func->force_clock_level == NULL) + return -EINVAL; + + return hwmgr->hwmgr_func->force_clock_level(hwmgr, type, level); +} + +static int pp_dpm_print_clock_levels(void *handle, + enum pp_clock_type type, char *buf) +{ + struct pp_hwmgr *hwmgr; + + if (!handle) + return -EINVAL; + + hwmgr = ((struct pp_instance *)handle)->hwmgr; + + if (hwmgr == NULL || hwmgr->hwmgr_func == NULL || + hwmgr->hwmgr_func->print_clock_levels == NULL) + return -EINVAL; + + return hwmgr->hwmgr_func->print_clock_levels(hwmgr, type, buf); +} + const struct amd_powerplay_funcs pp_dpm_funcs = { .get_temperature = pp_dpm_get_temperature, .load_firmware = pp_dpm_load_fw, @@ -552,6 +662,11 @@ const struct amd_powerplay_funcs pp_dpm_funcs = { .get_fan_control_mode = pp_dpm_get_fan_control_mode, .set_fan_speed_percent = pp_dpm_set_fan_speed_percent, .get_fan_speed_percent = pp_dpm_get_fan_speed_percent, + .get_pp_num_states = pp_dpm_get_pp_num_states, + .get_pp_table = pp_dpm_get_pp_table, + .set_pp_table = pp_dpm_set_pp_table, + .force_clock_level = pp_dpm_force_clock_level, + .print_clock_levels = pp_dpm_print_clock_levels, }; static int amd_pp_instance_init(struct amd_pp_init *pp_init, @@ -635,10 +750,10 @@ int amd_powerplay_fini(void *handle) /* export this function to DAL */ -int amd_powerplay_display_configuration_change(void *handle, const void *input) +int amd_powerplay_display_configuration_change(void *handle, + const struct amd_pp_display_configuration *display_config) { struct pp_hwmgr *hwmgr; - const struct amd_pp_display_configuration *display_config = input; PP_CHECK((struct pp_instance *)handle); @@ -650,7 +765,7 @@ int amd_powerplay_display_configuration_change(void *handle, const void 
*input) } int amd_powerplay_get_display_power_level(void *handle, - struct amd_pp_dal_clock_info *output) + struct amd_pp_simple_clock_info *output) { struct pp_hwmgr *hwmgr; @@ -663,3 +778,86 @@ int amd_powerplay_get_display_power_level(void *handle, return phm_get_dal_power_level(hwmgr, output); } + +int amd_powerplay_get_current_clocks(void *handle, + struct amd_pp_clock_info *clocks) +{ + struct pp_hwmgr *hwmgr; + struct amd_pp_simple_clock_info simple_clocks; + struct pp_clock_info hw_clocks; + + PP_CHECK((struct pp_instance *)handle); + + if (clocks == NULL) + return -EINVAL; + + hwmgr = ((struct pp_instance *)handle)->hwmgr; + + phm_get_dal_power_level(hwmgr, &simple_clocks); + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_PowerContainment)) { + if (0 != phm_get_clock_info(hwmgr, &hwmgr->current_ps->hardware, &hw_clocks, PHM_PerformanceLevelDesignation_PowerContainment)) + PP_ASSERT_WITH_CODE(0, "Error in PHM_GetPowerContainmentClockInfo", return -1); + } else { + if (0 != phm_get_clock_info(hwmgr, &hwmgr->current_ps->hardware, &hw_clocks, PHM_PerformanceLevelDesignation_Activity)) + PP_ASSERT_WITH_CODE(0, "Error in PHM_GetClockInfo", return -1); + } + + clocks->min_engine_clock = hw_clocks.min_eng_clk; + clocks->max_engine_clock = hw_clocks.max_eng_clk; + clocks->min_memory_clock = hw_clocks.min_mem_clk; + clocks->max_memory_clock = hw_clocks.max_mem_clk; + clocks->min_bus_bandwidth = hw_clocks.min_bus_bandwidth; + clocks->max_bus_bandwidth = hw_clocks.max_bus_bandwidth; + + clocks->max_engine_clock_in_sr = hw_clocks.max_eng_clk; + clocks->min_engine_clock_in_sr = hw_clocks.min_eng_clk; + + clocks->max_clocks_state = simple_clocks.level; + + if (0 == phm_get_current_shallow_sleep_clocks(hwmgr, &hwmgr->current_ps->hardware, &hw_clocks)) { + clocks->max_engine_clock_in_sr = hw_clocks.max_eng_clk; + clocks->min_engine_clock_in_sr = hw_clocks.min_eng_clk; + } + + return 0; + +} + +int amd_powerplay_get_clock_by_type(void *handle, enum amd_pp_clock_type type, struct amd_pp_clocks *clocks) +{ + int result = -1; + + struct pp_hwmgr *hwmgr; + + PP_CHECK((struct pp_instance *)handle); + + if (clocks == NULL) + return -EINVAL; + + hwmgr = ((struct pp_instance *)handle)->hwmgr; + + result = phm_get_clock_by_type(hwmgr, type, clocks); + + return result; +} + +int amd_powerplay_get_display_mode_validation_clocks(void *handle, + struct amd_pp_simple_clock_info *clocks) +{ + int result = -1; + struct pp_hwmgr *hwmgr; + + PP_CHECK((struct pp_instance *)handle); + + if (clocks == NULL) + return -EINVAL; + + hwmgr = ((struct pp_instance *)handle)->hwmgr; + + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_DynamicPatchPowerState)) + result = phm_get_max_high_clocks(hwmgr, clocks); + + return result; +} + diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c index 0874ab4..ef1daf1 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c @@ -715,7 +715,6 @@ static int cz_tf_update_sclk_limit(struct pp_hwmgr *hwmgr, unsigned long clock = 0; unsigned long level; unsigned long stable_pstate_sclk; - struct PP_Clocks clocks; unsigned long percentage; cz_hwmgr->sclk_dpm.soft_min_clk = table->entries[0].clk; @@ -726,8 +725,9 @@ static int cz_tf_update_sclk_limit(struct pp_hwmgr *hwmgr, else cz_hwmgr->sclk_dpm.soft_max_clk = table->entries[table->count - 1].clk; - /*PECI_GetMinClockSettings(pHwMgr->pPECI, &clocks);*/ - clock = 
clocks.engineClock; + clock = hwmgr->display_config.min_core_set_clock; + if (clock == 0) + printk(KERN_ERR "[ powerplay ] min_core_set_clock not set\n"); if (cz_hwmgr->sclk_dpm.hard_min_clk != clock) { cz_hwmgr->sclk_dpm.hard_min_clk = clock; @@ -883,9 +883,9 @@ static int cz_tf_update_low_mem_pstate(struct pp_hwmgr *hwmgr, if (pnew_state->action == FORCE_HIGH) cz_nbdpm_pstate_enable_disable(hwmgr, false, disable_switch); - else if(pnew_state->action == CANCEL_FORCE_HIGH) - cz_nbdpm_pstate_enable_disable(hwmgr, false, disable_switch); - else + else if (pnew_state->action == CANCEL_FORCE_HIGH) + cz_nbdpm_pstate_enable_disable(hwmgr, true, disable_switch); + else cz_nbdpm_pstate_enable_disable(hwmgr, enable_low_mem_state, disable_switch); } return 0; @@ -1110,9 +1110,10 @@ static int cz_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, cast_const_PhwCzPowerState(&pcurrent_ps->hardware); struct cz_hwmgr *cz_hwmgr = (struct cz_hwmgr *)(hwmgr->backend); - struct PP_Clocks clocks; + struct PP_Clocks clocks = {0, 0, 0, 0}; bool force_high; - unsigned long num_of_active_displays = 4; + uint32_t num_of_active_displays = 0; + struct cgs_display_info info = {0}; cz_ps->evclk = hwmgr->vce_arbiter.evclk; cz_ps->ecclk = hwmgr->vce_arbiter.ecclk; @@ -1124,12 +1125,15 @@ static int cz_apply_state_adjust_rules(struct pp_hwmgr *hwmgr, cz_hwmgr->battery_state = (PP_StateUILabel_Battery == prequest_ps->classification.ui_label); - /* to do PECI_GetMinClockSettings(pHwMgr->pPECI, &clocks); */ - /* PECI_GetNumberOfActiveDisplays(pHwMgr->pPECI, &numOfActiveDisplays); */ + clocks.memoryClock = hwmgr->display_config.min_mem_set_clock != 0 ? + hwmgr->display_config.min_mem_set_clock : + cz_hwmgr->sys_info.nbp_memory_clock[1]; + + cgs_get_active_displays_info(hwmgr->device, &info); + num_of_active_displays = info.display_count; + if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_StablePState)) clocks.memoryClock = hwmgr->dyn_state.max_clock_voltage_on_ac.mclk; - else - clocks.memoryClock = 0; if (clocks.memoryClock < hwmgr->gfx_arbiter.mclk) clocks.memoryClock = hwmgr->gfx_arbiter.mclk; @@ -1199,6 +1203,7 @@ static int cz_hwmgr_backend_init(struct pp_hwmgr *hwmgr) printk(KERN_ERR "[ powerplay ] Fail to construct set_power_state\n"); return result; } + hwmgr->platform_descriptor.hardwareActivityPerformanceLevels = CZ_MAX_HARDWARE_POWERLEVELS; result = phm_construct_table(hwmgr, &cz_phm_enable_clock_power_gatings_master, &(hwmgr->enable_clock_power_gatings)); if (result != 0) { @@ -1630,10 +1635,10 @@ static void cz_hw_print_display_cfg( & PWRMGT_SEPARATION_TIME_MASK) << PWRMGT_SEPARATION_TIME_SHIFT; - data|= (hw_data->cc6_settings.cpu_cc6_disable ? 0x1 : 0x0) + data |= (hw_data->cc6_settings.cpu_cc6_disable ? 0x1 : 0x0) << PWRMGT_DISABLE_CPU_CSTATES_SHIFT; - data|= (hw_data->cc6_settings.cpu_pstate_disable ? 0x1 : 0x0) + data |= (hw_data->cc6_settings.cpu_pstate_disable ? 
0x1 : 0x0) << PWRMGT_DISABLE_CPU_PSTATES_SHIFT; PP_DBG_LOG("SetDisplaySizePowerParams data: 0x%X\n", @@ -1648,9 +1653,9 @@ static void cz_hw_print_display_cfg( } - static int cz_store_cc6_data(struct pp_hwmgr *hwmgr, uint32_t separation_time, +static int cz_store_cc6_data(struct pp_hwmgr *hwmgr, uint32_t separation_time, bool cc6_disable, bool pstate_disable, bool pstate_switch_disable) - { +{ struct cz_hwmgr *hw_data = (struct cz_hwmgr *)(hwmgr->backend); if (separation_time != @@ -1678,20 +1683,19 @@ static void cz_hw_print_display_cfg( return 0; } - static int cz_get_dal_power_level(struct pp_hwmgr *hwmgr, - struct amd_pp_dal_clock_info*info) +static int cz_get_dal_power_level(struct pp_hwmgr *hwmgr, + struct amd_pp_simple_clock_info *info) { uint32_t i; - const struct phm_clock_voltage_dependency_table * table = + const struct phm_clock_voltage_dependency_table *table = hwmgr->dyn_state.vddc_dep_on_dal_pwrl; - const struct phm_clock_and_voltage_limits* limits = + const struct phm_clock_and_voltage_limits *limits = &hwmgr->dyn_state.max_clock_voltage_on_ac; info->engine_max_clock = limits->sclk; info->memory_max_clock = limits->mclk; for (i = table->count - 1; i > 0; i--) { - if (limits->vddc >= table->entries[i].v) { info->level = table->entries[i].clk; return 0; @@ -1700,6 +1704,158 @@ static void cz_hw_print_display_cfg( return -EINVAL; } +static int cz_force_clock_level(struct pp_hwmgr *hwmgr, + enum pp_clock_type type, int level) +{ + if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) + return -EINVAL; + + switch (type) { + case PP_SCLK: + smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, + PPSMC_MSG_SetSclkSoftMin, + (1 << level)); + smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, + PPSMC_MSG_SetSclkSoftMax, + (1 << level)); + break; + default: + break; + } + + return 0; +} + +static int cz_print_clock_levels(struct pp_hwmgr *hwmgr, + enum pp_clock_type type, char *buf) +{ + struct phm_clock_voltage_dependency_table *sclk_table = + hwmgr->dyn_state.vddc_dependency_on_sclk; + int i, now, size = 0; + + switch (type) { + case PP_SCLK: + now = PHM_GET_FIELD(cgs_read_ind_register(hwmgr->device, + CGS_IND_REG__SMC, + ixTARGET_AND_CURRENT_PROFILE_INDEX), + TARGET_AND_CURRENT_PROFILE_INDEX, + CURR_SCLK_INDEX); + + for (i = 0; i < sclk_table->count; i++) + size += sprintf(buf + size, "%d: %uMhz %s\n", + i, sclk_table->entries[i].clk / 100, + (i == now) ? "*" : ""); + break; + default: + break; + } + return size; +} + +static int cz_get_performance_level(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state, + PHM_PerformanceLevelDesignation designation, uint32_t index, + PHM_PerformanceLevel *level) +{ + const struct cz_power_state *ps; + struct cz_hwmgr *data; + uint32_t level_index; + uint32_t i; + + if (level == NULL || hwmgr == NULL || state == NULL) + return -EINVAL; + + data = (struct cz_hwmgr *)(hwmgr->backend); + ps = cast_const_PhwCzPowerState(state); + + level_index = index > ps->level - 1 ? 
ps->level - 1 : index; + + level->coreClock = ps->levels[level_index].engineClock; + + if (designation == PHM_PerformanceLevelDesignation_PowerContainment) { + for (i = 1; i < ps->level; i++) { + if (ps->levels[i].engineClock > data->dce_slow_sclk_threshold) { + level->coreClock = ps->levels[i].engineClock; + break; + } + } + } + + if (level_index == 0) + level->memory_clock = data->sys_info.nbp_memory_clock[CZ_NUM_NBPMEMORYCLOCK - 1]; + else + level->memory_clock = data->sys_info.nbp_memory_clock[0]; + + level->vddc = (cz_convert_8Bit_index_to_voltage(hwmgr, ps->levels[level_index].vddcIndex) + 2) / 4; + level->nonLocalMemoryFreq = 0; + level->nonLocalMemoryWidth = 0; + + return 0; +} + +static int cz_get_current_shallow_sleep_clocks(struct pp_hwmgr *hwmgr, + const struct pp_hw_power_state *state, struct pp_clock_info *clock_info) +{ + const struct cz_power_state *ps = cast_const_PhwCzPowerState(state); + + clock_info->min_eng_clk = ps->levels[0].engineClock / (1 << (ps->levels[0].ssDividerIndex)); + clock_info->max_eng_clk = ps->levels[ps->level - 1].engineClock / (1 << (ps->levels[ps->level - 1].ssDividerIndex)); + + return 0; +} + +static int cz_get_clock_by_type(struct pp_hwmgr *hwmgr, enum amd_pp_clock_type type, + struct amd_pp_clocks *clocks) +{ + struct cz_hwmgr *data = (struct cz_hwmgr *)(hwmgr->backend); + int i; + struct phm_clock_voltage_dependency_table *table; + + clocks->count = cz_get_max_sclk_level(hwmgr); + switch (type) { + case amd_pp_disp_clock: + for (i = 0; i < clocks->count; i++) + clocks->clock[i] = data->sys_info.display_clock[i]; + break; + case amd_pp_sys_clock: + table = hwmgr->dyn_state.vddc_dependency_on_sclk; + for (i = 0; i < clocks->count; i++) + clocks->clock[i] = table->entries[i].clk; + break; + case amd_pp_mem_clock: + clocks->count = CZ_NUM_NBPMEMORYCLOCK; + for (i = 0; i < clocks->count; i++) + clocks->clock[i] = data->sys_info.nbp_memory_clock[clocks->count - 1 - i]; + break; + default: + return -1; + } + + return 0; +} + +static int cz_get_max_high_clocks(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_info *clocks) +{ + struct phm_clock_voltage_dependency_table *table = + hwmgr->dyn_state.vddc_dependency_on_sclk; + unsigned long level; + const struct phm_clock_and_voltage_limits *limits = + &hwmgr->dyn_state.max_clock_voltage_on_ac; + + if ((NULL == table) || (table->count <= 0) || (clocks == NULL)) + return -EINVAL; + + level = cz_get_max_sclk_level(hwmgr) - 1; + + if (level < table->count) + clocks->engine_max_clock = table->entries[level].clk; + else + clocks->engine_max_clock = table->entries[table->count - 1].clk; + + clocks->memory_max_clock = limits->mclk; + + return 0; +} + static const struct pp_hwmgr_func cz_hwmgr_funcs = { .backend_init = cz_hwmgr_backend_init, .backend_fini = cz_hwmgr_backend_fini, @@ -1718,7 +1874,13 @@ static const struct pp_hwmgr_func cz_hwmgr_funcs = { .print_current_perforce_level = cz_print_current_perforce_level, .set_cpu_power_state = cz_set_cpu_power_state, .store_cc6_data = cz_store_cc6_data, - .get_dal_power_level= cz_get_dal_power_level, + .force_clock_level = cz_force_clock_level, + .print_clock_levels = cz_print_clock_levels, + .get_dal_power_level = cz_get_dal_power_level, + .get_performance_level = cz_get_performance_level, + .get_current_shallow_sleep_clocks = cz_get_current_shallow_sleep_clocks, + .get_clock_by_type = cz_get_clock_by_type, + .get_max_high_clocks = cz_get_max_high_clocks, }; int cz_hwmgr_init(struct pp_hwmgr *hwmgr) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c 
b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c index 28031a7..5cca2ec 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c @@ -5073,6 +5073,125 @@ static int fiji_get_fan_control_mode(struct pp_hwmgr *hwmgr) CG_FDO_CTRL2, FDO_PWM_MODE); } +static int fiji_get_pp_table(struct pp_hwmgr *hwmgr, char **table) +{ + struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend); + + *table = (char *)&data->smc_state_table; + + return sizeof(struct SMU73_Discrete_DpmTable); +} + +static int fiji_set_pp_table(struct pp_hwmgr *hwmgr, const char *buf, size_t size) +{ + struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend); + + void *table = (void *)&data->smc_state_table; + + memcpy(table, buf, size); + + return 0; +} + +static int fiji_force_clock_level(struct pp_hwmgr *hwmgr, + enum pp_clock_type type, int level) +{ + struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend); + + if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) + return -EINVAL; + + switch (type) { + case PP_SCLK: + if (!data->sclk_dpm_key_disabled) + smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, + PPSMC_MSG_SCLKDPM_SetEnabledMask, + (1 << level)); + break; + case PP_MCLK: + if (!data->mclk_dpm_key_disabled) + smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, + PPSMC_MSG_MCLKDPM_SetEnabledMask, + (1 << level)); + break; + case PP_PCIE: + if (!data->pcie_dpm_key_disabled) + smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, + PPSMC_MSG_PCIeDPM_ForceLevel, + (1 << level)); + break; + default: + break; + } + + return 0; +} + +static int fiji_print_clock_levels(struct pp_hwmgr *hwmgr, + enum pp_clock_type type, char *buf) +{ + struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend); + struct fiji_single_dpm_table *sclk_table = &(data->dpm_table.sclk_table); + struct fiji_single_dpm_table *mclk_table = &(data->dpm_table.mclk_table); + struct fiji_single_dpm_table *pcie_table = &(data->dpm_table.pcie_speed_table); + int i, now, size = 0; + uint32_t clock, pcie_speed; + + switch (type) { + case PP_SCLK: + smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_API_GetSclkFrequency); + clock = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0); + + for (i = 0; i < sclk_table->count; i++) { + if (clock > sclk_table->dpm_levels[i].value) + continue; + break; + } + now = i; + + for (i = 0; i < sclk_table->count; i++) + size += sprintf(buf + size, "%d: %uMhz %s\n", + i, sclk_table->dpm_levels[i].value / 100, + (i == now) ? "*" : ""); + break; + case PP_MCLK: + smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_API_GetMclkFrequency); + clock = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0); + + for (i = 0; i < mclk_table->count; i++) { + if (clock > mclk_table->dpm_levels[i].value) + continue; + break; + } + now = i; + + for (i = 0; i < mclk_table->count; i++) + size += sprintf(buf + size, "%d: %uMhz %s\n", + i, mclk_table->dpm_levels[i].value / 100, + (i == now) ? "*" : ""); + break; + case PP_PCIE: + pcie_speed = fiji_get_current_pcie_speed(hwmgr); + for (i = 0; i < pcie_table->count; i++) { + if (pcie_speed != pcie_table->dpm_levels[i].value) + continue; + break; + } + now = i; + + for (i = 0; i < pcie_table->count; i++) + size += sprintf(buf + size, "%d: %s %s\n", i, + (pcie_table->dpm_levels[i].value == 0) ? "2.5GB, x1" : + (pcie_table->dpm_levels[i].value == 1) ? "5.0GB, x16" : + (pcie_table->dpm_levels[i].value == 2) ? "8.0GB, x16" : "", + (i == now) ? 
"*" : ""); + break; + default: + break; + } + return size; +} + static const struct pp_hwmgr_func fiji_hwmgr_funcs = { .backend_init = &fiji_hwmgr_backend_init, .backend_fini = &tonga_hwmgr_backend_fini, @@ -5108,6 +5227,10 @@ static const struct pp_hwmgr_func fiji_hwmgr_funcs = { .register_internal_thermal_interrupt = fiji_register_internal_thermal_interrupt, .set_fan_control_mode = fiji_set_fan_control_mode, .get_fan_control_mode = fiji_get_fan_control_mode, + .get_pp_table = fiji_get_pp_table, + .set_pp_table = fiji_set_pp_table, + .force_clock_level = fiji_force_clock_level, + .print_clock_levels = fiji_print_clock_levels, }; int fiji_hwmgr_init(struct pp_hwmgr *hwmgr) diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c index 0f2d5e4..be31bed 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c @@ -26,7 +26,7 @@ #include "power_state.h" #include "pp_acpi.h" #include "amd_acpi.h" -#include "amd_powerplay.h" +#include "pp_debug.h" #define PHM_FUNC_CHECK(hw) \ do { \ @@ -313,13 +313,12 @@ int phm_store_dal_configuration_data(struct pp_hwmgr *hwmgr, } int phm_get_dal_power_level(struct pp_hwmgr *hwmgr, - struct amd_pp_dal_clock_info *info) + struct amd_pp_simple_clock_info *info) { PHM_FUNC_CHECK(hwmgr); if (info == NULL || hwmgr->hwmgr_func->get_dal_power_level == NULL) return -EINVAL; - return hwmgr->hwmgr_func->get_dal_power_level(hwmgr, info); } @@ -332,3 +331,91 @@ int phm_set_cpu_power_state(struct pp_hwmgr *hwmgr) return 0; } + + +int phm_get_performance_level(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state, + PHM_PerformanceLevelDesignation designation, uint32_t index, + PHM_PerformanceLevel *level) +{ + PHM_FUNC_CHECK(hwmgr); + if (hwmgr->hwmgr_func->get_performance_level == NULL) + return -EINVAL; + + return hwmgr->hwmgr_func->get_performance_level(hwmgr, state, designation, index, level); + + +} + + +/** +* Gets Clock Info. +* +* @param pHwMgr the address of the powerplay hardware manager. +* @param pPowerState the address of the Power State structure. +* @param pClockInfo the address of PP_ClockInfo structure where the result will be returned. +* @exception PP_Result_Failed if any of the paramters is NULL, otherwise the return value from the back-end. 
+*/ +int phm_get_clock_info(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state, struct pp_clock_info *pclock_info, + PHM_PerformanceLevelDesignation designation) +{ + int result; + PHM_PerformanceLevel performance_level; + + PHM_FUNC_CHECK(hwmgr); + + PP_ASSERT_WITH_CODE((NULL != state), "Invalid Input!", return -EINVAL); + PP_ASSERT_WITH_CODE((NULL != pclock_info), "Invalid Input!", return -EINVAL); + + result = phm_get_performance_level(hwmgr, state, PHM_PerformanceLevelDesignation_Activity, 0, &performance_level); + + PP_ASSERT_WITH_CODE((0 == result), "Failed to retrieve minimum clocks.", return result); + + + pclock_info->min_mem_clk = performance_level.memory_clock; + pclock_info->min_eng_clk = performance_level.coreClock; + pclock_info->min_bus_bandwidth = performance_level.nonLocalMemoryFreq * performance_level.nonLocalMemoryWidth; + + + result = phm_get_performance_level(hwmgr, state, designation, + (hwmgr->platform_descriptor.hardwareActivityPerformanceLevels - 1), &performance_level); + + PP_ASSERT_WITH_CODE((0 == result), "Failed to retrieve maximum clocks.", return result); + + pclock_info->max_mem_clk = performance_level.memory_clock; + pclock_info->max_eng_clk = performance_level.coreClock; + pclock_info->max_bus_bandwidth = performance_level.nonLocalMemoryFreq * performance_level.nonLocalMemoryWidth; + + return 0; +} + +int phm_get_current_shallow_sleep_clocks(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state, struct pp_clock_info *clock_info) +{ + PHM_FUNC_CHECK(hwmgr); + + if (hwmgr->hwmgr_func->get_current_shallow_sleep_clocks == NULL) + return -EINVAL; + + return hwmgr->hwmgr_func->get_current_shallow_sleep_clocks(hwmgr, state, clock_info); + +} + +int phm_get_clock_by_type(struct pp_hwmgr *hwmgr, enum amd_pp_clock_type type, struct amd_pp_clocks *clocks) +{ + PHM_FUNC_CHECK(hwmgr); + + if (hwmgr->hwmgr_func->get_clock_by_type == NULL) + return -EINVAL; + + return hwmgr->hwmgr_func->get_clock_by_type(hwmgr, type, clocks); + +} + +int phm_get_max_high_clocks(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_info *clocks) +{ + PHM_FUNC_CHECK(hwmgr); + + if (hwmgr->hwmgr_func->get_max_high_clocks == NULL) + return -EINVAL; + + return hwmgr->hwmgr_func->get_max_high_clocks(hwmgr, clocks); +} diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppevvmath.h b/drivers/gpu/drm/amd/powerplay/hwmgr/ppevvmath.h index b7429a5..b10df32 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppevvmath.h +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppevvmath.h @@ -293,7 +293,7 @@ fInt GetScaledFraction(int X, int factor) } if (factor == 1) - return (ConvertToFraction(X)); + return ConvertToFraction(X); fValue = fDivide(ConvertToFraction(X * uPow(-1, bNEGATED)), ConvertToFraction(factor)); @@ -371,7 +371,7 @@ fInt fDivide (fInt X, fInt Y) fZERO = ConvertToFraction(0); if (Equal(Y, fZERO)) - return fZERO; + return fZERO; longlongX = (int64_t)X.full; longlongY = (int64_t)Y.full; diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c index 44a9250..bc83fa3 100644 --- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c +++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c @@ -6018,6 +6018,125 @@ static int tonga_get_fan_control_mode(struct pp_hwmgr *hwmgr) CG_FDO_CTRL2, FDO_PWM_MODE); } +static int tonga_get_pp_table(struct pp_hwmgr *hwmgr, char **table) +{ + struct tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend); + + *table = (char *)&data->smc_state_table; + + return sizeof(struct SMU72_Discrete_DpmTable); 
+} + +static int tonga_set_pp_table(struct pp_hwmgr *hwmgr, const char *buf, size_t size) +{ + struct tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend); + + void *table = (void *)&data->smc_state_table; + + memcpy(table, buf, size); + + return 0; +} + +static int tonga_force_clock_level(struct pp_hwmgr *hwmgr, + enum pp_clock_type type, int level) +{ + struct tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend); + + if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) + return -EINVAL; + + switch (type) { + case PP_SCLK: + if (!data->sclk_dpm_key_disabled) + smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, + PPSMC_MSG_SCLKDPM_SetEnabledMask, + (1 << level)); + break; + case PP_MCLK: + if (!data->mclk_dpm_key_disabled) + smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, + PPSMC_MSG_MCLKDPM_SetEnabledMask, + (1 << level)); + break; + case PP_PCIE: + if (!data->pcie_dpm_key_disabled) + smum_send_msg_to_smc_with_parameter(hwmgr->smumgr, + PPSMC_MSG_PCIeDPM_ForceLevel, + (1 << level)); + break; + default: + break; + } + + return 0; +} + +static int tonga_print_clock_levels(struct pp_hwmgr *hwmgr, + enum pp_clock_type type, char *buf) +{ + struct tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend); + struct tonga_single_dpm_table *sclk_table = &(data->dpm_table.sclk_table); + struct tonga_single_dpm_table *mclk_table = &(data->dpm_table.mclk_table); + struct tonga_single_dpm_table *pcie_table = &(data->dpm_table.pcie_speed_table); + int i, now, size = 0; + uint32_t clock, pcie_speed; + + switch (type) { + case PP_SCLK: + smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_API_GetSclkFrequency); + clock = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0); + + for (i = 0; i < sclk_table->count; i++) { + if (clock > sclk_table->dpm_levels[i].value) + continue; + break; + } + now = i; + + for (i = 0; i < sclk_table->count; i++) + size += sprintf(buf + size, "%d: %uMhz %s\n", + i, sclk_table->dpm_levels[i].value / 100, + (i == now) ? "*" : ""); + break; + case PP_MCLK: + smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_API_GetMclkFrequency); + clock = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0); + + for (i = 0; i < mclk_table->count; i++) { + if (clock > mclk_table->dpm_levels[i].value) + continue; + break; + } + now = i; + + for (i = 0; i < mclk_table->count; i++) + size += sprintf(buf + size, "%d: %uMhz %s\n", + i, mclk_table->dpm_levels[i].value / 100, + (i == now) ? "*" : ""); + break; + case PP_PCIE: + pcie_speed = tonga_get_current_pcie_speed(hwmgr); + for (i = 0; i < pcie_table->count; i++) { + if (pcie_speed != pcie_table->dpm_levels[i].value) + continue; + break; + } + now = i; + + for (i = 0; i < pcie_table->count; i++) + size += sprintf(buf + size, "%d: %s %s\n", i, + (pcie_table->dpm_levels[i].value == 0) ? "2.5GB, x8" : + (pcie_table->dpm_levels[i].value == 1) ? "5.0GB, x16" : + (pcie_table->dpm_levels[i].value == 2) ? "8.0GB, x16" : "", + (i == now) ? 
"*" : ""); + break; + default: + break; + } + return size; +} + static const struct pp_hwmgr_func tonga_hwmgr_funcs = { .backend_init = &tonga_hwmgr_backend_init, .backend_fini = &tonga_hwmgr_backend_fini, @@ -6055,6 +6174,10 @@ static const struct pp_hwmgr_func tonga_hwmgr_funcs = { .check_states_equal = tonga_check_states_equal, .set_fan_control_mode = tonga_set_fan_control_mode, .get_fan_control_mode = tonga_get_fan_control_mode, + .get_pp_table = tonga_get_pp_table, + .set_pp_table = tonga_set_pp_table, + .force_clock_level = tonga_force_clock_level, + .print_clock_levels = tonga_print_clock_levels, }; int tonga_hwmgr_init(struct pp_hwmgr *hwmgr) diff --git a/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h b/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h index e61a3e6..7255f7d 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h +++ b/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h @@ -29,6 +29,7 @@ #include "amd_shared.h" #include "cgs_common.h" + enum amd_pp_event { AMD_PP_EVENT_INITIALIZE = 0, AMD_PP_EVENT_UNINITIALIZE, @@ -123,6 +124,7 @@ enum amd_dpm_forced_level { AMD_DPM_FORCED_LEVEL_AUTO = 0, AMD_DPM_FORCED_LEVEL_LOW = 1, AMD_DPM_FORCED_LEVEL_HIGH = 2, + AMD_DPM_FORCED_LEVEL_MANUAL = 3, }; struct amd_pp_init { @@ -212,12 +214,55 @@ struct amd_pp_display_configuration { uint32_t dce_tolerable_mclk_in_active_latency; }; -struct amd_pp_dal_clock_info { +struct amd_pp_simple_clock_info { uint32_t engine_max_clock; uint32_t memory_max_clock; uint32_t level; }; +enum PP_DAL_POWERLEVEL { + PP_DAL_POWERLEVEL_INVALID = 0, + PP_DAL_POWERLEVEL_ULTRALOW, + PP_DAL_POWERLEVEL_LOW, + PP_DAL_POWERLEVEL_NOMINAL, + PP_DAL_POWERLEVEL_PERFORMANCE, + + PP_DAL_POWERLEVEL_0 = PP_DAL_POWERLEVEL_ULTRALOW, + PP_DAL_POWERLEVEL_1 = PP_DAL_POWERLEVEL_LOW, + PP_DAL_POWERLEVEL_2 = PP_DAL_POWERLEVEL_NOMINAL, + PP_DAL_POWERLEVEL_3 = PP_DAL_POWERLEVEL_PERFORMANCE, + PP_DAL_POWERLEVEL_4 = PP_DAL_POWERLEVEL_3+1, + PP_DAL_POWERLEVEL_5 = PP_DAL_POWERLEVEL_4+1, + PP_DAL_POWERLEVEL_6 = PP_DAL_POWERLEVEL_5+1, + PP_DAL_POWERLEVEL_7 = PP_DAL_POWERLEVEL_6+1, +}; + +struct amd_pp_clock_info { + uint32_t min_engine_clock; + uint32_t max_engine_clock; + uint32_t min_memory_clock; + uint32_t max_memory_clock; + uint32_t min_bus_bandwidth; + uint32_t max_bus_bandwidth; + uint32_t max_engine_clock_in_sr; + uint32_t min_engine_clock_in_sr; + enum PP_DAL_POWERLEVEL max_clocks_state; +}; + +enum amd_pp_clock_type { + amd_pp_disp_clock = 1, + amd_pp_sys_clock, + amd_pp_mem_clock +}; + +#define MAX_NUM_CLOCKS 16 + +struct amd_pp_clocks { + uint32_t count; + uint32_t clock[MAX_NUM_CLOCKS]; +}; + + enum { PP_GROUP_UNKNOWN = 0, PP_GROUP_GFX = 1, @@ -225,6 +270,17 @@ enum { PP_GROUP_MAX }; +enum pp_clock_type { + PP_SCLK, + PP_MCLK, + PP_PCIE, +}; + +struct pp_states_info { + uint32_t nums; + uint32_t states[16]; +}; + #define PP_GROUP_MASK 0xF0000000 #define PP_GROUP_SHIFT 28 @@ -278,6 +334,11 @@ struct amd_powerplay_funcs { int (*get_fan_control_mode)(void *handle); int (*set_fan_speed_percent)(void *handle, uint32_t percent); int (*get_fan_speed_percent)(void *handle, uint32_t *speed); + int (*get_pp_num_states)(void *handle, struct pp_states_info *data); + int (*get_pp_table)(void *handle, char **table); + int (*set_pp_table)(void *handle, const char *buf, size_t size); + int (*force_clock_level)(void *handle, enum pp_clock_type type, int level); + int (*print_clock_levels)(void *handle, enum pp_clock_type type, char *buf); }; struct amd_powerplay { @@ -288,12 +349,23 @@ struct amd_powerplay { int 
amd_powerplay_init(struct amd_pp_init *pp_init, struct amd_powerplay *amd_pp); + int amd_powerplay_fini(void *handle); -int amd_powerplay_display_configuration_change(void *handle, const void *input); +int amd_powerplay_display_configuration_change(void *handle, + const struct amd_pp_display_configuration *input); int amd_powerplay_get_display_power_level(void *handle, - struct amd_pp_dal_clock_info *output); + struct amd_pp_simple_clock_info *output); + +int amd_powerplay_get_current_clocks(void *handle, + struct amd_pp_clock_info *output); + +int amd_powerplay_get_clock_by_type(void *handle, + enum amd_pp_clock_type type, + struct amd_pp_clocks *clocks); +int amd_powerplay_get_display_mode_validation_clocks(void *handle, + struct amd_pp_simple_clock_info *output); #endif /* _AMD_POWERPLAY_H_ */ diff --git a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h index 91795ef..040d3f7 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h @@ -31,6 +31,7 @@ struct pp_power_state; enum amd_dpm_forced_level; struct PP_TemperatureRange; + struct phm_fan_speed_info { uint32_t min_percent; uint32_t max_percent; @@ -290,6 +291,15 @@ struct PP_Clocks { uint32_t engineClockInSR; }; +struct pp_clock_info { + uint32_t min_mem_clk; + uint32_t max_mem_clk; + uint32_t min_eng_clk; + uint32_t max_eng_clk; + uint32_t min_bus_bandwidth; + uint32_t max_bus_bandwidth; +}; + struct phm_platform_descriptor { uint32_t platformCaps[PHM_MAX_NUM_CAPS_ULONG_ENTRIES]; uint32_t vbiosInterruptId; @@ -323,24 +333,6 @@ struct phm_clocks { uint32_t clock[MAX_NUM_CLOCKS]; }; -enum PP_DAL_POWERLEVEL { - PP_DAL_POWERLEVEL_INVALID = 0, - PP_DAL_POWERLEVEL_ULTRALOW, - PP_DAL_POWERLEVEL_LOW, - PP_DAL_POWERLEVEL_NOMINAL, - PP_DAL_POWERLEVEL_PERFORMANCE, - - PP_DAL_POWERLEVEL_0 = PP_DAL_POWERLEVEL_ULTRALOW, - PP_DAL_POWERLEVEL_1 = PP_DAL_POWERLEVEL_LOW, - PP_DAL_POWERLEVEL_2 = PP_DAL_POWERLEVEL_NOMINAL, - PP_DAL_POWERLEVEL_3 = PP_DAL_POWERLEVEL_PERFORMANCE, - PP_DAL_POWERLEVEL_4 = PP_DAL_POWERLEVEL_3+1, - PP_DAL_POWERLEVEL_5 = PP_DAL_POWERLEVEL_4+1, - PP_DAL_POWERLEVEL_6 = PP_DAL_POWERLEVEL_5+1, - PP_DAL_POWERLEVEL_7 = PP_DAL_POWERLEVEL_6+1, -}; - - extern int phm_enable_clock_power_gatings(struct pp_hwmgr *hwmgr); extern int phm_powergate_uvd(struct pp_hwmgr *hwmgr, bool gate); extern int phm_powergate_vce(struct pp_hwmgr *hwmgr, bool gate); @@ -375,11 +367,25 @@ extern int phm_store_dal_configuration_data(struct pp_hwmgr *hwmgr, const struct amd_pp_display_configuration *display_config); extern int phm_get_dal_power_level(struct pp_hwmgr *hwmgr, - struct amd_pp_dal_clock_info*info); + struct amd_pp_simple_clock_info *info); extern int phm_set_cpu_power_state(struct pp_hwmgr *hwmgr); extern int phm_power_down_asic(struct pp_hwmgr *hwmgr); +extern int phm_get_performance_level(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state, + PHM_PerformanceLevelDesignation designation, uint32_t index, + PHM_PerformanceLevel *level); + +extern int phm_get_clock_info(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state, + struct pp_clock_info *pclock_info, + PHM_PerformanceLevelDesignation designation); + +extern int phm_get_current_shallow_sleep_clocks(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state, struct pp_clock_info *clock_info); + +extern int phm_get_clock_by_type(struct pp_hwmgr *hwmgr, enum amd_pp_clock_type type, struct amd_pp_clocks *clocks); + +extern int 
phm_get_max_high_clocks(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_info *clocks); + #endif /* _HARDWARE_MANAGER_H_ */ diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h index aeaa3db..928f5a7 100644 --- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h +++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h @@ -325,8 +325,18 @@ struct pp_hwmgr_func { bool cc6_disable, bool pstate_disable, bool pstate_switch_disable); int (*get_dal_power_level)(struct pp_hwmgr *hwmgr, - struct amd_pp_dal_clock_info *info); + struct amd_pp_simple_clock_info *info); + int (*get_performance_level)(struct pp_hwmgr *, const struct pp_hw_power_state *, + PHM_PerformanceLevelDesignation, uint32_t, PHM_PerformanceLevel *); + int (*get_current_shallow_sleep_clocks)(struct pp_hwmgr *hwmgr, + const struct pp_hw_power_state *state, struct pp_clock_info *clock_info); + int (*get_clock_by_type)(struct pp_hwmgr *hwmgr, enum amd_pp_clock_type type, struct amd_pp_clocks *clocks); + int (*get_max_high_clocks)(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_info *clocks); int (*power_off_asic)(struct pp_hwmgr *hwmgr); + int (*get_pp_table)(struct pp_hwmgr *hwmgr, char **table); + int (*set_pp_table)(struct pp_hwmgr *hwmgr, const char *buf, size_t size); + int (*force_clock_level)(struct pp_hwmgr *hwmgr, enum pp_clock_type type, int level); + int (*print_clock_levels)(struct pp_hwmgr *hwmgr, enum pp_clock_type type, char *buf); }; struct pp_table_func { diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 8b2becd..a5ff945 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -229,6 +229,14 @@ static void amd_sched_entity_wakeup(struct fence *f, struct fence_cb *cb) amd_sched_wakeup(entity->sched); } +static void amd_sched_entity_clear_dep(struct fence *f, struct fence_cb *cb) +{ + struct amd_sched_entity *entity = + container_of(cb, struct amd_sched_entity, cb); + entity->dependency = NULL; + fence_put(f); +} + static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity) { struct amd_gpu_scheduler *sched = entity->sched; @@ -251,7 +259,7 @@ static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity) } /* Wait for fence to be scheduled */ - entity->cb.func = amd_sched_entity_wakeup; + entity->cb.func = amd_sched_entity_clear_dep; list_add_tail(&entity->cb.node, &s_fence->scheduled_cb); return true; } diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 4c30d8c..0600140 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -4219,13 +4219,20 @@ int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); return r; } - r = radeon_fence_wait(ib.fence, false); - if (r) { + r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies( + RADEON_USEC_IB_TEST_TIMEOUT)); + if (r < 0) { DRM_ERROR("radeon: fence wait failed (%d).\n", r); radeon_scratch_free(rdev, scratch); radeon_ib_free(rdev, &ib); return r; + } else if (r == 0) { + DRM_ERROR("radeon: fence wait timed out.\n"); + radeon_scratch_free(rdev, scratch); + radeon_ib_free(rdev, &ib); + return -ETIMEDOUT; } + r = 0; for (i = 0; i < rdev->usec_timeout; i++) { tmp = RREG32(scratch); if (tmp == 0xDEADBEEF) diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index d16f2ee..9c351dc 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ 
b/drivers/gpu/drm/radeon/cik_sdma.c @@ -737,11 +737,16 @@ int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); return r; } - r = radeon_fence_wait(ib.fence, false); - if (r) { + r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies( + RADEON_USEC_IB_TEST_TIMEOUT)); + if (r < 0) { DRM_ERROR("radeon: fence wait failed (%d).\n", r); return r; + } else if (r == 0) { + DRM_ERROR("radeon: fence wait timed out.\n"); + return -ETIMEDOUT; } + r = 0; for (i = 0; i < rdev->usec_timeout; i++) { tmp = le32_to_cpu(rdev->wb.wb[index/4]); if (tmp == 0xDEADBEEF) diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 5eae0a8..6e478a2 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -3732,11 +3732,17 @@ int r100_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); goto free_ib; } - r = radeon_fence_wait(ib.fence, false); - if (r) { + r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies( + RADEON_USEC_IB_TEST_TIMEOUT)); + if (r < 0) { DRM_ERROR("radeon: fence wait failed (%d).\n", r); goto free_ib; + } else if (r == 0) { + DRM_ERROR("radeon: fence wait timed out.\n"); + r = -ETIMEDOUT; + goto free_ib; } + r = 0; for (i = 0; i < rdev->usec_timeout; i++) { tmp = RREG32(scratch); if (tmp == 0xDEADBEEF) { diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index cc2fdf0..ed12104 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -3381,11 +3381,17 @@ int r600_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); goto free_ib; } - r = radeon_fence_wait(ib.fence, false); - if (r) { + r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies( + RADEON_USEC_IB_TEST_TIMEOUT)); + if (r < 0) { DRM_ERROR("radeon: fence wait failed (%d).\n", r); goto free_ib; + } else if (r == 0) { + DRM_ERROR("radeon: fence wait timed out.\n"); + r = -ETIMEDOUT; + goto free_ib; } + r = 0; for (i = 0; i < rdev->usec_timeout; i++) { tmp = RREG32(scratch); if (tmp == 0xDEADBEEF) diff --git a/drivers/gpu/drm/radeon/r600_dma.c b/drivers/gpu/drm/radeon/r600_dma.c index d2dd29a..fb65e6f 100644 --- a/drivers/gpu/drm/radeon/r600_dma.c +++ b/drivers/gpu/drm/radeon/r600_dma.c @@ -368,11 +368,16 @@ int r600_dma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) DRM_ERROR("radeon: failed to schedule ib (%d).\n", r); return r; } - r = radeon_fence_wait(ib.fence, false); - if (r) { + r = radeon_fence_wait_timeout(ib.fence, false, usecs_to_jiffies( + RADEON_USEC_IB_TEST_TIMEOUT)); + if (r < 0) { DRM_ERROR("radeon: fence wait failed (%d).\n", r); return r; + } else if (r == 0) { + DRM_ERROR("radeon: fence wait timed out.\n"); + return -ETIMEDOUT; } + r = 0; for (i = 0; i < rdev->usec_timeout; i++) { tmp = le32_to_cpu(rdev->wb.wb[index/4]); if (tmp == 0xDEADBEEF) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 78a51b3..007be29 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -120,6 +120,7 @@ extern int radeon_mst; */ #define RADEON_MAX_USEC_TIMEOUT 100000 /* 100 ms */ #define RADEON_FENCE_JIFFIES_TIMEOUT (HZ / 2) +#define RADEON_USEC_IB_TEST_TIMEOUT 1000000 /* 1s */ /* RADEON_IB_POOL_SIZE must be a power of 2 */ #define RADEON_IB_POOL_SIZE 16 #define RADEON_DEBUGFS_MAX_COMPONENTS 32 @@ -382,6 +383,7 @@ void 
radeon_fence_driver_force_completion(struct radeon_device *rdev, int ring); int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, int ring); void radeon_fence_process(struct radeon_device *rdev, int ring); bool radeon_fence_signaled(struct radeon_fence *fence); +long radeon_fence_wait_timeout(struct radeon_fence *fence, bool interruptible, long timeout); int radeon_fence_wait(struct radeon_fence *fence, bool interruptible); int radeon_fence_wait_next(struct radeon_device *rdev, int ring); int radeon_fence_wait_empty(struct radeon_device *rdev, int ring); diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 298ea1c..a4674bf 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -1686,6 +1686,9 @@ void radeon_modeset_fini(struct radeon_device *rdev) radeon_fbdev_fini(rdev); kfree(rdev->mode_info.bios_hardcoded_edid); + /* free i2c buses */ + radeon_i2c_fini(rdev); + if (rdev->mode_info.mode_config_initialized) { radeon_afmt_fini(rdev); drm_kms_helper_poll_fini(rdev->ddev); @@ -1693,8 +1696,6 @@ void radeon_modeset_fini(struct radeon_device *rdev) drm_mode_config_cleanup(rdev->ddev); rdev->mode_info.mode_config_initialized = false; } - /* free i2c buses */ - radeon_i2c_fini(rdev); } static bool is_hdtv_mode(const struct drm_display_mode *mode) diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 05815c4..7ef075a 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -527,7 +527,7 @@ static long radeon_fence_wait_seq_timeout(struct radeon_device *rdev, } /** - * radeon_fence_wait - wait for a fence to signal + * radeon_fence_wait_timeout - wait for a fence to signal with timeout * * @fence: radeon fence object * @intr: use interruptible sleep @@ -535,12 +535,15 @@ static long radeon_fence_wait_seq_timeout(struct radeon_device *rdev, * Wait for the requested fence to signal (all asics). * @intr selects whether to use interruptable (true) or non-interruptable * (false) sleep when waiting for the fence. - * Returns 0 if the fence has passed, error for all other cases. + * @timeout: maximum time to wait, or MAX_SCHEDULE_TIMEOUT for infinite wait + * Returns remaining time if the sequence number has passed, 0 when + * the wait timeout, or an error for all other cases. */ -int radeon_fence_wait(struct radeon_fence *fence, bool intr) +long radeon_fence_wait_timeout(struct radeon_fence *fence, bool intr, long timeout) { uint64_t seq[RADEON_NUM_RINGS] = {}; long r; + int r_sig; /* * This function should not be called on !radeon fences. @@ -552,15 +555,36 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr) return fence_wait(&fence->base, intr); seq[fence->ring] = fence->seq; - r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, MAX_SCHEDULE_TIMEOUT); - if (r < 0) { + r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, timeout); + if (r <= 0) { return r; } - r = fence_signal(&fence->base); - if (!r) + r_sig = fence_signal(&fence->base); + if (!r_sig) FENCE_TRACE(&fence->base, "signaled from fence_wait\n"); - return 0; + return r; +} + +/** + * radeon_fence_wait - wait for a fence to signal + * + * @fence: radeon fence object + * @intr: use interruptible sleep + * + * Wait for the requested fence to signal (all asics). + * @intr selects whether to use interruptable (true) or non-interruptable + * (false) sleep when waiting for the fence. 
+ * Returns 0 if the fence has passed, error for all other cases. +*/ +int radeon_fence_wait(struct radeon_fence *fence, bool intr) +{ + long r = radeon_fence_wait_timeout(fence, intr, MAX_SCHEDULE_TIMEOUT); + if (r > 0) { + return 0; + } else { + return r; + } } /** diff --git a/drivers/gpu/drm/radeon/radeon_vce.c b/drivers/gpu/drm/radeon/radeon_vce.c index 7eb1ae7..566a1a0 100644 --- a/drivers/gpu/drm/radeon/radeon_vce.c +++ b/drivers/gpu/drm/radeon/radeon_vce.c @@ -810,11 +810,16 @@ int radeon_vce_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) goto error; } - r = radeon_fence_wait(fence, false); - if (r) { + r = radeon_fence_wait_timeout(fence, false, usecs_to_jiffies( + RADEON_USEC_IB_TEST_TIMEOUT)); + if (r < 0) { DRM_ERROR("radeon: fence wait failed (%d).\n", r); + } else if (r == 0) { + DRM_ERROR("radeon: fence wait timed out.\n"); + r = -ETIMEDOUT; } else { - DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + DRM_INFO("ib test on ring %d succeeded\n", ring->idx); + r = 0; } error: radeon_fence_unref(&fence); diff --git a/drivers/gpu/drm/radeon/uvd_v1_0.c b/drivers/gpu/drm/radeon/uvd_v1_0.c index c6b1cbc..12ddcfa 100644 --- a/drivers/gpu/drm/radeon/uvd_v1_0.c +++ b/drivers/gpu/drm/radeon/uvd_v1_0.c @@ -522,11 +522,17 @@ int uvd_v1_0_ib_test(struct radeon_device *rdev, struct radeon_ring *ring) goto error; } - r = radeon_fence_wait(fence, false); - if (r) { + r = radeon_fence_wait_timeout(fence, false, usecs_to_jiffies( + RADEON_USEC_IB_TEST_TIMEOUT)); + if (r < 0) { DRM_ERROR("radeon: fence wait failed (%d).\n", r); goto error; + } else if (r == 0) { + DRM_ERROR("radeon: fence wait timed out.\n"); + r = -ETIMEDOUT; + goto error; } + r = 0; DRM_INFO("ib test on ring %d succeeded\n", ring->idx); error: radeon_fence_unref(&fence);
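
The powerplay entry points added above (pp_dpm_get_pp_table, pp_dpm_set_pp_table, pp_dpm_force_clock_level, pp_dpm_print_clock_levels, and the phm_* wrappers) all share one defensive dispatch shape: validate the opaque handle, fetch the hwmgr, check that the per-ASIC hook is actually implemented, and only then call through; any missing piece yields -EINVAL. A minimal standalone sketch of that shape, using illustrative stand-in names rather than the kernel's real types:

#include <errno.h>
#include <stddef.h>
#include <stdio.h>

/* Stand-in for struct pp_hwmgr_func: every hook is optional. */
struct hw_funcs {
	int (*force_clock_level)(int type, int level);
};

/* Stand-in for struct pp_instance / pp_hwmgr. */
struct hw_instance {
	const struct hw_funcs *funcs;
};

/* Same checks as the pp_dpm_* wrappers: handle, table, hook, then call. */
static int force_clock_level(void *handle, int type, int level)
{
	struct hw_instance *hw = handle;

	if (hw == NULL || hw->funcs == NULL ||
	    hw->funcs->force_clock_level == NULL)
		return -EINVAL;

	return hw->funcs->force_clock_level(type, level);
}

static int demo_force(int type, int level)
{
	/* The real cz/fiji/tonga hooks send (1 << level) to the SMC as a mask. */
	printf("type %d -> mask 0x%x\n", type, 1 << level);
	return 0;
}

int main(void)
{
	struct hw_funcs funcs = { .force_clock_level = demo_force };
	struct hw_instance with = { .funcs = &funcs };
	struct hw_instance without = { .funcs = NULL };

	printf("with hook: %d\n", force_clock_level(&with, 0, 2));
	printf("missing hook: %d\n", force_clock_level(&without, 0, 2));
	return 0;
}

Routing everything through these checks lets ASICs adopt the new sysfs interface piecemeal: a hwmgr that does not fill in a hook simply reports -EINVAL instead of calling through a NULL pointer.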
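
The three print_clock_levels implementations above (cz, fiji, tonga) also emit an identical text format: one line per DPM level, "index: <freq>Mhz", with "*" appended to the currently selected level; the table values are divided by 100 to reach MHz, i.e. they appear to be stored in 10 kHz units. A hypothetical standalone rendering of that loop, with a made-up clock table:

#include <stdio.h>

/*
 * Same format string as the hunks above: "%d: %uMhz %s\n", where
 * 'now' marks the active level and values are assumed to be 10 kHz units.
 */
static int print_levels(char *buf, const unsigned int *levels,
			int count, int now)
{
	int i, size = 0;

	for (i = 0; i < count; i++)
		size += sprintf(buf + size, "%d: %uMhz %s\n",
				i, levels[i] / 100, (i == now) ? "*" : "");
	return size;	/* bytes written, as the hooks return */
}

int main(void)
{
	/* Made-up sclk table: 300, 600 and 800 MHz in 10 kHz units. */
	unsigned int sclk[] = { 30000, 60000, 80000 };
	char buf[256];

	print_levels(buf, sclk, 3, 1);
	fputs(buf, stdout);
	return 0;
}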
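
On the radeon side, every IB test is converted from the unbounded radeon_fence_wait() to radeon_fence_wait_timeout(), which follows the usual fence-wait convention: a negative value means the wait itself failed, 0 means it timed out, and a positive value is the time remaining when the fence signaled. Each converted caller therefore needs the three-way check seen in the hunks, mapping 0 to -ETIMEDOUT and collapsing the positive case back to 0. A self-contained sketch of that calling convention, with wait_stub() as a made-up stand-in for the fence wait:

#include <errno.h>
#include <stdio.h>

/*
 * Stand-in for radeon_fence_wait_timeout(): > 0 is the remaining
 * timeout on success, 0 is a timeout, negative is an error.
 */
static long wait_stub(long timeout, long signals_at)
{
	if (signals_at < 0)
		return -EDEADLK;		/* wait failed outright */
	if (signals_at >= timeout)
		return 0;			/* fence never signaled in time */
	return timeout - signals_at;		/* time left at signal */
}

/* The three-way check each converted *_ib_test() now performs. */
static int ib_test_pattern(long timeout, long signals_at)
{
	long r = wait_stub(timeout, signals_at);

	if (r < 0) {
		fprintf(stderr, "fence wait failed (%ld)\n", r);
		return (int)r;
	} else if (r == 0) {
		fprintf(stderr, "fence wait timed out\n");
		return -ETIMEDOUT;
	}
	return 0;	/* success: collapse remaining time back to 0 */
}

int main(void)
{
	printf("%d\n", ib_test_pattern(100, 10));	/* 0 */
	printf("%d\n", ib_test_pattern(100, 200));	/* -ETIMEDOUT */
	printf("%d\n", ib_test_pattern(100, -1));	/* error */
	return 0;
}

This is also why radeon_fence_wait() keeps its old 0/-errno contract in the hunk above: it passes MAX_SCHEDULE_TIMEOUT to the new wrapper, returns 0 for any positive remainder, and passes the error through otherwise.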