From a22fc85495575d81c36db24b12f66fd314b7ced1 Mon Sep 17 00:00:00 2001 From: Ben Goz Date: Mon, 12 Jan 2015 14:28:46 +0200 Subject: drm/amdkfd: Add initial VI support for DQM This patch starts to add support for the VI APU in the DQM module. Because most (more than 90%) of the DQM code is shared among AMD's APUs, we chose a design that performs most/all the code in the shared DQM file (kfd_device_queue_manager.c). If there is H/W specific code to be executed, than it is written in an asic-specific extension function for that H/W. That asic-specific extension function is called from the shared function at the appropriate time. This requires that for every asic-specific extension function that is implemented in a specific ASIC, there will be an equivalent implementation in ALL ASICs, even if those implementations are just stubs. That way we achieve: - Maintainability: by having one copy of most of the code, we only need to fix bugs at one locations - Readability: very clear what is the shared code and what is done per ASIC - Extensibility: very easy to add new H/W specific files/functions Signed-off-by: Ben Goz Signed-off-by: Oded Gabbay Reviewed-by: Alex Deucher --- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 113 +++++---------------- 1 file changed, 28 insertions(+), 85 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 12c8448..b201624 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -61,7 +61,7 @@ enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type) return KFD_MQD_TYPE_CP; } -static inline unsigned int get_pipes_num(struct device_queue_manager *dqm) +inline unsigned int get_pipes_num(struct device_queue_manager *dqm) { BUG_ON(!dqm || !dqm->dev); return dqm->dev->shared_resources.compute_pipe_count; @@ -78,7 +78,7 @@ static inline unsigned int get_pipes_num_cpsch(void) return PIPE_PER_ME_CP_SCHEDULING; } -static inline unsigned int +inline unsigned int get_sh_mem_bases_nybble_64(struct kfd_process_device *pdd) { uint32_t nybble; @@ -88,7 +88,7 @@ get_sh_mem_bases_nybble_64(struct kfd_process_device *pdd) return nybble; } -static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd) +inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd) { unsigned int shared_base; @@ -97,41 +97,7 @@ static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd) return shared_base; } -static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble); -static void init_process_memory(struct device_queue_manager *dqm, - struct qcm_process_device *qpd) -{ - struct kfd_process_device *pdd; - unsigned int temp; - - BUG_ON(!dqm || !qpd); - - pdd = qpd_to_pdd(qpd); - - /* check if sh_mem_config register already configured */ - if (qpd->sh_mem_config == 0) { - qpd->sh_mem_config = - ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) | - DEFAULT_MTYPE(MTYPE_NONCACHED) | - APE1_MTYPE(MTYPE_NONCACHED); - qpd->sh_mem_ape1_limit = 0; - qpd->sh_mem_ape1_base = 0; - } - - if (qpd->pqm->process->is_32bit_user_mode) { - temp = get_sh_mem_bases_32(pdd); - qpd->sh_mem_bases = SHARED_BASE(temp); - qpd->sh_mem_config |= PTR32; - } else { - temp = get_sh_mem_bases_nybble_64(pdd); - qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); - } - - pr_debug("kfd: is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n", - qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases); -} - -static void program_sh_mem_settings(struct device_queue_manager *dqm, +void program_sh_mem_settings(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { return kfd2kgd->program_sh_mem_settings(dqm->dev->kgd, qpd->vmid, @@ -391,6 +357,7 @@ static int register_process_nocpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { struct device_process_node *n; + int retval; BUG_ON(!dqm || !qpd); @@ -405,12 +372,13 @@ static int register_process_nocpsch(struct device_queue_manager *dqm, mutex_lock(&dqm->lock); list_add(&n->list, &dqm->queues); - init_process_memory(dqm, qpd); + retval = dqm->ops_asic_specific.register_process(dqm, qpd); + dqm->processes_count++; mutex_unlock(&dqm->lock); - return 0; + return retval; } static int unregister_process_nocpsch(struct device_queue_manager *dqm, @@ -455,34 +423,7 @@ set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid, vmid); } -static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) -{ - /* In 64-bit mode, we can only control the top 3 bits of the LDS, - * scratch and GPUVM apertures. - * The hardware fills in the remaining 59 bits according to the - * following pattern: - * LDS: X0000000'00000000 - X0000001'00000000 (4GB) - * Scratch: X0000001'00000000 - X0000002'00000000 (4GB) - * GPUVM: Y0010000'00000000 - Y0020000'00000000 (1TB) - * - * (where X/Y is the configurable nybble with the low-bit 0) - * - * LDS and scratch will have the same top nybble programmed in the - * top 3 bits of SH_MEM_BASES.PRIVATE_BASE. - * GPUVM can have a different top nybble programmed in the - * top 3 bits of SH_MEM_BASES.SHARED_BASE. - * We don't bother to support different top nybbles - * for LDS/Scratch and GPUVM. - */ - - BUG_ON((top_address_nybble & 1) || top_address_nybble > 0xE || - top_address_nybble == 0); - - return PRIVATE_BASE(top_address_nybble << 12) | - SHARED_BASE(top_address_nybble << 12); -} - -static int init_pipelines(struct device_queue_manager *dqm, +int init_pipelines(struct device_queue_manager *dqm, unsigned int pipes_num, unsigned int first_pipe) { void *hpdptr; @@ -715,7 +656,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm) dqm->queue_count = dqm->processes_count = 0; dqm->sdma_queue_count = 0; dqm->active_runlist = false; - retval = init_pipelines(dqm, get_pipes_num(dqm), 0); + retval = dqm->ops_asic_specific.initialize(dqm); if (retval != 0) goto fail_init_pipelines; @@ -1035,8 +976,7 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm, void __user *alternate_aperture_base, uint64_t alternate_aperture_size) { - uint32_t default_mtype; - uint32_t ape1_mtype; + bool retval; pr_debug("kfd: In func %s\n", __func__); @@ -1073,18 +1013,13 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm, qpd->sh_mem_ape1_limit = limit >> 16; } - default_mtype = (default_policy == cache_policy_coherent) ? - MTYPE_NONCACHED : - MTYPE_CACHED; - - ape1_mtype = (alternate_policy == cache_policy_coherent) ? - MTYPE_NONCACHED : - MTYPE_CACHED; - - qpd->sh_mem_config = (qpd->sh_mem_config & PTR32) - | ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) - | DEFAULT_MTYPE(default_mtype) - | APE1_MTYPE(ape1_mtype); + retval = dqm->ops_asic_specific.set_cache_memory_policy( + dqm, + qpd, + default_policy, + alternate_policy, + alternate_aperture_base, + alternate_aperture_size); if ((sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0)) program_sh_mem_settings(dqm, qpd); @@ -1094,7 +1029,7 @@ static bool set_cache_memory_policy(struct device_queue_manager *dqm, qpd->sh_mem_ape1_limit); mutex_unlock(&dqm->lock); - return true; + return retval; out: mutex_unlock(&dqm->lock); @@ -1107,6 +1042,8 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) BUG_ON(!dev); + pr_debug("kfd: loading device queue manager\n"); + dqm = kzalloc(sizeof(struct device_queue_manager), GFP_KERNEL); if (!dqm) return NULL; @@ -1149,6 +1086,13 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) break; } + switch (dev->device_info->asic_family) { + case CHIP_CARRIZO: + device_queue_manager_init_vi(&dqm->ops_asic_specific); + case CHIP_KAVERI: + device_queue_manager_init_cik(&dqm->ops_asic_specific); + } + if (dqm->ops.initialize(dqm) != 0) { kfree(dqm); return NULL; @@ -1164,4 +1108,3 @@ void device_queue_manager_uninit(struct device_queue_manager *dqm) dqm->ops.uninitialize(dqm); kfree(dqm); } - -- cgit v1.1