summaryrefslogtreecommitdiffstats
path: root/drivers/gpu
diff options
context:
space:
mode:
authorChristian König <christian.koenig@amd.com>2016-08-11 14:06:54 +0200
committerAlex Deucher <alexander.deucher@amd.com>2016-08-16 10:43:44 -0400
commitb0456f93063ec8629cfeee6d03758f92793d96cb (patch)
tree59f9cef61f1d68cf3bb6b4baa70b47ba610d4b6a /drivers/gpu
parent478feaf6cc420e66c071c0a743b334abfe8f18c9 (diff)
downloadop-kernel-dev-b0456f93063ec8629cfeee6d03758f92793d96cb.zip
op-kernel-dev-b0456f93063ec8629cfeee6d03758f92793d96cb.tar.gz
drm/amdgpu: write PTEs directly into the IB.
Write the PTEs at the end of the IB instead of directly into the SDMA commands. This can save quite some CPU cycles building the entries. This doesn't change the DW estimation because PTEs where embedded into the IB before as well. It just moves them to the end of the IB. Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Alex Deucher <alexander.deucher@amd.com> Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c26
1 files changed, 21 insertions, 5 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 1a474fa1f..e5095b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -911,15 +911,15 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
/* padding, etc. */
ndw = 64;
- if (params.src) {
+ if (src) {
/* only copy commands needed */
ndw += ncmds * 7;
- } else if (params.pages_addr) {
- /* header for write data commands */
- ndw += ncmds * 4;
+ } else if (pages_addr) {
+ /* copy commands needed */
+ ndw += ncmds * 7;
- /* body of write data command */
+ /* and also PTEs */
ndw += nptes * 2;
} else {
@@ -936,6 +936,22 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
params.ib = &job->ibs[0];
+ if (!src && pages_addr) {
+ uint64_t *pte;
+ unsigned i;
+
+ /* Put the PTEs at the end of the IB. */
+ i = ndw - nptes * 2;
+ pte= (uint64_t *)&(job->ibs->ptr[i]);
+ params.src = job->ibs->gpu_addr + i * 4;
+
+ for (i = 0; i < nptes; ++i) {
+ pte[i] = amdgpu_vm_map_gart(pages_addr, addr + i *
+ AMDGPU_GPU_PAGE_SIZE);
+ pte[i] |= flags;
+ }
+ }
+
r = amdgpu_sync_fence(adev, &job->sync, exclusive);
if (r)
goto error_free;
OpenPOWER on IntegriCloud