summaryrefslogtreecommitdiffstats
path: root/drivers/gpu
diff options
context:
space:
mode:
authorChristian König <christian.koenig@amd.com>2016-08-12 12:59:59 +0200
committerAlex Deucher <alexander.deucher@amd.com>2016-08-16 10:44:34 -0400
commit96105e5375892f63cc56fa707a1db0d74abc764d (patch)
treea217f3deefda61d3f50194afc41c9ccc1ab43e0a /drivers/gpu
parentdc157c6daa317c75058def8f9753909e5224cbec (diff)
downloadop-kernel-dev-96105e5375892f63cc56fa707a1db0d74abc764d.zip
op-kernel-dev-96105e5375892f63cc56fa707a1db0d74abc764d.tar.gz
drm/amdgpu: stop splitting PTE commands into smaller ones
It doesn't make much sense to create bigger commands first which we then need to split into smaller one again. Just make sure the commands we create aren't to big in the first place. Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Alex Deucher <alexander.deucher@amd.com> Reviewed-by: Edward O'Callaghan <funfunctor@folklore1984.net> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik_sdma.c71
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c71
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c71
5 files changed, 73 insertions, 149 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 387b497..1a7e05d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -833,6 +833,9 @@ struct amdgpu_ring {
/* maximum number of VMIDs */
#define AMDGPU_NUM_VM 16
+/* Maximum number of PTEs the hardware can write with one command */
+#define AMDGPU_VM_MAX_UPDATE_SIZE 0x3FFFF
+
/* number of entries in page table */
#define AMDGPU_VM_PTE_COUNT (1 << amdgpu_vm_block_size)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 1292501..673c258 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -639,7 +639,8 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
pde = pd_addr + pt_idx * 8;
if (((last_pde + 8 * count) != pde) ||
- ((last_pt + incr * count) != pt)) {
+ ((last_pt + incr * count) != pt) ||
+ (count == AMDGPU_VM_MAX_UPDATE_SIZE)) {
if (count) {
amdgpu_vm_update_pages(&params, last_pde,
@@ -743,7 +744,8 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
next_pe_start = amdgpu_bo_gpu_offset(pt);
next_pe_start += (addr & mask) * 8;
- if ((cur_pe_start + 8 * cur_nptes) == next_pe_start) {
+ if ((cur_pe_start + 8 * cur_nptes) == next_pe_start &&
+ ((cur_nptes + nptes) <= AMDGPU_VM_MAX_UPDATE_SIZE)) {
/* The next ptb is consecutive to current ptb.
* Don't call amdgpu_vm_update_pages now.
* Will update two ptbs together in future.
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index e5e44f4..e71cd12 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -694,24 +694,16 @@ static void cik_sdma_vm_copy_pte(struct amdgpu_ib *ib,
uint64_t pe, uint64_t src,
unsigned count)
{
- while (count) {
- unsigned bytes = count * 8;
- if (bytes > 0x1FFFF8)
- bytes = 0x1FFFF8;
-
- ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY,
- SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
- ib->ptr[ib->length_dw++] = bytes;
- ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
- ib->ptr[ib->length_dw++] = lower_32_bits(src);
- ib->ptr[ib->length_dw++] = upper_32_bits(src);
- ib->ptr[ib->length_dw++] = lower_32_bits(pe);
- ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-
- pe += bytes;
- src += bytes;
- count -= bytes / 8;
- }
+ unsigned bytes = count * 8;
+
+ ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY,
+ SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
+ ib->ptr[ib->length_dw++] = bytes;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
}
/**
@@ -755,40 +747,21 @@ static void cik_sdma_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
*
* Update the page tables using sDMA (CIK).
*/
-static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib,
- uint64_t pe,
+static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
uint64_t addr, unsigned count,
uint32_t incr, uint32_t flags)
{
- uint64_t value;
- unsigned ndw;
-
- while (count) {
- ndw = count;
- if (ndw > 0x7FFFF)
- ndw = 0x7FFFF;
-
- if (flags & AMDGPU_PTE_VALID)
- value = addr;
- else
- value = 0;
-
- /* for physically contiguous pages (vram) */
- ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
- ib->ptr[ib->length_dw++] = pe; /* dst addr */
- ib->ptr[ib->length_dw++] = upper_32_bits(pe);
- ib->ptr[ib->length_dw++] = flags; /* mask */
- ib->ptr[ib->length_dw++] = 0;
- ib->ptr[ib->length_dw++] = value; /* value */
- ib->ptr[ib->length_dw++] = upper_32_bits(value);
- ib->ptr[ib->length_dw++] = incr; /* increment size */
- ib->ptr[ib->length_dw++] = 0;
- ib->ptr[ib->length_dw++] = ndw; /* number of entries */
-
- pe += ndw * 8;
- addr += ndw * incr;
- count -= ndw;
- }
+ /* for physically contiguous pages (vram) */
+ ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = flags; /* mask */
+ ib->ptr[ib->length_dw++] = 0;
+ ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = incr; /* increment size */
+ ib->ptr[ib->length_dw++] = 0;
+ ib->ptr[ib->length_dw++] = count; /* number of entries */
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index af0f0d2..e822296 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -749,24 +749,16 @@ static void sdma_v2_4_vm_copy_pte(struct amdgpu_ib *ib,
uint64_t pe, uint64_t src,
unsigned count)
{
- while (count) {
- unsigned bytes = count * 8;
- if (bytes > 0x1FFFF8)
- bytes = 0x1FFFF8;
-
- ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
- SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
- ib->ptr[ib->length_dw++] = bytes;
- ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
- ib->ptr[ib->length_dw++] = lower_32_bits(src);
- ib->ptr[ib->length_dw++] = upper_32_bits(src);
- ib->ptr[ib->length_dw++] = lower_32_bits(pe);
- ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-
- pe += bytes;
- src += bytes;
- count -= bytes / 8;
- }
+ unsigned bytes = count * 8;
+
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+ ib->ptr[ib->length_dw++] = bytes;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
}
/**
@@ -810,40 +802,21 @@ static void sdma_v2_4_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
*
* Update the page tables using sDMA (CIK).
*/
-static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib,
- uint64_t pe,
+static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
uint64_t addr, unsigned count,
uint32_t incr, uint32_t flags)
{
- uint64_t value;
- unsigned ndw;
-
- while (count) {
- ndw = count;
- if (ndw > 0x7FFFF)
- ndw = 0x7FFFF;
-
- if (flags & AMDGPU_PTE_VALID)
- value = addr;
- else
- value = 0;
-
- /* for physically contiguous pages (vram) */
- ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
- ib->ptr[ib->length_dw++] = pe; /* dst addr */
- ib->ptr[ib->length_dw++] = upper_32_bits(pe);
- ib->ptr[ib->length_dw++] = flags; /* mask */
- ib->ptr[ib->length_dw++] = 0;
- ib->ptr[ib->length_dw++] = value; /* value */
- ib->ptr[ib->length_dw++] = upper_32_bits(value);
- ib->ptr[ib->length_dw++] = incr; /* increment size */
- ib->ptr[ib->length_dw++] = 0;
- ib->ptr[ib->length_dw++] = ndw; /* number of entries */
-
- pe += ndw * 8;
- addr += ndw * incr;
- count -= ndw;
- }
+ /* for physically contiguous pages (vram) */
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = flags; /* mask */
+ ib->ptr[ib->length_dw++] = 0;
+ ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = incr; /* increment size */
+ ib->ptr[ib->length_dw++] = 0;
+ ib->ptr[ib->length_dw++] = count; /* number of entries */
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 88faaee..bee4978 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -976,24 +976,16 @@ static void sdma_v3_0_vm_copy_pte(struct amdgpu_ib *ib,
uint64_t pe, uint64_t src,
unsigned count)
{
- while (count) {
- unsigned bytes = count * 8;
- if (bytes > 0x1FFFF8)
- bytes = 0x1FFFF8;
-
- ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
- SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
- ib->ptr[ib->length_dw++] = bytes;
- ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
- ib->ptr[ib->length_dw++] = lower_32_bits(src);
- ib->ptr[ib->length_dw++] = upper_32_bits(src);
- ib->ptr[ib->length_dw++] = lower_32_bits(pe);
- ib->ptr[ib->length_dw++] = upper_32_bits(pe);
-
- pe += bytes;
- src += bytes;
- count -= bytes / 8;
- }
+ unsigned bytes = count * 8;
+
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
+ SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
+ ib->ptr[ib->length_dw++] = bytes;
+ ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
+ ib->ptr[ib->length_dw++] = lower_32_bits(src);
+ ib->ptr[ib->length_dw++] = upper_32_bits(src);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe);
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
}
/**
@@ -1037,40 +1029,21 @@ static void sdma_v3_0_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
*
* Update the page tables using sDMA (CIK).
*/
-static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib,
- uint64_t pe,
+static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe,
uint64_t addr, unsigned count,
uint32_t incr, uint32_t flags)
{
- uint64_t value;
- unsigned ndw;
-
- while (count) {
- ndw = count;
- if (ndw > 0x7FFFF)
- ndw = 0x7FFFF;
-
- if (flags & AMDGPU_PTE_VALID)
- value = addr;
- else
- value = 0;
-
- /* for physically contiguous pages (vram) */
- ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
- ib->ptr[ib->length_dw++] = pe; /* dst addr */
- ib->ptr[ib->length_dw++] = upper_32_bits(pe);
- ib->ptr[ib->length_dw++] = flags; /* mask */
- ib->ptr[ib->length_dw++] = 0;
- ib->ptr[ib->length_dw++] = value; /* value */
- ib->ptr[ib->length_dw++] = upper_32_bits(value);
- ib->ptr[ib->length_dw++] = incr; /* increment size */
- ib->ptr[ib->length_dw++] = 0;
- ib->ptr[ib->length_dw++] = ndw; /* number of entries */
-
- pe += ndw * 8;
- addr += ndw * incr;
- count -= ndw;
- }
+ /* for physically contiguous pages (vram) */
+ ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
+ ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
+ ib->ptr[ib->length_dw++] = upper_32_bits(pe);
+ ib->ptr[ib->length_dw++] = flags; /* mask */
+ ib->ptr[ib->length_dw++] = 0;
+ ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
+ ib->ptr[ib->length_dw++] = upper_32_bits(addr);
+ ib->ptr[ib->length_dw++] = incr; /* increment size */
+ ib->ptr[ib->length_dw++] = 0;
+ ib->ptr[ib->length_dw++] = count; /* number of entries */
}
/**
OpenPOWER on IntegriCloud