summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristian König <christian.koenig@amd.com>2015-07-07 17:24:49 +0200
committerAlex Deucher <alexander.deucher@amd.com>2015-08-17 16:50:14 -0400
commit21c16bf634e62cf9673946f509b469e7f0953ecf (patch)
tree91595b4cd4064a7867bbdd32cb37fc090c7ec37d
parent91e1a5207edec9e4f888e44478a9a254186e0ba8 (diff)
downloadop-kernel-dev-21c16bf634e62cf9673946f509b469e7f0953ecf.zip
op-kernel-dev-21c16bf634e62cf9673946f509b469e7f0953ecf.tar.gz
drm/amdgpu: add user fence context map v2
This is a prerequisite for the GPU scheduler to make the order of submission independent from the order of execution. v2: properly implement the locking Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Jammy Zhou <Jammy.Zhou@amd.com> Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c60
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c60
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c4
4 files changed, 110 insertions, 30 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 70e783a..0220d98 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -415,6 +415,8 @@ struct amdgpu_user_fence {
struct amdgpu_bo *bo;
/* write-back address offset to bo start */
uint32_t offset;
+ /* resulting sequence number */
+ uint64_t sequence;
};
int amdgpu_fence_driver_init(struct amdgpu_device *adev);
@@ -985,9 +987,18 @@ struct amdgpu_vm_manager {
* context related structures
*/
+#define AMDGPU_CTX_MAX_CS_PENDING 16
+
+struct amdgpu_ctx_ring {
+ uint64_t sequence;
+ struct fence *fences[AMDGPU_CTX_MAX_CS_PENDING];
+};
+
struct amdgpu_ctx {
struct kref refcount;
unsigned reset_counter;
+ spinlock_t ring_lock;
+ struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS];
};
struct amdgpu_ctx_mgr {
@@ -1007,6 +1018,11 @@ void amdgpu_ctx_fini(struct amdgpu_fpriv *fpriv);
struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
+uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
+ struct fence *fence);
+struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
+ struct amdgpu_ring *ring, uint64_t seq);
+
int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 53e6a10f..cef8360 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -698,9 +698,9 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
sizeof(struct drm_amdgpu_cs_chunk_dep);
for (j = 0; j < num_deps; ++j) {
- struct amdgpu_fence *fence;
struct amdgpu_ring *ring;
struct amdgpu_ctx *ctx;
+ struct fence *fence;
r = amdgpu_cs_get_ring(adev, deps[j].ip_type,
deps[j].ip_instance,
@@ -712,20 +712,20 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
if (ctx == NULL)
return -EINVAL;
- r = amdgpu_fence_recreate(ring, p->filp,
- deps[j].handle,
- &fence);
- if (r) {
+ fence = amdgpu_ctx_get_fence(ctx, ring,
+ deps[j].handle);
+ if (IS_ERR(fence)) {
+ r = PTR_ERR(fence);
amdgpu_ctx_put(ctx);
return r;
- }
-
- r = amdgpu_sync_fence(adev, &ib->sync, &fence->base);
- amdgpu_fence_unref(&fence);
- amdgpu_ctx_put(ctx);
- if (r)
- return r;
+ } else if (fence) {
+ r = amdgpu_sync_fence(adev, &ib->sync, fence);
+ fence_put(fence);
+ amdgpu_ctx_put(ctx);
+ if (r)
+ return r;
+ }
}
}
@@ -773,8 +773,11 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
r = amdgpu_cs_ib_fill(adev, &parser);
}
- if (!r)
+ if (!r) {
r = amdgpu_cs_dependencies(adev, &parser);
+ if (r)
+ DRM_ERROR("Failed in the dependencies handling %d!\n", r);
+ }
if (r) {
amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
@@ -791,7 +794,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
goto out;
}
- cs->out.handle = parser.ibs[parser.num_ibs - 1].fence->seq;
+ cs->out.handle = parser.uf.sequence;
out:
amdgpu_cs_parser_fini(&parser, r, true);
up_read(&adev->exclusive_lock);
@@ -814,30 +817,31 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
union drm_amdgpu_wait_cs *wait = data;
struct amdgpu_device *adev = dev->dev_private;
unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
- struct amdgpu_fence *fence = NULL;
struct amdgpu_ring *ring = NULL;
struct amdgpu_ctx *ctx;
+ struct fence *fence;
long r;
+ r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance,
+ wait->in.ring, &ring);
+ if (r)
+ return r;
+
ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
if (ctx == NULL)
return -EINVAL;
- r = amdgpu_cs_get_ring(adev, wait->in.ip_type, wait->in.ip_instance,
- wait->in.ring, &ring);
- if (r) {
- amdgpu_ctx_put(ctx);
- return r;
- }
+ fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle);
+ if (IS_ERR(fence))
+ r = PTR_ERR(fence);
- r = amdgpu_fence_recreate(ring, filp, wait->in.handle, &fence);
- if (r) {
- amdgpu_ctx_put(ctx);
- return r;
- }
+ else if (fence) {
+ r = fence_wait_timeout(fence, true, timeout);
+ fence_put(fence);
+
+ } else
+ r = 1;
- r = fence_wait_timeout(&fence->base, true, timeout);
- amdgpu_fence_unref(&fence);
amdgpu_ctx_put(ctx);
if (r < 0)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index e63cfb7..c23bfd8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -28,17 +28,22 @@
static void amdgpu_ctx_do_release(struct kref *ref)
{
struct amdgpu_ctx *ctx;
+ unsigned i, j;
ctx = container_of(ref, struct amdgpu_ctx, refcount);
+
+ for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
+ for (j = 0; j < AMDGPU_CTX_MAX_CS_PENDING; ++j)
+ fence_put(ctx->rings[i].fences[j]);
kfree(ctx);
}
int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv,
uint32_t *id)
{
- int r;
struct amdgpu_ctx *ctx;
struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
+ int i, r;
ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
@@ -55,6 +60,9 @@ int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv,
memset(ctx, 0, sizeof(*ctx));
kref_init(&ctx->refcount);
+ spin_lock_init(&ctx->ring_lock);
+ for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
+ ctx->rings[i].sequence = 1;
mutex_unlock(&mgr->lock);
return 0;
@@ -177,3 +185,53 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
kref_put(&ctx->refcount, amdgpu_ctx_do_release);
return 0;
}
+
+uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
+ struct fence *fence)
+{
+ struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
+ uint64_t seq = cring->sequence;
+ unsigned idx = seq % AMDGPU_CTX_MAX_CS_PENDING;
+ struct fence *other = cring->fences[idx];
+
+ if (other) {
+ signed long r;
+ r = fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT);
+ if (r < 0)
+ DRM_ERROR("Error (%ld) waiting for fence!\n", r);
+ }
+
+ fence_get(fence);
+
+ spin_lock(&ctx->ring_lock);
+ cring->fences[idx] = fence;
+ cring->sequence++;
+ spin_unlock(&ctx->ring_lock);
+
+ fence_put(other);
+
+ return seq;
+}
+
+struct fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
+ struct amdgpu_ring *ring, uint64_t seq)
+{
+ struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx];
+ struct fence *fence;
+
+ spin_lock(&ctx->ring_lock);
+ if (seq >= cring->sequence) {
+ spin_unlock(&ctx->ring_lock);
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (seq < cring->sequence - AMDGPU_CTX_MAX_CS_PENDING) {
+ spin_unlock(&ctx->ring_lock);
+ return NULL;
+ }
+
+ fence = fence_get(cring->fences[seq % AMDGPU_CTX_MAX_CS_PENDING]);
+ spin_unlock(&ctx->ring_lock);
+
+ return fence;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 2722815..95d5334 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -219,8 +219,10 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
/* wrap the last IB with fence */
if (ib->user) {
uint64_t addr = amdgpu_bo_gpu_offset(ib->user->bo);
+ ib->user->sequence = amdgpu_ctx_add_fence(ib->ctx, ring,
+ &ib->fence->base);
addr += ib->user->offset;
- amdgpu_ring_emit_fence(ring, addr, ib->fence->seq,
+ amdgpu_ring_emit_fence(ring, addr, ib->user->sequence,
AMDGPU_FENCE_FLAG_64BIT);
}
OpenPOWER on IntegriCloud