diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 503 |
1 files changed, 269 insertions, 234 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index b882e81..9392e50 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -25,52 +25,12 @@ * Jerome Glisse <glisse@freedesktop.org> */ #include <linux/list_sort.h> +#include <linux/pagemap.h> #include <drm/drmP.h> #include <drm/amdgpu_drm.h> #include "amdgpu.h" #include "amdgpu_trace.h" -#define AMDGPU_CS_MAX_PRIORITY 32u -#define AMDGPU_CS_NUM_BUCKETS (AMDGPU_CS_MAX_PRIORITY + 1) - -/* This is based on the bucket sort with O(n) time complexity. - * An item with priority "i" is added to bucket[i]. The lists are then - * concatenated in descending order. - */ -struct amdgpu_cs_buckets { - struct list_head bucket[AMDGPU_CS_NUM_BUCKETS]; -}; - -static void amdgpu_cs_buckets_init(struct amdgpu_cs_buckets *b) -{ - unsigned i; - - for (i = 0; i < AMDGPU_CS_NUM_BUCKETS; i++) - INIT_LIST_HEAD(&b->bucket[i]); -} - -static void amdgpu_cs_buckets_add(struct amdgpu_cs_buckets *b, - struct list_head *item, unsigned priority) -{ - /* Since buffers which appear sooner in the relocation list are - * likely to be used more often than buffers which appear later - * in the list, the sort mustn't change the ordering of buffers - * with the same priority, i.e. it must be stable. - */ - list_add_tail(item, &b->bucket[min(priority, AMDGPU_CS_MAX_PRIORITY)]); -} - -static void amdgpu_cs_buckets_get_list(struct amdgpu_cs_buckets *b, - struct list_head *out_list) -{ - unsigned i; - - /* Connect the sorted buckets in the output list. 
*/ - for (i = 0; i < AMDGPU_CS_NUM_BUCKETS; i++) { - list_splice(&b->bucket[i], out_list); - } -} - int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type, u32 ip_instance, u32 ring, struct amdgpu_ring **out_ring) @@ -128,6 +88,7 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type, } static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, + struct amdgpu_user_fence *uf, struct drm_amdgpu_cs_chunk_fence *fence_data) { struct drm_gem_object *gobj; @@ -139,20 +100,19 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, if (gobj == NULL) return -EINVAL; - p->uf.bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); - p->uf.offset = fence_data->offset; + uf->bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); + uf->offset = fence_data->offset; - if (amdgpu_ttm_tt_has_userptr(p->uf.bo->tbo.ttm)) { + if (amdgpu_ttm_tt_get_usermm(uf->bo->tbo.ttm)) { drm_gem_object_unreference_unlocked(gobj); return -EINVAL; } - p->uf_entry.robj = amdgpu_bo_ref(p->uf.bo); - p->uf_entry.prefered_domains = AMDGPU_GEM_DOMAIN_GTT; - p->uf_entry.allowed_domains = AMDGPU_GEM_DOMAIN_GTT; + p->uf_entry.robj = amdgpu_bo_ref(uf->bo); p->uf_entry.priority = 0; p->uf_entry.tv.bo = &p->uf_entry.robj->tbo; p->uf_entry.tv.shared = true; + p->uf_entry.user_pages = NULL; drm_gem_object_unreference_unlocked(gobj); return 0; @@ -160,11 +120,12 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) { + struct amdgpu_fpriv *fpriv = p->filp->driver_priv; union drm_amdgpu_cs *cs = data; uint64_t *chunk_array_user; uint64_t *chunk_array; - struct amdgpu_fpriv *fpriv = p->filp->driver_priv; - unsigned size; + struct amdgpu_user_fence uf = {}; + unsigned size, num_ibs = 0; int i; int ret; @@ -181,15 +142,12 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) goto free_chunk; } - p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle); - /* get chunks */ - INIT_LIST_HEAD(&p->validated); 
chunk_array_user = (uint64_t __user *)(unsigned long)(cs->in.chunks); if (copy_from_user(chunk_array, chunk_array_user, sizeof(uint64_t)*cs->in.num_chunks)) { ret = -EFAULT; - goto put_bo_list; + goto put_ctx; } p->nchunks = cs->in.num_chunks; @@ -197,7 +155,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) GFP_KERNEL); if (!p->chunks) { ret = -ENOMEM; - goto put_bo_list; + goto put_ctx; } for (i = 0; i < p->nchunks; i++) { @@ -217,7 +175,6 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) size = p->chunks[i].length_dw; cdata = (void __user *)(unsigned long)user_chunk.chunk_data; - p->chunks[i].user_ptr = cdata; p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t)); if (p->chunks[i].kdata == NULL) { @@ -233,7 +190,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) switch (p->chunks[i].chunk_id) { case AMDGPU_CHUNK_ID_IB: - p->num_ibs++; + ++num_ibs; break; case AMDGPU_CHUNK_ID_FENCE: @@ -243,7 +200,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) goto free_partial_kdata; } - ret = amdgpu_cs_user_fence_chunk(p, (void *)p->chunks[i].kdata); + ret = amdgpu_cs_user_fence_chunk(p, &uf, (void *)p->chunks[i].kdata); if (ret) goto free_partial_kdata; @@ -258,12 +215,11 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) } } - - p->ibs = kcalloc(p->num_ibs, sizeof(struct amdgpu_ib), GFP_KERNEL); - if (!p->ibs) { - ret = -ENOMEM; + ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job); + if (ret) goto free_all_kdata; - } + + p->job->uf = uf; kfree(chunk_array); return 0; @@ -274,9 +230,7 @@ free_partial_kdata: for (; i >= 0; i--) drm_free_large(p->chunks[i].kdata); kfree(p->chunks); -put_bo_list: - if (p->bo_list) - amdgpu_bo_list_put(p->bo_list); +put_ctx: amdgpu_ctx_put(p->ctx); free_chunk: kfree(chunk_array); @@ -336,96 +290,198 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev) return max(bytes_moved_threshold, 1024*1024ull); } -int 
amdgpu_cs_list_validate(struct amdgpu_device *adev, - struct amdgpu_vm *vm, +int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, struct list_head *validated) { struct amdgpu_bo_list_entry *lobj; - struct amdgpu_bo *bo; - u64 bytes_moved = 0, initial_bytes_moved; - u64 bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(adev); + u64 initial_bytes_moved; int r; list_for_each_entry(lobj, validated, tv.head) { - bo = lobj->robj; - if (!bo->pin_count) { - u32 domain = lobj->prefered_domains; - u32 current_domain = - amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); - - /* Check if this buffer will be moved and don't move it - * if we have moved too many buffers for this IB already. - * - * Note that this allows moving at least one buffer of - * any size, because it doesn't take the current "bo" - * into account. We don't want to disallow buffer moves - * completely. - */ - if ((lobj->allowed_domains & current_domain) != 0 && - (domain & current_domain) == 0 && /* will be moved */ - bytes_moved > bytes_moved_threshold) { - /* don't move it */ - domain = current_domain; - } + struct amdgpu_bo *bo = lobj->robj; + bool binding_userptr = false; + struct mm_struct *usermm; + uint32_t domain; + + usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm); + if (usermm && usermm != current->mm) + return -EPERM; + + /* Check if we have user pages and nobody bound the BO already */ + if (lobj->user_pages && bo->tbo.ttm->state != tt_bound) { + size_t size = sizeof(struct page *); + + size *= bo->tbo.ttm->num_pages; + memcpy(bo->tbo.ttm->pages, lobj->user_pages, size); + binding_userptr = true; + } - retry: - amdgpu_ttm_placement_from_domain(bo, domain); - initial_bytes_moved = atomic64_read(&adev->num_bytes_moved); - r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); - bytes_moved += atomic64_read(&adev->num_bytes_moved) - - initial_bytes_moved; - - if (unlikely(r)) { - if (r != -ERESTARTSYS && domain != lobj->allowed_domains) { - domain = lobj->allowed_domains; - goto 
retry; - } - return r; + if (bo->pin_count) + continue; + + /* Avoid moving this one if we have moved too many buffers + * for this IB already. + * + * Note that this allows moving at least one buffer of + * any size, because it doesn't take the current "bo" + * into account. We don't want to disallow buffer moves + * completely. + */ + if (p->bytes_moved <= p->bytes_moved_threshold) + domain = bo->prefered_domains; + else + domain = bo->allowed_domains; + + retry: + amdgpu_ttm_placement_from_domain(bo, domain); + initial_bytes_moved = atomic64_read(&bo->adev->num_bytes_moved); + r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false); + p->bytes_moved += atomic64_read(&bo->adev->num_bytes_moved) - + initial_bytes_moved; + + if (unlikely(r)) { + if (r != -ERESTARTSYS && domain != bo->allowed_domains) { + domain = bo->allowed_domains; + goto retry; } + return r; + } + + if (binding_userptr) { + drm_free_large(lobj->user_pages); + lobj->user_pages = NULL; } - lobj->bo_va = amdgpu_vm_bo_find(vm, bo); } return 0; } -static int amdgpu_cs_parser_relocs(struct amdgpu_cs_parser *p) +static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, + union drm_amdgpu_cs *cs) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; - struct amdgpu_cs_buckets buckets; + struct amdgpu_bo_list_entry *e; struct list_head duplicates; bool need_mmap_lock = false; - int i, r; + unsigned i, tries = 10; + int r; - if (p->bo_list) { - need_mmap_lock = p->bo_list->has_userptr; - amdgpu_cs_buckets_init(&buckets); - for (i = 0; i < p->bo_list->num_entries; i++) - amdgpu_cs_buckets_add(&buckets, &p->bo_list->array[i].tv.head, - p->bo_list->array[i].priority); + INIT_LIST_HEAD(&p->validated); - amdgpu_cs_buckets_get_list(&buckets, &p->validated); + p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle); + if (p->bo_list) { + need_mmap_lock = p->bo_list->first_userptr != + p->bo_list->num_entries; + amdgpu_bo_list_get_list(p->bo_list, &p->validated); } INIT_LIST_HEAD(&duplicates); 
amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd); - if (p->uf.bo) + if (p->job->uf.bo) list_add(&p->uf_entry.tv.head, &p->validated); if (need_mmap_lock) down_read(&current->mm->mmap_sem); - r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, &duplicates); - if (unlikely(r != 0)) - goto error_reserve; + while (1) { + struct list_head need_pages; + unsigned i; + + r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, + &duplicates); + if (unlikely(r != 0)) + goto error_free_pages; + + /* Without a BO list we don't have userptr BOs */ + if (!p->bo_list) + break; + + INIT_LIST_HEAD(&need_pages); + for (i = p->bo_list->first_userptr; + i < p->bo_list->num_entries; ++i) { + + e = &p->bo_list->array[i]; + + if (amdgpu_ttm_tt_userptr_invalidated(e->robj->tbo.ttm, + &e->user_invalidated) && e->user_pages) { + + /* We acquired a page array, but somebody + * invalidated it. Free it an try again + */ + release_pages(e->user_pages, + e->robj->tbo.ttm->num_pages, + false); + drm_free_large(e->user_pages); + e->user_pages = NULL; + } + + if (e->robj->tbo.ttm->state != tt_bound && + !e->user_pages) { + list_del(&e->tv.head); + list_add(&e->tv.head, &need_pages); + + amdgpu_bo_unreserve(e->robj); + } + } + + if (list_empty(&need_pages)) + break; + + /* Unreserve everything again. */ + ttm_eu_backoff_reservation(&p->ticket, &p->validated); + + /* We tried to often, just abort */ + if (!--tries) { + r = -EDEADLK; + goto error_free_pages; + } + + /* Fill the page arrays for all useptrs. */ + list_for_each_entry(e, &need_pages, tv.head) { + struct ttm_tt *ttm = e->robj->tbo.ttm; + + e->user_pages = drm_calloc_large(ttm->num_pages, + sizeof(struct page*)); + if (!e->user_pages) { + r = -ENOMEM; + goto error_free_pages; + } + + r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages); + if (r) { + drm_free_large(e->user_pages); + e->user_pages = NULL; + goto error_free_pages; + } + } + + /* And try again. 
*/ + list_splice(&need_pages, &p->validated); + } amdgpu_vm_get_pt_bos(&fpriv->vm, &duplicates); - r = amdgpu_cs_list_validate(p->adev, &fpriv->vm, &duplicates); + p->bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(p->adev); + p->bytes_moved = 0; + + r = amdgpu_cs_list_validate(p, &duplicates); + if (r) + goto error_validate; + + r = amdgpu_cs_list_validate(p, &p->validated); if (r) goto error_validate; - r = amdgpu_cs_list_validate(p->adev, &fpriv->vm, &p->validated); + if (p->bo_list) { + struct amdgpu_vm *vm = &fpriv->vm; + unsigned i; + + for (i = 0; i < p->bo_list->num_entries; i++) { + struct amdgpu_bo *bo = p->bo_list->array[i].robj; + + p->bo_list->array[i].bo_va = amdgpu_vm_bo_find(vm, bo); + } + } error_validate: if (r) { @@ -433,10 +489,26 @@ error_validate: ttm_eu_backoff_reservation(&p->ticket, &p->validated); } -error_reserve: +error_free_pages: + if (need_mmap_lock) up_read(&current->mm->mmap_sem); + if (p->bo_list) { + for (i = p->bo_list->first_userptr; + i < p->bo_list->num_entries; ++i) { + e = &p->bo_list->array[i]; + + if (!e->user_pages) + continue; + + release_pages(e->user_pages, + e->robj->tbo.ttm->num_pages, + false); + drm_free_large(e->user_pages); + } + } + return r; } @@ -447,7 +519,7 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) list_for_each_entry(e, &p->validated, tv.head) { struct reservation_object *resv = e->robj->tbo.resv; - r = amdgpu_sync_resv(p->adev, &p->ibs[0].sync, resv, p->filp); + r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp); if (r) return r; @@ -510,11 +582,8 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo for (i = 0; i < parser->nchunks; i++) drm_free_large(parser->chunks[i].kdata); kfree(parser->chunks); - if (parser->ibs) - for (i = 0; i < parser->num_ibs; i++) - amdgpu_ib_free(parser->adev, &parser->ibs[i]); - kfree(parser->ibs); - amdgpu_bo_unref(&parser->uf.bo); + if (parser->job) + amdgpu_job_free(parser->job); 
amdgpu_bo_unref(&parser->uf_entry.robj); } @@ -530,7 +599,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p, if (r) return r; - r = amdgpu_sync_fence(adev, &p->ibs[0].sync, vm->page_directory_fence); + r = amdgpu_sync_fence(adev, &p->job->sync, vm->page_directory_fence); if (r) return r; @@ -556,14 +625,14 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p, return r; f = bo_va->last_pt_update; - r = amdgpu_sync_fence(adev, &p->ibs[0].sync, f); + r = amdgpu_sync_fence(adev, &p->job->sync, f); if (r) return r; } } - r = amdgpu_vm_clear_invalids(adev, vm, &p->ibs[0].sync); + r = amdgpu_vm_clear_invalids(adev, vm, &p->job->sync); if (amdgpu_vm_debug && p->bo_list) { /* Invalidate all BOs to test for userspace bugs */ @@ -581,29 +650,25 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p, } static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev, - struct amdgpu_cs_parser *parser) + struct amdgpu_cs_parser *p) { - struct amdgpu_fpriv *fpriv = parser->filp->driver_priv; + struct amdgpu_fpriv *fpriv = p->filp->driver_priv; struct amdgpu_vm *vm = &fpriv->vm; - struct amdgpu_ring *ring; + struct amdgpu_ring *ring = p->job->ring; int i, r; - if (parser->num_ibs == 0) - return 0; - /* Only for UVD/VCE VM emulation */ - for (i = 0; i < parser->num_ibs; i++) { - ring = parser->ibs[i].ring; - if (ring->funcs->parse_cs) { - r = amdgpu_ring_parse_cs(ring, parser, i); + if (ring->funcs->parse_cs) { + for (i = 0; i < p->job->num_ibs; i++) { + r = amdgpu_ring_parse_cs(ring, p, i); if (r) return r; } } - r = amdgpu_bo_vm_update_pte(parser, vm); + r = amdgpu_bo_vm_update_pte(p, vm); if (!r) - amdgpu_cs_sync_rings(parser); + amdgpu_cs_sync_rings(p); return r; } @@ -626,14 +691,14 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, int i, j; int r; - for (i = 0, j = 0; i < parser->nchunks && j < parser->num_ibs; i++) { + for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) { struct amdgpu_cs_chunk *chunk; struct 
amdgpu_ib *ib; struct drm_amdgpu_cs_chunk_ib *chunk_ib; struct amdgpu_ring *ring; chunk = &parser->chunks[i]; - ib = &parser->ibs[j]; + ib = &parser->job->ibs[j]; chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata; if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB) @@ -645,6 +710,11 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, if (r) return r; + if (parser->job->ring && parser->job->ring != ring) + return -EINVAL; + + parser->job->ring = ring; + if (ring->funcs->parse_cs) { struct amdgpu_bo_va_mapping *m; struct amdgpu_bo *aobj = NULL; @@ -673,7 +743,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, offset = ((uint64_t)m->it.start) * AMDGPU_GPU_PAGE_SIZE; kptr += chunk_ib->va_start - offset; - r = amdgpu_ib_get(ring, NULL, chunk_ib->ib_bytes, ib); + r = amdgpu_ib_get(adev, NULL, chunk_ib->ib_bytes, ib); if (r) { DRM_ERROR("Failed to get ib !\n"); return r; @@ -682,7 +752,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, memcpy(ib->ptr, kptr, chunk_ib->ib_bytes); amdgpu_bo_kunmap(aobj); } else { - r = amdgpu_ib_get(ring, vm, 0, ib); + r = amdgpu_ib_get(adev, vm, 0, ib); if (r) { DRM_ERROR("Failed to get ib !\n"); return r; @@ -697,15 +767,12 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, j++; } - if (!parser->num_ibs) - return 0; - /* add GDS resources to first IB */ if (parser->bo_list) { struct amdgpu_bo *gds = parser->bo_list->gds_obj; struct amdgpu_bo *gws = parser->bo_list->gws_obj; struct amdgpu_bo *oa = parser->bo_list->oa_obj; - struct amdgpu_ib *ib = &parser->ibs[0]; + struct amdgpu_ib *ib = &parser->job->ibs[0]; if (gds) { ib->gds_base = amdgpu_bo_gpu_offset(gds); @@ -721,15 +788,15 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, } } /* wrap the last IB with user fence */ - if (parser->uf.bo) { - struct amdgpu_ib *ib = &parser->ibs[parser->num_ibs - 1]; + if (parser->job->uf.bo) { + struct amdgpu_ib *ib = &parser->job->ibs[parser->job->num_ibs - 1]; /* UVD & VCE fw doesn't support user fences */ 
- if (ib->ring->type == AMDGPU_RING_TYPE_UVD || - ib->ring->type == AMDGPU_RING_TYPE_VCE) + if (parser->job->ring->type == AMDGPU_RING_TYPE_UVD || + parser->job->ring->type == AMDGPU_RING_TYPE_VCE) return -EINVAL; - ib->user = &parser->uf; + ib->user = &parser->job->uf; } return 0; @@ -739,14 +806,8 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, struct amdgpu_cs_parser *p) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; - struct amdgpu_ib *ib; int i, j, r; - if (!p->num_ibs) - return 0; - - /* Add dependencies to first IB */ - ib = &p->ibs[0]; for (i = 0; i < p->nchunks; ++i) { struct drm_amdgpu_cs_chunk_dep *deps; struct amdgpu_cs_chunk *chunk; @@ -784,7 +845,8 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, return r; } else if (fence) { - r = amdgpu_sync_fence(adev, &ib->sync, fence); + r = amdgpu_sync_fence(adev, &p->job->sync, + fence); fence_put(fence); amdgpu_ctx_put(ctx); if (r) @@ -796,15 +858,36 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev, return 0; } -static int amdgpu_cs_free_job(struct amdgpu_job *job) +static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, + union drm_amdgpu_cs *cs) { - int i; - if (job->ibs) - for (i = 0; i < job->num_ibs; i++) - amdgpu_ib_free(job->adev, &job->ibs[i]); - kfree(job->ibs); - if (job->uf.bo) - amdgpu_bo_unref(&job->uf.bo); + struct amdgpu_ring *ring = p->job->ring; + struct amd_sched_fence *fence; + struct amdgpu_job *job; + + job = p->job; + p->job = NULL; + + job->base.sched = &ring->sched; + job->base.s_entity = &p->ctx->rings[ring->idx].entity; + job->owner = p->filp; + + fence = amd_sched_fence_create(job->base.s_entity, p->filp); + if (!fence) { + amdgpu_job_free(job); + return -ENOMEM; + } + + job->base.s_fence = fence; + p->fence = fence_get(&fence->base); + + cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, + &fence->base); + job->ibs[job->num_ibs - 1].sequence = cs->out.handle; + + trace_amdgpu_cs_ioctl(job); + 
amd_sched_entity_push_job(&job->base); + return 0; } @@ -829,7 +912,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) r = amdgpu_cs_handle_lockup(adev, r); return r; } - r = amdgpu_cs_parser_relocs(&parser); + r = amdgpu_cs_parser_bos(&parser, data); if (r == -ENOMEM) DRM_ERROR("Not enough memory for command submission!\n"); else if (r && r != -ERESTARTSYS) @@ -848,68 +931,14 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (r) goto out; - for (i = 0; i < parser.num_ibs; i++) + for (i = 0; i < parser.job->num_ibs; i++) trace_amdgpu_cs(&parser, i); r = amdgpu_cs_ib_vm_chunk(adev, &parser); if (r) goto out; - if (amdgpu_enable_scheduler && parser.num_ibs) { - struct amdgpu_ring * ring = parser.ibs->ring; - struct amd_sched_fence *fence; - struct amdgpu_job *job; - - job = kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL); - if (!job) { - r = -ENOMEM; - goto out; - } - - job->base.sched = &ring->sched; - job->base.s_entity = &parser.ctx->rings[ring->idx].entity; - job->adev = parser.adev; - job->owner = parser.filp; - job->free_job = amdgpu_cs_free_job; - - job->ibs = parser.ibs; - job->num_ibs = parser.num_ibs; - parser.ibs = NULL; - parser.num_ibs = 0; - - if (job->ibs[job->num_ibs - 1].user) { - job->uf = parser.uf; - job->ibs[job->num_ibs - 1].user = &job->uf; - parser.uf.bo = NULL; - } - - fence = amd_sched_fence_create(job->base.s_entity, - parser.filp); - if (!fence) { - r = -ENOMEM; - amdgpu_cs_free_job(job); - kfree(job); - goto out; - } - job->base.s_fence = fence; - parser.fence = fence_get(&fence->base); - - cs->out.handle = amdgpu_ctx_add_fence(parser.ctx, ring, - &fence->base); - job->ibs[job->num_ibs - 1].sequence = cs->out.handle; - - trace_amdgpu_cs_ioctl(job); - amd_sched_entity_push_job(&job->base); - - } else { - struct amdgpu_fence *fence; - - r = amdgpu_ib_schedule(adev, parser.num_ibs, parser.ibs, - parser.filp); - fence = parser.ibs[parser.num_ibs - 1].fence; - parser.fence = 
fence_get(&fence->base); - cs->out.handle = parser.ibs[parser.num_ibs - 1].sequence; - } + r = amdgpu_cs_submit(&parser, cs); out: amdgpu_cs_parser_fini(&parser, r, reserved_buffers); @@ -980,30 +1009,36 @@ struct amdgpu_bo_va_mapping * amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, uint64_t addr, struct amdgpu_bo **bo) { - struct amdgpu_bo_list_entry *reloc; struct amdgpu_bo_va_mapping *mapping; + unsigned i; + + if (!parser->bo_list) + return NULL; addr /= AMDGPU_GPU_PAGE_SIZE; - list_for_each_entry(reloc, &parser->validated, tv.head) { - if (!reloc->bo_va) + for (i = 0; i < parser->bo_list->num_entries; i++) { + struct amdgpu_bo_list_entry *lobj; + + lobj = &parser->bo_list->array[i]; + if (!lobj->bo_va) continue; - list_for_each_entry(mapping, &reloc->bo_va->valids, list) { + list_for_each_entry(mapping, &lobj->bo_va->valids, list) { if (mapping->it.start > addr || addr > mapping->it.last) continue; - *bo = reloc->bo_va->bo; + *bo = lobj->bo_va->bo; return mapping; } - list_for_each_entry(mapping, &reloc->bo_va->invalids, list) { + list_for_each_entry(mapping, &lobj->bo_va->invalids, list) { if (mapping->it.start > addr || addr > mapping->it.last) continue; - *bo = reloc->bo_va->bo; + *bo = lobj->bo_va->bo; return mapping; } } |