diff options
author | Jesse Barnes <jbarnes@virtuousgeek.org> | 2009-12-17 22:05:42 -0500 |
---|---|---|
committer | Eric Anholt <eric@anholt.net> | 2010-01-06 09:39:39 -0800 |
commit | 76446cac68568fc7f5168a27deaf803ed22a4360 (patch) | |
tree | 66e05e6932edd763d75fc7be7c20d4593fe2b35f /drivers | |
parent | c566ec49159b806db95a90fd8f37448376cd0ad2 (diff) | |
download | op-kernel-dev-76446cac68568fc7f5168a27deaf803ed22a4360.zip op-kernel-dev-76446cac68568fc7f5168a27deaf803ed22a4360.tar.gz |
drm/i915: execbuf2 support
This patch adds a new execbuf ioctl, execbuf2, for use by clients that
want to control fence register allocation more finely. The buffer
passed in to the new ioctl includes a new relocation type to indicate
whether a given object needs a fence register assigned for the command
buffer in question.
Compatibility with the existing execbuf ioctl is implemented in terms
of the new code, preserving the assumption that fence registers are
required for pre-965 rendering commands.
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
[ickle: Remove pre-emptive clear_fence_reg()]
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Kristian Høgsberg <krh@bitplanet.net>
[anholt: Removed dmesg spam]
Signed-off-by: Eric Anholt <eric@anholt.net>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/gpu/drm/i915/i915_dma.c | 7 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_drv.h | 5 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c | 239 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem_tiling.c | 46 |
4 files changed, 219 insertions, 78 deletions
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index e3e5d50..d67be65 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -813,9 +813,13 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_PAGEFLIPPING: value = 1; break; + case I915_PARAM_HAS_EXECBUF2: + /* depends on GEM */ + value = dev_priv->has_gem; + break; default: DRM_DEBUG_DRIVER("Unknown parameter %d\n", - param->param); + param->param); return -EINVAL; } @@ -1646,6 +1650,7 @@ struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF(DRM_I915_HWS_ADDR, i915_set_status_page, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF(DRM_I915_GEM_INIT, i915_gem_init_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF(DRM_I915_GEM_EXECBUFFER, i915_gem_execbuffer, DRM_AUTH), + DRM_IOCTL_DEF(DRM_I915_GEM_EXECBUFFER2, i915_gem_execbuffer2, DRM_AUTH), DRM_IOCTL_DEF(DRM_I915_GEM_PIN, i915_gem_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY), DRM_IOCTL_DEF(DRM_I915_GEM_UNPIN, i915_gem_unpin_ioctl, DRM_AUTH|DRM_ROOT_ONLY), DRM_IOCTL_DEF(DRM_I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH), diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9a05f1a..7eb4ad5 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -815,6 +815,8 @@ int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_execbuffer(struct drm_device *dev, void *data, struct drm_file *file_priv); +int i915_gem_execbuffer2(struct drm_device *dev, void *data, + struct drm_file *file_priv); int i915_gem_pin_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_unpin_ioctl(struct drm_device *dev, void *data, @@ -881,6 +883,9 @@ void i915_gem_shrinker_exit(void); void i915_gem_detect_bit_6_swizzle(struct drm_device *dev); void i915_gem_object_do_bit_17_swizzle(struct drm_gem_object *obj); void i915_gem_object_save_bit_17_swizzle(struct drm_gem_object *obj); +bool i915_tiling_ok(struct drm_device *dev, int stride, int size, + int tiling_mode); +bool i915_obj_fenceable(struct drm_device *dev, struct drm_gem_object *obj); /* i915_gem_debug.c */ void i915_gem_dump_object(struct drm_gem_object *obj, int len, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9e81a0d..0330c3a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3199,7 +3199,7 @@ i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj, static int i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, struct drm_file *file_priv, - struct drm_i915_gem_exec_object *entry, + struct drm_i915_gem_exec_object2 *entry, struct drm_i915_gem_relocation_entry *relocs) { struct drm_device *dev = obj->dev; @@ -3207,12 +3207,35 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, struct drm_i915_gem_object *obj_priv = obj->driver_private; int i, ret; void __iomem *reloc_page; + bool need_fence; + + need_fence = entry->flags & EXEC_OBJECT_NEEDS_FENCE && + obj_priv->tiling_mode != I915_TILING_NONE; + + /* Check fence reg constraints and rebind if necessary */ + if (need_fence && !i915_obj_fenceable(dev, obj)) + i915_gem_object_unbind(obj); /* Choose the GTT offset for our buffer and put it there. */ ret = i915_gem_object_pin(obj, (uint32_t) entry->alignment); if (ret) return ret; + /* + * Pre-965 chips need a fence register set up in order to + * properly handle blits to/from tiled surfaces. + */ + if (need_fence) { + ret = i915_gem_object_get_fence_reg(obj); + if (ret != 0) { + if (ret != -EBUSY && ret != -ERESTARTSYS) + DRM_ERROR("Failure to install fence: %d\n", + ret); + i915_gem_object_unpin(obj); + return ret; + } + } + entry->offset = obj_priv->gtt_offset; /* Apply the relocations, using the GTT aperture to avoid cache @@ -3374,7 +3397,7 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, */ static int i915_dispatch_gem_execbuffer(struct drm_device *dev, - struct drm_i915_gem_execbuffer *exec, + struct drm_i915_gem_execbuffer2 *exec, struct drm_clip_rect *cliprects, uint64_t exec_offset) { @@ -3464,7 +3487,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv) } static int -i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object *exec_list, +i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object2 *exec_list, uint32_t buffer_count, struct drm_i915_gem_relocation_entry **relocs) { @@ -3479,8 +3502,10 @@ i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object *exec_list, } *relocs = drm_calloc_large(reloc_count, sizeof(**relocs)); - if (*relocs == NULL) + if (*relocs == NULL) { + DRM_ERROR("failed to alloc relocs, count %d\n", reloc_count); return -ENOMEM; + } for (i = 0; i < buffer_count; i++) { struct drm_i915_gem_relocation_entry __user *user_relocs; @@ -3504,7 +3529,7 @@ i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object *exec_list, } static int -i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object *exec_list, +i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object2 *exec_list, uint32_t buffer_count, struct drm_i915_gem_relocation_entry *relocs) { @@ -3537,7 +3562,7 @@ err: } static int -i915_gem_check_execbuffer (struct drm_i915_gem_execbuffer *exec, +i915_gem_check_execbuffer (struct drm_i915_gem_execbuffer2 *exec, uint64_t exec_offset) { uint32_t exec_start, exec_len; @@ -3590,18 +3615,18 @@ i915_gem_wait_for_pending_flip(struct drm_device *dev, } int -i915_gem_execbuffer(struct drm_device *dev, void *data, - struct drm_file *file_priv) +i915_gem_do_execbuffer(struct drm_device *dev, void *data, + struct drm_file *file_priv, + struct drm_i915_gem_execbuffer2 *args, + struct drm_i915_gem_exec_object2 *exec_list) { drm_i915_private_t *dev_priv = dev->dev_private; - struct drm_i915_gem_execbuffer *args = data; - struct drm_i915_gem_exec_object *exec_list = NULL; struct drm_gem_object **object_list = NULL; struct drm_gem_object *batch_obj; struct drm_i915_gem_object *obj_priv; struct drm_clip_rect *cliprects = NULL; struct drm_i915_gem_relocation_entry *relocs; - int ret, ret2, i, pinned = 0; + int ret = 0, ret2, i, pinned = 0; uint64_t exec_offset; uint32_t seqno, flush_domains, reloc_index; int pin_tries, flips; @@ -3615,25 +3640,13 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, DRM_ERROR("execbuf with %d buffers\n", args->buffer_count); return -EINVAL; } - /* Copy in the exec list from userland */ - exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count); object_list = drm_malloc_ab(sizeof(*object_list), args->buffer_count); - if (exec_list == NULL || object_list == NULL) { - DRM_ERROR("Failed to allocate exec or object list " - "for %d buffers\n", + if (object_list == NULL) { + DRM_ERROR("Failed to allocate object list for %d buffers\n", args->buffer_count); ret = -ENOMEM; goto pre_mutex_err; } - ret = copy_from_user(exec_list, - (struct drm_i915_relocation_entry __user *) - (uintptr_t) args->buffers_ptr, - sizeof(*exec_list) * args->buffer_count); - if (ret != 0) { - DRM_ERROR("copy %d exec entries failed %d\n", - args->buffer_count, ret); - goto pre_mutex_err; - } if (args->num_cliprects != 0) { cliprects = kcalloc(args->num_cliprects, sizeof(*cliprects), @@ -3885,20 +3898,6 @@ err: mutex_unlock(&dev->struct_mutex); - if (!ret) { - /* Copy the new buffer offsets back to the user's exec list. */ - ret = copy_to_user((struct drm_i915_relocation_entry __user *) - (uintptr_t) args->buffers_ptr, - exec_list, - sizeof(*exec_list) * args->buffer_count); - if (ret) { - ret = -EFAULT; - DRM_ERROR("failed to copy %d exec entries " - "back to user (%d)\n", - args->buffer_count, ret); - } - } - /* Copy the updated relocations out regardless of current error * state. Failure to update the relocs would mean that the next * time userland calls execbuf, it would do so with presumed offset @@ -3915,12 +3914,158 @@ err: pre_mutex_err: drm_free_large(object_list); - drm_free_large(exec_list); kfree(cliprects); return ret; } +/* + * Legacy execbuffer just creates an exec2 list from the original exec object + * list array and passes it to the real function. + */ +int +i915_gem_execbuffer(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_i915_gem_execbuffer *args = data; + struct drm_i915_gem_execbuffer2 exec2; + struct drm_i915_gem_exec_object *exec_list = NULL; + struct drm_i915_gem_exec_object2 *exec2_list = NULL; + int ret, i; + +#if WATCH_EXEC + DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n", + (int) args->buffers_ptr, args->buffer_count, args->batch_len); +#endif + + if (args->buffer_count < 1) { + DRM_ERROR("execbuf with %d buffers\n", args->buffer_count); + return -EINVAL; + } + + /* Copy in the exec list from userland */ + exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count); + exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count); + if (exec_list == NULL || exec2_list == NULL) { + DRM_ERROR("Failed to allocate exec list for %d buffers\n", + args->buffer_count); + drm_free_large(exec_list); + drm_free_large(exec2_list); + return -ENOMEM; + } + ret = copy_from_user(exec_list, + (struct drm_i915_relocation_entry __user *) + (uintptr_t) args->buffers_ptr, + sizeof(*exec_list) * args->buffer_count); + if (ret != 0) { + DRM_ERROR("copy %d exec entries failed %d\n", + args->buffer_count, ret); + drm_free_large(exec_list); + drm_free_large(exec2_list); + return -EFAULT; + } + + for (i = 0; i < args->buffer_count; i++) { + exec2_list[i].handle = exec_list[i].handle; + exec2_list[i].relocation_count = exec_list[i].relocation_count; + exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr; + exec2_list[i].alignment = exec_list[i].alignment; + exec2_list[i].offset = exec_list[i].offset; + if (!IS_I965G(dev)) + exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE; + else + exec2_list[i].flags = 0; + } + + exec2.buffers_ptr = args->buffers_ptr; + exec2.buffer_count = args->buffer_count; + exec2.batch_start_offset = args->batch_start_offset; + exec2.batch_len = args->batch_len; + exec2.DR1 = args->DR1; + exec2.DR4 = args->DR4; + exec2.num_cliprects = args->num_cliprects; + exec2.cliprects_ptr = args->cliprects_ptr; + exec2.flags = 0; + + ret = i915_gem_do_execbuffer(dev, data, file_priv, &exec2, exec2_list); + if (!ret) { + /* Copy the new buffer offsets back to the user's exec list. */ + for (i = 0; i < args->buffer_count; i++) + exec_list[i].offset = exec2_list[i].offset; + /* ... and back out to userspace */ + ret = copy_to_user((struct drm_i915_relocation_entry __user *) + (uintptr_t) args->buffers_ptr, + exec_list, + sizeof(*exec_list) * args->buffer_count); + if (ret) { + ret = -EFAULT; + DRM_ERROR("failed to copy %d exec entries " + "back to user (%d)\n", + args->buffer_count, ret); + } + } else { + DRM_ERROR("i915_gem_do_execbuffer returns %d\n", ret); + } + + drm_free_large(exec_list); + drm_free_large(exec2_list); + return ret; +} + +int +i915_gem_execbuffer2(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_i915_gem_execbuffer2 *args = data; + struct drm_i915_gem_exec_object2 *exec2_list = NULL; + int ret; + +#if WATCH_EXEC + DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n", + (int) args->buffers_ptr, args->buffer_count, args->batch_len); +#endif + + if (args->buffer_count < 1) { + DRM_ERROR("execbuf2 with %d buffers\n", args->buffer_count); + return -EINVAL; + } + + exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count); + if (exec2_list == NULL) { + DRM_ERROR("Failed to allocate exec list for %d buffers\n", + args->buffer_count); + return -ENOMEM; + } + ret = copy_from_user(exec2_list, + (struct drm_i915_relocation_entry __user *) + (uintptr_t) args->buffers_ptr, + sizeof(*exec2_list) * args->buffer_count); + if (ret != 0) { + DRM_ERROR("copy %d exec entries failed %d\n", + args->buffer_count, ret); + drm_free_large(exec2_list); + return -EFAULT; + } + + ret = i915_gem_do_execbuffer(dev, data, file_priv, args, exec2_list); + if (!ret) { + /* Copy the new buffer offsets back to the user's exec list. */ + ret = copy_to_user((struct drm_i915_relocation_entry __user *) + (uintptr_t) args->buffers_ptr, + exec2_list, + sizeof(*exec2_list) * args->buffer_count); + if (ret) { + ret = -EFAULT; + DRM_ERROR("failed to copy %d exec entries " + "back to user (%d)\n", + args->buffer_count, ret); + } + } + + drm_free_large(exec2_list); + return ret; +} + int i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment) { @@ -3934,19 +4079,7 @@ i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment) if (ret) return ret; } - /* - * Pre-965 chips need a fence register set up in order to - * properly handle tiled surfaces. - */ - if (!IS_I965G(dev) && obj_priv->tiling_mode != I915_TILING_NONE) { - ret = i915_gem_object_get_fence_reg(obj); - if (ret != 0) { - if (ret != -EBUSY && ret != -ERESTARTSYS) - DRM_ERROR("Failure to install fence: %d\n", - ret); - return ret; - } - } + obj_priv->pin_count++; /* If the object is not active and not pending a flush, diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 30d6af6c..df278b2 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -304,35 +304,39 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev) /** - * Returns the size of the fence for a tiled object of the given size. + * Returns whether an object is currently fenceable. If not, it may need + * to be unbound and have its pitch adjusted. */ -static int -i915_get_fence_size(struct drm_device *dev, int size) +bool +i915_obj_fenceable(struct drm_device *dev, struct drm_gem_object *obj) { - int i; - int start; + struct drm_i915_gem_object *obj_priv = obj->driver_private; if (IS_I965G(dev)) { /* The 965 can have fences at any page boundary. */ - return ALIGN(size, 4096); + if (obj->size & 4095) + return false; + return true; + } else if (IS_I9XX(dev)) { + if (obj_priv->gtt_offset & ~I915_FENCE_START_MASK) + return false; } else { - /* Align the size to a power of two greater than the smallest - * fence size. - */ - if (IS_I9XX(dev)) - start = 1024 * 1024; - else - start = 512 * 1024; + if (obj_priv->gtt_offset & ~I830_FENCE_START_MASK) + return false; + } - for (i = start; i < size; i <<= 1) - ; + /* Power of two sized... */ + if (obj->size & (obj->size - 1)) + return false; - return i; - } + /* Objects must be size aligned as well */ + if (obj_priv->gtt_offset & (obj->size - 1)) + return false; + return true; } /* Check pitch constriants for all chips & tiling formats */ -static bool +bool i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode) { int tile_width; @@ -384,12 +388,6 @@ i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode) if (stride & (stride - 1)) return false; - /* We don't 0handle the aperture area covered by the fence being bigger - * than the object size. - */ - if (i915_get_fence_size(dev, size) != size) - return false; - return true; } |