summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Chris Wilson <chris@chris-wilson.co.uk> 2010-10-14 12:10:41 +0100
committer: Chris Wilson <chris@chris-wilson.co.uk> 2010-10-19 09:18:36 +0100
commit: 2549d6c26ce1c85a76990b972a2c7e8f440455cd (patch)
tree: 1e9e3f948ca2f15a1c98c76f1307f87174fd7252
parent: 55b7d6e8c4690047ac001026cb75a47f747db816 (diff)
downloadop-kernel-dev-2549d6c26ce1c85a76990b972a2c7e8f440455cd.zip
op-kernel-dev-2549d6c26ce1c85a76990b972a2c7e8f440455cd.tar.gz
drm/i915: Avoid vmallocing a buffer for the relocations
... perform an access validation check up front instead and copy them in on-demand, during i915_gem_object_pin_and_relocate().

As around 20% of the CPU overhead may be spent inside vmalloc for the relocation entries when submitting an execbuffer [for x11perf -aa10text], the savings are considerable and result in around a 10% throughput increase [for glyphs].

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r-- drivers/gpu/drm/i915/i915_gem.c | 224
1 files changed, 75 insertions, 149 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 72ab303..67998e8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3291,12 +3291,12 @@ i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
static int
i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
struct drm_file *file_priv,
- struct drm_i915_gem_exec_object2 *entry,
- struct drm_i915_gem_relocation_entry *relocs)
+ struct drm_i915_gem_exec_object2 *entry)
{
struct drm_device *dev = obj->dev;
drm_i915_private_t *dev_priv = dev->dev_private;
struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
+ struct drm_i915_gem_relocation_entry __user *user_relocs;
int i, ret;
void __iomem *reloc_page;
bool need_fence;
@@ -3337,15 +3337,24 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
/* Apply the relocations, using the GTT aperture to avoid cache
* flushing requirements.
*/
+ user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr;
for (i = 0; i < entry->relocation_count; i++) {
- struct drm_i915_gem_relocation_entry *reloc= &relocs[i];
+ struct drm_i915_gem_relocation_entry reloc;
struct drm_gem_object *target_obj;
struct drm_i915_gem_object *target_obj_priv;
uint32_t reloc_val, reloc_offset;
uint32_t __iomem *reloc_entry;
+ ret = __copy_from_user_inatomic(&reloc,
+ user_relocs+i,
+ sizeof(reloc));
+ if (ret) {
+ i915_gem_object_unpin(obj);
+ return -EFAULT;
+ }
+
target_obj = drm_gem_object_lookup(obj->dev, file_priv,
- reloc->target_handle);
+ reloc.target_handle);
if (target_obj == NULL) {
i915_gem_object_unpin(obj);
return -ENOENT;
@@ -3358,13 +3367,13 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
"presumed %08x delta %08x\n",
__func__,
obj,
- (int) reloc->offset,
- (int) reloc->target_handle,
- (int) reloc->read_domains,
- (int) reloc->write_domain,
+ (int) reloc.offset,
+ (int) reloc.target_handle,
+ (int) reloc.read_domains,
+ (int) reloc.write_domain,
(int) target_obj_priv->gtt_offset,
- (int) reloc->presumed_offset,
- reloc->delta);
+ (int) reloc.presumed_offset,
+ reloc.delta);
#endif
/* The target buffer should have appeared before us in the
@@ -3372,89 +3381,89 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
*/
if (target_obj_priv->gtt_space == NULL) {
DRM_ERROR("No GTT space found for object %d\n",
- reloc->target_handle);
+ reloc.target_handle);
drm_gem_object_unreference(target_obj);
i915_gem_object_unpin(obj);
return -EINVAL;
}
/* Validate that the target is in a valid r/w GPU domain */
- if (reloc->write_domain & (reloc->write_domain - 1)) {
+ if (reloc.write_domain & (reloc.write_domain - 1)) {
DRM_ERROR("reloc with multiple write domains: "
"obj %p target %d offset %d "
"read %08x write %08x",
- obj, reloc->target_handle,
- (int) reloc->offset,
- reloc->read_domains,
- reloc->write_domain);
+ obj, reloc.target_handle,
+ (int) reloc.offset,
+ reloc.read_domains,
+ reloc.write_domain);
drm_gem_object_unreference(target_obj);
i915_gem_object_unpin(obj);
return -EINVAL;
}
- if (reloc->write_domain & I915_GEM_DOMAIN_CPU ||
- reloc->read_domains & I915_GEM_DOMAIN_CPU) {
+ if (reloc.write_domain & I915_GEM_DOMAIN_CPU ||
+ reloc.read_domains & I915_GEM_DOMAIN_CPU) {
DRM_ERROR("reloc with read/write CPU domains: "
"obj %p target %d offset %d "
"read %08x write %08x",
- obj, reloc->target_handle,
- (int) reloc->offset,
- reloc->read_domains,
- reloc->write_domain);
+ obj, reloc.target_handle,
+ (int) reloc.offset,
+ reloc.read_domains,
+ reloc.write_domain);
drm_gem_object_unreference(target_obj);
i915_gem_object_unpin(obj);
return -EINVAL;
}
- if (reloc->write_domain && target_obj->pending_write_domain &&
- reloc->write_domain != target_obj->pending_write_domain) {
+ if (reloc.write_domain && target_obj->pending_write_domain &&
+ reloc.write_domain != target_obj->pending_write_domain) {
DRM_ERROR("Write domain conflict: "
"obj %p target %d offset %d "
"new %08x old %08x\n",
- obj, reloc->target_handle,
- (int) reloc->offset,
- reloc->write_domain,
+ obj, reloc.target_handle,
+ (int) reloc.offset,
+ reloc.write_domain,
target_obj->pending_write_domain);
drm_gem_object_unreference(target_obj);
i915_gem_object_unpin(obj);
return -EINVAL;
}
- target_obj->pending_read_domains |= reloc->read_domains;
- target_obj->pending_write_domain |= reloc->write_domain;
+ target_obj->pending_read_domains |= reloc.read_domains;
+ target_obj->pending_write_domain |= reloc.write_domain;
/* If the relocation already has the right value in it, no
* more work needs to be done.
*/
- if (target_obj_priv->gtt_offset == reloc->presumed_offset) {
+ if (target_obj_priv->gtt_offset == reloc.presumed_offset) {
drm_gem_object_unreference(target_obj);
continue;
}
/* Check that the relocation address is valid... */
- if (reloc->offset > obj->size - 4) {
+ if (reloc.offset > obj->size - 4) {
DRM_ERROR("Relocation beyond object bounds: "
"obj %p target %d offset %d size %d.\n",
- obj, reloc->target_handle,
- (int) reloc->offset, (int) obj->size);
+ obj, reloc.target_handle,
+ (int) reloc.offset, (int) obj->size);
drm_gem_object_unreference(target_obj);
i915_gem_object_unpin(obj);
return -EINVAL;
}
- if (reloc->offset & 3) {
+ if (reloc.offset & 3) {
DRM_ERROR("Relocation not 4-byte aligned: "
"obj %p target %d offset %d.\n",
- obj, reloc->target_handle,
- (int) reloc->offset);
+ obj, reloc.target_handle,
+ (int) reloc.offset);
drm_gem_object_unreference(target_obj);
i915_gem_object_unpin(obj);
return -EINVAL;
}
/* and points to somewhere within the target object. */
- if (reloc->delta >= target_obj->size) {
+ if (reloc.delta >= target_obj->size) {
DRM_ERROR("Relocation beyond target object bounds: "
"obj %p target %d delta %d size %d.\n",
- obj, reloc->target_handle,
- (int) reloc->delta, (int) target_obj->size);
+ obj, reloc.target_handle,
+ (int) reloc.delta, (int) target_obj->size);
drm_gem_object_unreference(target_obj);
i915_gem_object_unpin(obj);
return -EINVAL;
@@ -3470,23 +3479,18 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
/* Map the page containing the relocation we're going to
* perform.
*/
- reloc_offset = obj_priv->gtt_offset + reloc->offset;
+ reloc_offset = obj_priv->gtt_offset + reloc.offset;
reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
(reloc_offset &
~(PAGE_SIZE - 1)),
KM_USER0);
reloc_entry = (uint32_t __iomem *)(reloc_page +
(reloc_offset & (PAGE_SIZE - 1)));
- reloc_val = target_obj_priv->gtt_offset + reloc->delta;
+ reloc_val = target_obj_priv->gtt_offset + reloc.delta;
writel(reloc_val, reloc_entry);
io_mapping_unmap_atomic(reloc_page, KM_USER0);
- /* The updated presumed offset for this entry will be
- * copied back out to the user.
- */
- reloc->presumed_offset = target_obj_priv->gtt_offset;
-
drm_gem_object_unreference(target_obj);
}
@@ -3551,98 +3555,40 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
}
static int
-i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object2 *exec_list,
- uint32_t buffer_count,
- struct drm_i915_gem_relocation_entry **relocs)
+i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec,
+ uint64_t exec_offset)
{
- uint32_t reloc_count = 0, reloc_index = 0, i;
- int ret;
-
- *relocs = NULL;
- for (i = 0; i < buffer_count; i++) {
- if (reloc_count + exec_list[i].relocation_count < reloc_count)
- return -EINVAL;
- reloc_count += exec_list[i].relocation_count;
- }
-
- *relocs = drm_calloc_large(reloc_count, sizeof(**relocs));
- if (*relocs == NULL) {
- DRM_ERROR("failed to alloc relocs, count %d\n", reloc_count);
- return -ENOMEM;
- }
-
- for (i = 0; i < buffer_count; i++) {
- struct drm_i915_gem_relocation_entry __user *user_relocs;
+ uint32_t exec_start, exec_len;
- user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr;
+ exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
+ exec_len = (uint32_t) exec->batch_len;
- ret = copy_from_user(&(*relocs)[reloc_index],
- user_relocs,
- exec_list[i].relocation_count *
- sizeof(**relocs));
- if (ret != 0) {
- drm_free_large(*relocs);
- *relocs = NULL;
- return -EFAULT;
- }
+ if ((exec_start | exec_len) & 0x7)
+ return -EINVAL;
- reloc_index += exec_list[i].relocation_count;
- }
+ if (!exec_start)
+ return -EINVAL;
return 0;
}
static int
-i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object2 *exec_list,
- uint32_t buffer_count,
- struct drm_i915_gem_relocation_entry *relocs)
+validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
+ int count)
{
- uint32_t reloc_count = 0, i;
- int ret = 0;
-
- if (relocs == NULL)
- return 0;
-
- for (i = 0; i < buffer_count; i++) {
- struct drm_i915_gem_relocation_entry __user *user_relocs;
- int unwritten;
-
- user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr;
+ int i;
- unwritten = copy_to_user(user_relocs,
- &relocs[reloc_count],
- exec_list[i].relocation_count *
- sizeof(*relocs));
+ for (i = 0; i < count; i++) {
+ char __user *ptr = (char __user *)(uintptr_t)exec[i].relocs_ptr;
+ size_t length = exec[i].relocation_count * sizeof(struct drm_i915_gem_relocation_entry);
- if (unwritten) {
- ret = -EFAULT;
- goto err;
- }
+ if (!access_ok(VERIFY_READ, ptr, length))
+ return -EFAULT;
- reloc_count += exec_list[i].relocation_count;
+ if (fault_in_pages_readable(ptr, length))
+ return -EFAULT;
}
-err:
- drm_free_large(relocs);
-
- return ret;
-}
-
-static int
-i915_gem_check_execbuffer (struct drm_i915_gem_execbuffer2 *exec,
- uint64_t exec_offset)
-{
- uint32_t exec_start, exec_len;
-
- exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
- exec_len = (uint32_t) exec->batch_len;
-
- if ((exec_start | exec_len) & 0x7)
- return -EINVAL;
-
- if (!exec_start)
- return -EINVAL;
-
return 0;
}
@@ -3657,11 +3603,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
struct drm_gem_object *batch_obj;
struct drm_i915_gem_object *obj_priv;
struct drm_clip_rect *cliprects = NULL;
- struct drm_i915_gem_relocation_entry *relocs = NULL;
struct drm_i915_gem_request *request = NULL;
- int ret, ret2, i, pinned = 0;
+ int ret, i, pinned = 0;
uint64_t exec_offset;
- uint32_t reloc_index;
int pin_tries, flips;
struct intel_ring_buffer *ring = NULL;
@@ -3670,6 +3614,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
if (ret)
return ret;
+ ret = validate_exec_list(exec_list, args->buffer_count);
+ if (ret)
+ return ret;
+
#if WATCH_EXEC
DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
(int) args->buffers_ptr, args->buffer_count, args->batch_len);
@@ -3722,11 +3670,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
goto pre_mutex_err;
}
- ret = i915_gem_get_relocs_from_user(exec_list, args->buffer_count,
- &relocs);
- if (ret != 0)
- goto pre_mutex_err;
-
ret = i915_mutex_lock_interruptible(dev);
if (ret)
goto pre_mutex_err;
@@ -3765,19 +3708,16 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
/* Pin and relocate */
for (pin_tries = 0; ; pin_tries++) {
ret = 0;
- reloc_index = 0;
for (i = 0; i < args->buffer_count; i++) {
object_list[i]->pending_read_domains = 0;
object_list[i]->pending_write_domain = 0;
ret = i915_gem_object_pin_and_relocate(object_list[i],
file_priv,
- &exec_list[i],
- &relocs[reloc_index]);
+ &exec_list[i]);
if (ret)
break;
pinned = i + 1;
- reloc_index += exec_list[i].relocation_count;
}
/* success */
if (ret == 0)
@@ -3967,20 +3907,6 @@ err:
mutex_unlock(&dev->struct_mutex);
pre_mutex_err:
- /* Copy the updated relocations out regardless of current error
- * state. Failure to update the relocs would mean that the next
- * time userland calls execbuf, it would do so with presumed offset
- * state that didn't match the actual object state.
- */
- ret2 = i915_gem_put_relocs_to_user(exec_list, args->buffer_count,
- relocs);
- if (ret2 != 0) {
- DRM_ERROR("Failed to copy relocations back out: %d\n", ret2);
-
- if (ret == 0)
- ret = ret2;
- }
-
drm_free_large(object_list);
kfree(cliprects);
kfree(request);
OpenPOWER on IntegriCloud