summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2013-01-08 10:53:17 +0000
committerDaniel Vetter <daniel.vetter@ffwll.ch>2013-01-17 22:23:47 +0100
commiteef90ccb8a4d50b219a95cc53878ebb007315b32 (patch)
treee10ef8121e9b6f0c061df75967c7af77a942feb3
parented5982e6ce5f106abcbf071f80730db344a6da42 (diff)
downloadop-kernel-dev-eef90ccb8a4d50b219a95cc53878ebb007315b32.zip
op-kernel-dev-eef90ccb8a4d50b219a95cc53878ebb007315b32.tar.gz
drm/i915: Use the reloc.handle as an index into the execbuffer array
Using copywinwin10 as an example that is dependent upon emitting a lot of relocations (2 per operation), we see improvements of: c2d/gm45: 618000.0/sec to 623000.0/sec. i3-330m: 748000.0/sec to 789000.0/sec. (measured relative to a baseline with neither optimisations applied). Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Reviewed-by: Imre Deak <imre.deak@intel.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
-rw-r--r--drivers/gpu/drm/i915/i915_dma.c3
-rw-r--r--drivers/gpu/drm/i915/i915_gem_execbuffer.c100
-rw-r--r--include/uapi/drm/i915_drm.h8
3 files changed, 71 insertions, 40 deletions
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index a6e047d..4421182 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -995,6 +995,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
case I915_PARAM_HAS_EXEC_NO_RELOC:
value = 1;
break;
+ case I915_PARAM_HAS_EXEC_HANDLE_LUT:
+ value = 1;
+ break;
default:
DRM_DEBUG_DRIVER("Unknown parameter %d\n",
param->param);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 34f6cdf..f5a11ec 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -36,24 +36,40 @@
struct eb_objects {
struct list_head objects;
int and;
- struct hlist_head buckets[0];
+ union {
+ struct drm_i915_gem_object *lut[0];
+ struct hlist_head buckets[0];
+ };
};
static struct eb_objects *
-eb_create(int size)
+eb_create(struct drm_i915_gem_execbuffer2 *args)
{
- struct eb_objects *eb;
- int count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
- BUILD_BUG_ON(!is_power_of_2(PAGE_SIZE / sizeof(struct hlist_head)));
- while (count > size)
- count >>= 1;
- eb = kzalloc(count*sizeof(struct hlist_head) +
- sizeof(struct eb_objects),
- GFP_KERNEL);
- if (eb == NULL)
- return eb;
-
- eb->and = count - 1;
+ struct eb_objects *eb = NULL;
+
+ if (args->flags & I915_EXEC_HANDLE_LUT) {
+ int size = args->buffer_count;
+ size *= sizeof(struct drm_i915_gem_object *);
+ size += sizeof(struct eb_objects);
+ eb = kmalloc(size, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
+ }
+
+ if (eb == NULL) {
+ int size = args->buffer_count;
+ int count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
+ BUILD_BUG_ON(!is_power_of_2(PAGE_SIZE / sizeof(struct hlist_head)));
+ while (count > 2*size)
+ count >>= 1;
+ eb = kzalloc(count*sizeof(struct hlist_head) +
+ sizeof(struct eb_objects),
+ GFP_TEMPORARY);
+ if (eb == NULL)
+ return eb;
+
+ eb->and = count - 1;
+ } else
+ eb->and = -args->buffer_count;
+
INIT_LIST_HEAD(&eb->objects);
return eb;
}
@@ -61,26 +77,20 @@ eb_create(int size)
static void
eb_reset(struct eb_objects *eb)
{
- memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
-}
-
-static void
-eb_add_object(struct eb_objects *eb, struct drm_i915_gem_object *obj)
-{
- hlist_add_head(&obj->exec_node,
- &eb->buckets[obj->exec_handle & eb->and]);
+ if (eb->and >= 0)
+ memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
}
static int
eb_lookup_objects(struct eb_objects *eb,
struct drm_i915_gem_exec_object2 *exec,
- int count,
+ const struct drm_i915_gem_execbuffer2 *args,
struct drm_file *file)
{
int i;
spin_lock(&file->table_lock);
- for (i = 0; i < count; i++) {
+ for (i = 0; i < args->buffer_count; i++) {
struct drm_i915_gem_object *obj;
obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle));
@@ -101,9 +111,15 @@ eb_lookup_objects(struct eb_objects *eb,
drm_gem_object_reference(&obj->base);
list_add_tail(&obj->exec_list, &eb->objects);
- obj->exec_handle = exec[i].handle;
obj->exec_entry = &exec[i];
- eb_add_object(eb, obj);
+ if (eb->and < 0) {
+ eb->lut[i] = obj;
+ } else {
+ uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle;
+ obj->exec_handle = handle;
+ hlist_add_head(&obj->exec_node,
+ &eb->buckets[handle & eb->and]);
+ }
}
spin_unlock(&file->table_lock);
@@ -113,18 +129,24 @@ eb_lookup_objects(struct eb_objects *eb,
static struct drm_i915_gem_object *
eb_get_object(struct eb_objects *eb, unsigned long handle)
{
- struct hlist_head *head;
- struct hlist_node *node;
- struct drm_i915_gem_object *obj;
+ if (eb->and < 0) {
+ if (handle >= -eb->and)
+ return NULL;
+ return eb->lut[handle];
+ } else {
+ struct hlist_head *head;
+ struct hlist_node *node;
- head = &eb->buckets[handle & eb->and];
- hlist_for_each(node, head) {
- obj = hlist_entry(node, struct drm_i915_gem_object, exec_node);
- if (obj->exec_handle == handle)
- return obj;
- }
+ head = &eb->buckets[handle & eb->and];
+ hlist_for_each(node, head) {
+ struct drm_i915_gem_object *obj;
- return NULL;
+ obj = hlist_entry(node, struct drm_i915_gem_object, exec_node);
+ if (obj->exec_handle == handle)
+ return obj;
+ }
+ return NULL;
+ }
}
static void
@@ -615,7 +637,7 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
/* reacquire the objects */
eb_reset(eb);
- ret = eb_lookup_objects(eb, exec, count, file);
+ ret = eb_lookup_objects(eb, exec, args, file);
if (ret)
goto err;
@@ -919,7 +941,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
goto pre_mutex_err;
}
- eb = eb_create(args->buffer_count);
+ eb = eb_create(args);
if (eb == NULL) {
mutex_unlock(&dev->struct_mutex);
ret = -ENOMEM;
@@ -927,7 +949,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
}
/* Look up object handles */
- ret = eb_lookup_objects(eb, exec, args->buffer_count, file);
+ ret = eb_lookup_objects(eb, exec, args, file);
if (ret)
goto err;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 2430b6a..07d5941 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -309,6 +309,7 @@ typedef struct drm_i915_irq_wait {
#define I915_PARAM_HAS_SECURE_BATCHES 23
#define I915_PARAM_HAS_PINNED_BATCHES 24
#define I915_PARAM_HAS_EXEC_NO_RELOC 25
+#define I915_PARAM_HAS_EXEC_HANDLE_LUT 26
typedef struct drm_i915_getparam {
int param;
@@ -699,7 +700,12 @@ struct drm_i915_gem_execbuffer2 {
*/
#define I915_EXEC_NO_RELOC (1<<11)
-#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_NO_RELOC<<1)
+/** Use the reloc.handle as an index into the exec object array rather
+ * than as the per-file handle.
+ */
+#define I915_EXEC_HANDLE_LUT (1<<12)
+
+#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_HANDLE_LUT<<1)
#define I915_EXEC_CONTEXT_ID_MASK (0xffffffff)
#define i915_execbuffer2_set_context_id(eb2, context) \
OpenPOWER on IntegriCloud