*
*/
+#include <linux/log2.h>
#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#define ALL_L3_SLICES(dev) ((1 << NUM_L3_SLICES(dev)) - 1)
+/* Initial size (as log2) to preallocate the handle->object hashtable */
+#define VMA_HT_BITS 2u /* 4 x 2 pointers, 64 bytes minimum */
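+/* The table starts at BIT(VMA_HT_BITS) == 4 buckets and is grown or
+ * shrunk by resize_vma_ht() below as the context's handle count changes.
+ */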
+
+static void resize_vma_ht(struct work_struct *work)
+{
+ struct i915_gem_context_vma_lut *lut =
+ container_of(work, typeof(*lut), resize);
+ unsigned int bits, new_bits, size, i;
+ struct hlist_head *new_ht;
+
+ GEM_BUG_ON(!(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS));
+
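+ /* 4*ht_count/3 + 1 is the smallest table size keeping the load
+ * factor below 3/4; 1 + ilog2() rounds it up to a power of two.
+ */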
+ bits = 1 + ilog2(4*lut->ht_count/3 + 1);
+ new_bits = min_t(unsigned int,
+ max(bits, VMA_HT_BITS),
+ sizeof(unsigned int) * BITS_PER_BYTE - 1);
+ if (new_bits == lut->ht_bits)
+ goto out;
+
+ new_ht = kzalloc(sizeof(*new_ht)<<new_bits, GFP_KERNEL | __GFP_NOWARN);
+ if (!new_ht)
+ new_ht = vzalloc(sizeof(*new_ht)<<new_bits);
+ if (!new_ht)
+ /* Pretend resize succeeded and stop calling us for a bit! */
+ goto out;
+
+ size = BIT(lut->ht_bits);
+ for (i = 0; i < size; i++) {
+ struct i915_vma *vma;
+ struct hlist_node *tmp;
+
+ hlist_for_each_entry_safe(vma, tmp, &lut->ht[i], ctx_node)
+ hlist_add_head(&vma->ctx_node,
+ &new_ht[hash_32(vma->ctx_handle,
+ new_bits)]);
+ }
+ kvfree(lut->ht);
+ lut->ht = new_ht;
+ lut->ht_bits = new_bits;
+out:
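+ /* The release barrier orders the new table and ht_bits before the
+ * ht_size update, which also clears I915_CTX_RESIZE_IN_PROGRESS.
+ */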
+ smp_store_release(&lut->ht_size, BIT(bits));
+ GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS);
+}
+
+static void vma_lut_free(struct i915_gem_context *ctx)
+{
+ struct i915_gem_context_vma_lut *lut = &ctx->vma_lut;
+ unsigned int i, size;
+
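+ /* A queued resize would rehash into a table we are about to free,
+ * so let any pending worker finish first.
+ */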
+ if (lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS)
+ cancel_work_sync(&lut->resize);
+
+ size = BIT(lut->ht_bits);
+ for (i = 0; i < size; i++) {
+ struct i915_vma *vma;
+
+ hlist_for_each_entry(vma, &lut->ht[i], ctx_node) {
+ vma->obj->vma_hashed = NULL;
+ vma->ctx = NULL;
+ }
+ }
+ kvfree(lut->ht);
+}
+
void i915_gem_context_free(struct kref *ctx_ref)
{
struct i915_gem_context *ctx = container_of(ctx_ref, typeof(*ctx), ref);
trace_i915_context_free(ctx);
GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
+ vma_lut_free(ctx);
i915_ppgtt_put(ctx->ppgtt);
for (i = 0; i < I915_NUM_ENGINES; i++) {
kfree(ctx->name);
put_pid(ctx->pid);
+
list_del(&ctx->link);
ida_simple_remove(&ctx->i915->context_hw_ida, ctx->hw_id);
ctx->i915 = dev_priv;
ctx->priority = I915_PRIORITY_NORMAL;
+ ctx->vma_lut.ht_bits = VMA_HT_BITS;
+ ctx->vma_lut.ht_size = BIT(VMA_HT_BITS);
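+ /* ht_size doubles as a state word: the BUILD_BUG_ON below checks
+ * that the resize flag cannot alias the initial table size.
+ */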
+ BUILD_BUG_ON(BIT(VMA_HT_BITS) == I915_CTX_RESIZE_IN_PROGRESS);
+ ctx->vma_lut.ht = kcalloc(ctx->vma_lut.ht_size,
+ sizeof(*ctx->vma_lut.ht),
+ GFP_KERNEL);
+ if (!ctx->vma_lut.ht)
+ goto err_out;
+
+ INIT_WORK(&ctx->vma_lut.resize, resize_vma_ht);
+
/* Default context will never have a file_priv */
ret = DEFAULT_CONTEXT_HANDLE;
if (file_priv) {
ret = idr_alloc(&file_priv->context_idr, ctx,
DEFAULT_CONTEXT_HANDLE, 0, GFP_KERNEL);
if (ret < 0)
- goto err_out;
+ goto err_lut;
}
ctx->user_handle = ret;
err_pid:
put_pid(ctx->pid);
idr_remove(&file_priv->context_idr, ctx->user_handle);
+err_lut:
+ kvfree(ctx->vma_lut.ht);
err_out:
context_close(ctx);
return ERR_PTR(ret);
unsigned int page;
bool use_64bit_reloc : 1;
} reloc_cache;
- int and;
- union {
- struct i915_vma **lut;
- struct hlist_head *buckets;
- };
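+ /* lut_mask >= 0: number of hash bits for buckets[]; lut_mask < 0:
+ * handles index eb->exec[] directly, bounded by -buffer_count.
+ */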
+ int lut_mask;
+ struct hlist_head *buckets;
};
+/*
+ * As an alternative to creating a hashtable of handle-to-vma for a batch,
+ * we use the last available reserved field in the execobject[] to stash
+ * a link from the execobj to its vma.
+ */
+#define __exec_to_vma(ee) (ee)->rsvd2
+#define exec_to_vma(ee) u64_to_ptr(struct i915_vma, __exec_to_vma(ee))
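+/*
+ * e.g. eb_lookup_vmas() stores the link with
+ *   __exec_to_vma(&eb->exec[i]) = (uintptr_t)vma;
+ * and eb_get_vma()/eb_get_batch() recover it via
+ *   vma = exec_to_vma(&eb->exec[i]);
+ */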
+
static int eb_create(struct i915_execbuffer *eb)
{
- eb->lut = NULL;
- if (eb->args->flags & I915_EXEC_HANDLE_LUT) {
- unsigned int size = eb->args->buffer_count;
- size *= sizeof(struct i915_vma *);
- eb->lut = kmalloc(size,
- GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
- }
-
- if (!eb->lut) {
- unsigned int size = eb->args->buffer_count;
- unsigned int count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
- BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head));
- while (count > 2*size)
- count >>= 1;
- eb->lut = kzalloc(count * sizeof(struct hlist_head),
- GFP_TEMPORARY);
- if (!eb->lut)
- return -ENOMEM;
-
- eb->and = count - 1;
+ if ((eb->args->flags & I915_EXEC_HANDLE_LUT) == 0) {
+ unsigned int size = 1 + ilog2(eb->args->buffer_count);
+
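+ /* Start with at least one bucket per object (rounded up to a
+ * power of two) and halve on allocation failure, falling back to
+ * a single bucket.
+ */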
+ do {
+ eb->buckets = kzalloc(sizeof(struct hlist_head) << size,
+ GFP_TEMPORARY |
+ __GFP_NORETRY |
+ __GFP_NOWARN);
+ if (eb->buckets)
+ break;
+ } while (--size);
+
+ if (unlikely(!eb->buckets)) {
+ eb->buckets = kzalloc(sizeof(struct hlist_head),
+ GFP_TEMPORARY);
+ if (unlikely(!eb->buckets))
+ return -ENOMEM;
+ }
+
+ eb->lut_mask = size;
} else {
- eb->and = -eb->args->buffer_count;
+ eb->lut_mask = -eb->args->buffer_count;
}
return 0;
vma->exec_entry = NULL;
}
- if (eb->and >= 0)
- memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
+ if (eb->lut_mask >= 0)
+ memset(eb->buckets, 0,
+ sizeof(struct hlist_head) << eb->lut_mask);
}
-static struct i915_vma *
-eb_get_batch(struct i915_execbuffer *eb)
+static bool
+eb_add_vma(struct i915_execbuffer *eb, struct i915_vma *vma, int i)
{
- struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_link);
+ if (unlikely(vma->exec_entry)) {
+ DRM_DEBUG("Object [handle %d, index %d] appears more than once in object list\n",
+ eb->exec[i].handle, i);
+ return false;
+ }
+ list_add_tail(&vma->exec_link, &eb->vmas);
- /*
- * SNA is doing fancy tricks with compressing batch buffers, which leads
- * to negative relocation deltas. Usually that works out ok since the
- * relocate address is still positive, except when the batch is placed
- * very low in the GTT. Ensure this doesn't happen.
- *
- * Note that actual hangs have only been observed on gen7, but for
- * paranoia do it everywhere.
- */
- if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
- vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
+ vma->exec_entry = &eb->exec[i];
+ if (eb->lut_mask >= 0) {
+ vma->exec_handle = eb->exec[i].handle;
+ hlist_add_head(&vma->exec_node,
+ &eb->buckets[hash_32(vma->exec_handle,
+ eb->lut_mask)]);
+ }
- return vma;
+ i915_vma_get(vma);
+ __exec_to_vma(&eb->exec[i]) = (uintptr_t)vma;
+ return true;
+}
+
+static inline struct hlist_head *
+ht_head(const struct i915_gem_context *ctx, u32 handle)
+{
+ return &ctx->vma_lut.ht[hash_32(handle, ctx->vma_lut.ht_bits)];
+}
+
+static inline bool
+ht_needs_resize(const struct i915_gem_context *ctx)
+{
+ return (4*ctx->vma_lut.ht_count > 3*ctx->vma_lut.ht_size ||
+ 4*ctx->vma_lut.ht_count + 1 < ctx->vma_lut.ht_size);
+}
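+
+/*
+ * e.g. with ht_size = 64, a resize is queued once ht_count exceeds 48
+ * (load factor > 3/4) or drops below 16 (load factor < ~1/4).
+ */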
static int
eb_lookup_vmas(struct i915_execbuffer *eb)
{
- struct drm_i915_gem_object *obj;
- struct list_head objects;
- int i, ret;
+#define INTERMEDIATE BIT(0)
+ const int count = eb->args->buffer_count;
+ struct i915_vma *vma;
+ int slow_pass = -1;
+ int i;
INIT_LIST_HEAD(&eb->vmas);
- INIT_LIST_HEAD(&objects);
+ if (unlikely(eb->ctx->vma_lut.ht_size & I915_CTX_RESIZE_IN_PROGRESS))
+ flush_work(&eb->ctx->vma_lut.resize);
+ GEM_BUG_ON(eb->ctx->vma_lut.ht_size & I915_CTX_RESIZE_IN_PROGRESS);
+
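+ /* First pass: look each handle up in the per-context hashtable,
+ * remembering the first miss so the slow path below only has to
+ * walk the remaining entries.
+ */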
+ for (i = 0; i < count; i++) {
+ __exec_to_vma(&eb->exec[i]) = 0;
+
+ hlist_for_each_entry(vma,
+ ht_head(eb->ctx, eb->exec[i].handle),
+ ctx_node) {
+ if (vma->ctx_handle != eb->exec[i].handle)
+ continue;
+
+ if (!eb_add_vma(eb, vma, i))
+ return -EINVAL;
+
+ goto next_vma;
+ }
+
+ if (slow_pass < 0)
+ slow_pass = i;
+next_vma: ;
+ }
+
+ if (slow_pass < 0)
+ return 0;
+
spin_lock(&eb->file->table_lock);
/* Grab a reference to the object and release the lock so we can lookup
* or create the VMA without using GFP_ATOMIC */
- for (i = 0; i < eb->args->buffer_count; i++) {
- obj = to_intel_bo(idr_find(&eb->file->object_idr, eb->exec[i].handle));
- if (obj == NULL) {
- spin_unlock(&eb->file->table_lock);
- DRM_DEBUG("Invalid object handle %d at index %d\n",
- eb->exec[i].handle, i);
- ret = -ENOENT;
- goto err;
- }
+ for (i = slow_pass; i < count; i++) {
+ struct drm_i915_gem_object *obj;
- if (!list_empty(&obj->obj_exec_link)) {
+ if (__exec_to_vma(&eb->exec[i]))
+ continue;
+
+ obj = to_intel_bo(idr_find(&eb->file->object_idr,
+ eb->exec[i].handle));
+ if (unlikely(!obj)) {
spin_unlock(&eb->file->table_lock);
- DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
- obj, eb->exec[i].handle, i);
- ret = -EINVAL;
- goto err;
+ DRM_DEBUG("Invalid object handle %d at index %d\n",
+ eb->exec[i].handle, i);
+ return -ENOENT;
}
- i915_gem_object_get(obj);
- list_add_tail(&obj->obj_exec_link, &objects);
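+ /* Tag the pointer with INTERMEDIATE (bit 0, which is clear in any
+ * aligned object pointer) to mark slots still needing a vma lookup.
+ */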
+ __exec_to_vma(&eb->exec[i]) = INTERMEDIATE | (uintptr_t)obj;
}
spin_unlock(&eb->file->table_lock);
- i = 0;
- while (!list_empty(&objects)) {
- struct i915_vma *vma;
+ for (i = slow_pass; i < count; i++) {
+ struct drm_i915_gem_object *obj;
- obj = list_first_entry(&objects,
- struct drm_i915_gem_object,
- obj_exec_link);
+ if ((__exec_to_vma(&eb->exec[i]) & INTERMEDIATE) == 0)
+ continue;
/*
* NOTE: We can leak any vmas created here when something fails
* later on. But that's no issue since vma_unbind can deal with
* vmas which are not actually bound. And since only
* lookup_or_create exists as an interface to get at the vma
* from the (obj, vm) we don't run the risk of creating
* duplicated vmas for the same vm.
*/
+ obj = u64_to_ptr(struct drm_i915_gem_object,
+ __exec_to_vma(&eb->exec[i]) & ~INTERMEDIATE);
vma = i915_vma_instance(obj, eb->vm, NULL);
if (unlikely(IS_ERR(vma))) {
DRM_DEBUG("Failed to lookup VMA\n");
- ret = PTR_ERR(vma);
- goto err;
+ return PTR_ERR(vma);
}
- /* Transfer ownership from the objects list to the vmas list. */
- list_add_tail(&vma->exec_link, &eb->vmas);
- list_del_init(&obj->obj_exec_link);
-
- vma->exec_entry = &eb->exec[i];
- if (eb->and < 0) {
- eb->lut[i] = vma;
- } else {
- u32 handle =
- eb->args->flags & I915_EXEC_HANDLE_LUT ?
- i : eb->exec[i].handle;
- vma->exec_handle = handle;
- hlist_add_head(&vma->exec_node,
- &eb->buckets[handle & eb->and]);
+ /* First come, first served */
+ if (!vma->ctx) {
+ vma->ctx = eb->ctx;
+ vma->ctx_handle = eb->exec[i].handle;
+ hlist_add_head(&vma->ctx_node,
+ ht_head(eb->ctx, eb->exec[i].handle));
+ eb->ctx->vma_lut.ht_count++;
+ if (i915_vma_is_ggtt(vma)) {
+ GEM_BUG_ON(obj->vma_hashed);
+ obj->vma_hashed = vma;
+ }
}
- ++i;
+
+ if (!eb_add_vma(eb, vma, i))
+ return -EINVAL;
+ }
+
+ if (ht_needs_resize(eb->ctx)) {
+ eb->ctx->vma_lut.ht_size |= I915_CTX_RESIZE_IN_PROGRESS;
+ queue_work(system_highpri_wq, &eb->ctx->vma_lut.resize);
}
return 0;
+#undef INTERMEDIATE
+}
+static struct i915_vma *
+eb_get_batch(struct i915_execbuffer *eb)
+{
+ struct i915_vma *vma =
+ exec_to_vma(&eb->exec[eb->args->buffer_count - 1]);
-err:
- while (!list_empty(&objects)) {
- obj = list_first_entry(&objects,
- struct drm_i915_gem_object,
- obj_exec_link);
- list_del_init(&obj->obj_exec_link);
- i915_gem_object_put(obj);
- }
/*
- * Objects already transfered to the vmas list will be unreferenced by
- * eb_destroy.
+ * SNA is doing fancy tricks with compressing batch buffers, which leads
+ * to negative relocation deltas. Usually that works out ok since the
+ * relocate address is still positive, except when the batch is placed
+ * very low in the GTT. Ensure this doesn't happen.
+ *
+ * Note that actual hangs have only been observed on gen7, but for
+ * paranoia do it everywhere.
*/
+ if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
+ vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
- return ret;
+ return vma;
}
-static struct i915_vma *eb_get_vma(struct i915_execbuffer *eb, unsigned long handle)
+static struct i915_vma *
+eb_get_vma(struct i915_execbuffer *eb, unsigned long handle)
{
- if (eb->and < 0) {
- if (handle >= -eb->and)
+ if (eb->lut_mask < 0) {
+ if (handle >= -eb->lut_mask)
return NULL;
- return eb->lut[handle];
+ return exec_to_vma(&eb->exec[handle]);
} else {
struct hlist_head *head;
struct i915_vma *vma;
- head = &eb->buckets[handle & eb->and];
+ head = &eb->buckets[hash_32(handle, eb->lut_mask)];
hlist_for_each_entry(vma, head, exec_node) {
if (vma->exec_handle == handle)
return vma;
i915_gem_context_put(eb->ctx);
- if (eb->buckets)
+ if (eb->lut_mask >= 0)
kfree(eb->buckets);
}
need_fence =
(entry->flags & EXEC_OBJECT_NEEDS_FENCE ||
needs_unfenced_map) &&
- i915_gem_object_is_tiled(obj);
+ i915_gem_object_is_tiled(vma->obj);
need_mappable = need_fence || need_reloc_mappable(vma);
if (entry->flags & EXEC_OBJECT_PINNED)