i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
# GEM code
-i915-y += i915_cmd_parser.o \
+i915-y += \
+ i915_active.o \
+ i915_cmd_parser.o \
i915_gem_batch_pool.o \
i915_gem_clflush.o \
i915_gem_context.o \
--- /dev/null
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "i915_active.h"
+
+#define BKL(ref) (&(ref)->i915->drm.struct_mutex)
+
+struct active_node {
+ struct i915_gem_active base;
+ struct i915_active *ref;
+ struct rb_node node;
+ u64 timeline;
+};
+
+static void
+__active_retire(struct i915_active *ref)
+{
+ GEM_BUG_ON(!ref->count);
+ if (!--ref->count)
+ ref->retire(ref);
+}
+
+static void
+node_retire(struct i915_gem_active *base, struct i915_request *rq)
+{
+ __active_retire(container_of(base, struct active_node, base)->ref);
+}
+
+static void
+last_retire(struct i915_gem_active *base, struct i915_request *rq)
+{
+ __active_retire(container_of(base, struct i915_active, last));
+}
+
+static struct i915_gem_active *
+active_instance(struct i915_active *ref, u64 idx)
+{
+ struct active_node *node;
+ struct rb_node **p, *parent;
+ struct i915_request *old;
+
+ /*
+ * We track the most recently used timeline to skip a rbtree search
+ * for the common case, under typical loads we never need the rbtree
+ * at all. We can reuse the last slot if it is empty, that is
+ * after the previous activity has been retired, or if it matches the
+ * current timeline.
+ *
+ * Note that we allow the timeline to be active simultaneously in
+ * the rbtree and the last cache. We do this to avoid having
+ * to search and replace the rbtree element for a new timeline, with
+ * the cost being that we must be aware that the ref may be retired
+ * twice for the same timeline (as the older rbtree element will be
+ * retired before the new request added to last).
+ */
+ old = i915_gem_active_raw(&ref->last, BKL(ref));
+ if (!old || old->fence.context == idx)
+ goto out;
+
+ /* Move the currently active fence into the rbtree */
+ idx = old->fence.context;
+
+ parent = NULL;
+ p = &ref->tree.rb_node;
+ while (*p) {
+ parent = *p;
+
+ node = rb_entry(parent, struct active_node, node);
+ if (node->timeline == idx)
+ goto replace;
+
+ if (node->timeline < idx)
+ p = &parent->rb_right;
+ else
+ p = &parent->rb_left;
+ }
+
+ node = kmalloc(sizeof(*node), GFP_KERNEL);
+
+ /* kmalloc may retire the ref->last (thanks shrinker)! */
+ if (unlikely(!i915_gem_active_raw(&ref->last, BKL(ref)))) {
+ kfree(node);
+ goto out;
+ }
+
+ if (unlikely(!node))
+ return ERR_PTR(-ENOMEM);
+
+ init_request_active(&node->base, node_retire);
+ node->ref = ref;
+ node->timeline = idx;
+
+ rb_link_node(&node->node, parent, p);
+ rb_insert_color(&node->node, &ref->tree);
+
+replace:
+ /*
+ * Overwrite the previous active slot in the rbtree with last,
+ * leaving last zeroed. If the previous slot is still active,
+ * we must be careful as we now only expect to receive one retire
+ * callback not two, and so much undo the active counting for the
+ * overwritten slot.
+ */
+ if (i915_gem_active_isset(&node->base)) {
+ /* Retire ourselves from the old rq->active_list */
+ __list_del_entry(&node->base.link);
+ ref->count--;
+ GEM_BUG_ON(!ref->count);
+ }
+ GEM_BUG_ON(list_empty(&ref->last.link));
+ list_replace_init(&ref->last.link, &node->base.link);
+ node->base.request = fetch_and_zero(&ref->last.request);
+
+out:
+ return &ref->last;
+}
+
+void i915_active_init(struct drm_i915_private *i915,
+ struct i915_active *ref,
+ void (*retire)(struct i915_active *ref))
+{
+ ref->i915 = i915;
+ ref->retire = retire;
+ ref->tree = RB_ROOT;
+ init_request_active(&ref->last, last_retire);
+ ref->count = 0;
+}
+
+int i915_active_ref(struct i915_active *ref,
+ u64 timeline,
+ struct i915_request *rq)
+{
+ struct i915_gem_active *active;
+
+ active = active_instance(ref, timeline);
+ if (IS_ERR(active))
+ return PTR_ERR(active);
+
+ if (!i915_gem_active_isset(active))
+ ref->count++;
+ i915_gem_active_set(active, rq);
+
+ GEM_BUG_ON(!ref->count);
+ return 0;
+}
+
+bool i915_active_acquire(struct i915_active *ref)
+{
+ lockdep_assert_held(BKL(ref));
+ return !ref->count++;
+}
+
+void i915_active_release(struct i915_active *ref)
+{
+ lockdep_assert_held(BKL(ref));
+ __active_retire(ref);
+}
+
+int i915_active_wait(struct i915_active *ref)
+{
+ struct active_node *it, *n;
+ int ret = 0;
+
+ if (i915_active_acquire(ref))
+ goto out_release;
+
+ ret = i915_gem_active_retire(&ref->last, BKL(ref));
+ if (ret)
+ goto out_release;
+
+ rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
+ ret = i915_gem_active_retire(&it->base, BKL(ref));
+ if (ret)
+ break;
+ }
+
+out_release:
+ i915_active_release(ref);
+ return ret;
+}
+
+static int __i915_request_await_active(struct i915_request *rq,
+ struct i915_gem_active *active)
+{
+ struct i915_request *barrier =
+ i915_gem_active_raw(active, &rq->i915->drm.struct_mutex);
+
+ return barrier ? i915_request_await_dma_fence(rq, &barrier->fence) : 0;
+}
+
+int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
+{
+ struct active_node *it, *n;
+ int ret;
+
+ ret = __i915_request_await_active(rq, &ref->last);
+ if (ret)
+ return ret;
+
+ rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
+ ret = __i915_request_await_active(rq, &it->base);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+void i915_active_fini(struct i915_active *ref)
+{
+ struct active_node *it, *n;
+
+ GEM_BUG_ON(i915_gem_active_isset(&ref->last));
+
+ rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
+ GEM_BUG_ON(i915_gem_active_isset(&it->base));
+ kfree(it);
+ }
+ ref->tree = RB_ROOT;
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_active.c"
+#endif
--- /dev/null
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef _I915_ACTIVE_H_
+#define _I915_ACTIVE_H_
+
+#include "i915_active_types.h"
+
+/*
+ * GPU activity tracking
+ *
+ * Each set of commands submitted to the GPU compromises a single request that
+ * signals a fence upon completion. struct i915_request combines the
+ * command submission, scheduling and fence signaling roles. If we want to see
+ * if a particular task is complete, we need to grab the fence (struct
+ * i915_request) for that task and check or wait for it to be signaled. More
+ * often though we want to track the status of a bunch of tasks, for example
+ * to wait for the GPU to finish accessing some memory across a variety of
+ * different command pipelines from different clients. We could choose to
+ * track every single request associated with the task, but knowing that
+ * each request belongs to an ordered timeline (later requests within a
+ * timeline must wait for earlier requests), we need only track the
+ * latest request in each timeline to determine the overall status of the
+ * task.
+ *
+ * struct i915_active provides this tracking across timelines. It builds a
+ * composite shared-fence, and is updated as new work is submitted to the task,
+ * forming a snapshot of the current status. It should be embedded into the
+ * different resources that need to track their associated GPU activity to
+ * provide a callback when that GPU activity has ceased, or otherwise to
+ * provide a serialisation point either for request submission or for CPU
+ * synchronisation.
+ */
+
+void i915_active_init(struct drm_i915_private *i915,
+ struct i915_active *ref,
+ void (*retire)(struct i915_active *ref));
+
+int i915_active_ref(struct i915_active *ref,
+ u64 timeline,
+ struct i915_request *rq);
+
+int i915_active_wait(struct i915_active *ref);
+
+int i915_request_await_active(struct i915_request *rq,
+ struct i915_active *ref);
+
+bool i915_active_acquire(struct i915_active *ref);
+
+static inline void i915_active_cancel(struct i915_active *ref)
+{
+ GEM_BUG_ON(ref->count != 1);
+ ref->count = 0;
+}
+
+void i915_active_release(struct i915_active *ref);
+
+static inline bool
+i915_active_is_idle(const struct i915_active *ref)
+{
+ return !ref->count;
+}
+
+void i915_active_fini(struct i915_active *ref);
+
+#endif /* _I915_ACTIVE_H_ */
--- /dev/null
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef _I915_ACTIVE_TYPES_H_
+#define _I915_ACTIVE_TYPES_H_
+
+#include <linux/rbtree.h>
+
+#include "i915_request.h"
+
+struct drm_i915_private;
+
+struct i915_active {
+ struct drm_i915_private *i915;
+
+ struct rb_root tree;
+ struct i915_gem_active last;
+ unsigned int count;
+
+ void (*retire)(struct i915_active *ref);
+};
+
+#endif /* _I915_ACTIVE_TYPES_H_ */
if (!vma)
return ERR_PTR(-ENOMEM);
+ i915_active_init(i915, &vma->active, NULL);
init_request_active(&vma->last_fence, NULL);
vma->vm = &ggtt->vm;
vma->ops = &pd_vma_ops;
vma->private = ppgtt;
- vma->active = RB_ROOT;
-
vma->size = size;
vma->fence_size = size;
vma->flags = I915_VMA_GGTT;
#endif
-struct i915_vma_active {
- struct i915_gem_active base;
- struct i915_vma *vma;
- struct rb_node node;
- u64 timeline;
-};
+static void obj_bump_mru(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
-static void
-__i915_vma_retire(struct i915_vma *vma, struct i915_request *rq)
+ spin_lock(&i915->mm.obj_lock);
+ if (obj->bind_count)
+ list_move_tail(&obj->mm.link, &i915->mm.bound_list);
+ spin_unlock(&i915->mm.obj_lock);
+
+ obj->mm.dirty = true; /* be paranoid */
+}
+
+static void __i915_vma_retire(struct i915_active *ref)
{
+ struct i915_vma *vma = container_of(ref, typeof(*vma), active);
struct drm_i915_gem_object *obj = vma->obj;
- GEM_BUG_ON(!i915_vma_is_active(vma));
- if (--vma->active_count)
- return;
-
GEM_BUG_ON(!i915_gem_object_is_active(obj));
if (--obj->active_count)
return;
reservation_object_unlock(obj->resv);
}
- /* Bump our place on the bound list to keep it roughly in LRU order
+ /*
+ * Bump our place on the bound list to keep it roughly in LRU order
* so that we don't steal from recently used but inactive objects
* (unless we are forced to ofc!)
*/
- spin_lock(&rq->i915->mm.obj_lock);
- if (obj->bind_count)
- list_move_tail(&obj->mm.link, &rq->i915->mm.bound_list);
- spin_unlock(&rq->i915->mm.obj_lock);
-
- obj->mm.dirty = true; /* be paranoid */
+ obj_bump_mru(obj);
if (i915_gem_object_has_active_reference(obj)) {
i915_gem_object_clear_active_reference(obj);
}
}
-static void
-i915_vma_retire(struct i915_gem_active *base, struct i915_request *rq)
-{
- struct i915_vma_active *active =
- container_of(base, typeof(*active), base);
-
- __i915_vma_retire(active->vma, rq);
-}
-
-static void
-i915_vma_last_retire(struct i915_gem_active *base, struct i915_request *rq)
-{
- __i915_vma_retire(container_of(base, struct i915_vma, last_active), rq);
-}
-
static struct i915_vma *
vma_create(struct drm_i915_gem_object *obj,
struct i915_address_space *vm,
if (vma == NULL)
return ERR_PTR(-ENOMEM);
- vma->active = RB_ROOT;
-
- init_request_active(&vma->last_active, i915_vma_last_retire);
+ i915_active_init(vm->i915, &vma->active, __i915_vma_retire);
init_request_active(&vma->last_fence, NULL);
+
vma->vm = vm;
vma->ops = &vm->vma_ops;
vma->obj = obj;
static void __i915_vma_destroy(struct i915_vma *vma)
{
struct drm_i915_private *i915 = vma->vm->i915;
- struct i915_vma_active *iter, *n;
GEM_BUG_ON(vma->node.allocated);
GEM_BUG_ON(vma->fence);
spin_unlock(&obj->vma.lock);
}
- rbtree_postorder_for_each_entry_safe(iter, n, &vma->active, node) {
- GEM_BUG_ON(i915_gem_active_isset(&iter->base));
- kfree(iter);
- }
+ i915_active_fini(&vma->active);
kmem_cache_free(i915->vmas, vma);
}
reservation_object_unlock(resv);
}
-static struct i915_gem_active *active_instance(struct i915_vma *vma, u64 idx)
-{
- struct i915_vma_active *active;
- struct rb_node **p, *parent;
- struct i915_request *old;
-
- /*
- * We track the most recently used timeline to skip a rbtree search
- * for the common case, under typical loads we never need the rbtree
- * at all. We can reuse the last_active slot if it is empty, that is
- * after the previous activity has been retired, or if the active
- * matches the current timeline.
- *
- * Note that we allow the timeline to be active simultaneously in
- * the rbtree and the last_active cache. We do this to avoid having
- * to search and replace the rbtree element for a new timeline, with
- * the cost being that we must be aware that the vma may be retired
- * twice for the same timeline (as the older rbtree element will be
- * retired before the new request added to last_active).
- */
- old = i915_gem_active_raw(&vma->last_active,
- &vma->vm->i915->drm.struct_mutex);
- if (!old || old->fence.context == idx)
- goto out;
-
- /* Move the currently active fence into the rbtree */
- idx = old->fence.context;
-
- parent = NULL;
- p = &vma->active.rb_node;
- while (*p) {
- parent = *p;
-
- active = rb_entry(parent, struct i915_vma_active, node);
- if (active->timeline == idx)
- goto replace;
-
- if (active->timeline < idx)
- p = &parent->rb_right;
- else
- p = &parent->rb_left;
- }
-
- active = kmalloc(sizeof(*active), GFP_KERNEL);
-
- /* kmalloc may retire the vma->last_active request (thanks shrinker)! */
- if (unlikely(!i915_gem_active_raw(&vma->last_active,
- &vma->vm->i915->drm.struct_mutex))) {
- kfree(active);
- goto out;
- }
-
- if (unlikely(!active))
- return ERR_PTR(-ENOMEM);
-
- init_request_active(&active->base, i915_vma_retire);
- active->vma = vma;
- active->timeline = idx;
-
- rb_link_node(&active->node, parent, p);
- rb_insert_color(&active->node, &vma->active);
-
-replace:
- /*
- * Overwrite the previous active slot in the rbtree with last_active,
- * leaving last_active zeroed. If the previous slot is still active,
- * we must be careful as we now only expect to receive one retire
- * callback not two, and so much undo the active counting for the
- * overwritten slot.
- */
- if (i915_gem_active_isset(&active->base)) {
- /* Retire ourselves from the old rq->active_list */
- __list_del_entry(&active->base.link);
- vma->active_count--;
- GEM_BUG_ON(!vma->active_count);
- }
- GEM_BUG_ON(list_empty(&vma->last_active.link));
- list_replace_init(&vma->last_active.link, &active->base.link);
- active->base.request = fetch_and_zero(&vma->last_active.request);
-
-out:
- return &vma->last_active;
-}
-
int i915_vma_move_to_active(struct i915_vma *vma,
struct i915_request *rq,
unsigned int flags)
{
struct drm_i915_gem_object *obj = vma->obj;
- struct i915_gem_active *active;
lockdep_assert_held(&rq->i915->drm.struct_mutex);
GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
- active = active_instance(vma, rq->fence.context);
- if (IS_ERR(active))
- return PTR_ERR(active);
-
/*
* Add a reference if we're newly entering the active list.
* The order in which we add operations to the retirement queue is
* add the active reference first and queue for it to be dropped
* *last*.
*/
- if (!i915_gem_active_isset(active) && !vma->active_count++)
+ if (!vma->active.count)
obj->active_count++;
- i915_gem_active_set(active, rq);
+
+ if (unlikely(i915_active_ref(&vma->active, rq->fence.context, rq))) {
+ if (!vma->active.count)
+ obj->active_count--;
+ return -ENOMEM;
+ }
+
GEM_BUG_ON(!i915_vma_is_active(vma));
GEM_BUG_ON(!obj->active_count);
*/
might_sleep();
if (i915_vma_is_active(vma)) {
- struct i915_vma_active *active, *n;
-
/*
* When a closed VMA is retired, it is unbound - eek.
* In order to prevent it from being recursively closed,
*/
__i915_vma_pin(vma);
- ret = i915_gem_active_retire(&vma->last_active,
- &vma->vm->i915->drm.struct_mutex);
+ ret = i915_active_wait(&vma->active);
if (ret)
goto unpin;
- rbtree_postorder_for_each_entry_safe(active, n,
- &vma->active, node) {
- ret = i915_gem_active_retire(&active->base,
- &vma->vm->i915->drm.struct_mutex);
- if (ret)
- goto unpin;
- }
-
ret = i915_gem_active_retire(&vma->last_fence,
&vma->vm->i915->drm.struct_mutex);
unpin:
#include "i915_gem_fence_reg.h"
#include "i915_gem_object.h"
+#include "i915_active.h"
#include "i915_request.h"
enum i915_cache_level;
#define I915_VMA_USERFAULT BIT(I915_VMA_USERFAULT_BIT)
#define I915_VMA_GGTT_WRITE BIT(15)
- unsigned int active_count;
- struct rb_root active;
- struct i915_gem_active last_active;
+ struct i915_active active;
struct i915_gem_active last_fence;
/**
void i915_vma_unpin_and_release(struct i915_vma **p_vma, unsigned int flags);
#define I915_VMA_RELEASE_MAP BIT(0)
-static inline bool i915_vma_is_active(struct i915_vma *vma)
+static inline bool i915_vma_is_active(const struct i915_vma *vma)
{
- return vma->active_count;
+ return !i915_active_is_idle(&vma->active);
}
int __must_check i915_vma_move_to_active(struct i915_vma *vma,
--- /dev/null
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include "../i915_selftest.h"
+
+#include "igt_flush_test.h"
+#include "lib_sw_fence.h"
+
+struct live_active {
+ struct i915_active base;
+ bool retired;
+};
+
+static void __live_active_retire(struct i915_active *base)
+{
+ struct live_active *active = container_of(base, typeof(*active), base);
+
+ active->retired = true;
+}
+
+static int __live_active_setup(struct drm_i915_private *i915,
+ struct live_active *active)
+{
+ struct intel_engine_cs *engine;
+ struct i915_sw_fence *submit;
+ enum intel_engine_id id;
+ unsigned int count = 0;
+ int err = 0;
+
+ submit = heap_fence_create(GFP_KERNEL);
+ if (!submit)
+ return -ENOMEM;
+
+ i915_active_init(i915, &active->base, __live_active_retire);
+ active->retired = false;
+
+ if (!i915_active_acquire(&active->base)) {
+ pr_err("First i915_active_acquire should report being idle\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ for_each_engine(engine, i915, id) {
+ struct i915_request *rq;
+
+ rq = i915_request_alloc(engine, i915->kernel_context);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ break;
+ }
+
+ err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
+ submit,
+ GFP_KERNEL);
+ if (err >= 0)
+ err = i915_active_ref(&active->base,
+ rq->fence.context, rq);
+ i915_request_add(rq);
+ if (err) {
+ pr_err("Failed to track active ref!\n");
+ break;
+ }
+
+ count++;
+ }
+
+ i915_active_release(&active->base);
+ if (active->retired && count) {
+ pr_err("i915_active retired before submission!\n");
+ err = -EINVAL;
+ }
+ if (active->base.count != count) {
+ pr_err("i915_active not tracking all requests, found %d, expected %d\n",
+ active->base.count, count);
+ err = -EINVAL;
+ }
+
+out:
+ i915_sw_fence_commit(submit);
+ heap_fence_put(submit);
+
+ return err;
+}
+
+static int live_active_wait(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct live_active active;
+ intel_wakeref_t wakeref;
+ int err;
+
+ /* Check that we get a callback when requests retire upon waiting */
+
+ mutex_lock(&i915->drm.struct_mutex);
+ wakeref = intel_runtime_pm_get(i915);
+
+ err = __live_active_setup(i915, &active);
+
+ i915_active_wait(&active.base);
+ if (!active.retired) {
+ pr_err("i915_active not retired after waiting!\n");
+ err = -EINVAL;
+ }
+
+ i915_active_fini(&active.base);
+ if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ err = -EIO;
+
+ intel_runtime_pm_put(i915, wakeref);
+ mutex_unlock(&i915->drm.struct_mutex);
+ return err;
+}
+
+static int live_active_retire(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct live_active active;
+ intel_wakeref_t wakeref;
+ int err;
+
+ /* Check that we get a callback when requests are indirectly retired */
+
+ mutex_lock(&i915->drm.struct_mutex);
+ wakeref = intel_runtime_pm_get(i915);
+
+ err = __live_active_setup(i915, &active);
+
+ /* waits for & retires all requests */
+ if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ err = -EIO;
+
+ if (!active.retired) {
+ pr_err("i915_active not retired after flushing!\n");
+ err = -EINVAL;
+ }
+
+ i915_active_fini(&active.base);
+ intel_runtime_pm_put(i915, wakeref);
+ mutex_unlock(&i915->drm.struct_mutex);
+ return err;
+}
+
+int i915_active_live_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(live_active_wait),
+ SUBTEST(live_active_retire),
+ };
+
+ if (i915_terminally_wedged(&i915->gpu_error))
+ return 0;
+
+ return i915_subtests(tests, i915);
+}
selftest(sanitycheck, i915_live_sanitycheck) /* keep first (igt selfcheck) */
selftest(uncore, intel_uncore_live_selftests)
selftest(workarounds, intel_workarounds_live_selftests)
-selftest(requests, i915_request_live_selftests)
selftest(timelines, i915_timeline_live_selftests)
+selftest(requests, i915_request_live_selftests)
+selftest(active, i915_active_live_selftests)
selftest(objects, i915_gem_object_live_selftests)
selftest(dmabuf, i915_gem_dmabuf_live_selftests)
selftest(coherency, i915_gem_coherency_live_selftests)