drm/i915: Allow contexts to share a single timeline across all engines
authorChris Wilson <chris@chris-wilson.co.uk>
Fri, 22 Mar 2019 09:23:25 +0000 (09:23 +0000)
committerChris Wilson <chris@chris-wilson.co.uk>
Fri, 22 Mar 2019 13:12:38 +0000 (13:12 +0000)
Previously, our view has always been to run the engines independently
within a context. (Multiple engines happened before we had contexts and
timelines, so they always operated independently and that behaviour
persisted into contexts.) However, at the user level the context often
represents a single timeline (e.g. GL contexts) and userspace must
ensure that the individual engines are serialised to present that
ordering to the client (or forget about this detail entirely and hope no
one notices - a fair ploy if the client can only directly control one
engine themselves ;)

In the next patch, we will want to construct a set of engines that
operate as one, that have a single timeline interwoven between them, to
present a single virtual engine to the user. (They submit to the virtual
engine, then we decide which engine to execute on.)

To that end, we want to be able to create contexts which have a single
timeline (fence context) shared between all engines, rather than multiple
timelines.

v2: Move the specialised timeline ordering to its own function.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190322092325.5883-4-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/i915_gem_context.c
drivers/gpu/drm/i915/i915_gem_context_types.h
drivers/gpu/drm/i915/i915_request.c
drivers/gpu/drm/i915/i915_request.h
drivers/gpu/drm/i915/i915_sw_fence.c
drivers/gpu/drm/i915/i915_sw_fence.h
drivers/gpu/drm/i915/intel_lrc.c
drivers/gpu/drm/i915/selftests/mock_context.c
include/uapi/drm/i915_drm.h

index 68f9c7c7bd6c871b953a54975f0f91dc36445619..39ca3cb6af666305b9456ec5531bb4f69d228f58 100644 (file)
@@ -238,6 +238,9 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
        rbtree_postorder_for_each_entry_safe(it, n, &ctx->hw_contexts, node)
                intel_context_put(it);
 
+       if (ctx->timeline)
+               i915_timeline_put(ctx->timeline);
+
        kfree(ctx->name);
        put_pid(ctx->pid);
 
@@ -403,12 +406,16 @@ static void __assign_ppgtt(struct i915_gem_context *ctx,
 }
 
 static struct i915_gem_context *
-i915_gem_create_context(struct drm_i915_private *dev_priv)
+i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags)
 {
        struct i915_gem_context *ctx;
 
        lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
+       if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE &&
+           !HAS_EXECLISTS(dev_priv))
+               return ERR_PTR(-EINVAL);
+
        /* Reap the most stale context */
        contexts_free_first(dev_priv);
 
@@ -431,6 +438,18 @@ i915_gem_create_context(struct drm_i915_private *dev_priv)
                i915_ppgtt_put(ppgtt);
        }
 
+       if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) {
+               struct i915_timeline *timeline;
+
+               timeline = i915_timeline_create(dev_priv, NULL);
+               if (IS_ERR(timeline)) {
+                       context_close(ctx);
+                       return ERR_CAST(timeline);
+               }
+
+               ctx->timeline = timeline;
+       }
+
        trace_i915_context_create(ctx);
 
        return ctx;
@@ -459,7 +478,7 @@ i915_gem_context_create_gvt(struct drm_device *dev)
        if (ret)
                return ERR_PTR(ret);
 
-       ctx = i915_gem_create_context(to_i915(dev));
+       ctx = i915_gem_create_context(to_i915(dev), 0);
        if (IS_ERR(ctx))
                goto out;
 
@@ -495,7 +514,7 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
        struct i915_gem_context *ctx;
        int err;
 
-       ctx = i915_gem_create_context(i915);
+       ctx = i915_gem_create_context(i915, 0);
        if (IS_ERR(ctx))
                return ctx;
 
@@ -658,7 +677,7 @@ int i915_gem_context_open(struct drm_i915_private *i915,
        idr_init_base(&file_priv->vm_idr, 1);
 
        mutex_lock(&i915->drm.struct_mutex);
-       ctx = i915_gem_create_context(i915);
+       ctx = i915_gem_create_context(i915, 0);
        mutex_unlock(&i915->drm.struct_mutex);
        if (IS_ERR(ctx)) {
                err = PTR_ERR(ctx);
@@ -800,7 +819,7 @@ last_request_on_engine(struct i915_timeline *timeline,
 
        rq = i915_active_request_raw(&timeline->last_request,
                                     &engine->i915->drm.struct_mutex);
-       if (rq && rq->engine == engine) {
+       if (rq && rq->engine->mask & engine->mask) {
                GEM_TRACE("last request on engine %s: %llx:%llu\n",
                          engine->name, rq->fence.context, rq->fence.seqno);
                GEM_BUG_ON(rq->timeline != timeline);
@@ -1520,7 +1539,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
        if (ret)
                return ret;
 
-       ext_data.ctx = i915_gem_create_context(i915);
+       ext_data.ctx = i915_gem_create_context(i915, args->flags);
        mutex_unlock(&dev->struct_mutex);
        if (IS_ERR(ext_data.ctx))
                return PTR_ERR(ext_data.ctx);
index 63ae8eb219393d6217bd488393eae0dc61dd1f75..e2ec58b10fb286f340ab6c7966223571970b8d5a 100644 (file)
@@ -41,6 +41,8 @@ struct i915_gem_context {
        /** file_priv: owning file descriptor */
        struct drm_i915_file_private *file_priv;
 
+       struct i915_timeline *timeline;
+
        /**
         * @ppgtt: unique address space (GTT)
         *
index 1529824d7c613f6a8b4a142719265001f505eb21..e9c2094ab8eafa2c110f05b03682f11536049876 100644 (file)
@@ -992,6 +992,60 @@ void i915_request_skip(struct i915_request *rq, int error)
        memset(vaddr + head, 0, rq->postfix - head);
 }
 
+static struct i915_request *
+__i915_request_add_to_timeline(struct i915_request *rq)
+{
+       struct i915_timeline *timeline = rq->timeline;
+       struct i915_request *prev;
+
+       /*
+        * Dependency tracking and request ordering along the timeline
+        * is special cased so that we can eliminate redundant ordering
+        * operations while building the request (we know that the timeline
+        * itself is ordered, and here we guarantee it).
+        *
+        * As we know we will need to emit tracking along the timeline,
+        * we embed the hooks into our request struct -- at the cost of
+        * having to have specialised no-allocation interfaces (which will
+        * be beneficial elsewhere).
+        *
+        * A second benefit to open-coding i915_request_await_request is
+        * that we can apply a slight variant of the rules specialised
+        * for timelines that jump between engines (such as virtual engines).
+        * If we consider the case of virtual engine, we must emit a dma-fence
+        * to prevent scheduling of the second request until the first is
+        * complete (to maximise our greedy late load balancing) and this
+        * precludes optimising to use semaphores serialisation of a single
+        * timeline across engines.
+        */
+       prev = i915_active_request_raw(&timeline->last_request,
+                                      &rq->i915->drm.struct_mutex);
+       if (prev && !i915_request_completed(prev)) {
+               if (is_power_of_2(prev->engine->mask | rq->engine->mask))
+                       i915_sw_fence_await_sw_fence(&rq->submit,
+                                                    &prev->submit,
+                                                    &rq->submitq);
+               else
+                       __i915_sw_fence_await_dma_fence(&rq->submit,
+                                                       &prev->fence,
+                                                       &rq->dmaq);
+               if (rq->engine->schedule)
+                       __i915_sched_node_add_dependency(&rq->sched,
+                                                        &prev->sched,
+                                                        &rq->dep,
+                                                        0);
+       }
+
+       spin_lock_irq(&timeline->lock);
+       list_add_tail(&rq->link, &timeline->requests);
+       spin_unlock_irq(&timeline->lock);
+
+       GEM_BUG_ON(timeline->seqno != rq->fence.seqno);
+       __i915_active_request_set(&timeline->last_request, rq);
+
+       return prev;
+}
+
 /*
  * NB: This function is not allowed to fail. Doing so would mean the the
  * request is not being tracked for completion but the work itself is
@@ -1036,31 +1090,7 @@ void i915_request_add(struct i915_request *request)
        GEM_BUG_ON(IS_ERR(cs));
        request->postfix = intel_ring_offset(request, cs);
 
-       /*
-        * Seal the request and mark it as pending execution. Note that
-        * we may inspect this state, without holding any locks, during
-        * hangcheck. Hence we apply the barrier to ensure that we do not
-        * see a more recent value in the hws than we are tracking.
-        */
-
-       prev = i915_active_request_raw(&timeline->last_request,
-                                      &request->i915->drm.struct_mutex);
-       if (prev && !i915_request_completed(prev)) {
-               i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
-                                            &request->submitq);
-               if (engine->schedule)
-                       __i915_sched_node_add_dependency(&request->sched,
-                                                        &prev->sched,
-                                                        &request->dep,
-                                                        0);
-       }
-
-       spin_lock_irq(&timeline->lock);
-       list_add_tail(&request->link, &timeline->requests);
-       spin_unlock_irq(&timeline->lock);
-
-       GEM_BUG_ON(timeline->seqno != request->fence.seqno);
-       __i915_active_request_set(&timeline->last_request, request);
+       prev = __i915_request_add_to_timeline(request);
 
        list_add_tail(&request->ring_link, &ring->request_list);
        if (list_is_first(&request->ring_link, &ring->request_list))
index 8c8fa50106440922a701936e7b6797fe2f3c59df..cd6c130964cda6d80806a8fe07835d6fc3dedfa2 100644 (file)
@@ -128,7 +128,10 @@ struct i915_request {
         * It is used by the driver to then queue the request for execution.
         */
        struct i915_sw_fence submit;
-       wait_queue_entry_t submitq;
+       union {
+               wait_queue_entry_t submitq;
+               struct i915_sw_dma_fence_cb dmaq;
+       };
        struct list_head execute_cb;
 
        /*
index 8d1400d378d7a7746ec08426d683b52e5ea066f2..5387aafd3424bc9b2309559379cedf39520d15b9 100644 (file)
@@ -359,11 +359,6 @@ int i915_sw_fence_await_sw_fence_gfp(struct i915_sw_fence *fence,
        return __i915_sw_fence_await_sw_fence(fence, signaler, NULL, gfp);
 }
 
-struct i915_sw_dma_fence_cb {
-       struct dma_fence_cb base;
-       struct i915_sw_fence *fence;
-};
-
 struct i915_sw_dma_fence_cb_timer {
        struct i915_sw_dma_fence_cb base;
        struct dma_fence *dma;
@@ -480,6 +475,40 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
        return ret;
 }
 
+static void __dma_i915_sw_fence_wake(struct dma_fence *dma,
+                                    struct dma_fence_cb *data)
+{
+       struct i915_sw_dma_fence_cb *cb = container_of(data, typeof(*cb), base);
+
+       i915_sw_fence_complete(cb->fence);
+}
+
+int __i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
+                                   struct dma_fence *dma,
+                                   struct i915_sw_dma_fence_cb *cb)
+{
+       int ret;
+
+       debug_fence_assert(fence);
+
+       if (dma_fence_is_signaled(dma))
+               return 0;
+
+       cb->fence = fence;
+       i915_sw_fence_await(fence);
+
+       ret = dma_fence_add_callback(dma, &cb->base, __dma_i915_sw_fence_wake);
+       if (ret == 0) {
+               ret = 1;
+       } else {
+               i915_sw_fence_complete(fence);
+               if (ret == -ENOENT) /* fence already signaled */
+                       ret = 0;
+       }
+
+       return ret;
+}
+
 int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
                                    struct reservation_object *resv,
                                    const struct dma_fence_ops *exclude,
index 6dec9e1d11029888d11d3e250aa9644c7339430a..9cb5c3b307a6241f02c321c588f8eaba76c67c58 100644 (file)
@@ -9,14 +9,13 @@
 #ifndef _I915_SW_FENCE_H_
 #define _I915_SW_FENCE_H_
 
+#include <linux/dma-fence.h>
 #include <linux/gfp.h>
 #include <linux/kref.h>
 #include <linux/notifier.h> /* for NOTIFY_DONE */
 #include <linux/wait.h>
 
 struct completion;
-struct dma_fence;
-struct dma_fence_ops;
 struct reservation_object;
 
 struct i915_sw_fence {
@@ -68,10 +67,20 @@ int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
 int i915_sw_fence_await_sw_fence_gfp(struct i915_sw_fence *fence,
                                     struct i915_sw_fence *after,
                                     gfp_t gfp);
+
+struct i915_sw_dma_fence_cb {
+       struct dma_fence_cb base;
+       struct i915_sw_fence *fence;
+};
+
+int __i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
+                                   struct dma_fence *dma,
+                                   struct i915_sw_dma_fence_cb *cb);
 int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
                                  struct dma_fence *dma,
                                  unsigned long timeout,
                                  gfp_t gfp);
+
 int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
                                    struct reservation_object *resv,
                                    const struct dma_fence_ops *exclude,
index 35f7ef9e75c8d9a8b2288bde8315b7b554d03c5c..66bc3cd4e1669a796ae693ac1bf810a05114738d 100644 (file)
@@ -2802,7 +2802,10 @@ err_unpin_ctx:
 
 static struct i915_timeline *get_timeline(struct i915_gem_context *ctx)
 {
-       return i915_timeline_create(ctx->i915, NULL);
+       if (ctx->timeline)
+               return i915_timeline_get(ctx->timeline);
+       else
+               return i915_timeline_create(ctx->i915, NULL);
 }
 
 static int execlists_context_deferred_alloc(struct intel_context *ce,
index 81e5ace18b81e5a3556d5dd0437c5ec4dbbd9e1b..0426093bf1d9fdced22f34bbe7ae36e1991dbe39 100644 (file)
@@ -97,7 +97,7 @@ live_context(struct drm_i915_private *i915, struct drm_file *file)
 
        lockdep_assert_held(&i915->drm.struct_mutex);
 
-       ctx = i915_gem_create_context(i915);
+       ctx = i915_gem_create_context(i915, 0);
        if (IS_ERR(ctx))
                return ctx;
 
index d45b79746fc4986a545a7f0c59bcd15748953d6d..9999f7d6a5a9bac571d846f467ee612cf3aa2bfe 100644 (file)
@@ -1456,8 +1456,9 @@ struct drm_i915_gem_context_create_ext {
        __u32 ctx_id; /* output: id of new context*/
        __u32 flags;
 #define I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS       (1u << 0)
+#define I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE      (1u << 1)
 #define I915_CONTEXT_CREATE_FLAGS_UNKNOWN \
-       (-(I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS << 1))
+       (-(I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE << 1))
        __u64 extensions;
 };