drm/i915: Allow contexts to share a single timeline across all engines

author Chris Wilson <chris@chris-wilson.co.uk>

Fri, 22 Mar 2019 09:23:25 +0000 (09:23 +0000)

committer Chris Wilson <chris@chris-wilson.co.uk>

Fri, 22 Mar 2019 13:12:38 +0000 (13:12 +0000)
author Chris Wilson <chris@chris-wilson.co.uk>
Fri, 22 Mar 2019 09:23:25 +0000 (09:23 +0000)
committer Chris Wilson <chris@chris-wilson.co.uk>
Fri, 22 Mar 2019 13:12:38 +0000 (13:12 +0000)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c

index 68f9c7c7bd6c871b953a54975f0f91dc36445619..39ca3cb6af666305b9456ec5531bb4f69d228f58 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -238,6 +238,9 @@ static void i915_gem_context_free(struct i915_gem_context *ctx)
         rbtree_postorder_for_each_entry_safe(it, n, &ctx->hw_contexts, node)
                 intel_context_put(it);
  
+       if (ctx->timeline)
+               i915_timeline_put(ctx->timeline);
+
         kfree(ctx->name);
         put_pid(ctx->pid);
  
@@ -403,12 +406,16 @@ static void __assign_ppgtt(struct i915_gem_context *ctx,
  }
  
  static struct i915_gem_context *
-i915_gem_create_context(struct drm_i915_private *dev_priv)
+i915_gem_create_context(struct drm_i915_private *dev_priv, unsigned int flags)
  {
         struct i915_gem_context *ctx;
  
         lockdep_assert_held(&dev_priv->drm.struct_mutex);
  
+       if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE &&
+           !HAS_EXECLISTS(dev_priv))
+               return ERR_PTR(-EINVAL);
+
         /* Reap the most stale context */
         contexts_free_first(dev_priv);
  
@@ -431,6 +438,18 @@ i915_gem_create_context(struct drm_i915_private *dev_priv)
                 i915_ppgtt_put(ppgtt);
         }
  
+       if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) {
+               struct i915_timeline *timeline;
+
+               timeline = i915_timeline_create(dev_priv, NULL);
+               if (IS_ERR(timeline)) {
+                       context_close(ctx);
+                       return ERR_CAST(timeline);
+               }
+
+               ctx->timeline = timeline;
+       }
+
         trace_i915_context_create(ctx);
  
         return ctx;
@@ -459,7 +478,7 @@ i915_gem_context_create_gvt(struct drm_device *dev)
         if (ret)
                 return ERR_PTR(ret);
  
-       ctx = i915_gem_create_context(to_i915(dev));
+       ctx = i915_gem_create_context(to_i915(dev), 0);
         if (IS_ERR(ctx))
                 goto out;
  
@@ -495,7 +514,7 @@ i915_gem_context_create_kernel(struct drm_i915_private *i915, int prio)
         struct i915_gem_context *ctx;
         int err;
  
-       ctx = i915_gem_create_context(i915);
+       ctx = i915_gem_create_context(i915, 0);
         if (IS_ERR(ctx))
                 return ctx;
  
@@ -658,7 +677,7 @@ int i915_gem_context_open(struct drm_i915_private *i915,
         idr_init_base(&file_priv->vm_idr, 1);
  
         mutex_lock(&i915->drm.struct_mutex);
-       ctx = i915_gem_create_context(i915);
+       ctx = i915_gem_create_context(i915, 0);
         mutex_unlock(&i915->drm.struct_mutex);
         if (IS_ERR(ctx)) {
                 err = PTR_ERR(ctx);
@@ -800,7 +819,7 @@ last_request_on_engine(struct i915_timeline *timeline,
  
         rq = i915_active_request_raw(&timeline->last_request,
                                      &engine->i915->drm.struct_mutex);
-       if (rq && rq->engine == engine) {
+       if (rq && rq->engine->mask & engine->mask) {
                 GEM_TRACE("last request on engine %s: %llx:%llu\n",
                           engine->name, rq->fence.context, rq->fence.seqno);
                 GEM_BUG_ON(rq->timeline != timeline);
@@ -1520,7 +1539,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
         if (ret)
                 return ret;
  
-       ext_data.ctx = i915_gem_create_context(i915);
+       ext_data.ctx = i915_gem_create_context(i915, args->flags);
         mutex_unlock(&dev->struct_mutex);
         if (IS_ERR(ext_data.ctx))
                 return PTR_ERR(ext_data.ctx);
diff --git a/drivers/gpu/drm/i915/i915_gem_context_types.h b/drivers/gpu/drm/i915/i915_gem_context_types.h

index 63ae8eb219393d6217bd488393eae0dc61dd1f75..e2ec58b10fb286f340ab6c7966223571970b8d5a 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem_context_types.h
+++ b/drivers/gpu/drm/i915/i915_gem_context_types.h
@@ -41,6 +41,8 @@ struct i915_gem_context {
         /** file_priv: owning file descriptor */
         struct drm_i915_file_private *file_priv;
  
+       struct i915_timeline *timeline;
+
         /**
          * @ppgtt: unique address space (GTT)
          *
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c

index 1529824d7c613f6a8b4a142719265001f505eb21..e9c2094ab8eafa2c110f05b03682f11536049876 100644 (file)
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -992,6 +992,60 @@ void i915_request_skip(struct i915_request *rq, int error)
         memset(vaddr + head, 0, rq->postfix - head);
  }
  
+static struct i915_request *
+__i915_request_add_to_timeline(struct i915_request *rq)
+{
+       struct i915_timeline *timeline = rq->timeline;
+       struct i915_request *prev;
+
+       /*
+        * Dependency tracking and request ordering along the timeline
+        * is special cased so that we can eliminate redundant ordering
+        * operations while building the request (we know that the timeline
+        * itself is ordered, and here we guarantee it).
+        *
+        * As we know we will need to emit tracking along the timeline,
+        * we embed the hooks into our request struct -- at the cost of
+        * having to have specialised no-allocation interfaces (which will
+        * be beneficial elsewhere).
+        *
+        * A second benefit to open-coding i915_request_await_request is
+        * that we can apply a slight variant of the rules specialised
+        * for timelines that jump between engines (such as virtual engines).
+        * If we consider the case of virtual engine, we must emit a dma-fence
+        * to prevent scheduling of the second request until the first is
+        * complete (to maximise our greedy late load balancing) and this
+        * precludes optimising to use semaphores serialisation of a single
+        * timeline across engines.
+        */
+       prev = i915_active_request_raw(&timeline->last_request,
+                                      &rq->i915->drm.struct_mutex);
+       if (prev && !i915_request_completed(prev)) {
+               if (is_power_of_2(prev->engine->mask | rq->engine->mask))
+                       i915_sw_fence_await_sw_fence(&rq->submit,
+                                                    &prev->submit,
+                                                    &rq->submitq);
+               else
+                       __i915_sw_fence_await_dma_fence(&rq->submit,
+                                                       &prev->fence,
+                                                       &rq->dmaq);
+               if (rq->engine->schedule)
+                       __i915_sched_node_add_dependency(&rq->sched,
+                                                        &prev->sched,
+                                                        &rq->dep,
+                                                        0);
+       }
+
+       spin_lock_irq(&timeline->lock);
+       list_add_tail(&rq->link, &timeline->requests);
+       spin_unlock_irq(&timeline->lock);
+
+       GEM_BUG_ON(timeline->seqno != rq->fence.seqno);
+       __i915_active_request_set(&timeline->last_request, rq);
+
+       return prev;
+}
+
  /*
   * NB: This function is not allowed to fail. Doing so would mean the the
   * request is not being tracked for completion but the work itself is
@@ -1036,31 +1090,7 @@ void i915_request_add(struct i915_request *request)
         GEM_BUG_ON(IS_ERR(cs));
         request->postfix = intel_ring_offset(request, cs);
  
-       /*
-        * Seal the request and mark it as pending execution. Note that
-        * we may inspect this state, without holding any locks, during
-        * hangcheck. Hence we apply the barrier to ensure that we do not
-        * see a more recent value in the hws than we are tracking.
-        */
-
-       prev = i915_active_request_raw(&timeline->last_request,
-                                      &request->i915->drm.struct_mutex);
-       if (prev && !i915_request_completed(prev)) {
-               i915_sw_fence_await_sw_fence(&request->submit, &prev->submit,
-                                            &request->submitq);
-               if (engine->schedule)
-                       __i915_sched_node_add_dependency(&request->sched,
-                                                        &prev->sched,
-                                                        &request->dep,
-                                                        0);
-       }
-
-       spin_lock_irq(&timeline->lock);
-       list_add_tail(&request->link, &timeline->requests);
-       spin_unlock_irq(&timeline->lock);
-
-       GEM_BUG_ON(timeline->seqno != request->fence.seqno);
-       __i915_active_request_set(&timeline->last_request, request);
+       prev = __i915_request_add_to_timeline(request);
  
         list_add_tail(&request->ring_link, &ring->request_list);
         if (list_is_first(&request->ring_link, &ring->request_list))
diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h

index 8c8fa50106440922a701936e7b6797fe2f3c59df..cd6c130964cda6d80806a8fe07835d6fc3dedfa2 100644 (file)
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -128,7 +128,10 @@ struct i915_request {
          * It is used by the driver to then queue the request for execution.
          */
         struct i915_sw_fence submit;
-       wait_queue_entry_t submitq;
+       union {
+               wait_queue_entry_t submitq;
+               struct i915_sw_dma_fence_cb dmaq;
+       };
         struct list_head execute_cb;
  
         /*
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c

index 8d1400d378d7a7746ec08426d683b52e5ea066f2..5387aafd3424bc9b2309559379cedf39520d15b9 100644 (file)
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -359,11 +359,6 @@ int i915_sw_fence_await_sw_fence_gfp(struct i915_sw_fence *fence,
         return __i915_sw_fence_await_sw_fence(fence, signaler, NULL, gfp);
  }
  
-struct i915_sw_dma_fence_cb {
-       struct dma_fence_cb base;
-       struct i915_sw_fence *fence;
-};
-
  struct i915_sw_dma_fence_cb_timer {
         struct i915_sw_dma_fence_cb base;
         struct dma_fence *dma;
@@ -480,6 +475,40 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
         return ret;
  }
  
+static void __dma_i915_sw_fence_wake(struct dma_fence *dma,
+                                    struct dma_fence_cb *data)
+{
+       struct i915_sw_dma_fence_cb *cb = container_of(data, typeof(*cb), base);
+
+       i915_sw_fence_complete(cb->fence);
+}
+
+int __i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
+                                   struct dma_fence *dma,
+                                   struct i915_sw_dma_fence_cb *cb)
+{
+       int ret;
+
+       debug_fence_assert(fence);
+
+       if (dma_fence_is_signaled(dma))
+               return 0;
+
+       cb->fence = fence;
+       i915_sw_fence_await(fence);
+
+       ret = dma_fence_add_callback(dma, &cb->base, __dma_i915_sw_fence_wake);
+       if (ret == 0) {
+               ret = 1;
+       } else {
+               i915_sw_fence_complete(fence);
+               if (ret == -ENOENT) /* fence already signaled */
+                       ret = 0;
+       }
+
+       return ret;
+}
+
  int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
                                     struct reservation_object *resv,
                                     const struct dma_fence_ops *exclude,
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.h b/drivers/gpu/drm/i915/i915_sw_fence.h

index 6dec9e1d11029888d11d3e250aa9644c7339430a..9cb5c3b307a6241f02c321c588f8eaba76c67c58 100644 (file)
--- a/drivers/gpu/drm/i915/i915_sw_fence.h
+++ b/drivers/gpu/drm/i915/i915_sw_fence.h
@@ -9,14 +9,13 @@
  #ifndef _I915_SW_FENCE_H_
  #define _I915_SW_FENCE_H_
  
+#include <linux/dma-fence.h>
  #include <linux/gfp.h>
  #include <linux/kref.h>
  #include <linux/notifier.h> /* for NOTIFY_DONE */
  #include <linux/wait.h>
  
  struct completion;
-struct dma_fence;
-struct dma_fence_ops;
  struct reservation_object;
  
  struct i915_sw_fence {
@@ -68,10 +67,20 @@ int i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence,
  int i915_sw_fence_await_sw_fence_gfp(struct i915_sw_fence *fence,
                                      struct i915_sw_fence *after,
                                      gfp_t gfp);
+
+struct i915_sw_dma_fence_cb {
+       struct dma_fence_cb base;
+       struct i915_sw_fence *fence;
+};
+
+int __i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
+                                   struct dma_fence *dma,
+                                   struct i915_sw_dma_fence_cb *cb);
  int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence,
                                   struct dma_fence *dma,
                                   unsigned long timeout,
                                   gfp_t gfp);
+
  int i915_sw_fence_await_reservation(struct i915_sw_fence *fence,
                                     struct reservation_object *resv,
                                     const struct dma_fence_ops *exclude,
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c

index 35f7ef9e75c8d9a8b2288bde8315b7b554d03c5c..66bc3cd4e1669a796ae693ac1bf810a05114738d 100644 (file)
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2802,7 +2802,10 @@ err_unpin_ctx:
  
  static struct i915_timeline *get_timeline(struct i915_gem_context *ctx)
  {
-       return i915_timeline_create(ctx->i915, NULL);
+       if (ctx->timeline)
+               return i915_timeline_get(ctx->timeline);
+       else
+               return i915_timeline_create(ctx->i915, NULL);
  }
  
  static int execlists_context_deferred_alloc(struct intel_context *ce,
diff --git a/drivers/gpu/drm/i915/selftests/mock_context.c b/drivers/gpu/drm/i915/selftests/mock_context.c

index 81e5ace18b81e5a3556d5dd0437c5ec4dbbd9e1b..0426093bf1d9fdced22f34bbe7ae36e1991dbe39 100644 (file)
--- a/drivers/gpu/drm/i915/selftests/mock_context.c
+++ b/drivers/gpu/drm/i915/selftests/mock_context.c
@@ -97,7 +97,7 @@ live_context(struct drm_i915_private *i915, struct drm_file *file)
  
         lockdep_assert_held(&i915->drm.struct_mutex);
  
-       ctx = i915_gem_create_context(i915);
+       ctx = i915_gem_create_context(i915, 0);
         if (IS_ERR(ctx))
                 return ctx;
  
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h

index d45b79746fc4986a545a7f0c59bcd15748953d6d..9999f7d6a5a9bac571d846f467ee612cf3aa2bfe 100644 (file)
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1456,8 +1456,9 @@ struct drm_i915_gem_context_create_ext {
         __u32 ctx_id; /* output: id of new context*/
         __u32 flags;
  #define I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS       (1u << 0)
+#define I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE      (1u << 1)
  #define I915_CONTEXT_CREATE_FLAGS_UNKNOWN \
-       (-(I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS << 1))
+       (-(I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE << 1))
         __u64 extensions;
  };
author	Chris Wilson <chris@chris-wilson.co.uk>
	Fri, 22 Mar 2019 09:23:25 +0000 (09:23 +0000)
committer	Chris Wilson <chris@chris-wilson.co.uk>
	Fri, 22 Mar 2019 13:12:38 +0000 (13:12 +0000)
drivers/gpu/drm/i915/i915_gem_context.c		patch | blob | history
drivers/gpu/drm/i915/i915_gem_context_types.h		patch | blob | history
drivers/gpu/drm/i915/i915_request.c		patch | blob | history
drivers/gpu/drm/i915/i915_request.h		patch | blob | history
drivers/gpu/drm/i915/i915_sw_fence.c		patch | blob | history
drivers/gpu/drm/i915/i915_sw_fence.h		patch | blob | history
drivers/gpu/drm/i915/intel_lrc.c		patch | blob | history
drivers/gpu/drm/i915/selftests/mock_context.c		patch | blob | history
include/uapi/drm/i915_drm.h		patch | blob | history