drm/i915: Hack and slash, throttle execbuffer hogs
author Chris Wilson <chris@chris-wilson.co.uk>
Thu, 7 Feb 2019 07:18:22 +0000 (07:18 +0000)
committer Chris Wilson <chris@chris-wilson.co.uk>
Thu, 7 Feb 2019 16:13:21 +0000 (16:13 +0000)
Apply backpressure to hogs that emit requests faster than the GPU can
process them by waiting for their ring to be less than half-full before
proceeding with taking the struct_mutex.

This is a gross hack to apply throttling backpressure; the long term
goal is to remove the struct_mutex contention so that each client
naturally waits, preferably in an asynchronous, nonblocking fashion
(pipelined operations for the win), for their own resources and never
blocks another client within the driver at least. (Realtime priority
goals would extend to ensuring that resource contention favours high
priority clients as well.)

This patch only limits excessive request production and does not attempt
to throttle clients that block waiting for eviction (either global GTT or
system memory) or any other global resources, see above for the long term
goal.

No microbenchmarks are harmed (to the best of my knowledge).

Testcase: igt/gem_exec_schedule/pi-ringfull-*
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: John Harrison <John.C.Harrison@Intel.com>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190207071829.5574-1-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/i915_gem_execbuffer.c
drivers/gpu/drm/i915/intel_ringbuffer.c
drivers/gpu/drm/i915/intel_ringbuffer.h

index 8eedf7cac4938f1444b2aa32e346182a038ec9a6..02adcaf6ebea69086aa07be57fed55b347636c94 100644 (file)
@@ -753,6 +753,68 @@ static int eb_select_context(struct i915_execbuffer *eb)
        return 0;
 }
 
+static struct i915_request *__eb_wait_for_ring(struct intel_ring *ring)
+{
+       struct i915_request *rq;
+
+       /*
+        * Completely unscientific finger-in-the-air estimates for suitable
+        * maximum user request size (to avoid blocking) and then backoff.
+        */
+       if (intel_ring_update_space(ring) >= PAGE_SIZE)
+               return NULL;
+
+       /*
+        * Find a request that after waiting upon, there will be at least half
+        * the ring available. The hysteresis allows us to compete for the
+        * shared ring and should mean that we sleep less often prior to
+        * claiming our resources, but not so long that the ring completely
+        * drains before we can submit our next request.
+        */
+       list_for_each_entry(rq, &ring->request_list, ring_link) {
+               if (__intel_ring_space(rq->postfix,
+                                      ring->emit, ring->size) > ring->size / 2)
+                       break;
+       }
+       if (&rq->ring_link == &ring->request_list)
+               return NULL; /* weird, we will check again later for real */
+
+       return i915_request_get(rq);
+}
+
+static int eb_wait_for_ring(const struct i915_execbuffer *eb)
+{
+       const struct intel_context *ce;
+       struct i915_request *rq;
+       int ret = 0;
+
+       /*
+        * Apply a light amount of backpressure to prevent excessive hogs
+        * from blocking waiting for space whilst holding struct_mutex and
+        * keeping all of their resources pinned.
+        */
+
+       ce = to_intel_context(eb->ctx, eb->engine);
+       if (!ce->ring) /* first use, assume empty! */
+               return 0;
+
+       rq = __eb_wait_for_ring(ce->ring);
+       if (rq) {
+               mutex_unlock(&eb->i915->drm.struct_mutex);
+
+               if (i915_request_wait(rq,
+                                     I915_WAIT_INTERRUPTIBLE,
+                                     MAX_SCHEDULE_TIMEOUT) < 0)
+                       ret = -EINTR;
+
+               i915_request_put(rq);
+
+               mutex_lock(&eb->i915->drm.struct_mutex);
+       }
+
+       return ret;
+}
+
 static int eb_lookup_vmas(struct i915_execbuffer *eb)
 {
        struct radix_tree_root *handles_vma = &eb->ctx->handles_vma;
@@ -2291,6 +2353,10 @@ i915_gem_do_execbuffer(struct drm_device *dev,
        if (err)
                goto err_rpm;
 
+       err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */
+       if (unlikely(err))
+               goto err_unlock;
+
        err = eb_relocate(&eb);
        if (err) {
                /*
@@ -2435,6 +2501,7 @@ err_batch_unpin:
 err_vma:
        if (eb.exec)
                eb_release_vmas(&eb);
+err_unlock:
        mutex_unlock(&dev->struct_mutex);
 err_rpm:
        intel_runtime_pm_put(eb.i915, wakeref);
index b889b27f8aebafccfe660e759b650d23afd41e69..7f841dba87b3026893b2cb330168cb74246d5804 100644 (file)
@@ -49,19 +49,6 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
                I915_GEM_HWS_INDEX_ADDR);
 }
 
-static unsigned int __intel_ring_space(unsigned int head,
-                                      unsigned int tail,
-                                      unsigned int size)
-{
-       /*
-        * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
-        * same cacheline, the Head Pointer must not be greater than the Tail
-        * Pointer."
-        */
-       GEM_BUG_ON(!is_power_of_2(size));
-       return (head - tail - CACHELINE_BYTES) & (size - 1);
-}
-
 unsigned int intel_ring_update_space(struct intel_ring *ring)
 {
        unsigned int space;
index 4d4ea6963a72d274266a62f75b03a41f7f1a7715..710ffb2217753b85026c92996f092890fa434ebf 100644 (file)
@@ -832,6 +832,18 @@ intel_ring_set_tail(struct intel_ring *ring, unsigned int tail)
        return tail;
 }
 
+static inline unsigned int
+__intel_ring_space(unsigned int head, unsigned int tail, unsigned int size)
+{
+       /*
+        * "If the Ring Buffer Head Pointer and the Tail Pointer are on the
+        * same cacheline, the Head Pointer must not be greater than the Tail
+        * Pointer."
+        */
+       GEM_BUG_ON(!is_power_of_2(size));
+       return (head - tail - CACHELINE_BYTES) & (size - 1);
+}
+
 void intel_engine_write_global_seqno(struct intel_engine_cs *engine, u32 seqno);
 
 int intel_engine_setup_common(struct intel_engine_cs *engine);