drm/i915: Apply i915_request_skip() on submission
author Chris Wilson <chris@chris-wilson.co.uk>
Wed, 4 Mar 2020 12:18:48 +0000 (12:18 +0000)
committer Chris Wilson <chris@chris-wilson.co.uk>
Wed, 4 Mar 2020 14:29:50 +0000 (14:29 +0000)
Trying to use i915_request_skip() prior to i915_request_add() causes us
to try to fill the ring up to request->postfix, which has not yet been
set, and so we may memset() past the end of the ring.

Instead of skipping the request immediately, just flag the error on the
request (accepting only the first fatal error we see) and then clear the
request's payload upon submission.
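
As a rough sketch of the new flow (illustrative only, not taken verbatim
from the patch; emit_payload() is a hypothetical stand-in for whatever
work the caller emits into the ring):

	/*
	 * Construction: an error is only flagged on the request; the
	 * ring is left untouched since rq->postfix is not yet set.
	 */
	err = emit_payload(rq);
	if (unlikely(err))
		i915_request_set_error_once(rq, err);
	i915_request_add(rq); /* rq->postfix is set here */

	/*
	 * Submission (__i915_request_submit): only now, with postfix
	 * valid, is it safe to clear the user payload from the ring.
	 */
	if (unlikely(fatal_error(rq->fence.error)))
		__i915_request_skip(rq);

i915_request_set_error_once() records the first fatal error via a
try_cmpxchg() loop on rq->fence.error: non-fatal values (0, -EAGAIN,
-ETIMEDOUT) may still be replaced, but once a fatal error lands it
sticks.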

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Matthew Auld <matthew.auld@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200304121849.2448028-1-chris@chris-wilson.co.uk
14 files changed:
drivers/gpu/drm/i915/gem/i915_gem_client_blt.c
drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
drivers/gpu/drm/i915/gem/i915_gem_object_blt.c
drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
drivers/gpu/drm/i915/gt/intel_lrc.c
drivers/gpu/drm/i915/gt/intel_reset.c
drivers/gpu/drm/i915/gt/intel_ring_submission.c
drivers/gpu/drm/i915/gt/mock_engine.c
drivers/gpu/drm/i915/gt/selftest_hangcheck.c
drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
drivers/gpu/drm/i915/i915_request.c
drivers/gpu/drm/i915/i915_request.h
drivers/gpu/drm/i915/selftests/igt_spinner.c

index 81366aa4812b84e6c5dfff760a0e89bd346b2ddf..0598e5382a1def8df146d15af324ab4e385d052d 100644 (file)
@@ -217,7 +217,7 @@ static void clear_pages_worker(struct work_struct *work)
                                           0);
 out_request:
        if (unlikely(err)) {
-               i915_request_skip(rq, err);
+               i915_request_set_error_once(rq, err);
                err = 0;
        }
 
index a1636c168e1fd4d41ead9b4879f82a9929ecc050..7bb27f382af7668a08a1cc9c769679cd6e55c232 100644 (file)
@@ -1169,7 +1169,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
        goto out_pool;
 
 skip_request:
-       i915_request_skip(rq, err);
+       i915_request_set_error_once(rq, err);
 err_request:
        i915_request_add(rq);
 err_unpin:
@@ -1850,7 +1850,7 @@ static int eb_move_to_gpu(struct i915_execbuffer *eb)
        return 0;
 
 err_skip:
-       i915_request_skip(eb->request, err);
+       i915_request_set_error_once(eb->request, err);
        return err;
 }
 
@@ -2579,7 +2579,8 @@ static void eb_request_add(struct i915_execbuffer *eb)
                        attr.priority |= I915_PRIORITY_WAIT;
        } else {
                /* Serialise with context_close via the add_to_timeline */
-               i915_request_skip(rq, -ENOENT);
+               i915_request_set_error_once(rq, -ENOENT);
+               __i915_request_skip(rq);
        }
 
        local_bh_disable();
index 70809d8897cdac437da15da52cd4eb35483d1b7e..39b8a055d80aa0e54f7fd6b738b544e21372b653 100644 (file)
@@ -186,7 +186,7 @@ int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
                                        0);
 out_request:
        if (unlikely(err))
-               i915_request_skip(rq, err);
+               i915_request_set_error_once(rq, err);
 
        i915_request_add(rq);
 out_batch:
@@ -385,7 +385,7 @@ out_unlock:
        drm_gem_unlock_reservations(objs, ARRAY_SIZE(objs), &acquire);
 out_request:
        if (unlikely(err))
-               i915_request_skip(rq, err);
+               i915_request_set_error_once(rq, err);
 
        i915_request_add(rq);
 out_batch:
index 375d864736f31bdff43c6923ab9bac92459db77d..77c7e65de7c31e2f9560ed27e2fe455a5a1a6b02 100644 (file)
@@ -1004,7 +1004,7 @@ emit_rpcs_query(struct drm_i915_gem_object *obj,
        return 0;
 
 skip_request:
-       i915_request_skip(rq, err);
+       i915_request_set_error_once(rq, err);
 err_request:
        i915_request_add(rq);
 err_batch:
@@ -1559,7 +1559,7 @@ static int write_to_scratch(struct i915_gem_context *ctx,
 
        goto out_vm;
 skip_request:
-       i915_request_skip(rq, err);
+       i915_request_set_error_once(rq, err);
 err_request:
        i915_request_add(rq);
 err_unpin:
@@ -1708,7 +1708,7 @@ static int read_from_scratch(struct i915_gem_context *ctx,
 
        goto out_vm;
 skip_request:
-       i915_request_skip(rq, err);
+       i915_request_set_error_once(rq, err);
 err_request:
        i915_request_add(rq);
 err_unpin:
index 6718da20f35d2227d7d3f36f27104bd5f1c5468d..772d8cba7da9715e3efeaa07955d08cf13dc7011 100644 (file)
@@ -159,7 +159,7 @@ int igt_gpu_fill_dw(struct intel_context *ce,
        return 0;
 
 skip_request:
-       i915_request_skip(rq, err);
+       i915_request_set_error_once(rq, err);
 err_request:
        i915_request_add(rq);
 err_batch:
index b9b3f78f13240a44dfe472783a4f47982e75bc86..d123dd7fe98baf05c668a41de4874d0ea73095d2 100644 (file)
@@ -245,7 +245,7 @@ static void mark_eio(struct i915_request *rq)
 
        GEM_BUG_ON(i915_request_signaled(rq));
 
-       dma_fence_set_error(&rq->fence, -EIO);
+       i915_request_set_error_once(rq, -EIO);
        i915_request_mark_complete(rq);
 }
 
@@ -4903,7 +4903,7 @@ static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve)
        mask = rq->execution_mask;
        if (unlikely(!mask)) {
                /* Invalid selection, submit to a random engine in error */
-               i915_request_skip(rq, -ENODEV);
+               i915_request_set_error_once(rq, -ENODEV);
                mask = ve->siblings[0]->mask;
        }
 
index aef6ab58d7d99a58c90e677a75935282a50e1854..10ad816b32e2a86a3f84cc96e2ce955ef5fd97f1 100644 (file)
@@ -48,8 +48,10 @@ static void engine_skip_context(struct i915_request *rq)
 
        lockdep_assert_held(&engine->active.lock);
        list_for_each_entry_continue(rq, &engine->active.requests, sched.link)
-               if (rq->context == hung_ctx)
-                       i915_request_skip(rq, -EIO);
+               if (rq->context == hung_ctx) {
+                       i915_request_set_error_once(rq, -EIO);
+                       __i915_request_skip(rq);
+               }
 }
 
 static void client_mark_guilty(struct i915_gem_context *ctx, bool banned)
@@ -154,11 +156,12 @@ void __i915_request_reset(struct i915_request *rq, bool guilty)
 
        rcu_read_lock(); /* protect the GEM context */
        if (guilty) {
-               i915_request_skip(rq, -EIO);
+               i915_request_set_error_once(rq, -EIO);
+               __i915_request_skip(rq);
                if (mark_guilty(rq))
                        engine_skip_context(rq);
        } else {
-               dma_fence_set_error(&rq->fence, -EAGAIN);
+               i915_request_set_error_once(rq, -EAGAIN);
                mark_innocent(rq);
        }
        rcu_read_unlock();
@@ -785,7 +788,7 @@ static void nop_submit_request(struct i915_request *request)
        unsigned long flags;
 
        RQ_TRACE(request, "-EIO\n");
-       dma_fence_set_error(&request->fence, -EIO);
+       i915_request_set_error_once(request, -EIO);
 
        spin_lock_irqsave(&engine->active.lock, flags);
        __i915_request_submit(request);
index fee7436260603d6bd3a774c74077faeb2cbe8751..ee241b7eaa3bf8ebeeaa06cb7c6dc898feb47d84 100644 (file)
@@ -895,9 +895,7 @@ static void reset_cancel(struct intel_engine_cs *engine)
 
        /* Mark all submitted requests as skipped. */
        list_for_each_entry(request, &engine->active.requests, sched.link) {
-               if (!i915_request_signaled(request))
-                       dma_fence_set_error(&request->fence, -EIO);
-
+               i915_request_set_error_once(request, -EIO);
                i915_request_mark_complete(request);
        }
 
index 5633515c12e964af575c4cc116b3e4e79aaa72cb..4a53ded7c2dd6a5ad80413c5c1358fbc6b3955af 100644 (file)
@@ -244,9 +244,7 @@ static void mock_reset_cancel(struct intel_engine_cs *engine)
 
        /* Mark all submitted requests as skipped. */
        list_for_each_entry(request, &engine->active.requests, sched.link) {
-               if (!i915_request_signaled(request))
-                       dma_fence_set_error(&request->fence, -EIO);
-
+               i915_request_set_error_once(request, -EIO);
                i915_request_mark_complete(request);
        }
 
index c3514ec7b8db54af86aeeeed0db036723cc4a21c..2b2efff6e19d40d6c41e383cdf319037ff34497e 100644 (file)
@@ -268,7 +268,7 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine)
 
 cancel_rq:
        if (err) {
-               i915_request_skip(rq, err);
+               i915_request_set_error_once(rq, err);
                i915_request_add(rq);
        }
 unpin_hws:
index 1beaa77f9bb6928d066dd9c30b1e9217b9e9d8df..fe7778c28d2d7846c308da40668bf86dd445765c 100644 (file)
@@ -456,9 +456,7 @@ static void guc_reset_cancel(struct intel_engine_cs *engine)
 
        /* Mark all executing requests as skipped. */
        list_for_each_entry(rq, &engine->active.requests, sched.link) {
-               if (!i915_request_signaled(rq))
-                       dma_fence_set_error(&rq->fence, -EIO);
-
+               i915_request_set_error_once(rq, -EIO);
                i915_request_mark_complete(rq);
        }
 
index d837c1380015b000451e8fdd59b706fdceaaa5d5..4bfe68edfc815cc9622c1d6a6580c010601657f5 100644 (file)
@@ -363,6 +363,50 @@ __await_execution(struct i915_request *rq,
        return 0;
 }
 
+static bool fatal_error(int error)
+{
+       switch (error) {
+       case 0: /* not an error! */
+       case -EAGAIN: /* innocent victim of a GT reset (__i915_request_reset) */
+       case -ETIMEDOUT: /* waiting for Godot (timer_i915_sw_fence_wake) */
+               return false;
+       default:
+               return true;
+       }
+}
+
+void __i915_request_skip(struct i915_request *rq)
+{
+       GEM_BUG_ON(!fatal_error(rq->fence.error));
+
+       if (rq->infix == rq->postfix)
+               return;
+
+       /*
+        * As this request likely depends on state from the lost
+        * context, clear out all the user operations leaving the
+        * breadcrumb at the end (so we get the fence notifications).
+        */
+       __i915_request_fill(rq, 0);
+       rq->infix = rq->postfix;
+}
+
+void i915_request_set_error_once(struct i915_request *rq, int error)
+{
+       int old;
+
+       GEM_BUG_ON(!IS_ERR_VALUE((long)error));
+
+       if (i915_request_signaled(rq))
+               return;
+
+       old = READ_ONCE(rq->fence.error);
+       do {
+               if (fatal_error(old))
+                       return;
+       } while (!try_cmpxchg(&rq->fence.error, &old, error));
+}
+
 bool __i915_request_submit(struct i915_request *request)
 {
        struct intel_engine_cs *engine = request->engine;
@@ -392,8 +436,10 @@ bool __i915_request_submit(struct i915_request *request)
        if (i915_request_completed(request))
                goto xfer;
 
-       if (intel_context_is_banned(request->context))
-               i915_request_skip(request, -EIO);
+       if (unlikely(intel_context_is_banned(request->context)))
+               i915_request_set_error_once(request, -EIO);
+       if (unlikely(fatal_error(request->fence.error)))
+               __i915_request_skip(request);
 
        /*
         * Are we using semaphores when the gpu is already saturated?
@@ -519,7 +565,7 @@ submit_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
                trace_i915_request_submit(request);
 
                if (unlikely(fence->error))
-                       i915_request_skip(request, fence->error);
+                       i915_request_set_error_once(request, fence->error);
 
                /*
                 * We need to serialize use of the submit_request() callback
@@ -1209,23 +1255,6 @@ i915_request_await_object(struct i915_request *to,
        return ret;
 }
 
-void i915_request_skip(struct i915_request *rq, int error)
-{
-       GEM_BUG_ON(!IS_ERR_VALUE((long)error));
-       dma_fence_set_error(&rq->fence, error);
-
-       if (rq->infix == rq->postfix)
-               return;
-
-       /*
-        * As this request likely depends on state from the lost
-        * context, clear out all the user operations leaving the
-        * breadcrumb at the end (so we get the fence notifications).
-        */
-       __i915_request_fill(rq, 0);
-       rq->infix = rq->postfix;
-}
-
 static struct i915_request *
 __i915_request_add_to_timeline(struct i915_request *rq)
 {
index da8420f03232b7145f943df4ce6d1456ca833103..d4bae16b4785880ab0ec4b9d8dba9371cf588352 100644 (file)
@@ -303,6 +303,9 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp);
 struct i915_request * __must_check
 i915_request_create(struct intel_context *ce);
 
+void i915_request_set_error_once(struct i915_request *rq, int error);
+void __i915_request_skip(struct i915_request *rq);
+
 struct i915_request *__i915_request_commit(struct i915_request *request);
 void __i915_request_queue(struct i915_request *rq,
                          const struct i915_sched_attr *attr);
@@ -352,8 +355,6 @@ void i915_request_add(struct i915_request *rq);
 bool __i915_request_submit(struct i915_request *request);
 void i915_request_submit(struct i915_request *request);
 
-void i915_request_skip(struct i915_request *request, int error);
-
 void __i915_request_unsubmit(struct i915_request *request);
 void i915_request_unsubmit(struct i915_request *request);
 
index e8a58fe49c3963ccc81ec1a679650d13cbe2d4ba..9ad4ab088466daa1a5fad20c34f7eee6b9c5c77e 100644 (file)
@@ -183,7 +183,7 @@ igt_spinner_create_request(struct igt_spinner *spin,
 
 cancel_rq:
        if (err) {
-               i915_request_skip(rq, err);
+               i915_request_set_error_once(rq, err);
                i915_request_add(rq);
        }
 unpin_hws: