drm/i915/selftests: Exercise resetting during non-user payloads
author Chris Wilson <chris@chris-wilson.co.uk>
Tue, 26 Feb 2019 09:49:22 +0000 (09:49 +0000)
committer Chris Wilson <chris@chris-wilson.co.uk>
Tue, 26 Feb 2019 09:55:41 +0000 (09:55 +0000)
In selftests/live_hangcheck, we have a lot of tests for resetting simple
spinners, but nothing quite prepared us for how the GPU reacted to
triggering a reset outside of the safe spinner. These two subtests fill
the ring with plain old empty, non-spinning requests, and then trigger
a reset. Without a user-payload to blame, these requests will exercise
the 'non-started' paths and mostly be replayed verbatim.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190226094922.31617-4-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/selftests/intel_hangcheck.c

index fa02cf9ce0cf3c8b41cc41091f7e7e44e09fa462..12e047328ab82c283a7424ef1bf58e2bbfb666d0 100644 (file)
@@ -415,6 +415,222 @@ static bool wait_for_idle(struct intel_engine_cs *engine)
        return wait_for(intel_engine_is_idle(engine), IGT_IDLE_TIMEOUT) == 0;
 }
 
+/*
+ * igt_reset_nop - full GPU reset while only empty requests are queued.
+ * @arg: the struct drm_i915_private under test, passed as an opaque pointer
+ *
+ * Until the IGT_TIMEOUT deadline passes, queue 16 payload-less requests on
+ * every engine from a non-bannable context, reset all engines, and check
+ * that exactly one more global reset was recorded and that the device
+ * flushes afterwards.
+ *
+ * Returns 0 on success, otherwise a negative error code. The first error
+ * seen is preserved rather than being overwritten by later cleanup, and
+ * -EIO is forced if i915_reset_failed() is true on exit.
+ */
+static int igt_reset_nop(void *arg)
+{
+       struct drm_i915_private *i915 = arg;
+       struct intel_engine_cs *engine;
+       struct i915_gem_context *ctx;
+       unsigned int reset_count, count;
+       enum intel_engine_id id;
+       intel_wakeref_t wakeref;
+       struct drm_file *file;
+       IGT_TIMEOUT(end_time);
+       int flush_err;
+       int err = 0;
+
+       /* Check that we can reset during non-user portions of requests */
+
+       file = mock_file(i915);
+       if (IS_ERR(file))
+               return PTR_ERR(file);
+
+       mutex_lock(&i915->drm.struct_mutex);
+       ctx = live_context(i915, file);
+       mutex_unlock(&i915->drm.struct_mutex);
+       if (IS_ERR(ctx)) {
+               err = PTR_ERR(ctx);
+               goto out;
+       }
+
+       /* The context is about to be reset repeatedly; keep it usable */
+       i915_gem_context_clear_bannable(ctx);
+       wakeref = intel_runtime_pm_get(i915);
+       reset_count = i915_reset_count(&i915->gpu_error);
+       count = 0;
+       do {
+               mutex_lock(&i915->drm.struct_mutex);
+               for_each_engine(engine, i915, id) {
+                       int i;
+
+                       for (i = 0; i < 16; i++) {
+                               struct i915_request *rq;
+
+                               rq = i915_request_alloc(engine, ctx);
+                               if (IS_ERR(rq)) {
+                                       err = PTR_ERR(rq);
+                                       break;
+                               }
+
+                               i915_request_add(rq);
+                       }
+
+                       /* Stop queuing on the remaining engines as well */
+                       if (err)
+                               break;
+               }
+               mutex_unlock(&i915->drm.struct_mutex);
+
+               /*
+                * Report the allocation failure instead of letting a later
+                * assignment to err silently discard it.
+                */
+               if (err)
+                       break;
+
+               igt_global_reset_lock(i915);
+               i915_reset(i915, ALL_ENGINES, NULL);
+               igt_global_reset_unlock(i915);
+               if (i915_reset_failed(i915)) {
+                       err = -EIO;
+                       break;
+               }
+
+               /* Each iteration must add exactly one global reset */
+               if (i915_reset_count(&i915->gpu_error) !=
+                   reset_count + ++count) {
+                       pr_err("Full GPU reset not recorded!\n");
+                       err = -EINVAL;
+                       break;
+               }
+
+               if (!i915_reset_flush(i915)) {
+                       struct drm_printer p =
+                               drm_info_printer(i915->drm.dev);
+
+                       /*
+                        * The engine iterator is stale once the submission
+                        * loop above has finished, so walk the engines again
+                        * rather than dereferencing it.
+                        */
+                       pr_err("Failed to idle after reset\n");
+                       for_each_engine(engine, i915, id)
+                               intel_engine_dump(engine, &p,
+                                                 "%s\n", engine->name);
+
+                       err = -EIO;
+                       break;
+               }
+
+               err = igt_flush_test(i915, 0);
+               if (err)
+                       break;
+       } while (time_before(jiffies, end_time));
+       pr_info("%s: %d resets\n", __func__, count);
+
+       /* Always flush, but do not let a clean flush hide an earlier error */
+       mutex_lock(&i915->drm.struct_mutex);
+       flush_err = igt_flush_test(i915, I915_WAIT_LOCKED);
+       mutex_unlock(&i915->drm.struct_mutex);
+       if (!err)
+               err = flush_err;
+
+       intel_runtime_pm_put(i915, wakeref);
+
+out:
+       mock_file_free(i915, file);
+       if (i915_reset_failed(i915))
+               err = -EIO;
+       return err;
+}
+
+/*
+ * igt_reset_nop_engine - per-engine reset while only empty requests are queued.
+ * @arg: the struct drm_i915_private under test, passed as an opaque pointer
+ *
+ * Does nothing and returns 0 when intel_has_reset_engine() is false.
+ * Otherwise, for each engine in turn and until that engine's IGT_TIMEOUT
+ * deadline passes: wait for the engine to idle, queue 16 payload-less
+ * requests from a non-bannable context, reset just that engine, and check
+ * that the global reset count is unchanged while the engine reset count
+ * advanced by exactly one.
+ *
+ * Returns 0 on success, otherwise a negative error code. The first error
+ * seen is preserved rather than being overwritten by later cleanup, and
+ * -EIO is forced if i915_reset_failed() is true on exit.
+ */
+static int igt_reset_nop_engine(void *arg)
+{
+       struct drm_i915_private *i915 = arg;
+       struct intel_engine_cs *engine;
+       struct i915_gem_context *ctx;
+       enum intel_engine_id id;
+       intel_wakeref_t wakeref;
+       struct drm_file *file;
+       int flush_err;
+       int err = 0;
+
+       /* Check that we can engine-reset during non-user portions */
+
+       if (!intel_has_reset_engine(i915))
+               return 0;
+
+       file = mock_file(i915);
+       if (IS_ERR(file))
+               return PTR_ERR(file);
+
+       mutex_lock(&i915->drm.struct_mutex);
+       ctx = live_context(i915, file);
+       mutex_unlock(&i915->drm.struct_mutex);
+       if (IS_ERR(ctx)) {
+               err = PTR_ERR(ctx);
+               goto out;
+       }
+
+       /* The context is about to be reset repeatedly; keep it usable */
+       i915_gem_context_clear_bannable(ctx);
+       wakeref = intel_runtime_pm_get(i915);
+       for_each_engine(engine, i915, id) {
+               unsigned int reset_count, reset_engine_count;
+               unsigned int count;
+               IGT_TIMEOUT(end_time);
+
+               reset_count = i915_reset_count(&i915->gpu_error);
+               reset_engine_count = i915_reset_engine_count(&i915->gpu_error,
+                                                            engine);
+               count = 0;
+
+               /* Flag held for the whole loop, cleared again below */
+               set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
+               do {
+                       int i;
+
+                       if (!wait_for_idle(engine)) {
+                               pr_err("%s failed to idle before reset\n",
+                                      engine->name);
+                               err = -EIO;
+                               break;
+                       }
+
+                       mutex_lock(&i915->drm.struct_mutex);
+                       for (i = 0; i < 16; i++) {
+                               struct i915_request *rq;
+
+                               rq = i915_request_alloc(engine, ctx);
+                               if (IS_ERR(rq)) {
+                                       err = PTR_ERR(rq);
+                                       break;
+                               }
+
+                               i915_request_add(rq);
+                       }
+                       mutex_unlock(&i915->drm.struct_mutex);
+
+                       /*
+                        * Report the allocation failure; the reset result
+                        * assigned below would otherwise overwrite it.
+                        */
+                       if (err)
+                               break;
+
+                       err = i915_reset_engine(engine, NULL);
+                       if (err) {
+                               pr_err("i915_reset_engine failed\n");
+                               break;
+                       }
+
+                       /* An engine reset must not count as a global reset */
+                       if (i915_reset_count(&i915->gpu_error) != reset_count) {
+                               pr_err("Full GPU reset recorded! (engine reset expected)\n");
+                               err = -EINVAL;
+                               break;
+                       }
+
+                       if (i915_reset_engine_count(&i915->gpu_error, engine) !=
+                           reset_engine_count + ++count) {
+                               pr_err("%s engine reset not recorded!\n",
+                                      engine->name);
+                               err = -EINVAL;
+                               break;
+                       }
+
+                       if (!i915_reset_flush(i915)) {
+                               struct drm_printer p =
+                                       drm_info_printer(i915->drm.dev);
+
+                               pr_err("%s failed to idle after reset\n",
+                                      engine->name);
+                               intel_engine_dump(engine, &p,
+                                                 "%s\n", engine->name);
+
+                               err = -EIO;
+                               break;
+                       }
+               } while (time_before(jiffies, end_time));
+               clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags);
+               pr_info("%s(%s): %d resets\n", __func__, engine->name, count);
+
+               if (err)
+                       break;
+
+               err = igt_flush_test(i915, 0);
+               if (err)
+                       break;
+       }
+
+       /* Always flush, but do not let a clean flush hide an earlier error */
+       mutex_lock(&i915->drm.struct_mutex);
+       flush_err = igt_flush_test(i915, I915_WAIT_LOCKED);
+       mutex_unlock(&i915->drm.struct_mutex);
+       if (!err)
+               err = flush_err;
+
+       intel_runtime_pm_put(i915, wakeref);
+out:
+       mock_file_free(i915, file);
+       if (i915_reset_failed(i915))
+               err = -EIO;
+       return err;
+}
+
 static int __igt_reset_engine(struct drm_i915_private *i915, bool active)
 {
        struct intel_engine_cs *engine;
@@ -1646,6 +1862,8 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915)
                SUBTEST(igt_global_reset), /* attempt to recover GPU first */
                SUBTEST(igt_wedged_reset),
                SUBTEST(igt_hang_sanitycheck),
+               SUBTEST(igt_reset_nop),
+               SUBTEST(igt_reset_nop_engine),
                SUBTEST(igt_reset_idle_engine),
                SUBTEST(igt_reset_active_engine),
                SUBTEST(igt_reset_engines),