drm/i915: Always try to reset the GPU on takeover
author Chris Wilson <chris@chris-wilson.co.uk>
Thu, 3 Jan 2019 11:21:04 +0000 (11:21 +0000)
committer Chris Wilson <chris@chris-wilson.co.uk>
Thu, 3 Jan 2019 12:40:42 +0000 (12:40 +0000)
When we first introduced the reset to sanitize the GPU on taking over
from the BIOS and before returning control to third parties (the BIOS!),
we restricted it to only systems utilizing HW contexts as we were
uncertain of how stable our reset mechanism truly was. We now have
reasonable coverage across all machines that expose a GPU reset method,
and so we should be safe to sanitize the GPU state everywhere.

v2: We _have_ to skip the reset if it would clobber the display.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Mika Kuoppala <mika.kuoppala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190103112104.19561-1-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/i915_drv.c
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_pci.c
drivers/gpu/drm/i915/intel_device_info.h
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/i915/intel_engine_cs.c
drivers/gpu/drm/i915/intel_ringbuffer.h
drivers/gpu/drm/i915/selftests/i915_gem.c

index fe01d090f9bb6d4e9b547f414485c20d5b0c5a81..17fca3ba343ea413de2f650f33efe912c35e7be8 100644 (file)
@@ -2180,7 +2180,7 @@ static int i915_drm_resume_early(struct drm_device *dev)
 
        intel_power_domains_resume(dev_priv);
 
-       intel_engines_sanitize(dev_priv);
+       intel_engines_sanitize(dev_priv, true);
 
        enable_rpm_wakeref_asserts(dev_priv);
 
index e872d0a179f09c9d08e1e61ee28c6689d0e1e777..4b1656d2482e1bfc23f4cbd702e93ef29e3a6595 100644 (file)
@@ -3418,8 +3418,7 @@ bool i915_gem_unset_wedged(struct drm_i915_private *i915)
        i915_retire_requests(i915);
        GEM_BUG_ON(i915->gt.active_requests);
 
-       if (!intel_gpu_reset(i915, ALL_ENGINES))
-               intel_engines_sanitize(i915);
+       intel_engines_sanitize(i915, false);
 
        /*
         * Undo nop_submit_request. We prevent all new i915 requests from
@@ -5023,8 +5022,6 @@ void __i915_gem_object_release_unless_active(struct drm_i915_gem_object *obj)
 
 void i915_gem_sanitize(struct drm_i915_private *i915)
 {
-       int err;
-
        GEM_TRACE("\n");
 
        mutex_lock(&i915->drm.struct_mutex);
@@ -5049,11 +5046,7 @@ void i915_gem_sanitize(struct drm_i915_private *i915)
         * it may impact the display and we are uncertain about the stability
         * of the reset, so this could be applied to even earlier gen.
         */
-       err = -ENODEV;
-       if (INTEL_GEN(i915) >= 5 && intel_has_gpu_reset(i915))
-               err = WARN_ON(intel_gpu_reset(i915, ALL_ENGINES));
-       if (!err)
-               intel_engines_sanitize(i915);
+       intel_engines_sanitize(i915, false);
 
        intel_uncore_forcewake_put(i915, FORCEWAKE_ALL);
        intel_runtime_pm_put(i915);
index 0d342f2b44a533d51d18100fde43da3cf3e24baf..dd4aff2b256e910408ba5d7a5d35ca4fa34a9d5e 100644 (file)
@@ -82,6 +82,7 @@
        .display.has_overlay = 1, \
        .display.overlay_needs_physical = 1, \
        .display.has_gmch_display = 1, \
+       .gpu_reset_clobbers_display = true, \
        .hws_needs_physical = 1, \
        .unfenced_needs_alignment = 1, \
        .ring_mask = RENDER_RING, \
@@ -122,6 +123,7 @@ static const struct intel_device_info intel_i865g_info = {
        GEN(3), \
        .num_pipes = 2, \
        .display.has_gmch_display = 1, \
+       .gpu_reset_clobbers_display = true, \
        .ring_mask = RENDER_RING, \
        .has_snoop = true, \
        .has_coherent_ggtt = true, \
@@ -198,6 +200,7 @@ static const struct intel_device_info intel_pineview_info = {
        .num_pipes = 2, \
        .display.has_hotplug = 1, \
        .display.has_gmch_display = 1, \
+       .gpu_reset_clobbers_display = true, \
        .ring_mask = RENDER_RING, \
        .has_snoop = true, \
        .has_coherent_ggtt = true, \
@@ -228,6 +231,7 @@ static const struct intel_device_info intel_g45_info = {
        GEN4_FEATURES,
        PLATFORM(INTEL_G45),
        .ring_mask = RENDER_RING | BSD_RING,
+       .gpu_reset_clobbers_display = false,
 };
 
 static const struct intel_device_info intel_gm45_info = {
@@ -237,6 +241,7 @@ static const struct intel_device_info intel_gm45_info = {
        .display.has_fbc = 1,
        .display.supports_tv = 1,
        .ring_mask = RENDER_RING | BSD_RING,
+       .gpu_reset_clobbers_display = false,
 };
 
 #define GEN5_FEATURES \
index 76735869e32df84ea19f5736817259ced289be33..957c6527f76bc659390e0c21a76b868c2c396429 100644 (file)
@@ -89,6 +89,7 @@ enum intel_ppgtt {
        func(is_alpha_support); \
        /* Keep has_* in alphabetical order */ \
        func(has_64bit_reloc); \
+       func(gpu_reset_clobbers_display); \
        func(has_reset_engine); \
        func(has_fpga_dbg); \
        func(has_guc); \
index 665b483bbbccf2e1fd4b6045e1587fe8bddb1c49..699ee6946891222bd9e5b5e2d68ce05860e82085 100644 (file)
@@ -3746,8 +3746,8 @@ __intel_display_resume(struct drm_device *dev,
 
 static bool gpu_reset_clobbers_display(struct drm_i915_private *dev_priv)
 {
-       return intel_has_gpu_reset(dev_priv) &&
-               INTEL_GEN(dev_priv) < 5 && !IS_G4X(dev_priv);
+       return (INTEL_INFO(dev_priv)->gpu_reset_clobbers_display &&
+               intel_has_gpu_reset(dev_priv));
 }
 
 void intel_prepare_reset(struct drm_i915_private *dev_priv)
index 2aa3e0d7e6a3d15cbd4dd6596e0b259b18fce9a9..5990f8500bcaf1873acccd57d98706325b6e43b7 100644 (file)
@@ -1043,22 +1043,34 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915)
                engine->set_default_submission(engine);
 }
 
+static bool reset_engines(struct drm_i915_private *i915)
+{
+       if (INTEL_INFO(i915)->gpu_reset_clobbers_display)
+               return false;
+
+       return intel_gpu_reset(i915, ALL_ENGINES) == 0;
+}
+
 /**
  * intel_engines_sanitize: called after the GPU has lost power
  * @i915: the i915 device
+ * @force: ignore a failed reset and sanitize engine state anyway
  *
  * Anytime we reset the GPU, either with an explicit GPU reset or through a
  * PCI power cycle, the GPU loses state and we must reset our state tracking
  * to match. Note that calling intel_engines_sanitize() if the GPU has not
  * been reset results in much confusion!
  */
-void intel_engines_sanitize(struct drm_i915_private *i915)
+void intel_engines_sanitize(struct drm_i915_private *i915, bool force)
 {
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
 
        GEM_TRACE("\n");
 
+       if (!reset_engines(i915) && !force)
+               return;
+
        for_each_engine(engine, i915, id) {
                if (engine->reset.reset)
                        engine->reset.reset(engine, NULL);
index 69287e77b929194b2a9d983d8791ec3e4b23486b..3c1366c58cf3eec01b8f78852d3443dc020bafd9 100644 (file)
@@ -1019,7 +1019,7 @@ gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset)
        return cs;
 }
 
-void intel_engines_sanitize(struct drm_i915_private *i915);
+void intel_engines_sanitize(struct drm_i915_private *i915, bool force);
 
 bool intel_engine_is_idle(struct intel_engine_cs *engine);
 bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
index d0aa19d176536c0f2d4f273be4c113d472de89c5..bdcc53e15e751c9f0bf3971469bda785dc487869 100644 (file)
@@ -121,7 +121,7 @@ static void pm_resume(struct drm_i915_private *i915)
         */
        intel_runtime_pm_get(i915);
 
-       intel_engines_sanitize(i915);
+       intel_engines_sanitize(i915, false);
        i915_gem_sanitize(i915);
        i915_gem_resume(i915);