static int
i915_wedged_get(void *data, u64 *val)
{
- struct drm_i915_private *dev_priv = data;
-
- *val = i915_terminally_wedged(&dev_priv->gpu_error);
+ int ret = i915_terminally_wedged(data);
- return 0;
+ switch (ret) {
+ case -EIO:
+ *val = 1;
+ return 0;
+ case 0:
+ *val = 0;
+ return 0;
+ default:
+ return ret;
+ }
}
static int
mutex_unlock(&i915->drm.struct_mutex);
}
- if (val & DROP_RESET_ACTIVE && i915_terminally_wedged(&i915->gpu_error))
+ if (val & DROP_RESET_ACTIVE && i915_terminally_wedged(i915))
i915_handle_error(i915, ALL_ENGINES, 0, NULL);
fs_reclaim_acquire(GFP_KERNEL);
struct i915_request *
i915_gem_find_active_request(struct intel_engine_cs *engine);
-static inline bool i915_terminally_wedged(struct i915_gpu_error *error)
+static inline bool __i915_wedged(struct i915_gpu_error *error)
{
return unlikely(test_bit(I915_WEDGED, &error->flags));
}
+static inline bool i915_reset_failed(struct drm_i915_private *i915)
+{
+ return __i915_wedged(&i915->gpu_error);
+}
+
static inline u32 i915_reset_count(struct i915_gpu_error *error)
{
return READ_ONCE(error->reset_count);
* fail). But any other -EIO isn't ours (e.g. swap in failure)
* and so needs to be reported.
*/
- if (!i915_terminally_wedged(&dev_priv->gpu_error))
+ if (!i915_terminally_wedged(dev_priv))
return VM_FAULT_SIGBUS;
/* else: fall through */
case -EAGAIN:
struct intel_engine_cs *engine;
enum intel_engine_id id;
- if (i915_terminally_wedged(&i915->gpu_error))
+ if (i915_reset_failed(i915))
return;
GEM_BUG_ON(i915->gt.active_requests);
long ret;
/* ABI: return -EIO if already wedged */
- if (i915_terminally_wedged(&dev_priv->gpu_error))
- return -EIO;
+ ret = i915_terminally_wedged(dev_priv);
+ if (ret)
+ return ret;
spin_lock(&file_priv->mm.lock);
list_for_each_entry(request, &file_priv->mm.request_list, client_link) {
* back to defaults, recovering from whatever wedged state we left it
* in and so worth trying to use the device once more.
*/
- if (i915_terminally_wedged(&i915->gpu_error))
+ if (i915_terminally_wedged(i915))
i915_gem_unset_wedged(i915);
/*
* state. Fortunately, the kernel_context is disposable and we do
* not rely on its state.
*/
- if (!i915_terminally_wedged(&i915->gpu_error)) {
+ if (!i915_reset_failed(i915)) {
ret = i915_gem_switch_to_kernel_context(i915);
if (ret)
goto err_unlock;
return;
err_wedged:
- if (!i915_terminally_wedged(&i915->gpu_error)) {
- DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n");
+ if (!i915_reset_failed(i915)) {
+ dev_err(i915->drm.dev,
+ "Failed to re-initialize GPU, declaring it wedged!\n");
i915_gem_set_wedged(i915);
}
goto out_unlock;
init_unused_rings(dev_priv);
BUG_ON(!dev_priv->kernel_context);
- if (i915_terminally_wedged(&dev_priv->gpu_error)) {
- ret = -EIO;
+ ret = i915_terminally_wedged(dev_priv);
+ if (ret)
goto out;
- }
ret = i915_ppgtt_init_hw(dev_priv);
if (ret) {
* wedged. But we only want to do this where the GPU is angry,
* for all other failure, such as an allocation failure, bail.
*/
- if (!i915_terminally_wedged(&dev_priv->gpu_error)) {
+ if (!i915_reset_failed(dev_priv)) {
i915_load_error(dev_priv,
"Failed to initialize GPU, declaring it wedged!\n");
i915_gem_set_wedged(dev_priv);
* ABI: Before userspace accesses the GPU (e.g. execbuffer), report
* EIO if the GPU is already wedged.
*/
- if (i915_terminally_wedged(&i915->gpu_error))
- return ERR_PTR(-EIO);
+ ret = i915_terminally_wedged(i915);
+ if (ret)
+ return ERR_PTR(ret);
/*
* Pinning the contexts may generate requests in order to acquire
finish:
reset_finish(i915);
- if (!i915_terminally_wedged(error))
+ if (!__i915_wedged(error))
reset_restart(i915);
return;
* Try engine reset when available. We fall back to full reset if
* single reset fails.
*/
- if (intel_has_reset_engine(i915) && !i915_terminally_wedged(error)) {
+ if (intel_has_reset_engine(i915) && !__i915_wedged(error)) {
for_each_engine_masked(engine, i915, engine_mask, tmp) {
BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE);
if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
srcu_read_unlock(&error->reset_backoff_srcu, tag);
}
+int i915_terminally_wedged(struct drm_i915_private *i915)
+{
+ struct i915_gpu_error *error = &i915->gpu_error;
+
+ might_sleep();
+
+ if (!__i915_wedged(error))
+ return 0;
+
+ /* Reset still in progress? Maybe we will recover? */
+ if (!test_bit(I915_RESET_BACKOFF, &error->flags))
+ return -EIO;
+
+ /* XXX intel_reset_finish() still takes struct_mutex!!! */
+ if (mutex_is_locked(&i915->drm.struct_mutex))
+ return -EAGAIN;
+
+ if (wait_event_interruptible(error->reset_queue,
+ !test_bit(I915_RESET_BACKOFF,
+ &error->flags)))
+ return -EINTR;
+
+ return __i915_wedged(error) ? -EIO : 0;
+}
+
bool i915_reset_flush(struct drm_i915_private *i915)
{
int err;
int __must_check i915_reset_trylock(struct drm_i915_private *i915);
void i915_reset_unlock(struct drm_i915_private *i915, int tag);
+int i915_terminally_wedged(struct drm_i915_private *i915);
+
bool intel_has_gpu_reset(struct drm_i915_private *i915);
bool intel_has_reset_engine(struct drm_i915_private *i915);
*/
bool intel_engine_is_idle(struct intel_engine_cs *engine)
{
- struct drm_i915_private *dev_priv = engine->i915;
-
/* More white lies, if wedged, hw state is inconsistent */
- if (i915_terminally_wedged(&dev_priv->gpu_error))
+ if (i915_reset_failed(engine->i915))
return true;
/* Any inflight/incomplete requests? */
* If the driver is wedged, HW state may be very inconsistent and
* report that it is still busy, even though we have stopped using it.
*/
- if (i915_terminally_wedged(&dev_priv->gpu_error))
+ if (i915_reset_failed(dev_priv))
return true;
for_each_engine(engine, dev_priv, id) {
va_end(ap);
}
- if (i915_terminally_wedged(&engine->i915->gpu_error))
+ if (i915_reset_failed(engine->i915))
drm_printf(m, "*** WEDGED ***\n");
drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms]\n",
if (!READ_ONCE(dev_priv->gt.awake))
return;
- if (i915_terminally_wedged(&dev_priv->gpu_error))
+ if (i915_terminally_wedged(dev_priv))
return;
/* As enabling the GPU requires fairly extensive mmio access,
return 0;
}
- if (i915_terminally_wedged(&dev_priv->gpu_error))
+ if (i915_terminally_wedged(dev_priv))
return 0;
file = mock_file(dev_priv);
SUBTEST(live_active_retire),
};
- if (i915_terminally_wedged(&i915->gpu_error))
+ if (i915_terminally_wedged(i915))
return 0;
return i915_subtests(tests, i915);
static bool needs_fence_registers(struct drm_i915_private *i915)
{
- return !i915_terminally_wedged(&i915->gpu_error);
+ return !i915_terminally_wedged(i915);
}
static bool needs_mi_store_dword(struct drm_i915_private *i915)
{
- if (i915_terminally_wedged(&i915->gpu_error))
+ if (i915_terminally_wedged(i915))
return false;
return intel_engine_can_store_dword(i915->engine[RCS]);
SUBTEST(igt_vm_isolation),
};
- if (i915_terminally_wedged(&dev_priv->gpu_error))
+ if (i915_terminally_wedged(dev_priv))
return 0;
return i915_subtests(tests, dev_priv);
SUBTEST(igt_evict_contexts),
};
- if (i915_terminally_wedged(&i915->gpu_error))
+ if (i915_terminally_wedged(i915))
return 0;
return i915_subtests(tests, i915);
for (loop = 0; loop < 3; loop++) {
intel_wakeref_t wakeref;
- if (i915_terminally_wedged(&i915->gpu_error))
+ if (i915_terminally_wedged(i915))
break;
obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
SUBTEST(live_breadcrumbs_smoketest),
};
- if (i915_terminally_wedged(&i915->gpu_error))
+ if (i915_terminally_wedged(i915))
return 0;
return i915_subtests(tests, i915);
i915_gem_set_wedged(i915);
}
- return i915_terminally_wedged(&i915->gpu_error) ? -EIO : 0;
+ return i915_terminally_wedged(i915);
}
timeout = i915_request_wait(rq,
I915_WAIT_LOCKED,
MAX_SCHEDULE_TIMEOUT);
- if (i915_terminally_wedged(&i915->gpu_error))
+ if (i915_reset_failed(i915))
timeout = -EIO;
i915_request_put(rq);
igt_global_reset_unlock(i915);
- if (i915_terminally_wedged(&i915->gpu_error))
+ if (i915_reset_failed(i915))
err = -EIO;
return err;
i915_gem_set_wedged(i915);
- GEM_BUG_ON(!i915_terminally_wedged(&i915->gpu_error));
+ GEM_BUG_ON(!i915_reset_failed(i915));
i915_reset(i915, ALL_ENGINES, NULL);
intel_runtime_pm_put(i915, wakeref);
igt_global_reset_unlock(i915);
- return i915_terminally_wedged(&i915->gpu_error) ? -EIO : 0;
+ return i915_reset_failed(i915) ? -EIO : 0;
}
static bool wait_for_idle(struct intel_engine_cs *engine)
break;
}
- if (i915_terminally_wedged(&i915->gpu_error))
+ if (i915_reset_failed(i915))
err = -EIO;
if (active) {
break;
}
- if (i915_terminally_wedged(&i915->gpu_error))
+ if (i915_reset_failed(i915))
err = -EIO;
if (flags & TEST_ACTIVE) {
mutex_unlock(&i915->drm.struct_mutex);
igt_global_reset_unlock(i915);
- if (i915_terminally_wedged(&i915->gpu_error))
+ if (i915_reset_failed(i915))
return -EIO;
return err;
unlock:
mutex_unlock(&i915->drm.struct_mutex);
- if (i915_terminally_wedged(&i915->gpu_error))
+ if (i915_reset_failed(i915))
return -EIO;
return err;
mutex_unlock(&i915->drm.struct_mutex);
igt_global_reset_unlock(i915);
- if (i915_terminally_wedged(&i915->gpu_error))
+ if (i915_reset_failed(i915))
return -EIO;
return err;
i915_request_wait(rq,
I915_WAIT_LOCKED,
MAX_SCHEDULE_TIMEOUT);
- if (i915_terminally_wedged(&i915->gpu_error))
+ if (i915_reset_failed(i915))
err = -EIO;
}
/* Flush any requests before we get started and check basics */
force_reset(i915);
- if (i915_terminally_wedged(&i915->gpu_error))
+ if (i915_reset_failed(i915))
goto unlock;
if (intel_has_gpu_reset(i915)) {
if (!intel_has_gpu_reset(i915))
return 0;
- if (i915_terminally_wedged(&i915->gpu_error))
+ if (i915_terminally_wedged(i915))
return -EIO; /* we're long past hope of a successful reset */
wakeref = intel_runtime_pm_get(i915);
if (!HAS_EXECLISTS(i915))
return 0;
- if (i915_terminally_wedged(&i915->gpu_error))
+ if (i915_terminally_wedged(i915))
return 0;
return i915_subtests(tests, i915);
err = 0;
igt_wedge_on_timeout(&wedge, ctx->i915, HZ / 5) /* a safety net! */
err = i915_gem_object_set_to_cpu_domain(results, false);
- if (i915_terminally_wedged(&ctx->i915->gpu_error))
+ if (i915_terminally_wedged(ctx->i915))
err = -EIO;
if (err)
goto out_put;
};
int err;
- if (i915_terminally_wedged(&i915->gpu_error))
+ if (i915_terminally_wedged(i915))
return 0;
mutex_lock(&i915->drm.struct_mutex);