seq_printf(m, "Current sequence (%s): %x\n",
engine->name, intel_engine_get_seqno(engine));
- seq_printf(m, "Current user interrupts (%s): %lx\n",
- engine->name, READ_ONCE(engine->breadcrumbs.irq_wakeups));
spin_lock(&b->lock);
for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
engine->hangcheck.seqno,
seqno[id],
engine->last_submitted_seqno);
- seq_printf(m, "\twaiters? %d\n",
- intel_engine_has_waiter(engine));
- seq_printf(m, "\tuser interrupts = %lx [current %lx]\n",
- engine->hangcheck.user_interrupts,
- READ_ONCE(engine->breadcrumbs.irq_wakeups));
+ seq_printf(m, "\twaiters? %s, fake irq active? %s\n",
+ yesno(intel_engine_has_waiter(engine)),
+ yesno(test_bit(engine->id,
+ &dev_priv->gpu_error.missed_irq_rings)));
seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
(long long)engine->hangcheck.acthd,
(long long)acthd[id]);
container_of(work, typeof(*dev_priv), gt.idle_work.work);
struct drm_device *dev = &dev_priv->drm;
struct intel_engine_cs *engine;
- unsigned int stuck_engines;
bool rearm_hangcheck;
if (!READ_ONCE(dev_priv->gt.awake))
dev_priv->gt.awake = false;
rearm_hangcheck = false;
- /* As we have disabled hangcheck, we need to unstick any waiters still
- * hanging around. However, as we may be racing against the interrupt
- * handler or the waiters themselves, we skip enabling the fake-irq.
- */
- stuck_engines = intel_kick_waiters(dev_priv);
- if (unlikely(stuck_engines))
- DRM_DEBUG_DRIVER("kicked stuck waiters (%x)...missed irq?\n",
- stuck_engines);
-
if (INTEL_GEN(dev_priv) >= 6)
gen6_rps_idle(dev_priv);
intel_runtime_pm_put(dev_priv);
static void notify_ring(struct intel_engine_cs *engine)
{
smp_store_mb(engine->breadcrumbs.irq_posted, true);
- if (intel_engine_wakeup(engine)) {
+ if (intel_engine_wakeup(engine))
trace_i915_gem_request_notify(engine);
- engine->breadcrumbs.irq_wakeups++;
- }
}
static void vlv_c0_read(struct drm_i915_private *dev_priv,
return HANGCHECK_HUNG;
}
-static unsigned long kick_waiters(struct intel_engine_cs *engine)
-{
- struct drm_i915_private *i915 = engine->i915;
- unsigned long irq_count = READ_ONCE(engine->breadcrumbs.irq_wakeups);
-
- if (engine->hangcheck.user_interrupts == irq_count &&
- !test_and_set_bit(engine->id, &i915->gpu_error.missed_irq_rings)) {
- if (!test_bit(engine->id, &i915->gpu_error.test_irq_rings))
- DRM_ERROR("Hangcheck timer elapsed... %s idle\n",
- engine->name);
-
- intel_engine_enable_fake_irq(engine);
- }
-
- return irq_count;
-}
/*
* This is called when the chip hasn't reported back with completed
* batchbuffers in a long time. We keep track per ring seqno progress and
bool busy = intel_engine_has_waiter(engine);
u64 acthd;
u32 seqno;
- unsigned user_interrupts;
semaphore_clear_deadlocks(dev_priv);
acthd = intel_engine_get_active_head(engine);
seqno = intel_engine_get_seqno(engine);
- /* Reset stuck interrupts between batch advances */
- user_interrupts = 0;
-
if (engine->hangcheck.seqno == seqno) {
if (!intel_engine_is_active(engine)) {
engine->hangcheck.action = HANGCHECK_IDLE;
if (busy) {
/* Safeguard against driver failure */
- user_interrupts = kick_waiters(engine);
engine->hangcheck.score += BUSY;
}
} else {
engine->hangcheck.seqno = seqno;
engine->hangcheck.acthd = acthd;
- engine->hangcheck.user_interrupts = user_interrupts;
busy_count += busy;
}
#include "i915_drv.h"
+static void intel_breadcrumbs_hangcheck(unsigned long data)
+{ /* Callback of the b->hangcheck timer; data carries the engine pointer.
+ *
+ * Does nothing when the breadcrumb irq is not enabled. While jiffies
+ * is still before the deadline in b->timeout, the timer is simply
+ * pushed back to that deadline. Once jiffies has reached the deadline,
+ * the engine is flagged in gpu_error.missed_irq_rings, the fake_irq
+ * timer is armed and the hangcheck is queued.
+ */
+ struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
+ struct intel_breadcrumbs *b = &engine->breadcrumbs;
+
+ if (!b->irq_enabled)
+ return;
+
+ if (time_before(jiffies, b->timeout)) {
+ mod_timer(&b->hangcheck, b->timeout);
+ return;
+ }
+
+ DRM_DEBUG("Hangcheck timer elapsed... %s idle\n", engine->name);
+ set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings);
+ mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1); /* expires on the next jiffy */
+
+ /* Ensure that even if the GPU hangs, we get woken up.
+ *
+ * However, note that if no one is waiting, we never notice
+ * a gpu hang. Eventually, we will have to wait for a resource
+ * held by the GPU and so trigger a hangcheck. In the most
+ * pathological case, this will be upon memory starvation! To
+ * prevent this, we also queue the hangcheck from the retire
+ * worker.
+ */
+ i915_queue_hangcheck(engine->i915);
+}
+
+static unsigned long wait_timeout(void)
+{ /* Returns the deadline, in jiffies, that callers store in b->timeout
+ * when the first waiter changes: DRM_I915_HANGCHECK_JIFFIES from now,
+ * passed through round_jiffies_up().
+ */
+ return round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES);
+}
+
static void intel_breadcrumbs_fake_irq(unsigned long data)
{
struct intel_engine_cs *engine = (struct intel_engine_cs *)data;
*/
engine->breadcrumbs.irq_posted = true;
- /* Make sure the current hangcheck doesn't falsely accuse a just
- * started irq handler from missing an interrupt (because the
- * interrupt count still matches the stale value from when
- * the irq handler was disabled, many hangchecks ago).
- */
- engine->breadcrumbs.irq_wakeups++;
-
spin_lock_irq(&engine->i915->irq_lock);
engine->irq_enable(engine);
spin_unlock_irq(&engine->i915->irq_lock);
}
if (!b->irq_enabled ||
- test_bit(engine->id, &i915->gpu_error.missed_irq_rings))
+ test_bit(engine->id, &i915->gpu_error.missed_irq_rings)) {
mod_timer(&b->fake_irq, jiffies + 1);
-
- /* Ensure that even if the GPU hangs, we get woken up.
- *
- * However, note that if no one is waiting, we never notice
- * a gpu hang. Eventually, we will have to wait for a resource
- * held by the GPU and so trigger a hangcheck. In the most
- * pathological case, this will be upon memory starvation!
- */
- i915_queue_hangcheck(i915);
+ } else {
+ /* Ensure we never sleep indefinitely */
+ GEM_BUG_ON(!time_after(b->timeout, jiffies));
+ mod_timer(&b->hangcheck, b->timeout);
+ }
}
static void __intel_breadcrumbs_disable_irq(struct intel_breadcrumbs *b)
GEM_BUG_ON(!next && !first);
if (next && next != &wait->node) {
GEM_BUG_ON(first);
+ b->timeout = wait_timeout();
b->first_wait = to_wait(next);
smp_store_mb(b->irq_seqno_bh, b->first_wait->tsk);
/* As there is a delay between reading the current
if (first) {
GEM_BUG_ON(rb_first(&b->waiters) != &wait->node);
+ b->timeout = wait_timeout();
b->first_wait = wait;
smp_store_mb(b->irq_seqno_bh, wait->tsk);
/* After assigning ourselves as the new bottom-half, we must
return first;
}
-void intel_engine_enable_fake_irq(struct intel_engine_cs *engine)
-{
- mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1);
-}
-
static inline bool chain_wakeup(struct rb_node *rb, int priority)
{
return rb && to_wait(rb)->tsk->prio <= priority;
* the interrupt, or if we have to handle an
* exception rather than a seqno completion.
*/
+ b->timeout = wait_timeout();
b->first_wait = to_wait(next);
smp_store_mb(b->irq_seqno_bh, b->first_wait->tsk);
if (b->first_wait->seqno != wait->seqno)
setup_timer(&b->fake_irq,
intel_breadcrumbs_fake_irq,
(unsigned long)engine);
+ setup_timer(&b->hangcheck,
+ intel_breadcrumbs_hangcheck,
+ (unsigned long)engine);
/* Spawn a thread to provide a common bottom-half for all signals.
* As this is an asynchronous interface we cannot steal the current
if (!IS_ERR_OR_NULL(b->signaler))
kthread_stop(b->signaler);
+ del_timer_sync(&b->hangcheck);
del_timer_sync(&b->fake_irq);
}
void intel_engine_init_hangcheck(struct intel_engine_cs *engine)
{
memset(&engine->hangcheck, 0, sizeof(engine->hangcheck));
+ clear_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); /* drop this engine's missed-irq flag too */
}
static void intel_engine_init_requests(struct intel_engine_cs *engine)
struct intel_engine_hangcheck {
u64 acthd;
- unsigned long user_interrupts;
u32 seqno;
int score;
enum intel_engine_hangcheck_action action;
*/
struct intel_breadcrumbs {
struct task_struct *irq_seqno_bh; /* bh for user interrupts */
- unsigned long irq_wakeups;
bool irq_posted;
spinlock_t lock; /* protects the lists of requests */
struct task_struct *signaler; /* used for fence signalling */
struct drm_i915_gem_request *first_signal;
struct timer_list fake_irq; /* used after a missed interrupt */
+ struct timer_list hangcheck; /* detect missed interrupts */
+
+ unsigned long timeout;
bool irq_enabled : 1;
bool rpm_wakelock : 1;
return wakeup;
}
-void intel_engine_enable_fake_irq(struct intel_engine_cs *engine);
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine);
unsigned int intel_kick_waiters(struct drm_i915_private *i915);
unsigned int intel_kick_signalers(struct drm_i915_private *i915);