drm/i915: Replace global_seqno with a hangcheck heartbeat seqno

author Chris Wilson <chris@chris-wilson.co.uk>
Tue, 26 Feb 2019 09:49:19 +0000 (09:49 +0000)
committer Chris Wilson <chris@chris-wilson.co.uk>
Tue, 26 Feb 2019 09:55:31 +0000 (09:55 +0000)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c

index 37175414ce892a50d49846326efd5e29c836ff3b..545091a5180b82578cf003fabec76bfac6ac157d 100644 (file)
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1295,7 +1295,7 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
         with_intel_runtime_pm(dev_priv, wakeref) {
                 for_each_engine(engine, dev_priv, id) {
                         acthd[id] = intel_engine_get_active_head(engine);
-                       seqno[id] = intel_engine_get_seqno(engine);
+                       seqno[id] = intel_engine_get_hangcheck_seqno(engine);
                 }
  
                 intel_engine_get_instdone(dev_priv->engine[RCS], &instdone);
@@ -1315,8 +1315,9 @@ static int i915_hangcheck_info(struct seq_file *m, void *unused)
         for_each_engine(engine, dev_priv, id) {
                 seq_printf(m, "%s:\n", engine->name);
                 seq_printf(m, "\tseqno = %x [current %x, last %x], %dms ago\n",
-                          engine->hangcheck.seqno, seqno[id],
-                          intel_engine_last_submit(engine),
+                          engine->hangcheck.last_seqno,
+                          seqno[id],
+                          engine->hangcheck.next_seqno,
                            jiffies_to_msecs(jiffies -
                                             engine->hangcheck.action_timestamp));
  
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c

index 81b80f8fd9ea8030491947348220f0f7079d8c9c..57bc5c4fb3ff29318f3d82e852f12faca9b583cc 100644 (file)
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1497,10 +1497,11 @@ void intel_engine_dump(struct intel_engine_cs *engine,
         if (i915_reset_failed(engine->i915))
                 drm_printf(m, "*** WEDGED ***\n");
  
-       drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms]\n",
+       drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x/%x [%d ms]\n",
                    intel_engine_get_seqno(engine),
                    intel_engine_last_submit(engine),
-                  engine->hangcheck.seqno,
+                  engine->hangcheck.last_seqno,
+                  engine->hangcheck.next_seqno,
                    jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp));
         drm_printf(m, "\tReset count: %d (global %d)\n",
                    i915_reset_engine_count(error, engine),
diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c

index 9be033b6f4d2220dd18400e64320cc2d51d5f383..f1d8dfc58049a7c47eb6a3a155820647843155d3 100644 (file)
--- a/drivers/gpu/drm/i915/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/intel_hangcheck.c
@@ -133,21 +133,21 @@ static void hangcheck_load_sample(struct intel_engine_cs *engine,
                                   struct hangcheck *hc)
  {
         hc->acthd = intel_engine_get_active_head(engine);
-       hc->seqno = intel_engine_get_seqno(engine);
+       hc->seqno = intel_engine_get_hangcheck_seqno(engine);
  }
  
  static void hangcheck_store_sample(struct intel_engine_cs *engine,
                                    const struct hangcheck *hc)
  {
         engine->hangcheck.acthd = hc->acthd;
-       engine->hangcheck.seqno = hc->seqno;
+       engine->hangcheck.last_seqno = hc->seqno;
  }
  
  static enum intel_engine_hangcheck_action
  hangcheck_get_action(struct intel_engine_cs *engine,
                      const struct hangcheck *hc)
  {
-       if (engine->hangcheck.seqno != hc->seqno)
+       if (engine->hangcheck.last_seqno != hc->seqno)
                 return ENGINE_ACTIVE_SEQNO;
  
         if (intel_engine_is_idle(engine))
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c

index 34a0866959c5799dd96604ca748bc803c0d323c0..0516fc6b965284277e60d2b6108480fa975b47b9 100644 (file)
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -178,6 +178,12 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
                 I915_GEM_HWS_INDEX_ADDR);
  }
  
+static inline u32 intel_hws_hangcheck_address(struct intel_engine_cs *engine)
+{
+       return (i915_ggtt_offset(engine->status_page.vma) +
+               I915_GEM_HWS_HANGCHECK_ADDR);
+}
+
  static inline struct i915_priolist *to_priolist(struct rb_node *rb)
  {
         return rb_entry(rb, struct i915_priolist, node);
@@ -2206,6 +2212,10 @@ static u32 *gen8_emit_fini_breadcrumb(struct i915_request *request, u32 *cs)
                                   request->fence.seqno,
                                   request->timeline->hwsp_offset);
  
+       cs = gen8_emit_ggtt_write(cs,
+                                 intel_engine_next_hangcheck_seqno(request->engine),
+                                 intel_hws_hangcheck_address(request->engine));
+
         cs = gen8_emit_ggtt_write(cs,
                                   request->global_seqno,
                                   intel_hws_seqno_address(request->engine));
@@ -2230,6 +2240,11 @@ static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs)
                                       PIPE_CONTROL_FLUSH_ENABLE |
                                       PIPE_CONTROL_CS_STALL);
  
+       cs = gen8_emit_ggtt_write_rcs(cs,
+                                     intel_engine_next_hangcheck_seqno(request->engine),
+                                     intel_hws_hangcheck_address(request->engine),
+                                     0);
+
         cs = gen8_emit_ggtt_write_rcs(cs,
                                       request->global_seqno,
                                       intel_hws_seqno_address(request->engine),
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c

index 7f841dba87b3026893b2cb330168cb74246d5804..870184bbd1690c8d80e1a063b8c5c7cedfcdb278 100644 (file)
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -43,6 +43,12 @@
   */
  #define LEGACY_REQUEST_SIZE 200
  
+static inline u32 hws_hangcheck_address(struct intel_engine_cs *engine)
+{
+       return (i915_ggtt_offset(engine->status_page.vma) +
+               I915_GEM_HWS_HANGCHECK_ADDR);
+}
+
  static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
  {
         return (i915_ggtt_offset(engine->status_page.vma) +
@@ -316,6 +322,11 @@ static u32 *gen6_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
         *cs++ = rq->timeline->hwsp_offset | PIPE_CONTROL_GLOBAL_GTT;
         *cs++ = rq->fence.seqno;
  
+       *cs++ = GFX_OP_PIPE_CONTROL(4);
+       *cs++ = PIPE_CONTROL_QW_WRITE;
+       *cs++ = hws_hangcheck_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT;
+       *cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
+
         *cs++ = GFX_OP_PIPE_CONTROL(4);
         *cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
         *cs++ = intel_hws_seqno_address(rq->engine) | PIPE_CONTROL_GLOBAL_GTT;
@@ -422,6 +433,11 @@ static u32 *gen7_rcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
         *cs++ = rq->timeline->hwsp_offset;
         *cs++ = rq->fence.seqno;
  
+       *cs++ = GFX_OP_PIPE_CONTROL(4);
+       *cs++ = PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_GLOBAL_GTT_IVB;
+       *cs++ = hws_hangcheck_address(rq->engine);
+       *cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
+
         *cs++ = GFX_OP_PIPE_CONTROL(4);
         *cs++ = (PIPE_CONTROL_QW_WRITE |
                  PIPE_CONTROL_GLOBAL_GTT_IVB |
@@ -447,12 +463,15 @@ static u32 *gen6_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
         *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
         *cs++ = rq->fence.seqno;
  
+       *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
+       *cs++ = I915_GEM_HWS_HANGCHECK_ADDR | MI_FLUSH_DW_USE_GTT;
+       *cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
+
         *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
         *cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT;
         *cs++ = rq->global_seqno;
  
         *cs++ = MI_USER_INTERRUPT;
-       *cs++ = MI_NOOP;
  
         rq->tail = intel_ring_offset(rq, cs);
         assert_ring_tail_valid(rq->ring, rq->tail);
@@ -472,6 +491,10 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
         *cs++ = I915_GEM_HWS_SEQNO_ADDR | MI_FLUSH_DW_USE_GTT;
         *cs++ = rq->fence.seqno;
  
+       *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
+       *cs++ = I915_GEM_HWS_HANGCHECK_ADDR | MI_FLUSH_DW_USE_GTT;
+       *cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
+
         *cs++ = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_DW_STORE_INDEX;
         *cs++ = I915_GEM_HWS_INDEX_ADDR | MI_FLUSH_DW_USE_GTT;
         *cs++ = rq->global_seqno;
@@ -487,6 +510,7 @@ static u32 *gen7_xcs_emit_breadcrumb(struct i915_request *rq, u32 *cs)
         *cs++ = 0;
  
         *cs++ = MI_USER_INTERRUPT;
+       *cs++ = MI_NOOP;
  
         rq->tail = intel_ring_offset(rq, cs);
         assert_ring_tail_valid(rq->ring, rq->tail);
@@ -930,11 +954,16 @@ static u32 *i9xx_emit_breadcrumb(struct i915_request *rq, u32 *cs)
         *cs++ = I915_GEM_HWS_SEQNO_ADDR;
         *cs++ = rq->fence.seqno;
  
+       *cs++ = MI_STORE_DWORD_INDEX;
+       *cs++ = I915_GEM_HWS_HANGCHECK_ADDR;
+       *cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
+
         *cs++ = MI_STORE_DWORD_INDEX;
         *cs++ = I915_GEM_HWS_INDEX_ADDR;
         *cs++ = rq->global_seqno;
  
         *cs++ = MI_USER_INTERRUPT;
+       *cs++ = MI_NOOP;
  
         rq->tail = intel_ring_offset(rq, cs);
         assert_ring_tail_valid(rq->ring, rq->tail);
@@ -956,6 +985,10 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
         *cs++ = I915_GEM_HWS_SEQNO_ADDR;
         *cs++ = rq->fence.seqno;
  
+       *cs++ = MI_STORE_DWORD_INDEX;
+       *cs++ = I915_GEM_HWS_HANGCHECK_ADDR;
+       *cs++ = intel_engine_next_hangcheck_seqno(rq->engine);
+
         BUILD_BUG_ON(GEN5_WA_STORES < 1);
         for (i = 0; i < GEN5_WA_STORES; i++) {
                 *cs++ = MI_STORE_DWORD_INDEX;
@@ -964,7 +997,6 @@ static u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
         }
  
         *cs++ = MI_USER_INTERRUPT;
-       *cs++ = MI_NOOP;
  
         rq->tail = intel_ring_offset(rq, cs);
         assert_ring_tail_valid(rq->ring, rq->tail);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h

index 5d45ad4ecca9039817c26abe5c2369c91bf39089..2869aaa9d22597a58e9473d8a05356f93ae180a6 100644 (file)
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -6,6 +6,7 @@
  
  #include <linux/hashtable.h>
  #include <linux/irq_work.h>
+#include <linux/random.h>
  #include <linux/seqlock.h>
  
  #include "i915_gem_batch_pool.h"
@@ -119,7 +120,8 @@ struct intel_instdone {
  
  struct intel_engine_hangcheck {
         u64 acthd;
-       u32 seqno;
+       u32 last_seqno;
+       u32 next_seqno;
         unsigned long action_timestamp;
         struct intel_instdone instdone;
  };
@@ -726,6 +728,8 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
  #define I915_GEM_HWS_INDEX_ADDR                (I915_GEM_HWS_INDEX * sizeof(u32))
  #define I915_GEM_HWS_PREEMPT           0x32
  #define I915_GEM_HWS_PREEMPT_ADDR      (I915_GEM_HWS_PREEMPT * sizeof(u32))
+#define I915_GEM_HWS_HANGCHECK         0x34
+#define I915_GEM_HWS_HANGCHECK_ADDR    (I915_GEM_HWS_HANGCHECK * sizeof(u32))
  #define I915_GEM_HWS_SEQNO             0x40
  #define I915_GEM_HWS_SEQNO_ADDR                (I915_GEM_HWS_SEQNO * sizeof(u32))
  #define I915_GEM_HWS_SCRATCH           0x80
@@ -1086,4 +1090,17 @@ static inline bool inject_preempt_hang(struct intel_engine_execlists *execlists)
  
  #endif
  
+static inline u32
+intel_engine_next_hangcheck_seqno(struct intel_engine_cs *engine)
+{
+       return engine->hangcheck.next_seqno =
+               next_pseudo_random32(engine->hangcheck.next_seqno);
+}
+
+static inline u32
+intel_engine_get_hangcheck_seqno(struct intel_engine_cs *engine)
+{
+       return intel_read_status_page(engine, I915_GEM_HWS_HANGCHECK);
+}
+
  #endif /* _INTEL_RINGBUFFER_H_ */
author	Chris Wilson <chris@chris-wilson.co.uk>
	Tue, 26 Feb 2019 09:49:19 +0000 (09:49 +0000)
committer	Chris Wilson <chris@chris-wilson.co.uk>
	Tue, 26 Feb 2019 09:55:31 +0000 (09:55 +0000)
drivers/gpu/drm/i915/i915_debugfs.c		patch | blob | history
drivers/gpu/drm/i915/intel_engine_cs.c		patch | blob | history
drivers/gpu/drm/i915/intel_hangcheck.c		patch | blob | history
drivers/gpu/drm/i915/intel_lrc.c		patch | blob | history
drivers/gpu/drm/i915/intel_ringbuffer.c		patch | blob | history
drivers/gpu/drm/i915/intel_ringbuffer.h		patch | blob | history