drm/i915: Copy user requested buffers into the error state

author Chris Wilson <chris@chris-wilson.co.uk>

Sat, 15 Apr 2017 09:39:02 +0000 (10:39 +0100)

committer Chris Wilson <chris@chris-wilson.co.uk>

Sat, 15 Apr 2017 11:39:57 +0000 (12:39 +0100)
author Chris Wilson <chris@chris-wilson.co.uk>
Sat, 15 Apr 2017 09:39:02 +0000 (10:39 +0100)
committer Chris Wilson <chris@chris-wilson.co.uk>
Sat, 15 Apr 2017 11:39:57 +0000 (12:39 +0100)
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c

index bd85e3826b72b318987d03ff9cebbbeff3480f33..cc7393e65e9959d66d22441397800d490d40e716 100644 (file)
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -350,6 +350,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
         case I915_PARAM_HAS_EXEC_SOFTPIN:
         case I915_PARAM_HAS_EXEC_ASYNC:
         case I915_PARAM_HAS_EXEC_FENCE:
+       case I915_PARAM_HAS_EXEC_CAPTURE:
                 /* For the time being all of these are always true;
                  * if some supported hardware does not have one of these
                  * features this value needs to be provided from
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h

index 1af4e6f5410ceeefc3dcc75ea28f5c883a60f925..ed21f0afaaec790756f24bb745f77050b403f986 100644 (file)
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1025,6 +1025,9 @@ struct i915_gpu_state {
                         u32 *pages[0];
                 } *ringbuffer, *batchbuffer, *wa_batchbuffer, *ctx, *hws_page;
  
+               struct drm_i915_error_object **user_bo;
+               long user_bo_count;
+
                 struct drm_i915_error_object *wa_ctx;
  
                 struct drm_i915_error_request {
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c

index a3e59c8ef27baf4f3584ff5016635d8005735af6..af1965774e7b7f4408c75e39081097e60b483d3a 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1114,6 +1114,18 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
         list_for_each_entry(vma, vmas, exec_list) {
                 struct drm_i915_gem_object *obj = vma->obj;
  
+               if (vma->exec_entry->flags & EXEC_OBJECT_CAPTURE) {
+                       struct i915_gem_capture_list *capture;
+
+                       capture = kmalloc(sizeof(*capture), GFP_KERNEL);
+                       if (unlikely(!capture))
+                               return -ENOMEM;
+
+                       capture->next = req->capture_list;
+                       capture->vma = vma;
+                       req->capture_list = capture;
+               }
+
                 if (vma->exec_entry->flags & EXEC_OBJECT_ASYNC)
                         continue;
  
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c

index 313cdff7c6dd6e92d62179f6c751378afe8ca7d0..095cccc2e8b27e6bf4c5778db048bf297508ca30 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -292,6 +292,19 @@ static void advance_ring(struct drm_i915_gem_request *request)
         request->ring->head = tail;
  }
  
+static void free_capture_list(struct drm_i915_gem_request *request)
+{
+       struct i915_gem_capture_list *capture;
+
+       capture = request->capture_list;
+       while (capture) {
+               struct i915_gem_capture_list *next = capture->next;
+
+               kfree(capture);
+               capture = next;
+       }
+}
+
  static void i915_gem_request_retire(struct drm_i915_gem_request *request)
  {
         struct intel_engine_cs *engine = request->engine;
@@ -317,6 +330,8 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
         unreserve_seqno(request->engine);
         advance_ring(request);
  
+       free_capture_list(request);
+
         /* Walk through the active list, calling retire on each. This allows
          * objects to track their GPU activity and mark themselves as idle
          * when their *last* active request is completed (updating state
@@ -615,6 +630,7 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
         req->global_seqno = 0;
         req->file_priv = NULL;
         req->batch = NULL;
+       req->capture_list = NULL;
  
         /*
          * Reserve space in the ring buffer for all the commands required to
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h

index a211c53c813f75d9e38274643bfc73031eeafa5a..4ccab5affd3c22bdc4e6c7890b0df6f5636ac86d 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -73,6 +73,11 @@ struct i915_priotree {
  #define I915_PRIORITY_MIN (-I915_PRIORITY_MAX)
  };
  
+struct i915_gem_capture_list {
+       struct i915_gem_capture_list *next;
+       struct i915_vma *vma;
+};
+
  /**
   * Request queue structure.
   *
@@ -167,6 +172,12 @@ struct drm_i915_gem_request {
          * error state dump only).
          */
         struct i915_vma *batch;
+       /** Additional buffers requested by userspace to be captured upon
+        * a GPU hang. The vma/obj on this list are protected by their
+        * active reference - all objects on this list must also be
+        * on the active_list (of their final request).
+        */
+       struct i915_gem_capture_list *capture_list;
         struct list_head active_list;
  
         /** Time at which this request was emitted, in jiffies. */
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c

index 8effc59f5cb572651bd7f98fba747821bef0dcc8..4b247b050dcd9e019e3ea8c4e561ef11ed6d4a79 100644 (file)
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -712,6 +712,10 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
                         print_error_obj(m, dev_priv->engine[i], NULL, obj);
                 }
  
+               for (j = 0; j < ee->user_bo_count; j++)
+                       print_error_obj(m, dev_priv->engine[i],
+                                       "user", ee->user_bo[j]);
+
                 if (ee->num_requests) {
                         err_printf(m, "%s --- %d requests\n",
                                    dev_priv->engine[i]->name,
@@ -825,11 +829,15 @@ void __i915_gpu_state_free(struct kref *error_ref)
  {
         struct i915_gpu_state *error =
                 container_of(error_ref, typeof(*error), ref);
-       int i;
+       long i, j;
  
         for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
                 struct drm_i915_error_engine *ee = &error->engine[i];
  
+               for (j = 0; j < ee->user_bo_count; j++)
+                       i915_error_object_free(ee->user_bo[j]);
+               kfree(ee->user_bo);
+
                 i915_error_object_free(ee->batchbuffer);
                 i915_error_object_free(ee->wa_batchbuffer);
                 i915_error_object_free(ee->ringbuffer);
@@ -1346,6 +1354,35 @@ static void record_context(struct drm_i915_error_context *e,
         e->active = ctx->active_count;
  }
  
+static void request_record_user_bo(struct drm_i915_gem_request *request,
+                                  struct drm_i915_error_engine *ee)
+{
+       struct i915_gem_capture_list *c;
+       struct drm_i915_error_object **bo;
+       long count;
+
+       count = 0;
+       for (c = request->capture_list; c; c = c->next)
+               count++;
+
+       bo = NULL;
+       if (count)
+               bo = kcalloc(count, sizeof(*bo), GFP_ATOMIC);
+       if (!bo)
+               return;
+
+       count = 0;
+       for (c = request->capture_list; c; c = c->next) {
+               bo[count] = i915_error_object_create(request->i915, c->vma);
+               if (!bo[count])
+                       break;
+               count++;
+       }
+
+       ee->user_bo = bo;
+       ee->user_bo_count = count;
+}
+
  static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
                                   struct i915_gpu_state *error)
  {
@@ -1392,6 +1429,7 @@ static void i915_gem_record_rings(struct drm_i915_private *dev_priv,
                                 ee->wa_batchbuffer =
                                         i915_error_object_create(dev_priv,
                                                                  engine->scratch);
+                       request_record_user_bo(request, ee);
  
                         ee->ctx =
                                 i915_error_object_create(dev_priv,
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h

index 9ee06ec8a2d6b430deead76159a5adfd0bf62cc4..f24a80d2d42e0ebe9cb5d033706521430325b318 100644 (file)
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -412,6 +412,12 @@ typedef struct drm_i915_irq_wait {
   */
  #define I915_PARAM_HAS_EXEC_FENCE       44
  
+/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to capture
+ * user specified buffers for post-mortem debugging of GPU hangs. See
+ * EXEC_OBJECT_CAPTURE.
+ */
+#define I915_PARAM_HAS_EXEC_CAPTURE     45
+
  typedef struct drm_i915_getparam {
         __s32 param;
         /*
@@ -775,8 +781,15 @@ struct drm_i915_gem_exec_object2 {
   * I915_PARAM_HAS_EXEC_FENCE to order execbufs and execute them asynchronously.
   */
  #define EXEC_OBJECT_ASYNC              (1<<6)
+/* Request that the contents of this execobject be copied into the error
+ * state upon a GPU hang involving this batch for post-mortem debugging.
+ * These buffers are recorded in no particular order as "user" in
+ * /sys/class/drm/cardN/error. Query I915_PARAM_HAS_EXEC_CAPTURE to see
+ * if the kernel supports this flag.
+ */
+#define EXEC_OBJECT_CAPTURE            (1<<7)
  /* All remaining bits are MBZ and RESERVED FOR FUTURE USE */
-#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_ASYNC<<1)
+#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_CAPTURE<<1)
         __u64 flags;
  
         union {
author	Chris Wilson <chris@chris-wilson.co.uk>
	Sat, 15 Apr 2017 09:39:02 +0000 (10:39 +0100)
committer	Chris Wilson <chris@chris-wilson.co.uk>
	Sat, 15 Apr 2017 11:39:57 +0000 (12:39 +0100)
drivers/gpu/drm/i915/i915_drv.c		patch \| blob \| history
drivers/gpu/drm/i915/i915_drv.h		patch \| blob \| history
drivers/gpu/drm/i915/i915_gem_execbuffer.c		patch \| blob \| history
drivers/gpu/drm/i915/i915_gem_request.c		patch \| blob \| history
drivers/gpu/drm/i915/i915_gem_request.h		patch \| blob \| history
drivers/gpu/drm/i915/i915_gpu_error.c		patch \| blob \| history
include/uapi/drm/i915_drm.h		patch \| blob \| history