drm/nouveau/fifo/gk104-: refactor recovery code
author Ben Skeggs <bskeggs@redhat.com>
Wed, 18 Jan 2017 05:22:43 +0000 (15:22 +1000)
committer Ben Skeggs <bskeggs@redhat.com>
Fri, 17 Feb 2017 07:38:13 +0000 (17:38 +1000)
This will serve as a basis for implementing some improvements to how
we recover the GPU from channel errors.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
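
The refactor below splits the old monolithic gk104_fifo_recover() into
per-runlist, per-channel and per-engine helpers.  Each helper records what
needs recovering in a bitmask (recover.runm / recover.engm) and schedules the
existing recovery work item, so repeated faults against the same runlist or
engine are only acted on once.  The stand-alone C model below sketches that
"mark pending, schedule once" pattern; every name in it is illustrative only
and does not exist in nouveau.

/* Simplified, self-contained model of the bookkeeping used by the
 * refactored recovery helpers.  Illustrative only. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct recover_state {
	uint32_t runm;      /* runlists pending recovery, one bit per runlist */
	uint32_t engm;      /* engines pending recovery, one bit per engine */
	bool     scheduled; /* stands in for schedule_work() being idempotent */
};

static void recover_schedule(struct recover_state *r)
{
	if (!r->scheduled) {
		r->scheduled = true;
		printf("recovery work scheduled\n");
	}
}

static void recover_runl(struct recover_state *r, int runl)
{
	const uint32_t runm = 1u << runl;

	if (r->runm & runm)     /* already marked, nothing more to do */
		return;
	r->runm |= runm;        /* remember which runlist to rebuild */
	recover_schedule(r);
}

static void recover_engn(struct recover_state *r, int engn, int runl)
{
	const uint32_t engm = 1u << engn;

	if (r->engm & engm)
		return;
	r->engm |= engm;        /* remember which engine to reset */
	recover_runl(r, runl);  /* its runlist must be rebuilt as well */
	recover_schedule(r);
}

int main(void)
{
	struct recover_state r = {0};

	recover_engn(&r, 0, 0); /* fault on engine 0 (runlist 0) */
	recover_engn(&r, 0, 0); /* duplicate fault is ignored */
	printf("engm=%#x runm=%#x\n", r.engm, r.runm);
	return 0;
}
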
drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c

index 32c35397ae64906a0bb32c295164b08e6a6e35ae..c45f732366b4e11cf9ae75f62b8767caa5c2d28e 100644 (file)
@@ -27,6 +27,7 @@
 #include <core/client.h>
 #include <core/gpuobj.h>
 #include <subdev/bar.h>
+#include <subdev/timer.h>
 #include <subdev/top.h>
 #include <engine/sw.h>
 
@@ -211,33 +212,98 @@ gk104_fifo_recover_work(struct work_struct *w)
        nvkm_mask(device, 0x002630, runm, 0x00000000);
 }
 
+static void gk104_fifo_recover_engn(struct gk104_fifo *fifo, int engn);
+
 static void
-gk104_fifo_recover(struct gk104_fifo *fifo, struct nvkm_engine *engine,
-                  struct gk104_fifo_chan *chan)
+gk104_fifo_recover_runl(struct gk104_fifo *fifo, int runl)
 {
        struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
        struct nvkm_device *device = subdev->device;
-       u32 chid = chan->base.chid;
-       int engn;
+       const u32 runm = BIT(runl);
 
-       nvkm_error(subdev, "%s engine fault on channel %d, recovering...\n",
-                  nvkm_subdev_name[engine->subdev.index], chid);
        assert_spin_locked(&fifo->base.lock);
+       if (fifo->recover.runm & runm)
+               return;
+       fifo->recover.runm |= runm;
 
-       nvkm_mask(device, 0x800004 + (chid * 0x08), 0x00000800, 0x00000800);
-       list_del_init(&chan->head);
-       chan->killed = true;
+       /* Block runlist to prevent channel assignment(s) from changing. */
+       nvkm_mask(device, 0x002630, runm, runm);
 
-       for (engn = 0; engn < fifo->engine_nr; engn++) {
-               if (fifo->engine[engn].engine == engine) {
-                       fifo->recover.engm |= BIT(engn);
+       /* Schedule recovery. */
+       nvkm_warn(subdev, "runlist %d: scheduled for recovery\n", runl);
+       schedule_work(&fifo->recover.work);
+}
+
+static void
+gk104_fifo_recover_chan(struct nvkm_fifo *base, int chid)
+{
+       struct gk104_fifo *fifo = gk104_fifo(base);
+       struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
+       struct nvkm_device *device = subdev->device;
+       const u32  stat = nvkm_rd32(device, 0x800004 + (chid * 0x08));
+       const u32  runl = (stat & 0x000f0000) >> 16;
+       const bool used = (stat & 0x00000001);
+       struct gk104_fifo_chan *chan;
+
+       assert_spin_locked(&fifo->base.lock);
+       if (!used)
+               return;
+
+       /* Look up SW state for channel, and mark it as dead. */
+       list_for_each_entry(chan, &fifo->runlist[runl].chan, head) {
+               if (chan->base.chid == chid) {
+                       list_del_init(&chan->head);
+                       chan->killed = true;
+                       nvkm_fifo_kevent(&fifo->base, chid);
                        break;
                }
        }
 
-       fifo->recover.runm |= BIT(chan->runl);
+       /* Disable channel. */
+       nvkm_wr32(device, 0x800004 + (chid * 0x08), stat | 0x00000800);
+       nvkm_warn(subdev, "channel %d: killed\n", chid);
+}
+
+static void
+gk104_fifo_recover_engn(struct gk104_fifo *fifo, int engn)
+{
+       struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
+       const u32 runl = fifo->engine[engn].runl;
+       const u32 engm = BIT(engn);
+       struct gk104_fifo_engine_status status;
+
+       assert_spin_locked(&fifo->base.lock);
+       if (fifo->recover.engm & engm)
+               return;
+       fifo->recover.engm |= engm;
+
+       /* Block channel assignments from changing during recovery. */
+       gk104_fifo_recover_runl(fifo, runl);
+
+       /* Determine which channel (if any) is currently on the engine. */
+       gk104_fifo_engine_status(fifo, engn, &status);
+       if (status.chan) {
+               /* The channel is no longer viable, kill it. */
+               gk104_fifo_recover_chan(&fifo->base, status.chan->id);
+       }
+
+       /* Schedule recovery. */
+       nvkm_warn(subdev, "engine %d: scheduled for recovery\n", engn);
        schedule_work(&fifo->recover.work);
-       nvkm_fifo_kevent(&fifo->base, chid);
+}
+
+static void
+gk104_fifo_recover(struct gk104_fifo *fifo, struct nvkm_engine *engine,
+                  struct gk104_fifo_chan *chan)
+{
+       int engn;
+       for (engn = 0; engn < fifo->engine_nr; engn++) {
+               if (fifo->engine[engn].engine == engine) {
+                       gk104_fifo_recover_engn(fifo, engn);
+                       break;
+               }
+       }
+       gk104_fifo_recover_chan(&fifo->base, chan->base.chid);
 }
 
 static const struct nvkm_enum
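
gk104_fifo_recover_chan() above works directly from the per-channel
control/status word at 0x800004 + (chid * 0x08): bit 0 is the "used" flag,
bits 19:16 give the runlist the channel belongs to, and writing the word back
with bit 11 (0x800) set disables the channel.  The stand-alone decode below
restates those fields; the type and function names are illustrative only, not
nouveau's.

/* Stand-alone decode of the fields gk104_fifo_recover_chan() reads from the
 * per-channel status word.  Illustrative names only. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct chan_status {
	bool     used; /* bit 0: channel slot is in use */
	uint32_t runl; /* bits 19:16: runlist the channel belongs to */
};

static struct chan_status decode_chan_status(uint32_t stat)
{
	struct chan_status s = {
		.used = (stat & 0x00000001) != 0,
		.runl = (stat & 0x000f0000) >> 16,
	};
	return s;
}

/* Writing the status word back with bit 11 set disables the channel. */
static uint32_t disable_chan(uint32_t stat)
{
	return stat | 0x00000800;
}

int main(void)
{
	uint32_t stat = 0x00030001; /* in use, runlist 3 */
	struct chan_status s = decode_chan_status(stat);

	printf("used=%d runl=%u\n", s.used, s.runl);
	printf("disabled stat=%#x\n", disable_chan(stat));
	return 0;
}
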
@@ -771,6 +837,7 @@ gk104_fifo_ = {
        .intr = gk104_fifo_intr,
        .uevent_init = gk104_fifo_uevent_init,
        .uevent_fini = gk104_fifo_uevent_fini,
+       .recover_chan = gk104_fifo_recover_chan,
        .class_get = gk104_fifo_class_get,
 };
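
The final hunk exposes the per-channel path through the fifo function table
as .recover_chan.  The gk104 helpers only assert that fifo->base.lock is
held, so whatever calls the hook is expected to take that lock itself.
Assuming the base fifo layer gains a small wrapper for this (a companion
patch in this series adds one), it would look roughly like the sketch below;
the wrapper name and its exact shape are an assumption here, not part of this
diff.

/* Sketch of a base-layer wrapper dispatching to func->recover_chan with
 * fifo->lock held, matching the assert_spin_locked() in the gk104 helpers.
 * The name nvkm_fifo_recover_chan and its exact shape are assumptions. */
#include "priv.h"

void
nvkm_fifo_recover_chan(struct nvkm_fifo *fifo, int chid)
{
	unsigned long flags;

	if (WARN_ON(!fifo->func->recover_chan))
		return;

	spin_lock_irqsave(&fifo->lock, flags);
	fifo->func->recover_chan(fifo, chid);
	spin_unlock_irqrestore(&fifo->lock, flags);
}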