drm/radeon: convert fence to uint64_t v4
authorJerome Glisse <jglisse@redhat.com>
Wed, 9 May 2012 13:34:46 +0000 (15:34 +0200)
committerDave Airlie <airlied@redhat.com>
Wed, 9 May 2012 16:22:17 +0000 (17:22 +0100)
This convert fence to use uint64_t sequence number intention is
to use the fact that uin64_t is big enough that we don't need to
care about wrap around.

Tested with and without writeback using 0xFFFFF000 as initial
fence sequence and thus allowing to test the wrap around from
32bits to 64bits.

v2: Add comment about possible race btw CPU & GPU, add comment
    stressing that we need 2 dword aligned for R600_WB_EVENT_OFFSET
    Read fence sequenc in reverse order of GPU write them so we
    mitigate the race btw CPU and GPU.

v3: Drop the need for ring to emit the 64bits fence, and just have
    each ring emit the lower 32bits of the fence sequence. We
    handle the wrap over 32bits in fence_process.

v4: Just a small optimization: Don't reread the last_seq value
    if loop restarts, since we already know its value anyway.
    Also start at zero not one for seq value and use pre instead
    of post increment in emmit, otherwise wait_empty will deadlock.

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Christian König <deathsimple@vodafone.de>
Signed-off-by: Dave Airlie <airlied@redhat.com>
drivers/gpu/drm/radeon/radeon.h
drivers/gpu/drm/radeon/radeon_fence.c
drivers/gpu/drm/radeon/radeon_ring.c

index e99ea816d8c91487f28ffce6ac5dcd72e2ed842c..cdf46bc6dcc4f052ab4e30a88cd5f0a7915d2a01 100644 (file)
@@ -100,28 +100,32 @@ extern int radeon_lockup_timeout;
  * Copy from radeon_drv.h so we don't have to include both and have conflicting
  * symbol;
  */
-#define RADEON_MAX_USEC_TIMEOUT                100000  /* 100 ms */
-#define RADEON_FENCE_JIFFIES_TIMEOUT   (HZ / 2)
+#define RADEON_MAX_USEC_TIMEOUT                        100000  /* 100 ms */
+#define RADEON_FENCE_JIFFIES_TIMEOUT           (HZ / 2)
 /* RADEON_IB_POOL_SIZE must be a power of 2 */
-#define RADEON_IB_POOL_SIZE            16
-#define RADEON_DEBUGFS_MAX_COMPONENTS  32
-#define RADEONFB_CONN_LIMIT            4
-#define RADEON_BIOS_NUM_SCRATCH                8
+#define RADEON_IB_POOL_SIZE                    16
+#define RADEON_DEBUGFS_MAX_COMPONENTS          32
+#define RADEONFB_CONN_LIMIT                    4
+#define RADEON_BIOS_NUM_SCRATCH                        8
 
 /* max number of rings */
-#define RADEON_NUM_RINGS 3
+#define RADEON_NUM_RINGS                       3
+
+/* fence seq are set to this number when signaled */
+#define RADEON_FENCE_SIGNALED_SEQ              0LL
+#define RADEON_FENCE_NOTEMITED_SEQ             (~0LL)
 
 /* internal ring indices */
 /* r1xx+ has gfx CP ring */
-#define RADEON_RING_TYPE_GFX_INDEX  0
+#define RADEON_RING_TYPE_GFX_INDEX             0
 
 /* cayman has 2 compute CP rings */
-#define CAYMAN_RING_TYPE_CP1_INDEX 1
-#define CAYMAN_RING_TYPE_CP2_INDEX 2
+#define CAYMAN_RING_TYPE_CP1_INDEX             1
+#define CAYMAN_RING_TYPE_CP2_INDEX             2
 
 /* hardcode those limit for now */
-#define RADEON_VA_RESERVED_SIZE                (8 << 20)
-#define RADEON_IB_VM_MAX_SIZE          (64 << 10)
+#define RADEON_VA_RESERVED_SIZE                        (8 << 20)
+#define RADEON_IB_VM_MAX_SIZE                  (64 << 10)
 
 /*
  * Errata workarounds.
@@ -254,8 +258,9 @@ struct radeon_fence_driver {
        uint32_t                        scratch_reg;
        uint64_t                        gpu_addr;
        volatile uint32_t               *cpu_addr;
-       atomic_t                        seq;
-       uint32_t                        last_seq;
+       /* seq is protected by ring emission lock */
+       uint64_t                        seq;
+       atomic64_t                      last_seq;
        unsigned long                   last_activity;
        wait_queue_head_t               queue;
        struct list_head                emitted;
@@ -268,11 +273,9 @@ struct radeon_fence {
        struct kref                     kref;
        struct list_head                list;
        /* protected by radeon_fence.lock */
-       uint32_t                        seq;
-       bool                            emitted;
-       bool                            signaled;
+       uint64_t                        seq;
        /* RB, DMA, etc. */
-       int                             ring;
+       unsigned                        ring;
        struct radeon_semaphore         *semaphore;
 };
 
index 5bb78bf547ea6eed5aa4d46543f9e524b0700631..feb2bbc6ef6d74525b425163a5b9d3c22a5b26b1 100644 (file)
@@ -66,14 +66,14 @@ int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence *fence)
        unsigned long irq_flags;
 
        write_lock_irqsave(&rdev->fence_lock, irq_flags);
-       if (fence->emitted) {
+       if (fence->seq && fence->seq < RADEON_FENCE_NOTEMITED_SEQ) {
                write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
                return 0;
        }
-       fence->seq = atomic_add_return(1, &rdev->fence_drv[fence->ring].seq);
+       /* we are protected by the ring emission mutex */
+       fence->seq = ++rdev->fence_drv[fence->ring].seq;
        radeon_fence_ring_emit(rdev, fence->ring, fence);
        trace_radeon_fence_emit(rdev->ddev, fence->seq);
-       fence->emitted = true;
        /* are we the first fence on a previusly idle ring? */
        if (list_empty(&rdev->fence_drv[fence->ring].emitted)) {
                rdev->fence_drv[fence->ring].last_activity = jiffies;
@@ -87,14 +87,60 @@ static bool radeon_fence_poll_locked(struct radeon_device *rdev, int ring)
 {
        struct radeon_fence *fence;
        struct list_head *i, *n;
-       uint32_t seq;
+       uint64_t seq, last_seq;
+       unsigned count_loop = 0;
        bool wake = false;
 
-       seq = radeon_fence_read(rdev, ring);
-       if (seq == rdev->fence_drv[ring].last_seq)
-               return false;
+       /* Note there is a scenario here for an infinite loop but it's
+        * very unlikely to happen. For it to happen, the current polling
+        * process need to be interrupted by another process and another
+        * process needs to update the last_seq btw the atomic read and
+        * xchg of the current process.
+        *
+        * More over for this to go in infinite loop there need to be
+        * continuously new fence signaled ie radeon_fence_read needs
+        * to return a different value each time for both the currently
+        * polling process and the other process that xchg the last_seq
+        * btw atomic read and xchg of the current process. And the
+        * value the other process set as last seq must be higher than
+        * the seq value we just read. Which means that current process
+        * need to be interrupted after radeon_fence_read and before
+        * atomic xchg.
+        *
+        * To be even more safe we count the number of time we loop and
+        * we bail after 10 loop just accepting the fact that we might
+        * have temporarly set the last_seq not to the true real last
+        * seq but to an older one.
+        */
+       last_seq = atomic64_read(&rdev->fence_drv[ring].last_seq);
+       do {
+               seq = radeon_fence_read(rdev, ring);
+               seq |= last_seq & 0xffffffff00000000LL;
+               if (seq < last_seq) {
+                       seq += 0x100000000LL;
+               }
 
-       rdev->fence_drv[ring].last_seq = seq;
+               if (!wake && seq == last_seq) {
+                       return false;
+               }
+               /* If we loop over we don't want to return without
+                * checking if a fence is signaled as it means that the
+                * seq we just read is different from the previous on.
+                */
+               wake = true;
+               if ((count_loop++) > 10) {
+                       /* We looped over too many time leave with the
+                        * fact that we might have set an older fence
+                        * seq then the current real last seq as signaled
+                        * by the hw.
+                        */
+                       break;
+               }
+               last_seq = seq;
+       } while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq) > seq);
+
+       /* reset wake to false */
+       wake = false;
        rdev->fence_drv[ring].last_activity = jiffies;
 
        n = NULL;
@@ -112,7 +158,7 @@ static bool radeon_fence_poll_locked(struct radeon_device *rdev, int ring)
                        n = i->prev;
                        list_move_tail(i, &rdev->fence_drv[ring].signaled);
                        fence = list_entry(i, struct radeon_fence, list);
-                       fence->signaled = true;
+                       fence->seq = RADEON_FENCE_SIGNALED_SEQ;
                        i = n;
                } while (i != &rdev->fence_drv[ring].emitted);
                wake = true;
@@ -128,7 +174,7 @@ static void radeon_fence_destroy(struct kref *kref)
        fence = container_of(kref, struct radeon_fence, kref);
        write_lock_irqsave(&fence->rdev->fence_lock, irq_flags);
        list_del(&fence->list);
-       fence->emitted = false;
+       fence->seq = RADEON_FENCE_NOTEMITED_SEQ;
        write_unlock_irqrestore(&fence->rdev->fence_lock, irq_flags);
        if (fence->semaphore)
                radeon_semaphore_free(fence->rdev, fence->semaphore);
@@ -145,9 +191,7 @@ int radeon_fence_create(struct radeon_device *rdev,
        }
        kref_init(&((*fence)->kref));
        (*fence)->rdev = rdev;
-       (*fence)->emitted = false;
-       (*fence)->signaled = false;
-       (*fence)->seq = 0;
+       (*fence)->seq = RADEON_FENCE_NOTEMITED_SEQ;
        (*fence)->ring = ring;
        (*fence)->semaphore = NULL;
        INIT_LIST_HEAD(&(*fence)->list);
@@ -163,18 +207,18 @@ bool radeon_fence_signaled(struct radeon_fence *fence)
                return true;
 
        write_lock_irqsave(&fence->rdev->fence_lock, irq_flags);
-       signaled = fence->signaled;
+       signaled = (fence->seq == RADEON_FENCE_SIGNALED_SEQ);
        /* if we are shuting down report all fence as signaled */
        if (fence->rdev->shutdown) {
                signaled = true;
        }
-       if (!fence->emitted) {
+       if (fence->seq == RADEON_FENCE_NOTEMITED_SEQ) {
                WARN(1, "Querying an unemitted fence : %p !\n", fence);
                signaled = true;
        }
        if (!signaled) {
                radeon_fence_poll_locked(fence->rdev, fence->ring);
-               signaled = fence->signaled;
+               signaled = (fence->seq == RADEON_FENCE_SIGNALED_SEQ);
        }
        write_unlock_irqrestore(&fence->rdev->fence_lock, irq_flags);
        return signaled;
@@ -183,8 +227,8 @@ bool radeon_fence_signaled(struct radeon_fence *fence)
 int radeon_fence_wait(struct radeon_fence *fence, bool intr)
 {
        struct radeon_device *rdev;
-       unsigned long irq_flags, timeout;
-       u32 seq;
+       unsigned long irq_flags, timeout, last_activity;
+       uint64_t seq;
        int i, r;
        bool signaled;
 
@@ -207,7 +251,9 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr)
                        timeout = 1;
                }
                /* save current sequence value used to check for GPU lockups */
-               seq = rdev->fence_drv[fence->ring].last_seq;
+               seq = atomic64_read(&rdev->fence_drv[fence->ring].last_seq);
+               /* Save current last activity valuee, used to check for GPU lockups */
+               last_activity = rdev->fence_drv[fence->ring].last_activity;
                read_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 
                trace_radeon_fence_wait_begin(rdev->ddev, seq);
@@ -235,24 +281,23 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr)
                        }
 
                        write_lock_irqsave(&rdev->fence_lock, irq_flags);
-                       /* check if sequence value has changed since last_activity */
-                       if (seq != rdev->fence_drv[fence->ring].last_seq) {
+                       /* test if somebody else has already decided that this is a lockup */
+                       if (last_activity != rdev->fence_drv[fence->ring].last_activity) {
                                write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
                                continue;
                        }
 
-                       /* change sequence value on all rings, so nobody else things there is a lockup */
-                       for (i = 0; i < RADEON_NUM_RINGS; ++i)
-                               rdev->fence_drv[i].last_seq -= 0x10000;
-
-                       rdev->fence_drv[fence->ring].last_activity = jiffies;
                        write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
 
                        if (radeon_ring_is_lockup(rdev, fence->ring, &rdev->ring[fence->ring])) {
-
                                /* good news we believe it's a lockup */
-                               printk(KERN_WARNING "GPU lockup (waiting for 0x%08X last fence id 0x%08X)\n",
-                                    fence->seq, seq);
+                               dev_warn(rdev->dev, "GPU lockup (waiting for 0x%016llx last fence id 0x%016llx)\n",
+                                        fence->seq, seq);
+
+                               /* change last activity so nobody else think there is a lockup */
+                               for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+                                       rdev->fence_drv[i].last_activity = jiffies;
+                               }
 
                                /* mark the ring as not ready any more */
                                rdev->ring[fence->ring].ready = false;
@@ -387,9 +432,9 @@ int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
        }
        rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
        rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
-       radeon_fence_write(rdev, atomic_read(&rdev->fence_drv[ring].seq), ring);
+       radeon_fence_write(rdev, rdev->fence_drv[ring].seq, ring);
        rdev->fence_drv[ring].initialized = true;
-       DRM_INFO("fence driver on ring %d use gpu addr 0x%08Lx and cpu addr 0x%p\n",
+       DRM_INFO("fence driver on ring %d use gpu addr 0x%016llx and cpu addr 0x%p\n",
                 ring, rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr);
        write_unlock_irqrestore(&rdev->fence_lock, irq_flags);
        return 0;
@@ -400,7 +445,8 @@ static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
        rdev->fence_drv[ring].scratch_reg = -1;
        rdev->fence_drv[ring].cpu_addr = NULL;
        rdev->fence_drv[ring].gpu_addr = 0;
-       atomic_set(&rdev->fence_drv[ring].seq, 0);
+       rdev->fence_drv[ring].seq = 0;
+       atomic64_set(&rdev->fence_drv[ring].last_seq, 0);
        INIT_LIST_HEAD(&rdev->fence_drv[ring].emitted);
        INIT_LIST_HEAD(&rdev->fence_drv[ring].signaled);
        init_waitqueue_head(&rdev->fence_drv[ring].queue);
@@ -458,12 +504,12 @@ static int radeon_debugfs_fence_info(struct seq_file *m, void *data)
                        continue;
 
                seq_printf(m, "--- ring %d ---\n", i);
-               seq_printf(m, "Last signaled fence 0x%08X\n",
-                          radeon_fence_read(rdev, i));
+               seq_printf(m, "Last signaled fence 0x%016lx\n",
+                          atomic64_read(&rdev->fence_drv[i].last_seq));
                if (!list_empty(&rdev->fence_drv[i].emitted)) {
                        fence = list_entry(rdev->fence_drv[i].emitted.prev,
                                           struct radeon_fence, list);
-                       seq_printf(m, "Last emitted fence %p with 0x%08X\n",
+                       seq_printf(m, "Last emitted fence %p with 0x%016llx\n",
                                   fence,  fence->seq);
                }
        }
index a4d60ae524b16b447eef91cf6ee4164d63ddf38f..4ae222bb3ec53279d2b8e3d84ff4ea2091e1e0d6 100644 (file)
@@ -82,7 +82,7 @@ bool radeon_ib_try_free(struct radeon_device *rdev, struct radeon_ib *ib)
        bool done = false;
 
        /* only free ib which have been emited */
-       if (ib->fence && ib->fence->emitted) {
+       if (ib->fence && ib->fence->seq < RADEON_FENCE_NOTEMITED_SEQ) {
                if (radeon_fence_signaled(ib->fence)) {
                        radeon_fence_unref(&ib->fence);
                        radeon_sa_bo_free(rdev, &ib->sa_bo);
@@ -149,8 +149,9 @@ retry:
        /* this should be rare event, ie all ib scheduled none signaled yet.
         */
        for (i = 0; i < RADEON_IB_POOL_SIZE; i++) {
-               if (rdev->ib_pool.ibs[idx].fence && rdev->ib_pool.ibs[idx].fence->emitted) {
-                       r = radeon_fence_wait(rdev->ib_pool.ibs[idx].fence, false);
+               struct radeon_fence *fence = rdev->ib_pool.ibs[idx].fence;
+               if (fence && fence->seq < RADEON_FENCE_NOTEMITED_SEQ) {
+                       r = radeon_fence_wait(fence, false);
                        if (!r) {
                                goto retry;
                        }
@@ -173,7 +174,7 @@ void radeon_ib_free(struct radeon_device *rdev, struct radeon_ib **ib)
                return;
        }
        radeon_mutex_lock(&rdev->ib_pool.mutex);
-       if (tmp->fence && !tmp->fence->emitted) {
+       if (tmp->fence && tmp->fence->seq == RADEON_FENCE_NOTEMITED_SEQ) {
                radeon_sa_bo_free(rdev, &tmp->sa_bo);
                radeon_fence_unref(&tmp->fence);
        }