drm/amdgpu: fix sdma v4 startup under SRIOV
authorChristian König <christian.koenig@amd.com>
Wed, 26 Sep 2018 17:56:41 +0000 (19:56 +0200)
committerAlex Deucher <alexander.deucher@amd.com>
Mon, 5 Nov 2018 19:20:53 +0000 (14:20 -0500)
Under SRIOV we were enabling the ring buffer before it was initialized.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c

index 7a8c9172d30a946fd91d147f8c73267a51b1fb08..fde27d8bfeb285e118c8810b33c8545cbed9e75f 100644 (file)
@@ -675,13 +675,14 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable)
  * sdma_v4_0_gfx_resume - setup and start the async dma engines
  *
  * @adev: amdgpu_device pointer
+ * @i: instance to resume
  *
  * Set up the gfx DMA ring buffers and enable them (VEGA10).
  * Returns 0 for success, error for failure.
  */
-static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev)
+static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i)
 {
-       struct amdgpu_ring *ring;
+       struct amdgpu_ring *ring = &adev->sdma.instance[i].ring;
        u32 rb_cntl, ib_cntl, wptr_poll_cntl;
        u32 rb_bufsz;
        u32 wb_offset;
@@ -689,129 +690,108 @@ static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev)
        u32 doorbell_offset;
        u32 temp;
        u64 wptr_gpu_addr;
-       int i, r;
 
-       for (i = 0; i < adev->sdma.num_instances; i++) {
-               ring = &adev->sdma.instance[i].ring;
-               wb_offset = (ring->rptr_offs * 4);
+       wb_offset = (ring->rptr_offs * 4);
 
-               WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
+       WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0);
 
-               /* Set ring buffer size in dwords */
-               rb_bufsz = order_base_2(ring->ring_size / 4);
-               rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
-               rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
+       /* Set ring buffer size in dwords */
+       rb_bufsz = order_base_2(ring->ring_size / 4);
+       rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL));
+       rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
 #ifdef __BIG_ENDIAN
-               rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
-               rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
-                                       RPTR_WRITEBACK_SWAP_ENABLE, 1);
+       rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
+       rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
+                               RPTR_WRITEBACK_SWAP_ENABLE, 1);
 #endif
-               WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+       WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
 
-               /* Initialize the ring buffer's read and write pointers */
-               WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0);
-               WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0);
-               WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0);
-               WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
+       /* Initialize the ring buffer's read and write pointers */
+       WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0);
+       WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0);
+       WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0);
+       WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0);
 
-               /* set the wb address whether it's enabled or not */
-               WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
-                      upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
-               WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
-                      lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
+       /* set the wb address whether it's enabled or not */
+       WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI),
+              upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
+       WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO),
+              lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);
 
-               rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
+       rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);
 
-               WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
-               WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);
+       WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8);
+       WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40);
 
-               ring->wptr = 0;
+       ring->wptr = 0;
 
-               /* before programing wptr to a less value, need set minor_ptr_update first */
-               WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
+       /* before programing wptr to a less value, need set minor_ptr_update first */
+       WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1);
 
-               if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
-                       WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
-                       WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
-               }
+       if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */
+               WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2);
+               WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2);
+       }
 
-               doorbell = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
-               doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));
+       doorbell = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL));
+       doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET));
 
-               if (ring->use_doorbell) {
-                       doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
-                       doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
-                                       OFFSET, ring->doorbell_index);
-               } else {
-                       doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
-               }
-               WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
-               WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
-               adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
-                                                     ring->doorbell_index);
-
-               if (amdgpu_sriov_vf(adev))
-                       sdma_v4_0_ring_set_wptr(ring);
-
-               /* set minor_ptr_update to 0 after wptr programed */
-               WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
-
-               /* set utc l1 enable flag always to 1 */
-               temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
-               temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
-               WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
-
-               if (!amdgpu_sriov_vf(adev)) {
-                       /* unhalt engine */
-                       temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
-                       temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
-                       WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
-               }
+       if (ring->use_doorbell) {
+               doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1);
+               doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET,
+                               OFFSET, ring->doorbell_index);
+       } else {
+               doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0);
+       }
+       WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell);
+       WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset);
+       adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell,
+                                             ring->doorbell_index);
 
-               /* setup the wptr shadow polling */
-               wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
-               WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
-                      lower_32_bits(wptr_gpu_addr));
-               WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
-                      upper_32_bits(wptr_gpu_addr));
-               wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
-               if (amdgpu_sriov_vf(adev))
-                       wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1);
-               else
-                       wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0);
-               WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl);
+       if (amdgpu_sriov_vf(adev))
+               sdma_v4_0_ring_set_wptr(ring);
 
-               /* enable DMA RB */
-               rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
-               WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
+       /* set minor_ptr_update to 0 after wptr programed */
+       WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0);
 
-               ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
-               ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
-#ifdef __BIG_ENDIAN
-               ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
-#endif
-               /* enable DMA IBs */
-               WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
+       /* set utc l1 enable flag always to 1 */
+       temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL));
+       temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1);
+       WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp);
 
-               ring->ready = true;
+       if (!amdgpu_sriov_vf(adev)) {
+               /* unhalt engine */
+               temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
+               temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0);
+               WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp);
+       }
 
-               if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
-                       sdma_v4_0_ctx_switch_enable(adev, true);
-                       sdma_v4_0_enable(adev, true);
-               }
+       /* setup the wptr shadow polling */
+       wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
+       WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO),
+              lower_32_bits(wptr_gpu_addr));
+       WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI),
+              upper_32_bits(wptr_gpu_addr));
+       wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL));
+       if (amdgpu_sriov_vf(adev))
+               wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1);
+       else
+               wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0);
+       WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl);
 
-               r = amdgpu_ring_test_ring(ring);
-               if (r) {
-                       ring->ready = false;
-                       return r;
-               }
+       /* enable DMA RB */
+       rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
+       WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl);
 
-               if (adev->mman.buffer_funcs_ring == ring)
-                       amdgpu_ttm_set_buffer_funcs_status(adev, true);
-
-       }
+       ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL));
+       ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
+#ifdef __BIG_ENDIAN
+       ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
+#endif
+       /* enable DMA IBs */
+       WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl);
 
-       return 0;
+       ring->ready = true;
 }
 
 static void
@@ -943,33 +923,51 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev)
  */
 static int sdma_v4_0_start(struct amdgpu_device *adev)
 {
-       int r = 0;
+       struct amdgpu_ring *ring;
+       int i, r;
 
        if (amdgpu_sriov_vf(adev)) {
                sdma_v4_0_ctx_switch_enable(adev, false);
                sdma_v4_0_enable(adev, false);
+       } else {
 
-               /* set RB registers */
-               r = sdma_v4_0_gfx_resume(adev);
-               return r;
+               if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+                       r = sdma_v4_0_load_microcode(adev);
+                       if (r)
+                               return r;
+               }
+
+               /* unhalt the MEs */
+               sdma_v4_0_enable(adev, true);
+               /* enable sdma ring preemption */
+               sdma_v4_0_ctx_switch_enable(adev, true);
        }
 
-       if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
-               r = sdma_v4_0_load_microcode(adev);
+       /* start the gfx rings and rlc compute queues */
+       for (i = 0; i < adev->sdma.num_instances; i++)
+               sdma_v4_0_gfx_resume(adev, i);
+
+       if (amdgpu_sriov_vf(adev)) {
+               sdma_v4_0_ctx_switch_enable(adev, true);
+               sdma_v4_0_enable(adev, true);
+       } else {
+               r = sdma_v4_0_rlc_resume(adev);
                if (r)
                        return r;
        }
 
-       /* unhalt the MEs */
-       sdma_v4_0_enable(adev, true);
-       /* enable sdma ring preemption */
-       sdma_v4_0_ctx_switch_enable(adev, true);
+       for (i = 0; i < adev->sdma.num_instances; i++) {
+               ring = &adev->sdma.instance[i].ring;
 
-       /* start the gfx rings and rlc compute queues */
-       r = sdma_v4_0_gfx_resume(adev);
-       if (r)
-               return r;
-       r = sdma_v4_0_rlc_resume(adev);
+               r = amdgpu_ring_test_ring(ring);
+               if (r) {
+                       ring->ready = false;
+                       return r;
+               }
+
+               if (adev->mman.buffer_funcs_ring == ring)
+                       amdgpu_ttm_set_buffer_funcs_status(adev, true);
+       }
 
        return r;
 }