drm/amdgpu: add high priority compute support for gfx9
authorAndres Rodriguez <andresx7@gmail.com>
Tue, 2 Jan 2018 20:49:40 +0000 (15:49 -0500)
committerAlex Deucher <alexander.deucher@amd.com>
Mon, 19 Feb 2018 19:17:10 +0000 (14:17 -0500)
We follow the same approach as gfx8. The only changes are register
access macros.

Tested on vega10. The execution latency results fall within the expected
ranges from the polaris10 data.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Andres Rodriguez <andresx7@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c

index c06479615e8a4c69fc3baa2f4802ffcb9dbefdb5..ee5464b43e9eb5fbcd6b114b4e2af60b39aee7be 100644 (file)
@@ -3735,6 +3735,105 @@ static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
        return wptr;
 }
 
+static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
+                                          bool acquire)
+{
+       struct amdgpu_device *adev = ring->adev;
+       int pipe_num, tmp, reg;
+       int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
+
+       pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
+
+       /* first me only has 2 entries, GFX and HP3D */
+       if (ring->me > 0)
+               pipe_num -= 2;
+
+       reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
+       tmp = RREG32(reg);
+       tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
+       WREG32(reg, tmp);
+}
+
+static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
+                                           struct amdgpu_ring *ring,
+                                           bool acquire)
+{
+       int i, pipe;
+       bool reserve;
+       struct amdgpu_ring *iring;
+
+       mutex_lock(&adev->gfx.pipe_reserve_mutex);
+       pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
+       if (acquire)
+               set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
+       else
+               clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
+
+       if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
+               /* Clear all reservations - everyone reacquires all resources */
+               for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
+                       gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
+                                                      true);
+
+               for (i = 0; i < adev->gfx.num_compute_rings; ++i)
+                       gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
+                                                      true);
+       } else {
+               /* Lower all pipes without a current reservation */
+               for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
+                       iring = &adev->gfx.gfx_ring[i];
+                       pipe = amdgpu_gfx_queue_to_bit(adev,
+                                                      iring->me,
+                                                      iring->pipe,
+                                                      0);
+                       reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
+                       gfx_v9_0_ring_set_pipe_percent(iring, reserve);
+               }
+
+               for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
+                       iring = &adev->gfx.compute_ring[i];
+                       pipe = amdgpu_gfx_queue_to_bit(adev,
+                                                      iring->me,
+                                                      iring->pipe,
+                                                      0);
+                       reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
+                       gfx_v9_0_ring_set_pipe_percent(iring, reserve);
+               }
+       }
+
+       mutex_unlock(&adev->gfx.pipe_reserve_mutex);
+}
+
+static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
+                                     struct amdgpu_ring *ring,
+                                     bool acquire)
+{
+       uint32_t pipe_priority = acquire ? 0x2 : 0x0;
+       uint32_t queue_priority = acquire ? 0xf : 0x0;
+
+       mutex_lock(&adev->srbm_mutex);
+       soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
+
+       WREG32_SOC15(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
+       WREG32_SOC15(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
+
+       soc15_grbm_select(adev, 0, 0, 0, 0);
+       mutex_unlock(&adev->srbm_mutex);
+}
+
+static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
+                                              enum drm_sched_priority priority)
+{
+       struct amdgpu_device *adev = ring->adev;
+       bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
+
+       if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
+               return;
+
+       gfx_v9_0_hqd_set_priority(adev, ring, acquire);
+       gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
+}
+
 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
 {
        struct amdgpu_device *adev = ring->adev;
@@ -4261,6 +4360,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
        .test_ib = gfx_v9_0_ring_test_ib,
        .insert_nop = amdgpu_ring_insert_nop,
        .pad_ib = amdgpu_ring_generic_pad_ib,
+       .set_priority = gfx_v9_0_ring_set_priority_compute,
 };
 
 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {