drm/amdgpu: add reset_ras_error_count function for SDMA
author Hawking Zhang <Hawking.Zhang@amd.com>
Mon, 2 Mar 2020 03:54:28 +0000 (11:54 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Thu, 5 Mar 2020 05:32:32 +0000 (00:32 -0500)
The SDMA RAS error counters contain stale (dirty) values after a cold reboot.
A read operation is needed to reset them to 0.

Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Guchun Chen <guchun.chen@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c

index 485335267d780de5b082c78a288f41f570212a2b..4b352206354b884f5d6cae71d5a26af1008b5edc 100644 (file)
@@ -56,6 +56,7 @@ struct amdgpu_sdma_ras_funcs {
        void (*ras_fini)(struct amdgpu_device *adev);
        int (*query_ras_error_count)(struct amdgpu_device *adev,
                        uint32_t instance, void *ras_error_status);
+       void (*reset_ras_error_count)(struct amdgpu_device *adev);
 };
 
 struct amdgpu_sdma {
index e55884d204bd70529afe7e61bedf36691556eb62..9159bd46482b1a0165f7820141e1a0f374bf68dd 100644 (file)
@@ -1801,13 +1801,9 @@ static int sdma_v4_0_late_init(void *handle)
        struct ras_ih_if ih_info = {
                .cb = sdma_v4_0_process_ras_data_cb,
        };
-       int i;
 
-       /* read back edc counter registers to clear the counters */
-       if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
-               for (i = 0; i < adev->sdma.num_instances; i++)
-                       RREG32_SDMA(i, mmSDMA0_EDC_COUNTER);
-       }
+       if (adev->sdma.funcs && adev->sdma.funcs->reset_ras_error_count)
+               adev->sdma.funcs->reset_ras_error_count(adev);
 
        if (adev->sdma.funcs && adev->sdma.funcs->ras_late_init)
                return adev->sdma.funcs->ras_late_init(adev, &ih_info);
@@ -2572,10 +2568,22 @@ static int sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev,
        return 0;
 };
 
+static void sdma_v4_0_reset_ras_error_count(struct amdgpu_device *adev)
+{
+       int i;
+
+       /* read back edc counter registers to clear the counters */
+       if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA)) {
+               for (i = 0; i < adev->sdma.num_instances; i++)
+                       RREG32_SDMA(i, mmSDMA0_EDC_COUNTER);
+       }
+}
+
 static const struct amdgpu_sdma_ras_funcs sdma_v4_0_ras_funcs = {
        .ras_late_init = amdgpu_sdma_ras_late_init,
        .ras_fini = amdgpu_sdma_ras_fini,
        .query_ras_error_count = sdma_v4_0_query_ras_error_count,
+       .reset_ras_error_count = sdma_v4_0_reset_ras_error_count,
 };
 
 static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev)