drm/amdgpu: add reset_ras_error_count function for MMHUB
author Hawking Zhang <Hawking.Zhang@amd.com>
Mon, 2 Mar 2020 04:14:20 +0000 (12:14 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Thu, 5 Mar 2020 05:32:40 +0000 (00:32 -0500)
The MMHUB RAS error counters hold stale (dirty) values after a cold reboot.
A read operation is needed to reset them to 0.

Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Guchun Chen <guchun.chen@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c

index 1cd78940cf828b88a1a49b8de8ecfffae88b5ae5..e89fb35fec713f62f5156f27541b5b7911130828 100644 (file)
@@ -26,6 +26,7 @@ struct amdgpu_mmhub_funcs {
        int (*ras_late_init)(struct amdgpu_device *adev);
        void (*query_ras_error_count)(struct amdgpu_device *adev,
                                        void *ras_error_status);
+       void (*reset_ras_error_count)(struct amdgpu_device *adev);
 };
 
 struct amdgpu_mmhub {
index 10171acbf3e1b37bdc6d0b35a3a205f3424ee734..6ceaab5531309db786d0ec16391c4ba7595b8a10 100644 (file)
@@ -948,6 +948,9 @@ static int gmc_v9_0_late_init(void *handle)
                }
        }
 
+       if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count)
+               adev->mmhub.funcs->reset_ras_error_count(adev);
+
        r = amdgpu_gmc_ras_late_init(adev);
        if (r)
                return r;
index 49a3a56ec017d65506a943af107ae1a6f185fc9c..396c2a624de08031f3c9e0d71248e2ca89aeb835 100644 (file)
@@ -747,7 +747,19 @@ static void mmhub_v1_0_query_ras_error_count(struct amdgpu_device *adev,
        err_data->ue_count += ded_count;
 }
 
+static void mmhub_v1_0_reset_ras_error_count(struct amdgpu_device *adev)
+{
+       uint32_t i;
+
+       /* EDC counters are reset to 0 by reading them; discard the values */
+       if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) {
+               for (i = 0; i < ARRAY_SIZE(mmhub_v1_0_edc_cnt_regs); i++)
+                       RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_0_edc_cnt_regs[i]));
+       }
+}
+
 const struct amdgpu_mmhub_funcs mmhub_v1_0_funcs = {
        .ras_late_init = amdgpu_mmhub_ras_late_init,
        .query_ras_error_count = mmhub_v1_0_query_ras_error_count,
+       .reset_ras_error_count = mmhub_v1_0_reset_ras_error_count,
 };
index a5281df8d84fbd75676420bf6ba16d4ac79af27c..0d413fabd015c9276e5370ca79a2e7079f22459d 100644 (file)
@@ -1596,7 +1596,19 @@ static void mmhub_v9_4_query_ras_error_count(struct amdgpu_device *adev,
        err_data->ue_count += ded_count;
 }
 
+static void mmhub_v9_4_reset_ras_error_count(struct amdgpu_device *adev)
+{
+       uint32_t i;
+
+       /* EDC counters are reset to 0 by reading them; discard the values */
+       if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) {
+               for (i = 0; i < ARRAY_SIZE(mmhub_v9_4_edc_cnt_regs); i++)
+                       RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v9_4_edc_cnt_regs[i]));
+       }
+}
+
 const struct amdgpu_mmhub_funcs mmhub_v9_4_funcs = {
        .ras_late_init = amdgpu_mmhub_ras_late_init,
        .query_ras_error_count = mmhub_v9_4_query_ras_error_count,
+       .reset_ras_error_count = mmhub_v9_4_reset_ras_error_count,
 };