drm/amdgpu: add ras error count after each query (v2)
author: Tao Zhou <tao.zhou1@amd.com>
Wed, 31 Jul 2019 12:28:13 +0000 (20:28 +0800)
committer: Alex Deucher <alexander.deucher@amd.com>
Wed, 31 Jul 2019 19:49:33 +0000 (14:49 -0500)
v1: increase ras ce/ue error count
v2: log the number of correctable and uncorrectable errors

Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Dennis Li <dennis.li@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c

index 3d39d624e9c629a09d876b66cbe21fdc32d15a29..a6134280b9417f8988beda522cde643b412f2acf 100644 (file)
@@ -601,9 +601,20 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev,
        default:
                break;
        }
+
+       obj->err_data.ue_count += err_data.ue_count;
+       obj->err_data.ce_count += err_data.ce_count;
+
        info->ue_count = obj->err_data.ue_count;
        info->ce_count = obj->err_data.ce_count;
 
+       if (err_data.ce_count)
+               dev_info(adev->dev, "%ld correctable errors detected in %s block\n",
+                        obj->err_data.ce_count, ras_block_str(info->head.block));
+       if (err_data.ue_count)
+               dev_info(adev->dev, "%ld uncorrectable errors detected in %s block\n",
+                        obj->err_data.ue_count, ras_block_str(info->head.block));
+
        return 0;
 }