drm/amdgpu: fix MGPU fan boost enablement for XGMI reset
author Evan Quan <evan.quan@amd.com>
Wed, 26 Jun 2019 02:53:39 +0000 (10:53 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Mon, 1 Jul 2019 19:54:12 +0000 (14:54 -0500)
The MGPU fan boost feature should not be enabled until all the
devices in the same hive are back from reset.

Signed-off-by: Evan Quan <evan.quan@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

index 596f7e07b5a8803bed8ab660225b7778e9ffc1f1..10c4ce69347cfa72eba94f4fe312105fe8cc448f 100644 (file)
@@ -1216,6 +1216,10 @@ int amdgpu_dm_display_resume(struct amdgpu_device *adev );
 static inline int amdgpu_dm_display_resume(struct amdgpu_device *adev) { return 0; }
 #endif
 
+
+void amdgpu_register_gpu_instance(struct amdgpu_device *adev);
+void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev);
+
 #include "amdgpu_object.h"
 
 /* used by df_v3_6.c and amdgpu_pmu.c */
index e886be292f8675a3606766603c9dbcfee2de5c4f..ea79763577c62b4ee2a0ea18db060785a9f8c4be 100644 (file)
@@ -3559,6 +3559,12 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
                                if (vram_lost)
                                        amdgpu_device_fill_reset_magic(tmp_adev);
 
+                               /*
+                                * Track this ASIC again now that its reset has
+                                * completed successfully.
+                                */
+                               amdgpu_register_gpu_instance(tmp_adev);
+
                                r = amdgpu_device_ip_late_init(tmp_adev);
                                if (r)
                                        goto out;
@@ -3693,6 +3699,13 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
                device_list_handle = &device_list;
        }
 
+       /*
+        * Mark the ASICs that are about to be reset as untracked first,
+        * and add them back once the reset has completed.
+        */
+       list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head)
+               amdgpu_unregister_gpu_instance(tmp_adev);
+
        /* block all schedulers and reset given job's ring */
        list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
                for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
index 5832cd8f4ff1a82747e1a8192062c08df2f6b22d..0cf7e8606fd3d8e6df5f43cc15d33536e512d4bb 100644 (file)
@@ -44,7 +44,7 @@
 #include "amdgpu_display.h"
 #include "amdgpu_ras.h"
 
-static void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev)
+void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev)
 {
        struct amdgpu_gpu_instance *gpu_instance;
        int i;
@@ -105,7 +105,7 @@ done_free:
        dev->dev_private = NULL;
 }
 
-static void amdgpu_register_gpu_instance(struct amdgpu_device *adev)
+void amdgpu_register_gpu_instance(struct amdgpu_device *adev)
 {
        struct amdgpu_gpu_instance *gpu_instance;