drm/amdgpu: Fix page fault and kasan warning on pci device remove.
author Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Wed, 22 Aug 2018 14:07:35 +0000 (10:07 -0400)
committer Alex Deucher <alexander.deucher@amd.com>
Mon, 27 Aug 2018 16:09:40 +0000 (11:09 -0500)
Problem:
When executing echo 1 > /sys/class/drm/card0/device/remove, the KASAN warning
shown below and a page fault occur because adev->gart.pages has already been
freed by the time amdgpu_gart_unbind is called.

BUG: KASAN: user-memory-access in amdgpu_gart_unbind+0x98/0x180 [amdgpu]
Write of size 8 at addr 0000000000003648 by task bash/1828
CPU: 2 PID: 1828 Comm: bash Tainted: G        W  O      4.18.0-rc1-dev+ #29
Hardware name: Gigabyte Technology Co., Ltd. AX370-Gaming/AX370-Gaming-CF, BIOS F3 06/19/2017
Call Trace:
dump_stack+0x71/0xab
kasan_report+0x109/0x390
amdgpu_gart_unbind+0x98/0x180 [amdgpu]
ttm_tt_unbind+0x43/0x60 [ttm]
ttm_bo_move_ttm+0x83/0x1c0 [ttm]
ttm_bo_handle_move_mem+0xb97/0xd00 [ttm]
ttm_bo_evict+0x273/0x530 [ttm]
ttm_mem_evict_first+0x29c/0x360 [ttm]
ttm_bo_force_list_clean+0xfc/0x210 [ttm]
ttm_bo_clean_mm+0xe7/0x160 [ttm]
amdgpu_ttm_fini+0xda/0x1d0 [amdgpu]
amdgpu_bo_fini+0xf/0x60 [amdgpu]
gmc_v8_0_sw_fini+0x36/0x70 [amdgpu]
amdgpu_device_fini+0x2d0/0x7d0 [amdgpu]
amdgpu_driver_unload_kms+0x6a/0xd0 [amdgpu]
drm_dev_unregister+0x79/0x180 [drm]
amdgpu_pci_remove+0x2a/0x60 [amdgpu]
pci_device_remove+0x5b/0x100
device_release_driver_internal+0x236/0x360
pci_stop_bus_device+0xbf/0xf0
pci_stop_and_remove_bus_device_locked+0x16/0x30
remove_store+0xda/0xf0
kernfs_fop_write+0x186/0x220
__vfs_write+0xcc/0x330
vfs_write+0xe6/0x250
ksys_write+0xb1/0x140
do_syscall_64+0x77/0x1e0
entry_SYSCALL_64_after_hwframe+0x44/0xa9
RIP: 0033:0x7f66ebbb32c0

Fix:
Split gmc_v{6,7,8,9}_0_gart_fini so that amdgpu_gart_fini is postponed until
after the memory managers are shut down (amdgpu_bo_fini), since GART unbind
happens as part of that shutdown.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
Reviewed-by: Junwei Zhang <Jerry.Zhang@amd.com>
Acked-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c

index 75317f283c6967d2de4daaaf5dca4cfdaf9922b7..ad151fefa41f1ed1d6f19ae1783b13b1b76b4f2e 100644 (file)
@@ -632,12 +632,6 @@ static void gmc_v6_0_gart_disable(struct amdgpu_device *adev)
        amdgpu_gart_table_vram_unpin(adev);
 }
 
-static void gmc_v6_0_gart_fini(struct amdgpu_device *adev)
-{
-       amdgpu_gart_table_vram_free(adev);
-       amdgpu_gart_fini(adev);
-}
-
 static void gmc_v6_0_vm_decode_fault(struct amdgpu_device *adev,
                                     u32 status, u32 addr, u32 mc_client)
 {
@@ -935,8 +929,9 @@ static int gmc_v6_0_sw_fini(void *handle)
 
        amdgpu_gem_force_release(adev);
        amdgpu_vm_manager_fini(adev);
-       gmc_v6_0_gart_fini(adev);
+       amdgpu_gart_table_vram_free(adev);
        amdgpu_bo_fini(adev);
+       amdgpu_gart_fini(adev);
        release_firmware(adev->gmc.fw);
        adev->gmc.fw = NULL;
 
index 36dc367c4b45ea86a5a5b575ba357f9717ec92b0..f8d8a3a73e42b31397b97f743ab187da2732cd89 100644 (file)
@@ -746,19 +746,6 @@ static void gmc_v7_0_gart_disable(struct amdgpu_device *adev)
        amdgpu_gart_table_vram_unpin(adev);
 }
 
-/**
- * gmc_v7_0_gart_fini - vm fini callback
- *
- * @adev: amdgpu_device pointer
- *
- * Tears down the driver GART/VM setup (CIK).
- */
-static void gmc_v7_0_gart_fini(struct amdgpu_device *adev)
-{
-       amdgpu_gart_table_vram_free(adev);
-       amdgpu_gart_fini(adev);
-}
-
 /**
  * gmc_v7_0_vm_decode_fault - print human readable fault info
  *
@@ -1095,8 +1082,9 @@ static int gmc_v7_0_sw_fini(void *handle)
        amdgpu_gem_force_release(adev);
        amdgpu_vm_manager_fini(adev);
        kfree(adev->gmc.vm_fault_info);
-       gmc_v7_0_gart_fini(adev);
+       amdgpu_gart_table_vram_free(adev);
        amdgpu_bo_fini(adev);
+       amdgpu_gart_fini(adev);
        release_firmware(adev->gmc.fw);
        adev->gmc.fw = NULL;
 
index 70fc97b59b4f2dcf157b49885c8356fedfca05a3..9333109b210de810119f0d15d94ec5d125a84cf7 100644 (file)
@@ -968,19 +968,6 @@ static void gmc_v8_0_gart_disable(struct amdgpu_device *adev)
        amdgpu_gart_table_vram_unpin(adev);
 }
 
-/**
- * gmc_v8_0_gart_fini - vm fini callback
- *
- * @adev: amdgpu_device pointer
- *
- * Tears down the driver GART/VM setup (CIK).
- */
-static void gmc_v8_0_gart_fini(struct amdgpu_device *adev)
-{
-       amdgpu_gart_table_vram_free(adev);
-       amdgpu_gart_fini(adev);
-}
-
 /**
  * gmc_v8_0_vm_decode_fault - print human readable fault info
  *
@@ -1199,8 +1186,9 @@ static int gmc_v8_0_sw_fini(void *handle)
        amdgpu_gem_force_release(adev);
        amdgpu_vm_manager_fini(adev);
        kfree(adev->gmc.vm_fault_info);
-       gmc_v8_0_gart_fini(adev);
+       amdgpu_gart_table_vram_free(adev);
        amdgpu_bo_fini(adev);
+       amdgpu_gart_fini(adev);
        release_firmware(adev->gmc.fw);
        adev->gmc.fw = NULL;
 
index 399a5db27649728686868550502089391f3b0807..72f8018fa2a836572b9c898785bb99deecc1ca91 100644 (file)
@@ -942,26 +942,12 @@ static int gmc_v9_0_sw_init(void *handle)
        return 0;
 }
 
-/**
- * gmc_v9_0_gart_fini - vm fini callback
- *
- * @adev: amdgpu_device pointer
- *
- * Tears down the driver GART/VM setup (CIK).
- */
-static void gmc_v9_0_gart_fini(struct amdgpu_device *adev)
-{
-       amdgpu_gart_table_vram_free(adev);
-       amdgpu_gart_fini(adev);
-}
-
 static int gmc_v9_0_sw_fini(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
        amdgpu_gem_force_release(adev);
        amdgpu_vm_manager_fini(adev);
-       gmc_v9_0_gart_fini(adev);
 
        /*
        * TODO:
@@ -974,7 +960,9 @@ static int gmc_v9_0_sw_fini(void *handle)
        */
        amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL);
 
+       amdgpu_gart_table_vram_free(adev);
        amdgpu_bo_fini(adev);
+       amdgpu_gart_fini(adev);
 
        return 0;
 }