From b97dfa27ef3ad3eddd2cb97a3b6a140d7037827a Mon Sep 17 00:00:00 2001 From: shaoyunl Date: Wed, 11 Jul 2018 22:32:49 -0400 Subject: [PATCH] drm/amdgpu: save vm fault information for amdkfd MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit amdgpu saves the VM fault related information for KFD usage and keeps the copy until KFD reads it. Signed-off-by: shaoyun liu Signed-off-by: Felix Kuehling Acked-by: Christian König Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 3 ++ .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 1 + .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 1 + .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 14 ++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 2 ++ drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 33 ++++++++++++++++++- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 33 ++++++++++++++++++- .../gpu/drm/amd/include/kgd_kfd_interface.h | 20 +++++++++++ 8 files changed, 105 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index a8418a3f4e9d..3dc76d9b4d12 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -183,6 +183,9 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, struct dma_fence **ef); +int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, + struct kfd_vm_fault_info *info); + void amdgpu_amdkfd_gpuvm_init_mem_limits(void); void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index ea79908dac4c..befc7c48b1cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -216,6 +216,7 @@ static const struct kfd2kgd_calls kfd2kgd = { .invalidate_tlbs = invalidate_tlbs, 
.invalidate_tlbs_vmid = invalidate_tlbs_vmid, .submit_ib = amdgpu_amdkfd_submit_ib, + .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index 19dd665e7307..c68ef85f7753 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -176,6 +176,7 @@ static const struct kfd2kgd_calls kfd2kgd = { .invalidate_tlbs = invalidate_tlbs, .invalidate_tlbs_vmid = invalidate_tlbs_vmid, .submit_ib = amdgpu_amdkfd_submit_ib, + .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index fa38a960ce00..8a707d8bbb1c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1621,6 +1621,20 @@ bo_reserve_failed: return ret; } +int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, + struct kfd_vm_fault_info *mem) +{ + struct amdgpu_device *adev; + + adev = (struct amdgpu_device *)kgd; + if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) { + *mem = *adev->gmc.vm_fault_info; + mb(); + atomic_set(&adev->gmc.vm_fault_info_updated, 0); + } + return 0; +} + /* Evict a userptr BO by stopping the queues if necessary * * Runs in MMU notifier, may be in RECLAIM_FS context. 
This means it diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 6cb4948233cb..bb5a47a45790 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -105,6 +105,8 @@ struct amdgpu_gmc { /* protects concurrent invalidation */ spinlock_t invalidate_lock; bool translate_further; + struct kfd_vm_fault_info *vm_fault_info; + atomic_t vm_fault_info_updated; const struct amdgpu_gmc_funcs *gmc_funcs; }; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index 10920f0bd85f..36dc367c4b45 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -28,6 +28,7 @@ #include "cik.h" #include "gmc_v7_0.h" #include "amdgpu_ucode.h" +#include "amdgpu_amdkfd.h" #include "bif/bif_4_1_d.h" #include "bif/bif_4_1_sh_mask.h" @@ -1078,6 +1079,12 @@ static int gmc_v7_0_sw_init(void *handle) adev->vm_manager.vram_base_offset = 0; } + adev->gmc.vm_fault_info = kmalloc(sizeof(struct kfd_vm_fault_info), + GFP_KERNEL); + if (!adev->gmc.vm_fault_info) + return -ENOMEM; + atomic_set(&adev->gmc.vm_fault_info_updated, 0); + return 0; } @@ -1087,6 +1094,7 @@ static int gmc_v7_0_sw_fini(void *handle) amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); + kfree(adev->gmc.vm_fault_info); gmc_v7_0_gart_fini(adev); amdgpu_bo_fini(adev); release_firmware(adev->gmc.fw); @@ -1276,7 +1284,7 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - u32 addr, status, mc_client; + u32 addr, status, mc_client, vmid; addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR); status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS); @@ -1301,6 +1309,29 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev, entry->pasid); } + vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, + VMID); + if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid) + && 
!atomic_read(&adev->gmc.vm_fault_info_updated)) { + struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info; + u32 protections = REG_GET_FIELD(status, + VM_CONTEXT1_PROTECTION_FAULT_STATUS, + PROTECTIONS); + + info->vmid = vmid; + info->mc_id = REG_GET_FIELD(status, + VM_CONTEXT1_PROTECTION_FAULT_STATUS, + MEMORY_CLIENT_ID); + info->status = status; + info->page_addr = addr; + info->prot_valid = protections & 0x7 ? true : false; + info->prot_read = protections & 0x8 ? true : false; + info->prot_write = protections & 0x10 ? true : false; + info->prot_exec = protections & 0x20 ? true : false; + mb(); + atomic_set(&adev->gmc.vm_fault_info_updated, 1); + } + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 75f3ffb2891e..70fc97b59b4f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -26,6 +26,7 @@ #include "amdgpu.h" #include "gmc_v8_0.h" #include "amdgpu_ucode.h" +#include "amdgpu_amdkfd.h" #include "gmc/gmc_8_1_d.h" #include "gmc/gmc_8_1_sh_mask.h" @@ -1182,6 +1183,12 @@ static int gmc_v8_0_sw_init(void *handle) adev->vm_manager.vram_base_offset = 0; } + adev->gmc.vm_fault_info = kmalloc(sizeof(struct kfd_vm_fault_info), + GFP_KERNEL); + if (!adev->gmc.vm_fault_info) + return -ENOMEM; + atomic_set(&adev->gmc.vm_fault_info_updated, 0); + return 0; } @@ -1191,6 +1198,7 @@ static int gmc_v8_0_sw_fini(void *handle) amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); + kfree(adev->gmc.vm_fault_info); gmc_v8_0_gart_fini(adev); amdgpu_bo_fini(adev); release_firmware(adev->gmc.fw); @@ -1426,7 +1434,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - u32 addr, status, mc_client; + u32 addr, status, mc_client, vmid; if (amdgpu_sriov_vf(adev)) { dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n", @@ -1463,6 +1471,29 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device 
*adev, entry->pasid); } + vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, + VMID); + if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid) + && !atomic_read(&adev->gmc.vm_fault_info_updated)) { + struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info; + u32 protections = REG_GET_FIELD(status, + VM_CONTEXT1_PROTECTION_FAULT_STATUS, + PROTECTIONS); + + info->vmid = vmid; + info->mc_id = REG_GET_FIELD(status, + VM_CONTEXT1_PROTECTION_FAULT_STATUS, + MEMORY_CLIENT_ID); + info->status = status; + info->page_addr = addr; + info->prot_valid = protections & 0x7 ? true : false; + info->prot_read = protections & 0x8 ? true : false; + info->prot_write = protections & 0x10 ? true : false; + info->prot_exec = protections & 0x20 ? true : false; + mb(); + atomic_set(&adev->gmc.vm_fault_info_updated, 1); + } + return 0; } diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 5733fbee07f7..28b11d105288 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -47,6 +47,17 @@ enum kfd_preempt_type { KFD_PREEMPT_TYPE_WAVEFRONT_RESET, }; +struct kfd_vm_fault_info { + uint64_t page_addr; + uint32_t vmid; + uint32_t mc_id; + uint32_t status; + bool prot_valid; + bool prot_read; + bool prot_write; + bool prot_exec; +}; + struct kfd_cu_info { uint32_t num_shader_engines; uint32_t num_shader_arrays_per_engine; @@ -259,6 +270,12 @@ struct tile_config { * IB to the corresponding ring (ring type). The IB is executed with the * specified VMID in a user mode context. * + * @get_vm_fault_info: Return information about a recent VM fault on + * GFXv7 and v8. If multiple VM faults occurred since the last call of + * this function, it will return information about the first of those + * faults. On GFXv9 VM fault information is fully contained in the IH + * packet and this function is not needed. 
+ * * This structure contains function pointers to services that the kgd driver * provides to amdkfd driver. * @@ -374,6 +391,9 @@ struct kfd2kgd_calls { int (*submit_ib)(struct kgd_dev *kgd, enum kgd_engine_type engine, uint32_t vmid, uint64_t gpu_addr, uint32_t *ib_cmd, uint32_t ib_len); + + int (*get_vm_fault_info)(struct kgd_dev *kgd, + struct kfd_vm_fault_info *info); }; /** -- 2.30.2