From 3ff985485b29693376bb470a40b7aba4394a189b Mon Sep 17 00:00:00 2001 From: Oak Zeng Date: Thu, 1 Aug 2019 14:55:45 -0500 Subject: [PATCH] drm/amdgpu: Export function to flush TLB of specific vm hub This is for kfd to reuse amdgpu TLB invalidation function. On gfx10, kfd only needs to flush TLB on gfx hub but not on mm hub. So export a function for KFD flush TLB only on specific hub. Signed-off-by: Oak Zeng Reviewed-by: Christian Konig Signed-off-by: Alex Deucher --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 12 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 9 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 6 +- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 5 +- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 16 +++-- drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c | 6 +- drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 6 +- drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 6 +- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 69 ++++++++++--------- 9 files changed, 78 insertions(+), 57 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 9d153cf39581..e262f2ac07a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -670,7 +670,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid, int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) { struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - int vmid; + int vmid, i; struct amdgpu_ring *ring = &adev->gfx.kiq.ring; uint32_t flush_type = 0; @@ -689,8 +689,9 @@ int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(kgd, vmid)) { if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(kgd, vmid) == pasid) { - amdgpu_gmc_flush_gpu_tlb(adev, vmid, - flush_type); + for (i = 0; i < adev->num_vmhubs; i++) + amdgpu_gmc_flush_gpu_tlb(adev, vmid, + i, flush_type); break; } } @@ -702,6 +703,7 @@ int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) { struct amdgpu_device *adev = (struct amdgpu_device *) kgd; + int i; if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { pr_err("non kfd vmid %d\n", vmid); @@ -723,7 +725,9 @@ int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) * TODO 2: support range-based invalidation, requires kfg2kgd * interface change */ - amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0); + for (i = 0; i < adev->num_vmhubs; i++) + amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index d79ab1da9e07..ac14d473a143 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -251,7 +251,9 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, } mb(); amdgpu_asic_flush_hdp(adev, NULL); - amdgpu_gmc_flush_gpu_tlb(adev, 0, 0); + for (i = 0; i < adev->num_vmhubs; i++) + amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0); + return 0; } @@ -312,7 +314,7 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, #ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS unsigned i,t,p; #endif - int r; + int r, i; if (!adev->gart.ready) { WARN(1, "trying to bind memory to uninitialized GART !\n"); @@ -336,7 +338,8 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, mb(); amdgpu_asic_flush_hdp(adev, NULL); - amdgpu_gmc_flush_gpu_tlb(adev, 0, 0); + for (i = 0; i < adev->num_vmhubs; i++) + amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index cac2ef84a1a1..b6e1d98ef01e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -89,8 +89,8 @@ struct amdgpu_vmhub { */ struct amdgpu_gmc_funcs { /* flush the vm tlb via mmio */ - void (*flush_gpu_tlb)(struct amdgpu_device *adev, - uint32_t vmid, uint32_t flush_type); + void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid, + uint32_t vmhub, uint32_t flush_type); /* flush the vm tlb via ring */ uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr); @@ -181,7 +181,7 @@ struct amdgpu_gmc { struct ras_common_if *mmhub_ras_if; }; -#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, type) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (type)) +#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type))) #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) #define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags)) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 7ae0e86ec6a7..79d3fbd3ba63 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -1748,9 +1748,12 @@ static void gfx_v10_0_init_csb(struct amdgpu_device *adev) static void gfx_v10_0_init_pg(struct amdgpu_device *adev) { + int i; + gfx_v10_0_init_csb(adev); - amdgpu_gmc_flush_gpu_tlb(adev, 0, 0); + for (i = 0; i < adev->num_vmhubs; i++) + amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0); /* TODO: init power gating */ return; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index ee16ec1a01bb..aafb16064338 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -230,8 +230,8 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, * * Flush the TLB for the requested page table. */ -static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, - uint32_t vmid, uint32_t flush_type) +static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, + uint32_t vmhub, uint32_t flush_type) { struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; struct dma_fence *fence; @@ -244,7 +244,14 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, mutex_lock(&adev->mman.gtt_window_lock); - gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB_0, 0); + if (vmhub == AMDGPU_MMHUB_0) { + gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB_0, 0); + mutex_unlock(&adev->mman.gtt_window_lock); + return; + } + + BUG_ON(vmhub != AMDGPU_GFXHUB_0); + if (!adev->mman.buffer_funcs_enabled || !adev->ib_pool_ready || adev->in_gpu_reset) { @@ -756,7 +763,8 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev) gfxhub_v2_0_set_fault_enable_default(adev, value); mmhub_v2_0_set_fault_enable_default(adev, value); - gmc_v10_0_flush_gpu_tlb(adev, 0, 0); + gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB_0, 0); + gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(adev->gmc.gart_size >> 20), diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index 14073b506afe..f0f6c6da9f30 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -362,8 +362,8 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev) return 0; } -static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev, - uint32_t vmid, uint32_t flush_type) +static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, + uint32_t vmhub, uint32_t flush_type) { WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); } @@ -571,7 +571,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) else gmc_v6_0_set_fault_enable_default(adev, true); - gmc_v6_0_flush_gpu_tlb(adev, 0, 0); + gmc_v6_0_flush_gpu_tlb(adev, 0, 0, 0); dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(adev->gmc.gart_size >> 20), (unsigned long long)table_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index ca32915fbecb..d935a2f29e5f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -433,8 +433,8 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) * * Flush the TLB for the requested page table (CIK). */ -static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, - uint32_t vmid, uint32_t flush_type) +static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, + uint32_t vmhub, uint32_t flush_type) { /* bits 0-15 are the VM contexts0-15 */ WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); @@ -677,7 +677,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) WREG32(mmCHUB_CONTROL, tmp); } - gmc_v7_0_flush_gpu_tlb(adev, 0, 0); + gmc_v7_0_flush_gpu_tlb(adev, 0, 0, 0); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(adev->gmc.gart_size >> 20), (unsigned long long)table_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 909a8764703e..2c60e45e3fa0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -635,8 +635,8 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) * * Flush the TLB for the requested page table (VI). */ -static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev, - uint32_t vmid, uint32_t flush_type) +static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, + uint32_t vmhub, uint32_t flush_type) { /* bits 0-15 are the VM contexts0-15 */ WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); @@ -921,7 +921,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) else gmc_v8_0_set_fault_enable_default(adev, true); - gmc_v8_0_flush_gpu_tlb(adev, 0, 0); + gmc_v8_0_flush_gpu_tlb(adev, 0, 0, 0); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(adev->gmc.gart_size >> 20), (unsigned long long)table_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index ba4f939f657f..7da355bf6d89 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -453,44 +453,45 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid, * * Flush the TLB for the requested page table using certain type. */ -static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, - uint32_t vmid, uint32_t flush_type) +static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, + uint32_t vmhub, uint32_t flush_type) { const unsigned eng = 17; - unsigned i, j; + u32 j, tmp; + struct amdgpu_vmhub *hub; - for (i = 0; i < adev->num_vmhubs; ++i) { - struct amdgpu_vmhub *hub = &adev->vmhub[i]; - u32 tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type); + BUG_ON(vmhub >= adev->num_vmhubs); - /* This is necessary for a HW workaround under SRIOV as well - * as GFXOFF under bare metal - */ - if (adev->gfx.kiq.ring.sched.ready && - (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) && - !adev->in_gpu_reset) { - uint32_t req = hub->vm_inv_eng0_req + eng; - uint32_t ack = hub->vm_inv_eng0_ack + eng; - - amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp, - 1 << vmid); - continue; - } + hub = &adev->vmhub[vmhub]; + tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type); - spin_lock(&adev->gmc.invalidate_lock); - WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); - for (j = 0; j < adev->usec_timeout; j++) { - tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng); - if (tmp & (1 << vmid)) - break; - udelay(1); - } - spin_unlock(&adev->gmc.invalidate_lock); - if (j < adev->usec_timeout) - continue; + /* This is necessary for a HW workaround under SRIOV as well + * as GFXOFF under bare metal + */ + if (adev->gfx.kiq.ring.sched.ready && + (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) && + !adev->in_gpu_reset) { + uint32_t req = hub->vm_inv_eng0_req + eng; + uint32_t ack = hub->vm_inv_eng0_ack + eng; + + amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp, + 1 << vmid); + return; + } - DRM_ERROR("Timeout waiting for VM flush ACK!\n"); + spin_lock(&adev->gmc.invalidate_lock); + WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); + for (j = 0; j < adev->usec_timeout; j++) { + tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng); + if (tmp & (1 << vmid)) + break; + udelay(1); } + spin_unlock(&adev->gmc.invalidate_lock); + if (j < adev->usec_timeout) + return; + + DRM_ERROR("Timeout waiting for VM flush ACK!\n"); } static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, @@ -1296,7 +1297,7 @@ static void gmc_v9_0_init_golden_registers(struct amdgpu_device *adev) */ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) { - int r; + int r, i; bool value; u32 tmp; @@ -1353,7 +1354,9 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) mmhub_v9_4_set_fault_enable_default(adev, value); else mmhub_v1_0_set_fault_enable_default(adev, value); - gmc_v9_0_flush_gpu_tlb(adev, 0, 0); + + for (i = 0; i < adev->num_vmhubs; ++i) + gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(adev->gmc.gart_size >> 20), -- 2.30.2