This is for kfd to reuse amdgpu TLB invalidation function.
On gfx10, kfd only needs to flush TLB on gfx hub but not
on mm hub. So export a function for KFD flush TLB only on
specific hub.
Signed-off-by: Oak Zeng <Oak.Zeng@amd.com>
Reviewed-by: Christian Konig <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
{
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
- int vmid;
+ int vmid, i;
struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
uint32_t flush_type = 0;
if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {
if (kgd_gfx_v9_get_atc_vmid_pasid_mapping_pasid(kgd, vmid)
== pasid) {
- amdgpu_gmc_flush_gpu_tlb(adev, vmid,
- flush_type);
+ for (i = 0; i < adev->num_vmhubs; i++)
+ amdgpu_gmc_flush_gpu_tlb(adev, vmid,
+ i, flush_type);
break;
}
}
int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
{
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
+ int i;
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
pr_err("non kfd vmid %d\n", vmid);
* TODO 2: support range-based invalidation, requires kfg2kgd
* interface change
*/
- amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0);
+ for (i = 0; i < adev->num_vmhubs; i++)
+ amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
+
return 0;
}
}
mb();
amdgpu_asic_flush_hdp(adev, NULL);
- amdgpu_gmc_flush_gpu_tlb(adev, 0, 0);
+ for (i = 0; i < adev->num_vmhubs; i++)
+ amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
+
return 0;
}
#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
unsigned i,t,p;
#endif
- int r;
+ int r, i;
if (!adev->gart.ready) {
WARN(1, "trying to bind memory to uninitialized GART !\n");
mb();
amdgpu_asic_flush_hdp(adev, NULL);
- amdgpu_gmc_flush_gpu_tlb(adev, 0, 0);
+ for (i = 0; i < adev->num_vmhubs; i++)
+ amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
return 0;
}
*/
struct amdgpu_gmc_funcs {
/* flush the vm tlb via mmio */
- void (*flush_gpu_tlb)(struct amdgpu_device *adev,
- uint32_t vmid, uint32_t flush_type);
+ void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t vmhub, uint32_t flush_type);
/* flush the vm tlb via ring */
uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
uint64_t pd_addr);
struct ras_common_if *mmhub_ras_if;
};
-#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, type) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (type))
+#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))
#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
#define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
#define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags))
static void gfx_v10_0_init_pg(struct amdgpu_device *adev)
{
+ int i;
+
gfx_v10_0_init_csb(adev);
- amdgpu_gmc_flush_gpu_tlb(adev, 0, 0);
+ for (i = 0; i < adev->num_vmhubs; i++)
+ amdgpu_gmc_flush_gpu_tlb(adev, 0, i, 0);
/* TODO: init power gating */
return;
*
* Flush the TLB for the requested page table.
*/
-static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev,
- uint32_t vmid, uint32_t flush_type)
+static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t vmhub, uint32_t flush_type)
{
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
struct dma_fence *fence;
mutex_lock(&adev->mman.gtt_window_lock);
- gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB_0, 0);
+ if (vmhub == AMDGPU_MMHUB_0) {
+ gmc_v10_0_flush_vm_hub(adev, vmid, AMDGPU_MMHUB_0, 0);
+ mutex_unlock(&adev->mman.gtt_window_lock);
+ return;
+ }
+
+ BUG_ON(vmhub != AMDGPU_GFXHUB_0);
+
if (!adev->mman.buffer_funcs_enabled ||
!adev->ib_pool_ready ||
adev->in_gpu_reset) {
gfxhub_v2_0_set_fault_enable_default(adev, value);
mmhub_v2_0_set_fault_enable_default(adev, value);
- gmc_v10_0_flush_gpu_tlb(adev, 0, 0);
+ gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_MMHUB_0, 0);
+ gmc_v10_0_flush_gpu_tlb(adev, 0, AMDGPU_GFXHUB_0, 0);
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
(unsigned)(adev->gmc.gart_size >> 20),
return 0;
}
-static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev,
- uint32_t vmid, uint32_t flush_type)
+static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t vmhub, uint32_t flush_type)
{
WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
}
else
gmc_v6_0_set_fault_enable_default(adev, true);
- gmc_v6_0_flush_gpu_tlb(adev, 0, 0);
+ gmc_v6_0_flush_gpu_tlb(adev, 0, 0, 0);
dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n",
(unsigned)(adev->gmc.gart_size >> 20),
(unsigned long long)table_addr);
*
* Flush the TLB for the requested page table (CIK).
*/
-static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev,
- uint32_t vmid, uint32_t flush_type)
+static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t vmhub, uint32_t flush_type)
{
/* bits 0-15 are the VM contexts0-15 */
WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
WREG32(mmCHUB_CONTROL, tmp);
}
- gmc_v7_0_flush_gpu_tlb(adev, 0, 0);
+ gmc_v7_0_flush_gpu_tlb(adev, 0, 0, 0);
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
(unsigned)(adev->gmc.gart_size >> 20),
(unsigned long long)table_addr);
*
* Flush the TLB for the requested page table (VI).
*/
-static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev,
- uint32_t vmid, uint32_t flush_type)
+static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t vmhub, uint32_t flush_type)
{
/* bits 0-15 are the VM contexts0-15 */
WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid);
else
gmc_v8_0_set_fault_enable_default(adev, true);
- gmc_v8_0_flush_gpu_tlb(adev, 0, 0);
+ gmc_v8_0_flush_gpu_tlb(adev, 0, 0, 0);
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
(unsigned)(adev->gmc.gart_size >> 20),
(unsigned long long)table_addr);
*
* Flush the TLB for the requested page table using certain type.
*/
-static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev,
- uint32_t vmid, uint32_t flush_type)
+static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
+ uint32_t vmhub, uint32_t flush_type)
{
const unsigned eng = 17;
- unsigned i, j;
+ u32 j, tmp;
+ struct amdgpu_vmhub *hub;
- for (i = 0; i < adev->num_vmhubs; ++i) {
- struct amdgpu_vmhub *hub = &adev->vmhub[i];
- u32 tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type);
+ BUG_ON(vmhub >= adev->num_vmhubs);
- /* This is necessary for a HW workaround under SRIOV as well
- * as GFXOFF under bare metal
- */
- if (adev->gfx.kiq.ring.sched.ready &&
- (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
- !adev->in_gpu_reset) {
- uint32_t req = hub->vm_inv_eng0_req + eng;
- uint32_t ack = hub->vm_inv_eng0_ack + eng;
-
- amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp,
- 1 << vmid);
- continue;
- }
+ hub = &adev->vmhub[vmhub];
+ tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type);
- spin_lock(&adev->gmc.invalidate_lock);
- WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
- for (j = 0; j < adev->usec_timeout; j++) {
- tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
- if (tmp & (1 << vmid))
- break;
- udelay(1);
- }
- spin_unlock(&adev->gmc.invalidate_lock);
- if (j < adev->usec_timeout)
- continue;
+ /* This is necessary for a HW workaround under SRIOV as well
+ * as GFXOFF under bare metal
+ */
+ if (adev->gfx.kiq.ring.sched.ready &&
+ (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) &&
+ !adev->in_gpu_reset) {
+ uint32_t req = hub->vm_inv_eng0_req + eng;
+ uint32_t ack = hub->vm_inv_eng0_ack + eng;
+
+ amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp,
+ 1 << vmid);
+ return;
+ }
- DRM_ERROR("Timeout waiting for VM flush ACK!\n");
+ spin_lock(&adev->gmc.invalidate_lock);
+ WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
+ for (j = 0; j < adev->usec_timeout; j++) {
+ tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng);
+ if (tmp & (1 << vmid))
+ break;
+ udelay(1);
}
+ spin_unlock(&adev->gmc.invalidate_lock);
+ if (j < adev->usec_timeout)
+ return;
+
+ DRM_ERROR("Timeout waiting for VM flush ACK!\n");
}
static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
*/
static int gmc_v9_0_gart_enable(struct amdgpu_device *adev)
{
- int r;
+ int r, i;
bool value;
u32 tmp;
mmhub_v9_4_set_fault_enable_default(adev, value);
else
mmhub_v1_0_set_fault_enable_default(adev, value);
- gmc_v9_0_flush_gpu_tlb(adev, 0, 0);
+
+ for (i = 0; i < adev->num_vmhubs; ++i)
+ gmc_v9_0_flush_gpu_tlb(adev, 0, i, 0);
DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
(unsigned)(adev->gmc.gart_size >> 20),