On GFX7 the CP does not perform a TC flush when queues are unmapped.
To prevent TC evictions from accessing an invalid VMID, flush the TC
explicitly before the VMID is released.
v2: Removed unnecessary list_for_each_entry_safe
v3: Moved allocation to kfd_process_device_init_vm
Signed-off-by: Amber Lin <Amber.Lin@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
return 0;
}
+static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
+ struct qcm_process_device *qpd)
+{
+ uint32_t len;
+
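+	/* The IB buffer should have been reserved in kfd_process_device_init_vm */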
+ if (!qpd->ib_kaddr)
+ return -ENOMEM;
+
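+	/* Build a RELEASE_MEM packet in the IB that flushes and invalidates the TC */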
+ len = pm_create_release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
+
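+	/* Submit the IB on MEC1 while the VMID mapping is still valid */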
+ return kdev->kfd2kgd->submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
+ qpd->ib_base, (uint32_t *)qpd->ib_kaddr, len);
+}
+
static void deallocate_vmid(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
struct queue *q)
{
int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;
+ /* On GFX v7, CP doesn't flush TC at dequeue */
+ if (q->device->device_info->asic_family == CHIP_HAWAII)
+ if (flush_texture_cache_nocpsch(q->device, qpd))
+ pr_err("Failed to flush TC\n");
+
kfd_flush_tlb(qpd_to_pdd(qpd));
/* Release the vmid mapping */
static int start_nocpsch(struct device_queue_manager *dqm)
{
init_interrupts(dqm);
- return 0;
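+	/* The packet manager is now needed here too, to build the TC flush IB */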
+ return pm_init(&dqm->packets, dqm);
}
static int stop_nocpsch(struct device_queue_manager *dqm)
{
+ pm_uninit(&dqm->packets);
return 0;
}
return retval;
}
+/* pm_create_release_mem - Create a RELEASE_MEM packet and return the size
+ * of this packet
+ * @gpu_addr - GPU address of the packet. It's a virtual address.
+ * @buffer - buffer to fill up with the packet. It's a CPU kernel pointer.
+ * Return - length of the packet in dwords
+ */
+uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer)
+{
+ struct pm4_mec_release_mem *packet;
+
+ WARN_ON(!buffer);
+
+ packet = (struct pm4_mec_release_mem *)buffer;
+ memset(buffer, 0, sizeof(*packet));
+
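+	/* PM4 header carries the RELEASE_MEM opcode and the packet size */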
+ packet->header.u32All = build_pm4_header(IT_RELEASE_MEM,
+ sizeof(*packet));
+
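+	/* End-of-pipe event that flushes and invalidates both TCL1 and TC */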
+ packet->bitfields2.event_type = CACHE_FLUSH_AND_INV_TS_EVENT;
+ packet->bitfields2.event_index = event_index___release_mem__end_of_pipe;
+ packet->bitfields2.tcl1_action_ena = 1;
+ packet->bitfields2.tc_action_ena = 1;
+ packet->bitfields2.cache_policy = cache_policy___release_mem__lru;
+ packet->bitfields2.atc = 0;
+
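+	/* Write the low 32 bits of data and interrupt after the write is confirmed */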
+ packet->bitfields3.data_sel = data_sel___release_mem__send_32_bit_low;
+ packet->bitfields3.int_sel =
+ int_sel___release_mem__send_interrupt_after_write_confirm;
+
+ packet->bitfields4.address_lo_32b = (gpu_addr & 0xffffffff) >> 2;
+ packet->address_hi = upper_32_bits(gpu_addr);
+
+ packet->data_lo = 0;
+
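+	/* Length of the packet in dwords */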
+ return sizeof(*packet) / sizeof(unsigned int);
+}
+
int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
{
pm->dqm = dqm;
/* IB memory */
uint64_t ib_base;
+ void *ib_kaddr;
};
/* KFD Memory Eviction */
void pm_release_ib(struct packet_manager *pm);
+uint32_t pm_create_release_mem(uint64_t gpu_addr, uint32_t *buffer);
+
uint64_t kfd_get_number_elems(struct kfd_dev *kfd);
/* Events */
return err;
}
+/* kfd_process_device_reserve_ib_mem - Reserve memory inside the
+ * process for IB usage. The memory reserved is for KFD to submit
+ * IBs to AMDGPU from the kernel. If the memory is reserved
+ * successfully, ib_kaddr will have the CPU/kernel
+ * address. Check ib_kaddr before accessing the memory.
+ */
+static int kfd_process_device_reserve_ib_mem(struct kfd_process_device *pdd)
+{
+ struct qcm_process_device *qpd = &pdd->qpd;
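+	/* The IB must be CPU-writable and GPU-executable */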
+ uint32_t flags = ALLOC_MEM_FLAGS_GTT |
+ ALLOC_MEM_FLAGS_NO_SUBSTITUTE |
+ ALLOC_MEM_FLAGS_WRITABLE |
+ ALLOC_MEM_FLAGS_EXECUTABLE;
+ void *kaddr;
+ int ret;
+
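+	/* Already reserved, or no IB aperture set up for this process */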
+ if (qpd->ib_kaddr || !qpd->ib_base)
+ return 0;
+
+ /* ib_base is only set for dGPU */
+ ret = kfd_process_alloc_gpuvm(pdd, qpd->ib_base, PAGE_SIZE, flags,
+ &kaddr);
+ if (ret)
+ return ret;
+
+ qpd->ib_kaddr = kaddr;
+
+ return 0;
+}
+
struct kfd_process *kfd_create_process(struct file *filep)
{
struct kfd_process *process;
return ret;
}
+ ret = kfd_process_device_reserve_ib_mem(pdd);
+ if (ret)
+ goto err_reserve_ib_mem;
ret = kfd_process_device_init_cwsr_dgpu(pdd);
if (ret)
goto err_init_cwsr;
return 0;
err_init_cwsr:
+err_reserve_ib_mem:
kfd_process_device_free_bos(pdd);
if (!drm_file)
dev->kfd2kgd->destroy_process_vm(dev->kgd, pdd->vm);