drm/amdkfd: fix zero reading of VMID and PASID for Hawaii
author Lan Xiao <Lan.Xiao@amd.com>
Thu, 12 Jul 2018 02:32:51 +0000 (22:32 -0400)
committer Oded Gabbay <oded.gabbay@gmail.com>
Thu, 12 Jul 2018 02:32:51 +0000 (22:32 -0400)
Upon a VM fault, the VMID and PASID written by HW are zeros on
Hawaii. Instead of reading them from ih_ring_entry, read them directly
from the registers. This workaround fixes the soft hang issues
caused by mishandled VM faults on Hawaii.

Signed-off-by: Lan Xiao <Lan.Xiao@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
drivers/gpu/drm/amd/amdkfd/kfd_device.c
drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
drivers/gpu/drm/amd/include/kgd_kfd_interface.h

index befc7c48b1cfad251746d73217d3d71cf7b2ee4e..b4a05c510c75fe62afc5accd1a1681a9ff185121 100644 (file)
@@ -145,6 +145,7 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
                uint32_t page_table_base);
 static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
 static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
+static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd);
 
 /* Because of REG_GET_FIELD() being used, we put this function in the
  * asic specific file.
@@ -216,7 +217,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
        .invalidate_tlbs = invalidate_tlbs,
        .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
        .submit_ib = amdgpu_amdkfd_submit_ib,
-       .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info
+       .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info,
+       .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg
 };
 
 struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
@@ -912,3 +914,19 @@ static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
        RREG32(mmVM_INVALIDATE_RESPONSE);
        return 0;
 }
+
+ /**
+  * read_vmid_from_vmfault_reg - read vmid from register
+  *
+  * @kgd: kgd device handle, used to look up the amdgpu_device
+  *
+  * Return: VMID field of VM_CONTEXT1_PROTECTION_FAULT_STATUS (CIK).
+  */
+static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd)
+{
+       struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+       uint32_t status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
+
+       return REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
+}
index cc33870e7edb95dce9fcbd8d1707d68cc8d1e8d3..5d2475d5392ce25cbb719c0fa519e0c0d94777fc 100644 (file)
 #include "cik_int.h"
 
 static bool cik_event_interrupt_isr(struct kfd_dev *dev,
-                                       const uint32_t *ih_ring_entry)
+                                       const uint32_t *ih_ring_entry,
+                                       uint32_t *patched_ihre,
+                                       bool *patched_flag)
 {
        const struct cik_ih_ring_entry *ihre =
                        (const struct cik_ih_ring_entry *)ih_ring_entry;
+       const struct kfd2kgd_calls *f2g = dev->kfd2kgd;
        unsigned int vmid, pasid;
 
+       /* This workaround is due to HW/FW limitation on Hawaii that
+        * VMID and PASID are not written into ih_ring_entry
+        */
+       if ((ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT ||
+               ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) &&
+               dev->device_info->asic_family == CHIP_HAWAII) {
+               struct cik_ih_ring_entry *tmp_ihre =
+                       (struct cik_ih_ring_entry *)patched_ihre;
+
+               *patched_flag = true;
+               *tmp_ihre = *ihre;
+
+               vmid = f2g->read_vmid_from_vmfault_reg(dev->kgd);
+               pasid = f2g->get_atc_vmid_pasid_mapping_pasid(dev->kgd, vmid);
+
+               tmp_ihre->ring_id &= 0x000000ff;
+               tmp_ihre->ring_id |= vmid << 8;
+               tmp_ihre->ring_id |= pasid << 16;
+
+               return (pasid != 0) &&
+                       vmid >= dev->vm_info.first_vmid_kfd &&
+                       vmid <= dev->vm_info.last_vmid_kfd;
+       }
+
        /* Only handle interrupts from KFD VMIDs */
        vmid  = (ihre->ring_id & 0x0000ff00) >> 8;
        if (vmid < dev->vm_info.first_vmid_kfd ||
index 48c505e83217efbd8a9e7c3874f021dac4bbecbb..6007511757606d1b25c357d06481f344a2bebfaf 100644 (file)
@@ -577,14 +577,24 @@ dqm_start_error:
 /* This is called directly from KGD at ISR. */
 void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
 {
+       uint32_t patched_ihre[KFD_MAX_RING_ENTRY_SIZE];
+       bool is_patched = false;
+
        if (!kfd->init_complete)
                return;
 
+       if (kfd->device_info->ih_ring_entry_size > sizeof(patched_ihre)) {
+               dev_err_once(kfd_device, "Ring entry too small\n");
+               return;
+       }
+
        spin_lock(&kfd->interrupt_lock);
 
        if (kfd->interrupts_active
-           && interrupt_is_wanted(kfd, ih_ring_entry)
-           && enqueue_ih_ring_entry(kfd, ih_ring_entry))
+           && interrupt_is_wanted(kfd, ih_ring_entry,
+                                  patched_ihre, &is_patched)
+           && enqueue_ih_ring_entry(kfd,
+                                    is_patched ? patched_ihre : ih_ring_entry))
                queue_work(kfd->ih_wq, &kfd->interrupt_work);
 
        spin_unlock(&kfd->interrupt_lock);
index d6b64e69276083bd98c86a85195562f557d4b54c..f836897bbf5833799e7cb94e45be4c10975b9c8c 100644 (file)
@@ -26,7 +26,9 @@
 
 
 static bool event_interrupt_isr_v9(struct kfd_dev *dev,
-                                       const uint32_t *ih_ring_entry)
+                                       const uint32_t *ih_ring_entry,
+                                       uint32_t *patched_ihre,
+                                       bool *patched_flag)
 {
        uint16_t source_id, client_id, pasid, vmid;
        const uint32_t *data = ih_ring_entry;
index db6d9336b80d2c7b6b12f7b131c07014cc40bd22..c56ac47cd3189779333acc9e8da89a8f9ad7c5be 100644 (file)
@@ -151,13 +151,15 @@ static void interrupt_wq(struct work_struct *work)
                                                                ih_ring_entry);
 }
 
-bool interrupt_is_wanted(struct kfd_dev *dev, const uint32_t *ih_ring_entry)
+bool interrupt_is_wanted(struct kfd_dev *dev,
+                       const uint32_t *ih_ring_entry,
+                       uint32_t *patched_ihre, bool *flag)
 {
        /* integer and bitwise OR so there is no boolean short-circuiting */
        unsigned int wanted = 0;
 
        wanted |= dev->device_info->event_interrupt_class->interrupt_isr(dev,
-                                                               ih_ring_entry);
+                                        ih_ring_entry, patched_ihre, flag);
 
        return wanted != 0;
 }
index 91a3368421b1f815035655d58062f543befa4fbf..cd5121d925e048f534d0015665b1cbe64c4dbfca 100644 (file)
@@ -180,9 +180,10 @@ enum cache_policy {
 
 struct kfd_event_interrupt_class {
        bool (*interrupt_isr)(struct kfd_dev *dev,
-                               const uint32_t *ih_ring_entry);
+                       const uint32_t *ih_ring_entry, uint32_t *patched_ihre,
+                       bool *patched_flag);
        void (*interrupt_wq)(struct kfd_dev *dev,
-                               const uint32_t *ih_ring_entry);
+                       const uint32_t *ih_ring_entry);
 };
 
 struct kfd_device_info {
@@ -806,7 +807,9 @@ int kfd_interrupt_init(struct kfd_dev *dev);
 void kfd_interrupt_exit(struct kfd_dev *dev);
 void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
 bool enqueue_ih_ring_entry(struct kfd_dev *kfd,        const void *ih_ring_entry);
-bool interrupt_is_wanted(struct kfd_dev *dev, const uint32_t *ih_ring_entry);
+bool interrupt_is_wanted(struct kfd_dev *dev,
+                               const uint32_t *ih_ring_entry,
+                               uint32_t *patched_ihre, bool *flag);
 
 /* Power Management */
 void kgd2kfd_suspend(struct kfd_dev *kfd);
index 28b11d1052884f5f40ebf3cb4a55e4152d883508..76a30cbeee19a93b3bf1c0b9e22c94bef545cd2e 100644 (file)
@@ -276,6 +276,10 @@ struct tile_config {
  * faults. On GFXv9 VM fault information is fully contained in the IH
  * packet and this function is not needed.
  *
+ * @read_vmid_from_vmfault_reg: On Hawaii the VMID is not set in the
+ * IH ring entry. This function allows the KFD ISR to get the VMID
+ * from the fault status register as early as possible.
+ *
  * This structure contains function pointers to services that the kgd driver
  * provides to amdkfd driver.
  *
@@ -394,6 +398,7 @@ struct kfd2kgd_calls {
 
        int (*get_vm_fault_info)(struct kgd_dev *kgd,
                        struct kfd_vm_fault_info *info);
+       uint32_t (*read_vmid_from_vmfault_reg)(struct kgd_dev *kgd);
 };
 
 /**