#include "kfd_iommu.h"
#define MQD_SIZE_ALIGNED 768
-static atomic_t kfd_device_suspended = ATOMIC_INIT(0);
+
+/*
+ * kfd_locked is used to lock the kfd driver during suspend or reset
+ * once locked, kfd driver will stop any further GPU execution.
+ * create process (open) will return -EAGAIN.
+ */
+static atomic_t kfd_locked = ATOMIC_INIT(0);
#ifdef KFD_SUPPORT_IOMMU_V2
static const struct kfd_device_info kaveri_device_info = {
int kgd2kfd_pre_reset(struct kfd_dev *kfd)
{
+ if (!kfd->init_complete)
+ return 0;
+ kgd2kfd_suspend(kfd);
+
+ /* hold dqm->lock to prevent further execution*/
+ dqm_lock(kfd->dqm);
+
+ kfd_signal_reset_event(kfd);
return 0;
}
+/*
+ * Fix me. KFD won't be able to resume existing process for now.
+ * We will keep all existing process in a evicted state and
+ * wait the process to be terminated.
+ */
+
int kgd2kfd_post_reset(struct kfd_dev *kfd)
{
+ int ret, count;
+
+ if (!kfd->init_complete)
+ return 0;
+
+ dqm_unlock(kfd->dqm);
+
+ ret = kfd_resume(kfd);
+ if (ret)
+ return ret;
+ count = atomic_dec_return(&kfd_locked);
+ WARN_ONCE(count != 0, "KFD reset ref. error");
return 0;
}
+bool kfd_is_locked(void)
+{
+ return (atomic_read(&kfd_locked) > 0);
+}
+
void kgd2kfd_suspend(struct kfd_dev *kfd)
{
if (!kfd->init_complete)
return;
/* For first KFD device suspend all the KFD processes */
- if (atomic_inc_return(&kfd_device_suspended) == 1)
+ if (atomic_inc_return(&kfd_locked) == 1)
kfd_suspend_all_processes();
kfd->dqm->ops.stop(kfd->dqm);
if (ret)
return ret;
- count = atomic_dec_return(&kfd_device_suspended);
+ count = atomic_dec_return(&kfd_locked);
WARN_ONCE(count < 0, "KFD suspend / resume ref. error");
if (count == 0)
ret = kfd_resume_all_processes();
mutex_unlock(&p->event_mutex);
kfd_unref_process(p);
}
+
+void kfd_signal_reset_event(struct kfd_dev *dev)
+{
+ struct kfd_hsa_hw_exception_data hw_exception_data;
+ struct kfd_process *p;
+ struct kfd_event *ev;
+ unsigned int temp;
+ uint32_t id, idx;
+
+ /* Whole gpu reset caused by GPU hang and memory is lost */
+ memset(&hw_exception_data, 0, sizeof(hw_exception_data));
+ hw_exception_data.gpu_id = dev->id;
+ hw_exception_data.memory_lost = 1;
+
+ idx = srcu_read_lock(&kfd_processes_srcu);
+ hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
+ mutex_lock(&p->event_mutex);
+ id = KFD_FIRST_NONSIGNAL_EVENT_ID;
+ idr_for_each_entry_continue(&p->event_idr, ev, id)
+ if (ev->type == KFD_EVENT_TYPE_HW_EXCEPTION) {
+ ev->hw_exception_data = hw_exception_data;
+ set_event(ev);
+ }
+ mutex_unlock(&p->event_mutex);
+ }
+ srcu_read_unlock(&kfd_processes_srcu, idx);
+}
void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid,
struct kfd_vm_fault_info *info);
+void kfd_signal_reset_event(struct kfd_dev *dev);
+
void kfd_flush_tlb(struct kfd_process_device *pdd);
int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p);
+bool kfd_is_locked(void);
+
/* Debugfs */
#if defined(CONFIG_DEBUG_FS)