drm/amdkfd: Add wavefront context save state retrieval ioctl
author    Jay Cornwall <Jay.Cornwall@amd.com>
          Tue, 2 May 2017 22:39:37 +0000 (17:39 -0500)
committer Alex Deucher <alexander.deucher@amd.com>
          Thu, 27 Sep 2018 02:09:15 +0000 (21:09 -0500)
Wavefront context save data is of interest to userspace clients for
debugging static wavefront state. The MQD contains two parameters
required to parse the control stack, and from gfx9 onwards the control
stack itself is kept in the MQD.

Add an ioctl to fetch the context save area and control stack used
sizes and to copy the control stack to a userspace address if it is
kept in the MQD.

Signed-off-by: Jay Cornwall <Jay.Cornwall@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
include/uapi/linux/kfd_ioctl.h

index 758398bdb39b68ad7d371192d69b9186b4b21ddd..14d5b5fa822d4e8722cc2d6a3e6cc1cba0b1b7bd 100644 (file)
@@ -447,6 +447,24 @@ static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
        return retval;
 }
 
+static int kfd_ioctl_get_queue_wave_state(struct file *filep,
+                                         struct kfd_process *p, void *data)
+{
+       struct kfd_ioctl_get_queue_wave_state_args *args = data;
+       int r;
+
+       mutex_lock(&p->mutex);
+
+       r = pqm_get_wave_state(&p->pqm, args->queue_id,
+                              (void __user *)args->ctl_stack_address,
+                              &args->ctl_stack_used_size,
+                              &args->save_area_used_size);
+
+       mutex_unlock(&p->mutex);
+
+       return r;
+}
+
 static int kfd_ioctl_set_memory_policy(struct file *filep,
                                        struct kfd_process *p, void *data)
 {
@@ -1615,6 +1633,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
        AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_CU_MASK,
                        kfd_ioctl_set_cu_mask, 0),
 
+       AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_QUEUE_WAVE_STATE,
+                       kfd_ioctl_get_queue_wave_state, 0)
+
 };
 
 #define AMDKFD_CORE_IOCTL_COUNT        ARRAY_SIZE(amdkfd_ioctls)
index ec0d62a16e538c305f631b831432df0566b051c8..408888911361f140d733731025fd3cb71ae30912 100644 (file)
@@ -1528,6 +1528,41 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm,
        return retval;
 }
 
+static int get_wave_state(struct device_queue_manager *dqm,
+                         struct queue *q,
+                         void __user *ctl_stack,
+                         u32 *ctl_stack_used_size,
+                         u32 *save_area_used_size)
+{
+       struct mqd_manager *mqd;
+       int r;
+
+       dqm_lock(dqm);
+
+       if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE ||
+           q->properties.is_active || !q->device->cwsr_enabled) {
+               r = -EINVAL;
+               goto dqm_unlock;
+       }
+
+       mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
+       if (!mqd) {
+               r = -ENOMEM;
+               goto dqm_unlock;
+       }
+
+       if (!mqd->get_wave_state) {
+               r = -EINVAL;
+               goto dqm_unlock;
+       }
+
+       r = mqd->get_wave_state(mqd, q->mqd, ctl_stack, ctl_stack_used_size,
+                               save_area_used_size);
+
+dqm_unlock:
+       dqm_unlock(dqm);
+       return r;
+}
 
 static int process_termination_cpsch(struct device_queue_manager *dqm,
                struct qcm_process_device *qpd)
@@ -1649,6 +1684,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
                dqm->ops.process_termination = process_termination_cpsch;
                dqm->ops.evict_process_queues = evict_process_queues_cpsch;
                dqm->ops.restore_process_queues = restore_process_queues_cpsch;
+               dqm->ops.get_wave_state = get_wave_state;
                break;
        case KFD_SCHED_POLICY_NO_HWS:
                /* initialize dqm for no cp scheduling */
@@ -1668,6 +1704,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
                dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
                dqm->ops.restore_process_queues =
                        restore_process_queues_nocpsch;
+               dqm->ops.get_wave_state = get_wave_state;
                break;
        default:
                pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
index 00da3169a0044ace318a8f0b19fde3e8dfab227b..e7bd19d09845a2e2ea2f66a459ffcb01d8dcfb65 100644 (file)
@@ -82,6 +82,8 @@ struct device_process_node {
  *
  * @restore_process_queues: Restore all evicted queues of a process
  *
+ * @get_wave_state: Retrieves context save state and optionally copies the
+ * control stack, if kept in the MQD, to the given userspace address.
  */
 
 struct device_queue_manager_ops {
@@ -137,6 +139,12 @@ struct device_queue_manager_ops {
                                    struct qcm_process_device *qpd);
        int (*restore_process_queues)(struct device_queue_manager *dqm,
                                      struct qcm_process_device *qpd);
+
+       int     (*get_wave_state)(struct device_queue_manager *dqm,
+                                 struct queue *q,
+                                 void __user *ctl_stack,
+                                 u32 *ctl_stack_used_size,
+                                 u32 *save_area_used_size);
 };
 
 struct device_queue_manager_asic_ops {
index 4e84052d4e210e471b326e4715903d66830b7365..f8261313ae7b128814399cc8dae1df2c922f9c69 100644 (file)
@@ -43,6 +43,9 @@
  *
  * @is_occupied: Checks if the relevant HQD slot is occupied.
  *
+ * @get_wave_state: Retrieves context save state and optionally copies the
+ * control stack, if kept in the MQD, to the given userspace address.
+ *
  * @mqd_mutex: Mqd manager mutex.
  *
  * @dev: The kfd device structure coupled with this module.
@@ -85,6 +88,11 @@ struct mqd_manager {
                                uint64_t queue_address, uint32_t pipe_id,
                                uint32_t queue_id);
 
+       int     (*get_wave_state)(struct mqd_manager *mm, void *mqd,
+                                 void __user *ctl_stack,
+                                 u32 *ctl_stack_used_size,
+                                 u32 *save_area_used_size);
+
 #if defined(CONFIG_DEBUG_FS)
        int     (*debugfs_show_mqd)(struct seq_file *m, void *data);
 #endif
index 0cedb37cf513563dc6fea50e6b40ef0889c3bb61..f381c1cb27bdc867d67308e4f0cc2176e777dc5c 100644 (file)
@@ -266,6 +266,28 @@ static bool is_occupied(struct mqd_manager *mm, void *mqd,
                pipe_id, queue_id);
 }
 
+static int get_wave_state(struct mqd_manager *mm, void *mqd,
+                         void __user *ctl_stack,
+                         u32 *ctl_stack_used_size,
+                         u32 *save_area_used_size)
+{
+       struct v9_mqd *m;
+
+       /* Control stack is located one page after MQD. */
+       void *mqd_ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE);
+
+       m = get_mqd(mqd);
+
+       *ctl_stack_used_size = m->cp_hqd_cntl_stack_size -
+               m->cp_hqd_cntl_stack_offset;
+       *save_area_used_size = m->cp_hqd_wg_state_offset;
+
+       if (copy_to_user(ctl_stack, mqd_ctl_stack, m->cp_hqd_cntl_stack_size))
+               return -EFAULT;
+
+       return 0;
+}
+
 static int init_mqd_hiq(struct mqd_manager *mm, void **mqd,
                        struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
                        struct queue_properties *q)
@@ -435,6 +457,7 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
                mqd->update_mqd = update_mqd;
                mqd->destroy_mqd = destroy_mqd;
                mqd->is_occupied = is_occupied;
+               mqd->get_wave_state = get_wave_state;
 #if defined(CONFIG_DEBUG_FS)
                mqd->debugfs_show_mqd = debugfs_show_mqd;
 #endif
index b81fda3754dac850c112b56deef4c84c0c77a50f..6469b3456f00e8174751bd023424bc98cc0260e4 100644 (file)
@@ -269,6 +269,28 @@ static bool is_occupied(struct mqd_manager *mm, void *mqd,
                pipe_id, queue_id);
 }
 
+static int get_wave_state(struct mqd_manager *mm, void *mqd,
+                         void __user *ctl_stack,
+                         u32 *ctl_stack_used_size,
+                         u32 *save_area_used_size)
+{
+       struct vi_mqd *m;
+
+       m = get_mqd(mqd);
+
+       *ctl_stack_used_size = m->cp_hqd_cntl_stack_size -
+               m->cp_hqd_cntl_stack_offset;
+       *save_area_used_size = m->cp_hqd_wg_state_offset -
+               m->cp_hqd_cntl_stack_size;
+
+       /* Control stack is not copied to user mode for GFXv8 because
+        * it's part of the context save area that is already
+        * accessible to user mode
+        */
+
+       return 0;
+}
+
 static int init_mqd_hiq(struct mqd_manager *mm, void **mqd,
                        struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr,
                        struct queue_properties *q)
@@ -436,6 +458,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
                mqd->update_mqd = update_mqd;
                mqd->destroy_mqd = destroy_mqd;
                mqd->is_occupied = is_occupied;
+               mqd->get_wave_state = get_wave_state;
 #if defined(CONFIG_DEBUG_FS)
                mqd->debugfs_show_mqd = debugfs_show_mqd;
 #endif
index 6f3a5bd489bd6f7c0a6407f7e362e2b188bc87d3..968098bf76dc56e368ec3bbad2b25a23711ee827 100644 (file)
@@ -862,6 +862,11 @@ int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid,
                        struct queue_properties *p);
 struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm,
                                                unsigned int qid);
+int pqm_get_wave_state(struct process_queue_manager *pqm,
+                      unsigned int qid,
+                      void __user *ctl_stack,
+                      u32 *ctl_stack_used_size,
+                      u32 *save_area_used_size);
 
 int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
                                unsigned int fence_value,
index c8cad9c078ae367f9096a9061b05992dc46c3615..fcaaf93681ac7f50c50c8c901ed3205e7c6e8b06 100644 (file)
@@ -408,6 +408,28 @@ struct kernel_queue *pqm_get_kernel_queue(
        return NULL;
 }
 
+int pqm_get_wave_state(struct process_queue_manager *pqm,
+                      unsigned int qid,
+                      void __user *ctl_stack,
+                      u32 *ctl_stack_used_size,
+                      u32 *save_area_used_size)
+{
+       struct process_queue_node *pqn;
+
+       pqn = get_queue_by_qid(pqm, qid);
+       if (!pqn) {
+               pr_debug("amdkfd: No queue %d exists for operation\n",
+                        qid);
+               return -EFAULT;
+       }
+
+       return pqn->q->device->dqm->ops.get_wave_state(pqn->q->device->dqm,
+                                                      pqn->q,
+                                                      ctl_stack,
+                                                      ctl_stack_used_size,
+                                                      save_area_used_size);
+}
+
 #if defined(CONFIG_DEBUG_FS)
 
 int pqm_debugfs_mqds(struct seq_file *m, void *data)
index 01674b56e14f2f47c053b8104490a89e6a5052c9..f5ff8a76e208fc45584bb76503860c6fdf6650fd 100644 (file)
@@ -82,6 +82,14 @@ struct kfd_ioctl_set_cu_mask_args {
        __u64 cu_mask_ptr;              /* to KFD */
 };
 
+struct kfd_ioctl_get_queue_wave_state_args {
+       __u64 ctl_stack_address;        /* to KFD */
+       __u32 ctl_stack_used_size;      /* from KFD */
+       __u32 save_area_used_size;      /* from KFD */
+       __u32 queue_id;                 /* to KFD */
+       __u32 pad;
+};
+
 /* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */
 #define KFD_IOC_CACHE_POLICY_COHERENT 0
 #define KFD_IOC_CACHE_POLICY_NONCOHERENT 1
@@ -475,7 +483,10 @@ struct kfd_ioctl_unmap_memory_from_gpu_args {
 #define AMDKFD_IOC_SET_CU_MASK         \
                AMDKFD_IOW(0x1A, struct kfd_ioctl_set_cu_mask_args)
 
+#define AMDKFD_IOC_GET_QUEUE_WAVE_STATE                \
+               AMDKFD_IOWR(0x1B, struct kfd_ioctl_get_queue_wave_state_args)
+
 #define AMDKFD_COMMAND_START           0x01
-#define AMDKFD_COMMAND_END             0x1B
+#define AMDKFD_COMMAND_END             0x1C
 
 #endif