From: Jay Cornwall Date: Tue, 2 May 2017 22:39:37 +0000 (-0500) Subject: drm/amdkfd: Add wavefront context save state retrieval ioctl X-Git-Url: http://git.lede-project.org./?a=commitdiff_plain;h=5df099e8bc83f4f3af8711ee0b9b8faef359ffff;p=openwrt%2Fstaging%2Fblogic.git drm/amdkfd: Add wavefront context save state retrieval ioctl Wavefront context save data is of interest to userspace clients for debugging static wavefront state. The MQD contains two parameters required to parse the control stack and the control stack itself is kept in the MQD from gfx9 onwards. Add an ioctl to fetch the context save area and control stack offsets and to copy the control stack to a userspace address if it is kept in the MQD. Signed-off-by: Jay Cornwall Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 758398bdb39b..14d5b5fa822d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -447,6 +447,24 @@ static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p, return retval; } +static int kfd_ioctl_get_queue_wave_state(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_get_queue_wave_state_args *args = data; + int r; + + mutex_lock(&p->mutex); + + r = pqm_get_wave_state(&p->pqm, args->queue_id, + (void __user *)args->ctl_stack_address, + &args->ctl_stack_used_size, + &args->save_area_used_size); + + mutex_unlock(&p->mutex); + + return r; +} + static int kfd_ioctl_set_memory_policy(struct file *filep, struct kfd_process *p, void *data) { @@ -1615,6 +1633,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_CU_MASK, kfd_ioctl_set_cu_mask, 0), + AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_QUEUE_WAVE_STATE, + kfd_ioctl_get_queue_wave_state, 0) + }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index ec0d62a16e53..408888911361 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1528,6 +1528,41 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm, return retval; } +static int get_wave_state(struct device_queue_manager *dqm, + struct queue *q, + void __user *ctl_stack, + u32 *ctl_stack_used_size, + u32 *save_area_used_size) +{ + struct mqd_manager *mqd; + int r; + + dqm_lock(dqm); + + if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE || + q->properties.is_active || !q->device->cwsr_enabled) { + r = -EINVAL; + goto dqm_unlock; + } + + mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); + if (!mqd) { + r = -ENOMEM; + goto dqm_unlock; + } + + if (!mqd->get_wave_state) { + r = -EINVAL; + goto dqm_unlock; + } + + r = mqd->get_wave_state(mqd, q->mqd, ctl_stack, ctl_stack_used_size, + save_area_used_size); + +dqm_unlock: + dqm_unlock(dqm); + return r; +} static int process_termination_cpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd) @@ -1649,6 +1684,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.process_termination = process_termination_cpsch; dqm->ops.evict_process_queues = evict_process_queues_cpsch; dqm->ops.restore_process_queues = restore_process_queues_cpsch; + dqm->ops.get_wave_state = get_wave_state; break; case KFD_SCHED_POLICY_NO_HWS: /* initialize dqm for no cp scheduling */ @@ -1668,6 +1704,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.evict_process_queues = evict_process_queues_nocpsch; dqm->ops.restore_process_queues = restore_process_queues_nocpsch; + dqm->ops.get_wave_state = get_wave_state; break; default: pr_err("Invalid scheduling policy %d\n", dqm->sched_policy); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 00da3169a004..e7bd19d09845 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -82,6 +82,8 @@ struct device_process_node { * * @restore_process_queues: Restore all evicted queues queues of a process * + * @get_wave_state: Retrieves context save state and optionally copies the + * control stack, if kept in the MQD, to the given userspace address. */ struct device_queue_manager_ops { @@ -137,6 +139,12 @@ struct device_queue_manager_ops { struct qcm_process_device *qpd); int (*restore_process_queues)(struct device_queue_manager *dqm, struct qcm_process_device *qpd); + + int (*get_wave_state)(struct device_queue_manager *dqm, + struct queue *q, + void __user *ctl_stack, + u32 *ctl_stack_used_size, + u32 *save_area_used_size); }; struct device_queue_manager_asic_ops { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h index 4e84052d4e21..f8261313ae7b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h @@ -43,6 +43,9 @@ * * @is_occupied: Checks if the relevant HQD slot is occupied. * + * @get_wave_state: Retrieves context save state and optionally copies the + * control stack, if kept in the MQD, to the given userspace address. + * * @mqd_mutex: Mqd manager mutex. * * @dev: The kfd device structure coupled with this module. @@ -85,6 +88,11 @@ struct mqd_manager { uint64_t queue_address, uint32_t pipe_id, uint32_t queue_id); + int (*get_wave_state)(struct mqd_manager *mm, void *mqd, + void __user *ctl_stack, + u32 *ctl_stack_used_size, + u32 *save_area_used_size); + #if defined(CONFIG_DEBUG_FS) int (*debugfs_show_mqd)(struct seq_file *m, void *data); #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 0cedb37cf513..f381c1cb27bd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -266,6 +266,28 @@ static bool is_occupied(struct mqd_manager *mm, void *mqd, pipe_id, queue_id); } +static int get_wave_state(struct mqd_manager *mm, void *mqd, + void __user *ctl_stack, + u32 *ctl_stack_used_size, + u32 *save_area_used_size) +{ + struct v9_mqd *m; + + /* Control stack is located one page after MQD. */ + void *mqd_ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE); + + m = get_mqd(mqd); + + *ctl_stack_used_size = m->cp_hqd_cntl_stack_size - + m->cp_hqd_cntl_stack_offset; + *save_area_used_size = m->cp_hqd_wg_state_offset; + + if (copy_to_user(ctl_stack, mqd_ctl_stack, m->cp_hqd_cntl_stack_size)) + return -EFAULT; + + return 0; +} + static int init_mqd_hiq(struct mqd_manager *mm, void **mqd, struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, struct queue_properties *q) @@ -435,6 +457,7 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, mqd->update_mqd = update_mqd; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; + mqd->get_wave_state = get_wave_state; #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index b81fda3754da..6469b3456f00 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -269,6 +269,28 @@ static bool is_occupied(struct mqd_manager *mm, void *mqd, pipe_id, queue_id); } +static int get_wave_state(struct mqd_manager *mm, void *mqd, + void __user *ctl_stack, + u32 *ctl_stack_used_size, + u32 *save_area_used_size) +{ + struct vi_mqd *m; + + m = get_mqd(mqd); + + *ctl_stack_used_size = m->cp_hqd_cntl_stack_size - + m->cp_hqd_cntl_stack_offset; + *save_area_used_size = m->cp_hqd_wg_state_offset - + m->cp_hqd_cntl_stack_size; + + /* Control stack is not copied to user mode for GFXv8 because + * it's part of the context save area that is already + * accessible to user mode + */ + + return 0; +} + static int init_mqd_hiq(struct mqd_manager *mm, void **mqd, struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, struct queue_properties *q) @@ -436,6 +458,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, mqd->update_mqd = update_mqd; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; + mqd->get_wave_state = get_wave_state; #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 6f3a5bd489bd..968098bf76dc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -862,6 +862,11 @@ int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid, struct queue_properties *p); struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm, unsigned int qid); +int pqm_get_wave_state(struct process_queue_manager *pqm, + unsigned int qid, + void __user *ctl_stack, + u32 *ctl_stack_used_size, + u32 *save_area_used_size); int amdkfd_fence_wait_timeout(unsigned int *fence_addr, unsigned int fence_value, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index c8cad9c078ae..fcaaf93681ac 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -408,6 +408,28 @@ struct kernel_queue *pqm_get_kernel_queue( return NULL; } +int pqm_get_wave_state(struct process_queue_manager *pqm, + unsigned int qid, + void __user *ctl_stack, + u32 *ctl_stack_used_size, + u32 *save_area_used_size) +{ + struct process_queue_node *pqn; + + pqn = get_queue_by_qid(pqm, qid); + if (!pqn) { + pr_debug("amdkfd: No queue %d exists for operation\n", + qid); + return -EFAULT; + } + + return pqn->q->device->dqm->ops.get_wave_state(pqn->q->device->dqm, + pqn->q, + ctl_stack, + ctl_stack_used_size, + save_area_used_size); +} + #if defined(CONFIG_DEBUG_FS) int pqm_debugfs_mqds(struct seq_file *m, void *data) diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 01674b56e14f..f5ff8a76e208 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -82,6 +82,14 @@ struct kfd_ioctl_set_cu_mask_args { __u64 cu_mask_ptr; /* to KFD */ }; +struct kfd_ioctl_get_queue_wave_state_args { + uint64_t ctl_stack_address; /* to KFD */ + uint32_t ctl_stack_used_size; /* from KFD */ + uint32_t save_area_used_size; /* from KFD */ + uint32_t queue_id; /* to KFD */ + uint32_t pad; +}; + /* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */ #define KFD_IOC_CACHE_POLICY_COHERENT 0 #define KFD_IOC_CACHE_POLICY_NONCOHERENT 1 @@ -475,7 +483,10 @@ struct kfd_ioctl_unmap_memory_from_gpu_args { #define AMDKFD_IOC_SET_CU_MASK \ AMDKFD_IOW(0x1A, struct kfd_ioctl_set_cu_mask_args) +#define AMDKFD_IOC_GET_QUEUE_WAVE_STATE \ + AMDKFD_IOWR(0x1B, struct kfd_ioctl_get_queue_wave_state_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x1B +#define AMDKFD_COMMAND_END 0x1C #endif