drm/amdgpu: Add kfd2kgd interface to acquire an existing VM
author Felix Kuehling <Felix.Kuehling@amd.com>
Thu, 15 Mar 2018 21:27:43 +0000 (17:27 -0400)
committer Oded Gabbay <oded.gabbay@gmail.com>
Thu, 15 Mar 2018 21:27:43 +0000 (17:27 -0400)
This allows acquiring an existing VM from a render node FD to use it
for a compute process.

Such VMs get destroyed when the original file descriptor is released.
Added a callback from amdgpu_vm_fini to handle KFD VM destruction
correctly in this case.

v2:
* Removed vm->vm_context check in amdgpu_amdkfd_gpuvm_destroy_cb,
  check vm->process_info earlier instead

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
drivers/gpu/drm/amd/include/kgd_kfd_interface.h

index 6ba3dd689875375fd9fdb3c14ce78563e70c7eec..c2c2bea731e0473c940da630a9bd4cb41efc4dbd 100644 (file)
@@ -145,6 +145,12 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
 int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
                                          void **process_info,
                                          struct dma_fence **ef);
+int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
+                                          struct file *filp,
+                                          void **vm, void **process_info,
+                                          struct dma_fence **ef);
+void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
+                                   struct amdgpu_vm *vm);
 void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm);
 uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm);
 int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
index 7485c376b90ee2cb850e174cb65365c14dfd9f8a..ea54e53172b92dab7b9813a9fe95529e45f65d4c 100644 (file)
@@ -205,6 +205,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
        .get_cu_info = get_cu_info,
        .get_vram_usage = amdgpu_amdkfd_get_vram_usage,
        .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
+       .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
        .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
        .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
        .set_vm_context_page_table_base = set_vm_context_page_table_base,
index 7be453494423cce6a1ed5293bb73f705c4ee6857..89264c9a5e9f9cf8fbbaa7fb95fb10eb04bbfd67 100644 (file)
@@ -165,6 +165,7 @@ static const struct kfd2kgd_calls kfd2kgd = {
        .get_cu_info = get_cu_info,
        .get_vram_usage = amdgpu_amdkfd_get_vram_usage,
        .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm,
+       .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm,
        .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm,
        .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir,
        .set_vm_context_page_table_base = set_vm_context_page_table_base,
index cafe8a5716367093a23ce8682750f6fe24c70d6d..1d6e1479da385feb7cbe27d889cd5462dfd6e383 100644 (file)
@@ -806,32 +806,16 @@ static int process_update_pds(struct amdkfd_process_info *process_info,
        return 0;
 }
 
-int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
-                                         void **process_info,
-                                         struct dma_fence **ef)
+static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
+                      struct dma_fence **ef)
 {
-       int ret;
-       struct amdgpu_vm *new_vm;
        struct amdkfd_process_info *info = NULL;
-       struct amdgpu_device *adev = get_amdgpu_device(kgd);
-
-       new_vm = kzalloc(sizeof(*new_vm), GFP_KERNEL);
-       if (!new_vm)
-               return -ENOMEM;
-
-       /* Initialize the VM context, allocate the page directory and zero it */
-       ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, 0);
-       if (ret) {
-               pr_err("Failed init vm ret %d\n", ret);
-               goto vm_init_fail;
-       }
+       int ret;
 
        if (!*process_info) {
                info = kzalloc(sizeof(*info), GFP_KERNEL);
-               if (!info) {
-                       ret = -ENOMEM;
-                       goto alloc_process_info_fail;
-               }
+               if (!info)
+                       return -ENOMEM;
 
                mutex_init(&info->lock);
                INIT_LIST_HEAD(&info->vm_list_head);
@@ -842,6 +826,7 @@ int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
                                                   current->mm);
                if (!info->eviction_fence) {
                        pr_err("Failed to create eviction fence\n");
+                       ret = -ENOMEM;
                        goto create_evict_fence_fail;
                }
 
@@ -849,77 +834,137 @@ int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
                *ef = dma_fence_get(&info->eviction_fence->base);
        }
 
-       new_vm->process_info = *process_info;
+       vm->process_info = *process_info;
 
        /* Validate page directory and attach eviction fence */
-       ret = amdgpu_bo_reserve(new_vm->root.base.bo, true);
+       ret = amdgpu_bo_reserve(vm->root.base.bo, true);
        if (ret)
                goto reserve_pd_fail;
-       ret = vm_validate_pt_pd_bos(new_vm);
+       ret = vm_validate_pt_pd_bos(vm);
        if (ret) {
                pr_err("validate_pt_pd_bos() failed\n");
                goto validate_pd_fail;
        }
-       ret = ttm_bo_wait(&new_vm->root.base.bo->tbo, false, false);
+       ret = ttm_bo_wait(&vm->root.base.bo->tbo, false, false);
        if (ret)
                goto wait_pd_fail;
-       amdgpu_bo_fence(new_vm->root.base.bo,
-                       &new_vm->process_info->eviction_fence->base, true);
-       amdgpu_bo_unreserve(new_vm->root.base.bo);
+       amdgpu_bo_fence(vm->root.base.bo,
+                       &vm->process_info->eviction_fence->base, true);
+       amdgpu_bo_unreserve(vm->root.base.bo);
 
        /* Update process info */
-       mutex_lock(&new_vm->process_info->lock);
-       list_add_tail(&new_vm->vm_list_node,
-                       &(new_vm->process_info->vm_list_head));
-       new_vm->process_info->n_vms++;
-       mutex_unlock(&new_vm->process_info->lock);
-
-       *vm = (void *) new_vm;
-
-       pr_debug("Created process vm %p\n", *vm);
+       mutex_lock(&vm->process_info->lock);
+       list_add_tail(&vm->vm_list_node,
+                       &(vm->process_info->vm_list_head));
+       vm->process_info->n_vms++;
+       mutex_unlock(&vm->process_info->lock);
 
-       return ret;
+       return 0;
 
 wait_pd_fail:
 validate_pd_fail:
-       amdgpu_bo_unreserve(new_vm->root.base.bo);
+       amdgpu_bo_unreserve(vm->root.base.bo);
 reserve_pd_fail:
+       vm->process_info = NULL;
+       if (info) {
+               /* Two fence references: one in info and one in *ef */
+               dma_fence_put(&info->eviction_fence->base);
+               dma_fence_put(*ef);
+               *ef = NULL;
+               *process_info = NULL;
 create_evict_fence_fail:
-       mutex_destroy(&info->lock);
-       kfree(info);
-alloc_process_info_fail:
+               mutex_destroy(&info->lock);
+               kfree(info);
+       }
+       return ret;
+}
+
+int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm,
+                                         void **process_info,
+                                         struct dma_fence **ef)
+{
+       struct amdgpu_device *adev = get_amdgpu_device(kgd);
+       struct amdgpu_vm *new_vm;
+       int ret;
+
+       new_vm = kzalloc(sizeof(*new_vm), GFP_KERNEL);
+       if (!new_vm)
+               return -ENOMEM;
+
+       /* Initialize AMDGPU part of the VM */
+       ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, 0);
+       if (ret) {
+               pr_err("Failed init vm ret %d\n", ret);
+               goto amdgpu_vm_init_fail;
+       }
+
+       /* Initialize KFD part of the VM and process info */
+       ret = init_kfd_vm(new_vm, process_info, ef);
+       if (ret)
+               goto init_kfd_vm_fail;
+
+       *vm = (void *) new_vm;
+
+       return 0;
+
+init_kfd_vm_fail:
        amdgpu_vm_fini(adev, new_vm);
-vm_init_fail:
+amdgpu_vm_init_fail:
        kfree(new_vm);
        return ret;
-
 }
 
-void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm)
+int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
+                                          struct file *filp,
+                                          void **vm, void **process_info,
+                                          struct dma_fence **ef)
 {
        struct amdgpu_device *adev = get_amdgpu_device(kgd);
-       struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
-       struct amdgpu_bo *pd;
-       struct amdkfd_process_info *process_info;
+       struct drm_file *drm_priv = filp->private_data;
+       struct amdgpu_fpriv *drv_priv = drm_priv->driver_priv;
+       struct amdgpu_vm *avm = &drv_priv->vm;
+       int ret;
 
-       if (WARN_ON(!kgd || !vm))
+       /* Already a compute VM? */
+       if (avm->process_info)
+               return -EINVAL;
+
+       /* Convert VM into a compute VM */
+       ret = amdgpu_vm_make_compute(adev, avm);
+       if (ret)
+               return ret;
+
+       /* Initialize KFD part of the VM and process info */
+       ret = init_kfd_vm(avm, process_info, ef);
+       if (ret)
+               return ret;
+
+       *vm = (void *)avm;
+
+       return 0;
+}
+
+void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
+                                   struct amdgpu_vm *vm)
+{
+       struct amdkfd_process_info *process_info = vm->process_info;
+       struct amdgpu_bo *pd = vm->root.base.bo;
+
+       if (!process_info)
                return;
 
-       pr_debug("Destroying process vm %p\n", vm);
        /* Release eviction fence from PD */
-       pd = avm->root.base.bo;
        amdgpu_bo_reserve(pd, false);
        amdgpu_bo_fence(pd, NULL, false);
        amdgpu_bo_unreserve(pd);
 
-       process_info = avm->process_info;
-
+       /* Update process info */
        mutex_lock(&process_info->lock);
        process_info->n_vms--;
-       list_del(&avm->vm_list_node);
+       list_del(&vm->vm_list_node);
        mutex_unlock(&process_info->lock);
 
-       /* Release per-process resources */
+       /* Release per-process resources when last compute VM is destroyed */
        if (!process_info->n_vms) {
                WARN_ON(!list_empty(&process_info->kfd_bo_list));
 
@@ -927,6 +972,17 @@ void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm)
                mutex_destroy(&process_info->lock);
                kfree(process_info);
        }
+}
+
+void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm)
+{
+       struct amdgpu_device *adev = get_amdgpu_device(kgd);
+       struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+
+       if (WARN_ON(!kgd || !vm))
+               return;
+
+       pr_debug("Destroying process vm %p\n", vm);
 
        /* Release the VM context */
        amdgpu_vm_fini(adev, avm);
index ea39ccf288eda8db1884e2fed4b8b4897696afba..cbf1421c19630a47eb0885634d8f317ef930a997 100644 (file)
@@ -32,6 +32,7 @@
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
+#include "amdgpu_amdkfd.h"
 
 /*
  * GPUVM
@@ -2575,6 +2576,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
        u64 fault;
        int i, r;
 
+       amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
+
        /* Clear pending page faults from IH when the VM is destroyed */
        while (kfifo_get(&vm->faults, &fault))
                amdgpu_ih_clear_fault(adev, fault);
index 1e5c22ceb256880e8ffe38bf1ad7cb16ce7431ef..b1f35c8be2cf50cd1b39b8cc9a87854f366eedd8 100644 (file)
@@ -336,6 +336,8 @@ struct kfd2kgd_calls {
 
        int (*create_process_vm)(struct kgd_dev *kgd, void **vm,
                        void **process_info, struct dma_fence **ef);
+       int (*acquire_process_vm)(struct kgd_dev *kgd, struct file *filp,
+                       void **vm, void **process_info, struct dma_fence **ef);
        void (*destroy_process_vm)(struct kgd_dev *kgd, void *vm);
        uint32_t (*get_process_page_dir)(void *vm);
        void (*set_vm_context_page_table_base)(struct kgd_dev *kgd,