drm/amdkfd: Add DMABuf import functionality
author Felix Kuehling <Felix.Kuehling@amd.com>
Wed, 21 Nov 2018 02:00:29 +0000 (21:00 -0500)
committer Alex Deucher <alexander.deucher@amd.com>
Fri, 7 Dec 2018 23:13:54 +0000 (18:13 -0500)
This is used for interoperability between ROCm compute and graphics
APIs. It allows importing graphics driver BOs into the ROCm SVM
address space for zero-copy GPU access.

The API is split into two steps (query and import) to allow user mode
to manage the virtual address space allocation for the imported buffer.

Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
drivers/gpu/drm/amd/amdkfd/kfd_topology.c
include/uapi/linux/kfd_ioctl.h

index 68b29a210eaae6d65f60036e477ecb038df3464e..68e4cf1b655cd7a2ed9cde3545ff8feb9d0ad2ee 100644 (file)
@@ -26,6 +26,7 @@
 #include "amdgpu.h"
 #include "amdgpu_gfx.h"
 #include <linux/module.h>
+#include <linux/dma-buf.h>
 
 const struct kgd2kfd_calls *kgd2kfd;
 
@@ -433,6 +434,62 @@ void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info)
        cu_info->lds_size = acu_info.lds_size;
 }
 
+int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
+                                 struct kgd_dev **dma_buf_kgd,
+                                 uint64_t *bo_size, void *metadata_buffer,
+                                 size_t buffer_size, uint32_t *metadata_size,
+                                 uint32_t *flags)
+{
+       struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+       struct dma_buf *dma_buf;
+       struct drm_gem_object *obj;
+       struct amdgpu_bo *bo;
+       uint64_t metadata_flags;
+       int r = -EINVAL;
+
+       dma_buf = dma_buf_get(dma_buf_fd);
+       if (IS_ERR(dma_buf))
+               return PTR_ERR(dma_buf);
+
+       if (dma_buf->ops != &amdgpu_dmabuf_ops)
+               /* Can't handle non-graphics buffers */
+               goto out_put;
+
+       obj = dma_buf->priv;
+       if (obj->dev->driver != adev->ddev->driver)
+               /* Can't handle buffers from different drivers */
+               goto out_put;
+
+       adev = obj->dev->dev_private;
+       bo = gem_to_amdgpu_bo(obj);
+       if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
+                                   AMDGPU_GEM_DOMAIN_GTT)))
+               /* Only VRAM and GTT BOs are supported */
+               goto out_put;
+
+       r = 0;
+       if (dma_buf_kgd)
+               *dma_buf_kgd = (struct kgd_dev *)adev;
+       if (bo_size)
+               *bo_size = amdgpu_bo_size(bo);
+       if (metadata_size)
+               *metadata_size = bo->metadata_size;
+       if (metadata_buffer)
+               r = amdgpu_bo_get_metadata(bo, metadata_buffer, buffer_size,
+                                          metadata_size, &metadata_flags);
+       if (flags) {
+               *flags = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
+                       ALLOC_MEM_FLAGS_VRAM : ALLOC_MEM_FLAGS_GTT;
+
+               if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
+                       *flags |= ALLOC_MEM_FLAGS_PUBLIC;
+       }
+
+out_put:
+       dma_buf_put(dma_buf);
+       return r;
+}
+
 uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
index 131c6e5e6f107f5cee6df0236b541cb142c5875a..70429f7aa9a84c0189ce3e3748a55f4b3549b08b 100644 (file)
@@ -149,6 +149,11 @@ uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct kgd_dev *kgd);
 
 uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
 void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info);
+int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
+                                 struct kgd_dev **dmabuf_kgd,
+                                 uint64_t *bo_size, void *metadata_buffer,
+                                 size_t buffer_size, uint32_t *metadata_size,
+                                 uint32_t *flags);
 uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
 uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd);
 
@@ -200,6 +205,12 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
 int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
                                              struct kfd_vm_fault_info *info);
 
+int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
+                                     struct dma_buf *dmabuf,
+                                     uint64_t va, void *vm,
+                                     struct kgd_mem **mem, uint64_t *size,
+                                     uint64_t *mmap_offset);
+
 void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
 void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo);
 
index 5fb60e1d713aabbacfa3afb95ebb0ab61a2228ef..a0a500d458864aac40e406f04e01a6c89cdf181e 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/list.h>
 #include <linux/pagemap.h>
 #include <linux/sched/mm.h>
+#include <linux/dma-buf.h>
 #include <drm/drmP.h>
 #include "amdgpu_object.h"
 #include "amdgpu_vm.h"
@@ -1664,6 +1665,60 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
        return 0;
 }
 
+int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
+                                     struct dma_buf *dma_buf,
+                                     uint64_t va, void *vm,
+                                     struct kgd_mem **mem, uint64_t *size,
+                                     uint64_t *mmap_offset)
+{
+       struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+       struct drm_gem_object *obj;
+       struct amdgpu_bo *bo;
+       struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+
+       if (dma_buf->ops != &amdgpu_dmabuf_ops)
+               /* Can't handle non-graphics buffers */
+               return -EINVAL;
+
+       obj = dma_buf->priv;
+       if (obj->dev->dev_private != adev)
+               /* Can't handle buffers from other devices */
+               return -EINVAL;
+
+       bo = gem_to_amdgpu_bo(obj);
+       if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
+                                   AMDGPU_GEM_DOMAIN_GTT)))
+               /* Only VRAM and GTT BOs are supported */
+               return -EINVAL;
+
+       *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
+       if (!*mem)
+               return -ENOMEM;
+
+       if (size)
+               *size = amdgpu_bo_size(bo);
+
+       if (mmap_offset)
+               *mmap_offset = amdgpu_bo_mmap_offset(bo);
+
+       INIT_LIST_HEAD(&(*mem)->bo_va_list);
+       mutex_init(&(*mem)->lock);
+       (*mem)->mapping_flags =
+               AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
+               AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_NC;
+
+       (*mem)->bo = amdgpu_bo_ref(bo);
+       (*mem)->va = va;
+       (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
+               AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
+       (*mem)->mapped_to_gpu_memory = 0;
+       (*mem)->process_info = avm->process_info;
+       add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
+       amdgpu_sync_create(&(*mem)->sync);
+
+       return 0;
+}
+
 /* Evict a userptr BO by stopping the queues if necessary
  *
  * Runs in MMU notifier, may be in RECLAIM_FS context. This means it
index d63daba9b17c554912e2a82fd4c1620b5b654455..f1ddfc50bcc763636d6080a527870a10001dd27b 100644 (file)
@@ -54,6 +54,8 @@ void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
 void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
 int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
 
+extern const struct dma_buf_ops amdgpu_dmabuf_ops;
+
 /*
  * GEM objects.
  */
index 3e44d889f7af2fe6d7a0eaaac054839fd3b3cffd..71913a18d142cb028fe4077b0d4dca0d36be7f02 100644 (file)
@@ -39,8 +39,6 @@
 #include <drm/amdgpu_drm.h>
 #include <linux/dma-buf.h>
 
-static const struct dma_buf_ops amdgpu_dmabuf_ops;
-
 /**
  * amdgpu_gem_prime_get_sg_table - &drm_driver.gem_prime_get_sg_table
  * implementation
@@ -332,7 +330,7 @@ static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
        return ret;
 }
 
-static const struct dma_buf_ops amdgpu_dmabuf_ops = {
+const struct dma_buf_ops amdgpu_dmabuf_ops = {
        .attach = amdgpu_gem_map_attach,
        .detach = amdgpu_gem_map_detach,
        .map_dma_buf = drm_gem_map_dma_buf,
index 5f4062b41adddab4cc30797766d18756ff1def79..ae3ae0fb260255d3fe948744713cd8d313710d2f 100644 (file)
@@ -33,6 +33,7 @@
 #include <linux/time.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
+#include <linux/dma-buf.h>
 #include <asm/processor.h>
 #include "kfd_priv.h"
 #include "kfd_device_queue_manager.h"
@@ -1550,6 +1551,115 @@ copy_from_user_failed:
        return err;
 }
 
+static int kfd_ioctl_get_dmabuf_info(struct file *filep,
+               struct kfd_process *p, void *data)
+{
+       struct kfd_ioctl_get_dmabuf_info_args *args = data;
+       struct kfd_dev *dev = NULL;
+       struct kgd_dev *dma_buf_kgd;
+       void *metadata_buffer = NULL;
+       uint32_t flags;
+       unsigned int i;
+       int r;
+
+       /* Find a KFD GPU device that supports the get_dmabuf_info query */
+       for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++)
+               if (dev)
+                       break;
+       if (!dev)
+               return -EINVAL;
+
+       if (args->metadata_ptr) {
+               metadata_buffer = kzalloc(args->metadata_size, GFP_KERNEL);
+               if (!metadata_buffer)
+                       return -ENOMEM;
+       }
+
+       /* Get dmabuf info from KGD */
+       r = amdgpu_amdkfd_get_dmabuf_info(dev->kgd, args->dmabuf_fd,
+                                         &dma_buf_kgd, &args->size,
+                                         metadata_buffer, args->metadata_size,
+                                         &args->metadata_size, &flags);
+       if (r)
+               goto exit;
+
+       /* Reverse-lookup gpu_id from kgd pointer */
+       dev = kfd_device_by_kgd(dma_buf_kgd);
+       if (!dev) {
+               r = -EINVAL;
+               goto exit;
+       }
+       args->gpu_id = dev->id;
+       args->flags = flags;
+
+       /* Copy metadata buffer to user mode */
+       if (metadata_buffer) {
+               r = copy_to_user((void __user *)args->metadata_ptr,
+                                metadata_buffer, args->metadata_size);
+               if (r != 0)
+                       r = -EFAULT;
+       }
+
+exit:
+       kfree(metadata_buffer);
+
+       return r;
+}
+
+static int kfd_ioctl_import_dmabuf(struct file *filep,
+                                  struct kfd_process *p, void *data)
+{
+       struct kfd_ioctl_import_dmabuf_args *args = data;
+       struct kfd_process_device *pdd;
+       struct dma_buf *dmabuf;
+       struct kfd_dev *dev;
+       int idr_handle;
+       uint64_t size;
+       void *mem;
+       int r;
+
+       dev = kfd_device_by_id(args->gpu_id);
+       if (!dev)
+               return -EINVAL;
+
+       dmabuf = dma_buf_get(args->dmabuf_fd);
+       if (!dmabuf)
+               return -EINVAL;
+
+       mutex_lock(&p->mutex);
+
+       pdd = kfd_bind_process_to_device(dev, p);
+       if (IS_ERR(pdd)) {
+               r = PTR_ERR(pdd);
+               goto err_unlock;
+       }
+
+       r = amdgpu_amdkfd_gpuvm_import_dmabuf(dev->kgd, dmabuf,
+                                             args->va_addr, pdd->vm,
+                                             (struct kgd_mem **)&mem, &size,
+                                             NULL);
+       if (r)
+               goto err_unlock;
+
+       idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
+       if (idr_handle < 0) {
+               r = -EFAULT;
+               goto err_free;
+       }
+
+       mutex_unlock(&p->mutex);
+
+       args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
+
+       return 0;
+
+err_free:
+       amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
+err_unlock:
+       mutex_unlock(&p->mutex);
+       return r;
+}
+
 #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
        [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
                            .cmd_drv = 0, .name = #ioctl}
@@ -1635,7 +1745,13 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
                        kfd_ioctl_set_cu_mask, 0),
 
        AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_QUEUE_WAVE_STATE,
-                       kfd_ioctl_get_queue_wave_state, 0)
+                       kfd_ioctl_get_queue_wave_state, 0),
+
+       AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_DMABUF_INFO,
+                               kfd_ioctl_get_dmabuf_info, 0),
+
+       AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
+                               kfd_ioctl_import_dmabuf, 0),
 
 };
 
index dec8e64f36bdc80ef2ac851ba35d700bf8d32140..0689d4ccbbc0f6fd59164f5ebab76246fca396e6 100644 (file)
@@ -793,6 +793,7 @@ struct kfd_topology_device *kfd_topology_device_by_proximity_domain(
 struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id);
 struct kfd_dev *kfd_device_by_id(uint32_t gpu_id);
 struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev);
+struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd);
 int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev);
 int kfd_numa_node_to_apic_id(int numa_node_id);
 
index c5ed21ef246219b805db56641d65b5a7ec6d5734..5f5b2acedbac3bf0e15d8727d5327af37e3f7a79 100644 (file)
@@ -111,6 +111,24 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev)
        return device;
 }
 
+struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd)
+{
+       struct kfd_topology_device *top_dev;
+       struct kfd_dev *device = NULL;
+
+       down_read(&topology_lock);
+
+       list_for_each_entry(top_dev, &topology_device_list, list)
+               if (top_dev->gpu && top_dev->gpu->kgd == kgd) {
+                       device = top_dev->gpu;
+                       break;
+               }
+
+       up_read(&topology_lock);
+
+       return device;
+}
+
 /* Called with write topology_lock acquired */
 static void kfd_release_topology_device(struct kfd_topology_device *dev)
 {
index b01eb502d49c55d04f33cace28a410171239eaf5..e622fd1fbd46399c967e448c762b69b8dd3220b2 100644 (file)
@@ -398,6 +398,24 @@ struct kfd_ioctl_unmap_memory_from_gpu_args {
        __u32 n_success;                /* to/from KFD */
 };
 
+struct kfd_ioctl_get_dmabuf_info_args {
+       __u64 size;             /* from KFD */
+       __u64 metadata_ptr;     /* to KFD */
+       __u32 metadata_size;    /* to KFD (space allocated by user)
+                                * from KFD (actual metadata size)
+                                */
+       __u32 gpu_id;   /* from KFD */
+       __u32 flags;            /* from KFD (KFD_IOC_ALLOC_MEM_FLAGS) */
+       __u32 dmabuf_fd;        /* to KFD */
+};
+
+struct kfd_ioctl_import_dmabuf_args {
+       __u64 va_addr;  /* to KFD */
+       __u64 handle;   /* from KFD */
+       __u32 gpu_id;   /* to KFD */
+       __u32 dmabuf_fd;        /* to KFD */
+};
+
 #define AMDKFD_IOCTL_BASE 'K'
 #define AMDKFD_IO(nr)                  _IO(AMDKFD_IOCTL_BASE, nr)
 #define AMDKFD_IOR(nr, type)           _IOR(AMDKFD_IOCTL_BASE, nr, type)
@@ -486,7 +504,13 @@ struct kfd_ioctl_unmap_memory_from_gpu_args {
 #define AMDKFD_IOC_GET_QUEUE_WAVE_STATE                \
                AMDKFD_IOWR(0x1B, struct kfd_ioctl_get_queue_wave_state_args)
 
+#define AMDKFD_IOC_GET_DMABUF_INFO             \
+               AMDKFD_IOWR(0x1C, struct kfd_ioctl_get_dmabuf_info_args)
+
+#define AMDKFD_IOC_IMPORT_DMABUF               \
+               AMDKFD_IOWR(0x1D, struct kfd_ioctl_import_dmabuf_args)
+
 #define AMDKFD_COMMAND_START           0x01
-#define AMDKFD_COMMAND_END             0x1C
+#define AMDKFD_COMMAND_END             0x1E
 
 #endif