drm/amdkfd: Remove limit on number of GPUs

author Felix Kuehling <Felix.Kuehling@amd.com>

Thu, 15 Mar 2018 21:27:46 +0000 (17:27 -0400)

committer Oded Gabbay <oded.gabbay@gmail.com>

Thu, 15 Mar 2018 21:27:46 +0000 (17:27 -0400)
author Felix Kuehling <Felix.Kuehling@amd.com>
Thu, 15 Mar 2018 21:27:46 +0000 (17:27 -0400)
committer Oded Gabbay <oded.gabbay@gmail.com>
Thu, 15 Mar 2018 21:27:46 +0000 (17:27 -0400)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c

index 6fe24964540b4480be764392f3409e8dcec9e9fe..7d4009418ec3773ef7a965cd056144b229d1e070 100644 (file)
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -825,6 +825,97 @@ static int kfd_ioctl_get_process_apertures(struct file *filp,
         return 0;
  }
  
+/*
+ * Copy the apertures of each device of process @p to user space. With
+ * args->num_of_nodes == 0 only the device count is returned; otherwise at
+ * most num_of_nodes entries are written and num_of_nodes is set to the
+ * number written. Returns 0 on success, -ENOMEM or -EFAULT on failure.
+ */
+static int kfd_ioctl_get_process_apertures_new(struct file *filp,
+                               struct kfd_process *p, void *data)
+{
+       struct kfd_ioctl_get_process_apertures_new_args *args = data;
+       struct kfd_process_device_apertures *pa;
+       struct kfd_process_device *pdd;
+       uint32_t nodes = 0;
+       int ret;
+
+       dev_dbg(kfd_device, "get apertures for PASID %d\n", p->pasid);
+
+       if (args->num_of_nodes == 0) {
+               /* Return number of nodes, so that user space can allocate
+                * sufficient memory
+                */
+               mutex_lock(&p->mutex);
+
+               if (!kfd_has_process_device_data(p))
+                       goto out_unlock;
+
+               /* Run over all pdd of the process */
+               pdd = kfd_get_first_process_device_data(p);
+               do {
+                       args->num_of_nodes++;
+                       pdd = kfd_get_next_process_device_data(p, pdd);
+               } while (pdd);
+
+               goto out_unlock;
+       }
+
+       /* Fill in process-aperture information for all available
+        * nodes, but not more than args->num_of_nodes as that is
+        * the amount of memory allocated by user. kcalloc() rejects a
+        * user-supplied count whose size computation would overflow.
+        */
+       pa = kcalloc(args->num_of_nodes, sizeof(*pa), GFP_KERNEL);
+       if (!pa)
+               return -ENOMEM;
+
+       mutex_lock(&p->mutex);
+
+       if (!kfd_has_process_device_data(p)) {
+               args->num_of_nodes = 0;
+               kfree(pa);
+               goto out_unlock;
+       }
+
+       /* Run over all pdd of the process */
+       pdd = kfd_get_first_process_device_data(p);
+       do {
+               pa[nodes].gpu_id = pdd->dev->id;
+               pa[nodes].lds_base = pdd->lds_base;
+               pa[nodes].lds_limit = pdd->lds_limit;
+               pa[nodes].gpuvm_base = pdd->gpuvm_base;
+               pa[nodes].gpuvm_limit = pdd->gpuvm_limit;
+               pa[nodes].scratch_base = pdd->scratch_base;
+               pa[nodes].scratch_limit = pdd->scratch_limit;
+
+               dev_dbg(kfd_device, "gpu id %u\n", pdd->dev->id);
+               dev_dbg(kfd_device, "lds_base %llX\n", pdd->lds_base);
+               dev_dbg(kfd_device, "lds_limit %llX\n", pdd->lds_limit);
+               dev_dbg(kfd_device, "gpuvm_base %llX\n", pdd->gpuvm_base);
+               dev_dbg(kfd_device, "gpuvm_limit %llX\n", pdd->gpuvm_limit);
+               dev_dbg(kfd_device, "scratch_base %llX\n", pdd->scratch_base);
+               dev_dbg(kfd_device,
+                       "scratch_limit %llX\n", pdd->scratch_limit);
+               nodes++;
+
+               pdd = kfd_get_next_process_device_data(p, pdd);
+       } while (pdd && (nodes < args->num_of_nodes));
+       mutex_unlock(&p->mutex);
+
+       args->num_of_nodes = nodes;
+       ret = copy_to_user(
+                       (void __user *)args->kfd_process_device_apertures_ptr,
+                       pa,
+                       (nodes * sizeof(*pa)));
+       kfree(pa);
+       return ret ? -EFAULT : 0;
+
+out_unlock:
+       mutex_unlock(&p->mutex);
+       return 0;
+}
+
  static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
                                         void *data)
  {
@@ -1017,6 +1108,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
  
         AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
                         kfd_ioctl_set_trap_handler, 0),
+
+       AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
+                       kfd_ioctl_get_process_apertures_new, 0),
  };
  
  #define AMDKFD_CORE_IOCTL_COUNT        ARRAY_SIZE(amdkfd_ioctls)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c

index 7377513050e663901071486492ce9a1881a65353..a06b0100af96724a8e0865ddc2c35726a452feb7 100644 (file)
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -282,14 +282,14 @@
         (((uint64_t)(base) & \
                 0xFFFFFF0000000000UL) | 0xFFFFFFFFFFL)
  
-#define MAKE_SCRATCH_APP_BASE(gpu_num) \
-       (((uint64_t)(gpu_num) << 61) + 0x100000000L)
+#define MAKE_SCRATCH_APP_BASE() \
+       (((uint64_t)(0x1UL) << 61) + 0x100000000L)
  
  #define MAKE_SCRATCH_APP_LIMIT(base) \
         (((uint64_t)base & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)
  
-#define MAKE_LDS_APP_BASE(gpu_num) \
-       (((uint64_t)(gpu_num) << 61) + 0x0)
+#define MAKE_LDS_APP_BASE() \
+       (((uint64_t)(0x1UL) << 61) + 0x0)
  #define MAKE_LDS_APP_LIMIT(base) \
         (((uint64_t)(base) & 0xFFFFFFFF00000000UL) | 0xFFFFFFFF)
  
@@ -314,7 +314,7 @@ int kfd_init_apertures(struct kfd_process *process)
                         return -1;
                 }
                 /*
-                * For 64 bit process aperture will be statically reserved in
+                * For 64 bit process apertures will be statically reserved in
                  * the x86_64 non canonical process address space
                  * amdkfd doesn't currently support apertures for 32 bit process
                  */
@@ -323,12 +323,11 @@ int kfd_init_apertures(struct kfd_process *process)
                         pdd->gpuvm_base = pdd->gpuvm_limit = 0;
                         pdd->scratch_base = pdd->scratch_limit = 0;
                 } else {
-                       /*
-                        * node id couldn't be 0 - the three MSB bits of
-                        * aperture shoudn't be 0
+                       /* Same LDS and scratch apertures can be used
+                        * on all GPUs. This allows using more dGPUs
+                        * than placement options for apertures.
                          */
-                       pdd->lds_base = MAKE_LDS_APP_BASE(id + 1);
-
+                       pdd->lds_base = MAKE_LDS_APP_BASE();
                         pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
  
                         pdd->gpuvm_base = MAKE_GPUVM_APP_BASE(id + 1);
@@ -336,8 +335,7 @@ int kfd_init_apertures(struct kfd_process *process)
                         pdd->gpuvm_limit =
                                         MAKE_GPUVM_APP_LIMIT(pdd->gpuvm_base);
  
-                       pdd->scratch_base = MAKE_SCRATCH_APP_BASE(id + 1);
-
+                       pdd->scratch_base = MAKE_SCRATCH_APP_BASE();
                         pdd->scratch_limit =
                                 MAKE_SCRATCH_APP_LIMIT(pdd->scratch_base);
                 }
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h

index 111d73ba2d96fc71f8f0b499b6bbc787094415b6..52014370e2e5ec6e3657c8fa0d679b292f84ad36 100644 (file)
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -107,8 +107,6 @@ struct kfd_ioctl_get_clock_counters_args {
         __u32 pad;
  };
  
-#define NUM_OF_SUPPORTED_GPUS 7
-
  struct kfd_process_device_apertures {
         __u64 lds_base;         /* from KFD */
         __u64 lds_limit;                /* from KFD */
@@ -120,6 +118,12 @@ struct kfd_process_device_apertures {
         __u32 pad;
  };
  
+/*
+ * AMDKFD_IOC_GET_PROCESS_APERTURES is deprecated. Use
+ * AMDKFD_IOC_GET_PROCESS_APERTURES_NEW instead, which supports an
+ * unlimited number of GPUs.
+ */
+#define NUM_OF_SUPPORTED_GPUS 7
  struct kfd_ioctl_get_process_apertures_args {
         struct kfd_process_device_apertures
                         process_apertures[NUM_OF_SUPPORTED_GPUS];/* from KFD */
@@ -129,6 +133,19 @@ struct kfd_ioctl_get_process_apertures_args {
         __u32 pad;
  };
  
+struct kfd_ioctl_get_process_apertures_new_args {
+       /* User allocated. Pointer to an array of struct
+        * kfd_process_device_apertures, filled in by the kernel
+        */
+       __u64 kfd_process_device_apertures_ptr;
+       /* to KFD - number of entries the user array can hold;
+        *  0 asks KFD to return only the number of nodes
+        * from KFD - number of entries filled in (or the node count)
+        */
+       __u32 num_of_nodes;
+       __u32 pad;
+};
+
  #define MAX_ALLOWED_NUM_POINTS    100
  #define MAX_ALLOWED_AW_BUFF_SIZE 4096
  #define MAX_ALLOWED_WAC_BUFF_SIZE  128
@@ -332,7 +349,11 @@ struct kfd_ioctl_set_trap_handler_args {
  #define AMDKFD_IOC_SET_TRAP_HANDLER            \
                 AMDKFD_IOW(0x13, struct kfd_ioctl_set_trap_handler_args)
  
+#define AMDKFD_IOC_GET_PROCESS_APERTURES_NEW   \
+               AMDKFD_IOWR(0x14,               \
+                       struct kfd_ioctl_get_process_apertures_new_args)
+
  #define AMDKFD_COMMAND_START           0x01
-#define AMDKFD_COMMAND_END             0x14
+#define AMDKFD_COMMAND_END             0x15
  
  #endif
author	Felix Kuehling <Felix.Kuehling@amd.com>
	Thu, 15 Mar 2018 21:27:46 +0000 (17:27 -0400)
committer	Oded Gabbay <oded.gabbay@gmail.com>
	Thu, 15 Mar 2018 21:27:46 +0000 (17:27 -0400)
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c		patch \| blob \| history
drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c		patch \| blob \| history
include/uapi/linux/kfd_ioctl.h		patch \| blob \| history