drm/amdgpu: Add KFD eviction fence
authorFelix Kuehling <Felix.Kuehling@amd.com>
Wed, 7 Feb 2018 01:32:35 +0000 (20:32 -0500)
committerOded Gabbay <oded.gabbay@gmail.com>
Wed, 7 Feb 2018 01:32:35 +0000 (20:32 -0500)
This fence is used by KFD to keep memory resident while user mode
queues are enabled. Trying to evict memory will trigger the
enable_signaling callback, which starts a KFD eviction, which
involves preempting user mode queues before signaling the fence.
There is one such fence per process.

v2:
* Grab a reference to mm_struct
* Dereference fence after NULL check
* Simplify fence release, no need to signal without anyone waiting
* Added signed-off-by Harish, who is the original author of this code

v3:
* update MAINTAINERS file
* change amd_kfd_ prefix to amdkfd_
* remove useless initialization of variable to NULL

v4:
* set amdkfd_fence_ops to be static
* Suggested by: Fengguang Wu <fengguang.wu@intel.com>

Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
MAINTAINERS
drivers/gpu/drm/amd/amdgpu/Makefile
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c [new file with mode: 0644]
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
drivers/gpu/drm/amd/include/kgd_kfd_interface.h

index 13c8ec11135a9c4c2008d61fa99a037f9cfeb787..9e376c7f15b1ba947caff8bb2424c8e4751ec3cd 100644 (file)
@@ -766,6 +766,7 @@ F:  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
 F:     drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
 F:     drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
 F:     drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+F:     drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
 F:     drivers/gpu/drm/amd/amdkfd/
 F:     drivers/gpu/drm/amd/include/cik_structs.h
 F:     drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 353c937d947dde8ebbc0489dd9ed6d5e60fd0c8f..5dd317579e77ee3a234cd442272bd7ec5d0ddfec 100644 (file)
@@ -129,6 +129,7 @@ amdgpu-y += \
 # add amdkfd interfaces
 amdgpu-y += \
         amdgpu_amdkfd.o \
+        amdgpu_amdkfd_fence.o \
         amdgpu_amdkfd_gfx_v8.o
 
 # add cgs
index 2a519f9062eeaf3357c378f90b4c0ef8c0971fd4..833dc26d402f6fc9952a4aa7679e3cf31b6b58ff 100644 (file)
@@ -29,6 +29,8 @@
 #include <linux/mmu_context.h>
 #include <kgd_kfd_interface.h>
 
+extern const struct kgd2kfd_calls *kgd2kfd;
+
 struct amdgpu_device;
 
 struct kgd_mem {
@@ -37,6 +39,19 @@ struct kgd_mem {
        void *cpu_ptr;
 };
 
+/* KFD Memory Eviction */
+struct amdgpu_amdkfd_fence {
+       struct dma_fence base;
+       struct mm_struct *mm;
+       spinlock_t lock;
+       char timeline_name[TASK_COMM_LEN];
+};
+
+struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
+                                                      struct mm_struct *mm);
+bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
+struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
+
 int amdgpu_amdkfd_init(void);
 void amdgpu_amdkfd_fini(void);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
new file mode 100644 (file)
index 0000000..2c14025
--- /dev/null
@@ -0,0 +1,179 @@
+/*
+ * Copyright 2016-2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/dma-fence.h>
+#include <linux/spinlock.h>
+#include <linux/atomic.h>
+#include <linux/stacktrace.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/sched/mm.h>
+#include "amdgpu_amdkfd.h"
+
+static const struct dma_fence_ops amdkfd_fence_ops;
+static atomic_t fence_seq = ATOMIC_INIT(0);
+
+/* Eviction Fence
+ * Fence helper functions to deal with KFD memory eviction.
+ * Big Idea - Since KFD submissions are done by user queues, a BO cannot be
+ *  evicted unless all the user queues for that process are evicted.
+ *
+ * All the BOs in a process share an eviction fence. When process X wants
+ * to map VRAM memory but TTM can't find enough space, TTM will attempt to
+ * evict BOs from its LRU list. TTM checks if the BO is valuable to evict
+ * by calling ttm_bo_driver->eviction_valuable().
+ *
+ * ttm_bo_driver->eviction_valuable() - will return false if the BO belongs
+ *  to process X. Otherwise, it will return true to indicate BO can be
+ *  evicted by TTM.
+ *
+ * If ttm_bo_driver->eviction_valuable returns true, then TTM will continue
+ * the evcition process for that BO by calling ttm_bo_evict --> amdgpu_bo_move
+ * --> amdgpu_copy_buffer(). This sets up job in GPU scheduler.
+ *
+ * GPU Scheduler (amd_sched_main) - sets up a cb (fence_add_callback) to
+ *  nofity when the BO is free to move. fence_add_callback --> enable_signaling
+ *  --> amdgpu_amdkfd_fence.enable_signaling
+ *
+ * amdgpu_amdkfd_fence.enable_signaling - Start a work item that will quiesce
+ * user queues and signal fence. The work item will also start another delayed
+ * work item to restore BOs
+ */
+
+struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
+                                                      struct mm_struct *mm)
+{
+       struct amdgpu_amdkfd_fence *fence;
+
+       fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+       if (fence == NULL)
+               return NULL;
+
+       /* This reference gets released in amdkfd_fence_release */
+       mmgrab(mm);
+       fence->mm = mm;
+       get_task_comm(fence->timeline_name, current);
+       spin_lock_init(&fence->lock);
+
+       dma_fence_init(&fence->base, &amdkfd_fence_ops, &fence->lock,
+                  context, atomic_inc_return(&fence_seq));
+
+       return fence;
+}
+
+struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
+{
+       struct amdgpu_amdkfd_fence *fence;
+
+       if (!f)
+               return NULL;
+
+       fence = container_of(f, struct amdgpu_amdkfd_fence, base);
+       if (fence && f->ops == &amdkfd_fence_ops)
+               return fence;
+
+       return NULL;
+}
+
+static const char *amdkfd_fence_get_driver_name(struct dma_fence *f)
+{
+       return "amdgpu_amdkfd_fence";
+}
+
+static const char *amdkfd_fence_get_timeline_name(struct dma_fence *f)
+{
+       struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
+
+       return fence->timeline_name;
+}
+
+/**
+ * amdkfd_fence_enable_signaling - This gets called when TTM wants to evict
+ *  a KFD BO and schedules a job to move the BO.
+ *  If fence is already signaled return true.
+ *  If fence is not signaled schedule a evict KFD process work item.
+ */
+static bool amdkfd_fence_enable_signaling(struct dma_fence *f)
+{
+       struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
+
+       if (!fence)
+               return false;
+
+       if (dma_fence_is_signaled(f))
+               return true;
+
+       if (!kgd2kfd->schedule_evict_and_restore_process(fence->mm, f))
+               return true;
+
+       return false;
+}
+
+/**
+ * amdkfd_fence_release - callback that fence can be freed
+ *
+ * @fence: fence
+ *
+ * This function is called when the reference count becomes zero.
+ * Drops the mm_struct reference and RCU schedules freeing up the fence.
+ */
+static void amdkfd_fence_release(struct dma_fence *f)
+{
+       struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
+
+       /* Unconditionally signal the fence. The process is getting
+        * terminated.
+        */
+       if (WARN_ON(!fence))
+               return; /* Not an amdgpu_amdkfd_fence */
+
+       mmdrop(fence->mm);
+       kfree_rcu(f, rcu);
+}
+
+/**
+ * amdkfd_fence_check_mm - Check if @mm is same as that of the fence @f
+ *  if same return TRUE else return FALSE.
+ *
+ * @f: [IN] fence
+ * @mm: [IN] mm that needs to be verified
+ */
+bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
+{
+       struct amdgpu_amdkfd_fence *fence = to_amdgpu_amdkfd_fence(f);
+
+       if (!fence)
+               return false;
+       else if (fence->mm == mm)
+               return true;
+
+       return false;
+}
+
+static const struct dma_fence_ops amdkfd_fence_ops = {
+       .get_driver_name = amdkfd_fence_get_driver_name,
+       .get_timeline_name = amdkfd_fence_get_timeline_name,
+       .enable_signaling = amdkfd_fence_enable_signaling,
+       .signaled = NULL,
+       .wait = dma_fence_default_wait,
+       .release = amdkfd_fence_release,
+};
index 06436c3ebd2bf333e89b5362d6e243b24b9208f0..1a5911882657b599f79566df0ef3b6c6bc3bd925 100644 (file)
@@ -36,8 +36,9 @@
 #define AMDGPU_MAX_UVD_ENC_RINGS       2
 
 /* some special values for the owner field */
-#define AMDGPU_FENCE_OWNER_UNDEFINED   ((void*)0ul)
-#define AMDGPU_FENCE_OWNER_VM          ((void*)1ul)
+#define AMDGPU_FENCE_OWNER_UNDEFINED   ((void *)0ul)
+#define AMDGPU_FENCE_OWNER_VM          ((void *)1ul)
+#define AMDGPU_FENCE_OWNER_KFD         ((void *)2ul)
 
 #define AMDGPU_FENCE_FLAG_64BIT         (1 << 0)
 #define AMDGPU_FENCE_FLAG_INT           (1 << 1)
index df65c66dc956f7500810b5e3e42080650a484193..b8d3b87fd43e94b65d5b09a344043272c0b476bd 100644 (file)
@@ -31,6 +31,7 @@
 #include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
+#include "amdgpu_amdkfd.h"
 
 struct amdgpu_sync_entry {
        struct hlist_node       node;
@@ -85,11 +86,20 @@ static bool amdgpu_sync_same_dev(struct amdgpu_device *adev,
  */
 static void *amdgpu_sync_get_owner(struct dma_fence *f)
 {
-       struct drm_sched_fence *s_fence = to_drm_sched_fence(f);
+       struct drm_sched_fence *s_fence;
+       struct amdgpu_amdkfd_fence *kfd_fence;
+
+       if (!f)
+               return AMDGPU_FENCE_OWNER_UNDEFINED;
 
+       s_fence = to_drm_sched_fence(f);
        if (s_fence)
                return s_fence->owner;
 
+       kfd_fence = to_amdgpu_amdkfd_fence(f);
+       if (kfd_fence)
+               return AMDGPU_FENCE_OWNER_KFD;
+
        return AMDGPU_FENCE_OWNER_UNDEFINED;
 }
 
@@ -204,11 +214,18 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
        for (i = 0; i < flist->shared_count; ++i) {
                f = rcu_dereference_protected(flist->shared[i],
                                              reservation_object_held(resv));
+               /* We only want to trigger KFD eviction fences on
+                * evict or move jobs. Skip KFD fences otherwise.
+                */
+               fence_owner = amdgpu_sync_get_owner(f);
+               if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
+                   owner != AMDGPU_FENCE_OWNER_UNDEFINED)
+                       continue;
+
                if (amdgpu_sync_same_dev(adev, f)) {
                        /* VM updates are only interesting
                         * for other VM updates and moves.
                         */
-                       fence_owner = amdgpu_sync_get_owner(f);
                        if ((owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
                            (fence_owner != AMDGPU_FENCE_OWNER_UNDEFINED) &&
                            ((owner == AMDGPU_FENCE_OWNER_VM) !=
index 28c33d711bab3cbcf3a36ca862fdff478bd8caae..5fcb3488a5959cbbc2b3d07fb74c392bab747e87 100644 (file)
@@ -46,6 +46,7 @@
 #include "amdgpu.h"
 #include "amdgpu_object.h"
 #include "amdgpu_trace.h"
+#include "amdgpu_amdkfd.h"
 #include "bif/bif_4_1_d.h"
 
 #define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
@@ -1171,6 +1172,23 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
 {
        unsigned long num_pages = bo->mem.num_pages;
        struct drm_mm_node *node = bo->mem.mm_node;
+       struct reservation_object_list *flist;
+       struct dma_fence *f;
+       int i;
+
+       /* If bo is a KFD BO, check if the bo belongs to the current process.
+        * If true, then return false as any KFD process needs all its BOs to
+        * be resident to run successfully
+        */
+       flist = reservation_object_get_list(bo->resv);
+       if (flist) {
+               for (i = 0; i < flist->shared_count; ++i) {
+                       f = rcu_dereference_protected(flist->shared[i],
+                               reservation_object_held(bo->resv));
+                       if (amdkfd_fence_check_mm(f, current->mm))
+                               return false;
+               }
+       }
 
        switch (bo->mem.mem_type) {
        case TTM_PL_TT:
index 94eab54864bf7dc3c81f836204e684c3e6d6423f..9e352499c409b0e4da92ad08e33f73df85814349 100644 (file)
@@ -30,6 +30,7 @@
 
 #include <linux/types.h>
 #include <linux/bitmap.h>
+#include <linux/dma-fence.h>
 
 struct pci_dev;
 
@@ -286,6 +287,9 @@ struct kfd2kgd_calls {
  *
  * @resume: Notifies amdkfd about a resume action done to a kgd device
  *
+ * @schedule_evict_and_restore_process: Schedules work queue that will prepare
+ * for safe eviction of KFD BOs that belong to the specified process.
+ *
  * This structure contains function callback pointers so the kgd driver
  * will notify to the amdkfd about certain status changes.
  *
@@ -300,6 +304,8 @@ struct kgd2kfd_calls {
        void (*interrupt)(struct kfd_dev *kfd, const void *ih_ring_entry);
        void (*suspend)(struct kfd_dev *kfd);
        int (*resume)(struct kfd_dev *kfd);
+       int (*schedule_evict_and_restore_process)(struct mm_struct *mm,
+                       struct dma_fence *fence);
 };
 
 int kgd2kfd_init(unsigned interface_version,