drm/amdgpu: use a 64bit interval tree for VM management v2
authorChristian König <christian.koenig@amd.com>
Thu, 30 Mar 2017 12:03:59 +0000 (14:03 +0200)
committerAlex Deucher <alexander.deucher@amd.com>
Tue, 4 Apr 2017 17:40:32 +0000 (13:40 -0400)
This only makes a difference for 32-bit systems. The idea is to have a
fixed virtual address space size with 4-level page tables and to
minimize differences between 32 and 64-bit systems.

v2: Update commit message.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index 262056778f5237388350ab4c54bbe235e96a68e9..5c6bcacd11affd03d3f18df5e2e3cd056aff3126 100644 (file)
@@ -32,7 +32,7 @@
 #include <linux/wait.h>
 #include <linux/list.h>
 #include <linux/kref.h>
-#include <linux/interval_tree.h>
+#include <linux/rbtree.h>
 #include <linux/hashtable.h>
 #include <linux/dma-fence.h>
 
@@ -379,7 +379,10 @@ struct amdgpu_bo_list_entry {
 
 struct amdgpu_bo_va_mapping {
        struct list_head                list;
-       struct interval_tree_node       it;
+       struct rb_node                  rb;
+       uint64_t                        start;
+       uint64_t                        last;
+       uint64_t                        __subtree_last;
        uint64_t                        offset;
        uint64_t                        flags;
 };
index 97f661372a1c3eb2fa30e81698d35a832eb9a91a..9c7f7ed07ea0afc59412442d60e134ae00e32f73 100644 (file)
@@ -949,7 +949,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
                        }
 
                        if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
-                           (m->it.last + 1) * AMDGPU_GPU_PAGE_SIZE) {
+                           (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
                                DRM_ERROR("IB va_start+ib_bytes is invalid\n");
                                return -EINVAL;
                        }
@@ -960,7 +960,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
                                return r;
                        }
 
-                       offset = ((uint64_t)m->it.start) * AMDGPU_GPU_PAGE_SIZE;
+                       offset = m->start * AMDGPU_GPU_PAGE_SIZE;
                        kptr += chunk_ib->va_start - offset;
 
                        r =  amdgpu_ib_get(adev, vm, chunk_ib->ib_bytes, ib);
@@ -1388,8 +1388,8 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
                        continue;
 
                list_for_each_entry(mapping, &lobj->bo_va->valids, list) {
-                       if (mapping->it.start > addr ||
-                           addr > mapping->it.last)
+                       if (mapping->start > addr ||
+                           addr > mapping->last)
                                continue;
 
                        *bo = lobj->bo_va->bo;
@@ -1397,8 +1397,8 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
                }
 
                list_for_each_entry(mapping, &lobj->bo_va->invalids, list) {
-                       if (mapping->it.start > addr ||
-                           addr > mapping->it.last)
+                       if (mapping->start > addr ||
+                           addr > mapping->last)
                                continue;
 
                        *bo = lobj->bo_va->bo;
index 7ea3cacf9f9fb14f48e203b7cfabadf98e39e36d..38f739fb727bd2da4c81ad198ae18e46bee8e040 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/firmware.h>
 #include <linux/module.h>
 #include <linux/mmu_notifier.h>
+#include <linux/interval_tree.h>
 #include <drm/drmP.h>
 #include <drm/drm.h>
 
index a87de18160a878a82436b7d4827bb81d7a75af6d..ee9d0f346d75341197c9a50747c664d1c3bc77c4 100644 (file)
@@ -226,8 +226,8 @@ TRACE_EVENT(amdgpu_vm_bo_map,
 
            TP_fast_assign(
                           __entry->bo = bo_va ? bo_va->bo : NULL;
-                          __entry->start = mapping->it.start;
-                          __entry->last = mapping->it.last;
+                          __entry->start = mapping->start;
+                          __entry->last = mapping->last;
                           __entry->offset = mapping->offset;
                           __entry->flags = mapping->flags;
                           ),
@@ -250,8 +250,8 @@ TRACE_EVENT(amdgpu_vm_bo_unmap,
 
            TP_fast_assign(
                           __entry->bo = bo_va->bo;
-                          __entry->start = mapping->it.start;
-                          __entry->last = mapping->it.last;
+                          __entry->start = mapping->start;
+                          __entry->last = mapping->last;
                           __entry->offset = mapping->offset;
                           __entry->flags = mapping->flags;
                           ),
@@ -270,8 +270,8 @@ DECLARE_EVENT_CLASS(amdgpu_vm_mapping,
                             ),
 
            TP_fast_assign(
-                          __entry->soffset = mapping->it.start;
-                          __entry->eoffset = mapping->it.last + 1;
+                          __entry->soffset = mapping->start;
+                          __entry->eoffset = mapping->last + 1;
                           __entry->flags = mapping->flags;
                           ),
            TP_printk("soffs=%010llx, eoffs=%010llx, flags=%08x",
index 0b92dd0c1d70a0394da2769453dbe10f4e6568ce..2ca09f111f08d02838cea38e3d7148752c6ffc0b 100644 (file)
@@ -741,10 +741,10 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
 
        start = amdgpu_bo_gpu_offset(bo);
 
-       end = (mapping->it.last + 1 - mapping->it.start);
+       end = (mapping->last + 1 - mapping->start);
        end = end * AMDGPU_GPU_PAGE_SIZE + start;
 
-       addr -= ((uint64_t)mapping->it.start) * AMDGPU_GPU_PAGE_SIZE;
+       addr -= mapping->start * AMDGPU_GPU_PAGE_SIZE;
        start += addr;
 
        amdgpu_set_ib_value(ctx->parser, ctx->ib_idx, ctx->data0,
index 0184197eb0004202856bd53d58a8bd0772169e45..c853400805d1c66c98496b548ae2c30c3f640987 100644 (file)
@@ -595,13 +595,13 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx,
        }
 
        if ((addr + (uint64_t)size) >
-           ((uint64_t)mapping->it.last + 1) * AMDGPU_GPU_PAGE_SIZE) {
+           (mapping->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
                DRM_ERROR("BO to small for addr 0x%010Lx %d %d\n",
                          addr, lo, hi);
                return -EINVAL;
        }
 
-       addr -= ((uint64_t)mapping->it.start) * AMDGPU_GPU_PAGE_SIZE;
+       addr -= mapping->start * AMDGPU_GPU_PAGE_SIZE;
        addr += amdgpu_bo_gpu_offset(bo);
        addr -= ((uint64_t)size) * ((uint64_t)index);
 
index 0235d7933efd854bc242e19c097e54ef9579cf4e..5f3d932e77f21f93a2b33da3bdf599f5b08e0928 100644 (file)
@@ -26,6 +26,7 @@
  *          Jerome Glisse
  */
 #include <linux/dma-fence-array.h>
+#include <linux/interval_tree_generic.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
  * SI supports 16.
  */
 
+#define START(node) ((node)->start)
+#define LAST(node) ((node)->last)
+
+INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, uint64_t, __subtree_last,
+                    START, LAST, static, amdgpu_vm_it)
+
+#undef START
+#undef LAST
+
 /* Local structure. Encapsulate some VM table update parameters to reduce
  * the number of function parameters
  */
@@ -1301,7 +1311,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
                                      struct drm_mm_node *nodes,
                                      struct dma_fence **fence)
 {
-       uint64_t pfn, src = 0, start = mapping->it.start;
+       uint64_t pfn, src = 0, start = mapping->start;
        int r;
 
        /* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
@@ -1353,7 +1363,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
                }
                addr += pfn << PAGE_SHIFT;
 
-               last = min((uint64_t)mapping->it.last, start + max_entries - 1);
+               last = min((uint64_t)mapping->last, start + max_entries - 1);
                r = amdgpu_vm_bo_update_mapping(adev, exclusive,
                                                src, pages_addr, vm,
                                                start, last, flags, addr,
@@ -1368,7 +1378,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
                }
                start = last + 1;
 
-       } while (unlikely(start != mapping->it.last + 1));
+       } while (unlikely(start != mapping->last + 1));
 
        return 0;
 }
@@ -1724,9 +1734,8 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
                     uint64_t saddr, uint64_t offset,
                     uint64_t size, uint64_t flags)
 {
-       struct amdgpu_bo_va_mapping *mapping;
+       struct amdgpu_bo_va_mapping *mapping, *tmp;
        struct amdgpu_vm *vm = bo_va->vm;
-       struct interval_tree_node *it;
        uint64_t eaddr;
 
        /* validate the parameters */
@@ -1743,14 +1752,12 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
        saddr /= AMDGPU_GPU_PAGE_SIZE;
        eaddr /= AMDGPU_GPU_PAGE_SIZE;
 
-       it = interval_tree_iter_first(&vm->va, saddr, eaddr);
-       if (it) {
-               struct amdgpu_bo_va_mapping *tmp;
-               tmp = container_of(it, struct amdgpu_bo_va_mapping, it);
+       tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
+       if (tmp) {
                /* bo and tmp overlap, invalid addr */
                dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with "
-                       "0x%010lx-0x%010lx\n", bo_va->bo, saddr, eaddr,
-                       tmp->it.start, tmp->it.last + 1);
+                       "0x%010Lx-0x%010Lx\n", bo_va->bo, saddr, eaddr,
+                       tmp->start, tmp->last + 1);
                return -EINVAL;
        }
 
@@ -1759,13 +1766,13 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
                return -ENOMEM;
 
        INIT_LIST_HEAD(&mapping->list);
-       mapping->it.start = saddr;
-       mapping->it.last = eaddr;
+       mapping->start = saddr;
+       mapping->last = eaddr;
        mapping->offset = offset;
        mapping->flags = flags;
 
        list_add(&mapping->list, &bo_va->invalids);
-       interval_tree_insert(&mapping->it, &vm->va);
+       amdgpu_vm_it_insert(mapping, &vm->va);
 
        if (flags & AMDGPU_PTE_PRT)
                amdgpu_vm_prt_get(adev);
@@ -1823,13 +1830,13 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
        saddr /= AMDGPU_GPU_PAGE_SIZE;
        eaddr /= AMDGPU_GPU_PAGE_SIZE;
 
-       mapping->it.start = saddr;
-       mapping->it.last = eaddr;
+       mapping->start = saddr;
+       mapping->last = eaddr;
        mapping->offset = offset;
        mapping->flags = flags;
 
        list_add(&mapping->list, &bo_va->invalids);
-       interval_tree_insert(&mapping->it, &vm->va);
+       amdgpu_vm_it_insert(mapping, &vm->va);
 
        if (flags & AMDGPU_PTE_PRT)
                amdgpu_vm_prt_get(adev);
@@ -1860,7 +1867,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
        saddr /= AMDGPU_GPU_PAGE_SIZE;
 
        list_for_each_entry(mapping, &bo_va->valids, list) {
-               if (mapping->it.start == saddr)
+               if (mapping->start == saddr)
                        break;
        }
 
@@ -1868,7 +1875,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
                valid = false;
 
                list_for_each_entry(mapping, &bo_va->invalids, list) {
-                       if (mapping->it.start == saddr)
+                       if (mapping->start == saddr)
                                break;
                }
 
@@ -1877,7 +1884,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
        }
 
        list_del(&mapping->list);
-       interval_tree_remove(&mapping->it, &vm->va);
+       amdgpu_vm_it_remove(mapping, &vm->va);
        trace_amdgpu_vm_bo_unmap(bo_va, mapping);
 
        if (valid)
@@ -1905,7 +1912,6 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
                                uint64_t saddr, uint64_t size)
 {
        struct amdgpu_bo_va_mapping *before, *after, *tmp, *next;
-       struct interval_tree_node *it;
        LIST_HEAD(removed);
        uint64_t eaddr;
 
@@ -1927,43 +1933,42 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
        INIT_LIST_HEAD(&after->list);
 
        /* Now gather all removed mappings */
-       it = interval_tree_iter_first(&vm->va, saddr, eaddr);
-       while (it) {
-               tmp = container_of(it, struct amdgpu_bo_va_mapping, it);
-               it = interval_tree_iter_next(it, saddr, eaddr);
-
+       tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
+       while (tmp) {
                /* Remember mapping split at the start */
-               if (tmp->it.start < saddr) {
-                       before->it.start = tmp->it.start;
-                       before->it.last = saddr - 1;
+               if (tmp->start < saddr) {
+                       before->start = tmp->start;
+                       before->last = saddr - 1;
                        before->offset = tmp->offset;
                        before->flags = tmp->flags;
                        list_add(&before->list, &tmp->list);
                }
 
                /* Remember mapping split at the end */
-               if (tmp->it.last > eaddr) {
-                       after->it.start = eaddr + 1;
-                       after->it.last = tmp->it.last;
+               if (tmp->last > eaddr) {
+                       after->start = eaddr + 1;
+                       after->last = tmp->last;
                        after->offset = tmp->offset;
-                       after->offset += after->it.start - tmp->it.start;
+                       after->offset += after->start - tmp->start;
                        after->flags = tmp->flags;
                        list_add(&after->list, &tmp->list);
                }
 
                list_del(&tmp->list);
                list_add(&tmp->list, &removed);
+
+               tmp = amdgpu_vm_it_iter_next(tmp, saddr, eaddr);
        }
 
        /* And free them up */
        list_for_each_entry_safe(tmp, next, &removed, list) {
-               interval_tree_remove(&tmp->it, &vm->va);
+               amdgpu_vm_it_remove(tmp, &vm->va);
                list_del(&tmp->list);
 
-               if (tmp->it.start < saddr)
-                   tmp->it.start = saddr;
-               if (tmp->it.last > eaddr)
-                   tmp->it.last = eaddr;
+               if (tmp->start < saddr)
+                   tmp->start = saddr;
+               if (tmp->last > eaddr)
+                   tmp->last = eaddr;
 
                list_add(&tmp->list, &vm->freed);
                trace_amdgpu_vm_bo_unmap(NULL, tmp);
@@ -1971,7 +1976,7 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
 
        /* Insert partial mapping before the range */
        if (!list_empty(&before->list)) {
-               interval_tree_insert(&before->it, &vm->va);
+               amdgpu_vm_it_insert(before, &vm->va);
                if (before->flags & AMDGPU_PTE_PRT)
                        amdgpu_vm_prt_get(adev);
        } else {
@@ -1980,7 +1985,7 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
 
        /* Insert partial mapping after the range */
        if (!list_empty(&after->list)) {
-               interval_tree_insert(&after->it, &vm->va);
+               amdgpu_vm_it_insert(after, &vm->va);
                if (after->flags & AMDGPU_PTE_PRT)
                        amdgpu_vm_prt_get(adev);
        } else {
@@ -2014,13 +2019,13 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
 
        list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
                list_del(&mapping->list);
-               interval_tree_remove(&mapping->it, &vm->va);
+               amdgpu_vm_it_remove(mapping, &vm->va);
                trace_amdgpu_vm_bo_unmap(bo_va, mapping);
                list_add(&mapping->list, &vm->freed);
        }
        list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
                list_del(&mapping->list);
-               interval_tree_remove(&mapping->it, &vm->va);
+               amdgpu_vm_it_remove(mapping, &vm->va);
                amdgpu_vm_free_mapping(adev, vm, mapping,
                                       bo_va->last_pt_update);
        }
@@ -2162,9 +2167,9 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
        if (!RB_EMPTY_ROOT(&vm->va)) {
                dev_err(adev->dev, "still active bo inside vm\n");
        }
-       rbtree_postorder_for_each_entry_safe(mapping, tmp, &vm->va, it.rb) {
+       rbtree_postorder_for_each_entry_safe(mapping, tmp, &vm->va, rb) {
                list_del(&mapping->list);
-               interval_tree_remove(&mapping->it, &vm->va);
+               amdgpu_vm_it_remove(mapping, &vm->va);
                kfree(mapping);
        }
        list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {