From 528cbd17ceff070747a312c6312346b585495157 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 28 Jan 2019 10:23:54 +0000 Subject: [PATCH] drm/i915: Move vma lookup to its own lock Remove the struct_mutex requirement for looking up the vma for an object. v2: Highlight how the race for duplicate vma creation is resolved on reacquiring the lock with a short comment. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20190128102356.15037-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_debugfs.c | 6 +-- drivers/gpu/drm/i915/i915_gem.c | 33 +++++++----- drivers/gpu/drm/i915/i915_gem_object.h | 45 +++++++++------- drivers/gpu/drm/i915/i915_vma.c | 66 ++++++++++++++++------- drivers/gpu/drm/i915/i915_vma.h | 2 +- drivers/gpu/drm/i915/selftests/i915_vma.c | 4 +- 6 files changed, 98 insertions(+), 58 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index ecf762e24d0e..c9c230499420 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -159,14 +159,14 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) obj->mm.madv == I915_MADV_DONTNEED ? " purgeable" : ""); if (obj->base.name) seq_printf(m, " (name: %d)", obj->base.name); - list_for_each_entry(vma, &obj->vma_list, obj_link) { + list_for_each_entry(vma, &obj->vma.list, obj_link) { if (i915_vma_is_pinned(vma)) pin_count++; } seq_printf(m, " (pinned x %d)", pin_count); if (obj->pin_global) seq_printf(m, " (global)"); - list_for_each_entry(vma, &obj->vma_list, obj_link) { + list_for_each_entry(vma, &obj->vma.list, obj_link) { if (!drm_mm_node_allocated(&vma->node)) continue; @@ -322,7 +322,7 @@ static int per_file_stats(int id, void *ptr, void *data) if (obj->base.name || obj->base.dma_buf) stats->shared += obj->base.size; - list_for_each_entry(vma, &obj->vma_list, obj_link) { + list_for_each_entry(vma, &obj->vma.list, obj_link) { if (!drm_mm_node_allocated(&vma->node)) continue; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 538fa5404603..15acd052da46 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -437,15 +437,19 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj) if (ret) return ret; - while ((vma = list_first_entry_or_null(&obj->vma_list, - struct i915_vma, - obj_link))) { + spin_lock(&obj->vma.lock); + while (!ret && (vma = list_first_entry_or_null(&obj->vma.list, + struct i915_vma, + obj_link))) { list_move_tail(&vma->obj_link, &still_in_list); + spin_unlock(&obj->vma.lock); + ret = i915_vma_unbind(vma); - if (ret) - break; + + spin_lock(&obj->vma.lock); } - list_splice(&still_in_list, &obj->vma_list); + list_splice(&still_in_list, &obj->vma.list); + spin_unlock(&obj->vma.lock); return ret; } @@ -3489,7 +3493,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, * reading an invalid PTE on older architectures. */ restart: - list_for_each_entry(vma, &obj->vma_list, obj_link) { + list_for_each_entry(vma, &obj->vma.list, obj_link) { if (!drm_mm_node_allocated(&vma->node)) continue; @@ -3567,7 +3571,7 @@ restart: */ } - list_for_each_entry(vma, &obj->vma_list, obj_link) { + list_for_each_entry(vma, &obj->vma.list, obj_link) { if (!drm_mm_node_allocated(&vma->node)) continue; @@ -3577,7 +3581,7 @@ restart: } } - list_for_each_entry(vma, &obj->vma_list, obj_link) + list_for_each_entry(vma, &obj->vma.list, obj_link) vma->node.color = cache_level; i915_gem_object_set_cache_coherency(obj, cache_level); obj->cache_dirty = true; /* Always invalidate stale cachelines */ @@ -4153,7 +4157,9 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, { mutex_init(&obj->mm.lock); - INIT_LIST_HEAD(&obj->vma_list); + spin_lock_init(&obj->vma.lock); + INIT_LIST_HEAD(&obj->vma.list); + INIT_LIST_HEAD(&obj->lut_list); INIT_LIST_HEAD(&obj->batch_pool_link); @@ -4319,14 +4325,13 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, mutex_lock(&i915->drm.struct_mutex); GEM_BUG_ON(i915_gem_object_is_active(obj)); - list_for_each_entry_safe(vma, vn, - &obj->vma_list, obj_link) { + list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) { GEM_BUG_ON(i915_vma_is_active(vma)); vma->flags &= ~I915_VMA_PIN_MASK; i915_vma_destroy(vma); } - GEM_BUG_ON(!list_empty(&obj->vma_list)); - GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree)); + GEM_BUG_ON(!list_empty(&obj->vma.list)); + GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma.tree)); /* This serializes freeing with the shrinker. Since the free * is delayed, first by RCU then by the workqueue, we want the diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index cb1b0144d274..73fec917d097 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -87,24 +87,33 @@ struct drm_i915_gem_object { const struct drm_i915_gem_object_ops *ops; - /** - * @vma_list: List of VMAs backed by this object - * - * The VMA on this list are ordered by type, all GGTT vma are placed - * at the head and all ppGTT vma are placed at the tail. The different - * types of GGTT vma are unordered between themselves, use the - * @vma_tree (which has a defined order between all VMA) to find an - * exact match. - */ - struct list_head vma_list; - /** - * @vma_tree: Ordered tree of VMAs backed by this object - * - * All VMA created for this object are placed in the @vma_tree for - * fast retrieval via a binary search in i915_vma_instance(). - * They are also added to @vma_list for easy iteration. - */ - struct rb_root vma_tree; + struct { + /** + * @vma.lock: protect the list/tree of vmas + */ + spinlock_t lock; + + /** + * @vma.list: List of VMAs backed by this object + * + * The VMA on this list are ordered by type, all GGTT vma are + * placed at the head and all ppGTT vma are placed at the tail. + * The different types of GGTT vma are unordered between + * themselves, use the @vma.tree (which has a defined order + * between all VMA) to quickly find an exact match. + */ + struct list_head list; + + /** + * @vma.tree: Ordered tree of VMAs backed by this object + * + * All VMA created for this object are placed in the @vma.tree + * for fast retrieval via a binary search in + * i915_vma_instance(). They are also added to @vma.list for + * easy iteration. + */ + struct rb_root tree; + } vma; /** * @lut_list: List of vma lookup entries in use for this object. diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index dcbd0d345c72..d83b8ad5f859 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -187,32 +187,52 @@ vma_create(struct drm_i915_gem_object *obj, i915_gem_object_get_stride(obj)); GEM_BUG_ON(!is_power_of_2(vma->fence_alignment)); - /* - * We put the GGTT vma at the start of the vma-list, followed - * by the ppGGTT vma. This allows us to break early when - * iterating over only the GGTT vma for an object, see - * for_each_ggtt_vma() - */ vma->flags |= I915_VMA_GGTT; - list_add(&vma->obj_link, &obj->vma_list); - } else { - list_add_tail(&vma->obj_link, &obj->vma_list); } + spin_lock(&obj->vma.lock); + rb = NULL; - p = &obj->vma_tree.rb_node; + p = &obj->vma.tree.rb_node; while (*p) { struct i915_vma *pos; + long cmp; rb = *p; pos = rb_entry(rb, struct i915_vma, obj_node); - if (i915_vma_compare(pos, vm, view) < 0) + + /* + * If the view already exists in the tree, another thread + * already created a matching vma, so return the older instance + * and dispose of ours. + */ + cmp = i915_vma_compare(pos, vm, view); + if (cmp == 0) { + spin_unlock(&obj->vma.lock); + kmem_cache_free(vm->i915->vmas, vma); + return pos; + } + + if (cmp < 0) p = &rb->rb_right; else p = &rb->rb_left; } rb_link_node(&vma->obj_node, rb, p); - rb_insert_color(&vma->obj_node, &obj->vma_tree); + rb_insert_color(&vma->obj_node, &obj->vma.tree); + + if (i915_vma_is_ggtt(vma)) + /* + * We put the GGTT vma at the start of the vma-list, followed + * by the ppGGTT vma. This allows us to break early when + * iterating over only the GGTT vma for an object, see + * for_each_ggtt_vma() + */ + list_add(&vma->obj_link, &obj->vma.list); + else + list_add_tail(&vma->obj_link, &obj->vma.list); + + spin_unlock(&obj->vma.lock); mutex_lock(&vm->mutex); list_add(&vma->vm_link, &vm->unbound_list); @@ -232,7 +252,7 @@ vma_lookup(struct drm_i915_gem_object *obj, { struct rb_node *rb; - rb = obj->vma_tree.rb_node; + rb = obj->vma.tree.rb_node; while (rb) { struct i915_vma *vma = rb_entry(rb, struct i915_vma, obj_node); long cmp; @@ -272,16 +292,18 @@ i915_vma_instance(struct drm_i915_gem_object *obj, { struct i915_vma *vma; - lockdep_assert_held(&obj->base.dev->struct_mutex); GEM_BUG_ON(view && !i915_is_ggtt(vm)); GEM_BUG_ON(vm->closed); + spin_lock(&obj->vma.lock); vma = vma_lookup(obj, vm, view); - if (!vma) + spin_unlock(&obj->vma.lock); + + /* vma_create() will resolve the race if another creates the vma */ + if (unlikely(!vma)) vma = vma_create(obj, vm, view); GEM_BUG_ON(!IS_ERR(vma) && i915_vma_compare(vma, vm, view)); - GEM_BUG_ON(!IS_ERR(vma) && vma_lookup(obj, vm, view) != vma); return vma; } @@ -808,14 +830,18 @@ static void __i915_vma_destroy(struct i915_vma *vma) GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence)); - list_del(&vma->obj_link); - mutex_lock(&vma->vm->mutex); list_del(&vma->vm_link); mutex_unlock(&vma->vm->mutex); - if (vma->obj) - rb_erase(&vma->obj_node, &vma->obj->vma_tree); + if (vma->obj) { + struct drm_i915_gem_object *obj = vma->obj; + + spin_lock(&obj->vma.lock); + list_del(&vma->obj_link); + rb_erase(&vma->obj_node, &vma->obj->vma.tree); + spin_unlock(&obj->vma.lock); + } rbtree_postorder_for_each_entry_safe(iter, n, &vma->active, node) { GEM_BUG_ON(i915_gem_active_isset(&iter->base)); diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 4f7c1c7599f4..7252abc73d3e 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -425,7 +425,7 @@ void i915_vma_parked(struct drm_i915_private *i915); * or the list is empty ofc. */ #define for_each_ggtt_vma(V, OBJ) \ - list_for_each_entry(V, &(OBJ)->vma_list, obj_link) \ + list_for_each_entry(V, &(OBJ)->vma.list, obj_link) \ for_each_until(!i915_vma_is_ggtt(V)) #endif diff --git a/drivers/gpu/drm/i915/selftests/i915_vma.c b/drivers/gpu/drm/i915/selftests/i915_vma.c index f0a32edfb9b1..cf1de82741fa 100644 --- a/drivers/gpu/drm/i915/selftests/i915_vma.c +++ b/drivers/gpu/drm/i915/selftests/i915_vma.c @@ -672,7 +672,7 @@ static int igt_vma_partial(void *arg) } count = 0; - list_for_each_entry(vma, &obj->vma_list, obj_link) + list_for_each_entry(vma, &obj->vma.list, obj_link) count++; if (count != nvma) { pr_err("(%s) All partial vma were not recorded on the obj->vma_list: found %u, expected %u\n", @@ -701,7 +701,7 @@ static int igt_vma_partial(void *arg) i915_vma_unpin(vma); count = 0; - list_for_each_entry(vma, &obj->vma_list, obj_link) + list_for_each_entry(vma, &obj->vma.list, obj_link) count++; if (count != nvma) { pr_err("(%s) allocated an extra full vma!\n", p->name); -- 2.30.2