drm/i915: Throw away the active object retirement complexity
authorChris Wilson <chris@chris-wilson.co.uk>
Fri, 21 Jun 2019 18:37:59 +0000 (19:37 +0100)
committerChris Wilson <chris@chris-wilson.co.uk>
Fri, 21 Jun 2019 18:47:51 +0000 (19:47 +0100)
Remove the accumulated optimisations that we have for i915_vma_retire
and reduce it to the bare essential of tracking the active object
reference. This allows us to only use atomic operations, and so will be
able to avoid the struct_mutex requirement.

The principal loss here is the shrinker MRU bumping, so now if we have
to shrink, we will do so in much more random order and more likely to
try and shrink recently used objects. That is a nuisance, but shrinking
active objects is a second step we try to avoid and will always be a
system-wide performance issue.

The other loss is here is in the automatic pruning of the
reservation_object when idling. This is not as large an issue as upon
reservation_object introduction as now adding new fences into the object
replaces already signaled fences, keeping the array compact. But we do
lose the auto-expiration of stale fences and unused arrays. That may be
a noticeable problem for which we need to re-implement autopruning.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190621183801.23252-3-chris@chris-wilson.co.uk
drivers/gpu/drm/i915/gem/i915_gem_object.c
drivers/gpu/drm/i915/gem/i915_gem_object.h
drivers/gpu/drm/i915/gem/i915_gem_object_types.h
drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
drivers/gpu/drm/i915/gt/intel_lrc.c
drivers/gpu/drm/i915/gt/intel_ringbuffer.c
drivers/gpu/drm/i915/gt/selftest_hangcheck.c
drivers/gpu/drm/i915/i915_debugfs.c
drivers/gpu/drm/i915/i915_gem_batch_pool.c
drivers/gpu/drm/i915/i915_vma.c

index 87275f9883ac30933fb30c657d8b3ac3de968b15..43194fbcbc2e75419001c5a091129e01452676dd 100644 (file)
@@ -160,7 +160,6 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915,
 
                mutex_lock(&i915->drm.struct_mutex);
 
-               GEM_BUG_ON(i915_gem_object_is_active(obj));
                list_for_each_entry_safe(vma, vn, &obj->vma.list, obj_link) {
                        GEM_BUG_ON(i915_vma_is_active(vma));
                        vma->flags &= ~I915_VMA_PIN_MASK;
index dfebd5706f16ddc3b1dc8aaf43e9d4bba6179118..20754c15412a4621a083267a3bf60521f9f77742 100644 (file)
@@ -158,12 +158,6 @@ i915_gem_object_needs_async_cancel(const struct drm_i915_gem_object *obj)
        return obj->ops->flags & I915_GEM_OBJECT_ASYNC_CANCEL;
 }
 
-static inline bool
-i915_gem_object_is_active(const struct drm_i915_gem_object *obj)
-{
-       return READ_ONCE(obj->active_count);
-}
-
 static inline bool
 i915_gem_object_is_framebuffer(const struct drm_i915_gem_object *obj)
 {
index 18bf4f8d6d809a169199f01e1d2703787def2bfb..34b51fad02de4c965310c6203f5a23b7ef295318 100644 (file)
@@ -154,7 +154,6 @@ struct drm_i915_gem_object {
 
        /** Count of VMA actually bound by this object */
        atomic_t bind_count;
-       unsigned int active_count;
        /** Count of how many global VMA are currently pinned for use by HW */
        unsigned int pin_global;
 
index 1bbc690494c7058de7f8a05f6832b0f2eb1cc53a..d99f1a600b964180c7d765bd113760f7cf6ebc76 100644 (file)
@@ -229,8 +229,9 @@ i915_gem_shrink(struct drm_i915_private *i915,
                                continue;
 
                        if (!(shrink & I915_SHRINK_ACTIVE) &&
-                           (i915_gem_object_is_active(obj) ||
-                            i915_gem_object_is_framebuffer(obj)))
+                           (i915_gem_object_is_framebuffer(obj) ||
+                            !reservation_object_test_signaled_rcu(obj->base.resv,
+                                                                  true)))
                                continue;
 
                        if (!(shrink & I915_SHRINK_BOUND) &&
index 2812f7fa27fe52f8b18b3f56ea69f24e0adf2ca6..24a3c677ccd51f5c4a150e576e5a6edb7cc0a336 100644 (file)
@@ -475,15 +475,6 @@ static int igt_mmap_offset_exhaustion(void *arg)
                        pr_err("[loop %d] Failed to busy the object\n", loop);
                        goto err_obj;
                }
-
-               /* NB we rely on the _active_ reference to access obj now */
-               GEM_BUG_ON(!i915_gem_object_is_active(obj));
-               err = create_mmap_offset(obj);
-               if (err) {
-                       pr_err("[loop %d] create_mmap_offset failed with err=%d\n",
-                              loop, err);
-                       goto out;
-               }
        }
 
 out:
index b3e0e25c5d8070a5a21200ddde10d84eb04f3f0d..ad7638da785d4ef3a889ca8b96cf7b22e29a380e 100644 (file)
@@ -1486,9 +1486,7 @@ static void execlists_submit_request(struct i915_request *request)
 static void __execlists_context_fini(struct intel_context *ce)
 {
        intel_ring_put(ce->ring);
-
-       GEM_BUG_ON(i915_gem_object_is_active(ce->state->obj));
-       i915_gem_object_put(ce->state->obj);
+       i915_vma_put(ce->state);
 }
 
 static void execlists_context_destroy(struct kref *kref)
index d65b8cba1a8f805adb704f6bb9f8e5a2e687bf4d..c9337e4b5ee0845b1d0decd1672da27dd1c73eb8 100644 (file)
@@ -1327,7 +1327,6 @@ void intel_ring_free(struct kref *ref)
 
 static void __ring_context_fini(struct intel_context *ce)
 {
-       GEM_BUG_ON(i915_gem_object_is_active(ce->state->obj));
        i915_gem_object_put(ce->state->obj);
 }
 
index 0dc3896e49f5ce8d8fa7014f28e69d0c86cdac7c..3ceb397c8645dee216fc18c1858da23373b88a09 100644 (file)
@@ -131,35 +131,29 @@ hang_create_request(struct hang *h, struct intel_engine_cs *engine)
 {
        struct drm_i915_private *i915 = h->i915;
        struct i915_address_space *vm = h->ctx->vm ?: &engine->gt->ggtt->vm;
+       struct drm_i915_gem_object *obj;
        struct i915_request *rq = NULL;
        struct i915_vma *hws, *vma;
        unsigned int flags;
+       void *vaddr;
        u32 *batch;
        int err;
 
-       h->gt = engine->gt;
-
-       if (i915_gem_object_is_active(h->obj)) {
-               struct drm_i915_gem_object *obj;
-               void *vaddr;
+       obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+       if (IS_ERR(obj))
+               return ERR_CAST(obj);
 
-               obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
-               if (IS_ERR(obj))
-                       return ERR_CAST(obj);
-
-               vaddr = i915_gem_object_pin_map(obj,
-                                               i915_coherent_map_type(i915));
-               if (IS_ERR(vaddr)) {
-                       i915_gem_object_put(obj);
-                       return ERR_CAST(vaddr);
-               }
+       vaddr = i915_gem_object_pin_map(obj, i915_coherent_map_type(i915));
+       if (IS_ERR(vaddr)) {
+               i915_gem_object_put(obj);
+               return ERR_CAST(vaddr);
+       }
 
-               i915_gem_object_unpin_map(h->obj);
-               i915_gem_object_put(h->obj);
+       i915_gem_object_unpin_map(h->obj);
+       i915_gem_object_put(h->obj);
 
-               h->obj = obj;
-               h->batch = vaddr;
-       }
+       h->obj = obj;
+       h->batch = vaddr;
 
        vma = i915_vma_instance(h->obj, vm, NULL);
        if (IS_ERR(vma))
index 62cf34db9280b20e4c43e405d3ff56468eb26198..eeecdad0e3ca3c771e96d5876f5ba320f69f0ef4 100644 (file)
@@ -75,11 +75,6 @@ static int i915_capabilities(struct seq_file *m, void *data)
        return 0;
 }
 
-static char get_active_flag(struct drm_i915_gem_object *obj)
-{
-       return i915_gem_object_is_active(obj) ? '*' : ' ';
-}
-
 static char get_pin_flag(struct drm_i915_gem_object *obj)
 {
        return obj->pin_global ? 'p' : ' ';
@@ -144,9 +139,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
        unsigned int frontbuffer_bits;
        int pin_count = 0;
 
-       seq_printf(m, "%pK: %c%c%c%c%c %8zdKiB %02x %02x %s%s%s",
+       seq_printf(m, "%pK: %c%c%c%c %8zdKiB %02x %02x %s%s%s",
                   &obj->base,
-                  get_active_flag(obj),
                   get_pin_flag(obj),
                   get_tiling_flag(obj),
                   get_global_flag(obj),
index 25a3e4d09a2ffe9c6589999fd9e10ca6b4981db9..b17f239912534e021bcddbe7341433544a8a2937 100644 (file)
@@ -94,34 +94,26 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool,
        list = &pool->cache_list[n];
 
        list_for_each_entry(obj, list, batch_pool_link) {
+               struct reservation_object *resv = obj->base.resv;
+
                /* The batches are strictly LRU ordered */
-               if (i915_gem_object_is_active(obj)) {
-                       struct reservation_object *resv = obj->base.resv;
-
-                       if (!reservation_object_test_signaled_rcu(resv, true))
-                               break;
-
-                       i915_retire_requests(pool->engine->i915);
-                       GEM_BUG_ON(i915_gem_object_is_active(obj));
-
-                       /*
-                        * The object is now idle, clear the array of shared
-                        * fences before we add a new request. Although, we
-                        * remain on the same engine, we may be on a different
-                        * timeline and so may continually grow the array,
-                        * trapping a reference to all the old fences, rather
-                        * than replace the existing fence.
-                        */
-                       if (rcu_access_pointer(resv->fence)) {
-                               reservation_object_lock(resv, NULL);
-                               reservation_object_add_excl_fence(resv, NULL);
-                               reservation_object_unlock(resv);
-                       }
+               if (!reservation_object_test_signaled_rcu(resv, true))
+                       break;
+
+               /*
+                * The object is now idle, clear the array of shared
+                * fences before we add a new request. Although, we
+                * remain on the same engine, we may be on a different
+                * timeline and so may continually grow the array,
+                * trapping a reference to all the old fences, rather
+                * than replace the existing fence.
+                */
+               if (rcu_access_pointer(resv->fence)) {
+                       reservation_object_lock(resv, NULL);
+                       reservation_object_add_excl_fence(resv, NULL);
+                       reservation_object_unlock(resv);
                }
 
-               GEM_BUG_ON(!reservation_object_test_signaled_rcu(obj->base.resv,
-                                                                true));
-
                if (obj->base.size >= size)
                        goto found;
        }
index 503f1180af12fe16bcbd4ef0dbfeeb76269023eb..c13b86e6ef1f97387b98695d27e63b6f021a6695 100644 (file)
@@ -78,43 +78,11 @@ static void vma_print_allocator(struct i915_vma *vma, const char *reason)
 
 #endif
 
-static void obj_bump_mru(struct drm_i915_gem_object *obj)
-{
-       struct drm_i915_private *i915 = to_i915(obj->base.dev);
-       unsigned long flags;
-
-       spin_lock_irqsave(&i915->mm.obj_lock, flags);
-       list_move_tail(&obj->mm.link, &i915->mm.shrink_list);
-       spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
-
-       obj->mm.dirty = true; /* be paranoid  */
-}
-
 static void __i915_vma_retire(struct i915_active *ref)
 {
        struct i915_vma *vma = container_of(ref, typeof(*vma), active);
-       struct drm_i915_gem_object *obj = vma->obj;
-
-       GEM_BUG_ON(!i915_gem_object_is_active(obj));
-       if (--obj->active_count)
-               return;
-
-       /* Prune the shared fence arrays iff completely idle (inc. external) */
-       if (reservation_object_trylock(obj->base.resv)) {
-               if (reservation_object_test_signaled_rcu(obj->base.resv, true))
-                       reservation_object_add_excl_fence(obj->base.resv, NULL);
-               reservation_object_unlock(obj->base.resv);
-       }
 
-       /*
-        * Bump our place on the bound list to keep it roughly in LRU order
-        * so that we don't steal from recently used but inactive objects
-        * (unless we are forced to ofc!)
-        */
-       if (i915_gem_object_is_shrinkable(obj))
-               obj_bump_mru(obj);
-
-       i915_gem_object_put(obj); /* and drop the active reference */
+       i915_vma_put(vma);
 }
 
 static struct i915_vma *
@@ -922,6 +890,7 @@ int i915_vma_move_to_active(struct i915_vma *vma,
                            unsigned int flags)
 {
        struct drm_i915_gem_object *obj = vma->obj;
+       int err;
 
        assert_vma_held(vma);
        assert_object_held(obj);
@@ -935,17 +904,13 @@ int i915_vma_move_to_active(struct i915_vma *vma,
         * add the active reference first and queue for it to be dropped
         * *last*.
         */
-       if (!vma->active.count && !obj->active_count++)
-               i915_gem_object_get(obj); /* once more for the active ref */
-
-       if (unlikely(i915_active_ref(&vma->active, rq->fence.context, rq))) {
-               if (!vma->active.count && !--obj->active_count)
-                       i915_gem_object_put(obj);
-               return -ENOMEM;
-       }
+       if (i915_active_acquire(&vma->active))
+               i915_vma_get(vma);
 
-       GEM_BUG_ON(!i915_vma_is_active(vma));
-       GEM_BUG_ON(!obj->active_count);
+       err = i915_active_ref(&vma->active, rq->fence.context, rq);
+       i915_active_release(&vma->active);
+       if (unlikely(err))
+               return err;
 
        obj->write_domain = 0;
        if (flags & EXEC_OBJECT_WRITE) {
@@ -957,11 +922,14 @@ int i915_vma_move_to_active(struct i915_vma *vma,
                obj->read_domains = 0;
        }
        obj->read_domains |= I915_GEM_GPU_DOMAINS;
+       obj->mm.dirty = true;
 
        if (flags & EXEC_OBJECT_NEEDS_FENCE)
                __i915_active_request_set(&vma->last_fence, rq);
 
        export_fence(vma, rq, flags);
+
+       GEM_BUG_ON(!i915_vma_is_active(vma));
        return 0;
 }