From: Chris Wilson
Date: Mon, 1 Oct 2018 14:47:54 +0000 (+0100)
Subject: drm/i915: Pull scheduling under standalone lock
X-Git-Url: http://git.lede-project.org./?a=commitdiff_plain;h=e2f3496e93be3238de2e2e6bfc83b3a83c084ce5;p=openwrt%2Fstaging%2Fblogic.git

drm/i915: Pull scheduling under standalone lock

Currently, the backend scheduling code abuses struct_mutex in order to
have a global lock to manipulate a temporary list (without widespread
allocation) and to protect against list modifications.

This is an extraneous coupling to struct_mutex and further cannot
extend beyond the local device.

Pull all the code that needs to be under the one true lock into
i915_scheduler.c, and make it so.

Signed-off-by: Chris Wilson
Reviewed-by: Tvrtko Ursulin
Link: https://patchwork.freedesktop.org/patch/msgid/20181001144755.7978-2-chris@chris-wilson.co.uk
---

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 5794f102f9b8..ef1480c14e4e 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -75,6 +75,7 @@ i915-y += i915_cmd_parser.o \
 	  i915_gemfs.o \
 	  i915_query.o \
 	  i915_request.o \
+	  i915_scheduler.o \
 	  i915_timeline.o \
 	  i915_trace_points.o \
 	  i915_vma.o \
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 56140ca054e8..d73ad490a261 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -111,91 +111,6 @@ i915_request_remove_from_client(struct i915_request *request)
 	spin_unlock(&file_priv->mm.lock);
 }
 
-static struct i915_dependency *
-i915_dependency_alloc(struct drm_i915_private *i915)
-{
-	return kmem_cache_alloc(i915->dependencies, GFP_KERNEL);
-}
-
-static void
-i915_dependency_free(struct drm_i915_private *i915,
-		     struct i915_dependency *dep)
-{
-	kmem_cache_free(i915->dependencies, dep);
-}
-
-static void
-__i915_sched_node_add_dependency(struct i915_sched_node *node,
-				 struct i915_sched_node *signal,
-				 struct i915_dependency *dep,
-				 unsigned long flags)
-{
-	INIT_LIST_HEAD(&dep->dfs_link);
-	list_add(&dep->wait_link, &signal->waiters_list);
-	list_add(&dep->signal_link, &node->signalers_list);
-	dep->signaler = signal;
-	dep->flags = flags;
-}
-
-static int
-i915_sched_node_add_dependency(struct drm_i915_private *i915,
-			       struct i915_sched_node *node,
-			       struct i915_sched_node *signal)
-{
-	struct i915_dependency *dep;
-
-	dep = i915_dependency_alloc(i915);
-	if (!dep)
-		return -ENOMEM;
-
-	__i915_sched_node_add_dependency(node, signal, dep,
-					 I915_DEPENDENCY_ALLOC);
-	return 0;
-}
-
-static void
-i915_sched_node_fini(struct drm_i915_private *i915,
-		     struct i915_sched_node *node)
-{
-	struct i915_dependency *dep, *tmp;
-
-	GEM_BUG_ON(!list_empty(&node->link));
-
-	/*
-	 * Everyone we depended upon (the fences we wait to be signaled)
-	 * should retire before us and remove themselves from our list.
-	 * However, retirement is run independently on each timeline and
-	 * so we may be called out-of-order.
- */ - list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) { - GEM_BUG_ON(!i915_sched_node_signaled(dep->signaler)); - GEM_BUG_ON(!list_empty(&dep->dfs_link)); - - list_del(&dep->wait_link); - if (dep->flags & I915_DEPENDENCY_ALLOC) - i915_dependency_free(i915, dep); - } - - /* Remove ourselves from everyone who depends upon us */ - list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) { - GEM_BUG_ON(dep->signaler != node); - GEM_BUG_ON(!list_empty(&dep->dfs_link)); - - list_del(&dep->signal_link); - if (dep->flags & I915_DEPENDENCY_ALLOC) - i915_dependency_free(i915, dep); - } -} - -static void -i915_sched_node_init(struct i915_sched_node *node) -{ - INIT_LIST_HEAD(&node->signalers_list); - INIT_LIST_HEAD(&node->waiters_list); - INIT_LIST_HEAD(&node->link); - node->attr.priority = I915_PRIORITY_INVALID; -} - static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno) { struct intel_engine_cs *engine; diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 7fa94b024968..5f7361e0fca6 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -332,14 +332,6 @@ static inline bool i915_request_completed(const struct i915_request *rq) return __i915_request_completed(rq, seqno); } -static inline bool i915_sched_node_signaled(const struct i915_sched_node *node) -{ - const struct i915_request *rq = - container_of(node, const struct i915_request, sched); - - return i915_request_completed(rq); -} - void i915_retire_requests(struct drm_i915_private *i915); /* diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c new file mode 100644 index 000000000000..de9a2ba7c3bc --- /dev/null +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -0,0 +1,377 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#include + +#include "i915_drv.h" +#include "i915_request.h" +#include "i915_scheduler.h" + +static DEFINE_SPINLOCK(schedule_lock); + +static const struct i915_request * +node_to_request(const struct i915_sched_node *node) +{ + return container_of(node, const struct i915_request, sched); +} + +static inline bool node_signaled(const struct i915_sched_node *node) +{ + return i915_request_completed(node_to_request(node)); +} + +void i915_sched_node_init(struct i915_sched_node *node) +{ + INIT_LIST_HEAD(&node->signalers_list); + INIT_LIST_HEAD(&node->waiters_list); + INIT_LIST_HEAD(&node->link); + node->attr.priority = I915_PRIORITY_INVALID; +} + +static struct i915_dependency * +i915_dependency_alloc(struct drm_i915_private *i915) +{ + return kmem_cache_alloc(i915->dependencies, GFP_KERNEL); +} + +static void +i915_dependency_free(struct drm_i915_private *i915, + struct i915_dependency *dep) +{ + kmem_cache_free(i915->dependencies, dep); +} + +bool __i915_sched_node_add_dependency(struct i915_sched_node *node, + struct i915_sched_node *signal, + struct i915_dependency *dep, + unsigned long flags) +{ + bool ret = false; + + spin_lock(&schedule_lock); + + if (!node_signaled(signal)) { + INIT_LIST_HEAD(&dep->dfs_link); + list_add(&dep->wait_link, &signal->waiters_list); + list_add(&dep->signal_link, &node->signalers_list); + dep->signaler = signal; + dep->flags = flags; + + ret = true; + } + + spin_unlock(&schedule_lock); + + return ret; +} + +int i915_sched_node_add_dependency(struct drm_i915_private *i915, + struct i915_sched_node *node, + struct i915_sched_node *signal) +{ + struct i915_dependency *dep; + + dep = 
i915_dependency_alloc(i915); + if (!dep) + return -ENOMEM; + + if (!__i915_sched_node_add_dependency(node, signal, dep, + I915_DEPENDENCY_ALLOC)) + i915_dependency_free(i915, dep); + + return 0; +} + +void i915_sched_node_fini(struct drm_i915_private *i915, + struct i915_sched_node *node) +{ + struct i915_dependency *dep, *tmp; + + GEM_BUG_ON(!list_empty(&node->link)); + + spin_lock(&schedule_lock); + + /* + * Everyone we depended upon (the fences we wait to be signaled) + * should retire before us and remove themselves from our list. + * However, retirement is run independently on each timeline and + * so we may be called out-of-order. + */ + list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) { + GEM_BUG_ON(!node_signaled(dep->signaler)); + GEM_BUG_ON(!list_empty(&dep->dfs_link)); + + list_del(&dep->wait_link); + if (dep->flags & I915_DEPENDENCY_ALLOC) + i915_dependency_free(i915, dep); + } + + /* Remove ourselves from everyone who depends upon us */ + list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) { + GEM_BUG_ON(dep->signaler != node); + GEM_BUG_ON(!list_empty(&dep->dfs_link)); + + list_del(&dep->signal_link); + if (dep->flags & I915_DEPENDENCY_ALLOC) + i915_dependency_free(i915, dep); + } + + spin_unlock(&schedule_lock); +} + +static inline struct i915_priolist *to_priolist(struct rb_node *rb) +{ + return rb_entry(rb, struct i915_priolist, node); +} + +static void assert_priolists(struct intel_engine_execlists * const execlists, + long queue_priority) +{ + struct rb_node *rb; + long last_prio, i; + + if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + return; + + GEM_BUG_ON(rb_first_cached(&execlists->queue) != + rb_first(&execlists->queue.rb_root)); + + last_prio = (queue_priority >> I915_USER_PRIORITY_SHIFT) + 1; + for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { + const struct i915_priolist *p = to_priolist(rb); + + GEM_BUG_ON(p->priority >= last_prio); + last_prio = p->priority; + + GEM_BUG_ON(!p->used); + for (i = 0; i < ARRAY_SIZE(p->requests); i++) { + if (list_empty(&p->requests[i])) + continue; + + GEM_BUG_ON(!(p->used & BIT(i))); + } + } +} + +struct list_head * +i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + struct i915_priolist *p; + struct rb_node **parent, *rb; + bool first = true; + int idx, i; + + lockdep_assert_held(&engine->timeline.lock); + assert_priolists(execlists, INT_MAX); + + /* buckets sorted from highest [in slot 0] to lowest priority */ + idx = I915_PRIORITY_COUNT - (prio & I915_PRIORITY_MASK) - 1; + prio >>= I915_USER_PRIORITY_SHIFT; + if (unlikely(execlists->no_priolist)) + prio = I915_PRIORITY_NORMAL; + +find_priolist: + /* most positive priority is scheduled first, equal priorities fifo */ + rb = NULL; + parent = &execlists->queue.rb_root.rb_node; + while (*parent) { + rb = *parent; + p = to_priolist(rb); + if (prio > p->priority) { + parent = &rb->rb_left; + } else if (prio < p->priority) { + parent = &rb->rb_right; + first = false; + } else { + goto out; + } + } + + if (prio == I915_PRIORITY_NORMAL) { + p = &execlists->default_priolist; + } else { + p = kmem_cache_alloc(engine->i915->priorities, GFP_ATOMIC); + /* Convert an allocation failure to a priority bump */ + if (unlikely(!p)) { + prio = I915_PRIORITY_NORMAL; /* recurses just once */ + + /* To maintain ordering with all rendering, after an + * allocation failure we have to disable all scheduling. 
+ * Requests will then be executed in fifo, and schedule + * will ensure that dependencies are emitted in fifo. + * There will be still some reordering with existing + * requests, so if userspace lied about their + * dependencies that reordering may be visible. + */ + execlists->no_priolist = true; + goto find_priolist; + } + } + + p->priority = prio; + for (i = 0; i < ARRAY_SIZE(p->requests); i++) + INIT_LIST_HEAD(&p->requests[i]); + rb_link_node(&p->node, rb, parent); + rb_insert_color_cached(&p->node, &execlists->queue, first); + p->used = 0; + +out: + p->used |= BIT(idx); + return &p->requests[idx]; +} + +static struct intel_engine_cs * +sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked) +{ + struct intel_engine_cs *engine = node_to_request(node)->engine; + + GEM_BUG_ON(!locked); + + if (engine != locked) { + spin_unlock(&locked->timeline.lock); + spin_lock(&engine->timeline.lock); + } + + return engine; +} + +void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr) +{ + struct list_head *uninitialized_var(pl); + struct intel_engine_cs *engine, *last; + struct i915_dependency *dep, *p; + struct i915_dependency stack; + const int prio = attr->priority; + LIST_HEAD(dfs); + + GEM_BUG_ON(prio == I915_PRIORITY_INVALID); + + if (i915_request_completed(rq)) + return; + + if (prio <= READ_ONCE(rq->sched.attr.priority)) + return; + + /* Needed in order to use the temporary link inside i915_dependency */ + spin_lock(&schedule_lock); + + stack.signaler = &rq->sched; + list_add(&stack.dfs_link, &dfs); + + /* + * Recursively bump all dependent priorities to match the new request. + * + * A naive approach would be to use recursion: + * static void update_priorities(struct i915_sched_node *node, prio) { + * list_for_each_entry(dep, &node->signalers_list, signal_link) + * update_priorities(dep->signal, prio) + * queue_request(node); + * } + * but that may have unlimited recursion depth and so runs a very + * real risk of overunning the kernel stack. Instead, we build + * a flat list of all dependencies starting with the current request. + * As we walk the list of dependencies, we add all of its dependencies + * to the end of the list (this may include an already visited + * request) and continue to walk onwards onto the new dependencies. The + * end result is a topological list of requests in reverse order, the + * last element in the list is the request we must execute first. + */ + list_for_each_entry(dep, &dfs, dfs_link) { + struct i915_sched_node *node = dep->signaler; + + /* + * Within an engine, there can be no cycle, but we may + * refer to the same dependency chain multiple times + * (redundant dependencies are not eliminated) and across + * engines. + */ + list_for_each_entry(p, &node->signalers_list, signal_link) { + GEM_BUG_ON(p == dep); /* no cycles! */ + + if (node_signaled(p->signaler)) + continue; + + GEM_BUG_ON(p->signaler->attr.priority < node->attr.priority); + if (prio > READ_ONCE(p->signaler->attr.priority)) + list_move_tail(&p->dfs_link, &dfs); + } + } + + /* + * If we didn't need to bump any existing priorities, and we haven't + * yet submitted this request (i.e. there is no potential race with + * execlists_submit_request()), we can set our own priority and skip + * acquiring the engine locks. 
+ */ + if (rq->sched.attr.priority == I915_PRIORITY_INVALID) { + GEM_BUG_ON(!list_empty(&rq->sched.link)); + rq->sched.attr = *attr; + + if (stack.dfs_link.next == stack.dfs_link.prev) + goto out_unlock; + + __list_del_entry(&stack.dfs_link); + } + + last = NULL; + engine = rq->engine; + spin_lock_irq(&engine->timeline.lock); + + /* Fifo and depth-first replacement ensure our deps execute before us */ + list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) { + struct i915_sched_node *node = dep->signaler; + + INIT_LIST_HEAD(&dep->dfs_link); + + engine = sched_lock_engine(node, engine); + + /* Recheck after acquiring the engine->timeline.lock */ + if (prio <= node->attr.priority || node_signaled(node)) + continue; + + node->attr.priority = prio; + if (!list_empty(&node->link)) { + if (last != engine) { + pl = i915_sched_lookup_priolist(engine, prio); + last = engine; + } + list_move_tail(&node->link, pl); + } else { + /* + * If the request is not in the priolist queue because + * it is not yet runnable, then it doesn't contribute + * to our preemption decisions. On the other hand, + * if the request is on the HW, it too is not in the + * queue; but in that case we may still need to reorder + * the inflight requests. + */ + if (!i915_sw_fence_done(&node_to_request(node)->submit)) + continue; + } + + if (prio <= engine->execlists.queue_priority) + continue; + + /* + * If we are already the currently executing context, don't + * bother evaluating if we should preempt ourselves. + */ + if (node_to_request(node)->global_seqno && + i915_seqno_passed(port_request(engine->execlists.port)->global_seqno, + node_to_request(node)->global_seqno)) + continue; + + /* Defer (tasklet) submission until after all of our updates. */ + engine->execlists.queue_priority = prio; + tasklet_hi_schedule(&engine->execlists.tasklet); + } + + spin_unlock_irq(&engine->timeline.lock); + +out_unlock: + spin_unlock(&schedule_lock); +} diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index 53dc7dbf88b9..68d84a45ad7f 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -8,9 +8,14 @@ #define _I915_SCHEDULER_H_ #include +#include #include +struct drm_i915_private; +struct i915_request; +struct intel_engine_cs; + enum { I915_PRIORITY_MIN = I915_CONTEXT_MIN_USER_PRIORITY - 1, I915_PRIORITY_NORMAL = I915_CONTEXT_DEFAULT_PRIORITY, @@ -77,4 +82,24 @@ struct i915_dependency { #define I915_DEPENDENCY_ALLOC BIT(0) }; +void i915_sched_node_init(struct i915_sched_node *node); + +bool __i915_sched_node_add_dependency(struct i915_sched_node *node, + struct i915_sched_node *signal, + struct i915_dependency *dep, + unsigned long flags); + +int i915_sched_node_add_dependency(struct drm_i915_private *i915, + struct i915_sched_node *node, + struct i915_sched_node *signal); + +void i915_sched_node_fini(struct drm_i915_private *i915, + struct i915_sched_node *node); + +void i915_schedule(struct i915_request *request, + const struct i915_sched_attr *attr); + +struct list_head * +i915_sched_lookup_priolist(struct intel_engine_cs *engine, int prio); + #endif /* _I915_SCHEDULER_H_ */ diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 4c5c2b39e65c..650e01f0c197 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -13188,13 +13188,12 @@ intel_prepare_plane_fb(struct drm_plane *plane, ret = intel_plane_pin_fb(to_intel_plane_state(new_state)); - fb_obj_bump_render_priority(obj); - 
mutex_unlock(&dev_priv->drm.struct_mutex); i915_gem_object_unpin_pages(obj); if (ret) return ret; + fb_obj_bump_render_priority(obj); intel_fb_obj_flush(obj, ORIGIN_DIRTYFB); if (!new_state->fence) { /* implicit fencing */ diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 5f324d6b44d5..28d56387edf5 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -259,102 +259,6 @@ intel_lr_context_descriptor_update(struct i915_gem_context *ctx, ce->lrc_desc = desc; } -static void assert_priolists(struct intel_engine_execlists * const execlists, - long queue_priority) -{ - struct rb_node *rb; - long last_prio, i; - - if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) - return; - - GEM_BUG_ON(rb_first_cached(&execlists->queue) != - rb_first(&execlists->queue.rb_root)); - - last_prio = (queue_priority >> I915_USER_PRIORITY_SHIFT) + 1; - for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { - struct i915_priolist *p = to_priolist(rb); - - GEM_BUG_ON(p->priority >= last_prio); - last_prio = p->priority; - - GEM_BUG_ON(!p->used); - for (i = 0; i < ARRAY_SIZE(p->requests); i++) { - if (list_empty(&p->requests[i])) - continue; - - GEM_BUG_ON(!(p->used & BIT(i))); - } - } -} - -static struct list_head * -lookup_priolist(struct intel_engine_cs *engine, int prio) -{ - struct intel_engine_execlists * const execlists = &engine->execlists; - struct i915_priolist *p; - struct rb_node **parent, *rb; - bool first = true; - int idx, i; - - assert_priolists(execlists, INT_MAX); - - /* buckets sorted from highest [in slot 0] to lowest priority */ - idx = I915_PRIORITY_COUNT - (prio & I915_PRIORITY_MASK) - 1; - prio >>= I915_USER_PRIORITY_SHIFT; - if (unlikely(execlists->no_priolist)) - prio = I915_PRIORITY_NORMAL; - -find_priolist: - /* most positive priority is scheduled first, equal priorities fifo */ - rb = NULL; - parent = &execlists->queue.rb_root.rb_node; - while (*parent) { - rb = *parent; - p = to_priolist(rb); - if (prio > p->priority) { - parent = &rb->rb_left; - } else if (prio < p->priority) { - parent = &rb->rb_right; - first = false; - } else { - goto out; - } - } - - if (prio == I915_PRIORITY_NORMAL) { - p = &execlists->default_priolist; - } else { - p = kmem_cache_alloc(engine->i915->priorities, GFP_ATOMIC); - /* Convert an allocation failure to a priority bump */ - if (unlikely(!p)) { - prio = I915_PRIORITY_NORMAL; /* recurses just once */ - - /* To maintain ordering with all rendering, after an - * allocation failure we have to disable all scheduling. - * Requests will then be executed in fifo, and schedule - * will ensure that dependencies are emitted in fifo. - * There will be still some reordering with existing - * requests, so if userspace lied about their - * dependencies that reordering may be visible. 
- */ - execlists->no_priolist = true; - goto find_priolist; - } - } - - p->priority = prio; - for (i = 0; i < ARRAY_SIZE(p->requests); i++) - INIT_LIST_HEAD(&p->requests[i]); - rb_link_node(&p->node, rb, parent); - rb_insert_color_cached(&p->node, &execlists->queue, first); - p->used = 0; - -out: - p->used |= BIT(idx); - return &p->requests[idx]; -} - static void unwind_wa_tail(struct i915_request *rq) { rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES); @@ -381,7 +285,7 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine) GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); if (rq_prio(rq) != prio) { prio = rq_prio(rq); - pl = lookup_priolist(engine, prio); + pl = i915_sched_lookup_priolist(engine, prio); } GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); @@ -398,7 +302,7 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine) if (!(prio & I915_PRIORITY_NEWCLIENT)) { prio |= I915_PRIORITY_NEWCLIENT; list_move_tail(&active->sched.link, - lookup_priolist(engine, prio)); + i915_sched_lookup_priolist(engine, prio)); } } @@ -792,7 +696,6 @@ done: */ execlists->queue_priority = port != execlists->port ? rq_prio(last) : INT_MIN; - assert_priolists(execlists, execlists->queue_priority); if (submit) { port_assign(port, last); @@ -1119,12 +1022,7 @@ static void queue_request(struct intel_engine_cs *engine, struct i915_sched_node *node, int prio) { - list_add_tail(&node->link, lookup_priolist(engine, prio)); -} - -static void __update_queue(struct intel_engine_cs *engine, int prio) -{ - engine->execlists.queue_priority = prio; + list_add_tail(&node->link, i915_sched_lookup_priolist(engine, prio)); } static void __submit_queue_imm(struct intel_engine_cs *engine) @@ -1143,7 +1041,7 @@ static void __submit_queue_imm(struct intel_engine_cs *engine) static void submit_queue(struct intel_engine_cs *engine, int prio) { if (prio > engine->execlists.queue_priority) { - __update_queue(engine, prio); + engine->execlists.queue_priority = prio; __submit_queue_imm(engine); } } @@ -1166,162 +1064,6 @@ static void execlists_submit_request(struct i915_request *request) spin_unlock_irqrestore(&engine->timeline.lock, flags); } -static struct i915_request *sched_to_request(struct i915_sched_node *node) -{ - return container_of(node, struct i915_request, sched); -} - -static struct intel_engine_cs * -sched_lock_engine(struct i915_sched_node *node, struct intel_engine_cs *locked) -{ - struct intel_engine_cs *engine = sched_to_request(node)->engine; - - GEM_BUG_ON(!locked); - - if (engine != locked) { - spin_unlock(&locked->timeline.lock); - spin_lock(&engine->timeline.lock); - } - - return engine; -} - -static void execlists_schedule(struct i915_request *request, - const struct i915_sched_attr *attr) -{ - struct list_head *uninitialized_var(pl); - struct intel_engine_cs *engine, *last; - struct i915_dependency *dep, *p; - struct i915_dependency stack; - const int prio = attr->priority; - LIST_HEAD(dfs); - - GEM_BUG_ON(prio == I915_PRIORITY_INVALID); - - if (i915_request_completed(request)) - return; - - if (prio <= READ_ONCE(request->sched.attr.priority)) - return; - - /* Need BKL in order to use the temporary link inside i915_dependency */ - lockdep_assert_held(&request->i915->drm.struct_mutex); - - stack.signaler = &request->sched; - list_add(&stack.dfs_link, &dfs); - - /* - * Recursively bump all dependent priorities to match the new request. 
- * - * A naive approach would be to use recursion: - * static void update_priorities(struct i915_sched_node *node, prio) { - * list_for_each_entry(dep, &node->signalers_list, signal_link) - * update_priorities(dep->signal, prio) - * queue_request(node); - * } - * but that may have unlimited recursion depth and so runs a very - * real risk of overunning the kernel stack. Instead, we build - * a flat list of all dependencies starting with the current request. - * As we walk the list of dependencies, we add all of its dependencies - * to the end of the list (this may include an already visited - * request) and continue to walk onwards onto the new dependencies. The - * end result is a topological list of requests in reverse order, the - * last element in the list is the request we must execute first. - */ - list_for_each_entry(dep, &dfs, dfs_link) { - struct i915_sched_node *node = dep->signaler; - - /* - * Within an engine, there can be no cycle, but we may - * refer to the same dependency chain multiple times - * (redundant dependencies are not eliminated) and across - * engines. - */ - list_for_each_entry(p, &node->signalers_list, signal_link) { - GEM_BUG_ON(p == dep); /* no cycles! */ - - if (i915_sched_node_signaled(p->signaler)) - continue; - - GEM_BUG_ON(p->signaler->attr.priority < node->attr.priority); - if (prio > READ_ONCE(p->signaler->attr.priority)) - list_move_tail(&p->dfs_link, &dfs); - } - } - - /* - * If we didn't need to bump any existing priorities, and we haven't - * yet submitted this request (i.e. there is no potential race with - * execlists_submit_request()), we can set our own priority and skip - * acquiring the engine locks. - */ - if (request->sched.attr.priority == I915_PRIORITY_INVALID) { - GEM_BUG_ON(!list_empty(&request->sched.link)); - request->sched.attr = *attr; - if (stack.dfs_link.next == stack.dfs_link.prev) - return; - __list_del_entry(&stack.dfs_link); - } - - last = NULL; - engine = request->engine; - spin_lock_irq(&engine->timeline.lock); - - /* Fifo and depth-first replacement ensure our deps execute before us */ - list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) { - struct i915_sched_node *node = dep->signaler; - - INIT_LIST_HEAD(&dep->dfs_link); - - engine = sched_lock_engine(node, engine); - - /* Recheck after acquiring the engine->timeline.lock */ - if (prio <= node->attr.priority) - continue; - - if (i915_sched_node_signaled(node)) - continue; - - node->attr.priority = prio; - if (!list_empty(&node->link)) { - if (last != engine) { - pl = lookup_priolist(engine, prio); - last = engine; - } - list_move_tail(&node->link, pl); - } else { - /* - * If the request is not in the priolist queue because - * it is not yet runnable, then it doesn't contribute - * to our preemption decisions. On the other hand, - * if the request is on the HW, it too is not in the - * queue; but in that case we may still need to reorder - * the inflight requests. - */ - if (!i915_sw_fence_done(&sched_to_request(node)->submit)) - continue; - } - - if (prio <= engine->execlists.queue_priority) - continue; - - /* - * If we are already the currently executing context, don't - * bother evaluating if we should preempt ourselves. - */ - if (sched_to_request(node)->global_seqno && - i915_seqno_passed(port_request(engine->execlists.port)->global_seqno, - sched_to_request(node)->global_seqno)) - continue; - - /* Defer (tasklet) submission until after all of our updates. 
*/ - __update_queue(engine, prio); - tasklet_hi_schedule(&engine->execlists.tasklet); - } - - spin_unlock_irq(&engine->timeline.lock); -} - static void execlists_context_destroy(struct intel_context *ce) { GEM_BUG_ON(ce->pin_count); @@ -2359,7 +2101,7 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine) { engine->submit_request = execlists_submit_request; engine->cancel_requests = execlists_cancel_requests; - engine->schedule = execlists_schedule; + engine->schedule = i915_schedule; engine->execlists.tasklet.func = execlists_submission_tasklet; engine->reset.prepare = execlists_reset_prepare; diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 1534de5bb852..f6ec48a75a69 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -498,11 +498,10 @@ struct intel_engine_cs { */ void (*submit_request)(struct i915_request *rq); - /* Call when the priority on a request has changed and it and its + /* + * Call when the priority on a request has changed and it and its * dependencies may need rescheduling. Note the request itself may * not be ready to run! - * - * Called under the struct_mutex. */ void (*schedule)(struct i915_request *request, const struct i915_sched_attr *attr);
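For readers skimming the diff, the core of the change is that dependency bookkeeping (the
signalers_list/waiters_list links) is now serialised by a single static schedule_lock inside
i915_scheduler.c instead of the device-wide struct_mutex, and __i915_sched_node_add_dependency()
now reports whether the link was actually made (it is skipped when the signaler has already
completed). Below is a minimal, self-contained userspace sketch of that pattern only: a pthread
mutex stands in for the kernel spinlock, bare pointers stand in for list_head, and all names
(sched_node, add_dependency, ...) are illustrative assumptions, not taken from the driver.

	#include <pthread.h>
	#include <stdbool.h>
	#include <stdio.h>
	#include <stdlib.h>

	struct sched_dep;

	struct sched_node {
		bool signaled;                   /* stands in for i915_request_completed() */
		struct sched_dep *signalers;     /* nodes we wait on */
		struct sched_dep *waiters;       /* nodes waiting on us */
	};

	struct sched_dep {
		struct sched_node *signaler;
		struct sched_dep *next_signaler; /* link in node->signalers */
		struct sched_dep *next_waiter;   /* link in signaler->waiters */
	};

	/* One standalone lock for all dependency lists, independent of any
	 * device-wide mutex (the role schedule_lock plays in the patch). */
	static pthread_mutex_t schedule_lock = PTHREAD_MUTEX_INITIALIZER;

	/* Link @node behind @signal unless @signal already completed; report whether
	 * the preallocated @dep was consumed (cf. __i915_sched_node_add_dependency). */
	static bool add_dependency(struct sched_node *node, struct sched_node *signal,
				   struct sched_dep *dep)
	{
		bool used = false;

		pthread_mutex_lock(&schedule_lock);
		if (!signal->signaled) {
			dep->signaler = signal;
			dep->next_signaler = node->signalers;
			node->signalers = dep;
			dep->next_waiter = signal->waiters;
			signal->waiters = dep;
			used = true;
		}
		pthread_mutex_unlock(&schedule_lock);

		return used;
	}

	/* Allocate the dep outside the lock, and free it again if it turned out to be
	 * unneeded (cf. i915_sched_node_add_dependency in the new i915_scheduler.c). */
	static int node_add_dependency(struct sched_node *node, struct sched_node *signal)
	{
		struct sched_dep *dep = malloc(sizeof(*dep));

		if (!dep)
			return -1;

		if (!add_dependency(node, signal, dep))
			free(dep);

		return 0;
	}

	int main(void)
	{
		struct sched_node a = { 0 }, b = { 0 };

		node_add_dependency(&b, &a);    /* b waits on a */
		a.signaled = true;
		node_add_dependency(&b, &a);    /* no-op: a has already completed */

		printf("b has a signaler: %s\n", b.signalers ? "yes" : "no");
		return 0;
	}

The point of the boolean return is the same as in the patch: performing the completion check and
the list insertion under the one dependency lock closes the race in which the signaler retires
between the caller's check and the link being added.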