{
int last_prio;
- if (!intel_engine_has_preemption(engine))
+ if (!engine->preempt_context)
return false;
if (i915_request_completed(rq))
{
u32 *cs;
- cs = intel_ring_begin(rq, 6);
+ cs = intel_ring_begin(rq, 4);
if (IS_ERR(cs))
return PTR_ERR(cs);
* particular all the gen that do not need the w/a at all!), if we
* took care to make sure that on every switch into this context
* (both ordinary and for preemption) that arbitrartion was enabled
- * we would be fine. However, there doesn't seem to be a downside to
- * being paranoid and making sure it is set before each batch and
- * every context-switch.
- *
- * Note that if we fail to enable arbitration before the request
- * is complete, then we do not see the context-switch interrupt and
- * the engine hangs (with RING_HEAD == RING_TAIL).
- *
- * That satisfies both the GPGPU w/a and our heavy-handed paranoia.
+ * we would be fine. However, for gen8 there is another w/a that
+ * requires us to not preempt inside GPGPU execution, so we keep
+ * arbitration disabled for gen8 batches. Arbitration will be
+ * re-enabled before we close the request
+ * (engine->emit_fini_breadcrumb).
*/
+ *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+
+ /* FIXME(BDW+): Address space and security selectors. */
+ *cs++ = MI_BATCH_BUFFER_START_GEN8 |
+ (flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
+ *cs++ = lower_32_bits(offset);
+ *cs++ = upper_32_bits(offset);
+
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+static int gen9_emit_bb_start(struct i915_request *rq,
+ u64 offset, u32 len,
+ const unsigned int flags)
+{
+ u32 *cs;
+
+ cs = intel_ring_begin(rq, 6);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
- /* FIXME(BDW): Address space and security selectors. */
*cs++ = MI_BATCH_BUFFER_START_GEN8 |
(flags & I915_DISPATCH_SECURE ? 0 : BIT(8));
*cs++ = lower_32_bits(offset);
engine->flags |= I915_ENGINE_SUPPORTS_STATS;
if (!intel_vgpu_active(engine->i915))
engine->flags |= I915_ENGINE_HAS_SEMAPHORES;
- if (engine->preempt_context)
+ if (engine->preempt_context &&
+ HAS_LOGICAL_RING_PREEMPTION(engine->i915))
engine->flags |= I915_ENGINE_HAS_PREEMPTION;
}
* until a more refined solution exists.
*/
}
- engine->emit_bb_start = gen8_emit_bb_start;
+ if (IS_GEN(engine->i915, 8))
+ engine->emit_bb_start = gen8_emit_bb_start;
+ else
+ engine->emit_bb_start = gen9_emit_bb_start;
}
static inline void
return err;
}
+static int live_busywait_preempt(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct i915_gem_context *ctx_hi, *ctx_lo;
+ struct intel_engine_cs *engine;
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ enum intel_engine_id id;
+ intel_wakeref_t wakeref;
+ int err = -ENOMEM;
+ u32 *map;
+
+ /*
+ * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can
+ * preempt the busywaits used to synchronise between rings.
+ */
+
+ mutex_lock(&i915->drm.struct_mutex);
+ wakeref = intel_runtime_pm_get(i915);
+
+ ctx_hi = kernel_context(i915);
+ if (!ctx_hi)
+ goto err_unlock;
+ ctx_hi->sched.priority = INT_MAX;
+
+ ctx_lo = kernel_context(i915);
+ if (!ctx_lo)
+ goto err_ctx_hi;
+ ctx_lo->sched.priority = INT_MIN;
+
+ obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ goto err_ctx_lo;
+ }
+
+ map = i915_gem_object_pin_map(obj, I915_MAP_WC);
+ if (IS_ERR(map)) {
+ err = PTR_ERR(map);
+ goto err_obj;
+ }
+
+ vma = i915_vma_instance(obj, &i915->ggtt.vm, 0);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto err_map;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
+ if (err)
+ goto err_map;
+
+ for_each_engine(engine, i915, id) {
+ struct i915_request *lo, *hi;
+ struct igt_live_test t;
+ u32 *cs;
+
+ if (!intel_engine_can_store_dword(engine))
+ continue;
+
+ if (igt_live_test_begin(&t, i915, __func__, engine->name)) {
+ err = -EIO;
+ goto err_vma;
+ }
+
+ /*
+ * We create two requests. The low priority request
+ * busywaits on a semaphore (inside the ringbuffer where
+ * is should be preemptible) and the high priority requests
+ * uses a MI_STORE_DWORD_IMM to update the semaphore value
+ * allowing the first request to complete. If preemption
+ * fails, we hang instead.
+ */
+
+ lo = i915_request_alloc(engine, ctx_lo);
+ if (IS_ERR(lo)) {
+ err = PTR_ERR(lo);
+ goto err_vma;
+ }
+
+ cs = intel_ring_begin(lo, 8);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ i915_request_add(lo);
+ goto err_vma;
+ }
+
+ *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+ *cs++ = i915_ggtt_offset(vma);
+ *cs++ = 0;
+ *cs++ = 1;
+
+ /* XXX Do we need a flush + invalidate here? */
+
+ *cs++ = MI_SEMAPHORE_WAIT |
+ MI_SEMAPHORE_GLOBAL_GTT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_EQ_SDD;
+ *cs++ = 0;
+ *cs++ = i915_ggtt_offset(vma);
+ *cs++ = 0;
+
+ intel_ring_advance(lo, cs);
+ i915_request_add(lo);
+
+ if (wait_for(READ_ONCE(*map), 10)) {
+ err = -ETIMEDOUT;
+ goto err_vma;
+ }
+
+ /* Low priority request should be busywaiting now */
+ if (i915_request_wait(lo, I915_WAIT_LOCKED, 1) != -ETIME) {
+ pr_err("%s: Busywaiting request did not!\n",
+ engine->name);
+ err = -EIO;
+ goto err_vma;
+ }
+
+ hi = i915_request_alloc(engine, ctx_hi);
+ if (IS_ERR(hi)) {
+ err = PTR_ERR(hi);
+ goto err_vma;
+ }
+
+ cs = intel_ring_begin(hi, 4);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ i915_request_add(hi);
+ goto err_vma;
+ }
+
+ *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+ *cs++ = i915_ggtt_offset(vma);
+ *cs++ = 0;
+ *cs++ = 0;
+
+ intel_ring_advance(hi, cs);
+ i915_request_add(hi);
+
+ if (i915_request_wait(lo, I915_WAIT_LOCKED, HZ / 5) < 0) {
+ struct drm_printer p = drm_info_printer(i915->drm.dev);
+
+ pr_err("%s: Failed to preempt semaphore busywait!\n",
+ engine->name);
+
+ intel_engine_dump(engine, &p, "%s\n", engine->name);
+ GEM_TRACE_DUMP();
+
+ i915_gem_set_wedged(i915);
+ err = -EIO;
+ goto err_vma;
+ }
+ GEM_BUG_ON(READ_ONCE(*map));
+
+ if (igt_live_test_end(&t)) {
+ err = -EIO;
+ goto err_vma;
+ }
+ }
+
+ err = 0;
+err_vma:
+ i915_vma_unpin(vma);
+err_map:
+ i915_gem_object_unpin_map(obj);
+err_obj:
+ i915_gem_object_put(obj);
+err_ctx_lo:
+ kernel_context_close(ctx_lo);
+err_ctx_hi:
+ kernel_context_close(ctx_hi);
+err_unlock:
+ if (igt_flush_test(i915, I915_WAIT_LOCKED))
+ err = -EIO;
+ intel_runtime_pm_put(i915, wakeref);
+ mutex_unlock(&i915->drm.struct_mutex);
+ return err;
+}
+
static int live_preempt(void *arg)
{
struct drm_i915_private *i915 = arg;
{
static const struct i915_subtest tests[] = {
SUBTEST(live_sanitycheck),
+ SUBTEST(live_busywait_preempt),
SUBTEST(live_preempt),
SUBTEST(live_late_preempt),
SUBTEST(live_suppress_self_preempt),