CTX_REG(reg_state, state_offset, flex_regs[i], value);
}
+
+ CTX_REG(reg_state, CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE,
+ gen8_make_rpcs(dev_priv,
+ &to_intel_context(ctx,
+ dev_priv->engine[RCS])->sseu));
}
/*
if (ret)
goto err_lock;
+ stream->ops = &i915_oa_stream_ops;
+ dev_priv->perf.oa.exclusive_stream = stream;
+
ret = dev_priv->perf.oa.ops.enable_metric_set(stream);
if (ret) {
DRM_DEBUG("Unable to enable metric set\n");
goto err_enable;
}
- stream->ops = &i915_oa_stream_ops;
-
- dev_priv->perf.oa.exclusive_stream = stream;
-
mutex_unlock(&dev_priv->drm.struct_mutex);
return 0;
err_enable:
+ dev_priv->perf.oa.exclusive_stream = NULL;
dev_priv->perf.oa.ops.disable_metric_set(dev_priv);
mutex_unlock(&dev_priv->drm.struct_mutex);
return i915_vma_pin(vma, 0, 0, flags);
}
-static u32
-make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu);
-
static void
__execlists_update_reg_state(struct intel_engine_cs *engine,
struct intel_context *ce)
/* RPCS */
if (engine->class == RENDER_CLASS)
- regs[CTX_R_PWR_CLK_STATE + 1] = make_rpcs(engine->i915,
- &ce->sseu);
+ regs[CTX_R_PWR_CLK_STATE + 1] = gen8_make_rpcs(engine->i915,
+ &ce->sseu);
}
static struct intel_context *
return logical_ring_init(engine);
}
-static u32
-make_rpcs(struct drm_i915_private *i915, struct intel_sseu *ctx_sseu)
+u32 gen8_make_rpcs(struct drm_i915_private *i915, struct intel_sseu *req_sseu)
{
const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
bool subslice_pg = sseu->has_subslice_pg;
- u8 slices = hweight8(ctx_sseu->slice_mask);
- u8 subslices = hweight8(ctx_sseu->subslice_mask);
+ struct intel_sseu ctx_sseu;
+ u8 slices, subslices;
u32 rpcs = 0;
/*
if (INTEL_GEN(i915) < 9)
return 0;
+ /*
+ * If i915/perf is active, we want a stable powergating configuration
+ * on the system.
+ *
+ * We could choose full enablement, but on ICL we know there are use
+ * cases which disable slices for functional, apart for performance
+ * reasons. So in this case we select a known stable subset.
+ */
+ if (!i915->perf.oa.exclusive_stream) {
+ ctx_sseu = *req_sseu;
+ } else {
+ ctx_sseu = intel_device_default_sseu(i915);
+
+ if (IS_GEN(i915, 11)) {
+ /*
+ * We only need subslice count so it doesn't matter
+ * which ones we select - just turn off low bits in the
+ * amount of half of all available subslices per slice.
+ */
+ ctx_sseu.subslice_mask =
+ ~(~0 << (hweight8(ctx_sseu.subslice_mask) / 2));
+ ctx_sseu.slice_mask = 0x1;
+ }
+ }
+
+ slices = hweight8(ctx_sseu.slice_mask);
+ subslices = hweight8(ctx_sseu.subslice_mask);
+
/*
* Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits
* wide and Icelake has up to eight subslices, specfial programming is
if (sseu->has_eu_pg) {
u32 val;
- val = ctx_sseu->min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
+ val = ctx_sseu.min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK);
val &= GEN8_RPCS_EU_MIN_MASK;
rpcs |= val;
- val = ctx_sseu->max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
+ val = ctx_sseu.max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK);
val &= GEN8_RPCS_EU_MAX_MASK;