drm/i915/gvt: keep oa config in shadow ctx
authorMin He <min.he@intel.com>
Fri, 2 Mar 2018 02:00:25 +0000 (10:00 +0800)
committerZhi Wang <zhi.a.wang@intel.com>
Fri, 9 Mar 2018 13:53:18 +0000 (21:53 +0800)
When populating shadow ctx from guest, we should handle oa related
registers in hw ctx, so that they will not be overlapped by guest oa
configs. This patch made it possible to capture oa data from host for
both host and guests.

Signed-off-by: Min He <min.he@intel.com>
Signed-off-by: Zhi Wang <zhi.a.wang@intel.com>
drivers/gpu/drm/i915/gvt/scheduler.c
drivers/gpu/drm/i915/gvt/scheduler.h

index b55b3580ca1dd00402374354f3d12918817de378..8caf72c1e79425893160a2eecca0cd04e50d1b4e 100644 (file)
@@ -52,6 +52,54 @@ static void set_context_pdp_root_pointer(
                pdp_pair[i].val = pdp[7 - i];
 }
 
+/*
+ * when populating shadow ctx from guest, we should not overrride oa related
+ * registers, so that they will not be overlapped by guest oa configs. Thus
+ * made it possible to capture oa data from host for both host and guests.
+ */
+static void sr_oa_regs(struct intel_vgpu_workload *workload,
+               u32 *reg_state, bool save)
+{
+       struct drm_i915_private *dev_priv = workload->vgpu->gvt->dev_priv;
+       u32 ctx_oactxctrl = dev_priv->perf.oa.ctx_oactxctrl_offset;
+       u32 ctx_flexeu0 = dev_priv->perf.oa.ctx_flexeu0_offset;
+       int i = 0;
+       u32 flex_mmio[] = {
+               i915_mmio_reg_offset(EU_PERF_CNTL0),
+               i915_mmio_reg_offset(EU_PERF_CNTL1),
+               i915_mmio_reg_offset(EU_PERF_CNTL2),
+               i915_mmio_reg_offset(EU_PERF_CNTL3),
+               i915_mmio_reg_offset(EU_PERF_CNTL4),
+               i915_mmio_reg_offset(EU_PERF_CNTL5),
+               i915_mmio_reg_offset(EU_PERF_CNTL6),
+       };
+
+       if (!workload || !reg_state || workload->ring_id != RCS)
+               return;
+
+       if (save) {
+               workload->oactxctrl = reg_state[ctx_oactxctrl + 1];
+
+               for (i = 0; i < ARRAY_SIZE(workload->flex_mmio); i++) {
+                       u32 state_offset = ctx_flexeu0 + i * 2;
+
+                       workload->flex_mmio[i] = reg_state[state_offset + 1];
+               }
+       } else {
+               reg_state[ctx_oactxctrl] =
+                       i915_mmio_reg_offset(GEN8_OACTXCONTROL);
+               reg_state[ctx_oactxctrl + 1] = workload->oactxctrl;
+
+               for (i = 0; i < ARRAY_SIZE(workload->flex_mmio); i++) {
+                       u32 state_offset = ctx_flexeu0 + i * 2;
+                       u32 mmio = flex_mmio[i];
+
+                       reg_state[state_offset] = mmio;
+                       reg_state[state_offset + 1] = workload->flex_mmio[i];
+               }
+       }
+}
+
 static int populate_shadow_context(struct intel_vgpu_workload *workload)
 {
        struct intel_vgpu *vgpu = workload->vgpu;
@@ -98,6 +146,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
        page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
        shadow_ring_context = kmap(page);
 
+       sr_oa_regs(workload, (u32 *)shadow_ring_context, true);
 #define COPY_REG(name) \
        intel_gvt_hypervisor_read_gpa(vgpu, workload->ring_context_gpa \
                + RING_CTX_OFF(name.val), &shadow_ring_context->name.val, 4)
@@ -122,6 +171,7 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
                        sizeof(*shadow_ring_context),
                        I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context));
 
+       sr_oa_regs(workload, (u32 *)shadow_ring_context, false);
        kunmap(page);
        return 0;
 }
index ff175a98b19ed41c6cdd8405a8872d4bdb6003dd..2603336b7c6d761209160b6724dbf93bbc8a5a02 100644 (file)
@@ -110,6 +110,10 @@ struct intel_vgpu_workload {
        /* shadow batch buffer */
        struct list_head shadow_bb;
        struct intel_shadow_wa_ctx wa_ctx;
+
+       /* oa registers */
+       u32 oactxctrl;
+       u32 flex_mmio[7];
 };
 
 struct intel_vgpu_shadow_bb {