perf/x86: Support outputting XMM registers
authorKan Liang <kan.liang@linux.intel.com>
Tue, 2 Apr 2019 19:44:59 +0000 (12:44 -0700)
committerIngo Molnar <mingo@kernel.org>
Tue, 16 Apr 2019 10:19:36 +0000 (12:19 +0200)
Starting from Icelake, XMM registers can be collected in PEBS record.
But the current code only outputs the pt_regs.

Add a new struct x86_perf_regs for both pt_regs and xmm_regs. The
xmm_regs will be used later to keep a pointer to PEBS record which has
XMM information.

XMM registers are 128-bit. To simplify the code, they are handled as
two different registers, which means setting two bits in the register
bitmap. This also allows sampling only the lower 64 bits of an XMM
register.

The index of XMM registers starts from 32. There are 16 XMM registers.
So all of the reserved space for regs is used. Remove REG_RESERVED.

Add PERF_REG_X86_XMM_MAX, which stands for the max number of all x86
regs including both GPRs and XMM.

Add REG_NOSUPPORT for 32bit to exclude unsupported registers.

Previous platforms cannot collect XMM information in the PEBS record.
Add pebs_no_xmm_regs to indicate the unsupported platforms.

The common code still validates the supported registers. However, it
cannot check model-specific registers, e.g. XMM. Add an extra check in
x86_pmu_hw_config() to reject invalid configs of regs_user and regs_intr.
The regs_user never supports XMM collection.
The regs_intr only supports XMM collection when sampling PEBS event on
icelake and later platforms.

Originally-by: Andi Kleen <ak@linux.intel.com>
Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: acme@kernel.org
Cc: jolsa@kernel.org
Link: https://lkml.kernel.org/r/20190402194509.2832-3-kan.liang@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
arch/x86/events/core.c
arch/x86/events/intel/ds.c
arch/x86/events/perf_event.h
arch/x86/include/asm/perf_event.h
arch/x86/include/uapi/asm/perf_regs.h
arch/x86/kernel/perf_regs.c

index fdd106267fd230d2f1929a8d1fd1887d0b2883e8..de1a924a49145127b8cde57dca2070e736d69ce6 100644 (file)
@@ -560,6 +560,21 @@ int x86_pmu_hw_config(struct perf_event *event)
                        return -EINVAL;
        }
 
+       /* sample_regs_user never support XMM registers */
+       if (unlikely(event->attr.sample_regs_user & PEBS_XMM_REGS))
+               return -EINVAL;
+       /*
+        * Besides the general purpose registers, XMM registers may
+        * be collected in PEBS on some platforms, e.g. Icelake
+        */
+       if (unlikely(event->attr.sample_regs_intr & PEBS_XMM_REGS)) {
+               if (x86_pmu.pebs_no_xmm_regs)
+                       return -EINVAL;
+
+               if (!event->attr.precise_ip)
+                       return -EINVAL;
+       }
+
        return x86_setup_perfctr(event);
 }
 
index 10c99ce1feaddf5fa196bfbd385cbd02b55ef57a..f57e6cb7fd993c873a0119d3481361e2ba04cff8 100644 (file)
@@ -1628,8 +1628,10 @@ void __init intel_ds_init(void)
        x86_pmu.bts  = boot_cpu_has(X86_FEATURE_BTS);
        x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
        x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
-       if (x86_pmu.version <= 4)
+       if (x86_pmu.version <= 4) {
                x86_pmu.pebs_no_isolation = 1;
+               x86_pmu.pebs_no_xmm_regs = 1;
+       }
        if (x86_pmu.pebs) {
                char pebs_type = x86_pmu.intel_cap.pebs_trap ?  '+' : '-';
                int format = x86_pmu.intel_cap.pebs_format;
index 9e474a5f3b86934301012508e93f1fd1d972114e..7abfadb4f2025836b630949643412bab1bc7c794 100644 (file)
@@ -115,6 +115,24 @@ struct amd_nb {
         (1ULL << PERF_REG_X86_R14)   | \
         (1ULL << PERF_REG_X86_R15))
 
+#define PEBS_XMM_REGS                   \
+       ((1ULL << PERF_REG_X86_XMM0)  | \
+        (1ULL << PERF_REG_X86_XMM1)  | \
+        (1ULL << PERF_REG_X86_XMM2)  | \
+        (1ULL << PERF_REG_X86_XMM3)  | \
+        (1ULL << PERF_REG_X86_XMM4)  | \
+        (1ULL << PERF_REG_X86_XMM5)  | \
+        (1ULL << PERF_REG_X86_XMM6)  | \
+        (1ULL << PERF_REG_X86_XMM7)  | \
+        (1ULL << PERF_REG_X86_XMM8)  | \
+        (1ULL << PERF_REG_X86_XMM9)  | \
+        (1ULL << PERF_REG_X86_XMM10) | \
+        (1ULL << PERF_REG_X86_XMM11) | \
+        (1ULL << PERF_REG_X86_XMM12) | \
+        (1ULL << PERF_REG_X86_XMM13) | \
+        (1ULL << PERF_REG_X86_XMM14) | \
+        (1ULL << PERF_REG_X86_XMM15))
+
 /*
  * Per register state.
  */
@@ -612,7 +630,8 @@ struct x86_pmu {
                        pebs_broken             :1,
                        pebs_prec_dist          :1,
                        pebs_no_tlb             :1,
-                       pebs_no_isolation       :1;
+                       pebs_no_isolation       :1,
+                       pebs_no_xmm_regs        :1;
        int             pebs_record_size;
        int             pebs_buffer_size;
        void            (*drain_pebs)(struct pt_regs *regs);
index 8bdf74902293489a031aa300a605447e83b96341..d9f5bbe44b3ccbb8a0846c04c419d13a83bfee3d 100644 (file)
@@ -248,6 +248,11 @@ extern void perf_events_lapic_init(void);
 #define PERF_EFLAGS_VM         (1UL << 5)
 
 struct pt_regs;
+struct x86_perf_regs {
+       struct pt_regs  regs;
+       u64             *xmm_regs;
+};
+
 extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
 extern unsigned long perf_misc_flags(struct pt_regs *regs);
 #define perf_misc_flags(regs)  perf_misc_flags(regs)
index f3329cabce5c6d9e7c605a0fb46f764e2d643141..ac67bbea10cae36848ff0be197c40a3a7af7c0f6 100644 (file)
@@ -27,8 +27,29 @@ enum perf_event_x86_regs {
        PERF_REG_X86_R13,
        PERF_REG_X86_R14,
        PERF_REG_X86_R15,
-
+       /* These are the limits for the GPRs. */
        PERF_REG_X86_32_MAX = PERF_REG_X86_GS + 1,
        PERF_REG_X86_64_MAX = PERF_REG_X86_R15 + 1,
+
+       /* These all need two bits set because they are 128bit */
+       PERF_REG_X86_XMM0  = 32,
+       PERF_REG_X86_XMM1  = 34,
+       PERF_REG_X86_XMM2  = 36,
+       PERF_REG_X86_XMM3  = 38,
+       PERF_REG_X86_XMM4  = 40,
+       PERF_REG_X86_XMM5  = 42,
+       PERF_REG_X86_XMM6  = 44,
+       PERF_REG_X86_XMM7  = 46,
+       PERF_REG_X86_XMM8  = 48,
+       PERF_REG_X86_XMM9  = 50,
+       PERF_REG_X86_XMM10 = 52,
+       PERF_REG_X86_XMM11 = 54,
+       PERF_REG_X86_XMM12 = 56,
+       PERF_REG_X86_XMM13 = 58,
+       PERF_REG_X86_XMM14 = 60,
+       PERF_REG_X86_XMM15 = 62,
+
+       /* These include both GPRs and XMMX registers */
+       PERF_REG_X86_XMM_MAX = PERF_REG_X86_XMM15 + 2,
 };
 #endif /* _ASM_X86_PERF_REGS_H */
index c06c4c16c6b69c0d251505fa4c03a658c5f938a6..07c30ee1742542f15923b6e4ab7020b22bc634ad 100644 (file)
@@ -59,18 +59,34 @@ static unsigned int pt_regs_offset[PERF_REG_X86_MAX] = {
 
 u64 perf_reg_value(struct pt_regs *regs, int idx)
 {
+       struct x86_perf_regs *perf_regs;
+
+       if (idx >= PERF_REG_X86_XMM0 && idx < PERF_REG_X86_XMM_MAX) {
+               perf_regs = container_of(regs, struct x86_perf_regs, regs);
+               if (!perf_regs->xmm_regs)
+                       return 0;
+               return perf_regs->xmm_regs[idx - PERF_REG_X86_XMM0];
+       }
+
        if (WARN_ON_ONCE(idx >= ARRAY_SIZE(pt_regs_offset)))
                return 0;
 
        return regs_get_register(regs, pt_regs_offset[idx]);
 }
 
-#define REG_RESERVED (~((1ULL << PERF_REG_X86_MAX) - 1ULL))
-
 #ifdef CONFIG_X86_32
+#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_R8) | \
+                      (1ULL << PERF_REG_X86_R9) | \
+                      (1ULL << PERF_REG_X86_R10) | \
+                      (1ULL << PERF_REG_X86_R11) | \
+                      (1ULL << PERF_REG_X86_R12) | \
+                      (1ULL << PERF_REG_X86_R13) | \
+                      (1ULL << PERF_REG_X86_R14) | \
+                      (1ULL << PERF_REG_X86_R15))
+
 int perf_reg_validate(u64 mask)
 {
-       if (!mask || mask & REG_RESERVED)
+       if (!mask || (mask & REG_NOSUPPORT))
                return -EINVAL;
 
        return 0;
@@ -96,10 +112,7 @@ void perf_get_regs_user(struct perf_regs *regs_user,
 
 int perf_reg_validate(u64 mask)
 {
-       if (!mask || mask & REG_RESERVED)
-               return -EINVAL;
-
-       if (mask & REG_NOSUPPORT)
+       if (!mask || (mask & REG_NOSUPPORT))
                return -EINVAL;
 
        return 0;