perf/x86: Use rdpmc() rather than rdmsr() when possible in the kernel
author     Vince Weaver <vweaver1@eecs.utk.edu>
           Thu, 1 Mar 2012 22:28:14 +0000 (17:28 -0500)
committer  Ingo Molnar <mingo@kernel.org>
           Wed, 6 Jun 2012 15:23:35 +0000 (17:23 +0200)
The rdpmc instruction is faster than the equivalent rdmsr call,
so use it when possible in the kernel.
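
[ For illustration only, not part of the patch: a minimal sketch of
  the two read paths as inline asm.  The helper names are
  hypothetical; the kernel itself uses the rdmsrl()/rdpmcl()
  wrappers visible in the diff below.

	static inline unsigned long long my_rdpmc(unsigned int counter)
	{
		unsigned int lo, hi;

		/* rdpmc: counter index in %ecx, result in %edx:%eax */
		asm volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (counter));
		return lo | ((unsigned long long)hi << 32);
	}

	static inline unsigned long long my_rdmsr(unsigned int msr)
	{
		unsigned int lo, hi;

		/* rdmsr: MSR address in %ecx, result in %edx:%eax; CPL 0 only */
		asm volatile("rdmsr" : "=a" (lo), "=d" (hi) : "c" (msr));
		return lo | ((unsigned long long)hi << 32);
	}

  Both return edx:eax as a 64-bit value; the difference is purely the
  cost of the instruction, measured below. ]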

The perfctr kernel patches did this, after extensive testing showed
rdpmc to always be faster (one can look at etc/costs in the perfctr-2.6
package for a historical list of the overheads).

I have done some tests on a 3.2 kernel; the kernel module I used
was included in the first posting of this patch:

                   rdmsr           rdpmc
 Core2 T9900:      203.9 cycles     30.9 cycles
 AMD fam0fh:        56.2 cycles      9.8 cycles
 Atom 6/28/2:      129.7 cycles     50.6 cycles

The speedup from using rdpmc is large.
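
[ The test module itself was posted with the patch and is not
  reproduced here; the measurement idea is roughly the following
  (hypothetical code, reusing my_rdpmc() from the sketch above):

	static inline unsigned long long tsc_read(void)
	{
		unsigned int lo, hi;

		/* lfence keeps rdtsc from executing ahead of the loop */
		asm volatile("lfence; rdtsc" : "=a" (lo), "=d" (hi));
		return lo | ((unsigned long long)hi << 32);
	}

	/* Average cost in cycles of one counter read. */
	static unsigned long long time_one_read(unsigned int counter, int loops)
	{
		unsigned long long start, end;
		int i;

		start = tsc_read();
		for (i = 0; i < loops; i++)
			(void)my_rdpmc(counter);	/* or my_rdmsr() */
		end = tsc_read();

		return (end - start) / loops;
	} ]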

[ It's probably possible (and desirable) to do this without
  requiring a new field in the hw_perf_event structure, but
  the fixed events make this tricky. ]
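
[ The tricky part: rdpmc indexes are a separate namespace from MSR
  addresses.  Per the Intel SDM, setting bit 30 of the rdpmc index
  selects the fixed-function counters, so the index for a fixed
  counter cannot be derived from its event_base MSR with a simple
  offset.  A sketch of the mapping the new field caches (hypothetical
  helper, mirroring the assignments in the diff below):

	#define RDPMC_FIXED	(1U << 30)

	/* 'num' is the counter number: the general-purpose counter
	 * index, or idx - X86_PMC_IDX_FIXED for fixed counters. */
	static unsigned int rdpmc_index(unsigned int num, int is_fixed)
	{
		return is_fixed ? (num | RDPMC_FIXED) : num;
	} ]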

Signed-off-by: Vince Weaver <vweaver1@eecs.utk.edu>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/alpine.DEB.2.00.1203011724030.26934@cl320.eecs.utk.edu
Signed-off-by: Ingo Molnar <mingo@kernel.org>
arch/x86/kernel/cpu/perf_event.c
include/linux/perf_event.h

index 43c2017347e7f22372e6145ac5799eced6104805..000a4746c7ced5146c64681be7abacef441bc661 100644
@@ -75,7 +75,7 @@ u64 x86_perf_event_update(struct perf_event *event)
         */
 again:
        prev_raw_count = local64_read(&hwc->prev_count);
-       rdmsrl(hwc->event_base, new_raw_count);
+       rdpmcl(hwc->event_base_rdpmc, new_raw_count);
 
        if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
                                        new_raw_count) != prev_raw_count)
@@ -819,9 +819,11 @@ static inline void x86_assign_hw_event(struct perf_event *event,
        } else if (hwc->idx >= X86_PMC_IDX_FIXED) {
                hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
                hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - X86_PMC_IDX_FIXED);
+               hwc->event_base_rdpmc = (hwc->idx - X86_PMC_IDX_FIXED) | 1<<30;
        } else {
                hwc->config_base = x86_pmu_config_addr(hwc->idx);
                hwc->event_base  = x86_pmu_event_addr(hwc->idx);
+               hwc->event_base_rdpmc = x86_pmu_addr_offset(hwc->idx);
        }
 }
 
index 45db49f64bb492ddf34d2b451fffa810ea45b965..1ce887abcc5cdff8c8f85080a35c6b2ffbf4831c 100644
@@ -677,6 +677,7 @@ struct hw_perf_event {
                        u64             last_tag;
                        unsigned long   config_base;
                        unsigned long   event_base;
+                       int             event_base_rdpmc;
                        int             idx;
                        int             last_cpu;