x86 cpufreq, perf: Make trace_power_frequency cpufreq driver independent
author Thomas Renninger <trenn@suse.de>
Tue, 20 Jul 2010 23:59:34 +0000 (16:59 -0700)
committer Ingo Molnar <mingo@elte.hu>
Thu, 22 Jul 2010 10:08:27 +0000 (12:08 +0200)
and fix the broken case where a core's frequency depends on other cores.

trace_power_frequency was implemented in a rather non-generic
way, only in the acpi-cpufreq driver's target() function.

-> Move the call to trace_power_frequency to
   cpufreq.c:cpufreq_notify_transition(), where the CPUFREQ_POSTCHANGE
   notifier is triggered.
   This will support power frequency tracing by all cpufreq
   drivers.

trace_power_frequency did not trace frequency changes correctly
when the userspace governor was used or when CPU cores'
frequencies depend on each other.

-> Moving this into the CPUFREQ_POSTCHANGE notifier and passing the
   CPU that gets switched fixes this automatically.

Robert Schoene provided some important fixes on top of my
initial quick shot version which are integrated in this patch:
- Forgot some changes in power_end trace (TP_printk/variable names)
- Variable dummy in power_end must now be cpu_id
- Use static 64 bit variable instead of unsigned int for cpu_id

[akpm@linux-foundation.org: build fix]
Signed-off-by: Thomas Renninger <trenn@suse.de>
Cc: davej@codemonkey.org.uk
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Dave Jones <davej@codemonkey.org.uk>
Acked-by: Arjan van de Ven <arjan@infradead.org>
Cc: Robert Schoene <robert.schoene@tu-dresden.de>
Tested-by: Robert Schoene <robert.schoene@tu-dresden.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
arch/x86/kernel/process.c
drivers/cpufreq/cpufreq.c
drivers/cpuidle/cpuidle.c
drivers/idle/intel_idle.c
include/trace/events/power.h
tools/perf/builtin-timechart.c

index 1d3cddaa40ee66d5730c11e6758a449f680b8cf6..cee5263927c1c2f22ed3c6fcbb6919657ed2f3ce 100644 (file)
@@ -34,7 +34,6 @@
 #include <linux/compiler.h>
 #include <linux/dmi.h>
 #include <linux/slab.h>
-#include <trace/events/power.h>
 
 #include <linux/acpi.h>
 #include <linux/io.h>
@@ -324,8 +323,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
                }
        }
 
-       trace_power_frequency(POWER_PSTATE, data->freq_table[next_state].frequency);
-
        switch (data->cpu_feature) {
        case SYSTEM_INTEL_MSR_CAPABLE:
                cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
index e7e35219b32f23e115c23846d06c07b0128e26ae..787572d43d9c152f178d9c821f5dc672c68849b0 100644 (file)
@@ -371,7 +371,7 @@ static inline int hlt_use_halt(void)
 void default_idle(void)
 {
        if (hlt_use_halt()) {
-               trace_power_start(POWER_CSTATE, 1);
+               trace_power_start(POWER_CSTATE, 1, smp_processor_id());
                current_thread_info()->status &= ~TS_POLLING;
                /*
                 * TS_POLLING-cleared state must be visible before we
@@ -441,7 +441,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
  */
 void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
 {
-       trace_power_start(POWER_CSTATE, (ax>>4)+1);
+       trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id());
        if (!need_resched()) {
                if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
                        clflush((void *)&current_thread_info()->flags);
@@ -457,7 +457,7 @@ void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
 static void mwait_idle(void)
 {
        if (!need_resched()) {
-               trace_power_start(POWER_CSTATE, 1);
+               trace_power_start(POWER_CSTATE, 1, smp_processor_id());
                if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
                        clflush((void *)&current_thread_info()->flags);
 
@@ -478,7 +478,7 @@ static void mwait_idle(void)
  */
 static void poll_idle(void)
 {
-       trace_power_start(POWER_CSTATE, 0);
+       trace_power_start(POWER_CSTATE, 0, smp_processor_id());
        local_irq_enable();
        while (!need_resched())
                cpu_relax();
index 063b2184caf5506fc1e1a83e1cd6c8dc6f22b6c3..4ed665725cc51080e17a0d2f023e1ebe672edfc1 100644 (file)
@@ -29,6 +29,8 @@
 #include <linux/completion.h>
 #include <linux/mutex.h>
 
+#include <trace/events/power.h>
+
 #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, \
                                                "cpufreq-core", msg)
 
@@ -354,6 +356,7 @@ void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state)
 
        case CPUFREQ_POSTCHANGE:
                adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
+                trace_power_frequency(POWER_PSTATE, freqs->new, freqs->cpu);
                srcu_notifier_call_chain(&cpufreq_transition_notifier_list,
                                CPUFREQ_POSTCHANGE, freqs);
                if (likely(policy) && likely(policy->cpu == freqs->cpu))
index 199488576a05c65bac2740d33600fc1c6fd5511f..dbefe15bd582804497cbca76b6f3ce4fae6e28da 100644 (file)
@@ -95,7 +95,7 @@ static void cpuidle_idle_call(void)
        /* give the governor an opportunity to reflect on the outcome */
        if (cpuidle_curr_governor->reflect)
                cpuidle_curr_governor->reflect(dev);
-       trace_power_end(0);
+       trace_power_end(smp_processor_id());
 }
 
 /**
index 54f0fb4cd5d25086701ad23243f97f138a645233..03d202b1ff2764d44e7bebbadc6f9fc80052bfdb 100755 (executable)
@@ -231,7 +231,7 @@ static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state)
 
        stop_critical_timings();
 #ifndef MODULE
-       trace_power_start(POWER_CSTATE, (eax >> 4) + 1);
+       trace_power_start(POWER_CSTATE, (eax >> 4) + 1, cpu);
 #endif
        if (!need_resched()) {
 
index c4efe9b8280d4f9c4261c24de5f80010f1f445ce..35a2a6e7bf1e74992b8b83b242c4a507fed4246f 100644 (file)
@@ -18,52 +18,55 @@ enum {
 
 DECLARE_EVENT_CLASS(power,
 
-       TP_PROTO(unsigned int type, unsigned int state),
+       TP_PROTO(unsigned int type, unsigned int state, unsigned int cpu_id),
 
-       TP_ARGS(type, state),
+       TP_ARGS(type, state, cpu_id),
 
        TP_STRUCT__entry(
                __field(        u64,            type            )
                __field(        u64,            state           )
+               __field(        u64,            cpu_id          )
        ),
 
        TP_fast_assign(
                __entry->type = type;
                __entry->state = state;
+               __entry->cpu_id = cpu_id;
        ),
 
-       TP_printk("type=%lu state=%lu", (unsigned long)__entry->type, (unsigned long)__entry->state)
+       TP_printk("type=%lu state=%lu cpu_id=%lu", (unsigned long)__entry->type,
+               (unsigned long)__entry->state, (unsigned long)__entry->cpu_id)
 );
 
 DEFINE_EVENT(power, power_start,
 
-       TP_PROTO(unsigned int type, unsigned int state),
+       TP_PROTO(unsigned int type, unsigned int state, unsigned int cpu_id),
 
-       TP_ARGS(type, state)
+       TP_ARGS(type, state, cpu_id)
 );
 
 DEFINE_EVENT(power, power_frequency,
 
-       TP_PROTO(unsigned int type, unsigned int state),
+       TP_PROTO(unsigned int type, unsigned int state, unsigned int cpu_id),
 
-       TP_ARGS(type, state)
+       TP_ARGS(type, state, cpu_id)
 );
 
 TRACE_EVENT(power_end,
 
-       TP_PROTO(int dummy),
+       TP_PROTO(unsigned int cpu_id),
 
-       TP_ARGS(dummy),
+       TP_ARGS(cpu_id),
 
        TP_STRUCT__entry(
-               __field(        u64,            dummy           )
+               __field(        u64,            cpu_id          )
        ),
 
        TP_fast_assign(
-               __entry->dummy = 0xffff;
+               __entry->cpu_id = cpu_id;
        ),
 
-       TP_printk("dummy=%lu", (unsigned long)__entry->dummy)
+       TP_printk("cpu_id=%lu", (unsigned long)__entry->cpu_id)
 
 );
 
index 5a52ed9fc10baf5e0a2080853899847187530f32..5161619d4714d3838579d82d91f543b117a4535a 100644 (file)
@@ -300,8 +300,9 @@ struct trace_entry {
 
 struct power_entry {
        struct trace_entry te;
-       s64     type;
-       s64     value;
+       u64     type;
+       u64     value;
+       u64     cpu_id;
 };
 
 #define TASK_COMM_LEN 16
@@ -498,13 +499,13 @@ static int process_sample_event(event_t *event, struct perf_session *session)
                        return 0;
 
                if (strcmp(event_str, "power:power_start") == 0)
-                       c_state_start(data.cpu, data.time, pe->value);
+                       c_state_start(pe->cpu_id, data.time, pe->value);
 
                if (strcmp(event_str, "power:power_end") == 0)
-                       c_state_end(data.cpu, data.time);
+                       c_state_end(pe->cpu_id, data.time);
 
                if (strcmp(event_str, "power:power_frequency") == 0)
-                       p_state_change(data.cpu, data.time, pe->value);
+                       p_state_change(pe->cpu_id, data.time, pe->value);
 
                if (strcmp(event_str, "sched:sched_wakeup") == 0)
                        sched_wakeup(data.cpu, data.time, data.pid, te);