perf intel-pt: Accumulate cycle count from CYC packets
author Adrian Hunter <adrian.hunter@intel.com>
Mon, 20 May 2019 11:37:11 +0000 (14:37 +0300)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Wed, 5 Jun 2019 12:47:54 +0000 (09:47 -0300)
In preparation for providing instructions-per-cycle (IPC) information,
accumulate cycle count from CYC packets.

Although CYC packets are optional (the config term 'cyc' is required to
enable cycle-accurate mode when recording), the simplest way to count
cycles is with CYC packets.

The first complication is that cycles must be counted only when also
counting instructions.

That means only while control flow packet generation is enabled, i.e.
between TIP.PGE and TIP.PGD packets.

Also, sampling the cycle count follows the same rules as sampling the
timestamp, that is, not before the instruction to which the decoder is
walking is reached.

In addition, the cycle count is not accurate for any but the first
branch of a TNT packet.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/20190520113728.14389-6-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
tools/perf/util/intel-pt-decoder/intel-pt-decoder.h

index 1ab4070b5633eca443a5b01e899a5498c6a335a8..ef3a1c1cd25094a8cb5ee0124552788fec72217c 100644 (file)
@@ -160,6 +160,8 @@ struct intel_pt_decoder {
        uint64_t period_mask;
        uint64_t period_ticks;
        uint64_t last_masked_timestamp;
+       uint64_t tot_cyc_cnt;
+       uint64_t sample_tot_cyc_cnt;
        bool continuous_period;
        bool overflow;
        bool set_fup_tx_flags;
@@ -167,6 +169,7 @@ struct intel_pt_decoder {
        bool set_fup_mwait;
        bool set_fup_pwre;
        bool set_fup_exstop;
+       bool sample_cyc;
        unsigned int fup_tx_flags;
        unsigned int tx_flags;
        uint64_t fup_ptw_payload;
@@ -1323,6 +1326,7 @@ static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder)
                                decoder->ip += intel_pt_insn.length;
                                return 0;
                        }
+                       decoder->sample_cyc = false;
                        decoder->ip += intel_pt_insn.length;
                        if (!decoder->tnt.count) {
                                intel_pt_update_sample_time(decoder);
@@ -1515,6 +1519,9 @@ static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder)
        decoder->have_cyc = true;
 
        decoder->cycle_cnt += decoder->packet.payload;
+       if (decoder->pge)
+               decoder->tot_cyc_cnt += decoder->packet.payload;
+       decoder->sample_cyc = true;
 
        if (!decoder->cyc_ref_timestamp)
                return;
@@ -2419,6 +2426,7 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
                decoder->state.err = intel_pt_ext_err(err);
                decoder->state.from_ip = decoder->ip;
                intel_pt_update_sample_time(decoder);
+               decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt;
        } else {
                decoder->state.err = 0;
                if (decoder->cbr != decoder->cbr_seen && decoder->state.type) {
@@ -2426,14 +2434,18 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
                        decoder->state.type |= INTEL_PT_CBR_CHG;
                        decoder->state.cbr_payload = decoder->cbr_payload;
                }
-               if (intel_pt_sample_time(decoder->pkt_state))
+               if (intel_pt_sample_time(decoder->pkt_state)) {
                        intel_pt_update_sample_time(decoder);
+                       if (decoder->sample_cyc)
+                               decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt;
+               }
        }
 
        decoder->state.timestamp = decoder->sample_timestamp;
        decoder->state.est_timestamp = intel_pt_est_timestamp(decoder);
        decoder->state.cr3 = decoder->cr3;
        decoder->state.tot_insn_cnt = decoder->tot_insn_cnt;
+       decoder->state.tot_cyc_cnt = decoder->sample_tot_cyc_cnt;
 
        return &decoder->state;
 }
index ed088d4726ba6186176cc1213d49ba14736aeefb..6a61773dc44b9ca006ca2fabb5d17424725ddffe 100644 (file)
@@ -77,6 +77,7 @@ struct intel_pt_state {
        uint64_t to_ip;
        uint64_t cr3;
        uint64_t tot_insn_cnt;
+       uint64_t tot_cyc_cnt;
        uint64_t timestamp;
        uint64_t est_timestamp;
        uint64_t trace_nr;