From: Namhyung Kim Date: Mon, 9 Nov 2015 05:45:37 +0000 (+0900) Subject: perf report: Support folded callchain mode on --stdio X-Git-Url: http://git.lede-project.org./?a=commitdiff_plain;h=26e779245dd6f5270c0696860438e5c03d0780fd;p=openwrt%2Fstaging%2Fblogic.git perf report: Support folded callchain mode on --stdio Add new call chain option (-g) 'folded' to print callchains in a line. The callchains are separated by semicolons, and preceded by (absolute) percent values and a space. For example, the following 20 lines can be printed in 3 lines with the folded output mode: $ perf report -g flat --no-children | grep -v ^# | head -20 60.48% swapper [kernel.vmlinux] [k] intel_idle 54.60% intel_idle cpuidle_enter_state cpuidle_enter call_cpuidle cpu_startup_entry start_secondary 5.88% intel_idle cpuidle_enter_state cpuidle_enter call_cpuidle cpu_startup_entry rest_init start_kernel x86_64_start_reservations x86_64_start_kernel $ perf report -g folded --no-children | grep -v ^# | head -3 60.48% swapper [kernel.vmlinux] [k] intel_idle 54.60% intel_idle;cpuidle_enter_state;cpuidle_enter;call_cpuidle;cpu_startup_entry;start_secondary 5.88% intel_idle;cpuidle_enter_state;cpuidle_enter;call_cpuidle;cpu_startup_entry;rest_init;start_kernel;x86_64_start_reservations;x86_64_start_kernel This mode is supported only for --stdio now and intended to be used by some scripts like in FlameGraphs[1]. Support for other UI might be added later. [1] http://www.brendangregg.com/FlameGraphs/cpuflamegraphs.html Requested-and-Tested-by: Brendan Gregg Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Frederic Weisbecker Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1447047946-1691-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 5ce8da1e1256..f7d81aac9188 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -181,6 +181,7 @@ OPTIONS - graph: use a graph tree, displaying absolute overhead rates. (default) - fractal: like graph, but displays relative rates. Each branch of the tree is considered as a new profiled object. + - folded: call chains are displayed in a line, separated by semicolons - none: disable call chain display. threshold is a percentage value which specifies a minimum percent to be diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c index dfcbc90146ef..ea7984932d9a 100644 --- a/tools/perf/ui/stdio/hist.c +++ b/tools/perf/ui/stdio/hist.c @@ -260,6 +260,58 @@ static size_t callchain__fprintf_flat(FILE *fp, struct rb_root *tree, return ret; } +static size_t __callchain__fprintf_folded(FILE *fp, struct callchain_node *node) +{ + const char *sep = symbol_conf.field_sep ?: ";"; + struct callchain_list *chain; + size_t ret = 0; + char bf[1024]; + bool first; + + if (!node) + return 0; + + ret += __callchain__fprintf_folded(fp, node->parent); + + first = (ret == 0); + list_for_each_entry(chain, &node->val, list) { + if (chain->ip >= PERF_CONTEXT_MAX) + continue; + ret += fprintf(fp, "%s%s", first ? "" : sep, + callchain_list__sym_name(chain, + bf, sizeof(bf), false)); + first = false; + } + + return ret; +} + +static size_t callchain__fprintf_folded(FILE *fp, struct rb_root *tree, + u64 total_samples) +{ + size_t ret = 0; + u32 entries_printed = 0; + struct callchain_node *chain; + struct rb_node *rb_node = rb_first(tree); + + while (rb_node) { + double percent; + + chain = rb_entry(rb_node, struct callchain_node, rb_node); + percent = chain->hit * 100.0 / total_samples; + + ret += fprintf(fp, "%.2f%% ", percent); + ret += __callchain__fprintf_folded(fp, chain); + ret += fprintf(fp, "\n"); + if (++entries_printed == callchain_param.print_limit) + break; + + rb_node = rb_next(rb_node); + } + + return ret; +} + static size_t hist_entry_callchain__fprintf(struct hist_entry *he, u64 total_samples, int left_margin, FILE *fp) @@ -278,6 +330,9 @@ static size_t hist_entry_callchain__fprintf(struct hist_entry *he, case CHAIN_FLAT: return callchain__fprintf_flat(fp, &he->sorted_chain, total_samples); break; + case CHAIN_FOLDED: + return callchain__fprintf_folded(fp, &he->sorted_chain, total_samples); + break; case CHAIN_NONE: break; default: diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 735ad48e1858..08cb220ba5ea 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -44,6 +44,10 @@ static int parse_callchain_mode(const char *value) callchain_param.mode = CHAIN_GRAPH_REL; return 0; } + if (!strncmp(value, "folded", strlen(value))) { + callchain_param.mode = CHAIN_FOLDED; + return 0; + } return -1; } @@ -218,6 +222,7 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain, switch (mode) { case CHAIN_FLAT: + case CHAIN_FOLDED: if (rnode->hit < chain->hit) p = &(*p)->rb_left; else @@ -338,6 +343,7 @@ int callchain_register_param(struct callchain_param *param) param->sort = sort_chain_graph_rel; break; case CHAIN_FLAT: + case CHAIN_FOLDED: param->sort = sort_chain_flat; break; case CHAIN_NONE: diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index fce8161e54db..544d99ac169c 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -24,7 +24,7 @@ #define CALLCHAIN_RECORD_HELP CALLCHAIN_HELP RECORD_MODE_HELP RECORD_SIZE_HELP #define CALLCHAIN_REPORT_HELP \ - HELP_PAD "print_type:\tcall graph printing style (graph|flat|fractal|none)\n" \ + HELP_PAD "print_type:\tcall graph printing style (graph|flat|fractal|folded|none)\n" \ HELP_PAD "threshold:\tminimum call graph inclusion threshold ()\n" \ HELP_PAD "print_limit:\tmaximum number of call graph entry ()\n" \ HELP_PAD "order:\t\tcall graph order (caller|callee)\n" \ @@ -43,7 +43,8 @@ enum chain_mode { CHAIN_NONE, CHAIN_FLAT, CHAIN_GRAPH_ABS, - CHAIN_GRAPH_REL + CHAIN_GRAPH_REL, + CHAIN_FOLDED, }; enum chain_order {