From: Arnaldo Carvalho de Melo Date: Wed, 27 Apr 2016 13:16:24 +0000 (-0300) Subject: perf tools: Set the maximum allowed stack from /proc/sys/kernel/perf_event_max_stack X-Git-Url: http://git.lede-project.org./?a=commitdiff_plain;h=4cb93446c587d56e2a54f4f83113daba2c0b6dee;p=openwrt%2Fstaging%2Fblogic.git perf tools: Set the maximum allowed stack from /proc/sys/kernel/perf_event_max_stack There is an upper limit to what tooling considers a valid callchain, and it was tied to the hardcoded value in the kernel, PERF_MAX_STACK_DEPTH (127), now that this can be tuned via a sysctl, make it read it and use that as the upper limit, falling back to PERF_MAX_STACK_DEPTH for kernels where this sysctl isn't present. Cc: Adrian Hunter Cc: Brendan Gregg Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Milian Wolff Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-yjqsd30nnkogvj5oyx9ghir9@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 496d42cdf02b..ebaf849e30ef 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -248,7 +248,7 @@ OPTIONS Note that when using the --itrace option the synthesized callchain size will override this value if the synthesized callchain size is bigger. - Default: 127 + Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise. -G:: --inverted:: diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 4fc44c75263f..a856a1095893 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -267,7 +267,7 @@ include::itrace.txt[] Note that when using the --itrace option the synthesized callchain size will override this value if the synthesized callchain size is bigger. - Default: 127 + Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise. 
--ns:: Use 9 decimal places when displaying time (i.e. show the nanoseconds) diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 19f046f027cd..91d638df3a6b 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -177,7 +177,7 @@ Default is to monitor all CPUS. between information loss and faster processing especially for workloads that can have a very long callchain stack. - Default: 127 + Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise. --ignore-callees=:: Ignore callees of the function(s) matching the given regex. diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index c075c002eaa4..6afe20121bc0 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -143,7 +143,7 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. Implies '--call-graph dwarf' when --call-graph not present on the command line, on systems where DWARF unwinding was built in. - Default: 127 + Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise. --min-stack:: Set the stack depth limit when parsing the callchain, anything diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 1d5be0bd426f..8d9b88af901d 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -691,7 +691,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) .ordered_events = true, .ordering_requires_timestamps = true, }, - .max_stack = PERF_MAX_STACK_DEPTH, + .max_stack = sysctl_perf_event_max_stack, .pretty_printing_style = "normal", .socket_filter = -1, }; @@ -744,7 +744,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) OPT_INTEGER(0, "max-stack", &report.max_stack, "Set the maximum stack depth when parsing the callchain, " "anything beyond the specified depth will be ignored. 
" - "Default: " __stringify(PERF_MAX_STACK_DEPTH)), + "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)), OPT_BOOLEAN('G', "inverted", &report.inverted_callchain, "alias for inverted call graph"), OPT_CALLBACK(0, "ignore-callees", NULL, "regex", diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index f43b0c6f88f4..efca81679bb3 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2031,7 +2031,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) OPT_UINTEGER(0, "max-stack", &scripting_max_stack, "Set the maximum stack depth when parsing the callchain, " "anything beyond the specified depth will be ignored. " - "Default: " __stringify(PERF_MAX_STACK_DEPTH)), + "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)), OPT_BOOLEAN('I', "show-info", &show_full_info, "display extended information from perf.data file"), OPT_BOOLEAN('\0', "show-kernel-path", &symbol_conf.show_kernel_path, @@ -2067,6 +2067,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) NULL }; + scripting_max_stack = sysctl_perf_event_max_stack; + setup_scripting(); argc = parse_options_subcommand(argc, argv, options, script_subcommands, script_usage, diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index c130a11d3a0d..da18517b1d40 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1103,7 +1103,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) }, .proc_map_timeout = 500, }, - .max_stack = PERF_MAX_STACK_DEPTH, + .max_stack = sysctl_perf_event_max_stack, .sym_pcnt_filter = 5, }; struct record_opts *opts = &top.record_opts; @@ -1171,7 +1171,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) "Accumulate callchains of children and show total overhead as well"), OPT_INTEGER(0, "max-stack", &top.max_stack, "Set the maximum stack depth when parsing the 
callchain. " - "Default: " __stringify(PERF_MAX_STACK_DEPTH)), + "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)), OPT_CALLBACK(0, "ignore-callees", NULL, "regex", "ignore callees of these functions in call graphs", report_parse_ignore_callees_opt), diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 48b00f042599..f4f3389c92c7 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3106,7 +3106,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) OPT_UINTEGER(0, "max-stack", &trace.max_stack, "Set the maximum stack depth when parsing the callchain, " "anything beyond the specified depth will be ignored. " - "Default: " __stringify(PERF_MAX_STACK_DEPTH)), + "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)), OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout, "per thread proc mmap processing timeout in ms"), OPT_END() @@ -3150,7 +3150,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) mmap_pages_user_set = false; if (trace.max_stack == UINT_MAX) { - trace.max_stack = PERF_MAX_STACK_DEPTH; + trace.max_stack = sysctl_perf_event_max_stack; max_stack_user_set = false; } diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 7b2df2b46525..83ffe7cd7330 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -17,6 +17,7 @@ #include <subcmd/parse-options.h> #include "util/bpf-loader.h" #include "util/debug.h" +#include <api/fs/fs.h> #include <api/fs/tracing_path.h> #include <pthread.h> #include <stdlib.h> @@ -533,6 +534,7 @@ int main(int argc, const char **argv) { const char *cmd; char sbuf[STRERR_BUFSIZE]; + int value; /* libsubcmd init */ exec_cmd_init("perf", PREFIX, PERF_EXEC_PATH, EXEC_PATH_ENVIRONMENT); @@ -542,6 +544,9 @@ int main(int argc, const char **argv) page_size = sysconf(_SC_PAGE_SIZE); cacheline_size = sysconf(_SC_LEVEL1_DCACHE_LINESIZE); + if (sysctl__read_int("kernel/perf_event_max_stack", &value) == 0) + sysctl_perf_event_max_stack = value; + cmd = 
extract_argv0_path(argv[0]); if (!cmd) cmd = "perf-help"; diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c index ed5aa9eaeb6c..4a2bbff9b1ee 100644 --- a/tools/perf/tests/hists_cumulate.c +++ b/tools/perf/tests/hists_cumulate.c @@ -101,7 +101,7 @@ static int add_hist_entries(struct hists *hists, struct machine *machine) if (machine__resolve(machine, &al, &sample) < 0) goto out; - if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH, + if (hist_entry_iter__add(&iter, &al, sysctl_perf_event_max_stack, NULL) < 0) { addr_location__put(&al); goto out; diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c index b825d24f8186..e846f8c42013 100644 --- a/tools/perf/tests/hists_filter.c +++ b/tools/perf/tests/hists_filter.c @@ -81,7 +81,7 @@ static int add_hist_entries(struct perf_evlist *evlist, al.socket = fake_samples[i].socket; if (hist_entry_iter__add(&iter, &al, - PERF_MAX_STACK_DEPTH, NULL) < 0) { + sysctl_perf_event_max_stack, NULL) < 0) { addr_location__put(&al); goto out; } diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c index d3556fbe8c5c..7cd8738e842f 100644 --- a/tools/perf/tests/hists_output.c +++ b/tools/perf/tests/hists_output.c @@ -67,7 +67,7 @@ static int add_hist_entries(struct hists *hists, struct machine *machine) if (machine__resolve(machine, &al, &sample) < 0) goto out; - if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH, + if (hist_entry_iter__add(&iter, &al, sysctl_perf_event_max_stack, NULL) < 0) { addr_location__put(&al); goto out; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 656c1d7ee7d4..2cb95bbf9ea6 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1764,7 +1764,7 @@ static int resolve_lbr_callchain_sample(struct thread *thread, */ int mix_chain_nr = i + 1 + lbr_nr + 1; - if (mix_chain_nr > PERF_MAX_STACK_DEPTH + PERF_MAX_BRANCH_DEPTH) { + if (mix_chain_nr > (int)sysctl_perf_event_max_stack + 
PERF_MAX_BRANCH_DEPTH) { pr_warning("corrupted callchain. skipping...\n"); return 0; } @@ -1825,7 +1825,7 @@ static int thread__resolve_callchain_sample(struct thread *thread, * Based on DWARF debug information, some architectures skip * a callchain entry saved by the kernel. */ - if (chain->nr < PERF_MAX_STACK_DEPTH) + if (chain->nr < sysctl_perf_event_max_stack) skip_idx = arch_skip_callchain_idx(thread, chain); /* @@ -1886,7 +1886,7 @@ static int thread__resolve_callchain_sample(struct thread *thread, } check_calls: - if (chain->nr > PERF_MAX_STACK_DEPTH && (int)chain->nr > max_stack) { + if (chain->nr > sysctl_perf_event_max_stack && (int)chain->nr > max_stack) { pr_warning("corrupted callchain. skipping...\n"); return 0; } diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index ae1cebc307c5..62c7f6988e0e 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -265,7 +265,7 @@ static SV *perl_process_callchain(struct perf_sample *sample, if (thread__resolve_callchain(al->thread, &callchain_cursor, evsel, sample, NULL, NULL, - PERF_MAX_STACK_DEPTH) != 0) { + sysctl_perf_event_max_stack) != 0) { pr_err("Failed to resolve callchain. 
Skipping\n"); goto exit; } diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 9473d46c00bb..619ba2061b62 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -33,6 +33,8 @@ struct callchain_param callchain_param = { unsigned int page_size; int cacheline_size; +unsigned int sysctl_perf_event_max_stack = PERF_MAX_STACK_DEPTH; + bool test_attr__enabled; bool perf_host = true; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 26a924651e7b..88f607af1f47 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -267,6 +267,7 @@ void sighandler_dump_stack(int sig); extern unsigned int page_size; extern int cacheline_size; +extern unsigned int sysctl_perf_event_max_stack; struct parse_tag { char tag;