perf report/top: Add option to collapse undesired parts of call graph
author Greg Price <price@MIT.EDU>
Fri, 7 Dec 2012 05:48:05 +0000 (21:48 -0800)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Fri, 12 Jul 2013 16:53:55 +0000 (13:53 -0300)
For example, in an application with an expensive function implemented
with deeply nested recursive calls, the default call-graph presentation
is dominated by the different callchains within that function.  By
ignoring these callees, we can collect the callchains leading into the
function and compactly identify what to blame for expensive calls.

For example, in this report the callers of garbage_collect() are
scattered across the tree:

  $ perf report -d ruby 2>- | grep -m10 ^[^#]*[a-z]
      22.03%     ruby  [.] gc_mark
                 --- gc_mark
                    |--59.40%-- mark_keyvalue
                    |          st_foreach
                    |          gc_mark_children
                    |          |--99.75%-- rb_gc_mark
                    |          |          rb_vm_mark
                    |          |          gc_mark_children
                    |          |          gc_marks
                    |          |          |--99.00%-- garbage_collect

If we ignore the callees of garbage_collect(), its callers are coalesced:

  $ perf report --ignore-callees garbage_collect -d ruby 2>- | grep -m10 ^[^#]*[a-z]
      72.92%     ruby  [.] garbage_collect
                 --- garbage_collect
                     vm_xmalloc
                    |--47.08%-- ruby_xmalloc
                    |          st_insert2
                    |          rb_hash_aset
                    |          |--98.45%-- features_index_add
                    |          |          rb_provide_feature
                    |          |          rb_require_safe
                    |          |          vm_call_method

Signed-off-by: Greg Price <price@mit.edu>
Tested-by: Jiri Olsa <jolsa@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20130623031720.GW22203@biohazard-cafe.mit.edu
Link: http://lkml.kernel.org/r/20130708115746.GO22203@biohazard-cafe.mit.edu
Cc: Fengguang Wu <fengguang.wu@intel.com>
[ remove spaces at beginning of line, reported by Fengguang Wu ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/Documentation/perf-report.txt
tools/perf/Documentation/perf-top.txt
tools/perf/builtin-report.c
tools/perf/builtin-top.c
tools/perf/util/machine.c
tools/perf/util/machine.h
tools/perf/util/session.c
tools/perf/util/sort.c
tools/perf/util/sort.h

index 66dab7410c1d352f3583f541de5d1160cffa9085..747ff50284b7c84419cc431996d820485bb069e9 100644 (file)
@@ -135,6 +135,11 @@ OPTIONS
 --inverted::
         alias for inverted caller based call graph.
 
+--ignore-callees=<regex>::
+        Ignore callees of the function(s) matching the given regex.
+        This has the effect of collecting the callers of each such
+        function into one place in the call-graph tree.
+
 --pretty=<key>::
         Pretty printing style.  key: normal, raw
 
index 7fdd1909e37601c7bfeb074800433bec3e8aa4ca..58d6598a968679fb4c020f9d932443a57f9feee8 100644 (file)
@@ -155,6 +155,11 @@ Default is to monitor all CPUS.
 
        Default: fractal,0.5,callee.
 
+--ignore-callees=<regex>::
+        Ignore callees of the function(s) matching the given regex.
+        This has the effect of collecting the callers of each such
+        function into one place in the call-graph tree.
+
 --percent-limit::
        Do not show entries which have an overhead under that percent.
        (Default: 0).
index ee2ca3eb22dfd148b3e7fa49211cbf3d1e212b87..9a7e54d701cf64c1f01ce27a5a2df9c1e140861a 100644 (file)
@@ -89,7 +89,7 @@ static int perf_report__add_mem_hist_entry(struct perf_tool *tool,
        if ((sort__has_parent || symbol_conf.use_callchain) &&
            sample->callchain) {
                err = machine__resolve_callchain(machine, evsel, al->thread,
-                                                sample, &parent);
+                                                sample, &parent, al);
                if (err)
                        return err;
        }
@@ -180,7 +180,7 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
        if ((sort__has_parent || symbol_conf.use_callchain)
            && sample->callchain) {
                err = machine__resolve_callchain(machine, evsel, al->thread,
-                                                sample, &parent);
+                                                sample, &parent, al);
                if (err)
                        return err;
        }
@@ -254,7 +254,7 @@ static int perf_evsel__add_hist_entry(struct perf_evsel *evsel,
 
        if ((sort__has_parent || symbol_conf.use_callchain) && sample->callchain) {
                err = machine__resolve_callchain(machine, evsel, al->thread,
-                                                sample, &parent);
+                                                sample, &parent, al);
                if (err)
                        return err;
        }
@@ -681,6 +681,24 @@ setup:
        return 0;
 }
 
+int
+report_parse_ignore_callees_opt(const struct option *opt __maybe_unused,
+                               const char *arg, int unset __maybe_unused)
+{
+       if (arg) {
+               int err = regcomp(&ignore_callees_regex, arg, REG_EXTENDED);
+               if (err) {
+                       char buf[BUFSIZ];
+                       regerror(err, &ignore_callees_regex, buf, sizeof(buf));
+                       pr_err("Invalid --ignore-callees regex: %s\n%s", arg, buf);
+                       return -1;
+               }
+               have_ignore_callees = 1;
+       }
+
+       return 0;
+}
+
 static int
 parse_branch_mode(const struct option *opt __maybe_unused,
                  const char *str __maybe_unused, int unset)
@@ -771,6 +789,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
                     "Default: fractal,0.5,callee", &parse_callchain_opt, callchain_default_opt),
        OPT_BOOLEAN('G', "inverted", &report.inverted_callchain,
                    "alias for inverted call graph"),
+       OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
+                  "ignore callees of these functions in call graphs",
+                  report_parse_ignore_callees_opt),
        OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
                   "only consider symbols in these dsos"),
        OPT_STRING('c', "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
index a237059f51cfb3fc2c224ce3599bbe7d87de7f5c..bbf46357277714a5fcdede5a170a9a05a9ab507b 100644 (file)
@@ -773,8 +773,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
                    sample->callchain) {
                        err = machine__resolve_callchain(machine, evsel,
                                                         al.thread, sample,
-                                                        &parent);
-
+                                                        &parent, &al);
                        if (err)
                                return;
                }
@@ -1109,6 +1108,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
        OPT_CALLBACK_DEFAULT('G', "call-graph", &top.record_opts,
                             "mode[,dump_size]", record_callchain_help,
                             &parse_callchain_opt, "fp"),
+       OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
+                  "ignore callees of these functions in call graphs",
+                  report_parse_ignore_callees_opt),
        OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
                    "Show a column with the sum of periods"),
        OPT_STRING(0, "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
index 5dd5026a82ef34d4a9bc9c5d03c11e79a9f87a09..f9f9d6381b9a8cf36fb3c23b11cbedf29fb45e9e 100644 (file)
@@ -1058,11 +1058,10 @@ int machine__process_event(struct machine *machine, union perf_event *event)
        return ret;
 }
 
-static bool symbol__match_parent_regex(struct symbol *sym)
+static bool symbol__match_regex(struct symbol *sym, regex_t *regex)
 {
-       if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0))
+       if (sym->name && !regexec(regex, sym->name, 0, NULL, 0))
                return 1;
-
        return 0;
 }
 
@@ -1159,8 +1158,8 @@ struct branch_info *machine__resolve_bstack(struct machine *machine,
 static int machine__resolve_callchain_sample(struct machine *machine,
                                             struct thread *thread,
                                             struct ip_callchain *chain,
-                                            struct symbol **parent)
-
+                                            struct symbol **parent,
+                                            struct addr_location *root_al)
 {
        u8 cpumode = PERF_RECORD_MISC_USER;
        unsigned int i;
@@ -1211,8 +1210,15 @@ static int machine__resolve_callchain_sample(struct machine *machine,
                                           MAP__FUNCTION, ip, &al, NULL);
                if (al.sym != NULL) {
                        if (sort__has_parent && !*parent &&
-                           symbol__match_parent_regex(al.sym))
+                           symbol__match_regex(al.sym, &parent_regex))
                                *parent = al.sym;
+                       else if (have_ignore_callees && root_al &&
+                         symbol__match_regex(al.sym, &ignore_callees_regex)) {
+                               /* Treat this symbol as the root,
+                                  forgetting its callees. */
+                               *root_al = al;
+                               callchain_cursor_reset(&callchain_cursor);
+                       }
                        if (!symbol_conf.use_callchain)
                                break;
                }
@@ -1237,13 +1243,13 @@ int machine__resolve_callchain(struct machine *machine,
                               struct perf_evsel *evsel,
                               struct thread *thread,
                               struct perf_sample *sample,
-                              struct symbol **parent)
-
+                              struct symbol **parent,
+                              struct addr_location *root_al)
 {
        int ret;
 
        ret = machine__resolve_callchain_sample(machine, thread,
-                                               sample->callchain, parent);
+                                               sample->callchain, parent, root_al);
        if (ret)
                return ret;
 
index e49ba01b7937635ce989e7e96706e99e956d39a0..5bb6244194d5c02d44d18522807fa192f301cb3a 100644 (file)
@@ -5,6 +5,7 @@
 #include <linux/rbtree.h>
 #include "map.h"
 
+struct addr_location;
 struct branch_stack;
 struct perf_evsel;
 struct perf_sample;
@@ -83,7 +84,8 @@ int machine__resolve_callchain(struct machine *machine,
                               struct perf_evsel *evsel,
                               struct thread *thread,
                               struct perf_sample *sample,
-                              struct symbol **parent);
+                              struct symbol **parent,
+                              struct addr_location *root_al);
 
 /*
  * Default guest kernel is defined by parameter --guestkallsyms
index 951a1cfb317c99c0a11739235c145c97b55ea92c..1eb58eedcac1142d65055204ac115c23e7f099ba 100644 (file)
@@ -1406,9 +1406,8 @@ void perf_evsel__print_ip(struct perf_evsel *evsel, union perf_event *event,
 
        if (symbol_conf.use_callchain && sample->callchain) {
 
-
                if (machine__resolve_callchain(machine, evsel, al.thread,
-                                              sample, NULL) != 0) {
+                                              sample, NULL, NULL) != 0) {
                        if (verbose)
                                error("Failed to resolve callchain. Skipping\n");
                        return;
index 8deee19d2e7f150969c9abe00ff10eacd954c3f4..cb2b108635ee44b33528d231920df389f797bbb9 100644 (file)
@@ -7,6 +7,8 @@ const char      default_parent_pattern[] = "^sys_|^do_page_fault";
 const char     *parent_pattern = default_parent_pattern;
 const char     default_sort_order[] = "comm,dso,symbol";
 const char     *sort_order = default_sort_order;
+regex_t                ignore_callees_regex;
+int            have_ignore_callees = 0;
 int            sort__need_collapse = 0;
 int            sort__has_parent = 0;
 int            sort__has_sym = 0;
index 45ac84c1e037595a9a0070142ef68a9f067aa422..a4a6d0b1ea0edc57af935054e8ffd6515df028c8 100644 (file)
@@ -29,6 +29,8 @@ extern const char *sort_order;
 extern const char default_parent_pattern[];
 extern const char *parent_pattern;
 extern const char default_sort_order[];
+extern regex_t ignore_callees_regex;
+extern int have_ignore_callees;
 extern int sort__need_collapse;
 extern int sort__has_parent;
 extern int sort__has_sym;
@@ -183,4 +185,6 @@ int setup_sorting(void);
 extern int sort_dimension__add(const char *);
 void sort__setup_elide(FILE *fp);
 
+int report_parse_ignore_callees_opt(const struct option *opt, const char *arg, int unset);
+
 #endif /* __PERF_SORT_H */