perf stat: Use affinity for reading
author: Andi Kleen <ak@linux.intel.com>
Thu, 21 Nov 2019 00:15:20 +0000 (16:15 -0800)
committer: Arnaldo Carvalho de Melo <acme@redhat.com>
Fri, 29 Nov 2019 15:20:45 +0000 (12:20 -0300)
Restructure event reading to use affinity to minimize the number of IPIs
needed.

Before on a large test case with 94 CPUs:

  % time     seconds  usecs/call     calls    errors syscall
  ------ ----------- ----------- --------- --------- ----------------
    3.16    0.106079           4     22082           read

After:

    3.43    0.081295           3     22082           read

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: http://lore.kernel.org/lkml/20191121001522.180827-11-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/builtin-stat.c
tools/perf/util/evsel.h

index cf8516e701e2c8e01d49e354ef5d632336238a8c..a098c2ebf4eac474c69b554ac40a279fbfc8afa7 100644 (file)
@@ -266,15 +266,10 @@ static int read_single_counter(struct evsel *counter, int cpu,
  * Read out the results of a single counter:
  * do not aggregate counts across CPUs in system-wide mode
  */
-static int read_counter(struct evsel *counter, struct timespec *rs)
+static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
 {
        int nthreads = perf_thread_map__nr(evsel_list->core.threads);
-       int ncpus, cpu, thread;
-
-       if (target__has_cpu(&target) && !target__has_per_thread(&target))
-               ncpus = perf_evsel__nr_cpus(counter);
-       else
-               ncpus = 1;
+       int thread;
 
        if (!counter->supported)
                return -ENOENT;
@@ -283,40 +278,38 @@ static int read_counter(struct evsel *counter, struct timespec *rs)
                nthreads = 1;
 
        for (thread = 0; thread < nthreads; thread++) {
-               for (cpu = 0; cpu < ncpus; cpu++) {
-                       struct perf_counts_values *count;
-
-                       count = perf_counts(counter->counts, cpu, thread);
-
-                       /*
-                        * The leader's group read loads data into its group members
-                        * (via perf_evsel__read_counter) and sets threir count->loaded.
-                        */
-                       if (!perf_counts__is_loaded(counter->counts, cpu, thread) &&
-                           read_single_counter(counter, cpu, thread, rs)) {
-                               counter->counts->scaled = -1;
-                               perf_counts(counter->counts, cpu, thread)->ena = 0;
-                               perf_counts(counter->counts, cpu, thread)->run = 0;
-                               return -1;
-                       }
+               struct perf_counts_values *count;
 
-                       perf_counts__set_loaded(counter->counts, cpu, thread, false);
+               count = perf_counts(counter->counts, cpu, thread);
 
-                       if (STAT_RECORD) {
-                               if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
-                                       pr_err("failed to write stat event\n");
-                                       return -1;
-                               }
-                       }
+               /*
+                * The leader's group read loads data into its group members
+                * (via perf_evsel__read_counter()) and sets their count->loaded.
+                */
+               if (!perf_counts__is_loaded(counter->counts, cpu, thread) &&
+                   read_single_counter(counter, cpu, thread, rs)) {
+                       counter->counts->scaled = -1;
+                       perf_counts(counter->counts, cpu, thread)->ena = 0;
+                       perf_counts(counter->counts, cpu, thread)->run = 0;
+                       return -1;
+               }
+
+               perf_counts__set_loaded(counter->counts, cpu, thread, false);
 
-                       if (verbose > 1) {
-                               fprintf(stat_config.output,
-                                       "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
-                                               perf_evsel__name(counter),
-                                               cpu,
-                                               count->val, count->ena, count->run);
+               if (STAT_RECORD) {
+                       if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
+                               pr_err("failed to write stat event\n");
+                               return -1;
                        }
                }
+
+               if (verbose > 1) {
+                       fprintf(stat_config.output,
+                               "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
+                                       perf_evsel__name(counter),
+                                       cpu,
+                                       count->val, count->ena, count->run);
+               }
        }
 
        return 0;
@@ -325,15 +318,37 @@ static int read_counter(struct evsel *counter, struct timespec *rs)
 static void read_counters(struct timespec *rs)
 {
        struct evsel *counter;
-       int ret;
+       struct affinity affinity;
+       int i, ncpus, cpu;
+
+       if (affinity__setup(&affinity) < 0)
+               return;
+
+       ncpus = perf_cpu_map__nr(evsel_list->core.all_cpus);
+       if (!target__has_cpu(&target) || target__has_per_thread(&target))
+               ncpus = 1;
+       evlist__for_each_cpu(evsel_list, i, cpu) {
+               if (i >= ncpus)
+                       break;
+               affinity__set(&affinity, cpu);
+
+               evlist__for_each_entry(evsel_list, counter) {
+                       if (evsel__cpu_iter_skip(counter, cpu))
+                               continue;
+                       if (!counter->err) {
+                               counter->err = read_counter_cpu(counter, rs,
+                                                               counter->cpu_iter - 1);
+                       }
+               }
+       }
+       affinity__cleanup(&affinity);
 
        evlist__for_each_entry(evsel_list, counter) {
-               ret = read_counter(counter, rs);
-               if (ret)
+               if (counter->err)
                        pr_debug("failed to read counter %s\n", counter->name);
-
-               if (ret == 0 && perf_stat_process_counter(&stat_config, counter))
+               if (counter->err == 0 && perf_stat_process_counter(&stat_config, counter))
                        pr_warning("failed to process counter %s\n", counter->name);
+               counter->err = 0;
        }
 }
 
index ca82a93960cd7bc83f87c3c7b6fe820d8839b91a..c8af4bc23f8ff39151ff7e4acf99d2581c569f74 100644 (file)
@@ -86,6 +86,7 @@ struct evsel {
        struct list_head        config_terms;
        struct bpf_object       *bpf_obj;
        int                     bpf_fd;
+       int                     err;
        bool                    auto_merge_stats;
        bool                    merged_stat;
        const char *            metric_expr;