perf stat: Use affinity for opening events
author    Andi Kleen <ak@linux.intel.com>
          Thu, 21 Nov 2019 00:15:19 +0000 (16:15 -0800)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
          Fri, 29 Nov 2019 15:20:45 +0000 (12:20 -0300)

Restructure the event opening in perf stat to cycle through the events
by CPU after setting affinity to that CPU.

This eliminates IPI overhead in the perf API.

We have to loop through the CPUs in the outer builtin-stat code instead
of leaving that to the low-level functions.
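
For illustration, a minimal standalone sketch of the pattern (not the
perf code itself: the real implementation uses the util/affinity.h
helpers visible in the diff below and opens all events per CPU, and
error handling is elided here):

  /* Open one counter on every CPU, pinning the opening thread to
   * each target CPU first so that perf_event_open() runs locally
   * and the kernel needs no cross-CPU IPIs. */
  #define _GNU_SOURCE
  #include <sched.h>
  #include <unistd.h>
  #include <sys/syscall.h>
  #include <linux/perf_event.h>

  static int open_counter_on(struct perf_event_attr *attr, int cpu)
  {
          /* pid == -1, cpu >= 0: count that CPU, any task */
          return syscall(SYS_perf_event_open, attr, -1, cpu, -1, 0);
  }

  void open_all_cpus(struct perf_event_attr *attr, int *fds, int ncpus)
  {
          cpu_set_t saved, set;
          int cpu;

          sched_getaffinity(0, sizeof(saved), &saved);
          for (cpu = 0; cpu < ncpus; cpu++) {
                  CPU_ZERO(&set);
                  CPU_SET(cpu, &set);
                  /* Migrate ourselves to the target CPU first ... */
                  sched_setaffinity(0, sizeof(set), &set);
                  /* ... so this open happens without an IPI. */
                  fds[cpu] = open_counter_on(attr, cpu);
          }
          sched_setaffinity(0, sizeof(saved), &saved);
  }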

This changes the weak group fallback strategy slightly. Since we
cannot easily undo the opens for other CPUs, move the weak group retry
to a separate loop.
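
Condensed, the new two-pass flow looks like the following model.
pin_to(), open_on() and close_on() are illustrative stand-ins for
affinity__set(), create_perf_stat_counter() and perf_evsel__close_cpu()
in the actual patch, and the stat_handle_error() retry logic is elided:

  #include <stdbool.h>

  struct counter {
          bool weak_group;   /* opened as part of a :W group */
          bool reset_group;  /* sticky: close and reopen ungrouped */
          bool errored;      /* sticky: this counter failed for good */
  };

  /* Stand-ins for the real perf helpers. */
  void pin_to(int cpu);                        /* set thread affinity */
  int  open_on(struct counter *c, int cpu);    /* perf_event_open     */
  void close_on(struct counter *c, int cpu);   /* close one CPU's fd  */

  int open_counters(struct counter *ev, int nev, int ncpus)
  {
          bool second_pass = false;
          int cpu, i;

          /* Pass 1: opens only.  A weak group failure is recorded,
           * not fixed up here: the sibling fds that would need
           * closing were opened while pinned to other CPUs.  (The
           * real code marks every member of the failed group and
           * checks errno for EINVAL/EBADF.) */
          for (cpu = 0; cpu < ncpus; cpu++) {
                  pin_to(cpu);
                  for (i = 0; i < nev; i++) {
                          if (ev[i].reset_group || ev[i].errored)
                                  continue;
                          if (open_on(&ev[i], cpu) < 0 && ev[i].weak_group) {
                                  ev[i].reset_group = true;
                                  second_pass = true;
                          }
                  }
          }
          if (!second_pass)
                  return 0;

          /* Pass 2: revisit every CPU under the right affinity,
           * close the half-open weak groups and errored counters,
           * then reopen the weak group members as independent,
           * non-grouped events. */
          for (cpu = 0; cpu < ncpus; cpu++) {
                  pin_to(cpu);
                  for (i = 0; i < nev; i++)
                          if (ev[i].reset_group || ev[i].errored)
                                  close_on(&ev[i], cpu);
                  for (i = 0; i < nev; i++)
                          if (ev[i].reset_group)
                                  open_on(&ev[i], cpu);
          }
          return 0;
  }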

Before, with a large test case on a 94-CPU system:

  % time     seconds  usecs/call     calls    errors syscall
  ------ ----------- ----------- --------- --------- ----------------
   42.75    4.050910          67     60046       110 perf_event_open

After:

   26.86    0.944396          16     58069       110 perf_event_open

(the call count changes slightly because the weak group retries
work differently and the test case relies on weak groups)

Committer notes:

Added one of the hunks in a patch provided by Andi after I noticed that
the "event times" 'perf test' entry was segfaulting.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: http://lore.kernel.org/lkml/20191121001522.180827-10-andi@firstfloor.org
Link: http://lore.kernel.org/lkml/20191127232657.GL84886@tassilo.jf.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/builtin-record.c
tools/perf/builtin-stat.c
tools/perf/tests/event-times.c
tools/perf/util/evlist.c
tools/perf/util/evlist.h
tools/perf/util/evsel.c
tools/perf/util/evsel.h
tools/perf/util/stat.c
tools/perf/util/stat.h

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index b5063d3b6fd077fd8b5b4d35936f44cd6609bae7..fb19ef63cc35fa78ad1a7a2318031ff70a770fe0 100644
@@ -832,7 +832,7 @@ try_again:
                        if ((errno == EINVAL || errno == EBADF) &&
                            pos->leader != pos &&
                            pos->weak_group) {
-                               pos = perf_evlist__reset_weak_group(evlist, pos);
+                               pos = perf_evlist__reset_weak_group(evlist, pos, true);
                                goto try_again;
                        }
                        rc = -errno;
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 1d9d7161815e57412109ca88bef35b741a7eb6c9..cf8516e701e2c8e01d49e354ef5d632336238a8c 100644
@@ -65,6 +65,7 @@
 #include "util/target.h"
 #include "util/time-utils.h"
 #include "util/top.h"
+#include "util/affinity.h"
 #include "asm/bug.h"
 
 #include <linux/time64.h>
@@ -440,6 +441,11 @@ static enum counter_recovery stat_handle_error(struct evsel *counter)
                        ui__warning("%s event is not supported by the kernel.\n",
                                    perf_evsel__name(counter));
                counter->supported = false;
+               /*
+                * errored is a sticky flag that means one of the counter's
+                * cpu event had a problem and needs to be reexamined.
+                */
+               counter->errored = true;
 
                if ((counter->leader != counter) ||
                    !(counter->leader->core.nr_members > 1))
@@ -484,6 +490,9 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
        int status = 0;
        const bool forks = (argc > 0);
        bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false;
+       struct affinity affinity;
+       int i, cpu;
+       bool second_pass = false;
 
        if (interval) {
                ts.tv_sec  = interval / USEC_PER_MSEC;
@@ -508,30 +517,104 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
        if (group)
                perf_evlist__set_leader(evsel_list);
 
-       evlist__for_each_entry(evsel_list, counter) {
+       if (affinity__setup(&affinity) < 0)
+               return -1;
+
+       evlist__for_each_cpu (evsel_list, i, cpu) {
+               affinity__set(&affinity, cpu);
+
+               evlist__for_each_entry(evsel_list, counter) {
+                       if (evsel__cpu_iter_skip(counter, cpu))
+                               continue;
+                       if (counter->reset_group || counter->errored)
+                               continue;
 try_again:
-               if (create_perf_stat_counter(counter, &stat_config, &target) < 0) {
-
-                       /* Weak group failed. Reset the group. */
-                       if ((errno == EINVAL || errno == EBADF) &&
-                           counter->leader != counter &&
-                           counter->weak_group) {
-                               counter = perf_evlist__reset_weak_group(evsel_list, counter);
-                               goto try_again;
+                       if (create_perf_stat_counter(counter, &stat_config, &target,
+                                                    counter->cpu_iter - 1) < 0) {
+
+                               /*
+                                * Weak group failed. We cannot just undo this here
+                                * because earlier CPUs might be in group mode, and the kernel
+                                * doesn't support mixing group and non group reads. Defer
+                                * it to later.
+                                * Don't close here because we're in the wrong affinity.
+                                */
+                               if ((errno == EINVAL || errno == EBADF) &&
+                                   counter->leader != counter &&
+                                   counter->weak_group) {
+                                       perf_evlist__reset_weak_group(evsel_list, counter, false);
+                                       assert(counter->reset_group);
+                                       second_pass = true;
+                                       continue;
+                               }
+
+                               switch (stat_handle_error(counter)) {
+                               case COUNTER_FATAL:
+                                       return -1;
+                               case COUNTER_RETRY:
+                                       goto try_again;
+                               case COUNTER_SKIP:
+                                       continue;
+                               default:
+                                       break;
+                               }
+
                        }
+                       counter->supported = true;
+               }
+       }
 
-                       switch (stat_handle_error(counter)) {
-                       case COUNTER_FATAL:
-                               return -1;
-                       case COUNTER_RETRY:
-                               goto try_again;
-                       case COUNTER_SKIP:
-                               continue;
-                       default:
-                               break;
+       if (second_pass) {
+               /*
+                * Now redo all the weak group after closing them,
+                * and also close errored counters.
+                */
+
+               evlist__for_each_cpu(evsel_list, i, cpu) {
+                       affinity__set(&affinity, cpu);
+                       /* First close errored or weak retry */
+                       evlist__for_each_entry(evsel_list, counter) {
+                               if (!counter->reset_group && !counter->errored)
+                                       continue;
+                               if (evsel__cpu_iter_skip_no_inc(counter, cpu))
+                                       continue;
+                               perf_evsel__close_cpu(&counter->core, counter->cpu_iter);
+                       }
+                       /* Now reopen weak */
+                       evlist__for_each_entry(evsel_list, counter) {
+                               if (!counter->reset_group && !counter->errored)
+                                       continue;
+                               if (evsel__cpu_iter_skip(counter, cpu))
+                                       continue;
+                               if (!counter->reset_group)
+                                       continue;
+try_again_reset:
+                               pr_debug2("reopening weak %s\n", perf_evsel__name(counter));
+                               if (create_perf_stat_counter(counter, &stat_config, &target,
+                                                            counter->cpu_iter - 1) < 0) {
+
+                                       switch (stat_handle_error(counter)) {
+                                       case COUNTER_FATAL:
+                                               return -1;
+                                       case COUNTER_RETRY:
+                                               goto try_again_reset;
+                                       case COUNTER_SKIP:
+                                               continue;
+                                       default:
+                                               break;
+                                       }
+                               }
+                               counter->supported = true;
                        }
                }
-               counter->supported = true;
+       }
+       affinity__cleanup(&affinity);
+
+       evlist__for_each_entry(evsel_list, counter) {
+               if (!counter->supported) {
+                       perf_evsel__free_fd(&counter->core);
+                       continue;
+               }
 
                l = strlen(counter->unit);
                if (l > stat_config.unit_width)
diff --git a/tools/perf/tests/event-times.c b/tools/perf/tests/event-times.c
index 1ee8704e22849726dd30a0aa74642515fd07f059..1e8a9f5c356dd623226c5fb7dee5e4f30b002b6f 100644
@@ -125,7 +125,7 @@ static int attach__cpu_disabled(struct evlist *evlist)
 
        evsel->core.attr.disabled = 1;
 
-       err = perf_evsel__open_per_cpu(evsel, cpus);
+       err = perf_evsel__open_per_cpu(evsel, cpus, -1);
        if (err) {
                if (err == -EACCES)
                        return TEST_SKIP;
@@ -152,7 +152,7 @@ static int attach__cpu_enabled(struct evlist *evlist)
                return -1;
        }
 
-       err = perf_evsel__open_per_cpu(evsel, cpus);
+       err = perf_evsel__open_per_cpu(evsel, cpus, -1);
        if (err == -EACCES)
                return TEST_SKIP;
 
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 2e8d38a324be986034b1d95ff56abae91a770ff6..096a4ea65b1ba97e568880a436107ebc9a05adf8 100644
@@ -1636,7 +1636,8 @@ void perf_evlist__force_leader(struct evlist *evlist)
 }
 
 struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list,
-                                                struct evsel *evsel)
+                                                struct evsel *evsel,
+                                               bool close)
 {
        struct evsel *c2, *leader;
        bool is_open = true;
@@ -1653,10 +1654,15 @@ struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list,
                if (c2 == evsel)
                        is_open = false;
                if (c2->leader == leader) {
-                       if (is_open)
+                       if (is_open && close)
                                perf_evsel__close(&c2->core);
                        c2->leader = c2;
                        c2->core.nr_members = 0;
+                       /*
+                        * Set this for all former members of the group
+                        * to indicate they get reopened.
+                        */
+                       c2->reset_group = true;
                }
        }
        return leader;
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 22e2f58eabead8f52bc86f8be9e9703461405171..f5bd5c386df1138423313860c34ab331d34024a4 100644
@@ -356,5 +356,6 @@ bool perf_evlist__exclude_kernel(struct evlist *evlist);
 void perf_evlist__force_leader(struct evlist *evlist);
 
 struct evsel *perf_evlist__reset_weak_group(struct evlist *evlist,
-                                                struct evsel *evsel);
+                                                struct evsel *evsel,
+                                               bool close);
 #endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index f4dea055b0808941229374cf6a93d7992888b406..aa180d1df50f7e541912d9222eca1634d5be443f 100644
@@ -1587,8 +1587,9 @@ static int perf_event_open(struct evsel *evsel,
        return fd;
 }
 
-int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
-               struct perf_thread_map *threads)
+static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
+               struct perf_thread_map *threads,
+               int start_cpu, int end_cpu)
 {
        int cpu, thread, nthreads;
        unsigned long flags = PERF_FLAG_FD_CLOEXEC;
@@ -1665,7 +1666,7 @@ retry_sample_id:
 
        display_attr(&evsel->core.attr);
 
-       for (cpu = 0; cpu < cpus->nr; cpu++) {
+       for (cpu = start_cpu; cpu < end_cpu; cpu++) {
 
                for (thread = 0; thread < nthreads; thread++) {
                        int fd, group_fd;
@@ -1843,6 +1844,12 @@ out_close:
        return err;
 }
 
+int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
+               struct perf_thread_map *threads)
+{
+       return evsel__open_cpu(evsel, cpus, threads, 0, cpus ? cpus->nr : 1);
+}
+
 void evsel__close(struct evsel *evsel)
 {
        perf_evsel__close(&evsel->core);
@@ -1850,9 +1857,14 @@ void evsel__close(struct evsel *evsel)
 }
 
 int perf_evsel__open_per_cpu(struct evsel *evsel,
-                            struct perf_cpu_map *cpus)
+                            struct perf_cpu_map *cpus,
+                            int cpu)
 {
-       return evsel__open(evsel, cpus, NULL);
+       if (cpu == -1)
+               return evsel__open_cpu(evsel, cpus, NULL, 0,
+                                       cpus ? cpus->nr : 1);
+
+       return evsel__open_cpu(evsel, cpus, NULL, cpu, cpu + 1);
 }
 
 int perf_evsel__open_per_thread(struct evsel *evsel,
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index b10d5ba2196622ee4d40a2cc241c82c26e8c3a49..ca82a93960cd7bc83f87c3c7b6fe820d8839b91a 100644
@@ -94,6 +94,8 @@ struct evsel {
        struct evsel            *metric_leader;
        bool                    collect_stat;
        bool                    weak_group;
+       bool                    reset_group;
+       bool                    errored;
        bool                    percore;
        int                     cpu_iter;
        const char              *pmu_name;
@@ -223,7 +225,8 @@ int evsel__enable(struct evsel *evsel);
 int evsel__disable(struct evsel *evsel);
 
 int perf_evsel__open_per_cpu(struct evsel *evsel,
-                            struct perf_cpu_map *cpus);
+                            struct perf_cpu_map *cpus,
+                            int cpu);
 int perf_evsel__open_per_thread(struct evsel *evsel,
                                struct perf_thread_map *threads);
 int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 332cb730785bd3defe9b8e29c8d6080c6551cd68..5f26137b8d6028fa0fc00e772ebacf43ba3e007e 100644
@@ -464,7 +464,8 @@ size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp)
 
 int create_perf_stat_counter(struct evsel *evsel,
                             struct perf_stat_config *config,
-                            struct target *target)
+                            struct target *target,
+                            int cpu)
 {
        struct perf_event_attr *attr = &evsel->core.attr;
        struct evsel *leader = evsel->leader;
@@ -518,7 +519,7 @@ int create_perf_stat_counter(struct evsel *evsel,
        }
 
        if (target__has_cpu(target) && !target__has_per_thread(target))
-               return perf_evsel__open_per_cpu(evsel, evsel__cpus(evsel));
+               return perf_evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu);
 
        return perf_evsel__open_per_thread(evsel, evsel->core.threads);
 }
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index bfa9aaf36ce6fc6027853bc6e6c90e4f2496bdc1..fb990efa54a8a83d8960aa0cedde11301f819ca2 100644
@@ -214,7 +214,8 @@ size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp);
 
 int create_perf_stat_counter(struct evsel *evsel,
                             struct perf_stat_config *config,
-                            struct target *target);
+                            struct target *target,
+                            int cpu);
 void
 perf_evlist__print_counters(struct evlist *evlist,
                            struct perf_stat_config *config,