From 30bded87bd367e6ec6f2ba7284269cb6d238557e Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Sat, 4 Nov 2023 17:01:00 -0400 Subject: [PATCH] Fixes for 5.10 Signed-off-by: Sasha Levin --- ...st-add-evlist__add_dummy_on_all_cpus.patch | 106 ++++++++++++++++++ ...d-frequency-mode-for-the-dummy-event.patch | 86 ++++++++++++++ ...s-get-rid-of-evlist__add_on_all_cpus.patch | 81 +++++++++++++ queue-5.10/series | 3 + 4 files changed, 276 insertions(+) create mode 100644 queue-5.10/perf-evlist-add-evlist__add_dummy_on_all_cpus.patch create mode 100644 queue-5.10/perf-evlist-avoid-frequency-mode-for-the-dummy-event.patch create mode 100644 queue-5.10/perf-tools-get-rid-of-evlist__add_on_all_cpus.patch diff --git a/queue-5.10/perf-evlist-add-evlist__add_dummy_on_all_cpus.patch b/queue-5.10/perf-evlist-add-evlist__add_dummy_on_all_cpus.patch new file mode 100644 index 00000000000..88c1abaa75c --- /dev/null +++ b/queue-5.10/perf-evlist-add-evlist__add_dummy_on_all_cpus.patch @@ -0,0 +1,106 @@ +From 05cb8c307c35ac0c8b999b6403219bdd2a494cee Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 24 May 2022 10:54:26 +0300 +Subject: perf evlist: Add evlist__add_dummy_on_all_cpus() + +From: Adrian Hunter + +[ Upstream commit 126d68fdcabed8c2ca5ffaba785add93ef722da8 ] + +Add evlist__add_dummy_on_all_cpus() to enable creating a system-wide dummy +event that sets up the system-wide maps before map propagation. + +For convenience, add evlist__add_aux_dummy() so that the logic can be used +whether or not the event needs to be system-wide. 
+ +Signed-off-by: Adrian Hunter +Acked-by: Ian Rogers +Acked-by: Namhyung Kim +Cc: Alexey Bayduraev +Cc: Ian Rogers +Cc: Jiri Olsa +Cc: Leo Yan +Link: https://lore.kernel.org/r/20220524075436.29144-6-adrian.hunter@intel.com +Signed-off-by: Arnaldo Carvalho de Melo +Stable-dep-of: f9cdeb58a9cf ("perf evlist: Avoid frequency mode for the dummy event") +Signed-off-by: Sasha Levin +--- + tools/perf/util/evlist.c | 45 ++++++++++++++++++++++++++++++++++++++++ + tools/perf/util/evlist.h | 5 +++++ + 2 files changed, 50 insertions(+) + +diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c +index 98ae432470cdd..117420abdc325 100644 +--- a/tools/perf/util/evlist.c ++++ b/tools/perf/util/evlist.c +@@ -261,6 +261,51 @@ int evlist__add_dummy(struct evlist *evlist) + return 0; + } + ++static void evlist__add_on_all_cpus(struct evlist *evlist, struct evsel *evsel) ++{ ++ evsel->core.system_wide = true; ++ ++ /* ++ * All CPUs. ++ * ++ * Note perf_event_open() does not accept CPUs that are not online, so ++ * in fact this CPU list will include only all online CPUs. 
++ */ ++ perf_cpu_map__put(evsel->core.own_cpus); ++ evsel->core.own_cpus = perf_cpu_map__new(NULL); ++ perf_cpu_map__put(evsel->core.cpus); ++ evsel->core.cpus = perf_cpu_map__get(evsel->core.own_cpus); ++ ++ /* No threads */ ++ perf_thread_map__put(evsel->core.threads); ++ evsel->core.threads = perf_thread_map__new_dummy(); ++ ++ evlist__add(evlist, evsel); ++} ++ ++struct evsel *evlist__add_aux_dummy(struct evlist *evlist, bool system_wide) ++{ ++ struct evsel *evsel = evlist__dummy_event(evlist); ++ ++ if (!evsel) ++ return NULL; ++ ++ evsel->core.attr.exclude_kernel = 1; ++ evsel->core.attr.exclude_guest = 1; ++ evsel->core.attr.exclude_hv = 1; ++ evsel->core.attr.freq = 0; ++ evsel->core.attr.sample_period = 1; ++ evsel->no_aux_samples = true; ++ evsel->name = strdup("dummy:u"); ++ ++ if (system_wide) ++ evlist__add_on_all_cpus(evlist, evsel); ++ else ++ evlist__add(evlist, evsel); ++ ++ return evsel; ++} ++ + static int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs) + { + struct evsel *evsel, *n; +diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h +index 9298fce53ea31..eb36f85ba3f3e 100644 +--- a/tools/perf/util/evlist.h ++++ b/tools/perf/util/evlist.h +@@ -111,6 +111,11 @@ int __evlist__add_default_attrs(struct evlist *evlist, + __evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array)) + + int evlist__add_dummy(struct evlist *evlist); ++struct evsel *evlist__add_aux_dummy(struct evlist *evlist, bool system_wide); ++static inline struct evsel *evlist__add_dummy_on_all_cpus(struct evlist *evlist) ++{ ++ return evlist__add_aux_dummy(evlist, true); ++} + + int perf_evlist__add_sb_event(struct evlist *evlist, + struct perf_event_attr *attr, +-- +2.42.0 + diff --git a/queue-5.10/perf-evlist-avoid-frequency-mode-for-the-dummy-event.patch b/queue-5.10/perf-evlist-avoid-frequency-mode-for-the-dummy-event.patch new file mode 100644 index 00000000000..d182c682146 --- /dev/null +++ 
b/queue-5.10/perf-evlist-avoid-frequency-mode-for-the-dummy-event.patch @@ -0,0 +1,86 @@ +From 48d6f4e0e28650d6c432a30f3a5ba71825bca7ce Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 15 Sep 2023 20:56:40 -0700 +Subject: perf evlist: Avoid frequency mode for the dummy event + +From: Ian Rogers + +[ Upstream commit f9cdeb58a9cf46c09b56f5f661ea8da24b6458c3 ] + +Dummy events are created with an attribute where the period and freq +are zero. evsel__config will then see the uninitialized values and +initialize them in evsel__default_freq_period. As frequency mode is +used by default the dummy event would be set to use frequency +mode. However, this has no effect on the dummy event but does cause +unnecessary timers/interrupts. Avoid this overhead by setting the +period to 1 for dummy events. + +evlist__add_aux_dummy calls evlist__add_dummy then sets freq=0 and +period=1. This isn't necessary after this change and so the setting is +removed. + +From Stephane: + +The dummy event is not counting anything. It is used to collect mmap +records and avoid a race condition during the synthesize mmap phase of +perf record. As such, it should not cause any overhead during active +profiling. Yet, it did. Because of a bug the dummy event was +programmed as a sampling event in frequency mode. Events in that mode +incur more kernel overheads because on timer tick, the kernel has to +look at the number of samples for each event and potentially adjust +the sampling period to achieve the desired frequency. The dummy event +was therefore adding a frequency event to task and ctx contexts we may +otherwise not have any, e.g., + + perf record -a -e cpu/event=0x3c,period=10000000/. + +On each timer tick the perf_adjust_freq_unthr_context() is invoked and +if ctx->nr_freq is non-zero, then the kernel will loop over ALL the +events of the context looking for frequency mode ones. In doing so, it +locks the context, and enable/disable the PMU of each hw event. 
If all +the events of the context are in period mode, the kernel will have to +traverse the list for nothing incurring overhead. The overhead is +multiplied by a very large factor when this happens in a guest kernel. +There is no need for the dummy event to be in frequency mode, it does +not count anything and therefore should not cause extra overhead for +no reason. + +Fixes: 5bae0250237f ("perf evlist: Introduce perf_evlist__new_dummy constructor") +Reported-by: Stephane Eranian +Signed-off-by: Ian Rogers +Acked-by: Adrian Hunter +Cc: Yang Jihong +Cc: Kan Liang +Link: https://lore.kernel.org/r/20230916035640.1074422-1-irogers@google.com +Signed-off-by: Namhyung Kim +Signed-off-by: Sasha Levin +--- + tools/perf/util/evlist.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c +index f0ca9aa7c208e..84b328d2515bd 100644 +--- a/tools/perf/util/evlist.c ++++ b/tools/perf/util/evlist.c +@@ -251,6 +251,9 @@ int evlist__add_dummy(struct evlist *evlist) + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_DUMMY, + .size = sizeof(attr), /* to capture ABI version */ ++ /* Avoid frequency mode for dummy events to avoid associated timers. 
*/ ++ .freq = 0, ++ .sample_period = 1, + }; + struct evsel *evsel = evsel__new_idx(&attr, evlist->core.nr_entries); + +@@ -271,8 +274,6 @@ struct evsel *evlist__add_aux_dummy(struct evlist *evlist, bool system_wide) + evsel->core.attr.exclude_kernel = 1; + evsel->core.attr.exclude_guest = 1; + evsel->core.attr.exclude_hv = 1; +- evsel->core.attr.freq = 0; +- evsel->core.attr.sample_period = 1; + evsel->core.system_wide = system_wide; + evsel->no_aux_samples = true; + evsel->name = strdup("dummy:u"); +-- +2.42.0 + diff --git a/queue-5.10/perf-tools-get-rid-of-evlist__add_on_all_cpus.patch b/queue-5.10/perf-tools-get-rid-of-evlist__add_on_all_cpus.patch new file mode 100644 index 00000000000..adbe48c9eb1 --- /dev/null +++ b/queue-5.10/perf-tools-get-rid-of-evlist__add_on_all_cpus.patch @@ -0,0 +1,81 @@ +From 9f23ade132ff1486c694d1632ae471f187e20ee0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 3 Oct 2022 13:46:45 -0700 +Subject: perf tools: Get rid of evlist__add_on_all_cpus() + +From: Namhyung Kim + +[ Upstream commit 60ea006f72512fd7c36f16cdbe91f4fc284f8115 ] + +The cpu and thread maps are properly handled in libperf now. No need to +do it in the perf tools anymore. Let's remove the logic. 
+ +Reviewed-by: Adrian Hunter +Signed-off-by: Namhyung Kim +Cc: Ian Rogers +Cc: Ingo Molnar +Cc: Jiri Olsa +Cc: Kan Liang +Cc: Leo Yan +Cc: Peter Zijlstra +Link: https://lore.kernel.org/r/20221003204647.1481128-4-namhyung@kernel.org +Signed-off-by: Arnaldo Carvalho de Melo +Stable-dep-of: f9cdeb58a9cf ("perf evlist: Avoid frequency mode for the dummy event") +Signed-off-by: Sasha Levin +--- + tools/perf/util/evlist.c | 29 ++--------------------------- + 1 file changed, 2 insertions(+), 27 deletions(-) + +diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c +index 117420abdc325..f0ca9aa7c208e 100644 +--- a/tools/perf/util/evlist.c ++++ b/tools/perf/util/evlist.c +@@ -261,28 +261,6 @@ int evlist__add_dummy(struct evlist *evlist) + return 0; + } + +-static void evlist__add_on_all_cpus(struct evlist *evlist, struct evsel *evsel) +-{ +- evsel->core.system_wide = true; +- +- /* +- * All CPUs. +- * +- * Note perf_event_open() does not accept CPUs that are not online, so +- * in fact this CPU list will include only all online CPUs. 
+- */ +- perf_cpu_map__put(evsel->core.own_cpus); +- evsel->core.own_cpus = perf_cpu_map__new(NULL); +- perf_cpu_map__put(evsel->core.cpus); +- evsel->core.cpus = perf_cpu_map__get(evsel->core.own_cpus); +- +- /* No threads */ +- perf_thread_map__put(evsel->core.threads); +- evsel->core.threads = perf_thread_map__new_dummy(); +- +- evlist__add(evlist, evsel); +-} +- + struct evsel *evlist__add_aux_dummy(struct evlist *evlist, bool system_wide) + { + struct evsel *evsel = evlist__dummy_event(evlist); +@@ -295,14 +273,11 @@ struct evsel *evlist__add_aux_dummy(struct evlist *evlist, bool system_wide) + evsel->core.attr.exclude_hv = 1; + evsel->core.attr.freq = 0; + evsel->core.attr.sample_period = 1; ++ evsel->core.system_wide = system_wide; + evsel->no_aux_samples = true; + evsel->name = strdup("dummy:u"); + +- if (system_wide) +- evlist__add_on_all_cpus(evlist, evsel); +- else +- evlist__add(evlist, evsel); +- ++ evlist__add(evlist, evsel); + return evsel; + } + +-- +2.42.0 + diff --git a/queue-5.10/series b/queue-5.10/series index cb98eca1183..401420f89e9 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -72,3 +72,6 @@ scsi-mpt3sas-fix-in-error-path.patch platform-mellanox-mlxbf-tmfifo-fix-a-warning-message.patch net-chelsio-cxgb4-add-an-error-code-check-in-t4_load.patch powerpc-mm-fix-boot-crash-with-flatmem.patch +perf-evlist-add-evlist__add_dummy_on_all_cpus.patch +perf-tools-get-rid-of-evlist__add_on_all_cpus.patch +perf-evlist-avoid-frequency-mode-for-the-dummy-event.patch -- 2.47.3