From: Ian Rogers Date: Wed, 6 Nov 2024 00:30:05 +0000 (+0000) Subject: perf evsel: Improve the evsel__open_strerror() for EBUSY X-Git-Tag: v6.14-rc1~120^2~69 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=05efa0ab0115d32882fc516aa5c99199f35412f1;p=thirdparty%2Flinux.git perf evsel: Improve the evsel__open_strerror() for EBUSY The existing EBUSY strerror message is: The sys_perf_event_open() syscall returned with 16 (Device or resource busy) for event (intel_bts//). "dmesg | grep -i perf" may provide additional information. The dmesg won't be useful. What is more useful is knowing what processes are potentially using the PMU, which some procfs scanning can reveal. When parallel testing tests/shell/stat_all_pmu.sh this yields: Testing intel_bts// Error: The PMU intel_bts counters are busy and in use by another process. Possible processes: 2585882 perf list 2585902 perf list -j -o /tmp/__perf_test.list_output.json.KF9MY 2585904 perf list 2585911 perf record -e task-clock --filter period > 1 -o /dev/null --quiet true 2585912 perf list 2585915 perf list 2586042 /tmp/perf/perf record -asdg -e cpu-clock -o /tmp/perftool-testsuite_report.dIF/perf_report/perf.data -- sleep 2 2589078 perf record -g -e task-clock:u -o - perf test -w noploop 2589148 /tmp/perf/perf record --control=fifo:control,ack -e cpu-clock -m 1 sleep 10 2589379 perf --buildid-dir /tmp/perf.debug.Umx record --buildid-all -o /tmp/perf.data.YBm /tmp/perf.ex.MD5.ZQW 2589568 perf record -o /tmp/__perf_test.program.mtcZH/perf.data --branch-filter any,save_type,u -- perf test -w brstack 2589649 perf record --per-thread -o /tmp/__perf_test.perf.data.5d3dc perf test -w thloop 2589898 perf record -o /tmp/perf-test-script.BX2b27Dcnj/pp-perf.data --sample-cpu uname Which gets a little closer to finding the issue. Committer testing: root@number:~# root@number:~# grep -m1 "model name" /proc/cpuinfo model name : Intel(R) Core(TM) i7-14700K root@number:~# Before: root@number:~# perf stat -e intel_bts// & [1] 197954 root@number:~# perf test "perf all PMU test" 124: perf all PMU test : FAILED! root@number:~# perf test -v "perf all PMU test" |& tail Testing i915/vecs0-busy/ Testing i915/vecs0-sema/ Testing i915/vecs0-wait/ Testing intel_bts// Unexpected signal in main Error: The sys_perf_event_open() syscall returned with 16 (Device or resource busy) for event (intel_bts//). "dmesg | grep -i perf" may provide additional information. ---- end(-1) ---- 124: perf all PMU test : FAILED! root@number:~# After: root@number:~# perf stat -e intel_bts// & [1] 200195 root@number:~# perf test "perf all PMU test" 123: perf all PMU test : FAILED! root@number:~# perf test -v "perf all PMU test" |& tail Testing i915/vecs0-wait/ Testing intel_bts// Unexpected signal in main Error: The PMU intel_bts counters are busy and in use by another process. Possible processes: 200195 perf stat -e intel_bts// 2319766 /root/bin/perf top --stdio ---- end(-1) ---- 123: perf all PMU test : FAILED! root@number:~# Signed-off-by: Ian Rogers Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Chun-Tse Shao Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Weilin Wang Cc: Ze Gao Change-Id: Ie1ed8688286c44e8f44a35e98fed8be3e2a344df Link: https://lore.kernel.org/r/20241106003007.2112584-1-ctshao@google.com Signed-off-by: Arnaldo Carvalho de Melo --- diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 697428efa644a..bc144388f8929 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -3581,6 +3581,78 @@ static bool find_process(const char *name) return ret ? false : true; } +static int dump_perf_event_processes(char *msg, size_t size) +{ + DIR *proc_dir; + struct dirent *proc_entry; + int printed = 0; + + proc_dir = opendir(procfs__mountpoint()); + if (!proc_dir) + return 0; + + /* Walk through the /proc directory. */ + while ((proc_entry = readdir(proc_dir)) != NULL) { + char buf[256]; + DIR *fd_dir; + struct dirent *fd_entry; + int fd_dir_fd; + + if (proc_entry->d_type != DT_DIR || + !isdigit(proc_entry->d_name[0]) || + strlen(proc_entry->d_name) > sizeof(buf) - 4) + continue; + + scnprintf(buf, sizeof(buf), "%s/fd", proc_entry->d_name); + fd_dir_fd = openat(dirfd(proc_dir), buf, O_DIRECTORY); + if (fd_dir_fd == -1) + continue; + fd_dir = fdopendir(fd_dir_fd); + if (!fd_dir) { + close(fd_dir_fd); + continue; + } + while ((fd_entry = readdir(fd_dir)) != NULL) { + ssize_t link_size; + + if (fd_entry->d_type != DT_LNK) + continue; + link_size = readlinkat(fd_dir_fd, fd_entry->d_name, buf, sizeof(buf)); + if (link_size < 0) + continue; + /* Take care as readlink doesn't null terminate the string. */ + if (!strncmp(buf, "anon_inode:[perf_event]", link_size)) { + int cmdline_fd; + ssize_t cmdline_size; + + scnprintf(buf, sizeof(buf), "%s/cmdline", proc_entry->d_name); + cmdline_fd = openat(dirfd(proc_dir), buf, O_RDONLY); + if (cmdline_fd == -1) + continue; + cmdline_size = read(cmdline_fd, buf, sizeof(buf) - 1); + close(cmdline_fd); + if (cmdline_size < 0) + continue; + buf[cmdline_size] = '\0'; + for (ssize_t i = 0; i < cmdline_size; i++) { + if (buf[i] == '\0') + buf[i] = ' '; + } + + if (printed == 0) + printed += scnprintf(msg, size, "Possible processes:\n"); + + printed += scnprintf(msg + printed, size - printed, + "%s %s\n", proc_entry->d_name, buf); + break; + } + } + closedir(fd_dir); + } + closedir(proc_dir); + return printed; +} + int __weak arch_evsel__open_strerror(struct evsel *evsel __maybe_unused, char *msg __maybe_unused, size_t size __maybe_unused) @@ -3614,7 +3686,7 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target, printed += scnprintf(msg, size, "No permission to enable %s event.\n\n", evsel__name(evsel)); - return scnprintf(msg + printed, size - printed, + return printed + scnprintf(msg + printed, size - printed, "Consider adjusting /proc/sys/kernel/perf_event_paranoid setting to open\n" "access to performance monitoring and observability operations for processes\n" "without CAP_PERFMON, CAP_SYS_PTRACE or CAP_SYS_ADMIN Linux capability.\n" @@ -3681,6 +3753,11 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target, return scnprintf(msg, size, "The PMU counters are busy/taken by another profiler.\n" "We found oprofile daemon running, please stop it and try again."); + printed += scnprintf( + msg, size, + "The PMU %s counters are busy and in use by another process.\n", + evsel->pmu ? evsel->pmu->name : ""); + return printed + dump_perf_event_processes(msg + printed, size - printed); break; case EINVAL: if (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE && perf_missing_features.code_page_size)