git.ipfire.org Git - thirdparty/linux.git/commitdiff
perf evsel: Improve the evsel__open_strerror() for EBUSY
author Ian Rogers <irogers@google.com>
Wed, 6 Nov 2024 00:30:05 +0000 (00:30 +0000)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Wed, 8 Jan 2025 20:20:42 +0000 (17:20 -0300)
The existing EBUSY strerror message is:

  The sys_perf_event_open() syscall returned with 16 (Device or resource busy) for event (intel_bts//).
  "dmesg | grep -i perf" may provide additional information.

The dmesg output won't help here. What is more useful is knowing which
processes are potentially using the PMU, which some procfs scanning can
reveal. When parallel testing tests/shell/stat_all_pmu.sh this yields:

  Testing intel_bts//
  Error:
  The PMU intel_bts counters are busy and in use by another process.
  Possible processes:
  2585882 perf list
  2585902 perf list -j -o /tmp/__perf_test.list_output.json.KF9MY
  2585904 perf list
  2585911 perf record -e task-clock --filter period > 1 -o /dev/null --quiet true
  2585912 perf list
  2585915 perf list
  2586042 /tmp/perf/perf record -asdg -e cpu-clock -o /tmp/perftool-testsuite_report.dIF/perf_report/perf.data -- sleep 2
  2589078 perf record -g -e task-clock:u -o - perf test -w noploop
  2589148 /tmp/perf/perf record --control=fifo:control,ack -e cpu-clock -m 1 sleep 10
  2589379 perf --buildid-dir /tmp/perf.debug.Umx record --buildid-all -o /tmp/perf.data.YBm /tmp/perf.ex.MD5.ZQW
  2589568 perf record -o /tmp/__perf_test.program.mtcZH/perf.data --branch-filter any,save_type,u -- perf test -w brstack
  2589649 perf record --per-thread -o /tmp/__perf_test.perf.data.5d3dc perf test -w thloop
  2589898 perf record -o /tmp/perf-test-script.BX2b27Dcnj/pp-perf.data --sample-cpu uname

This gets a little closer to finding the issue.

Committer testing:

  root@number:~#
  root@number:~# grep -m1 "model name" /proc/cpuinfo
  model name : Intel(R) Core(TM) i7-14700K
  root@number:~#

Before:

  root@number:~# perf stat -e intel_bts// &
  [1] 197954
  root@number:~# perf test "perf all PMU test"
  124: perf all PMU test                                               : FAILED!
  root@number:~# perf test -v "perf all PMU test" |& tail
  Testing i915/vecs0-busy/
  Testing i915/vecs0-sema/
  Testing i915/vecs0-wait/
  Testing intel_bts//
  Unexpected signal in main
  Error:
  The sys_perf_event_open() syscall returned with 16 (Device or resource busy) for event (intel_bts//).
  "dmesg | grep -i perf" may provide additional information.
  ---- end(-1) ----
  124: perf all PMU test                                               : FAILED!
  root@number:~#

After:

  root@number:~# perf stat -e intel_bts// &
  [1] 200195
  root@number:~# perf test "perf all PMU test"
  123: perf all PMU test                                               : FAILED!
  root@number:~# perf test -v "perf all PMU test" |& tail
  Testing i915/vecs0-wait/
  Testing intel_bts//
  Unexpected signal in main
  Error:
  The PMU intel_bts counters are busy and in use by another process.
  Possible processes:
  200195 perf stat -e intel_bts//
  2319766 /root/bin/perf top --stdio
  ---- end(-1) ----
  123: perf all PMU test                                               : FAILED!
  root@number:~#

Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Chun-Tse Shao <ctshao@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Weilin Wang <weilin.wang@intel.com>
Cc: Ze Gao <zegao2021@gmail.com>
Change-Id: Ie1ed8688286c44e8f44a35e98fed8be3e2a344df
Link: https://lore.kernel.org/r/20241106003007.2112584-1-ctshao@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/util/evsel.c

index 697428efa644aa36f1357eaec7bc7e0cbe48c53e..bc144388f89298f1ce752ea90005127654a2f308 100644 (file)
@@ -3581,6 +3581,78 @@ static bool find_process(const char *name)
        return ret ? false : true;
 }
 
+/*
+ * List the processes that currently hold a perf_event file descriptor.
+ *
+ * Each numeric directory under the procfs mount point is scanned; a process
+ * is reported when one of its <pid>/fd symlinks reads back as exactly
+ * "anon_inode:[perf_event]". The first hit emits a "Possible processes:"
+ * header and every reported process adds one "<pid> <cmdline>" line.
+ *
+ * @msg:  destination buffer, written through scnprintf().
+ * @size: limit handed to scnprintf() for @msg.
+ *
+ * Returns the sum of the scnprintf() return values, which is 0 when the
+ * procfs directory cannot be opened or no process matched.
+ */
+static int dump_perf_event_processes(char *msg, size_t size)
+{
+       static const char perf_event_link[] = "anon_inode:[perf_event]";
+       DIR *proc_dir;
+       struct dirent *proc_entry;
+       int printed = 0;
+
+       proc_dir = opendir(procfs__mountpoint());
+       if (!proc_dir)
+               return 0;
+
+       /* Walk through the /proc directory. */
+       while ((proc_entry = readdir(proc_dir)) != NULL) {
+               char buf[256];
+               DIR *fd_dir;
+               struct dirent *fd_entry;
+               int fd_dir_fd;
+
+               /*
+                * buf first holds "<pid>/fd" and later "<pid>/cmdline", so
+                * leave room for the longer suffix and its terminator.
+                */
+               if (proc_entry->d_type != DT_DIR ||
+                   !isdigit(proc_entry->d_name[0]) ||
+                   strlen(proc_entry->d_name) > sizeof(buf) - sizeof("/cmdline"))
+                       continue;
+
+               scnprintf(buf, sizeof(buf), "%s/fd", proc_entry->d_name);
+               fd_dir_fd = openat(dirfd(proc_dir), buf, O_DIRECTORY);
+               if (fd_dir_fd == -1)
+                       continue;
+               fd_dir = fdopendir(fd_dir_fd);
+               if (!fd_dir) {
+                       /* fdopendir() failed, so the descriptor is still ours to close. */
+                       close(fd_dir_fd);
+                       continue;
+               }
+               while ((fd_entry = readdir(fd_dir)) != NULL) {
+                       ssize_t link_size;
+                       int cmdline_fd;
+                       ssize_t cmdline_size;
+
+                       if (fd_entry->d_type != DT_LNK)
+                               continue;
+                       link_size = readlinkat(fd_dir_fd, fd_entry->d_name, buf, sizeof(buf));
+                       /*
+                        * Take care as readlink doesn't null terminate the
+                        * string. Require the exact length before comparing:
+                        * bounding the compare by link_size alone would also
+                        * accept any prefix of the target, even an empty one.
+                        * A failed readlinkat() (-1) is rejected here too.
+                        */
+                       if (link_size != (ssize_t)(sizeof(perf_event_link) - 1) ||
+                           memcmp(buf, perf_event_link, sizeof(perf_event_link) - 1))
+                               continue;
+
+                       /*
+                        * One perf_event fd is enough to report the process.
+                        * If its cmdline can't be read, give up on this
+                        * process rather than retrying for every other
+                        * perf_event fd it has open.
+                        */
+                       scnprintf(buf, sizeof(buf), "%s/cmdline", proc_entry->d_name);
+                       cmdline_fd = openat(dirfd(proc_dir), buf, O_RDONLY);
+                       if (cmdline_fd == -1)
+                               break;
+                       cmdline_size = read(cmdline_fd, buf, sizeof(buf) - 1);
+                       close(cmdline_fd);
+                       if (cmdline_size < 0)
+                               break;
+                       buf[cmdline_size] = '\0';
+                       /* Turn the NUL bytes inside the data read into spaces so it prints as one line. */
+                       for (ssize_t i = 0; i < cmdline_size; i++) {
+                               if (buf[i] == '\0')
+                                       buf[i] = ' ';
+                       }
+
+                       if (printed == 0)
+                               printed += scnprintf(msg, size, "Possible processes:\n");
+
+                       printed += scnprintf(msg + printed, size - printed,
+                                       "%s %s\n", proc_entry->d_name, buf);
+                       break;
+               }
+               closedir(fd_dir);
+       }
+       closedir(proc_dir);
+       return printed;
+}
+
 int __weak arch_evsel__open_strerror(struct evsel *evsel __maybe_unused,
                                     char *msg __maybe_unused,
                                     size_t size __maybe_unused)
@@ -3614,7 +3686,7 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
                        printed += scnprintf(msg, size,
                                "No permission to enable %s event.\n\n", evsel__name(evsel));
 
-               return scnprintf(msg + printed, size - printed,
+               return printed + scnprintf(msg + printed, size - printed,
                 "Consider adjusting /proc/sys/kernel/perf_event_paranoid setting to open\n"
                 "access to performance monitoring and observability operations for processes\n"
                 "without CAP_PERFMON, CAP_SYS_PTRACE or CAP_SYS_ADMIN Linux capability.\n"
@@ -3681,6 +3753,11 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
                        return scnprintf(msg, size,
        "The PMU counters are busy/taken by another profiler.\n"
        "We found oprofile daemon running, please stop it and try again.");
+               printed += scnprintf(
+                       msg, size,
+                       "The PMU %s counters are busy and in use by another process.\n",
+                       evsel->pmu ? evsel->pmu->name : "");
+               return printed + dump_perf_event_processes(msg + printed, size - printed);
                break;
        case EINVAL:
                if (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE && perf_missing_features.code_page_size)