]> git.ipfire.org Git - thirdparty/linux.git/commitdiff
perf intel-tpebs: Don't close record on read
authorIan Rogers <irogers@google.com>
Mon, 14 Apr 2025 17:41:30 +0000 (10:41 -0700)
committerArnaldo Carvalho de Melo <acme@redhat.com>
Fri, 25 Apr 2025 15:31:28 +0000 (12:31 -0300)
Factor sending record control fd code into its own function.

Rather than killing the record process send it a ping when reading.

Timeouts were witnessed if done too frequently, so only ping for the
first tpebs events.

Don't kill the record command send it a stop command.

As close isn't reliably called also close on evsel__exit.

Add extra checks on the record being terminated to avoid warnings.

Adjust the locking as needed and incorporate extra -Wthread-safety
checks.

Check to do six 500ms poll timeouts when sending commands, rather than
the larger 3000ms, to allow the record process terminating to be better
witnessed.

Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Weilin Wang <weilin.wang@intel.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexandre Torgue <alexandre.torgue@foss.st.com>
Cc: Andreas Färber <afaerber@suse.de>
Cc: Caleb Biggers <caleb.biggers@intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: Perry Taylor <perry.taylor@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Falcon <thomas.falcon@intel.com>
Link: https://lore.kernel.org/r/20250414174134.3095492-13-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/util/evsel.c
tools/perf/util/intel-tpebs.c

index 2079ffab29f8b9e58898bd022eef95a280809d8c..07a3a4e3b7a471c6b45d07f4b99c0d940935d074 100644 (file)
@@ -1656,6 +1656,8 @@ void evsel__exit(struct evsel *evsel)
 {
        assert(list_empty(&evsel->core.node));
        assert(evsel->evlist == NULL);
+       if (evsel__is_retire_lat(evsel))
+               evsel__tpebs_close(evsel);
        bpf_counter__destroy(evsel);
        perf_bpf_filter__destroy(evsel);
        evsel__free_counts(evsel);
index 44e86d0f934322fcdf1769ee52d52988e75a9573..dc4985728b29a9f90ae220854a55bd958e7b4c23 100644 (file)
@@ -33,6 +33,7 @@ bool tpebs_recording;
 static LIST_HEAD(tpebs_results);
 static pthread_t tpebs_reader_thread;
 static struct child_process tpebs_cmd;
+static int control_fd[2], ack_fd[2];
 static struct mutex tpebs_mtx;
 
 struct tpebs_retire_lat {
@@ -51,8 +52,6 @@ struct tpebs_retire_lat {
        bool started;
 };
 
-static struct tpebs_retire_lat *tpebs_retire_lat__find(struct evsel *evsel);
-
 static void tpebs_mtx_init(void)
 {
        mutex_init(&tpebs_mtx);
@@ -66,7 +65,10 @@ static struct mutex *tpebs_mtx_get(void)
        return &tpebs_mtx;
 }
 
-static int evsel__tpebs_start_perf_record(struct evsel *evsel, int control_fd[], int ack_fd[])
+static struct tpebs_retire_lat *tpebs_retire_lat__find(struct evsel *evsel)
+       EXCLUSIVE_LOCKS_REQUIRED(tpebs_mtx_get());
+
+static int evsel__tpebs_start_perf_record(struct evsel *evsel)
 {
        const char **record_argv;
        int tpebs_event_size = 0, i = 0, ret;
@@ -74,15 +76,13 @@ static int evsel__tpebs_start_perf_record(struct evsel *evsel, int control_fd[],
        char cpumap_buf[50];
        struct tpebs_retire_lat *t;
 
-       mutex_lock(tpebs_mtx_get());
        list_for_each_entry(t, &tpebs_results, nd)
                tpebs_event_size++;
 
        record_argv = malloc((10 + 2 * tpebs_event_size) * sizeof(*record_argv));
-       if (!record_argv) {
-               mutex_unlock(tpebs_mtx_get());
+       if (!record_argv)
                return -ENOMEM;
-       }
+
        record_argv[i++] = "perf";
        record_argv[i++] = "record";
        record_argv[i++] = "-W";
@@ -118,7 +118,6 @@ static int evsel__tpebs_start_perf_record(struct evsel *evsel, int control_fd[],
        list_for_each_entry(t, &tpebs_results, nd)
                t->started = true;
 
-       mutex_unlock(tpebs_mtx_get());
        return ret;
 }
 
@@ -131,6 +130,11 @@ static int process_sample_event(const struct perf_tool *tool __maybe_unused,
        struct tpebs_retire_lat *t;
 
        mutex_lock(tpebs_mtx_get());
+       if (tpebs_cmd.pid == 0) {
+               /* Record has terminated. */
+               mutex_unlock(tpebs_mtx_get());
+               return 0;
+       }
        t = tpebs_retire_lat__find(evsel);
        if (!t) {
                mutex_unlock(tpebs_mtx_get());
@@ -180,17 +184,98 @@ static void *__sample_reader(void *arg __maybe_unused)
        return NULL;
 }
 
+static int tpebs_send_record_cmd(const char *msg) EXCLUSIVE_LOCKS_REQUIRED(tpebs_mtx_get())
+{
+       struct pollfd pollfd = { .events = POLLIN, };
+       int ret, len, retries = 0;
+       char ack_buf[8];
+
+       /* Check if the command exited before the send, done with the lock held. */
+       if (tpebs_cmd.pid == 0)
+               return 0;
+
+       /*
+        * Let go of the lock while sending/receiving as blocking can starve the
+        * sample reading thread.
+        */
+       mutex_unlock(tpebs_mtx_get());
+
+       /* Send perf record command.*/
+       len = strlen(msg);
+       ret = write(control_fd[1], msg, len);
+       if (ret != len) {
+               pr_err("perf record control write control message '%s' failed\n", msg);
+               ret = -EPIPE;
+               goto out;
+       }
+
+       if (!strcmp(msg, EVLIST_CTL_CMD_STOP_TAG)) {
+               ret = 0;
+               goto out;
+       }
+
+       /* Wait for an ack. */
+       pollfd.fd = ack_fd[0];
+
+       /*
+        * We need this poll to ensure the ack_fd PIPE will not hang
+        * when perf record failed for any reason. The timeout value
+        * 3000ms is an empirical selection.
+        */
+again:
+       if (!poll(&pollfd, 1, 500)) {
+               if (check_if_command_finished(&tpebs_cmd)) {
+                       ret = 0;
+                       goto out;
+               }
+
+               if (retries++ < 6)
+                       goto again;
+               pr_err("tpebs failed: perf record ack timeout for '%s'\n", msg);
+               ret = -ETIMEDOUT;
+               goto out;
+       }
+
+       if (!(pollfd.revents & POLLIN)) {
+               if (check_if_command_finished(&tpebs_cmd)) {
+                       ret = 0;
+                       goto out;
+               }
+
+               pr_err("tpebs failed: did not received an ack for '%s'\n", msg);
+               ret = -EPIPE;
+               goto out;
+       }
+
+       ret = read(ack_fd[0], ack_buf, sizeof(ack_buf));
+       if (ret > 0)
+               ret = strcmp(ack_buf, EVLIST_CTL_CMD_ACK_TAG);
+       else
+               pr_err("tpebs: perf record control ack failed\n");
+out:
+       /* Re-take lock as expected by caller. */
+       mutex_lock(tpebs_mtx_get());
+       return ret;
+}
+
 /*
  * tpebs_stop - stop the sample data read thread and the perf record process.
  */
-static int tpebs_stop(void)
+static int tpebs_stop(void) EXCLUSIVE_LOCKS_REQUIRED(tpebs_mtx_get())
 {
        int ret = 0;
 
        /* Like tpebs_start, we should only run tpebs_end once. */
        if (tpebs_cmd.pid != 0) {
-               kill(tpebs_cmd.pid, SIGTERM);
+               tpebs_send_record_cmd(EVLIST_CTL_CMD_STOP_TAG);
+               tpebs_cmd.pid = 0;
+               mutex_unlock(tpebs_mtx_get());
                pthread_join(tpebs_reader_thread, NULL);
+               mutex_lock(tpebs_mtx_get());
+               close(control_fd[0]);
+               close(control_fd[1]);
+               close(ack_fd[0]);
+               close(ack_fd[1]);
                close(tpebs_cmd.out);
                ret = finish_command(&tpebs_cmd);
                tpebs_cmd.pid = 0;
@@ -352,13 +437,15 @@ int evsel__tpebs_open(struct evsel *evsel)
                return 0;
        /* Only start the events once. */
        if (tpebs_cmd.pid != 0) {
-               struct tpebs_retire_lat *t = tpebs_retire_lat__find(evsel);
+               struct tpebs_retire_lat *t;
+               bool valid;
 
-               if (!t || !t->started) {
-                       /* Fail, as the event wasn't started. */
-                       return -EBUSY;
-               }
-               return 0;
+               mutex_lock(tpebs_mtx_get());
+               t = tpebs_retire_lat__find(evsel);
+               valid = t && t->started;
+               mutex_unlock(tpebs_mtx_get());
+               /* May fail as the event wasn't started. */
+               return valid ? 0 : -EBUSY;
        }
 
        ret = evsel__tpebs_prepare(evsel);
@@ -367,12 +454,7 @@ int evsel__tpebs_open(struct evsel *evsel)
 
        mutex_lock(tpebs_mtx_get());
        tpebs_empty = list_empty(&tpebs_results);
-       mutex_unlock(tpebs_mtx_get());
        if (!tpebs_empty) {
-               struct pollfd pollfd = { .events = POLLIN, };
-               int control_fd[2], ack_fd[2], len;
-               char ack_buf[8];
-
                /*Create control and ack fd for --control*/
                if (pipe(control_fd) < 0) {
                        pr_err("tpebs: Failed to create control fifo");
@@ -385,7 +467,7 @@ int evsel__tpebs_open(struct evsel *evsel)
                        goto out;
                }
 
-               ret = evsel__tpebs_start_perf_record(evsel, control_fd, ack_fd);
+               ret = evsel__tpebs_start_perf_record(evsel);
                if (ret)
                        goto out;
 
@@ -397,53 +479,16 @@ int evsel__tpebs_open(struct evsel *evsel)
                        ret = -1;
                        goto out;
                }
-               /* Wait for perf record initialization.*/
-               len = strlen(EVLIST_CTL_CMD_ENABLE_TAG);
-               ret = write(control_fd[1], EVLIST_CTL_CMD_ENABLE_TAG, len);
-               if (ret != len) {
-                       pr_err("perf record control write control message failed\n");
-                       goto out;
-               }
-
-               /* wait for an ack */
-               pollfd.fd = ack_fd[0];
-
-               /*
-                * We need this poll to ensure the ack_fd PIPE will not hang
-                * when perf record failed for any reason. The timeout value
-                * 3000ms is an empirical selection.
-                */
-               if (!poll(&pollfd, 1, 3000)) {
-                       pr_err("tpebs failed: perf record ack timeout\n");
-                       ret = -1;
-                       goto out;
-               }
-
-               if (!(pollfd.revents & POLLIN)) {
-                       pr_err("tpebs failed: did not received an ack\n");
-                       ret = -1;
-                       goto out;
-               }
-
-               ret = read(ack_fd[0], ack_buf, sizeof(ack_buf));
-               if (ret > 0)
-                       ret = strcmp(ack_buf, EVLIST_CTL_CMD_ACK_TAG);
-               else {
-                       pr_err("tpebs: perf record control ack failed\n");
-                       goto out;
-               }
-out:
-               close(control_fd[0]);
-               close(control_fd[1]);
-               close(ack_fd[0]);
-               close(ack_fd[1]);
+               ret = tpebs_send_record_cmd(EVLIST_CTL_CMD_ENABLE_TAG);
        }
+out:
        if (ret) {
                struct tpebs_retire_lat *t = tpebs_retire_lat__find(evsel);
 
                list_del_init(&t->nd);
                tpebs_retire_lat__delete(t);
        }
+       mutex_unlock(tpebs_mtx_get());
        return ret;
 }
 
@@ -452,6 +497,7 @@ int evsel__tpebs_read(struct evsel *evsel, int cpu_map_idx, int thread)
        struct perf_counts_values *count, *old_count = NULL;
        struct tpebs_retire_lat *t;
        uint64_t val;
+       int ret;
 
        /* Only set retire_latency value to the first CPU and thread. */
        if (cpu_map_idx != 0 || thread != 0)
@@ -462,14 +508,20 @@ int evsel__tpebs_read(struct evsel *evsel, int cpu_map_idx, int thread)
 
        count = perf_counts(evsel->counts, cpu_map_idx, thread);
 
-       /*
-        * Need to stop the forked record to ensure get sampled data from the
-        * PIPE to process and get non-zero retire_lat value for hybrid.
-        */
-       tpebs_stop();
-
        mutex_lock(tpebs_mtx_get());
        t = tpebs_retire_lat__find(evsel);
+       /*
+        * If reading the first tpebs result, send a ping to the record
+        * process. Allow the sample reader a chance to read by releasing and
+        * reacquiring the lock.
+        */
+       if (&t->nd == tpebs_results.next) {
+               ret = tpebs_send_record_cmd(EVLIST_CTL_CMD_PING_TAG);
+               mutex_unlock(tpebs_mtx_get());
+               if (ret)
+                       return ret;
+               mutex_lock(tpebs_mtx_get());
+       }
        val = rint(t->val);
        mutex_unlock(tpebs_mtx_get());
 
@@ -498,10 +550,12 @@ void evsel__tpebs_close(struct evsel *evsel)
 
        mutex_lock(tpebs_mtx_get());
        t = tpebs_retire_lat__find(evsel);
-       list_del_init(&t->nd);
-       tpebs_retire_lat__delete(t);
-       mutex_unlock(tpebs_mtx_get());
+       if (t) {
+               list_del_init(&t->nd);
+               tpebs_retire_lat__delete(t);
 
-       if (list_empty(&tpebs_results))
-               tpebs_stop();
+               if (list_empty(&tpebs_results))
+                       tpebs_stop();
+       }
+       mutex_unlock(tpebs_mtx_get());
 }