]> git.ipfire.org Git - thirdparty/kernel/linux.git/blame - tools/perf/builtin-record.c
perf bpf: Make synthesize_bpf_events() receive perf_session pointer instead of perf_tool
[thirdparty/kernel/linux.git] / tools / perf / builtin-record.c
CommitLineData
b2441318 1// SPDX-License-Identifier: GPL-2.0
abaff32a 2/*
bf9e1876
IM
3 * builtin-record.c
4 *
5 * Builtin record command: Record the profile of a workload
6 * (or a CPU, or a PID) into the perf.data output file - for
7 * later analysis via perf report.
abaff32a 8 */
16f762a2 9#include "builtin.h"
bf9e1876
IM
10
11#include "perf.h"
12
6122e4e4 13#include "util/build-id.h"
6eda5838 14#include "util/util.h"
4b6ab94e 15#include <subcmd/parse-options.h>
8ad8db37 16#include "util/parse-events.h"
41840d21 17#include "util/config.h"
6eda5838 18
8f651eae 19#include "util/callchain.h"
f14d5707 20#include "util/cgroup.h"
7c6a1c65 21#include "util/header.h"
66e274f3 22#include "util/event.h"
361c99a6 23#include "util/evlist.h"
69aad6f1 24#include "util/evsel.h"
8f28827a 25#include "util/debug.h"
94c744b6 26#include "util/session.h"
45694aa7 27#include "util/tool.h"
8d06367f 28#include "util/symbol.h"
a12b51c4 29#include "util/cpumap.h"
fd78260b 30#include "util/thread_map.h"
f5fc1412 31#include "util/data.h"
bcc84ec6 32#include "util/perf_regs.h"
ef149c25 33#include "util/auxtrace.h"
46bc29b9 34#include "util/tsc.h"
f00898f4 35#include "util/parse-branch-options.h"
bcc84ec6 36#include "util/parse-regs-options.h"
71dc2326 37#include "util/llvm-utils.h"
8690a2a7 38#include "util/bpf-loader.h"
5f9cf599 39#include "util/trigger.h"
a074865e 40#include "util/perf-hooks.h"
f13de660 41#include "util/cpu-set-sched.h"
c5e4027e 42#include "util/time-utils.h"
58db1d6e 43#include "util/units.h"
7b612e29 44#include "util/bpf-event.h"
d8871ea7 45#include "asm/bug.h"
7c6a1c65 46
a43783ae 47#include <errno.h>
fd20e811 48#include <inttypes.h>
67230479 49#include <locale.h>
4208735d 50#include <poll.h>
97124d5e 51#include <unistd.h>
de9ac07b 52#include <sched.h>
9607ad3a 53#include <signal.h>
a41794cd 54#include <sys/mman.h>
4208735d 55#include <sys/wait.h>
0693e680 56#include <linux/time64.h>
78da39fa 57
/*
 * State for the --switch-output feature: which condition rotates the
 * output file and which rotated files are being tracked.
 */
struct switch_output {
	bool		 enabled;	/* NOTE(review): presumably "feature on"; not read in this chunk */
	bool		 signal;	/* rotate on signal, see switch_output_signal() */
	unsigned long	 size;		/* rotate once bytes_written reaches this; 0 = off */
	unsigned long	 time;		/* non-zero enables time based rotation */
	const char	*str;		/* NOTE(review): presumably the raw option string */
	bool		 set;		/* NOTE(review): presumably "option was given" */
	char		**filenames;	/* names of rotated files, indexed by cur_file */
	int		 num_files;	/* number of slots in filenames; 0 = keep everything */
	int		 cur_file;	/* slot used by the most recent rotation */
};
69
8c6f45a7 70struct record {
45694aa7 71 struct perf_tool tool;
b4006796 72 struct record_opts opts;
d20deb64 73 u64 bytes_written;
8ceb41d7 74 struct perf_data data;
ef149c25 75 struct auxtrace_record *itr;
d20deb64
ACM
76 struct perf_evlist *evlist;
77 struct perf_session *session;
d20deb64 78 int realtime_prio;
d20deb64 79 bool no_buildid;
d2db9a98 80 bool no_buildid_set;
d20deb64 81 bool no_buildid_cache;
d2db9a98 82 bool no_buildid_cache_set;
6156681b 83 bool buildid_all;
ecfd7a9c 84 bool timestamp_filename;
68588baf 85 bool timestamp_boundary;
1b43b704 86 struct switch_output switch_output;
9f065194 87 unsigned long long samples;
9d2ed645 88 cpu_set_t affinity_mask;
0f82ebc4 89};
a21ca2ca 90
dc0c6127
JO
/* Flag plus the two triggers driving AUX snapshots and output switching. */
static volatile int auxtrace_record__snapshot_started;

static DEFINE_TRIGGER(auxtrace_snapshot_trigger);

static DEFINE_TRIGGER(switch_output_trigger);
94
9d2ed645
AB
95static const char *affinity_tags[PERF_AFFINITY_MAX] = {
96 "SYS", "NODE", "CPU"
97};
98
dc0c6127
JO
99static bool switch_output_signal(struct record *rec)
100{
101 return rec->switch_output.signal &&
102 trigger_is_ready(&switch_output_trigger);
103}
104
105static bool switch_output_size(struct record *rec)
106{
107 return rec->switch_output.size &&
108 trigger_is_ready(&switch_output_trigger) &&
109 (rec->bytes_written >= rec->switch_output.size);
110}
111
bfacbe3b
JO
112static bool switch_output_time(struct record *rec)
113{
114 return rec->switch_output.time &&
115 trigger_is_ready(&switch_output_trigger);
116}
117
ded2b8fe
JO
118static int record__write(struct record *rec, struct perf_mmap *map __maybe_unused,
119 void *bf, size_t size)
f5970550 120{
ded2b8fe
JO
121 struct perf_data_file *file = &rec->session->data->file;
122
123 if (perf_data_file__write(file, bf, size) < 0) {
50a9b868
JO
124 pr_err("failed to write perf data, error: %m\n");
125 return -1;
f5970550 126 }
8d3eca20 127
cf8b2e69 128 rec->bytes_written += size;
dc0c6127
JO
129
130 if (switch_output_size(rec))
131 trigger_hit(&switch_output_trigger);
132
8d3eca20 133 return 0;
f5970550
PZ
134}
135
d3d1af6f
AB
136#ifdef HAVE_AIO_SUPPORT
/*
 * Fill in @cblock and queue an asynchronous write of @size bytes from
 * @buf to @trace_fd at offset @off.  Queuing is retried for as long as
 * aio_write() fails with EAGAIN; on any other failure the control block
 * is marked free (aio_fildes = -1) and the error is logged.
 *
 * Returns the last aio_write() result (0 on success).
 */
static int record__aio_write(struct aiocb *cblock, int trace_fd,
		void *buf, size_t size, off_t off)
{
	int rc;

	cblock->aio_fildes = trace_fd;
	cblock->aio_buf    = buf;
	cblock->aio_nbytes = size;
	cblock->aio_offset = off;
	cblock->aio_sigevent.sigev_notify = SIGEV_NONE;

	for (;;) {
		rc = aio_write(cblock);
		if (rc == 0)
			break;

		if (errno == EAGAIN)
			continue;

		cblock->aio_fildes = -1;
		pr_err("failed to queue perf data, error: %m\n");
		break;
	}

	return rc;
}
161
/*
 * Check on a previously queued aio write.
 *
 * Returns 1 when the whole request has been written (the control block
 * is released and the mmap reference dropped), 0 when the request is
 * still in progress or had to be restarted for the remaining bytes.
 */
static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock)
{
	ssize_t aio_ret, written;
	size_t rem_size;
	int aio_errno;

	aio_errno = aio_error(cblock);
	if (aio_errno == EINPROGRESS)
		return 0;

	aio_ret = aio_return(cblock);
	if (aio_ret < 0) {
		if (aio_errno != EINTR)
			pr_err("failed to write perf data, error: %m\n");
		/* Nothing was written: the entire chunk is retried below. */
		written = 0;
	} else {
		written = aio_ret;
	}

	rem_size = cblock->aio_nbytes - written;
	if (rem_size != 0) {
		/*
		 * aio write request may require restart with the
		 * remainder if the kernel didn't write whole
		 * chunk at once.
		 */
		off_t rem_off = cblock->aio_offset + written;
		void *rem_buf = (void *)(cblock->aio_buf + written);

		record__aio_write(cblock, cblock->aio_fildes,
				rem_buf, rem_size, rem_off);
		return 0;
	}

	cblock->aio_fildes = -1;
	/*
	 * md->refcount is incremented in perf_mmap__push() for
	 * every enqueued aio write request so decrement it because
	 * the request is now complete.
	 */
	perf_mmap__put(md);
	return 1;
}
207
93f20c0f 208static int record__aio_sync(struct perf_mmap *md, bool sync_all)
d3d1af6f 209{
93f20c0f
AB
210 struct aiocb **aiocb = md->aio.aiocb;
211 struct aiocb *cblocks = md->aio.cblocks;
d3d1af6f 212 struct timespec timeout = { 0, 1000 * 1000 * 1 }; /* 1ms */
93f20c0f 213 int i, do_suspend;
d3d1af6f
AB
214
215 do {
93f20c0f
AB
216 do_suspend = 0;
217 for (i = 0; i < md->aio.nr_cblocks; ++i) {
218 if (cblocks[i].aio_fildes == -1 || record__aio_complete(md, &cblocks[i])) {
219 if (sync_all)
220 aiocb[i] = NULL;
221 else
222 return i;
223 } else {
224 /*
225 * Started aio write is not complete yet
226 * so it has to be waited before the
227 * next allocation.
228 */
229 aiocb[i] = &cblocks[i];
230 do_suspend = 1;
231 }
232 }
233 if (!do_suspend)
234 return -1;
d3d1af6f 235
93f20c0f 236 while (aio_suspend((const struct aiocb **)aiocb, md->aio.nr_cblocks, &timeout)) {
d3d1af6f
AB
237 if (!(errno == EAGAIN || errno == EINTR))
238 pr_err("failed to sync perf data, error: %m\n");
239 }
240 } while (1);
241}
242
243static int record__aio_pushfn(void *to, struct aiocb *cblock, void *bf, size_t size, off_t off)
244{
245 struct record *rec = to;
246 int ret, trace_fd = rec->session->data->file.fd;
247
248 rec->samples++;
249
250 ret = record__aio_write(cblock, trace_fd, bf, size, off);
251 if (!ret) {
252 rec->bytes_written += size;
253 if (switch_output_size(rec))
254 trigger_hit(&switch_output_trigger);
255 }
256
257 return ret;
258}
259
/* Return the current file offset of @trace_fd, as reported by lseek(). */
static off_t record__aio_get_pos(int trace_fd)
{
	off_t cur = lseek(trace_fd, 0, SEEK_CUR);

	return cur;
}
264
/* Move the file offset of @trace_fd to @pos; the lseek() result is not checked. */
static void record__aio_set_pos(int trace_fd, off_t pos)
{
	lseek(trace_fd, pos, SEEK_SET);
}
269
270static void record__aio_mmap_read_sync(struct record *rec)
271{
272 int i;
273 struct perf_evlist *evlist = rec->evlist;
274 struct perf_mmap *maps = evlist->mmap;
275
276 if (!rec->opts.nr_cblocks)
277 return;
278
279 for (i = 0; i < evlist->nr_mmaps; i++) {
280 struct perf_mmap *map = &maps[i];
281
282 if (map->base)
93f20c0f 283 record__aio_sync(map, true);
d3d1af6f
AB
284 }
285}
286
/* Control block count used when the aio option is given without a usable value. */
static int nr_cblocks_default = 1;
/* NOTE(review): presumably the upper bound enforced by the option handling elsewhere. */
static int nr_cblocks_max = 4;
d3d1af6f
AB
289
290static int record__aio_parse(const struct option *opt,
93f20c0f 291 const char *str,
d3d1af6f
AB
292 int unset)
293{
294 struct record_opts *opts = (struct record_opts *)opt->value;
295
93f20c0f 296 if (unset) {
d3d1af6f 297 opts->nr_cblocks = 0;
93f20c0f
AB
298 } else {
299 if (str)
300 opts->nr_cblocks = strtol(str, NULL, 0);
301 if (!opts->nr_cblocks)
302 opts->nr_cblocks = nr_cblocks_default;
303 }
d3d1af6f
AB
304
305 return 0;
306}
307#else /* HAVE_AIO_SUPPORT */
93f20c0f
AB
/* Built without aio support: no control blocks are available. */
static int nr_cblocks_max = 0;
309
310static int record__aio_sync(struct perf_mmap *md __maybe_unused, bool sync_all __maybe_unused)
d3d1af6f 311{
93f20c0f 312 return -1;
d3d1af6f
AB
313}
314
315static int record__aio_pushfn(void *to __maybe_unused, struct aiocb *cblock __maybe_unused,
316 void *bf __maybe_unused, size_t size __maybe_unused, off_t off __maybe_unused)
317{
318 return -1;
319}
320
321static off_t record__aio_get_pos(int trace_fd __maybe_unused)
322{
323 return -1;
324}
325
326static void record__aio_set_pos(int trace_fd __maybe_unused, off_t pos __maybe_unused)
327{
328}
329
330static void record__aio_mmap_read_sync(struct record *rec __maybe_unused)
331{
332}
333#endif
334
335static int record__aio_enabled(struct record *rec)
336{
337 return rec->opts.nr_cblocks > 0;
338}
339
45694aa7 340static int process_synthesized_event(struct perf_tool *tool,
d20deb64 341 union perf_event *event,
1d037ca1
IT
342 struct perf_sample *sample __maybe_unused,
343 struct machine *machine __maybe_unused)
234fbbf5 344{
8c6f45a7 345 struct record *rec = container_of(tool, struct record, tool);
ded2b8fe 346 return record__write(rec, NULL, event, event->header.size);
234fbbf5
ACM
347}
348
ded2b8fe 349static int record__pushfn(struct perf_mmap *map, void *to, void *bf, size_t size)
d37f1586
ACM
350{
351 struct record *rec = to;
352
353 rec->samples++;
ded2b8fe 354 return record__write(rec, map, bf, size);
d37f1586
ACM
355}
356
2dd6d8a1
AH
/* State written from the signal handlers below. */
static volatile int done;		/* set once any handled signal arrives */
static volatile int signr = -1;		/* last non-SIGCHLD signal seen, -1 if none */
static volatile int child_finished;	/* set when SIGCHLD is seen */
c0bdc1c4 360
2dd6d8a1
AH
361static void sig_handler(int sig)
362{
363 if (sig == SIGCHLD)
364 child_finished = 1;
365 else
366 signr = sig;
367
368 done = 1;
369}
370
a074865e
WN
/* SIGSEGV handler: let perf hooks recover first, then dump the stack. */
static void sigsegv_handler(int sig)
{
	perf_hooks__recover();

	sighandler_dump_stack(sig);
}
376
2dd6d8a1
AH
377static void record__sig_exit(void)
378{
379 if (signr == -1)
380 return;
381
382 signal(signr, SIG_DFL);
383 raise(signr);
384}
385
e31f0d01
AH
386#ifdef HAVE_AUXTRACE_SUPPORT
387
ef149c25 388static int record__process_auxtrace(struct perf_tool *tool,
ded2b8fe 389 struct perf_mmap *map,
ef149c25
AH
390 union perf_event *event, void *data1,
391 size_t len1, void *data2, size_t len2)
392{
393 struct record *rec = container_of(tool, struct record, tool);
8ceb41d7 394 struct perf_data *data = &rec->data;
ef149c25
AH
395 size_t padding;
396 u8 pad[8] = {0};
397
cd3dd8dd 398 if (!perf_data__is_pipe(data) && !perf_data__is_dir(data)) {
99fa2984 399 off_t file_offset;
8ceb41d7 400 int fd = perf_data__fd(data);
99fa2984
AH
401 int err;
402
403 file_offset = lseek(fd, 0, SEEK_CUR);
404 if (file_offset == -1)
405 return -1;
406 err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
407 event, file_offset);
408 if (err)
409 return err;
410 }
411
ef149c25
AH
412 /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
413 padding = (len1 + len2) & 7;
414 if (padding)
415 padding = 8 - padding;
416
ded2b8fe
JO
417 record__write(rec, map, event, event->header.size);
418 record__write(rec, map, data1, len1);
ef149c25 419 if (len2)
ded2b8fe
JO
420 record__write(rec, map, data2, len2);
421 record__write(rec, map, &pad, padding);
ef149c25
AH
422
423 return 0;
424}
425
426static int record__auxtrace_mmap_read(struct record *rec,
e035f4ca 427 struct perf_mmap *map)
ef149c25
AH
428{
429 int ret;
430
e035f4ca 431 ret = auxtrace_mmap__read(map, rec->itr, &rec->tool,
ef149c25
AH
432 record__process_auxtrace);
433 if (ret < 0)
434 return ret;
435
436 if (ret)
437 rec->samples++;
438
439 return 0;
440}
441
2dd6d8a1 442static int record__auxtrace_mmap_read_snapshot(struct record *rec,
e035f4ca 443 struct perf_mmap *map)
2dd6d8a1
AH
444{
445 int ret;
446
e035f4ca 447 ret = auxtrace_mmap__read_snapshot(map, rec->itr, &rec->tool,
2dd6d8a1
AH
448 record__process_auxtrace,
449 rec->opts.auxtrace_snapshot_size);
450 if (ret < 0)
451 return ret;
452
453 if (ret)
454 rec->samples++;
455
456 return 0;
457}
458
459static int record__auxtrace_read_snapshot_all(struct record *rec)
460{
461 int i;
462 int rc = 0;
463
464 for (i = 0; i < rec->evlist->nr_mmaps; i++) {
e035f4ca 465 struct perf_mmap *map = &rec->evlist->mmap[i];
2dd6d8a1 466
e035f4ca 467 if (!map->auxtrace_mmap.base)
2dd6d8a1
AH
468 continue;
469
e035f4ca 470 if (record__auxtrace_mmap_read_snapshot(rec, map) != 0) {
2dd6d8a1
AH
471 rc = -1;
472 goto out;
473 }
474 }
475out:
476 return rc;
477}
478
479static void record__read_auxtrace_snapshot(struct record *rec)
480{
481 pr_debug("Recording AUX area tracing snapshot\n");
482 if (record__auxtrace_read_snapshot_all(rec) < 0) {
5f9cf599 483 trigger_error(&auxtrace_snapshot_trigger);
2dd6d8a1 484 } else {
5f9cf599
WN
485 if (auxtrace_record__snapshot_finish(rec->itr))
486 trigger_error(&auxtrace_snapshot_trigger);
487 else
488 trigger_ready(&auxtrace_snapshot_trigger);
2dd6d8a1
AH
489 }
490}
491
4b5ea3bd
AH
492static int record__auxtrace_init(struct record *rec)
493{
494 int err;
495
496 if (!rec->itr) {
497 rec->itr = auxtrace_record__init(rec->evlist, &err);
498 if (err)
499 return err;
500 }
501
502 err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
503 rec->opts.auxtrace_snapshot_opts);
504 if (err)
505 return err;
506
507 return auxtrace_parse_filters(rec->evlist);
508}
509
e31f0d01
AH
510#else
511
512static inline
513int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
e035f4ca 514 struct perf_mmap *map __maybe_unused)
e31f0d01
AH
515{
516 return 0;
517}
518
2dd6d8a1
AH
519static inline
520void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
de9ac07b 521{
f7b7c26e
PZ
522}
523
2dd6d8a1
AH
524static inline
525int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
f7b7c26e 526{
2dd6d8a1 527 return 0;
de9ac07b
PZ
528}
529
4b5ea3bd
AH
530static int record__auxtrace_init(struct record *rec __maybe_unused)
531{
532 return 0;
533}
534
2dd6d8a1
AH
535#endif
536
cda57a8c
WN
537static int record__mmap_evlist(struct record *rec,
538 struct perf_evlist *evlist)
539{
540 struct record_opts *opts = &rec->opts;
541 char msg[512];
542
f13de660
AB
543 if (opts->affinity != PERF_AFFINITY_SYS)
544 cpu__setup_cpunode_map();
545
7a276ff6 546 if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
cda57a8c 547 opts->auxtrace_mmap_pages,
9d2ed645
AB
548 opts->auxtrace_snapshot_mode,
549 opts->nr_cblocks, opts->affinity) < 0) {
cda57a8c
WN
550 if (errno == EPERM) {
551 pr_err("Permission error mapping pages.\n"
552 "Consider increasing "
553 "/proc/sys/kernel/perf_event_mlock_kb,\n"
554 "or try again with a smaller value of -m/--mmap_pages.\n"
555 "(current value: %u,%u)\n",
556 opts->mmap_pages, opts->auxtrace_mmap_pages);
557 return -errno;
558 } else {
559 pr_err("failed to mmap with %d (%s)\n", errno,
c8b5f2c9 560 str_error_r(errno, msg, sizeof(msg)));
cda57a8c
WN
561 if (errno)
562 return -errno;
563 else
564 return -EINVAL;
565 }
566 }
567 return 0;
568}
569
570static int record__mmap(struct record *rec)
571{
572 return record__mmap_evlist(rec, rec->evlist);
573}
574
8c6f45a7 575static int record__open(struct record *rec)
dd7927f4 576{
d6195a6a 577 char msg[BUFSIZ];
6a4bb04c 578 struct perf_evsel *pos;
d20deb64
ACM
579 struct perf_evlist *evlist = rec->evlist;
580 struct perf_session *session = rec->session;
b4006796 581 struct record_opts *opts = &rec->opts;
8d3eca20 582 int rc = 0;
dd7927f4 583
d3dbf43c
ACM
584 /*
585 * For initial_delay we need to add a dummy event so that we can track
586 * PERF_RECORD_MMAP while we wait for the initial delay to enable the
587 * real events, the ones asked by the user.
588 */
589 if (opts->initial_delay) {
590 if (perf_evlist__add_dummy(evlist))
591 return -ENOMEM;
592
593 pos = perf_evlist__first(evlist);
594 pos->tracking = 0;
595 pos = perf_evlist__last(evlist);
596 pos->tracking = 1;
597 pos->attr.enable_on_exec = 1;
598 }
599
e68ae9cf 600 perf_evlist__config(evlist, opts, &callchain_param);
cac21425 601
e5cadb93 602 evlist__for_each_entry(evlist, pos) {
dd7927f4 603try_again:
d988d5ee 604 if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
56e52e85 605 if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
bb963e16 606 if (verbose > 0)
c0a54341 607 ui__warning("%s\n", msg);
d6d901c2
ZY
608 goto try_again;
609 }
cf99ad14
AK
610 if ((errno == EINVAL || errno == EBADF) &&
611 pos->leader != pos &&
612 pos->weak_group) {
613 pos = perf_evlist__reset_weak_group(evlist, pos);
614 goto try_again;
615 }
56e52e85
ACM
616 rc = -errno;
617 perf_evsel__open_strerror(pos, &opts->target,
618 errno, msg, sizeof(msg));
619 ui__error("%s\n", msg);
8d3eca20 620 goto out;
c171b552 621 }
bfd8f72c
AK
622
623 pos->supported = true;
c171b552 624 }
a43d3f08 625
23d4aad4 626 if (perf_evlist__apply_filters(evlist, &pos)) {
62d94b00 627 pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
23d4aad4 628 pos->filter, perf_evsel__name(pos), errno,
c8b5f2c9 629 str_error_r(errno, msg, sizeof(msg)));
8d3eca20 630 rc = -1;
5d8bb1ec
MP
631 goto out;
632 }
633
cda57a8c
WN
634 rc = record__mmap(rec);
635 if (rc)
8d3eca20 636 goto out;
0a27d7f9 637
563aecb2 638 session->evlist = evlist;
7b56cce2 639 perf_session__set_id_hdr_size(session);
8d3eca20
DA
640out:
641 return rc;
16c8a109
PZ
642}
643
e3d59112
NK
644static int process_sample_event(struct perf_tool *tool,
645 union perf_event *event,
646 struct perf_sample *sample,
647 struct perf_evsel *evsel,
648 struct machine *machine)
649{
650 struct record *rec = container_of(tool, struct record, tool);
651
68588baf
JY
652 if (rec->evlist->first_sample_time == 0)
653 rec->evlist->first_sample_time = sample->time;
654
655 rec->evlist->last_sample_time = sample->time;
e3d59112 656
68588baf
JY
657 if (rec->buildid_all)
658 return 0;
659
660 rec->samples++;
e3d59112
NK
661 return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
662}
663
8c6f45a7 664static int process_buildids(struct record *rec)
6122e4e4 665{
f5fc1412 666 struct perf_session *session = rec->session;
6122e4e4 667
45112e89 668 if (perf_data__size(&rec->data) == 0)
9f591fd7
ACM
669 return 0;
670
00dc8657
NK
671 /*
672 * During this process, it'll load kernel map and replace the
673 * dso->long_name to a real pathname it found. In this case
674 * we prefer the vmlinux path like
675 * /lib/modules/3.16.4/build/vmlinux
676 *
677 * rather than build-id path (in debug directory).
678 * $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
679 */
680 symbol_conf.ignore_vmlinux_buildid = true;
681
6156681b
NK
682 /*
683 * If --buildid-all is given, it marks all DSO regardless of hits,
68588baf
JY
684 * so no need to process samples. But if timestamp_boundary is enabled,
685 * it still needs to walk on all samples to get the timestamps of
686 * first/last samples.
6156681b 687 */
68588baf 688 if (rec->buildid_all && !rec->timestamp_boundary)
6156681b
NK
689 rec->tool.sample = NULL;
690
b7b61cbe 691 return perf_session__process_events(session);
6122e4e4
ACM
692}
693
8115d60c 694static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
a1645ce1
ZY
695{
696 int err;
45694aa7 697 struct perf_tool *tool = data;
a1645ce1
ZY
698 /*
699 *As for guest kernel when processing subcommand record&report,
700 *we arrange module mmap prior to guest kernel mmap and trigger
701 *a preload dso because default guest module symbols are loaded
702 *from guest kallsyms instead of /lib/modules/XXX/XXX. This
703 *method is used to avoid symbol missing when the first addr is
704 *in module instead of in guest kernel.
705 */
45694aa7 706 err = perf_event__synthesize_modules(tool, process_synthesized_event,
743eb868 707 machine);
a1645ce1
ZY
708 if (err < 0)
709 pr_err("Couldn't record guest kernel [%d]'s reference"
23346f21 710 " relocation symbol.\n", machine->pid);
a1645ce1 711
a1645ce1
ZY
712 /*
713 * We use _stext for guest kernel because guest kernel's /proc/kallsyms
714 * have no _text sometimes.
715 */
45694aa7 716 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
0ae617be 717 machine);
a1645ce1
ZY
718 if (err < 0)
719 pr_err("Couldn't record guest kernel [%d]'s reference"
23346f21 720 " relocation symbol.\n", machine->pid);
a1645ce1
ZY
721}
722
98402807
FW
723static struct perf_event_header finished_round_event = {
724 .size = sizeof(struct perf_event_header),
725 .type = PERF_RECORD_FINISHED_ROUND,
726};
727
f13de660
AB
728static void record__adjust_affinity(struct record *rec, struct perf_mmap *map)
729{
730 if (rec->opts.affinity != PERF_AFFINITY_SYS &&
731 !CPU_EQUAL(&rec->affinity_mask, &map->affinity_mask)) {
732 CPU_ZERO(&rec->affinity_mask);
733 CPU_OR(&rec->affinity_mask, &rec->affinity_mask, &map->affinity_mask);
734 sched_setaffinity(0, sizeof(rec->affinity_mask), &rec->affinity_mask);
735 }
736}
737
a4ea0ec4 738static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
0b72d69a 739 bool overwrite)
98402807 740{
dcabb507 741 u64 bytes_written = rec->bytes_written;
0e2e63dd 742 int i;
8d3eca20 743 int rc = 0;
a4ea0ec4 744 struct perf_mmap *maps;
d3d1af6f
AB
745 int trace_fd = rec->data.file.fd;
746 off_t off;
98402807 747
cb21686b
WN
748 if (!evlist)
749 return 0;
ef149c25 750
0b72d69a 751 maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
a4ea0ec4
WN
752 if (!maps)
753 return 0;
754
0b72d69a 755 if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
54cc54de
WN
756 return 0;
757
d3d1af6f
AB
758 if (record__aio_enabled(rec))
759 off = record__aio_get_pos(trace_fd);
760
cb21686b 761 for (i = 0; i < evlist->nr_mmaps; i++) {
e035f4ca 762 struct perf_mmap *map = &maps[i];
cb21686b 763
e035f4ca 764 if (map->base) {
f13de660 765 record__adjust_affinity(rec, map);
d3d1af6f
AB
766 if (!record__aio_enabled(rec)) {
767 if (perf_mmap__push(map, rec, record__pushfn) != 0) {
768 rc = -1;
769 goto out;
770 }
771 } else {
93f20c0f 772 int idx;
d3d1af6f
AB
773 /*
774 * Call record__aio_sync() to wait till map->data buffer
775 * becomes available after previous aio write request.
776 */
93f20c0f
AB
777 idx = record__aio_sync(map, false);
778 if (perf_mmap__aio_push(map, rec, idx, record__aio_pushfn, &off) != 0) {
d3d1af6f
AB
779 record__aio_set_pos(trace_fd, off);
780 rc = -1;
781 goto out;
782 }
8d3eca20
DA
783 }
784 }
ef149c25 785
e035f4ca
JO
786 if (map->auxtrace_mmap.base && !rec->opts.auxtrace_snapshot_mode &&
787 record__auxtrace_mmap_read(rec, map) != 0) {
ef149c25
AH
788 rc = -1;
789 goto out;
790 }
98402807
FW
791 }
792
d3d1af6f
AB
793 if (record__aio_enabled(rec))
794 record__aio_set_pos(trace_fd, off);
795
dcabb507
JO
796 /*
797 * Mark the round finished in case we wrote
798 * at least one event.
799 */
800 if (bytes_written != rec->bytes_written)
ded2b8fe 801 rc = record__write(rec, NULL, &finished_round_event, sizeof(finished_round_event));
8d3eca20 802
0b72d69a 803 if (overwrite)
54cc54de 804 perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
8d3eca20
DA
805out:
806 return rc;
98402807
FW
807}
808
cb21686b
WN
809static int record__mmap_read_all(struct record *rec)
810{
811 int err;
812
a4ea0ec4 813 err = record__mmap_read_evlist(rec, rec->evlist, false);
cb21686b
WN
814 if (err)
815 return err;
816
05737464 817 return record__mmap_read_evlist(rec, rec->evlist, true);
cb21686b
WN
818}
819
8c6f45a7 820static void record__init_features(struct record *rec)
57706abc 821{
57706abc
DA
822 struct perf_session *session = rec->session;
823 int feat;
824
825 for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
826 perf_header__set_feat(&session->header, feat);
827
828 if (rec->no_buildid)
829 perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
830
3e2be2da 831 if (!have_tracepoints(&rec->evlist->entries))
57706abc
DA
832 perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
833
834 if (!rec->opts.branch_stack)
835 perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
ef149c25
AH
836
837 if (!rec->opts.full_auxtrace)
838 perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
ffa517ad 839
cf790516
AB
840 if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
841 perf_header__clear_feat(&session->header, HEADER_CLOCKID);
842
258031c0
JO
843 perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
844
ffa517ad 845 perf_header__clear_feat(&session->header, HEADER_STAT);
57706abc
DA
846}
847
e1ab48ba
WN
848static void
849record__finish_output(struct record *rec)
850{
8ceb41d7
JO
851 struct perf_data *data = &rec->data;
852 int fd = perf_data__fd(data);
e1ab48ba 853
8ceb41d7 854 if (data->is_pipe)
e1ab48ba
WN
855 return;
856
857 rec->session->header.data_size += rec->bytes_written;
45112e89 858 data->file.size = lseek(perf_data__fd(data), 0, SEEK_CUR);
e1ab48ba
WN
859
860 if (!rec->no_buildid) {
861 process_buildids(rec);
862
863 if (rec->buildid_all)
864 dsos__hit_all(rec->session);
865 }
866 perf_session__write_header(rec->session, rec->evlist, fd, true);
867
868 return;
869}
870
4ea648ae 871static int record__synthesize_workload(struct record *rec, bool tail)
be7b0c9e 872{
9d6aae72
ACM
873 int err;
874 struct thread_map *thread_map;
be7b0c9e 875
4ea648ae
WN
876 if (rec->opts.tail_synthesize != tail)
877 return 0;
878
9d6aae72
ACM
879 thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
880 if (thread_map == NULL)
881 return -1;
882
883 err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
be7b0c9e
WN
884 process_synthesized_event,
885 &rec->session->machines.host,
3fcb10e4 886 rec->opts.sample_address);
9d6aae72
ACM
887 thread_map__put(thread_map);
888 return err;
be7b0c9e
WN
889}
890
4ea648ae 891static int record__synthesize(struct record *rec, bool tail);
3c1cb7e3 892
ecfd7a9c
WN
893static int
894record__switch_output(struct record *rec, bool at_exit)
895{
8ceb41d7 896 struct perf_data *data = &rec->data;
ecfd7a9c 897 int fd, err;
03724b2e 898 char *new_filename;
ecfd7a9c
WN
899
900 /* Same Size: "2015122520103046"*/
901 char timestamp[] = "InvalidTimestamp";
902
d3d1af6f
AB
903 record__aio_mmap_read_sync(rec);
904
4ea648ae
WN
905 record__synthesize(rec, true);
906 if (target__none(&rec->opts.target))
907 record__synthesize_workload(rec, true);
908
ecfd7a9c
WN
909 rec->samples = 0;
910 record__finish_output(rec);
911 err = fetch_current_timestamp(timestamp, sizeof(timestamp));
912 if (err) {
913 pr_err("Failed to get current timestamp\n");
914 return -EINVAL;
915 }
916
8ceb41d7 917 fd = perf_data__switch(data, timestamp,
ecfd7a9c 918 rec->session->header.data_offset,
03724b2e 919 at_exit, &new_filename);
ecfd7a9c
WN
920 if (fd >= 0 && !at_exit) {
921 rec->bytes_written = 0;
922 rec->session->header.data_size = 0;
923 }
924
925 if (!quiet)
926 fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
2d4f2799 927 data->path, timestamp);
3c1cb7e3 928
03724b2e
AK
929 if (rec->switch_output.num_files) {
930 int n = rec->switch_output.cur_file + 1;
931
932 if (n >= rec->switch_output.num_files)
933 n = 0;
934 rec->switch_output.cur_file = n;
935 if (rec->switch_output.filenames[n]) {
936 remove(rec->switch_output.filenames[n]);
937 free(rec->switch_output.filenames[n]);
938 }
939 rec->switch_output.filenames[n] = new_filename;
940 } else {
941 free(new_filename);
942 }
943
3c1cb7e3 944 /* Output tracking events */
be7b0c9e 945 if (!at_exit) {
4ea648ae 946 record__synthesize(rec, false);
3c1cb7e3 947
be7b0c9e
WN
948 /*
949 * In 'perf record --switch-output' without -a,
950 * record__synthesize() in record__switch_output() won't
951 * generate tracking events because there's no thread_map
952 * in evlist. Which causes newly created perf.data doesn't
953 * contain map and comm information.
954 * Create a fake thread_map and directly call
955 * perf_event__synthesize_thread_map() for those events.
956 */
957 if (target__none(&rec->opts.target))
4ea648ae 958 record__synthesize_workload(rec, false);
be7b0c9e 959 }
ecfd7a9c
WN
960 return fd;
961}
962
f33cbe72
ACM
/* Value delivered with the workload failure signal; written from the handler. */
static volatile int workload_exec_errno;
964
965/*
966 * perf_evlist__prepare_workload will send a SIGUSR1
967 * if the fork fails, since we asked by setting its
968 * want_signal to true.
969 */
45604710
NK
970static void workload_exec_failed_signal(int signo __maybe_unused,
971 siginfo_t *info,
f33cbe72
ACM
972 void *ucontext __maybe_unused)
973{
974 workload_exec_errno = info->si_value.sival_int;
975 done = 1;
f33cbe72
ACM
976 child_finished = 1;
977}
978
2dd6d8a1 979static void snapshot_sig_handler(int sig);
bfacbe3b 980static void alarm_sig_handler(int sig);
2dd6d8a1 981
46bc29b9
AH
982int __weak
983perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
984 struct perf_tool *tool __maybe_unused,
985 perf_event__handler_t process __maybe_unused,
986 struct machine *machine __maybe_unused)
987{
988 return 0;
989}
990
ee667f94
WN
991static const struct perf_event_mmap_page *
992perf_evlist__pick_pc(struct perf_evlist *evlist)
993{
b2cb615d
WN
994 if (evlist) {
995 if (evlist->mmap && evlist->mmap[0].base)
996 return evlist->mmap[0].base;
0b72d69a
WN
997 if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base)
998 return evlist->overwrite_mmap[0].base;
b2cb615d 999 }
ee667f94
WN
1000 return NULL;
1001}
1002
c45628b0
WN
1003static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
1004{
ee667f94
WN
1005 const struct perf_event_mmap_page *pc;
1006
1007 pc = perf_evlist__pick_pc(rec->evlist);
1008 if (pc)
1009 return pc;
c45628b0
WN
1010 return NULL;
1011}
1012
4ea648ae 1013static int record__synthesize(struct record *rec, bool tail)
c45c86eb
WN
1014{
1015 struct perf_session *session = rec->session;
1016 struct machine *machine = &session->machines.host;
8ceb41d7 1017 struct perf_data *data = &rec->data;
c45c86eb
WN
1018 struct record_opts *opts = &rec->opts;
1019 struct perf_tool *tool = &rec->tool;
8ceb41d7 1020 int fd = perf_data__fd(data);
c45c86eb
WN
1021 int err = 0;
1022
4ea648ae
WN
1023 if (rec->opts.tail_synthesize != tail)
1024 return 0;
1025
8ceb41d7 1026 if (data->is_pipe) {
a2015516
JO
1027 /*
1028 * We need to synthesize events first, because some
1029 * features works on top of them (on report side).
1030 */
318ec184 1031 err = perf_event__synthesize_attrs(tool, rec->evlist,
c45c86eb
WN
1032 process_synthesized_event);
1033 if (err < 0) {
1034 pr_err("Couldn't synthesize attrs.\n");
1035 goto out;
1036 }
1037
a2015516
JO
1038 err = perf_event__synthesize_features(tool, session, rec->evlist,
1039 process_synthesized_event);
1040 if (err < 0) {
1041 pr_err("Couldn't synthesize features.\n");
1042 return err;
1043 }
1044
c45c86eb
WN
1045 if (have_tracepoints(&rec->evlist->entries)) {
1046 /*
1047 * FIXME err <= 0 here actually means that
1048 * there were no tracepoints so its not really
1049 * an error, just that we don't need to
1050 * synthesize anything. We really have to
1051 * return this more properly and also
1052 * propagate errors that now are calling die()
1053 */
1054 err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
1055 process_synthesized_event);
1056 if (err <= 0) {
1057 pr_err("Couldn't record tracing data.\n");
1058 goto out;
1059 }
1060 rec->bytes_written += err;
1061 }
1062 }
1063
c45628b0 1064 err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
46bc29b9
AH
1065 process_synthesized_event, machine);
1066 if (err)
1067 goto out;
1068
c45c86eb
WN
1069 if (rec->opts.full_auxtrace) {
1070 err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
1071 session, process_synthesized_event);
1072 if (err)
1073 goto out;
1074 }
1075
6c443954
ACM
1076 if (!perf_evlist__exclude_kernel(rec->evlist)) {
1077 err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
1078 machine);
1079 WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
1080 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1081 "Check /proc/kallsyms permission or run as root.\n");
1082
1083 err = perf_event__synthesize_modules(tool, process_synthesized_event,
1084 machine);
1085 WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
1086 "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
1087 "Check /proc/modules permission or run as root.\n");
1088 }
c45c86eb
WN
1089
1090 if (perf_guest) {
1091 machines__process_guests(&session->machines,
1092 perf_event__synthesize_guest_os, tool);
1093 }
1094
bfd8f72c
AK
1095 err = perf_event__synthesize_extra_attr(&rec->tool,
1096 rec->evlist,
1097 process_synthesized_event,
1098 data->is_pipe);
1099 if (err)
1100 goto out;
1101
373565d2
AK
1102 err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->threads,
1103 process_synthesized_event,
1104 NULL);
1105 if (err < 0) {
1106 pr_err("Couldn't synthesize thread map.\n");
1107 return err;
1108 }
1109
1110 err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->cpus,
1111 process_synthesized_event, NULL);
1112 if (err < 0) {
1113 pr_err("Couldn't synthesize cpu map.\n");
1114 return err;
1115 }
1116
e5416950 1117 err = perf_event__synthesize_bpf_events(session, process_synthesized_event,
7b612e29
SL
1118 machine, opts);
1119 if (err < 0)
1120 pr_warning("Couldn't synthesize bpf events.\n");
1121
c45c86eb
WN
1122 err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
1123 process_synthesized_event, opts->sample_address,
3fcb10e4 1124 1);
c45c86eb
WN
1125out:
1126 return err;
1127}
1128
8c6f45a7 1129static int __cmd_record(struct record *rec, int argc, const char **argv)
16c8a109 1130{
57706abc 1131 int err;
45604710 1132 int status = 0;
8b412664 1133 unsigned long waking = 0;
46be604b 1134 const bool forks = argc > 0;
45694aa7 1135 struct perf_tool *tool = &rec->tool;
b4006796 1136 struct record_opts *opts = &rec->opts;
8ceb41d7 1137 struct perf_data *data = &rec->data;
d20deb64 1138 struct perf_session *session;
6dcf45ef 1139 bool disabled = false, draining = false;
42aa276f 1140 int fd;
de9ac07b 1141
45604710 1142 atexit(record__sig_exit);
f5970550
PZ
1143 signal(SIGCHLD, sig_handler);
1144 signal(SIGINT, sig_handler);
804f7ac7 1145 signal(SIGTERM, sig_handler);
a074865e 1146 signal(SIGSEGV, sigsegv_handler);
c0bdc1c4 1147
f3b3614a
HB
1148 if (rec->opts.record_namespaces)
1149 tool->namespace_events = true;
1150
dc0c6127 1151 if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
2dd6d8a1 1152 signal(SIGUSR2, snapshot_sig_handler);
3c1cb7e3
WN
1153 if (rec->opts.auxtrace_snapshot_mode)
1154 trigger_on(&auxtrace_snapshot_trigger);
dc0c6127 1155 if (rec->switch_output.enabled)
3c1cb7e3 1156 trigger_on(&switch_output_trigger);
c0bdc1c4 1157 } else {
2dd6d8a1 1158 signal(SIGUSR2, SIG_IGN);
c0bdc1c4 1159 }
f5970550 1160
8ceb41d7 1161 session = perf_session__new(data, false, tool);
94c744b6 1162 if (session == NULL) {
ffa91880 1163 pr_err("Perf session creation failed.\n");
a9a70bbc
ACM
1164 return -1;
1165 }
1166
8ceb41d7 1167 fd = perf_data__fd(data);
d20deb64
ACM
1168 rec->session = session;
1169
8c6f45a7 1170 record__init_features(rec);
330aa675 1171
cf790516
AB
1172 if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
1173 session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;
1174
d4db3f16 1175 if (forks) {
3e2be2da 1176 err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
8ceb41d7 1177 argv, data->is_pipe,
735f7e0b 1178 workload_exec_failed_signal);
35b9d88e
ACM
1179 if (err < 0) {
1180 pr_err("Couldn't run the workload!\n");
45604710 1181 status = err;
35b9d88e 1182 goto out_delete_session;
856e9660 1183 }
856e9660
PZ
1184 }
1185
ad46e48c
JO
1186 /*
1187 * If we have just single event and are sending data
1188 * through pipe, we need to force the ids allocation,
1189 * because we synthesize event name through the pipe
1190 * and need the id for that.
1191 */
1192 if (data->is_pipe && rec->evlist->nr_entries == 1)
1193 rec->opts.sample_id = true;
1194
8c6f45a7 1195 if (record__open(rec) != 0) {
8d3eca20 1196 err = -1;
45604710 1197 goto out_child;
8d3eca20 1198 }
de9ac07b 1199
8690a2a7
WN
1200 err = bpf__apply_obj_config();
1201 if (err) {
1202 char errbuf[BUFSIZ];
1203
1204 bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
1205 pr_err("ERROR: Apply config to BPF failed: %s\n",
1206 errbuf);
1207 goto out_child;
1208 }
1209
cca8482c
AH
1210 /*
1211 * Normally perf_session__new would do this, but it doesn't have the
1212 * evlist.
1213 */
1214 if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
1215 pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
1216 rec->tool.ordered_events = false;
1217 }
1218
3e2be2da 1219 if (!rec->evlist->nr_groups)
a8bb559b
NK
1220 perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
1221
8ceb41d7 1222 if (data->is_pipe) {
42aa276f 1223 err = perf_header__write_pipe(fd);
529870e3 1224 if (err < 0)
45604710 1225 goto out_child;
563aecb2 1226 } else {
42aa276f 1227 err = perf_session__write_header(session, rec->evlist, fd, false);
d5eed904 1228 if (err < 0)
45604710 1229 goto out_child;
56b03f3c
ACM
1230 }
1231
d3665498 1232 if (!rec->no_buildid
e20960c0 1233 && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
d3665498 1234 pr_err("Couldn't generate buildids. "
e20960c0 1235 "Use --no-buildid to profile anyway.\n");
8d3eca20 1236 err = -1;
45604710 1237 goto out_child;
e20960c0
RR
1238 }
1239
4ea648ae 1240 err = record__synthesize(rec, false);
c45c86eb 1241 if (err < 0)
45604710 1242 goto out_child;
8d3eca20 1243
d20deb64 1244 if (rec->realtime_prio) {
de9ac07b
PZ
1245 struct sched_param param;
1246
d20deb64 1247 param.sched_priority = rec->realtime_prio;
de9ac07b 1248 if (sched_setscheduler(0, SCHED_FIFO, &param)) {
6beba7ad 1249 pr_err("Could not set realtime priority.\n");
8d3eca20 1250 err = -1;
45604710 1251 goto out_child;
de9ac07b
PZ
1252 }
1253 }
1254
774cb499
JO
1255 /*
1256 * When perf is starting the traced process, all the events
1257 * (apart from group members) have enable_on_exec=1 set,
1258 * so don't spoil it by prematurely enabling them.
1259 */
6619a53e 1260 if (!target__none(&opts->target) && !opts->initial_delay)
3e2be2da 1261 perf_evlist__enable(rec->evlist);
764e16a3 1262
856e9660
PZ
1263 /*
1264 * Let the child rip
1265 */
e803cf97 1266 if (forks) {
20a8a3cf 1267 struct machine *machine = &session->machines.host;
e5bed564 1268 union perf_event *event;
e907caf3 1269 pid_t tgid;
e5bed564
NK
1270
1271 event = malloc(sizeof(event->comm) + machine->id_hdr_size);
1272 if (event == NULL) {
1273 err = -ENOMEM;
1274 goto out_child;
1275 }
1276
e803cf97
NK
1277 /*
1278 * Some H/W events are generated before COMM event
1279 * which is emitted during exec(), so perf script
1280 * cannot see a correct process name for those events.
1281 * Synthesize COMM event to prevent it.
1282 */
e907caf3
HB
1283 tgid = perf_event__synthesize_comm(tool, event,
1284 rec->evlist->workload.pid,
1285 process_synthesized_event,
1286 machine);
1287 free(event);
1288
1289 if (tgid == -1)
1290 goto out_child;
1291
1292 event = malloc(sizeof(event->namespaces) +
1293 (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
1294 machine->id_hdr_size);
1295 if (event == NULL) {
1296 err = -ENOMEM;
1297 goto out_child;
1298 }
1299
1300 /*
1301 * Synthesize NAMESPACES event for the command specified.
1302 */
1303 perf_event__synthesize_namespaces(tool, event,
1304 rec->evlist->workload.pid,
1305 tgid, process_synthesized_event,
1306 machine);
e5bed564 1307 free(event);
e803cf97 1308
3e2be2da 1309 perf_evlist__start_workload(rec->evlist);
e803cf97 1310 }
856e9660 1311
6619a53e 1312 if (opts->initial_delay) {
0693e680 1313 usleep(opts->initial_delay * USEC_PER_MSEC);
6619a53e
AK
1314 perf_evlist__enable(rec->evlist);
1315 }
1316
5f9cf599 1317 trigger_ready(&auxtrace_snapshot_trigger);
3c1cb7e3 1318 trigger_ready(&switch_output_trigger);
a074865e 1319 perf_hooks__invoke_record_start();
649c48a9 1320 for (;;) {
9f065194 1321 unsigned long long hits = rec->samples;
de9ac07b 1322
05737464
WN
1323 /*
1324 * rec->evlist->bkw_mmap_state is possible to be
1325 * BKW_MMAP_EMPTY here: when done == true and
1326 * hits != rec->samples in previous round.
1327 *
1328 * perf_evlist__toggle_bkw_mmap ensure we never
1329 * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
1330 */
1331 if (trigger_is_hit(&switch_output_trigger) || done || draining)
1332 perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
1333
8c6f45a7 1334 if (record__mmap_read_all(rec) < 0) {
5f9cf599 1335 trigger_error(&auxtrace_snapshot_trigger);
3c1cb7e3 1336 trigger_error(&switch_output_trigger);
8d3eca20 1337 err = -1;
45604710 1338 goto out_child;
8d3eca20 1339 }
de9ac07b 1340
2dd6d8a1
AH
1341 if (auxtrace_record__snapshot_started) {
1342 auxtrace_record__snapshot_started = 0;
5f9cf599 1343 if (!trigger_is_error(&auxtrace_snapshot_trigger))
2dd6d8a1 1344 record__read_auxtrace_snapshot(rec);
5f9cf599 1345 if (trigger_is_error(&auxtrace_snapshot_trigger)) {
2dd6d8a1
AH
1346 pr_err("AUX area tracing snapshot failed\n");
1347 err = -1;
1348 goto out_child;
1349 }
1350 }
1351
3c1cb7e3 1352 if (trigger_is_hit(&switch_output_trigger)) {
05737464
WN
1353 /*
1354 * If switch_output_trigger is hit, the data in
1355 * overwritable ring buffer should have been collected,
1356 * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
1357 *
1358 * If SIGUSR2 raise after or during record__mmap_read_all(),
1359 * record__mmap_read_all() didn't collect data from
1360 * overwritable ring buffer. Read again.
1361 */
1362 if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
1363 continue;
3c1cb7e3
WN
1364 trigger_ready(&switch_output_trigger);
1365
05737464
WN
1366 /*
1367 * Reenable events in overwrite ring buffer after
1368 * record__mmap_read_all(): we should have collected
1369 * data from it.
1370 */
1371 perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
1372
3c1cb7e3
WN
1373 if (!quiet)
1374 fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
1375 waking);
1376 waking = 0;
1377 fd = record__switch_output(rec, false);
1378 if (fd < 0) {
1379 pr_err("Failed to switch to new file\n");
1380 trigger_error(&switch_output_trigger);
1381 err = fd;
1382 goto out_child;
1383 }
bfacbe3b
JO
1384
1385 /* re-arm the alarm */
1386 if (rec->switch_output.time)
1387 alarm(rec->switch_output.time);
3c1cb7e3
WN
1388 }
1389
d20deb64 1390 if (hits == rec->samples) {
6dcf45ef 1391 if (done || draining)
649c48a9 1392 break;
f66a889d 1393 err = perf_evlist__poll(rec->evlist, -1);
a515114f
JO
1394 /*
1395 * Propagate error, only if there's any. Ignore positive
1396 * number of returned events and interrupt error.
1397 */
1398 if (err > 0 || (err < 0 && errno == EINTR))
45604710 1399 err = 0;
8b412664 1400 waking++;
6dcf45ef
ACM
1401
1402 if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
1403 draining = true;
8b412664
PZ
1404 }
1405
774cb499
JO
1406 /*
1407 * When perf is starting the traced process, at the end events
1408 * die with the process and we wait for that. Thus no need to
1409 * disable events in this case.
1410 */
602ad878 1411 if (done && !disabled && !target__none(&opts->target)) {
5f9cf599 1412 trigger_off(&auxtrace_snapshot_trigger);
3e2be2da 1413 perf_evlist__disable(rec->evlist);
2711926a
JO
1414 disabled = true;
1415 }
de9ac07b 1416 }
5f9cf599 1417 trigger_off(&auxtrace_snapshot_trigger);
3c1cb7e3 1418 trigger_off(&switch_output_trigger);
de9ac07b 1419
f33cbe72 1420 if (forks && workload_exec_errno) {
35550da3 1421 char msg[STRERR_BUFSIZE];
c8b5f2c9 1422 const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
f33cbe72
ACM
1423 pr_err("Workload failed: %s\n", emsg);
1424 err = -1;
45604710 1425 goto out_child;
f33cbe72
ACM
1426 }
1427
e3d59112 1428 if (!quiet)
45604710 1429 fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
b44308f5 1430
4ea648ae
WN
1431 if (target__none(&rec->opts.target))
1432 record__synthesize_workload(rec, true);
1433
45604710 1434out_child:
d3d1af6f
AB
1435 record__aio_mmap_read_sync(rec);
1436
45604710
NK
1437 if (forks) {
1438 int exit_status;
addc2785 1439
45604710
NK
1440 if (!child_finished)
1441 kill(rec->evlist->workload.pid, SIGTERM);
1442
1443 wait(&exit_status);
1444
1445 if (err < 0)
1446 status = err;
1447 else if (WIFEXITED(exit_status))
1448 status = WEXITSTATUS(exit_status);
1449 else if (WIFSIGNALED(exit_status))
1450 signr = WTERMSIG(exit_status);
1451 } else
1452 status = err;
1453
4ea648ae 1454 record__synthesize(rec, true);
e3d59112
NK
1455 /* this will be recalculated during process_buildids() */
1456 rec->samples = 0;
1457
ecfd7a9c
WN
1458 if (!err) {
1459 if (!rec->timestamp_filename) {
1460 record__finish_output(rec);
1461 } else {
1462 fd = record__switch_output(rec, true);
1463 if (fd < 0) {
1464 status = fd;
1465 goto out_delete_session;
1466 }
1467 }
1468 }
39d17dac 1469
a074865e
WN
1470 perf_hooks__invoke_record_end();
1471
e3d59112
NK
1472 if (!err && !quiet) {
1473 char samples[128];
ecfd7a9c
WN
1474 const char *postfix = rec->timestamp_filename ?
1475 ".<timestamp>" : "";
e3d59112 1476
ef149c25 1477 if (rec->samples && !rec->opts.full_auxtrace)
e3d59112
NK
1478 scnprintf(samples, sizeof(samples),
1479 " (%" PRIu64 " samples)", rec->samples);
1480 else
1481 samples[0] = '\0';
1482
ecfd7a9c 1483 fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
8ceb41d7 1484 perf_data__size(data) / 1024.0 / 1024.0,
2d4f2799 1485 data->path, postfix, samples);
e3d59112
NK
1486 }
1487
39d17dac
ACM
1488out_delete_session:
1489 perf_session__delete(session);
45604710 1490 return status;
de9ac07b 1491}
0e9b20b8 1492
0883e820 1493static void callchain_debug(struct callchain_param *callchain)
09b0fd45 1494{
aad2b21c 1495 static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
a601fdff 1496
0883e820 1497 pr_debug("callchain: type %s\n", str[callchain->record_mode]);
26d33022 1498
0883e820 1499 if (callchain->record_mode == CALLCHAIN_DWARF)
09b0fd45 1500 pr_debug("callchain: stack dump size %d\n",
0883e820 1501 callchain->dump_size);
09b0fd45
JO
1502}
1503
0883e820
ACM
1504int record_opts__parse_callchain(struct record_opts *record,
1505 struct callchain_param *callchain,
1506 const char *arg, bool unset)
09b0fd45 1507{
09b0fd45 1508 int ret;
0883e820 1509 callchain->enabled = !unset;
eb853e80 1510
09b0fd45
JO
1511 /* --no-call-graph */
1512 if (unset) {
0883e820 1513 callchain->record_mode = CALLCHAIN_NONE;
09b0fd45
JO
1514 pr_debug("callchain: disabled\n");
1515 return 0;
1516 }
1517
0883e820 1518 ret = parse_callchain_record_opt(arg, callchain);
5c0cf224
JO
1519 if (!ret) {
1520 /* Enable data address sampling for DWARF unwind. */
0883e820 1521 if (callchain->record_mode == CALLCHAIN_DWARF)
5c0cf224 1522 record->sample_address = true;
0883e820 1523 callchain_debug(callchain);
5c0cf224 1524 }
26d33022
JO
1525
1526 return ret;
1527}
1528
0883e820
ACM
1529int record_parse_callchain_opt(const struct option *opt,
1530 const char *arg,
1531 int unset)
1532{
1533 return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
1534}
1535
c421e80b 1536int record_callchain_opt(const struct option *opt,
09b0fd45
JO
1537 const char *arg __maybe_unused,
1538 int unset __maybe_unused)
1539{
2ddd5c04 1540 struct callchain_param *callchain = opt->value;
c421e80b 1541
2ddd5c04 1542 callchain->enabled = true;
09b0fd45 1543
2ddd5c04
ACM
1544 if (callchain->record_mode == CALLCHAIN_NONE)
1545 callchain->record_mode = CALLCHAIN_FP;
eb853e80 1546
2ddd5c04 1547 callchain_debug(callchain);
09b0fd45
JO
1548 return 0;
1549}
1550
eb853e80
JO
1551static int perf_record_config(const char *var, const char *value, void *cb)
1552{
7a29c087
NK
1553 struct record *rec = cb;
1554
1555 if (!strcmp(var, "record.build-id")) {
1556 if (!strcmp(value, "cache"))
1557 rec->no_buildid_cache = false;
1558 else if (!strcmp(value, "no-cache"))
1559 rec->no_buildid_cache = true;
1560 else if (!strcmp(value, "skip"))
1561 rec->no_buildid = true;
1562 else
1563 return -1;
1564 return 0;
1565 }
cff17205
YX
1566 if (!strcmp(var, "record.call-graph")) {
1567 var = "call-graph.record-mode";
1568 return perf_default_config(var, value, cb);
1569 }
93f20c0f
AB
1570#ifdef HAVE_AIO_SUPPORT
1571 if (!strcmp(var, "record.aio")) {
1572 rec->opts.nr_cblocks = strtol(value, NULL, 0);
1573 if (!rec->opts.nr_cblocks)
1574 rec->opts.nr_cblocks = nr_cblocks_default;
1575 }
1576#endif
eb853e80 1577
cff17205 1578 return 0;
eb853e80
JO
1579}
1580
814c8c38
PZ
/* One entry of the name -> clockid lookup table used by parse_clockid(). */
struct clockid_map {
	const char *name;
	int clockid;
};

/* Table entry for clock @c under the name @n. */
#define CLOCKID_MAP(n, c)	\
	{ .name = n, .clockid = (c), }

/* Table terminator: an entry with a NULL name. */
#define CLOCKID_END	{ .name = NULL, }


/*
 * Add the missing ones, we need to build on many distros...
 */
#ifndef CLOCK_MONOTONIC_RAW
#define CLOCK_MONOTONIC_RAW 4
#endif
#ifndef CLOCK_BOOTTIME
#define CLOCK_BOOTTIME 7
#endif
#ifndef CLOCK_TAI
#define CLOCK_TAI 11
#endif

/* NULL-name terminated list of the clock names accepted on the command line. */
static const struct clockid_map clockids[] = {
	/* available for all events, NMI safe */
	CLOCKID_MAP("monotonic",	CLOCK_MONOTONIC),
	CLOCKID_MAP("monotonic_raw",	CLOCK_MONOTONIC_RAW),

	/* available for some events */
	CLOCKID_MAP("realtime",		CLOCK_REALTIME),
	CLOCKID_MAP("boottime",		CLOCK_BOOTTIME),
	CLOCKID_MAP("tai",		CLOCK_TAI),

	/* available for the lazy */
	CLOCKID_MAP("mono",		CLOCK_MONOTONIC),
	CLOCKID_MAP("raw",		CLOCK_MONOTONIC_RAW),
	CLOCKID_MAP("real",		CLOCK_REALTIME),
	CLOCKID_MAP("boot",		CLOCK_BOOTTIME),

	CLOCKID_END,
};
1623
cf790516
AB
1624static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
1625{
1626 struct timespec res;
1627
1628 *res_ns = 0;
1629 if (!clock_getres(clk_id, &res))
1630 *res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
1631 else
1632 pr_warning("WARNING: Failed to determine specified clock resolution.\n");
1633
1634 return 0;
1635}
1636
814c8c38
PZ
1637static int parse_clockid(const struct option *opt, const char *str, int unset)
1638{
1639 struct record_opts *opts = (struct record_opts *)opt->value;
1640 const struct clockid_map *cm;
1641 const char *ostr = str;
1642
1643 if (unset) {
1644 opts->use_clockid = 0;
1645 return 0;
1646 }
1647
1648 /* no arg passed */
1649 if (!str)
1650 return 0;
1651
1652 /* no setting it twice */
1653 if (opts->use_clockid)
1654 return -1;
1655
1656 opts->use_clockid = true;
1657
1658 /* if its a number, we're done */
1659 if (sscanf(str, "%d", &opts->clockid) == 1)
cf790516 1660 return get_clockid_res(opts->clockid, &opts->clockid_res_ns);
814c8c38
PZ
1661
1662 /* allow a "CLOCK_" prefix to the name */
1663 if (!strncasecmp(str, "CLOCK_", 6))
1664 str += 6;
1665
1666 for (cm = clockids; cm->name; cm++) {
1667 if (!strcasecmp(str, cm->name)) {
1668 opts->clockid = cm->clockid;
cf790516
AB
1669 return get_clockid_res(opts->clockid,
1670 &opts->clockid_res_ns);
814c8c38
PZ
1671 }
1672 }
1673
1674 opts->use_clockid = false;
1675 ui__warning("unknown clockid %s, check man page\n", ostr);
1676 return -1;
1677}
1678
f4fe11b7
AB
1679static int record__parse_affinity(const struct option *opt, const char *str, int unset)
1680{
1681 struct record_opts *opts = (struct record_opts *)opt->value;
1682
1683 if (unset || !str)
1684 return 0;
1685
1686 if (!strcasecmp(str, "node"))
1687 opts->affinity = PERF_AFFINITY_NODE;
1688 else if (!strcasecmp(str, "cpu"))
1689 opts->affinity = PERF_AFFINITY_CPU;
1690
1691 return 0;
1692}
1693
e9db1310
AH
1694static int record__parse_mmap_pages(const struct option *opt,
1695 const char *str,
1696 int unset __maybe_unused)
1697{
1698 struct record_opts *opts = opt->value;
1699 char *s, *p;
1700 unsigned int mmap_pages;
1701 int ret;
1702
1703 if (!str)
1704 return -EINVAL;
1705
1706 s = strdup(str);
1707 if (!s)
1708 return -ENOMEM;
1709
1710 p = strchr(s, ',');
1711 if (p)
1712 *p = '\0';
1713
1714 if (*s) {
1715 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
1716 if (ret)
1717 goto out_free;
1718 opts->mmap_pages = mmap_pages;
1719 }
1720
1721 if (!p) {
1722 ret = 0;
1723 goto out_free;
1724 }
1725
1726 ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
1727 if (ret)
1728 goto out_free;
1729
1730 opts->auxtrace_mmap_pages = mmap_pages;
1731
1732out_free:
1733 free(s);
1734 return ret;
1735}
1736
0c582449
JO
1737static void switch_output_size_warn(struct record *rec)
1738{
1739 u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
1740 struct switch_output *s = &rec->switch_output;
1741
1742 wakeup_size /= 2;
1743
1744 if (s->size < wakeup_size) {
1745 char buf[100];
1746
1747 unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
1748 pr_warning("WARNING: switch-output data size lower than "
1749 "wakeup kernel buffer size (%s) "
1750 "expect bigger perf.data sizes\n", buf);
1751 }
1752}
1753
cb4e1ebb
JO
1754static int switch_output_setup(struct record *rec)
1755{
1756 struct switch_output *s = &rec->switch_output;
dc0c6127
JO
1757 static struct parse_tag tags_size[] = {
1758 { .tag = 'B', .mult = 1 },
1759 { .tag = 'K', .mult = 1 << 10 },
1760 { .tag = 'M', .mult = 1 << 20 },
1761 { .tag = 'G', .mult = 1 << 30 },
1762 { .tag = 0 },
1763 };
bfacbe3b
JO
1764 static struct parse_tag tags_time[] = {
1765 { .tag = 's', .mult = 1 },
1766 { .tag = 'm', .mult = 60 },
1767 { .tag = 'h', .mult = 60*60 },
1768 { .tag = 'd', .mult = 60*60*24 },
1769 { .tag = 0 },
1770 };
dc0c6127 1771 unsigned long val;
cb4e1ebb
JO
1772
1773 if (!s->set)
1774 return 0;
1775
1776 if (!strcmp(s->str, "signal")) {
1777 s->signal = true;
1778 pr_debug("switch-output with SIGUSR2 signal\n");
dc0c6127
JO
1779 goto enabled;
1780 }
1781
1782 val = parse_tag_value(s->str, tags_size);
1783 if (val != (unsigned long) -1) {
1784 s->size = val;
1785 pr_debug("switch-output with %s size threshold\n", s->str);
1786 goto enabled;
cb4e1ebb
JO
1787 }
1788
bfacbe3b
JO
1789 val = parse_tag_value(s->str, tags_time);
1790 if (val != (unsigned long) -1) {
1791 s->time = val;
1792 pr_debug("switch-output with %s time threshold (%lu seconds)\n",
1793 s->str, s->time);
1794 goto enabled;
1795 }
1796
cb4e1ebb 1797 return -1;
dc0c6127
JO
1798
1799enabled:
1800 rec->timestamp_filename = true;
1801 s->enabled = true;
0c582449
JO
1802
1803 if (s->size && !rec->opts.no_buffering)
1804 switch_output_size_warn(rec);
1805
dc0c6127 1806 return 0;
cb4e1ebb
JO
1807}
1808
/* NULL-terminated usage lines for 'perf record'. */
static const char * const __record_usage[] = {
	"perf record [<options>] [<command>]",
	"perf record [<options>] -- <command> [<options>]",
	NULL
};

/* Non-static handle on the usage lines above. */
const char * const *record_usage = __record_usage;
0e9b20b8 1815
/*
 * XXX Ideally would be local to cmd_record() and passed to a record__new
 * because we need to have access to it in record__exit, that is called
 * after cmd_record() exits, but since record_options need to be accessible to
 * builtin-script, leave it here.
 *
 * At least we don't touch it in all the other functions here directly.
 *
 * Just say no to tons of global variables, sigh.
 */
8c6f45a7 1826static struct record record = {
d20deb64 1827 .opts = {
8affc2b8 1828 .sample_time = true,
d20deb64
ACM
1829 .mmap_pages = UINT_MAX,
1830 .user_freq = UINT_MAX,
1831 .user_interval = ULLONG_MAX,
447a6013 1832 .freq = 4000,
d1cb9fce
NK
1833 .target = {
1834 .uses_mmap = true,
3aa5939d 1835 .default_per_cpu = true,
d1cb9fce 1836 },
d20deb64 1837 },
e3d59112
NK
1838 .tool = {
1839 .sample = process_sample_event,
1840 .fork = perf_event__process_fork,
cca8482c 1841 .exit = perf_event__process_exit,
e3d59112 1842 .comm = perf_event__process_comm,
f3b3614a 1843 .namespaces = perf_event__process_namespaces,
e3d59112
NK
1844 .mmap = perf_event__process_mmap,
1845 .mmap2 = perf_event__process_mmap2,
cca8482c 1846 .ordered_events = true,
e3d59112 1847 },
d20deb64 1848};
7865e817 1849
76a26549
NK
1850const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
1851 "\n\t\t\t\tDefault: fp";
61eaa3be 1852
0aab2136
WN
1853static bool dry_run;
1854
d20deb64
ACM
1855/*
1856 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
1857 * with it and switch to use the library functions in perf_evlist that came
b4006796 1858 * from builtin-record.c, i.e. use record_opts,
d20deb64
ACM
1859 * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
1860 * using pipes, etc.
1861 */
efd21307 1862static struct option __record_options[] = {
d20deb64 1863 OPT_CALLBACK('e', "event", &record.evlist, "event",
86847b62 1864 "event selector. use 'perf list' to list available events",
f120f9d5 1865 parse_events_option),
d20deb64 1866 OPT_CALLBACK(0, "filter", &record.evlist, "filter",
c171b552 1867 "event filter", parse_filter),
4ba1faa1
WN
1868 OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
1869 NULL, "don't record events from perf itself",
1870 exclude_perf),
bea03405 1871 OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
d6d901c2 1872 "record events on existing process id"),
bea03405 1873 OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
d6d901c2 1874 "record events on existing thread id"),
d20deb64 1875 OPT_INTEGER('r', "realtime", &record.realtime_prio,
0e9b20b8 1876 "collect data with this RT SCHED_FIFO priority"),
509051ea 1877 OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
acac03fa 1878 "collect data without buffering"),
d20deb64 1879 OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
daac07b2 1880 "collect raw sample records from all opened counters"),
bea03405 1881 OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
0e9b20b8 1882 "system-wide collection from all CPUs"),
bea03405 1883 OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
c45c6ea2 1884 "list of cpus to monitor"),
d20deb64 1885 OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
2d4f2799 1886 OPT_STRING('o', "output", &record.data.path, "file",
abaff32a 1887 "output file name"),
69e7e5b0
AH
1888 OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
1889 &record.opts.no_inherit_set,
1890 "child tasks do not inherit counters"),
4ea648ae
WN
1891 OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
1892 "synthesize non-sample events at the end of output"),
626a6b78 1893 OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
71184c6a 1894 OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "record bpf events"),
b09c2364
ACM
1895 OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
1896 "Fail if the specified frequency can't be used"),
67230479
ACM
1897 OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
1898 "profile at this frequency",
1899 record__parse_freq),
e9db1310
AH
1900 OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
1901 "number of mmap data pages and AUX area tracing mmap pages",
1902 record__parse_mmap_pages),
d20deb64 1903 OPT_BOOLEAN(0, "group", &record.opts.group,
43bece79 1904 "put the counters into a counter group"),
2ddd5c04 1905 OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
09b0fd45
JO
1906 NULL, "enables call-graph recording" ,
1907 &record_callchain_opt),
1908 OPT_CALLBACK(0, "call-graph", &record.opts,
76a26549 1909 "record_mode[,record_size]", record_callchain_help,
09b0fd45 1910 &record_parse_callchain_opt),
c0555642 1911 OPT_INCR('v', "verbose", &verbose,
3da297a6 1912 "be more verbose (show counter open errors, etc)"),
b44308f5 1913 OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
d20deb64 1914 OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
649c48a9 1915 "per thread counts"),
56100321 1916 OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
3b0a5daa
KL
1917 OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
1918 "Record the sample physical addresses"),
b6f35ed7 1919 OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
3abebc55
AH
1920 OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
1921 &record.opts.sample_time_set,
1922 "Record the sample timestamps"),
f290aa1f
JO
1923 OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
1924 "Record the sample period"),
d20deb64 1925 OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
649c48a9 1926 "don't sample"),
d2db9a98
WN
1927 OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
1928 &record.no_buildid_cache_set,
1929 "do not update the buildid cache"),
1930 OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
1931 &record.no_buildid_set,
1932 "do not collect buildids in perf.data"),
d20deb64 1933 OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
023695d9
SE
1934 "monitor event in cgroup name only",
1935 parse_cgroups),
a6205a35 1936 OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
6619a53e 1937 "ms to wait before starting measurement after program start"),
bea03405
NK
1938 OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
1939 "user to profile"),
a5aabdac
SE
1940
1941 OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
1942 "branch any", "sample any taken branches",
1943 parse_branch_stack),
1944
1945 OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
1946 "branch filter mask", "branch stack filter modes",
bdfebd84 1947 parse_branch_stack),
05484298
AK
1948 OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
1949 "sample by weight (on special events only)"),
475eeab9
AK
1950 OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
1951 "sample transaction flags (special events only)"),
3aa5939d
AH
1952 OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
1953 "use per-thread mmaps"),
bcc84ec6
SE
1954 OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
1955 "sample selected machine registers on interrupt,"
1956 " use -I ? to list register names", parse_regs),
84c41742
AK
1957 OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
1958 "sample selected machine registers on interrupt,"
1959 " use -I ? to list register names", parse_regs),
85c273d2
AK
1960 OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
1961 "Record running/enabled time of read (:S) events"),
814c8c38
PZ
1962 OPT_CALLBACK('k', "clockid", &record.opts,
1963 "clockid", "clockid to use for events, see clock_gettime()",
1964 parse_clockid),
2dd6d8a1
AH
1965 OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
1966 "opts", "AUX area tracing Snapshot Mode", ""),
3fcb10e4 1967 OPT_UINTEGER(0, "proc-map-timeout", &proc_map_timeout,
9d9cad76 1968 "per thread proc mmap processing timeout in ms"),
f3b3614a
HB
1969 OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
1970 "Record namespaces events"),
b757bb09
AH
1971 OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
1972 "Record context switch events"),
85723885
JO
1973 OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
1974 "Configure all used events to run in kernel space.",
1975 PARSE_OPT_EXCLUSIVE),
1976 OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
1977 "Configure all used events to run in user space.",
1978 PARSE_OPT_EXCLUSIVE),
71dc2326
WN
1979 OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
1980 "clang binary to use for compiling BPF scriptlets"),
1981 OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
1982 "options passed to clang when compiling BPF scriptlets"),
7efe0e03
HK
1983 OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
1984 "file", "vmlinux pathname"),
6156681b
NK
1985 OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
1986 "Record build-id of all DSOs regardless of hits"),
ecfd7a9c
WN
1987 OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
1988 "append timestamp to output filename"),
68588baf
JY
1989 OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
1990 "Record timestamp boundary (time of first/last samples)"),
cb4e1ebb 1991 OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
c38dab7d
AK
1992 &record.switch_output.set, "signal or size[BKMG] or time[smhd]",
1993 "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
dc0c6127 1994 "signal"),
03724b2e
AK
1995 OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
1996 "Limit number of switch output generated files"),
0aab2136
WN
1997 OPT_BOOLEAN(0, "dry-run", &dry_run,
1998 "Parse options then exit"),
d3d1af6f 1999#ifdef HAVE_AIO_SUPPORT
93f20c0f
AB
2000 OPT_CALLBACK_OPTARG(0, "aio", &record.opts,
2001 &nr_cblocks_default, "n", "Use <n> control blocks in asynchronous trace writing mode (default: 1, max: 4)",
d3d1af6f
AB
2002 record__aio_parse),
2003#endif
f4fe11b7
AB
2004 OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
2005 "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
2006 record__parse_affinity),
0e9b20b8
IM
2007 OPT_END()
2008};
2009
e5b2c207
NK
/*
 * Non-static handle on the 'perf record' option table defined above.
 * cmd_record() hands it to parse_options() and to set_option_nobuild()
 * when marking options unavailable in the current build.
 */
2010struct option *record_options = __record_options;
2011
b0ad8ea6 2012int cmd_record(int argc, const char **argv)
0e9b20b8 2013{
ef149c25 2014 int err;
8c6f45a7 2015 struct record *rec = &record;
16ad2ffb 2016 char errbuf[BUFSIZ];
0e9b20b8 2017
67230479
ACM
2018 setlocale(LC_ALL, "");
2019
48e1cab1
WN
2020#ifndef HAVE_LIBBPF_SUPPORT
2021# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
2022 set_nobuild('\0', "clang-path", true);
2023 set_nobuild('\0', "clang-opt", true);
2024# undef set_nobuild
7efe0e03
HK
2025#endif
2026
2027#ifndef HAVE_BPF_PROLOGUE
2028# if !defined (HAVE_DWARF_SUPPORT)
2029# define REASON "NO_DWARF=1"
2030# elif !defined (HAVE_LIBBPF_SUPPORT)
2031# define REASON "NO_LIBBPF=1"
2032# else
2033# define REASON "this architecture doesn't support BPF prologue"
2034# endif
2035# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
2036 set_nobuild('\0', "vmlinux", true);
2037# undef set_nobuild
2038# undef REASON
48e1cab1
WN
2039#endif
2040
9d2ed645
AB
2041 CPU_ZERO(&rec->affinity_mask);
2042 rec->opts.affinity = PERF_AFFINITY_SYS;
2043
3e2be2da
ACM
2044 rec->evlist = perf_evlist__new();
2045 if (rec->evlist == NULL)
361c99a6
ACM
2046 return -ENOMEM;
2047
ecc4c561
ACM
2048 err = perf_config(perf_record_config, rec);
2049 if (err)
2050 return err;
eb853e80 2051
bca647aa 2052 argc = parse_options(argc, argv, record_options, record_usage,
655000e7 2053 PARSE_OPT_STOP_AT_NON_OPTION);
68ba3235
NK
2054 if (quiet)
2055 perf_quiet_option();
483635a9
JO
2056
2057 /* Make system wide (-a) the default target. */
602ad878 2058 if (!argc && target__none(&rec->opts.target))
483635a9 2059 rec->opts.target.system_wide = true;
0e9b20b8 2060
bea03405 2061 if (nr_cgroups && !rec->opts.target.system_wide) {
c7118369
NK
2062 usage_with_options_msg(record_usage, record_options,
2063 "cgroup monitoring only available in system-wide mode");
2064
023695d9 2065 }
b757bb09
AH
2066 if (rec->opts.record_switch_events &&
2067 !perf_can_record_switch_events()) {
c7118369
NK
2068 ui__error("kernel does not support recording context switch events\n");
2069 parse_options_usage(record_usage, record_options, "switch-events", 0);
2070 return -EINVAL;
b757bb09 2071 }
023695d9 2072
cb4e1ebb
JO
2073 if (switch_output_setup(rec)) {
2074 parse_options_usage(record_usage, record_options, "switch-output", 0);
2075 return -EINVAL;
2076 }
2077
bfacbe3b
JO
2078 if (rec->switch_output.time) {
2079 signal(SIGALRM, alarm_sig_handler);
2080 alarm(rec->switch_output.time);
2081 }
2082
03724b2e
AK
2083 if (rec->switch_output.num_files) {
2084 rec->switch_output.filenames = calloc(sizeof(char *),
2085 rec->switch_output.num_files);
2086 if (!rec->switch_output.filenames)
2087 return -EINVAL;
2088 }
2089
1b36c03e
AH
2090 /*
2091 * Allow aliases to facilitate the lookup of symbols for address
2092 * filters. Refer to auxtrace_parse_filters().
2093 */
2094 symbol_conf.allow_aliases = true;
2095
2096 symbol__init(NULL);
2097
4b5ea3bd 2098 err = record__auxtrace_init(rec);
1b36c03e
AH
2099 if (err)
2100 goto out;
2101
0aab2136 2102 if (dry_run)
5c01ad60 2103 goto out;
0aab2136 2104
d7888573
WN
2105 err = bpf__setup_stdout(rec->evlist);
2106 if (err) {
2107 bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
2108 pr_err("ERROR: Setup BPF stdout failed: %s\n",
2109 errbuf);
5c01ad60 2110 goto out;
d7888573
WN
2111 }
2112
ef149c25
AH
2113 err = -ENOMEM;
2114
6c443954 2115 if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist))
646aaea6
ACM
2116 pr_warning(
2117"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
2118"check /proc/sys/kernel/kptr_restrict.\n\n"
2119"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
2120"file is not found in the buildid cache or in the vmlinux path.\n\n"
2121"Samples in kernel modules won't be resolved at all.\n\n"
2122"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
2123"even with a suitable vmlinux or kallsyms file.\n\n");
ec80fde7 2124
0c1d46a8 2125 if (rec->no_buildid_cache || rec->no_buildid) {
a1ac1d3c 2126 disable_buildid_cache();
dc0c6127 2127 } else if (rec->switch_output.enabled) {
0c1d46a8
WN
2128 /*
2129 * In 'perf record --switch-output', disable buildid
2130 * generation by default to reduce data file switching
2131 * overhead. Still generate buildid if they are required
2132 * explicitly using
2133 *
60437ac0 2134 * perf record --switch-output --no-no-buildid \
0c1d46a8
WN
2135 * --no-no-buildid-cache
2136 *
2137 * Following code equals to:
2138 *
2139 * if ((rec->no_buildid || !rec->no_buildid_set) &&
2140 * (rec->no_buildid_cache || !rec->no_buildid_cache_set))
2141 * disable_buildid_cache();
2142 */
2143 bool disable = true;
2144
2145 if (rec->no_buildid_set && !rec->no_buildid)
2146 disable = false;
2147 if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
2148 disable = false;
2149 if (disable) {
2150 rec->no_buildid = true;
2151 rec->no_buildid_cache = true;
2152 disable_buildid_cache();
2153 }
2154 }
655000e7 2155
4ea648ae
WN
2156 if (record.opts.overwrite)
2157 record.opts.tail_synthesize = true;
2158
3e2be2da 2159 if (rec->evlist->nr_entries == 0 &&
4b4cd503 2160 __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
69aad6f1 2161 pr_err("Not enough memory for event selector list\n");
394c01ed 2162 goto out;
bbd36e5e 2163 }
0e9b20b8 2164
69e7e5b0
AH
2165 if (rec->opts.target.tid && !rec->opts.no_inherit_set)
2166 rec->opts.no_inherit = true;
2167
602ad878 2168 err = target__validate(&rec->opts.target);
16ad2ffb 2169 if (err) {
602ad878 2170 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
c3dec27b 2171 ui__warning("%s\n", errbuf);
16ad2ffb
NK
2172 }
2173
602ad878 2174 err = target__parse_uid(&rec->opts.target);
16ad2ffb
NK
2175 if (err) {
2176 int saved_errno = errno;
4bd0f2d2 2177
602ad878 2178 target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
3780f488 2179 ui__error("%s", errbuf);
16ad2ffb
NK
2180
2181 err = -saved_errno;
394c01ed 2182 goto out;
16ad2ffb 2183 }
0d37aa34 2184
ca800068
MZ
2185 /* Enable ignoring missing threads when -u/-p option is defined. */
2186 rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
23dc4f15 2187
16ad2ffb 2188 err = -ENOMEM;
3e2be2da 2189 if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
dd7927f4 2190 usage_with_options(record_usage, record_options);
69aad6f1 2191
ef149c25
AH
2192 err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
2193 if (err)
394c01ed 2194 goto out;
ef149c25 2195
6156681b
NK
2196 /*
2197 * We take all buildids when the file contains
2198 * AUX area tracing data because we do not decode the
2199 * trace because it would take too long.
2200 */
2201 if (rec->opts.full_auxtrace)
2202 rec->buildid_all = true;
2203
b4006796 2204 if (record_opts__config(&rec->opts)) {
39d17dac 2205 err = -EINVAL;
394c01ed 2206 goto out;
7e4ff9e3
MG
2207 }
2208
93f20c0f
AB
2209 if (rec->opts.nr_cblocks > nr_cblocks_max)
2210 rec->opts.nr_cblocks = nr_cblocks_max;
d3d1af6f
AB
2211 if (verbose > 0)
2212 pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks);
2213
9d2ed645
AB
2214 pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
2215
d20deb64 2216 err = __cmd_record(&record, argc, argv);
394c01ed 2217out:
45604710 2218 perf_evlist__delete(rec->evlist);
d65a458b 2219 symbol__exit();
ef149c25 2220 auxtrace_record__free(rec->itr);
39d17dac 2221 return err;
0e9b20b8 2222}
2dd6d8a1
AH
2223
2224static void snapshot_sig_handler(int sig __maybe_unused)
2225{
dc0c6127
JO
2226 struct record *rec = &record;
2227
5f9cf599
WN
2228 if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
2229 trigger_hit(&auxtrace_snapshot_trigger);
2230 auxtrace_record__snapshot_started = 1;
2231 if (auxtrace_record__snapshot_start(record.itr))
2232 trigger_error(&auxtrace_snapshot_trigger);
2233 }
3c1cb7e3 2234
dc0c6127 2235 if (switch_output_signal(rec))
3c1cb7e3 2236 trigger_hit(&switch_output_trigger);
2dd6d8a1 2237}
bfacbe3b
JO
2238
2239static void alarm_sig_handler(int sig __maybe_unused)
2240{
2241 struct record *rec = &record;
2242
2243 if (switch_output_time(rec))
2244 trigger_hit(&switch_output_trigger);
2245}