]> git.ipfire.org Git - thirdparty/man-pages.git/blame - man2/perf_event_open.2
perf_event_open.2: ffix
[thirdparty/man-pages.git] / man2 / perf_event_open.2
CommitLineData
f2b1d720
MK
1.\" Copyright (c) 2012, Vincent Weaver
2.\"
1dd72f9c 3.\" %%%LICENSE_START(GPLv2+_DOC_FULL)
f2b1d720
MK
4.\" This is free documentation; you can redistribute it and/or
5.\" modify it under the terms of the GNU General Public License as
6.\" published by the Free Software Foundation; either version 2 of
7.\" the License, or (at your option) any later version.
8.\"
9.\" The GNU General Public License's references to "object code"
10.\" and "executables" are to be interpreted as the output of any
11.\" document formatting or typesetting system, including
12.\" intermediate and printed output.
13.\"
14.\" This manual is distributed in the hope that it will be useful,
15.\" but WITHOUT ANY WARRANTY; without even the implied warranty of
16.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17.\" GNU General Public License for more details.
18.\"
19.\" You should have received a copy of the GNU General Public
20.\" License along with this manual; if not, see
21.\" <http://www.gnu.org/licenses/>.
6a8d8745 22.\" %%%LICENSE_END
f2b1d720
MK
23.\"
24.\" This document is based on the perf_event.h header file, the
25.\" tools/perf/design.txt file, and a lot of bitter experience.
26.\"
35deeb87 27.TH PERF_EVENT_OPEN 2 2016-12-12 "Linux" "Linux Programmer's Manual"
f2b1d720
MK
28.SH NAME
29perf_event_open \- set up performance monitoring
30.SH SYNOPSIS
31.nf
32.B #include <linux/perf_event.h>
33.B #include <linux/hw_breakpoint.h>
34.sp
35.BI "int perf_event_open(struct perf_event_attr *" attr ,
36.BI " pid_t " pid ", int " cpu ", int " group_fd ,
37.BI " unsigned long " flags );
38.fi
39
40.IR Note :
41There is no glibc wrapper for this system call; see NOTES.
42.SH DESCRIPTION
43Given a list of parameters,
44.BR perf_event_open ()
45returns a file descriptor, for use in subsequent system calls
46.RB ( read "(2), " mmap "(2), " prctl "(2), " fcntl "(2), etc.)."
47.PP
48A call to
49.BR perf_event_open ()
50creates a file descriptor that allows measuring performance
51information.
52Each file descriptor corresponds to one
53event that is measured; these can be grouped together
54to measure multiple events simultaneously.
55.PP
56Events can be enabled and disabled in two ways: via
57.BR ioctl (2)
58and via
0fe9e4b1 59.BR prctl (2).
f2b1d720
MK
60When an event is disabled it does not count or generate overflows but does
61continue to exist and maintain its count value.
62.PP
63Events come in two flavors: counting and sampled.
64A
65.I counting
66event is one that is used for counting the aggregate number of events
67that occur.
68In general, counting event results are gathered with a
69.BR read (2)
70call.
71A
72.I sampling
73event periodically writes measurements to a buffer that can then
74be accessed via
0fe9e4b1 75.BR mmap (2).
f2b1d720
MK
76.SS Arguments
77.P
f2b1d720 78The
a02a1737 79.I pid
f2b1d720 80and
a02a1737
VW
81.I cpu
82arguments allow specifying which process and CPU to monitor:
83.TP
f2d15dc9 84.BR "pid == 0" " and " "cpu == \-1"
ee7b0cbf 85This measures the calling process/thread on any CPU.
a02a1737 86.TP
f2d15dc9 87.BR "pid == 0" " and " "cpu >= 0"
ee7b0cbf 88This measures the calling process/thread only
a02a1737
VW
89when running on the specified CPU.
90.TP
f2d15dc9 91.BR "pid > 0" " and " "cpu == \-1"
a02a1737
VW
92This measures the specified process/thread on any CPU.
93.TP
f2d15dc9 94.BR "pid > 0" " and " "cpu >= 0"
a02a1737
VW
95This measures the specified process/thread only
96when running on the specified CPU.
97.TP
f2d15dc9 98.BR "pid == \-1" " and " "cpu >= 0"
a02a1737 99This measures all processes/threads on the specified CPU.
ce88f77b 100This requires
f2b1d720
MK
101.B CAP_SYS_ADMIN
102capability or a
103.I /proc/sys/kernel/perf_event_paranoid
104value of less than 1.
a02a1737 105.TP
ce88f77b 106.BR "pid == \-1" " and " "cpu == \-1"
a02a1737 107This setting is invalid and will return an error.
f2b1d720 108.P
13ec13dc
MK
109When
110.I pid
111is greater than zero, permission to perform this system call
112is governed by a ptrace access mode
113.B PTRACE_MODE_READ_REALCREDS
114check; see
115.BR ptrace (2).
116
f2b1d720
MK
117The
118.I group_fd
119argument allows event groups to be created.
120An event group has one event which is the group leader.
121The leader is created first, with
122.IR group_fd " = \-1."
123The rest of the group members are created with subsequent
124.BR perf_event_open ()
125calls with
126.IR group_fd
bec6277e 127being set to the file descriptor of the group leader.
f2b1d720
MK
128(A single event on its own is created with
129.IR group_fd " = \-1"
130and is considered to be a group with only 1 member.)
33a0ccb2 131An event group is scheduled onto the CPU as a unit: it will
d1007d14 132be put onto the CPU only if all of the events in the group can be put onto
f2b1d720
MK
133the CPU.
134This means that the values of the member events can be
ce88f77b 135meaningfully compared\(emadded, divided (to get ratios), and so on\(emwith each
f2b1d720
MK
136other, since they have counted events for the same set of executed
137instructions.
138.P
139The
140.I flags
08e325e8 141argument is formed by ORing together zero or more of the following values:
f2b1d720 142.TP
60dafbc1
MK
143.BR PERF_FLAG_FD_CLOEXEC " (since Linux 3.14)"
144.\" commit a21b0b354d4ac39be691f51c53562e2c24443d9e
e9b1ab78
MK
145This flag enables the close-on-exec flag for the created
146event file descriptor,
147so that the file descriptor is automatically closed on
148.BR execve (2).
8bad22e5
MK
149Setting the close-on-exec flag at creation time, rather than later with
150.BR fcntl (2),
e9b1ab78
MK
151avoids potential race conditions where the calling thread invokes
152.BR perf_event_open ()
a61dba34
MK
153and
154.BR fcntl (2)
e9b1ab78
MK
155at the same time as another thread calls
156.BR fork (2)
157then
158.BR execve (2).
159.TP
f2b1d720 160.BR PERF_FLAG_FD_NO_GROUP
31266c04
VW
161This flag tells the event to ignore the
162.IR group_fd
163parameter except for the purpose of setting up output redirection
164using the
165.B PERF_FLAG_FD_OUTPUT
166flag.
f2b1d720 167.TP
3117263f 168.BR PERF_FLAG_FD_OUTPUT " (broken since Linux 2.6.35)"
747a6e7c 169.\" commit ac9721f3f54b27a16c7e1afb2481e7ee95a70318
31266c04
VW
170This flag re-routes the event's sampled output to instead
171be included in the mmap buffer of the event specified by
172.IR group_fd .
f2b1d720 173.TP
3117263f 174.BR PERF_FLAG_PID_CGROUP " (since Linux 2.6.39)"
60dafbc1 175.\" commit e5d1367f17ba6a6fed5fd8b74e4d5720923e0c25
f2b1d720
MK
176This flag activates per-container system-wide monitoring.
177A container
ce88f77b 178is an abstraction that isolates a set of resources for finer-grained
699893d8 179control (CPUs, memory, etc.).
f2b1d720
MK
180In this mode, the event is measured
181only if the thread running on the monitored CPU belongs to the designated
182container (cgroup).
183The cgroup is identified by passing a file descriptor
184opened on its directory in the cgroupfs filesystem.
185For instance, if the
186cgroup to monitor is called
187.IR test ,
188then a file descriptor opened on
189.I /dev/cgroup/test
190(assuming cgroupfs is mounted on
191.IR /dev/cgroup )
192must be passed as the
193.I pid
194parameter.
33a0ccb2 195cgroup monitoring is available only
f2b1d720
MK
196for system-wide events and may therefore require extra permissions.
197.P
198The
199.I perf_event_attr
200structure provides detailed configuration information
201for the event being created.
202
203.in +4n
204.nf
205struct perf_event_attr {
da8bd8a4
MK
206 __u32 type; /* Type of event */
207 __u32 size; /* Size of attribute structure */
208 __u64 config; /* Type-specific configuration */
f2b1d720
MK
209
210 union {
211 __u64 sample_period; /* Period of sampling */
212 __u64 sample_freq; /* Frequency of sampling */
213 };
214
ce88f77b
MK
215 __u64 sample_type; /* Specifies values included in sample */
216 __u64 read_format; /* Specifies values returned in read */
217
218 __u64 disabled : 1, /* off by default */
219 inherit : 1, /* children inherit it */
220 pinned : 1, /* must always be on PMU */
221 exclusive : 1, /* only group on PMU */
222 exclude_user : 1, /* don't count user */
223 exclude_kernel : 1, /* don't count kernel */
224 exclude_hv : 1, /* don't count hypervisor */
225 exclude_idle : 1, /* don't count when idle */
226 mmap : 1, /* include mmap data */
227 comm : 1, /* include comm data */
228 freq : 1, /* use freq, not period */
229 inherit_stat : 1, /* per task counts */
230 enable_on_exec : 1, /* next exec enables */
231 task : 1, /* trace fork/exit */
232 watermark : 1, /* wakeup_watermark */
233 precise_ip : 2, /* skid constraint */
234 mmap_data : 1, /* non-exec mmap data */
235 sample_id_all : 1, /* sample_type all events */
236 exclude_host : 1, /* don't count in host */
237 exclude_guest : 1, /* don't count in guest */
238 exclude_callchain_kernel : 1,
239 /* exclude kernel callchains */
240 exclude_callchain_user : 1,
241 /* exclude user callchains */
9bfc542b 242 mmap2 : 1, /* include mmap with inode data */
dc9ec146
MK
243 comm_exec : 1, /* flag comm events that are
244 due to exec */
6bd5186a 245 use_clockid : 1, /* use clockid for time fields */
9277a75d 246 context_switch : 1, /* context switch data */
6bd5186a 247
9277a75d 248 __reserved_1 : 37;
f2b1d720
MK
249
250 union {
251 __u32 wakeup_events; /* wakeup every n events */
7db515ef 252 __u32 wakeup_watermark; /* bytes before wakeup */
f2b1d720
MK
253 };
254
255 __u32 bp_type; /* breakpoint type */
256
257 union {
258 __u64 bp_addr; /* breakpoint address */
259 __u64 config1; /* extension of config */
260 };
261
262 union {
263 __u64 bp_len; /* breakpoint length */
264 __u64 config2; /* extension of config1 */
265 };
ce88f77b
MK
266 __u64 branch_sample_type; /* enum perf_branch_sample_type */
267 __u64 sample_regs_user; /* user regs to dump on samples */
268 __u32 sample_stack_user; /* size of stack to dump on
7db515ef 269 samples */
6bd5186a 270 __s32 clockid; /* clock to use for time fields */
f5281dfd 271 __u64 sample_regs_intr; /* regs to dump on samples */
cdc52f4a 272 __u32 aux_watermark; /* aux bytes before wakeup */
fd133d5d
VW
273 __u16 sample_max_stack; /* max frames in callchain */
274 __u16 __reserved_2; /* align to u64 */
cdc52f4a 275
f2b1d720
MK
276};
277.fi
278.in
279
280The fields of the
281.I perf_event_attr
282structure are described in more detail below:
f2b1d720
MK
283.TP
284.I type
285This field specifies the overall event type.
286It has one of the following values:
287.RS
288.TP
289.B PERF_TYPE_HARDWARE
290This indicates one of the "generalized" hardware events provided
291by the kernel.
292See the
293.I config
294field definition for more details.
295.TP
296.B PERF_TYPE_SOFTWARE
297This indicates one of the software-defined events provided by the kernel
298(even if no hardware support is available).
299.TP
300.B PERF_TYPE_TRACEPOINT
301This indicates a tracepoint
302provided by the kernel tracepoint infrastructure.
303.TP
304.B PERF_TYPE_HW_CACHE
305This indicates a hardware cache event.
306This has a special encoding, described in the
307.I config
308field definition.
309.TP
310.B PERF_TYPE_RAW
311This indicates a "raw" implementation-specific event in the
312.IR config " field."
313.TP
31c1f2b0 314.BR PERF_TYPE_BREAKPOINT " (since Linux 2.6.33)"
60dafbc1 315.\" commit 24f1e32c60c45c89a997c73395b69c8af6f0a84e
f2b1d720
MK
316This indicates a hardware breakpoint as provided by the CPU.
317Breakpoints can be read/write accesses to an address as well as
318execution of an instruction address.
319.TP
320.RB "dynamic PMU"
747a6e7c
VW
321Since Linux 2.6.38,
322.\" commit 2e80a82a49c4c7eca4e35734380f28298ba5db19
7db515ef 323.BR perf_event_open ()
f2b1d720
MK
324can support multiple PMUs.
325To enable this, a value exported by the kernel can be used in the
326.I type
327field to indicate which PMU to use.
328The value to use can be found in the sysfs filesystem:
329there is a subdirectory per PMU instance under
330.IR /sys/bus/event_source/devices .
7d182bb6 331In each subdirectory there is a
f2b1d720
MK
332.I type
333file whose content is an integer that can be used in the
334.I type
335field.
336For instance,
337.I /sys/bus/event_source/devices/cpu/type
338contains the value for the core CPU PMU, which is usually 4.
339.RE
f2b1d720
MK
340.TP
341.I "size"
342The size of the
343.I perf_event_attr
344structure for forward/backward compatibility.
345Set this using
346.I sizeof(struct perf_event_attr)
347to allow the kernel to see
348the struct size at the time of compilation.
349
350The related define
351.B PERF_ATTR_SIZE_VER0
352is set to 64; this was the size of the first published struct.
353.B PERF_ATTR_SIZE_VER1
354is 72, corresponding to the addition of breakpoints in Linux 2.6.33.
747a6e7c
VW
355.\" commit cb5d76999029ae7a517cb07dfa732c1b5a934fc2
356.\" this was added much later when PERF_ATTR_SIZE_VER2 happened
357.\" but the actual attr_size had increased in 2.6.33
f2b1d720
MK
358.B PERF_ATTR_SIZE_VER2
359is 80 corresponding to the addition of branch sampling in Linux 3.4.
747a6e7c 360.\" commit cb5d76999029ae7a517cb07dfa732c1b5a934fc2
d2a6be2f 361.B PERF_ATTR_SIZE_VER3
f2b1d720 362is 96 corresponding to the addition
7ede2f66
DP
363of
364.I sample_regs_user
365and
366.I sample_stack_user
367in Linux 3.7.
747a6e7c 368.\" commit 1659d129ed014b715b0b2120e6fd929bdd33ed03
f5281dfd
VW
369.B PERF_ATTR_SIZE_VER4
370is 104 corresponding to the addition of
371.I sample_regs_intr
372in Linux 3.19.
373.\" commit 60e2364e60e86e81bc6377f49779779e6120977f
cdc52f4a
VW
374.B PERF_ATTR_SIZE_VER5
375is 112 corresponding to the addition of
2050c098 376.I aux_watermark
cdc52f4a
VW
377in Linux 4.1.
378.\" commit 1a5941312414c71dece6717da9a0fa1303127afa
f2b1d720
MK
379.TP
380.I "config"
381This specifies which event you want, in conjunction with
382the
383.I type
384field.
385The
386.IR config1 " and " config2
387fields are also taken into account in cases where 64 bits is not
388enough to fully specify the event.
389The encoding of these fields is event dependent.
390
f2b1d720
MK
391There are various ways to set the
392.I config
393field that are dependent on the value of the previously
394described
395.I type
396field.
397What follows are various possible settings for
398.I config
399separated out by
400.IR type .
401
402If
403.I type
404is
405.BR PERF_TYPE_HARDWARE ,
406we are measuring one of the generalized hardware CPU events.
407Not all of these are available on all platforms.
408Set
409.I config
410to one of the following:
411.RS 12
412.TP
413.B PERF_COUNT_HW_CPU_CYCLES
414Total cycles.
2b538c3e 415Be wary of what happens during CPU frequency scaling.
f2b1d720
MK
416.TP
417.B PERF_COUNT_HW_INSTRUCTIONS
418Retired instructions.
419Be careful, these can be affected by various
2b538c3e 420issues, most notably hardware interrupt counts.
f2b1d720
MK
421.TP
422.B PERF_COUNT_HW_CACHE_REFERENCES
423Cache accesses.
424Usually this indicates Last Level Cache accesses but this may
425vary depending on your CPU.
426This may include prefetches and coherency messages; again this
427depends on the design of your CPU.
428.TP
429.B PERF_COUNT_HW_CACHE_MISSES
430Cache misses.
431Usually this indicates Last Level Cache misses; this is intended to be
432used in conjunction with the
433.B PERF_COUNT_HW_CACHE_REFERENCES
434event to calculate cache miss rates.
435.TP
436.B PERF_COUNT_HW_BRANCH_INSTRUCTIONS
437Retired branch instructions.
747a6e7c 438Prior to Linux 2.6.35, this used
f2b1d720 439the wrong event on AMD processors.
747a6e7c 440.\" commit f287d332ce835f77a4f5077d2c0ef1e3f9ea42d2
f2b1d720
MK
441.TP
442.B PERF_COUNT_HW_BRANCH_MISSES
443Mispredicted branch instructions.
444.TP
445.B PERF_COUNT_HW_BUS_CYCLES
446Bus cycles, which can be different from total cycles.
447.TP
31c1f2b0 448.BR PERF_COUNT_HW_STALLED_CYCLES_FRONTEND " (since Linux 3.0)"
747a6e7c 449.\" commit 8f62242246351b5a4bc0c1f00c0c7003edea128a
f2b1d720
MK
450Stalled cycles during issue.
451.TP
31c1f2b0 452.BR PERF_COUNT_HW_STALLED_CYCLES_BACKEND " (since Linux 3.0)"
747a6e7c 453.\" commit 8f62242246351b5a4bc0c1f00c0c7003edea128a
f2b1d720
MK
454Stalled cycles during retirement.
455.TP
31c1f2b0 456.BR PERF_COUNT_HW_REF_CPU_CYCLES " (since Linux 3.3)"
60dafbc1 457.\" commit c37e17497e01fc0f5d2d6feb5723b210b3ab8890
f2b1d720
MK
458Total cycles; not affected by CPU frequency scaling.
459.RE
460.IP
461If
462.I type
463is
464.BR PERF_TYPE_SOFTWARE ,
465we are measuring software events provided by the kernel.
466Set
467.I config
468to one of the following:
469.RS 12
470.TP
471.B PERF_COUNT_SW_CPU_CLOCK
472This reports the CPU clock, a high-resolution per-CPU timer.
473.TP
474.B PERF_COUNT_SW_TASK_CLOCK
475This reports a clock count specific to the task that is running.
476.TP
477.B PERF_COUNT_SW_PAGE_FAULTS
478This reports the number of page faults.
479.TP
480.B PERF_COUNT_SW_CONTEXT_SWITCHES
481This counts context switches.
482Until Linux 2.6.34, these were all reported as user-space
483events; after that, they are reported as happening in the kernel.
747a6e7c 484.\" commit e49a5bd38159dfb1928fd25b173bc9de4bbadb21
f2b1d720
MK
485.TP
486.B PERF_COUNT_SW_CPU_MIGRATIONS
487This reports the number of times the process
488has migrated to a new CPU.
489.TP
490.B PERF_COUNT_SW_PAGE_FAULTS_MIN
491This counts the number of minor page faults.
492These did not require disk I/O to handle.
493.TP
494.B PERF_COUNT_SW_PAGE_FAULTS_MAJ
495This counts the number of major page faults.
496These required disk I/O to handle.
497.TP
31c1f2b0 498.BR PERF_COUNT_SW_ALIGNMENT_FAULTS " (since Linux 2.6.33)"
60dafbc1 499.\" commit f7d7986060b2890fc26db6ab5203efbd33aa2497
f2b1d720
MK
500This counts the number of alignment faults.
501These happen when unaligned memory accesses happen; the kernel
502can handle these but it reduces performance.
33a0ccb2 503This happens only on some architectures (never on x86).
f2b1d720 504.TP
31c1f2b0 505.BR PERF_COUNT_SW_EMULATION_FAULTS " (since Linux 2.6.33)"
60dafbc1 506.\" commit f7d7986060b2890fc26db6ab5203efbd33aa2497
f2b1d720
MK
507This counts the number of emulation faults.
508The kernel sometimes traps on unimplemented instructions
7db515ef 509and emulates them for user space.
f2b1d720 510This can negatively impact performance.
dab38455 511.TP
31c1f2b0 512.BR PERF_COUNT_SW_DUMMY " (since Linux 3.12)"
60dafbc1 513.\" commit fa0097ee690693006ab1aea6c01ad3c851b65c77
dab38455
VW
514This is a placeholder event that counts nothing.
515Informational sample record types such as mmap or comm
516must be associated with an active event.
517This dummy event allows gathering such records without requiring
518a counting event.
f2b1d720 519.RE
f2b1d720 520
f2b1d720
MK
521.RS
522If
523.I type
524is
525.BR PERF_TYPE_TRACEPOINT ,
526then we are measuring kernel tracepoints.
527The value to use in
528.I config
529can be obtained from under debugfs
530.I tracing/events/*/*/id
531if ftrace is enabled in the kernel.
f2b1d720 532.RE
1f22e274 533
f2b1d720
MK
534.RS
535If
536.I type
537is
538.BR PERF_TYPE_HW_CACHE ,
539then we are measuring a hardware CPU cache event.
540To calculate the appropriate
541.I config
542value use the following equation:
543.RS 4
544.nf
545
546 (perf_hw_cache_id) | (perf_hw_cache_op_id << 8) |
547 (perf_hw_cache_op_result_id << 16)
548.fi
549.P
550where
551.I perf_hw_cache_id
552is one of:
7db515ef 553.RS 4
f2b1d720
MK
554.TP
555.B PERF_COUNT_HW_CACHE_L1D
556for measuring Level 1 Data Cache
557.TP
558.B PERF_COUNT_HW_CACHE_L1I
559for measuring Level 1 Instruction Cache
560.TP
561.B PERF_COUNT_HW_CACHE_LL
562for measuring Last-Level Cache
563.TP
564.B PERF_COUNT_HW_CACHE_DTLB
565for measuring the Data TLB
566.TP
567.B PERF_COUNT_HW_CACHE_ITLB
568for measuring the Instruction TLB
569.TP
570.B PERF_COUNT_HW_CACHE_BPU
571for measuring the branch prediction unit
572.TP
5a69ce9c
MK
573.BR PERF_COUNT_HW_CACHE_NODE " (since Linux 3.1)"
574.\" commit 89d6c0b5bdbb1927775584dcf532d98b3efe1477
f2b1d720
MK
575for measuring local memory accesses
576.RE
f2b1d720
MK
577.P
578and
579.I perf_hw_cache_op_id
4af27572 580is one of:
7db515ef 581.RS 4
f2b1d720
MK
582.TP
583.B PERF_COUNT_HW_CACHE_OP_READ
584for read accesses
585.TP
586.B PERF_COUNT_HW_CACHE_OP_WRITE
587for write accesses
588.TP
589.B PERF_COUNT_HW_CACHE_OP_PREFETCH
590for prefetch accesses
591.RE
f2b1d720
MK
592.P
593and
594.I perf_hw_cache_op_result_id
4af27572 595is one of:
7db515ef 596.RS 4
f2b1d720
MK
597.TP
598.B PERF_COUNT_HW_CACHE_RESULT_ACCESS
599to measure accesses
600.TP
601.B PERF_COUNT_HW_CACHE_RESULT_MISS
602to measure misses
603.RE
604.RE
605
606If
607.I type
608is
609.BR PERF_TYPE_RAW ,
610then a custom "raw"
611.I config
612value is needed.
613Most CPUs support events that are not covered by the "generalized" events.
614These are implementation defined; see your CPU manual (for example
615the Intel Volume 3B documentation or the AMD BIOS and Kernel Developer
616Guide).
617The libpfm4 library can be used to translate from the name in the
618architectural manuals to the raw hex value
619.BR perf_event_open ()
620expects in this field.
621
622If
623.I type
624is
625.BR PERF_TYPE_BREAKPOINT ,
626then leave
627.I config
628set to zero.
629Its parameters are set in other places.
630.RE
631.TP
632.IR sample_period ", " sample_freq
21977c9d 633A "sampling" event is one that generates an overflow notification
f2b1d720
MK
634every N events, where N is given by
635.IR sample_period .
21977c9d 636A sampling event has
f2b1d720 637.IR sample_period " > 0."
21977c9d 638When an overflow occurs, requested data is recorded
f2b1d720
MK
639in the mmap buffer.
640The
641.I sample_type
21977c9d 642field controls what data is recorded on each overflow.
f2b1d720
MK
643
644.I sample_freq
645can be used if you wish to use frequency rather than period.
37bee118 646In this case, you set the
f2b1d720
MK
647.I freq
648flag.
649The kernel will adjust the sampling period
650to try to achieve the desired rate.
651The rate of adjustment is a
652timer tick.
f2b1d720
MK
653.TP
654.I "sample_type"
655The various bits in this field specify which values to include
656in the sample.
657They will be recorded in a ring-buffer,
ad73a2cc 658which is available to user space using
f2b1d720
MK
659.BR mmap (2).
660The order in which the values are saved in the
661sample is documented in the MMAP Layout subsection below;
662it is not the
663.I "enum perf_event_sample_format"
664order.
665.RS
666.TP
667.B PERF_SAMPLE_IP
668Records instruction pointer.
669.TP
670.B PERF_SAMPLE_TID
7db515ef 671Records the process and thread IDs.
f2b1d720
MK
672.TP
673.B PERF_SAMPLE_TIME
674Records a timestamp.
675.TP
676.B PERF_SAMPLE_ADDR
677Records an address, if applicable.
678.TP
679.B PERF_SAMPLE_READ
680Records counter values for all events in a group, not just the group leader.
681.TP
682.B PERF_SAMPLE_CALLCHAIN
683Records the callchain (stack backtrace).
684.TP
685.B PERF_SAMPLE_ID
686Records a unique ID for the opened event's group leader.
687.TP
688.B PERF_SAMPLE_CPU
689Records CPU number.
690.TP
691.B PERF_SAMPLE_PERIOD
692Records the current sampling period.
693.TP
694.B PERF_SAMPLE_STREAM_ID
695Records a unique ID for the opened event.
696Unlike
697.B PERF_SAMPLE_ID
698the actual ID is returned, not that of the group leader.
8859d3a9
DP
699This ID is the same as the one returned by
700.BR PERF_FORMAT_ID .
f2b1d720
MK
701.TP
702.B PERF_SAMPLE_RAW
703Records additional data, if applicable.
704Usually returned by tracepoint events.
705.TP
31c1f2b0 706.BR PERF_SAMPLE_BRANCH_STACK " (since Linux 3.4)"
60dafbc1 707.\" commit bce38cd53e5ddba9cb6d708c4ef3d04a4016ec7e
045bf4d3
VW
708This provides a record of recent branches, as provided
709by CPU branch sampling hardware (such as Intel Last Branch Record).
710Not all hardware supports this feature.
711
712See the
713.I branch_sample_type
714field for how to filter which branches are reported.
f2b1d720 715.TP
31c1f2b0 716.BR PERF_SAMPLE_REGS_USER " (since Linux 3.7)"
60dafbc1 717.\" commit 4018994f3d8785275ef0e7391b75c3462c029e56
d1007d14
VW
718Records the current user-level CPU register state
719(the values in the process before the kernel was called).
f2b1d720 720.TP
31c1f2b0 721.BR PERF_SAMPLE_STACK_USER " (since Linux 3.7)"
60dafbc1 722.\" commit c5ebcedb566ef17bda7b02686e0d658a7bb42ee7
d1007d14
VW
723Records the user-level stack, allowing stack unwinding.
724.TP
31c1f2b0 725.BR PERF_SAMPLE_WEIGHT " (since Linux 3.10)"
60dafbc1 726.\" commit c3feedf2aaf9ac8bad6f19f5d21e4ee0b4b87e9c
d1007d14 727Records a hardware-provided weight value that expresses how
51700fd7 728costly the sampled event was.
d1007d14
VW
729This allows the hardware to highlight expensive events in
730a profile.
731.TP
31c1f2b0 732.BR PERF_SAMPLE_DATA_SRC " (since Linux 3.10)"
60dafbc1 733.\" commit d6be9ad6c960f43800a6f118932bc8a5a4eadcd1
d1007d14
VW
734Records the data source: where in the memory hierarchy
735the data associated with the sampled instruction came from.
6170255e 736This is available only if the underlying hardware
d1007d14 737supports this feature.
7480dabb 738.TP
31c1f2b0 739.BR PERF_SAMPLE_IDENTIFIER " (since Linux 3.12)"
60dafbc1 740.\" commit ff3d527cebc1fa3707c617bfe9e74f53fcfb0955
8859d3a9
DP
741Places the
742.B SAMPLE_ID
743value in a fixed position in the record,
7480dabb
VW
744either at the beginning (for sample events) or at the end
745(if a non-sample event).
746
747This was necessary because a sample stream may have
748records from various different event sources with different
749.I sample_type
750settings.
e9bd9b2c 751Parsing the event stream properly was not possible because the
8859d3a9
DP
752format of the record was needed to find
753.BR SAMPLE_ID ,
754but
27f52b52 755the format could not be found without knowing what
7480dabb
VW
756event the sample belonged to (causing a circular
757dependency).
758
e41c36b2 759The
7480dabb
VW
760.B PERF_SAMPLE_IDENTIFIER
761setting makes the event stream always parsable
8859d3a9
DP
762by putting
763.B SAMPLE_ID
764in a fixed location, even though
765it means having duplicate
766.B SAMPLE_ID
767values in records.
1e043959 768.TP
60dafbc1
MK
769.BR PERF_SAMPLE_TRANSACTION " (since Linux 3.13)"
770.\" commit fdfbbd07e91f8fe387140776f3fd94605f0c89e5
84fc2a6e 771Records reasons for transactional memory abort events
1e043959
VW
772(for example, from Intel TSX transactional memory support).
773
774The
775.I precise_ip
b3f39642 776setting must be greater than 0 and a transactional memory abort
1e043959 777event must be measured or no values will be recorded.
84fc2a6e
MK
778Also note that some perf_event measurements, such as sampled
779cycle counting, may cause extraneous aborts (by causing an
1e043959 780interrupt during a transaction).
f5281dfd
VW
781.TP
782.BR PERF_SAMPLE_REGS_INTR " (since Linux 3.19)"
783.\" commit 60e2364e60e86e81bc6377f49779779e6120977f
784Records a subset of the current CPU register state
785as specified by
786.IR sample_regs_intr .
787Unlike
788.B PERF_SAMPLE_REGS_USER
789the register values will return kernel register
790state if the overflow happened while kernel
791code is running.
792If the CPU supports hardware sampling of
b01ae37b 793register state (e.g., PEBS on Intel x86) and
f5281dfd
VW
794.I precise_ip
795is set higher than zero then the register
796values returned are those captured by
797hardware at the time of the sampled
798instruction's retirement.
f2b1d720 799.RE
f2b1d720
MK
800.TP
801.IR "read_format"
802This field specifies the format of the data returned by
803.BR read (2)
804on a
7db515ef 805.BR perf_event_open ()
f2b1d720
MK
806file descriptor.
807.RS
808.TP
809.B PERF_FORMAT_TOTAL_TIME_ENABLED
7ede2f66
DP
810Adds the 64-bit
811.I time_enabled
812field.
f2b1d720
MK
813This can be used to calculate estimated totals if
814the PMU is overcommitted and multiplexing is happening.
815.TP
816.B PERF_FORMAT_TOTAL_TIME_RUNNING
7ede2f66
DP
817Adds the 64-bit
818.I time_running
819field.
f2b1d720 820This can be used to calculate estimated totals if
3d1ee497 821the PMU is overcommitted and multiplexing is happening.
f2b1d720
MK
822.TP
823.B PERF_FORMAT_ID
824Adds a 64-bit unique value that corresponds to the event group.
825.TP
826.B PERF_FORMAT_GROUP
827Allows all counter values in an event group to be read with one read.
828.RE
f2b1d720
MK
829.TP
830.IR "disabled"
831The
832.I disabled
833bit specifies whether the counter starts out disabled or enabled.
834If disabled, the event can later be enabled by
835.BR ioctl (2),
836.BR prctl (2),
837or
838.IR enable_on_exec .
406650db
VW
839
840When creating an event group, typically the group leader is initialized
841with
842.I disabled
843set to 1 and any child events are initialized with
844.I disabled
845set to 0.
846Despite
847.I disabled
848being 0, the child events will not start until the group leader
849is enabled.
f2b1d720
MK
850.TP
851.IR "inherit"
852The
853.I inherit
854bit specifies that this counter should count events of child
855tasks as well as the task specified.
33a0ccb2 856This applies only to new children, not to any existing children at
f2b1d720
MK
857the time the counter is created (nor to any new children of
858existing children).
859
860Inherit does not work for some combinations of
4b3a5f01
MK
861.IR read_format
862values, such as
f2b1d720 863.BR PERF_FORMAT_GROUP .
f2b1d720
MK
864.TP
865.IR "pinned"
866The
867.I pinned
868bit specifies that the counter should always be on the CPU if at all
869possible.
33a0ccb2 870It applies only to hardware counters and only to group leaders.
f2b1d720
MK
871If a pinned counter cannot be put onto the CPU (e.g., because there are
872not enough hardware counters or because of a conflict with some other
873event), then the counter goes into an 'error' state, where reads
874return end-of-file (i.e.,
875.BR read (2)
876returns 0) until the counter is subsequently enabled or disabled.
f2b1d720
MK
877.TP
878.IR "exclusive"
879The
880.I exclusive
881bit specifies that when this counter's group is on the CPU,
882it should be the only group using the CPU's counters.
883In the future this may allow monitoring programs to
884support PMU features that need to run alone so that they do not
885disrupt other hardware counters.
bea10c8c
VW
886
887Note that many unexpected situations may prevent events with the
888.I exclusive
d3532647 889bit set from ever running.
bea10c8c 890This includes any users running a system-wide
d3532647 891measurement as well as any kernel use of the performance counters
bea10c8c 892(including the commonly enabled NMI Watchdog Timer interface).
f2b1d720
MK
893.TP
894.IR "exclude_user"
ad73a2cc 895If this bit is set, the count excludes events that happen in user space.
f2b1d720
MK
896.TP
897.IR "exclude_kernel"
edb3e316 898If this bit is set, the count excludes events that happen in kernel space.
f2b1d720
MK
899.TP
900.IR "exclude_hv"
901If this bit is set, the count excludes events that happen in the
902hypervisor.
903This is mainly for PMUs that have built-in support for handling this
904(such as POWER).
905Extra support is needed for handling hypervisor measurements on most
906machines.
f2b1d720
MK
907.TP
908.IR "exclude_idle"
909If set, don't count when the CPU is idle.
f2b1d720
MK
910.TP
911.IR "mmap"
912The
913.I mmap
75ee11e5 914bit enables generation of
cd7c700a 915.B PERF_RECORD_MMAP
75ee11e5
VW
916samples for every
917.BR mmap (2)
918call that has
cd7c700a 919.B PROT_EXEC
75ee11e5
VW
920set.
921This allows tools to notice new executable code being mapped into
922a program (dynamic shared libraries for example)
923so that addresses can be mapped back to the original code.
f2b1d720
MK
924.TP
925.IR "comm"
926The
927.I comm
928bit enables tracking of process command name as modified by the
cd7c700a 929.BR execve (2)
f2b1d720 930and
cd7c700a 931.BR prctl (PR_SET_NAME)
49bc411c
VW
932system calls as well as writing to
933.IR /proc/self/comm .
790ee6d6 934If the
49bc411c 935.I comm_exec
790ee6d6 936flag is also successfully set (possible since Linux 3.16),
747a6e7c 937.\" commit 82b897782d10fcc4930c9d4a15b175348fdd2871
49bc411c
VW
938then the misc flag
939.B PERF_RECORD_MISC_COMM_EXEC
940can be used to differentiate the
941.BR exec (2)
942case from the others.
f2b1d720
MK
943.TP
944.IR "freq"
945If this bit is set, then
946.I sample_frequency
947not
948.I sample_period
949is used when setting up the sampling interval.
f2b1d720
MK
950.TP
951.IR "inherit_stat"
952This bit enables saving of event counts on context switch for
953inherited tasks.
33a0ccb2 954This is meaningful only if the
f2b1d720
MK
955.I inherit
956field is set.
f2b1d720
MK
957.TP
958.IR "enable_on_exec"
959If this bit is set, a counter is automatically
960enabled after a call to
961.BR exec (2).
f2b1d720
MK
962.TP
963.IR "task"
964If this bit is set, then
965fork/exit notifications are included in the ring buffer.
f2b1d720
MK
966.TP
967.IR "watermark"
21977c9d 968If set, have an overflow notification happen when we cross the
f2b1d720
MK
969.I wakeup_watermark
970boundary.
21977c9d 971Otherwise, overflow notifications happen after
f2b1d720
MK
972.I wakeup_events
973samples.
f2b1d720 974.TP
31c1f2b0 975.IR "precise_ip" " (since Linux 2.6.35)"
747a6e7c 976.\" commit ab608344bcbde4f55ec4cd911b686b0ce3eae076
f2b1d720
MK
977This controls the amount of skid.
978Skid is how many instructions
979execute between an event of interest happening and the kernel
980being able to stop and record the event.
981Smaller skid is
982better and allows more accurate reporting of which events
983correspond to which instructions, but hardware is often limited
984with how small this can be.
985
5d73bc3f 986The possible values of this field are the following:
f2b1d720 987.RS
dc9ec146 988.IP 0 3
f2b1d720 989.B SAMPLE_IP
2b538c3e 990can have arbitrary skid.
dc9ec146 991.IP 1
f2b1d720 992.B SAMPLE_IP
2b538c3e 993must have constant skid.
dc9ec146 994.IP 2
f2b1d720 995.B SAMPLE_IP
2b538c3e 996requested to have 0 skid.
dc9ec146 997.IP 3
f2b1d720
MK
998.B SAMPLE_IP
999must have 0 skid.
5d73bc3f 1000See also the description of
f2b1d720
MK
1001.BR PERF_RECORD_MISC_EXACT_IP .
1002.RE
f2b1d720 1003.TP
31c1f2b0 1004.IR "mmap_data" " (since Linux 2.6.36)"
747a6e7c 1005.\" commit 3af9e859281bda7eb7c20b51879cf43aa788ac2e
b01ae37b 1006This is the counterpart of the
f2b1d720 1007.I mmap
75ee11e5
VW
1008field.
1009This enables generation of
cd7c700a 1010.B PERF_RECORD_MMAP
75ee11e5
VW
1011samples for
1012.BR mmap (2)
1013calls that do not have
cd7c700a 1014.B PROT_EXEC
75ee11e5 1015set (for example data and SysV shared memory).
f2b1d720 1016.TP
31c1f2b0 1017.IR "sample_id_all" " (since Linux 2.6.38)"
747a6e7c 1018.\" commit c980d1091810df13f21aabbce545fd98f545bbf7
7480dabb 1019If set, then TID, TIME, ID, STREAM_ID, and CPU can
f2b1d720
MK
1020additionally be included in
1021.RB non- PERF_RECORD_SAMPLE s
1022if the corresponding
1023.I sample_type
1024is selected.
7480dabb 1025
e9bd9b2c 1026If
7480dabb 1027.B PERF_SAMPLE_IDENTIFIER
37bee118 1028is specified, then an additional ID value is included
7480dabb
VW
1029as the last value to ease parsing the record stream.
1030This may lead to the
e9bd9b2c 1031.I id
7480dabb
VW
1032value appearing twice.
1033
1034The layout is described by this pseudo-structure:
dc9ec146 1035
7480dabb
VW
1036.in +4n
1037.nf
1038struct sample_id {
26d5cd2f
MK
1039 { u32 pid, tid; } /* if PERF_SAMPLE_TID set */
1040 { u64 time; } /* if PERF_SAMPLE_TIME set */
1041 { u64 id; } /* if PERF_SAMPLE_ID set */
7480dabb 1042 { u64 stream_id;} /* if PERF_SAMPLE_STREAM_ID set */
26d5cd2f 1043 { u32 cpu, res; } /* if PERF_SAMPLE_CPU set */
7480dabb
VW
1044 { u64 id; } /* if PERF_SAMPLE_IDENTIFIER set */
1045};
1046.fi
f2b1d720 1047.TP
31c1f2b0 1048.IR "exclude_host" " (since Linux 3.2)"
747a6e7c 1049.\" commit a240f76165e6255384d4bdb8139895fac7988799
e38fb93e 1050When conducting measurements that include processes running
5d73bc3f
MK
1051VM instances (i.e., have executed a
1052.B KVM_RUN
1053.BR ioctl (2)),
1054only measure events happening inside a guest instance.
e38fb93e
VW
1055This is only meaningful outside the guests; this setting does
1056not change counts gathered inside of a guest.
34d4e61d 1057Currently, this functionality is x86 only.
f2b1d720 1058.TP
31c1f2b0 1059.IR "exclude_guest" " (since Linux 3.2)"
747a6e7c 1060.\" commit a240f76165e6255384d4bdb8139895fac7988799
e38fb93e 1061When conducting measurements that include processes running
5d73bc3f
MK
1062VM instances (i.e., have executed a
1063.B KVM_RUN
1064.BR ioctl (2)),
1065do not measure events happening inside guest instances.
e38fb93e
VW
1066This is only meaningful outside the guests; this setting does
1067not change counts gathered inside of a guest.
34d4e61d 1068Currently, this functionality is x86 only.
f2b1d720 1069.TP
31c1f2b0 1070.IR "exclude_callchain_kernel" " (since Linux 3.7)"
747a6e7c 1071.\" commit d077526485d5c9b12fe85d0b2b3b7041e6bc5f91
f2b1d720 1072Do not include kernel callchains.
f2b1d720 1073.TP
31c1f2b0 1074.IR "exclude_callchain_user" " (since Linux 3.7)"
747a6e7c 1075.\" commit d077526485d5c9b12fe85d0b2b3b7041e6bc5f91
f2b1d720 1076Do not include user callchains.
f2b1d720 1077.TP
9bfc542b 1078.IR "mmap2" " (since Linux 3.16)"
747a6e7c
VW
1079.\" commit 13d7a2410fa637f450a29ecb515ac318ee40c741
1080.\" This is tricky; was committed during 3.12 development
1081.\" but right before release was disabled.
1082.\" So while you could select mmap2 starting with 3.12
1083.\" it did not work until 3.16
1084.\" commit a5a5ba72843dd05f991184d6cb9a4471acce1005
9bfc542b
VW
1085Generate an extended executable mmap record that contains enough
1086additional information to uniquely identify shared mappings.
1087The
1088.I mmap
1089flag must also be set for this to work.
1090.TP
49bc411c 1091.IR "comm_exec" " (since Linux 3.16)"
747a6e7c 1092.\" commit 82b897782d10fcc4930c9d4a15b175348fdd2871
5ab35ae5 1093This is purely a feature-detection flag, it does not change
49bc411c 1094kernel behavior.
5ab35ae5 1095If this flag can successfully be set, then, when
49bc411c 1096.I comm
5ab35ae5 1097is enabled, the
49bc411c
VW
1098.B PERF_RECORD_MISC_COMM_EXEC
1099flag will be set in the
1100.I misc
1101field of a comm record header if the rename event being
1102reported was caused by a call to
1103.BR exec (2).
1104This allows tools to distinguish between the various
1105types of process renaming.
1106.TP
6bd5186a
VW
1107.IR "use_clockid" " (since Linux 4.1)"
1108.\" commit 34f439278cef7b1177f8ce24f9fc81dfc6221d3b
1109This allows selecting which internal Linux clock to use
1110when generating timestamps via the
1111.I clockid
1112field.
1113This can make it easier to correlate perf sample times with
1114timestamps generated by other tools.
1115.TP
9277a75d
VW
1116.IR "context_switch" " (since Linux 4.3)"
1117.\" commit 45ac1403f564f411c6a383a2448688ba8dd705a4
1118This enables the generation of
1119.B PERF_RECORD_SWITCH
1120records when a context switch occurs.
1121It also enables the generation of
1122.B PERF_RECORD_SWITCH_CPU_WIDE
d5a24378 1123records when sampling in CPU-wide mode.
9277a75d
VW
1124This functionality is in addition to existing tracepoint and
1125software events for measuring context switches.
54905b0f
MK
1126The advantage of this method is that it will give full
1127information even with strict
9277a75d
VW
1128.I perf_event_paranoid
1129settings.
1130.TP
f2b1d720
MK
1131.IR "wakeup_events" ", " "wakeup_watermark"
1132This union sets how many samples
1133.RI ( wakeup_events )
1134or bytes
1135.RI ( wakeup_watermark )
21977c9d 1136happen before an overflow notification happens.
f2b1d720
MK
1137Which one is used is selected by the
1138.I watermark
cb8a928f 1139bit flag.
751c0f1a
VW
1140
1141.I wakeup_events
6170255e 1142counts only
751c0f1a 1143.B PERF_RECORD_SAMPLE
51700fd7 1144record types.
21977c9d 1145To receive overflow notification for all
751c0f1a 1146.B PERF_RECORD
21977c9d 1147types choose watermark and set
751c0f1a
VW
1148.I wakeup_watermark
1149to 1.
21977c9d 1150
fc79d996 1151Prior to Linux 3.0, setting
747a6e7c 1152.\" commit f506b3dc0ec454a16d40cab9ee5d75435b39dc50
21977c9d
VW
1153.I wakeup_events
1154to 0 resulted in no overflow notifications;
1155more recent kernels treat 0 the same as 1.
f2b1d720 1156.TP
31c1f2b0 1157.IR "bp_type" " (since Linux 2.6.33)"
747a6e7c 1158.\" commit 24f1e32c60c45c89a997c73395b69c8af6f0a84e
f2b1d720
MK
1159This chooses the breakpoint type.
1160It is one of:
1161.RS
1162.TP
1163.BR HW_BREAKPOINT_EMPTY
2b538c3e 1164No breakpoint.
f2b1d720
MK
1165.TP
1166.BR HW_BREAKPOINT_R
2b538c3e 1167Count when we read the memory location.
f2b1d720
MK
1168.TP
1169.BR HW_BREAKPOINT_W
2b538c3e 1170Count when we write the memory location.
f2b1d720
MK
1171.TP
1172.BR HW_BREAKPOINT_RW
2b538c3e 1173Count when we read or write the memory location.
f2b1d720
MK
1174.TP
1175.BR HW_BREAKPOINT_X
2b538c3e 1176Count when we execute code at the memory location.
f2b1d720 1177.LP
7db515ef 1178The values can be combined via a bitwise or, but the
f2b1d720
MK
1179combination of
1180.B HW_BREAKPOINT_R
1181or
1182.B HW_BREAKPOINT_W
1183with
1184.B HW_BREAKPOINT_X
1185is not allowed.
1186.RE
f2b1d720 1187.TP
31c1f2b0 1188.IR "bp_addr" " (since Linux 2.6.33)"
747a6e7c 1189.\" commit 24f1e32c60c45c89a997c73395b69c8af6f0a84e
5d73bc3f 1190This is the address of the breakpoint.
4b3a5f01
MK
1191For execution breakpoints, this is the memory address of the instruction
1192of interest; for read and write breakpoints, it is the memory address
f2b1d720 1193of the memory location of interest.
f2b1d720 1194.TP
31c1f2b0 1195.IR "config1" " (since Linux 2.6.39)"
747a6e7c 1196.\" commit a7e3ed1e470116c9d12c2f778431a481a6be8ab6
f2b1d720
MK
1197.I config1
1198is used for setting events that need an extra register or otherwise
1199do not fit in the regular config field.
1200Raw OFFCORE_EVENTS on Nehalem/Westmere/SandyBridge use this field
4b3a5f01 1201on Linux 3.3 and later kernels.
f2b1d720 1202.TP
31c1f2b0 1203.IR "bp_len" " (since Linux 2.6.33)"
747a6e7c 1204.\" commit 24f1e32c60c45c89a997c73395b69c8af6f0a84e
f2b1d720
MK
1205.I bp_len
1206is the length of the breakpoint being measured if
1207.I type
1208is
1209.BR PERF_TYPE_BREAKPOINT .
1210Options are
1211.BR HW_BREAKPOINT_LEN_1 ,
1212.BR HW_BREAKPOINT_LEN_2 ,
1213.BR HW_BREAKPOINT_LEN_4 ,
4b3a5f01 1214and
f2b1d720
MK
1215.BR HW_BREAKPOINT_LEN_8 .
1216For an execution breakpoint, set this to
1217.IR sizeof(long) .
f2b1d720 1218.TP
31c1f2b0 1219.IR "config2" " (since Linux 2.6.39)"
747a6e7c 1220.\" commit a7e3ed1e470116c9d12c2f778431a481a6be8ab6
f2b1d720
MK
1221.I config2
1222is a further extension of the
1223.I config1
1224field.
f2b1d720 1225.TP
31c1f2b0 1226.IR "branch_sample_type" " (since Linux 3.4)"
747a6e7c 1227.\" commit bce38cd53e5ddba9cb6d708c4ef3d04a4016ec7e
8a94e783 1228If
045bf4d3
VW
1229.B PERF_SAMPLE_BRANCH_STACK
1230is enabled, then this specifies what branches to include
1231in the branch record.
e3c9782b
VW
1232
1233The first part of the value is the privilege level, which
4b3a5f01 1234is a combination of one or more of the values listed below.
045bf4d3
VW
1235If the user does not set privilege level explicitly, the kernel
1236will use the event's privilege level.
1237Event and branch privilege levels do not have to match.
f2b1d720
MK
1238.RS
1239.TP
1240.B PERF_SAMPLE_BRANCH_USER
33d6e2c7 1241Branch target is in user space.
f2b1d720
MK
1242.TP
1243.B PERF_SAMPLE_BRANCH_KERNEL
33d6e2c7 1244Branch target is in kernel space.
f2b1d720
MK
1245.TP
1246.B PERF_SAMPLE_BRANCH_HV
33d6e2c7 1247Branch target is in hypervisor.
e3c9782b
VW
1248.TP
1249.B PERF_SAMPLE_BRANCH_PLM_ALL
1250A convenience value that is the three preceding values ORed together.
e3c9782b
VW
1251.P
1252In addition to the privilege value, at least one of the
1253following bits must be set.
f2b1d720
MK
1254.TP
1255.B PERF_SAMPLE_BRANCH_ANY
33d6e2c7 1256Any branch type.
f2b1d720
MK
1257.TP
1258.B PERF_SAMPLE_BRANCH_ANY_CALL
c6e5df74 1259Any call branch (includes direct calls, indirect calls, and far jumps).
f2b1d720 1260.TP
e3c9782b 1261.B PERF_SAMPLE_BRANCH_IND_CALL
33d6e2c7 1262Indirect calls.
f2b1d720 1263.TP
c6e5df74
VW
1264.BR PERF_SAMPLE_BRANCH_CALL " (since Linux 4.4)"
1265.\" commit c229bf9dc179d2023e185c0f705bdf68484c1e73
1266Direct calls.
1267.TP
1268.B PERF_SAMPLE_BRANCH_ANY_RETURN
1269Any return branch.
1270.TP
dde354c9
VW
1271.BR PERF_SAMPLE_BRANCH_IND_JUMP " (since Linux 4.2)"
1272.\" commit c9fdfa14c3792c0160849c484e83aa57afd80ccc
1273Indirect jumps.
1274.TP
aea60aad 1275.BR PERF_SAMPLE_BRANCH_COND " (since Linux 3.16)"
60dafbc1 1276.\" commit bac52139f0b7ab31330e98fd87fc5a2664951050
aea60aad
VW
1277Conditional branches.
1278.TP
31c1f2b0 1279.BR PERF_SAMPLE_BRANCH_ABORT_TX " (since Linux 3.11)"
60dafbc1 1280.\" commit 135c5612c460f89657c4698fe2ea753f6f667963
33d6e2c7 1281Transactional memory aborts.
e3c9782b 1282.TP
31c1f2b0 1283.BR PERF_SAMPLE_BRANCH_IN_TX " (since Linux 3.11)"
60dafbc1 1284.\" commit 135c5612c460f89657c4698fe2ea753f6f667963
33d6e2c7 1285Branch in transactional memory transaction.
e3c9782b 1286.TP
31c1f2b0 1287.BR PERF_SAMPLE_BRANCH_NO_TX " (since Linux 3.11)"
60dafbc1 1288.\" commit 135c5612c460f89657c4698fe2ea753f6f667963
33d6e2c7 1289Branch not in transactional memory transaction.
bb7e6ff0
VW
.TP
1290.BR PERF_SAMPLE_BRANCH_CALL_STACK " (since Linux 4.1)"
1291.\" commit 2c44b1936bb3b135a3fac8b3493394d42e51cf70
95655a22 1292Branch is part of a hardware-generated call stack.
bb7e6ff0
VW
1293This requires hardware support, currently only found
1294on Intel x86 Haswell or newer.
f2b1d720 1295.RE
f2b1d720 1296.TP
31c1f2b0 1297.IR "sample_regs_user" " (since Linux 3.7)"
747a6e7c 1298.\" commit 4018994f3d8785275ef0e7391b75c3462c029e56
4651e412 1299This bit mask defines the set of user CPU registers to dump on samples.
76c637e1 1300The layout of the register mask is architecture-specific and
4b3a5f01 1301is described in the kernel header file
d1007d14 1302.IR arch/ARCH/include/uapi/asm/perf_regs.h .
f2b1d720 1303.TP
31c1f2b0 1304.IR "sample_stack_user" " (since Linux 3.7)"
747a6e7c 1305.\" commit c5ebcedb566ef17bda7b02686e0d658a7bb42ee7
d1007d14
VW
1306This defines the size of the user stack to dump if
1307.B PERF_SAMPLE_STACK_USER
1308is specified.
6bd5186a
VW
1309.TP
1310.IR "clockid" " (since Linux 4.1)"
1311.\" commit 34f439278cef7b1177f8ce24f9fc81dfc6221d3b
1312If
1313.I use_clockid
1314is set, then this field selects which internal Linux timer to
1315use for timestamps.
1316The available timers are defined in
1317.IR linux/time.h ,
1318with
95655a22
MK
1319.BR CLOCK_MONOTONIC ,
1320.BR CLOCK_MONOTONIC_RAW ,
1321.BR CLOCK_REALTIME ,
1322.BR CLOCK_BOOTTIME ,
1323and
1324.B CLOCK_TAI
6bd5186a 1325currently supported.
cdc52f4a
VW
1326.TP
1327.IR "aux_watermark" " (since Linux 4.1)"
1328.\" commit 1a5941312414c71dece6717da9a0fa1303127afa
1329This specifies how much data is required to trigger a
1330.B PERF_RECORD_AUX
1331sample.
fd133d5d
VW
1332.TP
1333.IR "sample_max_stack" " (since Linux 4.8)"
1334.\" commit 97c79a38cd454602645f0470ffb444b3b75ce574
1335When
1336.I sample_type
1337includes
5dd3feec 1338.BR PERF_SAMPLE_CALLCHAIN ,
4b3a5f01 1339this field specifies how many stack frames to report when
fd133d5d 1340generating the callchain.
73d8cece 1341.SS Reading results
f2b1d720 1342Once a
7db515ef 1343.BR perf_event_open ()
3d1ee497 1344file descriptor has been opened, the values
f2b1d720
MK
1345of the events can be read from the file descriptor.
1346The values that are there are specified by the
1347.I read_format
7db515ef
MK
1348field in the
1349.I attr
1350structure at open time.
f2b1d720
MK
1351
1352If you attempt to read into a buffer that is not big enough to hold the
4b3a5f01 1353data, the error
f2b1d720 1354.B ENOSPC
4b3a5f01 1355results.
f2b1d720
MK
1356
1357Here is the layout of the data returned by a read:
e525b89f 1358.IP * 2
f2b1d720
MK
1359If
1360.B PERF_FORMAT_GROUP
1361was specified to allow reading all events in a group at once:
1362
1363.in +4n
1364.nf
1365struct read_format {
e525b89f
MK
1366 u64 nr; /* The number of events */
1367 u64 time_enabled; /* if PERF_FORMAT_TOTAL_TIME_ENABLED */
1368 u64 time_running; /* if PERF_FORMAT_TOTAL_TIME_RUNNING */
e307112d 1369 struct {
e525b89f
MK
1370 u64 value; /* The value of the event */
1371 u64 id; /* if PERF_FORMAT_ID */
f2b1d720
MK
1372 } values[nr];
1373};
1374.fi
1375.in
e525b89f 1376.IP *
f2b1d720
MK
1377If
1378.B PERF_FORMAT_GROUP
1379was
1380.I not
e525b89f 1381specified:
f2b1d720
MK
1382
1383.in +4n
1384.nf
1385struct read_format {
1386 u64 value; /* The value of the event */
1387 u64 time_enabled; /* if PERF_FORMAT_TOTAL_TIME_ENABLED */
1388 u64 time_running; /* if PERF_FORMAT_TOTAL_TIME_RUNNING */
1389 u64 id; /* if PERF_FORMAT_ID */
1390};
1391.fi
1392.in
e525b89f
MK
1393.PP
1394The values read are as follows:
f2b1d720
MK
1395.TP
1396.I nr
1397The number of events in this file descriptor.
fcc4f4f4 1398Available only if
f2b1d720
MK
1399.B PERF_FORMAT_GROUP
1400was specified.
f2b1d720
MK
1401.TP
1402.IR time_enabled ", " time_running
1403Total time the event was enabled and running.
4b3a5f01 1404Normally these values are the same.
37bee118
MK
1405If more events are started
1406than available counter slots on the PMU, then multiplexing
33a0ccb2 1407happens and events run only part of the time.
37bee118 1408In that case, the
f2b1d720
MK
1409.I time_enabled
1410and
1411.I time_running
1412values can be used to scale an estimated value for the count.
f2b1d720
MK
1413.TP
1414.I value
1415An unsigned 64-bit value containing the counter result.
f2b1d720
MK
1416.TP
1417.I id
4b3a5f01 1418A globally unique value for this particular event; only present if
f2b1d720 1419.B PERF_FORMAT_ID
e525b89f
MK
1420was specified in
1421.IR read_format .
73d8cece 1422.SS MMAP layout
f2b1d720 1423When using
7db515ef 1424.BR perf_event_open ()
f2b1d720
MK
1425in sampled mode, asynchronous events
1426(like counter overflow or
1427.B PROT_EXEC
1428mmap tracking)
1429are logged into a ring-buffer.
1430This ring-buffer is created and accessed through
1431.BR mmap (2).
1432
1433The mmap size should be 1+2^n pages, where the first page is a
1434metadata page
e525b89f 1435.RI ( "struct perf_event_mmap_page" )
f2b1d720
MK
1436that contains various
1437bits of information such as where the ring-buffer head is.
1438
95655a22 1439Before kernel 2.6.39, there is a bug that means you must allocate an mmap
f2b1d720
MK
1440ring buffer when sampling even if you do not plan to access it.
1441
1442The structure of the first metadata mmap page is as follows:
1443
1444.in +4n
1445.nf
1446struct perf_event_mmap_page {
ce88f77b
MK
1447 __u32 version; /* version number of this structure */
1448 __u32 compat_version; /* lowest version this is compat with */
1449 __u32 lock; /* seqlock for synchronization */
1450 __u32 index; /* hardware counter identifier */
1451 __s64 offset; /* add to hardware counter value */
1452 __u64 time_enabled; /* time event active */
1453 __u64 time_running; /* time event on CPU */
f2b1d720
MK
1454 union {
1455 __u64 capabilities;
135cba8b 1456 struct {
ce88f77b
MK
1457 __u64 cap_usr_time / cap_usr_rdpmc / cap_bit0 : 1,
1458 cap_bit0_is_deprecated : 1,
1459 cap_user_rdpmc : 1,
1460 cap_user_time : 1,
1461 cap_user_time_zero : 1,
135cba8b 1462 };
f2b1d720 1463 };
ce88f77b
MK
1464 __u16 pmc_width;
1465 __u16 time_shift;
1466 __u32 time_mult;
1467 __u64 time_offset;
1468 __u64 __reserved[120]; /* Pad to 1k */
1469 __u64 data_head; /* head in the data section */
1470 __u64 data_tail; /* user-space written tail */
21d9849a
VW
1471 __u64 data_offset; /* where the buffer starts */
1472 __u64 data_size; /* data buffer size */
4e47c6e5
VW
1473 __u64 aux_head;
1474 __u64 aux_tail;
1475 __u64 aux_offset;
1476 __u64 aux_size;
21d9849a 1477
f2b1d720
MK
1478};
1479.fi
1480.in
1481
ce88f77b 1482The following list describes the fields in the
f2b1d720 1483.I perf_event_mmap_page
e525b89f 1484structure in more detail:
f2b1d720
MK
1485.TP
1486.I version
1487Version number of this structure.
f2b1d720
MK
1488.TP
1489.I compat_version
1490The lowest version this is compatible with.
f2b1d720
MK
1491.TP
1492.I lock
1493A seqlock for synchronization.
f2b1d720
MK
1494.TP
1495.I index
1496A unique hardware counter identifier.
f2b1d720
MK
1497.TP
1498.I offset
135cba8b
VW
1499When using rdpmc for reads this offset value
1500must be added to the one returned by rdpmc to get
1501the current total event count.
f2b1d720
MK
1502.TP
1503.I time_enabled
1504Time the event was active.
f2b1d720
MK
1505.TP
1506.I time_running
1507Time the event was running.
f2b1d720 1508.TP
31c1f2b0 1509.IR cap_usr_time " / " cap_usr_rdpmc " / " cap_bit0 " (since Linux 3.4)"
747a6e7c 1510.\" commit c7206205d00ab375839bd6c7ddb247d600693c09
e9bd9b2c 1511There was a bug in the definition of
f2b1d720 1512.I cap_usr_time
135cba8b
VW
1513and
1514.I cap_usr_rdpmc
1515from Linux 3.4 until Linux 3.11.
1516Both bits were defined to point to the same location, so it was
e9bd9b2c 1517impossible to know if
135cba8b
VW
1518.I cap_usr_time
1519or
1520.I cap_usr_rdpmc
1521were actually set.
1522
4010bc07 1523Starting with Linux 3.12, these are renamed to
747a6e7c 1524.\" commit fa7315871046b9a4c48627905691dbde57e51033
135cba8b 1525.I cap_bit0
e41c36b2 1526and you should use the
135cba8b
VW
1527.I cap_user_time
1528and
1529.I cap_user_rdpmc
1530fields instead.
f2b1d720 1531.TP
31c1f2b0 1532.IR cap_bit0_is_deprecated " (since Linux 3.12)"
747a6e7c 1533.\" commit fa7315871046b9a4c48627905691dbde57e51033
37bee118 1534If set, this bit indicates that the kernel supports
135cba8b
VW
1535the properly separated
1536.I cap_user_time
1537and
1538.I cap_user_rdpmc
1539bits.
1540
1541If not-set, it indicates an older kernel where
1542.I cap_usr_time
1543and
f2b1d720 1544.I cap_usr_rdpmc
135cba8b
VW
1545map to the same bit and thus both features should
1546be used with caution.
135cba8b 1547.TP
31c1f2b0 1548.IR cap_user_rdpmc " (since Linux 3.12)"
747a6e7c 1549.\" commit fa7315871046b9a4c48627905691dbde57e51033
f2b1d720
MK
1550If the hardware supports user-space read of performance counters
1551without syscall (this is the "rdpmc" instruction on x86), then
1552the following code can be used to do a read:
1553
1554.in +4n
1555.nf
1556u32 seq, time_mult, time_shift, idx, width;
1557u64 count, enabled, running;
1558u64 cyc, time_offset;
f2b1d720
MK
1559
1560do {
1561 seq = pc\->lock;
1562 barrier();
1563 enabled = pc\->time_enabled;
1564 running = pc\->time_running;
1565
1566 if (pc\->cap_usr_time && enabled != running) {
1567 cyc = rdtsc();
1568 time_offset = pc\->time_offset;
1569 time_mult = pc\->time_mult;
1570 time_shift = pc\->time_shift;
1571 }
1572
1573 idx = pc\->index;
1574 count = pc\->offset;
1575
1576 if (pc\->cap_usr_rdpmc && idx) {
1577 width = pc\->pmc_width;
135cba8b 1578 count += rdpmc(idx \- 1);
f2b1d720
MK
1579 }
1580
1581 barrier();
1582} while (pc\->lock != seq);
1583.fi
1584.in
f2b1d720 1585.TP
cc19ea28 1586.IR cap_user_time " (since Linux 3.12)"
747a6e7c 1587.\" commit fa7315871046b9a4c48627905691dbde57e51033
7d182bb6 1588This bit indicates the hardware has a constant, nonstop
135cba8b
VW
1589timestamp counter (TSC on x86).
1590.TP
31c1f2b0 1591.IR cap_user_time_zero " (since Linux 3.12)"
747a6e7c 1592.\" commit fa7315871046b9a4c48627905691dbde57e51033
135cba8b
VW
1593Indicates the presence of
1594.I time_zero
1595which allows mapping timestamp values to
1596the hardware clock.
1597.TP
f2b1d720
MK
1598.I pmc_width
1599If
1600.IR cap_usr_rdpmc ,
1601this field provides the bit-width of the value
1602read using the rdpmc or equivalent instruction.
1603This can be used to sign extend the result like:
1604
1605.in +4n
1606.nf
1607pmc <<= 64 \- pmc_width;
1608pmc >>= 64 \- pmc_width; // signed shift right
1609count += pmc;
1610.fi
1611.in
f2b1d720
MK
1612.TP
1613.IR time_shift ", " time_mult ", " time_offset
1614
1615If
1616.IR cap_usr_time ,
1617these fields can be used to compute the time
4b3a5f01
MK
1618delta since
1619.I time_enabled
1620(in nanoseconds) using rdtsc or similar.
f2b1d720
MK
1621.nf
1622
1623 u64 quot, rem;
1624 u64 delta;
1625 quot = (cyc >> time_shift);
988688f6 1626 rem = cyc & (((u64)1 << time_shift) \- 1);
f2b1d720
MK
1627 delta = time_offset + quot * time_mult +
1628 ((rem * time_mult) >> time_shift);
1629.fi
1630
7db515ef
MK
1631Where
1632.IR time_offset ,
1633.IR time_mult ,
1634.IR time_shift ,
1635and
1636.IR cyc
1637are read in the
f2b1d720
MK
1638seqcount loop described above.
1639This delta can then be added to
1640enabled and possibly running (if idx), improving the scaling:
1641.nf
1642
1643 enabled += delta;
1644 if (idx)
1645 running += delta;
1646 quot = count / running;
1647 rem = count % running;
1648 count = quot * enabled + (rem * enabled) / running;
1649.fi
f2b1d720 1650.TP
31c1f2b0 1651.IR time_zero " (since Linux 3.12)"
747a6e7c 1652.\" commit fa7315871046b9a4c48627905691dbde57e51033
135cba8b 1653
e9bd9b2c 1654If
135cba8b 1655.I cap_user_time_zero
37bee118 1656is set, then the hardware clock (the TSC timestamp counter on x86)
135cba8b
VW
1657can be calculated from the
1658.IR time_zero ", " time_mult ", and " time_shift " values:"
ce88f77b 1659
135cba8b
VW
1660.nf
1661 time = timestamp \- time_zero;
1662 quot = time / time_mult;
1663 rem = time % time_mult;
1664 cyc = (quot << time_shift) + (rem << time_shift) / time_mult;
1665.fi
ce88f77b 1666
135cba8b 1667And vice versa:
ce88f77b 1668
135cba8b
VW
1669.nf
1670 quot = cyc >> time_shift;
988688f6 1671 rem = cyc & (((u64)1 << time_shift) \- 1);
135cba8b
VW
1672 timestamp = time_zero + quot * time_mult +
1673 ((rem * time_mult) >> time_shift);
1674.fi
1675.TP
f2b1d720
MK
1676.I data_head
1677This points to the head of the data section.
7db515ef
MK
1678The value continuously increases, it does not wrap.
1679The value needs to be manually wrapped by the size of the mmap buffer
f2b1d720
MK
1680before accessing the samples.
1681
ce88f77b
MK
1682On SMP-capable platforms, after reading the
1683.I data_head
1684value,
ad73a2cc 1685user space should issue an rmb().
f2b1d720 1686.TP
fecd584f 1687.I data_tail
f2b1d720
MK
1688When the mapping is
1689.BR PROT_WRITE ,
7db515ef
MK
1690the
1691.I data_tail
1692value should be written by user space to reflect the last read data.
31020de9 1693In this case, the kernel will not overwrite unread data.
21d9849a
VW
1694.TP
1695.IR data_offset " (since Linux 4.1)"
1696.\" commit e8c6deac69629c0cb97c3d3272f8631ef17f8f0f
1697Contains the offset of the location in the mmap buffer
1698where perf sample data begins.
1699.TP
1700.IR data_size " (since Linux 4.1)"
1701.\" commit e8c6deac69629c0cb97c3d3272f8631ef17f8f0f
1702Contains the size of the perf sample region within
1703the mmap buffer.
4e47c6e5
VW
1704.TP
1705.IR aux_head ", " aux_tail ", " aux_offset ", " aux_size " (since Linux 4.1)"
1706.\" commit 45bfb2e50471abbbfd83d40d28c986078b0d24ff
95655a22
MK
1707The AUX region allows mmaping a separate sample buffer for
1708high-bandwidth data streams (separate from the main perf sample buffer).
1709An example of a high-bandwidth stream is instruction tracing support,
4e47c6e5
VW
1710as is found in newer Intel processors.
1711
1712To set up an AUX area, first
1713.I aux_offset
1714needs to be set with an offset greater than
1715.IR data_offset + data_size
1716and
1717.I aux_size
1718needs to be set to the desired buffer size.
1719The desired offset and size must be page aligned, and the size
1720must be a power of two.
1721These values are then passed to mmap in order to map the AUX buffer.
95655a22
MK
1722Pages in the AUX buffer are included as part of the
1723.BR RLIMIT_MEMLOCK
1724resource limit (see
1725.BR setrlimit (2)),
1726and also as part of the
4e47c6e5
VW
1727.I perf_event_mlock_kb
1728allowance.
1729
95655a22 1730By default, the AUX buffer will be truncated if it will not fit
b1355f6a
VW
1731in the available space in the ring buffer.
1732If the AUX buffer is mapped as a read only buffer, then it will
1733operate in ring buffer mode where old data will be overwritten
1734by new.
95655a22 1735In overwrite mode, it might not be possible to infer where the
b1355f6a
VW
1736new data began, and it is the consumer's job to disable
1737measurement while reading to avoid possible data races.
1738
4e47c6e5
VW
1739The
1740.IR aux_head " and " aux_tail
1741ring buffer pointers have the same behavior and ordering
1742rules as the previously described
1743.IR data_head " and " data_tail .
e525b89f 1744.PP
f2b1d720
MK
1745The following 2^n ring-buffer pages have the layout described below.
1746
1747If
1748.I perf_event_attr.sample_id_all
1749is set, then all event types will
1750have the sample_type selected fields related to where/when (identity)
1751an event took place (TID, TIME, ID, CPU, STREAM_ID) described in
1752.B PERF_RECORD_SAMPLE
1753below, it will be stashed just after the
7db515ef
MK
1754.I perf_event_header
1755and the fields already present for the existing
3d1ee497 1756fields, that is, at the end of the payload.
4b3a5f01
MK
1757This allows a newer perf.data
1758file to be supported by older perf tools, with the new optional
f2b1d720
MK
1759fields being ignored.
1760
1761The mmap values start with a header:
1762
1763.in +4n
1764.nf
1765struct perf_event_header {
1766 __u32 type;
1767 __u16 misc;
1768 __u16 size;
1769};
1770.fi
1771.in
1772
1773Below, we describe the
1774.I perf_event_header
1775fields in more detail.
4047bc6c
MK
1776For ease of reading,
1777the fields with shorter descriptions are presented first.
1778.TP
1779.I size
1780This indicates the size of the record.
1781.TP
1782.I misc
1783The
1784.I misc
1785field contains additional information about the sample.
1786
1787The CPU mode can be determined from this value by masking with
1788.B PERF_RECORD_MISC_CPUMODE_MASK
1789and looking for one of the following (note these are not
1790bit masks, only one can be set at a time):
1791.RS
1792.TP
1793.B PERF_RECORD_MISC_CPUMODE_UNKNOWN
1794Unknown CPU mode.
1795.TP
1796.B PERF_RECORD_MISC_KERNEL
1797Sample happened in the kernel.
1798.TP
1799.B PERF_RECORD_MISC_USER
1800Sample happened in user code.
1801.TP
1802.B PERF_RECORD_MISC_HYPERVISOR
1803Sample happened in the hypervisor.
1804.TP
747a6e7c 1805.BR PERF_RECORD_MISC_GUEST_KERNEL " (since Linux 2.6.35)"
60dafbc1 1806.\" commit 39447b386c846bbf1c56f6403c5282837486200f
4047bc6c
MK
1807Sample happened in the guest kernel.
1808.TP
747a6e7c 1809.BR PERF_RECORD_MISC_GUEST_USER " (since Linux 2.6.35)"
60dafbc1 1810.\" commit 39447b386c846bbf1c56f6403c5282837486200f
4047bc6c
MK
1811Sample happened in guest user code.
1812.RE
1813
1814.RS
d5a24378
MK
1815Since the following three statuses are generated by
1816different record types, they alias to the same bit:
4047bc6c 1817.TP
60dafbc1
MK
1818.BR PERF_RECORD_MISC_MMAP_DATA " (since Linux 3.10)"
1819.\" commit 2fe85427e3bf65d791700d065132772fc26e4d75
4047bc6c
MK
1820This is set when the mapping is not executable;
1821otherwise the mapping is executable.
1822.TP
60dafbc1
MK
1823.BR PERF_RECORD_MISC_COMM_EXEC " (since Linux 3.16)"
1824.\" commit 82b897782d10fcc4930c9d4a15b175348fdd2871
49bc411c
VW
1825This is set for a
1826.B PERF_RECORD_COMM
1827record on Linux 3.16 and later kernels
1828if a process name change was caused by an
1829.BR exec (2)
1830system call.
9277a75d
VW
1831.TP
1832.BR PERF_RECORD_MISC_SWITCH_OUT " (since Linux 4.3)"
1833.\" commit 45ac1403f564f411c6a383a2448688ba8dd705a4
1834When a
d5a24378
MK
1835.BR PERF_RECORD_SWITCH
1836or
1837.BR PERF_RECORD_SWITCH_CPU_WIDE
1838record is generated, this bit indicates that the
9277a75d 1839context switch is away from the current process
d5a24378 1840(instead of into the current process).
9277a75d
VW
1841.RE
1842
1843.RS
1844In addition, the following bits can be set:
49bc411c 1845.TP
4047bc6c
MK
1846.B PERF_RECORD_MISC_EXACT_IP
1847This indicates that the content of
1848.B PERF_SAMPLE_IP
1849points
1850to the actual instruction that triggered the event.
1851See also
1852.IR perf_event_attr.precise_ip .
1853.TP
60dafbc1
MK
1854.BR PERF_RECORD_MISC_EXT_RESERVED " (since Linux 2.6.35)"
1855.\" commit 1676b8a077c352085d52578fb4f29350b58b6e74
4047bc6c 1856This indicates there is extended data available (currently not used).
ffbc7c02
VW
1857.TP
1858.B PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT
1859.\" commit 930e6fcd2bcce9bcd9d4aa7e755678d33f3fe6f4
1860This bit is not set by the kernel.
141efa1b
MK
1861It is reserved for the user-space perf utility to indicate that
1862.I /proc/[pid]/maps
1863parsing was taking too long and was stopped, and thus the mmap
ffbc7c02 1864records may be truncated.
4047bc6c 1865.RE
f2b1d720
MK
1866.TP
1867.I type
1868The
1869.I type
1870value is one of the below.
1871The values in the corresponding record (that follows the header)
1872depend on the
1873.I type
1874selected as shown.
f2b1d720 1875.RS
7db515ef 1876.TP 4
f2b1d720
MK
1877.B PERF_RECORD_MMAP
1878The MMAP events record the
1879.B PROT_EXEC
1880mappings so that we can correlate
ad73a2cc 1881user-space IPs to code.
f2b1d720
MK
1882They have the following structure:
1883
1884.in +4n
1885.nf
1886struct {
1887 struct perf_event_header header;
1888 u32 pid, tid;
1889 u64 addr;
1890 u64 len;
1891 u64 pgoff;
1892 char filename[];
1893};
1894.fi
1895.in
9bfc542b
VW
1896.RS
1897.TP
1898.I pid
3a058284 1899is the process ID.
9bfc542b
VW
1900.TP
1901.I tid
3a058284 1902is the thread ID.
9bfc542b
VW
1903.TP
1904.I addr
1905is the address of the allocated memory.
.TP
1906.I len
1907is the length of the allocated memory.
.TP
1908.I pgoff
1909is the page offset of the allocated memory.
.TP
1910.I filename
1911is a string describing the backing of the allocated memory.
1912.RE
f2b1d720
MK
1913.TP
1914.B PERF_RECORD_LOST
1915This record indicates when events are lost.
1916
1917.in +4n
1918.nf
1919struct {
1920 struct perf_event_header header;
7a10da70
MK
1921 u64 id;
1922 u64 lost;
7480dabb 1923 struct sample_id sample_id;
f2b1d720
MK
1924};
1925.fi
1926.in
f2b1d720
MK
1927.RS
1928.TP
1929.I id
1930is the unique event ID for the samples that were lost.
1931.TP
1932.I lost
1933is the number of events that were lost.
1934.RE
f2b1d720
MK
1935.TP
1936.B PERF_RECORD_COMM
1937This record indicates a change in the process name.
1938
1939.in +4n
1940.nf
1941struct {
1942 struct perf_event_header header;
7a10da70
MK
1943 u32 pid;
1944 u32 tid;
1945 char comm[];
7480dabb 1946 struct sample_id sample_id;
f2b1d720
MK
1947};
1948.fi
1949.in
49bc411c
VW
1950.RS
1951.TP
1952.I pid
5ab35ae5 1953is the process ID.
49bc411c
VW
1954.TP
1955.I tid
5ab35ae5 1956is the thread ID.
49bc411c
VW
1957.TP
1958.I comm
1959is a string containing the new name of the process.
1960.RE
f2b1d720
MK
1961.TP
1962.B PERF_RECORD_EXIT
1963This record indicates a process exit event.
1964
1965.in +4n
1966.nf
1967struct {
1968 struct perf_event_header header;
7a10da70
MK
1969 u32 pid, ppid;
1970 u32 tid, ptid;
1971 u64 time;
7480dabb 1972 struct sample_id sample_id;
f2b1d720
MK
1973};
1974.fi
1975.in
f2b1d720
MK
1976.TP
1977.BR PERF_RECORD_THROTTLE ", " PERF_RECORD_UNTHROTTLE
1978This record indicates a throttle/unthrottle event.
1979
1980.in +4n
1981.nf
1982struct {
1983 struct perf_event_header header;
7a10da70
MK
1984 u64 time;
1985 u64 id;
1986 u64 stream_id;
7480dabb 1987 struct sample_id sample_id;
f2b1d720
MK
1988};
1989.fi
1990.in
f2b1d720
MK
1991.TP
1992.B PERF_RECORD_FORK
1993This record indicates a fork event.
1994
1995.in +4n
1996.nf
1997struct {
1998 struct perf_event_header header;
7a10da70
MK
1999 u32 pid, ppid;
2000 u32 tid, ptid;
2001 u64 time;
7480dabb 2002 struct sample_id sample_id;
f2b1d720
MK
2003};
2004.fi
2005.in
f2b1d720
MK
2006.TP
2007.B PERF_RECORD_READ
2008This record indicates a read event.
2009
2010.in +4n
2011.nf
2012struct {
2013 struct perf_event_header header;
7a10da70 2014 u32 pid, tid;
f2b1d720 2015 struct read_format values;
7480dabb 2016 struct sample_id sample_id;
f2b1d720
MK
2017};
2018.fi
2019.in
f2b1d720
MK
2020.TP
2021.B PERF_RECORD_SAMPLE
2022This record indicates a sample.
2023
2024.in +4n
2025.nf
2026struct {
2027 struct perf_event_header header;
880403e9
MK
2028 u64 sample_id; /* if PERF_SAMPLE_IDENTIFIER */
2029 u64 ip; /* if PERF_SAMPLE_IP */
2030 u32 pid, tid; /* if PERF_SAMPLE_TID */
2031 u64 time; /* if PERF_SAMPLE_TIME */
2032 u64 addr; /* if PERF_SAMPLE_ADDR */
2033 u64 id; /* if PERF_SAMPLE_ID */
2034 u64 stream_id; /* if PERF_SAMPLE_STREAM_ID */
2035 u32 cpu, res; /* if PERF_SAMPLE_CPU */
2036 u64 period; /* if PERF_SAMPLE_PERIOD */
2037 struct read_format v; /* if PERF_SAMPLE_READ */
2038 u64 nr; /* if PERF_SAMPLE_CALLCHAIN */
2039 u64 ips[nr]; /* if PERF_SAMPLE_CALLCHAIN */
2040 u32 size; /* if PERF_SAMPLE_RAW */
2041 char data[size]; /* if PERF_SAMPLE_RAW */
2042 u64 bnr; /* if PERF_SAMPLE_BRANCH_STACK */
7db515ef 2043 struct perf_branch_entry lbr[bnr];
880403e9
MK
2044 /* if PERF_SAMPLE_BRANCH_STACK */
2045 u64 abi; /* if PERF_SAMPLE_REGS_USER */
7a10da70 2046 u64 regs[weight(mask)];
880403e9
MK
2047 /* if PERF_SAMPLE_REGS_USER */
2048 u64 size; /* if PERF_SAMPLE_STACK_USER */
2049 char data[size]; /* if PERF_SAMPLE_STACK_USER */
2050 u64 dyn_size; /* if PERF_SAMPLE_STACK_USER &&
2051 size != 0 */
2052 u64 weight; /* if PERF_SAMPLE_WEIGHT */
2053 u64 data_src; /* if PERF_SAMPLE_DATA_SRC */
2054 u64 transaction; /* if PERF_SAMPLE_TRANSACTION */
2055 u64 abi; /* if PERF_SAMPLE_REGS_INTR */
7a10da70 2056 u64 regs[weight(mask)];
880403e9 2057 /* if PERF_SAMPLE_REGS_INTR */
f2b1d720
MK
2058};
2059.fi
4047bc6c
MK
2060.RS 4
2061.TP 4
7480dabb
VW
2062.I sample_id
2063If
2064.B PERF_SAMPLE_IDENTIFIER
2065is enabled, a 64-bit unique ID is included.
e9bd9b2c 2066This is a duplication of the
7480dabb
VW
2067.B PERF_SAMPLE_ID
2068.I id
2069value, but included at the beginning of the sample
2070so parsers can easily obtain the value.
2071.TP
f2b1d720 2072.I ip
7db515ef
MK
2073If
2074.B PERF_SAMPLE_IP
2075is enabled, then a 64-bit instruction
f2b1d720 2076pointer value is included.
f2b1d720 2077.TP
7db515ef
MK
2078.IR pid ", " tid
2079If
2080.B PERF_SAMPLE_TID
2081is enabled, then a 32-bit process ID
2082and 32-bit thread ID are included.
f2b1d720
MK
2083.TP
2084.I time
7db515ef
MK
2085If
2086.B PERF_SAMPLE_TIME
2087is enabled, then a 64-bit timestamp
f2b1d720
MK
2088is included.
2089This is obtained via local_clock() which is a hardware timestamp
2090if available and the jiffies value if not.
f2b1d720
MK
2091.TP
2092.I addr
7db515ef
MK
2093If
2094.B PERF_SAMPLE_ADDR
2095is enabled, then a 64-bit address is included.
f2b1d720
MK
2096This is usually the address of a tracepoint,
2097breakpoint, or software event; otherwise the value is 0.
f2b1d720
MK
2098.TP
2099.I id
7db515ef
MK
2100If
2101.B PERF_SAMPLE_ID
2102is enabled, a 64-bit unique ID is included.
f2b1d720 2103If the event is a member of an event group, the group leader ID is returned.
7db515ef
MK
2104This ID is the same as the one returned by
2105.BR PERF_FORMAT_ID .
f2b1d720
MK
2106.TP
2107.I stream_id
7db515ef
MK
2108If
2109.B PERF_SAMPLE_STREAM_ID
2110is enabled, a 64-bit unique ID is included.
f2b1d720
MK
2111Unlike
2112.B PERF_SAMPLE_ID
2113the actual ID is returned, not the group leader.
7db515ef
MK
2114This ID is the same as the one returned by
2115.BR PERF_FORMAT_ID .
f2b1d720 2116.TP
7db515ef
MK
2117.IR cpu ", " res
2118If
2119.B PERF_SAMPLE_CPU
2120is enabled, this is a 32-bit value indicating
f2b1d720
MK
2121which CPU was being used, in addition to a reserved (unused)
212232-bit value.
f2b1d720
MK
2123.TP
2124.I period
7db515ef
MK
2125If
2126.B PERF_SAMPLE_PERIOD
2127is enabled, a 64-bit value indicating
f2b1d720 2128the current sampling period is written.
f2b1d720
MK
2129.TP
2130.I v
7db515ef
MK
2131If
2132.B PERF_SAMPLE_READ
2133is enabled, a structure of type read_format
f2b1d720
MK
2134is included which has values for all events in the event group.
2135The values included depend on the
2136.I read_format
7db515ef
MK
2137value used at
2138.BR perf_event_open ()
2139time.
f2b1d720 2140.TP
7db515ef
MK
2141.IR nr ", " ips[nr]
2142If
2143.B PERF_SAMPLE_CALLCHAIN
2144is enabled, then a 64-bit number is included
f2b1d720 2145which indicates how many 64-bit instruction pointers will
7db515ef
MK
2146follow.
2147This is the current callchain.
f2b1d720 2148.TP
7ede2f66 2149.IR size ", " data[size]
7db515ef
MK
2150If
2151.B PERF_SAMPLE_RAW
2152is enabled, then a 32-bit value indicating size
f2b1d720
MK
2153is included followed by an array of 8-bit values of length size.
2154The values are padded with 0 to have 64-bit alignment.
2155
2156This RAW record data is opaque with respect to the ABI.
2157The ABI doesn't make any promises with respect to the stability
2158of its content; it may vary depending
2159on event, hardware, and kernel version.
f2b1d720 2160.TP
7db515ef
MK
2161.IR bnr ", " lbr[bnr]
2162If
2163.B PERF_SAMPLE_BRANCH_STACK
2164is enabled, then a 64-bit value indicating
2165the number of records is included, followed by
2166.I bnr
2167.I perf_branch_entry
045bf4d3
VW
2168structures which each include the fields:
2169.RS
2170.TP
2171.I from
2b538c3e 2172This indicates the source instruction (may not be a branch).
045bf4d3
VW
2173.TP
2174.I to
2b538c3e 2175The branch target.
045bf4d3
VW
2176.TP
2177.I mispred
2b538c3e 2178The branch target was mispredicted.
045bf4d3
VW
2179.TP
2180.I predicted
2b538c3e 2181The branch target was predicted.
e3c9782b 2182.TP
31c1f2b0 2183.IR in_tx " (since Linux 3.11)"
747a6e7c 2184.\" commit 135c5612c460f89657c4698fe2ea753f6f667963
2b538c3e 2185The branch was in a transactional memory transaction.
e3c9782b 2186.TP
31c1f2b0 2187.IR abort " (since Linux 3.11)"
747a6e7c 2188.\" commit 135c5612c460f89657c4698fe2ea753f6f667963
2b538c3e 2189The branch was in an aborted transactional memory transaction.
96919592
VW
2190.TP
2191.IR cycles " (since Linux 4.3)"
2192.\" commit 71ef3c6b9d4665ee7afbbe4c208a98917dcfc32f
2193This reports the number of cycles elapsed since the
2194previous branch stack update.
e3c9782b 2195.P
045bf4d3
VW
2196The entries are from most to least recent, so the first entry
2197has the most recent branch.
2198
8a94e783 2199Support for
dceb9af6
MK
2200.IR mispred ,
2201.IR predicted ,
2202and
2203.IR cycles
96919592 2204is optional; if not supported, those
045bf4d3
VW
2205values will be 0.
2206
e3c9782b
VW
2207The type of branches recorded is specified by the
2208.I branch_sample_type
2209field.
2210.RE
f2b1d720 2211.TP
7db515ef
MK
2212.IR abi ", " regs[weight(mask)]
2213If
2214.B PERF_SAMPLE_REGS_USER
d1007d14 2215is enabled, then the user CPU registers are recorded.
f2b1d720
MK
2216
2217The
2218.I abi
2219field is one of
2220.BR PERF_SAMPLE_REGS_ABI_NONE ", " PERF_SAMPLE_REGS_ABI_32 " or "
7db515ef 2221.BR PERF_SAMPLE_REGS_ABI_64 .
d1007d14
VW
2222
2223The
2224.I regs
2225field is an array of the CPU registers that were specified by
2226the
2227.I sample_regs_user
2228attr field.
2229The number of values is the number of bits set in the
51700fd7 2230.I sample_regs_user
4651e412 2231bit mask.
f2b1d720 2232.TP
7db515ef
MK
2233.IR size ", " data[size] ", " dyn_size
2234If
2235.B PERF_SAMPLE_STACK_USER
02ca78a0
VW
2236is enabled, then the user stack is recorded.
2237This can be used to generate stack backtraces.
d1007d14
VW
2238.I size
2239is the size requested by the user in
02ca78a0 2240.I sample_stack_user
d1007d14
VW
2241or else the maximum record size.
2242.I data
02ca78a0
VW
2243is the stack data (a raw dump of the memory pointed to by the
2244stack pointer at the time of sampling).
d1007d14
VW
2245.I dyn_size
2246is the amount of data actually dumped (can be less than
460e3d7a 2247.IR size ).
4dc411dd
KF
2248Note that
2249.I dyn_size
2250is omitted if
2251.I size
2252is 0.
d1007d14 2253.TP
51700fd7 2254.I weight
d1007d14
VW
2255If
2256.B PERF_SAMPLE_WEIGHT
7de4a1e3 2257is enabled, then a 64-bit value provided by the hardware
d1007d14
VW
2258is recorded that indicates how costly the event was.
2259This allows expensive events to stand out more clearly
2260in profiles.
2261.TP
2262.I data_src
51700fd7 2263If
d1007d14 2264.B PERF_SAMPLE_DATA_SRC
7de4a1e3 2265is enabled, then a 64-bit value is recorded that is made up of
d1007d14
VW
2266the following fields:
2267.RS
2b538c3e 2268.TP 4
d1007d14 2269.I mem_op
2b538c3e
MK
2270Type of opcode, a bitwise combination of:
2271
2272.PD 0
2273.RS
2274.TP 24
d1007d14 2275.B PERF_MEM_OP_NA
2b538c3e
MK
2276Not available
2277.TP
d1007d14 2278.B PERF_MEM_OP_LOAD
2b538c3e
MK
2279Load instruction
2280.TP
d1007d14 2281.B PERF_MEM_OP_STORE
2b538c3e
MK
2282Store instruction
2283.TP
d1007d14 2284.B PERF_MEM_OP_PFETCH
2b538c3e
MK
2285Prefetch
2286.TP
d1007d14 2287.B PERF_MEM_OP_EXEC
2b538c3e
MK
2288Executable code
2289.RE
2290.PD
d1007d14
VW
2291.TP
2292.I mem_lvl
bc9d90b5 2293Memory hierarchy level hit or miss, a bitwise combination of
ef4f4031 2294the following, shifted left by
bc9d90b5 2295.BR PERF_MEM_LVL_SHIFT :
2b538c3e
MK
2296
2297.PD 0
2298.RS
2299.TP 24
d1007d14 2300.B PERF_MEM_LVL_NA
2b538c3e
MK
2301Not available
2302.TP
d1007d14 2303.B PERF_MEM_LVL_HIT
2b538c3e
MK
2304Hit
2305.TP
d1007d14 2306.B PERF_MEM_LVL_MISS
2b538c3e
MK
2307Miss
2308.TP
d1007d14 2309.B PERF_MEM_LVL_L1
2b538c3e
MK
2310Level 1 cache
2311.TP
d1007d14 2312.B PERF_MEM_LVL_LFB
2b538c3e
MK
2313Line fill buffer
2314.TP
d1007d14 2315.B PERF_MEM_LVL_L2
2b538c3e
MK
2316Level 2 cache
2317.TP
d1007d14 2318.B PERF_MEM_LVL_L3
2b538c3e
MK
2319Level 3 cache
2320.TP
d1007d14 2321.B PERF_MEM_LVL_LOC_RAM
2b538c3e
MK
2322Local DRAM
2323.TP
d1007d14 2324.B PERF_MEM_LVL_REM_RAM1
2b538c3e
MK
2325Remote DRAM 1 hop
2326.TP
d1007d14 2327.B PERF_MEM_LVL_REM_RAM2
2b538c3e
MK
2328Remote DRAM 2 hops
2329.TP
d1007d14 2330.B PERF_MEM_LVL_REM_CCE1
2b538c3e
MK
2331Remote cache 1 hop
2332.TP
d1007d14 2333.B PERF_MEM_LVL_REM_CCE2
2b538c3e
MK
2334Remote cache 2 hops
2335.TP
d1007d14 2336.B PERF_MEM_LVL_IO
2b538c3e
MK
2337I/O memory
2338.TP
d1007d14 2339.B PERF_MEM_LVL_UNC
2b538c3e
MK
2340Uncached memory
2341.RE
2342.PD
d1007d14
VW
2343.TP
2344.I mem_snoop
bc9d90b5
VW
2345Snoop mode, a bitwise combination of the following, shifted left by
2346.BR PERF_MEM_SNOOP_SHIFT :
2b538c3e
MK
2347
2348.PD 0
2349.RS
2350.TP 24
d1007d14 2351.B PERF_MEM_SNOOP_NA
2b538c3e
MK
2352Not available
2353.TP
d1007d14 2354.B PERF_MEM_SNOOP_NONE
2b538c3e
MK
2355No snoop
2356.TP
d1007d14 2357.B PERF_MEM_SNOOP_HIT
2b538c3e
MK
2358Snoop hit
2359.TP
d1007d14 2360.B PERF_MEM_SNOOP_MISS
2b538c3e
MK
2361Snoop miss
2362.TP
d1007d14 2363.B PERF_MEM_SNOOP_HITM
2b538c3e
MK
2364Snoop hit modified
2365.RE
2366.PD
d1007d14
VW
2367.TP
2368.I mem_lock
bc9d90b5
VW
2369Lock instruction, a bitwise combination of the following, shifted left by
2370.BR PERF_MEM_LOCK_SHIFT :
2b538c3e
MK
2371
2372.PD 0
2373.RS
2374.TP 24
d1007d14 2375.B PERF_MEM_LOCK_NA
2b538c3e
MK
2376Not available
2377.TP
d1007d14 2378.B PERF_MEM_LOCK_LOCKED
2b538c3e
MK
2379Locked transaction
2380.RE
2381.PD
d1007d14
VW
2382.TP
2383.I mem_dtlb
bc9d90b5
VW
2384TLB access hit or miss, a bitwise combination of the following, shifted
2385left by
2386.BR PERF_MEM_TLB_SHIFT :
2b538c3e
MK
2387
2388.PD 0
2389.RS
2390.TP 24
d1007d14 2391.B PERF_MEM_TLB_NA
2b538c3e
MK
2392Not available
2393.TP
d1007d14 2394.B PERF_MEM_TLB_HIT
2b538c3e
MK
2395Hit
2396.TP
d1007d14 2397.B PERF_MEM_TLB_MISS
2b538c3e
MK
2398Miss
2399.TP
d1007d14 2400.B PERF_MEM_TLB_L1
2b538c3e
MK
2401Level 1 TLB
2402.TP
d1007d14 2403.B PERF_MEM_TLB_L2
2b538c3e
MK
2404Level 2 TLB
2405.TP
d1007d14 2406.B PERF_MEM_TLB_WK
2b538c3e
MK
2407Hardware walker
2408.TP
d1007d14 2409.B PERF_MEM_TLB_OS
2b538c3e
MK
2410OS fault handler
2411.RE
2412.PD
d1007d14 2413.RE
1e043959
VW
2414.TP
2415.I transaction
2416If the
2417.B PERF_SAMPLE_TRANSACTION
37bee118 2418flag is set, then a 64-bit field is recorded describing
1e043959
VW
2419the sources of any transactional memory aborts.
2420
2421The field is a bitwise combination of the following values:
2422.RS
2423.TP
2424.B PERF_TXN_ELISION
b3f39642 2425Abort from an elision type transaction (Intel-CPU-specific).
1e043959
VW
2426.TP
2427.B PERF_TXN_TRANSACTION
b3f39642 2428Abort from a generic transaction.
1e043959
VW
2429.TP
2430.B PERF_TXN_SYNC
b3f39642 2431Synchronous abort (related to the reported instruction).
1e043959
VW
2432.TP
2433.B PERF_TXN_ASYNC
b3f39642 2434Asynchronous abort (not related to the reported instruction).
1e043959
VW
2435.TP
2436.B PERF_TXN_RETRY
053a3e08 2437Retryable abort (retrying the transaction may have succeeded).
1e043959
VW
2438.TP
2439.B PERF_TXN_CONFLICT
b3f39642 2440Abort due to memory conflicts with other threads.
1e043959
VW
2441.TP
2442.B PERF_TXN_CAPACITY_WRITE
b3f39642 2443Abort due to write capacity overflow.
1e043959
VW
2444.TP
2445.B PERF_TXN_CAPACITY_READ
b3f39642 2446Abort due to read capacity overflow.
1e043959 2447.RE
b3f39642
MK
2448.IP
2449In addition, a user-specified abort code can be obtained from
2450the high 32 bits of the field by shifting right by
1e043959 2451.B PERF_TXN_ABORT_SHIFT
4b3a5f01 2452and masking with the value
1e043959 2453.BR PERF_TXN_ABORT_MASK .
f5281dfd
VW
2454.TP
2455.IR abi ", " regs[weight(mask)]
2456If
2457.B PERF_SAMPLE_REGS_INTR
2458is enabled, then the CPU registers at the time of the sample are recorded.
2459
2460The
2461.I abi
2462field is one of
4b3a5f01
MK
2463.BR PERF_SAMPLE_REGS_ABI_NONE ,
2464.BR PERF_SAMPLE_REGS_ABI_32 ,
2465or
f5281dfd
VW
2466.BR PERF_SAMPLE_REGS_ABI_64 .
2467
2468The
2469.I regs
2470field is an array of the CPU registers that were specified by
2471the
2472.I sample_regs_intr
2473attr field.
2474The number of values is the number of bits set in the
2475.I sample_regs_intr
2476bit mask.
f2b1d720 2477.RE
9bfc542b
VW
2478.TP
2479.B PERF_RECORD_MMAP2
2480This record includes extended information on
2481.BR mmap (2)
2482calls returning executable mappings.
2483The format is similar to that of the
2484.B PERF_RECORD_MMAP
3a058284 2485record, but includes extra values that allow uniquely identifying
9bfc542b 2486shared mappings.
3a058284 2487
9bfc542b
VW
2488.in +4n
2489.nf
2490struct {
2491 struct perf_event_header header;
7a10da70
MK
2492 u32 pid;
2493 u32 tid;
2494 u64 addr;
2495 u64 len;
2496 u64 pgoff;
2497 u32 maj;
2498 u32 min;
2499 u64 ino;
2500 u64 ino_generation;
2501 u32 prot;
2502 u32 flags;
2503 char filename[];
9bfc542b
VW
2504 struct sample_id sample_id;
2505};
2506.fi
2507.RS
2508.TP
2509.I pid
3a058284 2510is the process ID.
9bfc542b
VW
2511.TP
2512.I tid
3a058284 2513is the thread ID.
9bfc542b
VW
2514.TP
2515.I addr
2516is the address of the allocated memory.
2517.TP
2518.I len
2519is the length of the allocated memory.
2520.TP
2521.I pgoff
2522is the page offset of the allocated memory.
2523.TP
2524.I maj
3a058284 2525is the major ID of the underlying device.
9bfc542b
VW
2526.TP
2527.I min
3a058284 2528is the minor ID of the underlying device.
9bfc542b
VW
2529.TP
2530.I ino
3a058284 2531is the inode number.
9bfc542b
VW
2532.TP
2533.I ino_generation
2534is the inode generation.
2535.TP
2536.I prot
2537is the protection information.
2538.TP
2539.I flags
2540is the flags information.
2541.TP
2542.I filename
2543is a string describing the backing of the allocated memory.
2544.RE
1fda209c
VW
2545.TP
2546.BR PERF_RECORD_AUX " (since Linux 4.1)"
2547.\" commit 68db7e98c3a6ebe7284b6cf14906ed7c55f3f7f0
2548This record reports that new data is available in the separate
2549AUX buffer region.
2550
2551.in +4n
2552.nf
2553struct {
2554 struct perf_event_header header;
7a10da70
MK
2555 u64 aux_offset;
2556 u64 aux_size;
2557 u64 flags;
1fda209c
VW
2558 struct sample_id sample_id;
2559};
2560.fi
2561.RS
2562.TP
2563.I aux_offset
2564offset in the AUX mmap region where the new data begins.
2565.TP
2566.I aux_size
2567size of the data made available.
2568.TP
2569.I flags
95655a22 2570describes the AUX update.
1fda209c
VW
2571.RS
2572.TP
2573.B PERF_AUX_FLAG_TRUNCATED
95655a22 2574if set, then the data returned was truncated to fit the available
1fda209c 2575buffer size.
b1355f6a
VW
2576.TP
2577.B PERF_AUX_FLAG_OVERWRITE
2578.\" commit 2023a0d2829e521fe6ad6b9907f3f90bfbf57142
95655a22 2579if set, then the data returned has overwritten previous data.
1fda209c
VW
2580.RE
2581.RE
6932aac3
VW
2582.TP
2583.BR PERF_RECORD_ITRACE_START " (since Linux 4.1)"
2584.\" ec0d7729bbaed4b9d2d3fada693278e13a3d1368
2585This record indicates which process has initiated an instruction
2586trace event, allowing tools to properly correlate the instruction
2587addresses in the AUX buffer with the proper executable.
2588
2589.in +4n
2590.nf
2591struct {
2592 struct perf_event_header header;
7a10da70
MK
2593 u32 pid;
2594 u32 tid;
6932aac3
VW
2595};
2596.fi
2597.RS
2598.TP
2599.I pid
95655a22 2600process ID of the thread starting an instruction trace.
6932aac3
VW
2601.TP
2602.I tid
95655a22 2603thread ID of the thread starting an instruction trace.
6932aac3 2604.RE
46012ba3
DH
2605.TP
2606.BR PERF_RECORD_LOST_SAMPLES " (since Linux 4.2)"
2607.\" f38b0dbb491a6987e198aa6b428db8692a6480f8
2608When using hardware sampling (such as Intel PEBS) this record
4199d3a1 2609indicates some number of samples that may have been lost.
46012ba3
DH
2610
2611.in +4n
2612.nf
2613struct {
2614 struct perf_event_header header;
7a10da70 2615 u64 lost;
46012ba3
DH
2616 struct sample_id sample_id;
2617};
2618.fi
2619.RS
2620.TP
2621.I lost
2622the number of potentially lost samples.
2623.RE
9277a75d
VW
2624.TP
2625.BR PERF_RECORD_SWITCH " (since Linux 4.3)"
2626.\" commit 45ac1403f564f411c6a383a2448688ba8dd705a4
2627This record indicates a context switch has happened.
2628The
2629.B PERF_RECORD_MISC_SWITCH_OUT
2630bit in the
2631.I misc
2632field indicates whether it was a context switch into
2633or away from the current process.
2634
2635.in +4n
2636.nf
2637struct {
2638 struct perf_event_header header;
2639 struct sample_id sample_id;
2640};
2641.fi
2642.TP
2643.BR PERF_RECORD_SWITCH_CPU_WIDE " (since Linux 4.3)"
2644.\" commit 45ac1403f564f411c6a383a2448688ba8dd705a4
2645As with
2646.B PERF_RECORD_SWITCH
2647this record indicates a context switch has happened,
d5a24378 2648but it only occurs when sampling in CPU-wide mode
9277a75d
VW
2649and provides additional information on the process
2650being switched to/from.
2651The
2652.B PERF_RECORD_MISC_SWITCH_OUT
2653bit in the
2654.I misc
2655field indicates whether it was a context switch into
2656or away from the current process.
2657
2658.in +4n
2659.nf
2660struct {
2661 struct perf_event_header header;
2662 u32 next_prev_pid;
2663 u32 next_prev_tid;
2664 struct sample_id sample_id;
2665};
2666.fi
2667.RS
2668.TP
2669.I next_prev_pid
d5a24378 2670The process ID of the previous (if switching in)
9277a75d
VW
2671or next (if switching out) process on the CPU.
2672.TP
2673.I next_prev_tid
d5a24378 2674The thread ID of the previous (if switching in)
9277a75d
VW
2675or next (if switching out) thread on the CPU.
2676.RE
f2b1d720 2677.RE
21977c9d
VW
2678.SS Overflow handling
2679Events can be set to notify when a threshold is crossed,
2680indicating an overflow.
2681Overflow conditions can be captured by monitoring the
2682event file descriptor with
f2b1d720
MK
2683.BR poll (2),
2684.BR select (2),
21977c9d 2685or
4b3a5f01 2686.BR epoll (7).
6831ba6b
MK
2687Alternatively, the overflow events can be captured via a signal handler,
2688by enabling I/O signaling on the file descriptor; see the discussion of the
fc79d996 2689.BR F_SETOWN
6831ba6b
MK
2690and
2691.BR F_SETSIG
2692operations in
2693.BR fcntl (2).
f2b1d720 2694
6170255e 2695Overflows are generated only by sampling events
f2b1d720 2696.RI ( sample_period
7d182bb6 2697must have a nonzero value).
f2b1d720 2698
21977c9d 2699There are two ways to generate overflow notifications.
f2b1d720
MK
2700
2701The first is to set a
2702.I wakeup_events
2703or
2704.I wakeup_watermark
21977c9d 2705value that will trigger if a certain number of samples
f2b1d720 2706or bytes have been written to the mmap ring buffer.
fc79d996 2707In this case,
7db515ef 2708.B POLL_IN
21977c9d 2709is indicated.
f2b1d720
MK
2710
2711The other way is by use of the
7db515ef 2712.B PERF_EVENT_IOC_REFRESH
f2b1d720
MK
2713ioctl.
2714This ioctl adds to a counter that decrements each time the event overflows.
21977c9d 2715When nonzero,
7db515ef 2716.B POLL_IN
21977c9d
VW
2717is indicated, but
2718once the counter reaches 0
7db515ef 2719.B POLL_HUP
21977c9d 2720is indicated and
f2b1d720
MK
2721the underlying event is disabled.
2722
50e4319c
VW
2723Refreshing an event group leader refreshes all siblings and
2724refreshing with a parameter of 0 currently enables infinite
2725refreshes;
2726these behaviors are unsupported and should not be relied on.
2727.\" See https://lkml.org/lkml/2011/5/24/337
2728
4010bc07 2729Starting with Linux 3.18,
747a6e7c 2730.\" commit 179033b3e064d2cd3f5f9945e76b0a0f0fbf4883
21977c9d
VW
2731.B POLL_HUP
2732is indicated if the event being monitored is attached to a different
2733process and that process exits.
73d8cece 2734.SS rdpmc instruction
f2b1d720 2735Starting with Linux 3.4 on x86, you can use the
747a6e7c 2736.\" commit c7206205d00ab375839bd6c7ddb247d600693c09
f2b1d720
MK
2737.I rdpmc
2738instruction to get low-latency reads without having to enter the kernel.
2739Note that using
2740.I rdpmc
2741is not necessarily faster than other methods for reading event values.
2742
2743Support for this can be detected with the
2744.I cap_usr_rdpmc
2745field in the mmap page; documentation on how
2746to calculate event values can be found in that section.
562c69f6
VW
2747
2748Originally, when rdpmc support was enabled, any process (not just ones
2749with an active perf event) could use the rdpmc instruction to access
2750the counters.
fc79d996 2751Starting with Linux 4.0,
562c69f6
VW
2752.\" 7911d3f7af14a614617e38245fedf98a724e46a9
2753rdpmc support is only allowed if an event is currently enabled
95655a22 2754in a process's context.
562c69f6
VW
2755To restore the old behavior, write the value 2 to
2756.IR /sys/devices/cpu/rdpmc .
73d8cece 2757.SS perf_event ioctl calls
f2b1d720
MK
2758.PP
2759Various ioctls act on
7db515ef 2760.BR perf_event_open ()
ce88f77b 2761file descriptors:
f2b1d720
MK
2762.TP
2763.B PERF_EVENT_IOC_ENABLE
ce88f77b 2764This enables the individual event or event group specified by the
7db515ef 2765file descriptor argument.
f2b1d720 2766
51700fd7 2767If the
8cc8b90d 2768.B PERF_IOC_FLAG_GROUP
51700fd7 2769bit is set in the ioctl argument, then all events in a group are
dbc01ecd
VW
2770enabled, even if the event specified is not the group leader
2771(but see BUGS).
f2b1d720
MK
2772.TP
2773.B PERF_EVENT_IOC_DISABLE
ce88f77b 2774This disables the individual counter or event group specified by the
7db515ef 2775file descriptor argument.
f2b1d720
MK
2776
2777Enabling or disabling the leader of a group enables or disables the
2778entire group; that is, while the group leader is disabled, none of the
2779counters in the group will count.
33a0ccb2
MK
2780Enabling or disabling a member of a group other than the leader
2781affects only that counter; disabling a non-leader
f2b1d720
MK
2782stops that counter from counting but doesn't affect any other counter.
2783
51700fd7 2784If the
8cc8b90d 2785.B PERF_IOC_FLAG_GROUP
51700fd7 2786bit is set in the ioctl argument, then all events in a group are
dbc01ecd
VW
2787disabled, even if the event specified is not the group leader
2788(but see BUGS).
f2b1d720
MK
2789.TP
2790.B PERF_EVENT_IOC_REFRESH
2791Non-inherited overflow counters can use this
2792to enable a counter for a number of overflows specified by the argument,
2793after which it is disabled.
2794Subsequent calls of this ioctl add the argument value to the current
2795count.
21977c9d 2796An overflow notification with
7db515ef
MK
2797.B POLL_IN
2798set will happen on each overflow until the
21977c9d
VW
2799count reaches 0; when that happens a notification with
2800.B POLL_HUP
7db515ef 2801set is sent and the event is disabled.
f2b1d720 2802Using an argument of 0 is considered undefined behavior.
f2b1d720
MK
2803.TP
2804.B PERF_EVENT_IOC_RESET
36127c0e 2805Reset the event count specified by the
6061d29f 2806file descriptor argument to zero.
33a0ccb2 2807This resets only the counts; there is no way to reset the
f2b1d720
MK
2808multiplexing
2809.I time_enabled
2810or
2811.I time_running
2812values.
f2b1d720 2813
51700fd7 2814If the
8cc8b90d 2815.B PERF_IOC_FLAG_GROUP
51700fd7 2816bit is set in the ioctl argument, then all events in a group are
dbc01ecd
VW
2817reset, even if the event specified is not the group leader
2818(but see BUGS).
f2b1d720
MK
2819.TP
2820.B PERF_EVENT_IOC_PERIOD
e6cf5694 2821This updates the overflow period for the event.
3f118a29 2822
747a6e7c
VW
2823Since Linux 3.7 (on ARM)
2824.\" commit 3581fe0ef37ce12ac7a4f74831168352ae848edc
2825and Linux 3.14 (all other architectures),
2826.\" commit bad7192b842c83e580747ca57104dd51fe08c223
3f118a29 2827the new period takes effect immediately.
ed81fdd9 2828On older kernels, the new period did not take effect until
3f118a29 2829after the next overflow.
f2b1d720
MK
2830
2831The argument is a pointer to a 64-bit value containing the
2832desired new period.
e6cf5694 2833
fc79d996 2834Prior to Linux 2.6.36,
747a6e7c
VW
2835.\" commit ad0cf3478de8677f720ee06393b3147819568d6a
2836this ioctl always failed due to a bug
e6cf5694 2837in the kernel.
f2b1d720
MK
2838.TP
2839.B PERF_EVENT_IOC_SET_OUTPUT
2840This tells the kernel to report event notifications to the specified
2841file descriptor rather than the default one.
2842The file descriptors must all be on the same CPU.
2843
2844The argument specifies the desired file descriptor, or \-1 if
2845output should be ignored.
f2b1d720 2846.TP
31c1f2b0 2847.BR PERF_EVENT_IOC_SET_FILTER " (since Linux 2.6.33)"
60dafbc1 2848.\" commit 6fb2915df7f0747d9044da9dbff5b46dc2e20830
f2b1d720
MK
2849This adds an ftrace filter to this event.
2850
2851The argument is a pointer to the desired ftrace filter.
a0dcc8dd 2852.TP
31c1f2b0 2853.BR PERF_EVENT_IOC_ID " (since Linux 3.12)"
60dafbc1 2854.\" commit cf4957f17f2a89984915ea808876d9c82225b862
bec6277e 2855This returns the event ID value for the given event file descriptor.
a0dcc8dd
VW
2856
2857The argument is a pointer to a 64-bit unsigned integer
2858to hold the result.
b0f7b411
VW
2859.TP
2860.BR PERF_EVENT_IOC_SET_BPF " (since Linux 4.1)"
2861.\" commit 2541517c32be2531e0da59dfd7efc1ce844644f5
2862This allows attaching a Berkeley Packet Filter (BPF)
2863program to an existing kprobe tracepoint event.
2864You need
2865.B CAP_SYS_ADMIN
2866privileges to use this ioctl.
2867
2868The argument is a BPF program file descriptor that was created by
2869a previous
2870.BR bpf (2)
2871system call.
fc79d996 2872.SS Using prctl(2)
f2b1d720
MK
2873A process can enable or disable all the event groups that are
2874attached to it using the
2875.BR prctl (2)
2876.B PR_TASK_PERF_EVENTS_ENABLE
2877and
2878.B PR_TASK_PERF_EVENTS_DISABLE
2879operations.
ee7b0cbf 2880This applies to all counters on the calling process, whether created by
f2b1d720
MK
2881this process or by another, and does not affect any counters that this
2882process has created on other processes.
33a0ccb2 2883It enables or disables only
f2b1d720 2884the group leaders, not any other members in the groups.
f2b1d720 2885.SS perf_event related configuration files
dc9ec146 2886
7db515ef
MK
2887Files in
2888.I /proc/sys/kernel/
7db515ef 2889.RS 4
f2b1d720 2890.TP
7db515ef 2891.I /proc/sys/kernel/perf_event_paranoid
f2b1d720
MK
2892The
2893.I perf_event_paranoid
2894file can be set to restrict access to the performance counters.
dc9ec146
MK
2895
2896.PD 0
2b538c3e
MK
2897.RS
2898.IP 2 4
3eb95192 2899allow only user-space measurements (default since Linux 4.6).
b5eb75f7 2900.\" default changed in commit 0161028b7c8aebef64194d3d73e43bc3b53b5c66
2b538c3e 2901.IP 1
3eb95192 2902allow both kernel and user measurements (default before Linux 4.6).
2b538c3e
MK
2903.IP 0
2904allow access to CPU-specific data but not raw tracepoint samples.
2905.IP \-1
2906no restrictions.
2907.RE
dc9ec146 2908.PD
2b538c3e 2909.IP
f2b1d720
MK
2910The existence of the
2911.I perf_event_paranoid
2912file is the official method for determining if a kernel supports
7db515ef 2913.BR perf_event_open ().
f2b1d720
MK
2914.TP
2915.I /proc/sys/kernel/perf_event_max_sample_rate
7db515ef
MK
2916This sets the maximum sample rate.
2917Setting this too high can allow
f2b1d720 2918users to sample at a rate that impacts overall machine performance
7db515ef
MK
2919and potentially lock up the machine.
2920The default value is
f2b1d720 2921100000 (samples per second).
fd133d5d
VW
2922.TP
2923.I /proc/sys/kernel/perf_event_max_stack
2924.\" Introduced in c5dfd78eb79851e278b7973031b9ca363da87a7e
5dd3feec 2925This file sets the maximum depth of stack frame entries reported
fd133d5d 2926when generating a call trace.
f2b1d720
MK
2927.TP
2928.I /proc/sys/kernel/perf_event_mlock_kb
ce88f77b
MK
2929Maximum amount of memory (in kilobytes) an unprivileged user can
2930.BR mlock (2).
f2b1d720
MK
2931The default is 516 (kB).
2932.RE
dc9ec146 2933
7db515ef
MK
2934Files in
2935.I /sys/bus/event_source/devices/
dc9ec146 2936
7db515ef 2937.RS 4
ce88f77b 2938Since Linux 2.6.34, the kernel supports having multiple PMUs
f2b1d720
MK
2939available for monitoring.
2940Information on how to program these PMUs can be found under
2941.IR /sys/bus/event_source/devices/ .
2942Each subdirectory corresponds to a different PMU.
f2b1d720 2943.TP
31c1f2b0 2944.IR /sys/bus/event_source/devices/*/type " (since Linux 2.6.38)"
747a6e7c 2945.\" commit abe43400579d5de0078c2d3a760e6598e183f871
f2b1d720
MK
2946This contains an integer that can be used in the
2947.I type
ce88f77b
MK
2948field of
2949.I perf_event_attr
2950to indicate that you wish to use this PMU.
f2b1d720 2951.TP
562c69f6 2952.IR /sys/bus/event_source/devices/cpu/rdpmc " (since Linux 3.4)"
747a6e7c 2953.\" commit 0c9d42ed4cee2aa1dfc3a260b741baae8615744f
8a94e783 2954If this file is 1, then direct user-space access to the
e30dc77f
VW
2955performance counter registers is allowed via the rdpmc instruction.
2956This can be disabled by echoing 0 to the file.
562c69f6
VW
2957
2958As of Linux 4.0
2959.\" a66734297f78707ce39d756b656bfae861d53f62
2960.\" 7911d3f7af14a614617e38245fedf98a724e46a9
2961the behavior has changed, so that 1 now means only allow access
2962to processes with active perf events, with 2 indicating the old
2963allow-anyone-access behavior.
f2b1d720 2964.TP
31c1f2b0 2965.IR /sys/bus/event_source/devices/*/format/ " (since Linux 3.4)"
747a6e7c 2966.\" commit 641cc938815dfd09f8fa1ec72deb814f0938ac33
7d182bb6
MK
2967This subdirectory contains information on the architecture-specific
2968subfields available for programming the various
f2b1d720 2969.I config
ce88f77b
MK
2970fields in the
2971.I perf_event_attr
2972struct.
e30dc77f
VW
2973
2974The content of each file is the name of the config field, followed
2975by a colon, followed by a series of integer bit ranges separated by
2976commas.
8a94e783 2977For example, the file
e30dc77f
VW
2978.I event
2979may contain the value
2980.I config1:1,6-10,44
2981which indicates that event is an attribute that occupies bits 1, 6-10, and 44
ce88f77b
MK
2982of
2983.IR perf_event_attr::config1 .
e30dc77f 2984.TP
31c1f2b0 2985.IR /sys/bus/event_source/devices/*/events/ " (since Linux 3.4)"
747a6e7c 2986.\" commit 641cc938815dfd09f8fa1ec72deb814f0938ac33
7d182bb6 2987This subdirectory contains files with predefined events.
f2b1d720 2988The contents are strings describing the event settings
e30dc77f 2989expressed in terms of the fields found in the previously mentioned
f2b1d720
MK
2990.I ./format/
2991directory.
2992These are not necessarily complete lists of all events supported by
2993a PMU, but usually a subset of events deemed useful or interesting.
e30dc77f
VW
2994
2995The content of each file is a list of attribute names
8a94e783
MK
2996separated by commas.
2997Each entry has an optional value (either hex or decimal).
37bee118 2998If no value is specified, then it is assumed to be a single-bit
e30dc77f
VW
2999field with a value of 1.
3000An example entry may look like this:
699893d8 3001.IR event=0x2,inv,ldlat=3 .
f2b1d720
MK
3002.TP
3003.I /sys/bus/event_source/devices/*/uevent
e30dc77f
VW
3004This file is the standard kernel device interface
3005for injecting hotplug events.
3006.TP
31c1f2b0 3007.IR /sys/bus/event_source/devices/*/cpumask " (since Linux 3.7)"
747a6e7c 3008.\" commit 314d9f63f385096580e9e2a06eaa0745d92fe4ac
699893d8
DP
3009The
3010.I cpumask
3011file contains a comma-separated list of integers that
3012indicate a representative CPU number for each socket (package)
e30dc77f
VW
3013on the motherboard.
3014This is needed when setting up uncore or northbridge events, as
3015those PMUs present socket-wide events.
f2b1d720 3016.RE
47297adb 3017.SH RETURN VALUE
f2b1d720
MK
3018.BR perf_event_open ()
3019returns the new file descriptor, or \-1 if an error occurred
3020(in which case,
3021.I errno
3022is set appropriately).
3023.SH ERRORS
d8b7d950
VW
3024The errors returned by
3025.BR perf_event_open ()
3026can be inconsistent, and may
3027vary across processor architectures and performance monitoring units.
f2b1d720 3028.TP
82b09254 3029.B E2BIG
ce88f77b
MK
3030Returned if the
3031.I perf_event_attr
82b09254
VW
3032.I size
3033value is too small
3034(smaller than
3035.BR PERF_ATTR_SIZE_VER0 ),
3036too big (larger than the page size),
3037or larger than the kernel supports and the extra bytes are not zero.
3038When
3039.B E2BIG
ce88f77b
MK
3040is returned, the
3041.I perf_event_attr
e9bd9b2c 3042.I size
d6af98f8 3043field is overwritten by the kernel to be the size of the structure
82b09254
VW
3044it was expecting.
3045.TP
d8b7d950 3046.B EACCES
27f0af8e
VW
3047Returned when the requested event requires
3048.B CAP_SYS_ADMIN
3049permissions (or a more permissive perf_event paranoid setting).
3050Some common cases where an unprivileged process
3051may encounter this error:
3052attaching to a process owned by a different user;
2b23ecbd
MK
3053monitoring all processes on a given CPU (i.e., specifying the
3054.I pid
3055argument as \-1);
079928f3 3056and not setting
accec051 3057.I exclude_kernel
079928f3 3058when the paranoid setting requires it.
d8b7d950
VW
3059.TP
3060.B EBADF
3061Returned if the
3062.I group_fd
accec051
MK
3063file descriptor is not valid, or, if
3064.B PERF_FLAG_PID_CGROUP
3065is set,
d8b7d950
VW
3066the cgroup file descriptor in
3067.I pid
3068is not valid.
3069.TP
f27486cb
VW
3070.BR EBUSY " (since Linux 4.1)"
3071.\" bed5b25ad9c8a2f5d735ef0bc746ec870c01c1b0
3072Returned if another event already has exclusive
3073access to the PMU.
3074.TP
d8b7d950
VW
3075.B EFAULT
3076Returned if the
3077.I attr
3078pointer points at an invalid memory address.
3079.TP
f2b1d720 3080.B EINVAL
d8b7d950
VW
3081Returned if the specified event is invalid.
3082There are many possible reasons for this.
3083A non-exhaustive list:
3084.I sample_freq
accec051 3085is higher than the maximum setting;
d8b7d950
VW
3086the
3087.I cpu
accec051 3088to monitor does not exist;
d8b7d950 3089.I read_format
accec051 3090is out of range;
d8b7d950 3091.I sample_type
accec051 3092is out of range;
d8b7d950
VW
3093the
3094.I flags
accec051 3095value is out of range;
d8b7d950
VW
3096.I exclusive
3097or
3098.I pinned
accec051 3099is set and the event is not a group leader;
d8b7d950
VW
3100the event
3101.I config
accec051
MK
3102values are out of range or set reserved bits;
3103the generic event selected is not supported; or
d8b7d950
VW
3104there is not enough room to add the selected event.
3105.TP
3106.B EMFILE
3107Each opened event uses one file descriptor.
26c32fab
MK
3108If a large number of events are opened,
3109the per-process limit on the number of open file descriptors will be reached,
3110and no more events can be created.
d8b7d950
VW
3111.TP
3112.B ENODEV
3113Returned when the event involves a feature not supported
accec051 3114by the current CPU.
d8b7d950
VW
3115.TP
3116.B ENOENT
3117Returned if the
3118.I type
3119setting is not valid.
accec051 3120This error is also returned for
d8b7d950 3121some unsupported generic events.
f2b1d720
MK
3122.TP
3123.B ENOSPC
3124Prior to Linux 3.3, if there was not enough room for the event,
747a6e7c 3125.\" commit aa2bc1ade59003a379ffc485d6da2d92ea3370a6
f2b1d720
MK
3126.B ENOSPC
3127was returned.
accec051 3128In Linux 3.3, this was changed to
f2b1d720
MK
3129.BR EINVAL .
3130.B ENOSPC
d8b7d950 3131is still returned if you try to add more breakpoint events
accec051 3132than supported by the hardware.
d8b7d950
VW
3133.TP
3134.B ENOSYS
3135Returned if
3136.B PERF_SAMPLE_STACK_USER
3137is set in
3138.I sample_type
3139and it is not supported by hardware.
3140.TP
3141.B EOPNOTSUPP
3142Returned if an event requiring a specific hardware feature is
3143requested but there is no hardware support.
3144This includes requesting low-skid events if not supported,
3145branch tracing if it is not available, sampling if no PMU
3146interrupt is available, and branch stacks for software events.
3147.TP
fd133d5d
VW
3148.BR EOVERFLOW " (since Linux 4.8)"
3149.\" 97c79a38cd454602645f0470ffb444b3b75ce574
3150Returned if
3151.B PERF_SAMPLE_CALLCHAIN
3152is requested and
3153.I sample_max_stack
3154is larger than the maximum specified in
3155.IR /proc/sys/kernel/perf_event_max_stack .
3156.TP
d8b7d950 3157.B EPERM
27f0af8e
VW
3158Returned on many (but not all) architectures when an unsupported
3159.IR exclude_hv ", " exclude_idle ", " exclude_user ", or " exclude_kernel
3160setting is specified.
3161
3162It can also happen, as with
3163.BR EACCES ,
3164when the requested event requires
3165.B CAP_SYS_ADMIN
3166permissions (or a more permissive perf_event paranoid setting).
3167This includes setting a breakpoint on a kernel address,
3168and (since Linux 3.13) setting a kernel function-trace tracepoint.
747a6e7c 3169.\" commit a4e95fc2cbb31d70a65beffeaf8773f881328c34
d8b7d950
VW
3170.TP
3171.B ESRCH
3172Returned if attempting to attach to a process that does not exist.
f2b1d720 3173.SH VERSION
f2b1d720
MK
3174.BR perf_event_open ()
3175was introduced in Linux 2.6.31 but was called
747a6e7c 3176.\" commit 0793a61d4df8daeac6492dbf8d2f3e5713caae5e
ffd4dec0 3177.BR perf_counter_open ().
f2b1d720 3178It was renamed in Linux 2.6.32.
747a6e7c 3179.\" commit cdd6c482c9ff9c55475ee7392ec8f672eddb7be6
f2b1d720 3180.SH CONFORMING TO
7db515ef
MK
3181This
3182.BR perf_event_open ()
dc9ec146 3183system call is Linux-specific
f2b1d720 3184and should not be used in programs intended to be portable.
f2b1d720
MK
3185.SH NOTES
3186Glibc does not provide a wrapper for this system call; call it using
3187.BR syscall (2).
7db515ef 3188See the example below.
f2b1d720
MK
3189
3190The official way of knowing if
7db515ef 3191.BR perf_event_open ()
f2b1d720
MK
3192support is enabled is checking
3193for the existence of the file
7db515ef 3194.IR /proc/sys/kernel/perf_event_paranoid .
f2b1d720 3195.SH BUGS
f2b1d720
MK
3196The
3197.B F_SETOWN_EX
3198option to
7db515ef 3199.BR fcntl (2)
f2b1d720
MK
3200is needed to properly get overflow signals in threads.
3201This was introduced in Linux 2.6.32.
747a6e7c 3202.\" commit ba0a6c9f6fceed11c6a99e8326f0477fe383e6b5
f2b1d720 3203
747a6e7c
VW
3204Prior to Linux 2.6.33 (at least for x86),
3205.\" commit b690081d4d3f6a23541493f1682835c3cd5c54a1
3206the kernel did not check
f2b1d720
MK
3207if events could be scheduled together until read time.
3208The same happens on all known kernels if the NMI watchdog is enabled.
3209This means that to see if a given set of events works, you have to call
3210.BR perf_event_open (),
3211start, then read before you know for sure you
3212can get valid measurements.
3213
b5190152
MK
3214Prior to Linux 2.6.34,
3215.\" FIXME . cannot find a kernel commit for this one
3216event constraints were not enforced by the kernel.
f2b1d720
MK
3217In that case, some events would silently return "0" if the kernel
3218scheduled them in an improper counter slot.
3219
ce88f77b 3220Prior to Linux 2.6.34, there was a bug when multiplexing where the
f2b1d720 3221wrong results could be returned.
747a6e7c 3222.\" commit 45e16a6834b6af098702e5ea6c9a40de42ff77d8
f2b1d720
MK
3223
3224In Linux 2.6.35 to Linux 2.6.39, the kernel can quickly crash if
3225"inherit" is enabled and many threads are started.
747a6e7c 3226.\" commit 38b435b16c36b0d863efcf3f07b34a6fac9873fd
f2b1d720
MK
3227
3228Prior to Linux 2.6.35,
747a6e7c 3229.\" commit 050735b08ca8a016bbace4445fa025b88fee770b
f2b1d720
MK
3230.B PERF_FORMAT_GROUP
3231did not work with attached processes.
3232
f2b1d720
MK
3233There is a bug in the kernel code between
3234Linux 2.6.36 and Linux 3.0 that ignores the
3235"watermark" field and acts as if a wakeup_event
3236was chosen if the union has a
7d182bb6 3237nonzero value in it.
747a6e7c 3238.\" commit 4ec8363dfc1451f8c8f86825731fe712798ada02
f2b1d720 3239
8a94e783 3240From Linux 2.6.31 to Linux 3.4, the
dbc01ecd
VW
3241.B PERF_IOC_FLAG_GROUP
3242ioctl argument was broken and would repeatedly operate
3243on the event specified rather than iterating across
3244all sibling events in a group.
747a6e7c 3245.\" commit 724b6daa13e100067c30cfc4d1ad06629609dc4e
dbc01ecd 3246
7205b8df 3247From Linux 3.4 to Linux 3.11, the mmap
747a6e7c 3248.\" commit fa7315871046b9a4c48627905691dbde57e51033
135cba8b
VW
3249.I cap_usr_rdpmc
3250and
3251.I cap_usr_time
3252bits mapped to the same location.
3253Code should migrate to the new
3254.I cap_user_rdpmc
3255and
3256.I cap_user_time
3257fields instead.
3258
7db515ef
MK
3259Always double-check your results!
3260Various generalized events have had wrong values.
f2b1d720
MK
3261For example, retired branches measured
3262the wrong thing on AMD machines until Linux 2.6.35.
747a6e7c 3263.\" commit f287d332ce835f77a4f5077d2c0ef1e3f9ea42d2
f2b1d720
MK
3264.SH EXAMPLE
3265The following is a short example that measures the total
7db515ef
MK
3266instruction count of a call to
3267.BR printf (3).
f2b1d720
MK
3268.nf
3269
3270#include <stdlib.h>
3271#include <stdio.h>
3272#include <unistd.h>
3273#include <string.h>
3274#include <sys/ioctl.h>
3275#include <linux/perf_event.h>
3276#include <asm/unistd.h>
3277
571767ca 3278static long
7db515ef
MK
3279perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
3280 int cpu, int group_fd, unsigned long flags)
f2b1d720
MK
3281{
3282 int ret;
3283
7db515ef
MK
3284 ret = syscall(__NR_perf_event_open, hw_event, pid, cpu,
3285 group_fd, flags);
f2b1d720
MK
3286 return ret;
3287}
3288
f2b1d720
MK
3289int
3290main(int argc, char **argv)
3291{
f2b1d720
MK
3292 struct perf_event_attr pe;
3293 long long count;
3294 int fd;
3295
3296 memset(&pe, 0, sizeof(struct perf_event_attr));
3297 pe.type = PERF_TYPE_HARDWARE;
3298 pe.size = sizeof(struct perf_event_attr);
3299 pe.config = PERF_COUNT_HW_INSTRUCTIONS;
3300 pe.disabled = 1;
3301 pe.exclude_kernel = 1;
3302 pe.exclude_hv = 1;
3303
3304 fd = perf_event_open(&pe, 0, \-1, \-1, 0);
7db515ef 3305 if (fd == \-1) {
f2b1d720 3306 fprintf(stderr, "Error opening leader %llx\\n", pe.config);
7db515ef 3307 exit(EXIT_FAILURE);
f2b1d720
MK
3308 }
3309
3310 ioctl(fd, PERF_EVENT_IOC_RESET, 0);
3311 ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
3312
3313 printf("Measuring instruction count for this printf\\n");
3314
3315 ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
3316 read(fd, &count, sizeof(long long));
3317
3318 printf("Used %lld instructions\\n", count);
3319
3320 close(fd);
3321}
3322.fi
47297adb 3323.SH SEE ALSO
f2b1d720
MK
3324.BR fcntl (2),
3325.BR mmap (2),
3326.BR open (2),
3327.BR prctl (2),
3328.BR read (2)