]> git.ipfire.org Git - thirdparty/man-pages.git/blame - man2/perf_event_open.2
perf_event_open.2: Document new perf_event_paranoid default
[thirdparty/man-pages.git] / man2 / perf_event_open.2
CommitLineData
f2b1d720
MK
1.\" Copyright (c) 2012, Vincent Weaver
2.\"
1dd72f9c 3.\" %%%LICENSE_START(GPLv2+_DOC_FULL)
f2b1d720
MK
4.\" This is free documentation; you can redistribute it and/or
5.\" modify it under the terms of the GNU General Public License as
6.\" published by the Free Software Foundation; either version 2 of
7.\" the License, or (at your option) any later version.
8.\"
9.\" The GNU General Public License's references to "object code"
10.\" and "executables" are to be interpreted as the output of any
11.\" document formatting or typesetting system, including
12.\" intermediate and printed output.
13.\"
14.\" This manual is distributed in the hope that it will be useful,
15.\" but WITHOUT ANY WARRANTY; without even the implied warranty of
16.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17.\" GNU General Public License for more details.
18.\"
19.\" You should have received a copy of the GNU General Public
20.\" License along with this manual; if not, see
21.\" <http://www.gnu.org/licenses/>.
6a8d8745 22.\" %%%LICENSE_END
f2b1d720
MK
23.\"
24.\" This document is based on the perf_event.h header file, the
25.\" tools/perf/design.txt file, and a lot of bitter experience.
26.\"
6f3c74a8 27.TH PERF_EVENT_OPEN 2 2015-12-05 "Linux" "Linux Programmer's Manual"
f2b1d720
MK
28.SH NAME
29perf_event_open \- set up performance monitoring
30.SH SYNOPSIS
31.nf
32.B #include <linux/perf_event.h>
33.B #include <linux/hw_breakpoint.h>
34.sp
35.BI "int perf_event_open(struct perf_event_attr *" attr ,
36.BI " pid_t " pid ", int " cpu ", int " group_fd ,
37.BI " unsigned long " flags );
38.fi
39
40.IR Note :
41There is no glibc wrapper for this system call; see NOTES.
42.SH DESCRIPTION
43Given a list of parameters,
44.BR perf_event_open ()
45returns a file descriptor, for use in subsequent system calls
46.RB ( read "(2), " mmap "(2), " prctl "(2), " fcntl "(2), etc.)."
47.PP
48A call to
49.BR perf_event_open ()
50creates a file descriptor that allows measuring performance
51information.
52Each file descriptor corresponds to one
53event that is measured; these can be grouped together
54to measure multiple events simultaneously.
55.PP
56Events can be enabled and disabled in two ways: via
57.BR ioctl (2)
58and via
0fe9e4b1 59.BR prctl (2).
f2b1d720
MK
60When an event is disabled it does not count or generate overflows but does
61continue to exist and maintain its count value.
62.PP
63Events come in two flavors: counting and sampled.
64A
65.I counting
66event is one that is used for counting the aggregate number of events
67that occur.
68In general, counting event results are gathered with a
69.BR read (2)
70call.
71A
72.I sampling
73event periodically writes measurements to a buffer that can then
74be accessed via
0fe9e4b1 75.BR mmap (2).
f2b1d720
MK
76.SS Arguments
77.P
f2b1d720 78The
a02a1737 79.I pid
f2b1d720 80and
a02a1737
VW
81.I cpu
82arguments allow specifying which process and CPU to monitor:
83.TP
f2d15dc9 84.BR "pid == 0" " and " "cpu == \-1"
ee7b0cbf 85This measures the calling process/thread on any CPU.
a02a1737 86.TP
f2d15dc9 87.BR "pid == 0" " and " "cpu >= 0"
ee7b0cbf 88This measures the calling process/thread only
a02a1737
VW
89when running on the specified CPU.
90.TP
f2d15dc9 91.BR "pid > 0" " and " "cpu == \-1"
a02a1737
VW
92This measures the specified process/thread on any CPU.
93.TP
f2d15dc9 94.BR "pid > 0" " and " "cpu >= 0"
a02a1737
VW
95This measures the specified process/thread only
96when running on the specified CPU.
97.TP
f2d15dc9 98.BR "pid == \-1" " and " "cpu >= 0"
a02a1737 99This measures all processes/threads on the specified CPU.
ce88f77b 100This requires
f2b1d720
MK
101.B CAP_SYS_ADMIN
102capability or a
103.I /proc/sys/kernel/perf_event_paranoid
104value of less than 1.
a02a1737 105.TP
ce88f77b 106.BR "pid == \-1" " and " "cpu == \-1"
a02a1737 107This setting is invalid and will return an error.
f2b1d720 108.P
13ec13dc
MK
109When
110.I pid
111is greater than zero, permission to perform this system call
112is governed by a ptrace access mode
113.B PTRACE_MODE_READ_REALCREDS
114check; see
115.BR ptrace (2).
116
f2b1d720
MK
117The
118.I group_fd
119argument allows event groups to be created.
120An event group has one event which is the group leader.
121The leader is created first, with
122.IR group_fd " = \-1."
123The rest of the group members are created with subsequent
124.BR perf_event_open ()
125calls with
126.IR group_fd
bec6277e 127being set to the file descriptor of the group leader.
f2b1d720
MK
128(A single event on its own is created with
129.IR group_fd " = \-1"
130and is considered to be a group with only 1 member.)
33a0ccb2 131An event group is scheduled onto the CPU as a unit: it will
d1007d14 132be put onto the CPU only if all of the events in the group can be put onto
f2b1d720
MK
133the CPU.
134This means that the values of the member events can be
ce88f77b 135meaningfully compared\(emadded, divided (to get ratios), and so on\(emwith each
f2b1d720
MK
136other, since they have counted events for the same set of executed
137instructions.
138.P
139The
140.I flags
08e325e8 141argument is formed by ORing together zero or more of the following values:
f2b1d720 142.TP
60dafbc1
MK
143.BR PERF_FLAG_FD_CLOEXEC " (since Linux 3.14)"
144.\" commit a21b0b354d4ac39be691f51c53562e2c24443d9e
e9b1ab78
MK
145This flag enables the close-on-exec flag for the created
146event file descriptor,
147so that the file descriptor is automatically closed on
148.BR execve (2).
8bad22e5
MK
149Setting the close-on-exec flag at creation time, rather than later with
150.BR fcntl (2),
e9b1ab78
MK
151avoids potential race conditions where the calling thread invokes
152.BR perf_event_open ()
a61dba34
MK
153and
154.BR fcntl (2)
e9b1ab78
MK
155at the same time as another thread calls
156.BR fork (2)
157then
158.BR execve (2).
159.TP
f2b1d720 160.BR PERF_FLAG_FD_NO_GROUP
31266c04
VW
161This flag tells the event to ignore the
162.IR group_fd
163parameter except for the purpose of setting up output redirection
164using the
165.B PERF_FLAG_FD_OUTPUT
166flag.
f2b1d720 167.TP
3117263f 168.BR PERF_FLAG_FD_OUTPUT " (broken since Linux 2.6.35)"
747a6e7c 169.\" commit ac9721f3f54b27a16c7e1afb2481e7ee95a70318
31266c04
VW
170This flag re-routes the event's sampled output to instead
171be included in the mmap buffer of the event specified by
172.IR group_fd .
f2b1d720 173.TP
3117263f 174.BR PERF_FLAG_PID_CGROUP " (since Linux 2.6.39)"
60dafbc1 175.\" commit e5d1367f17ba6a6fed5fd8b74e4d5720923e0c25
f2b1d720
MK
176This flag activates per-container system-wide monitoring.
177A container
ce88f77b 178is an abstraction that isolates a set of resources for finer-grained
699893d8 179control (CPUs, memory, etc.).
f2b1d720
MK
180In this mode, the event is measured
181only if the thread running on the monitored CPU belongs to the designated
182container (cgroup).
183The cgroup is identified by passing a file descriptor
184opened on its directory in the cgroupfs filesystem.
185For instance, if the
186cgroup to monitor is called
187.IR test ,
188then a file descriptor opened on
189.I /dev/cgroup/test
190(assuming cgroupfs is mounted on
191.IR /dev/cgroup )
192must be passed as the
193.I pid
194parameter.
33a0ccb2 195cgroup monitoring is available only
f2b1d720
MK
196for system-wide events and may therefore require extra permissions.
197.P
198The
199.I perf_event_attr
200structure provides detailed configuration information
201for the event being created.
202
203.in +4n
204.nf
205struct perf_event_attr {
ce88f77b
MK
206 __u32 type; /* Type of event */
207 __u32 size; /* Size of attribute structure */
208 __u64 config; /* Type-specific configuration */
f2b1d720
MK
209
210 union {
211 __u64 sample_period; /* Period of sampling */
212 __u64 sample_freq; /* Frequency of sampling */
213 };
214
ce88f77b
MK
215 __u64 sample_type; /* Specifies values included in sample */
216 __u64 read_format; /* Specifies values returned in read */
217
218 __u64 disabled : 1, /* off by default */
219 inherit : 1, /* children inherit it */
220 pinned : 1, /* must always be on PMU */
221 exclusive : 1, /* only group on PMU */
222 exclude_user : 1, /* don't count user */
223 exclude_kernel : 1, /* don't count kernel */
224 exclude_hv : 1, /* don't count hypervisor */
225 exclude_idle : 1, /* don't count when idle */
226 mmap : 1, /* include mmap data */
227 comm : 1, /* include comm data */
228 freq : 1, /* use freq, not period */
229 inherit_stat : 1, /* per task counts */
230 enable_on_exec : 1, /* next exec enables */
231 task : 1, /* trace fork/exit */
232 watermark : 1, /* wakeup_watermark */
233 precise_ip : 2, /* skid constraint */
234 mmap_data : 1, /* non-exec mmap data */
235 sample_id_all : 1, /* sample_type all events */
236 exclude_host : 1, /* don't count in host */
237 exclude_guest : 1, /* don't count in guest */
238 exclude_callchain_kernel : 1,
239 /* exclude kernel callchains */
240 exclude_callchain_user : 1,
241 /* exclude user callchains */
9bfc542b 242 mmap2 : 1, /* include mmap with inode data */
49bc411c 243 comm_exec : 1, /* flag comm events that are due to exec */
6bd5186a
VW
244 use_clockid : 1, /* use clockid for time fields */
245
246 __reserved_1 : 38;
f2b1d720
MK
247
248 union {
249 __u32 wakeup_events; /* wakeup every n events */
7db515ef 250 __u32 wakeup_watermark; /* bytes before wakeup */
f2b1d720
MK
251 };
252
253 __u32 bp_type; /* breakpoint type */
254
255 union {
256 __u64 bp_addr; /* breakpoint address */
257 __u64 config1; /* extension of config */
258 };
259
260 union {
261 __u64 bp_len; /* breakpoint length */
262 __u64 config2; /* extension of config1 */
263 };
ce88f77b
MK
264 __u64 branch_sample_type; /* enum perf_branch_sample_type */
265 __u64 sample_regs_user; /* user regs to dump on samples */
266 __u32 sample_stack_user; /* size of stack to dump on
7db515ef 267 samples */
6bd5186a 268 __s32 clockid; /* clock to use for time fields */
f5281dfd 269 __u64 sample_regs_intr; /* regs to dump on samples */
cdc52f4a
VW
270 __u32 aux_watermark; /* aux bytes before wakeup */
271 __u32 __reserved_2; /* align to u64 */
272
f2b1d720
MK
273};
274.fi
275.in
276
277The fields of the
278.I perf_event_attr
279structure are described in more detail below:
f2b1d720
MK
280.TP
281.I type
282This field specifies the overall event type.
283It has one of the following values:
284.RS
285.TP
286.B PERF_TYPE_HARDWARE
287This indicates one of the "generalized" hardware events provided
288by the kernel.
289See the
290.I config
291field definition for more details.
292.TP
293.B PERF_TYPE_SOFTWARE
294This indicates one of the software-defined events provided by the kernel
295(even if no hardware support is available).
296.TP
297.B PERF_TYPE_TRACEPOINT
298This indicates a tracepoint
299provided by the kernel tracepoint infrastructure.
300.TP
301.B PERF_TYPE_HW_CACHE
302This indicates a hardware cache event.
303This has a special encoding, described in the
304.I config
305field definition.
306.TP
307.B PERF_TYPE_RAW
308This indicates a "raw" implementation-specific event in the
309.IR config " field."
310.TP
31c1f2b0 311.BR PERF_TYPE_BREAKPOINT " (since Linux 2.6.33)"
60dafbc1 312.\" commit 24f1e32c60c45c89a997c73395b69c8af6f0a84e
f2b1d720
MK
313This indicates a hardware breakpoint as provided by the CPU.
314Breakpoints can be read/write accesses to an address as well as
315execution of an instruction address.
316.TP
317.RB "dynamic PMU"
747a6e7c
VW
318Since Linux 2.6.38,
319.\" commit 2e80a82a49c4c7eca4e35734380f28298ba5db19
7db515ef 320.BR perf_event_open ()
f2b1d720
MK
321can support multiple PMUs.
322To enable this, a value exported by the kernel can be used in the
323.I type
324field to indicate which PMU to use.
325The value to use can be found in the sysfs filesystem:
326there is a subdirectory per PMU instance under
327.IR /sys/bus/event_source/devices .
7d182bb6 328In each subdirectory there is a
f2b1d720
MK
329.I type
330file whose content is an integer that can be used in the
331.I type
332field.
333For instance,
334.I /sys/bus/event_source/devices/cpu/type
335contains the value for the core CPU PMU, which is usually 4.
336.RE
f2b1d720
MK
337.TP
338.I "size"
339The size of the
340.I perf_event_attr
341structure for forward/backward compatibility.
342Set this using
343.I sizeof(struct perf_event_attr)
344to allow the kernel to see
345the struct size at the time of compilation.
346
347The related define
348.B PERF_ATTR_SIZE_VER0
349is set to 64; this was the size of the first published struct.
350.B PERF_ATTR_SIZE_VER1
351is 72, corresponding to the addition of breakpoints in Linux 2.6.33.
747a6e7c
VW
352.\" commit cb5d76999029ae7a517cb07dfa732c1b5a934fc2
353.\" this was added much later when PERF_ATTR_SIZE_VER2 happened
354.\" but the actual attr_size had increased in 2.6.33
f2b1d720
MK
355.B PERF_ATTR_SIZE_VER2
356is 80 corresponding to the addition of branch sampling in Linux 3.4.
747a6e7c 357.\" commit cb5d76999029ae7a517cb07dfa732c1b5a934fc2
d2a6be2f 358.B PERF_ATTR_SIZE_VER3
f2b1d720 359is 96 corresponding to the addition
7ede2f66
DP
360of
361.I sample_regs_user
362and
363.I sample_stack_user
364in Linux 3.7.
747a6e7c 365.\" commit 1659d129ed014b715b0b2120e6fd929bdd33ed03
f5281dfd
VW
366.B PERF_ATTR_SIZE_VER4
367is 104 corresponding to the addition of
368.I sample_regs_intr
369in Linux 3.19.
370.\" commit 60e2364e60e86e81bc6377f49779779e6120977f
cdc52f4a
VW
371.B PERF_ATTR_SIZE_VER5
372is 112 corresponding to the addition of
2050c098 373.I aux_watermark
cdc52f4a
VW
374in Linux 4.1.
375.\" commit 1a5941312414c71dece6717da9a0fa1303127afa
f2b1d720
MK
376.TP
377.I "config"
378This specifies which event you want, in conjunction with
379the
380.I type
381field.
382The
383.IR config1 " and " config2
384fields are also taken into account in cases where 64 bits is not
385enough to fully specify the event.
386The encoding of these fields is event dependent.
387
f2b1d720
MK
388There are various ways to set the
389.I config
390field that are dependent on the value of the previously
391described
392.I type
393field.
394What follows are various possible settings for
395.I config
396separated out by
397.IR type .
398
399If
400.I type
401is
402.BR PERF_TYPE_HARDWARE ,
403we are measuring one of the generalized hardware CPU events.
404Not all of these are available on all platforms.
405Set
406.I config
407to one of the following:
408.RS 12
409.TP
410.B PERF_COUNT_HW_CPU_CYCLES
411Total cycles.
2b538c3e 412Be wary of what happens during CPU frequency scaling.
f2b1d720
MK
413.TP
414.B PERF_COUNT_HW_INSTRUCTIONS
415Retired instructions.
416Be careful, these can be affected by various
2b538c3e 417issues, most notably hardware interrupt counts.
f2b1d720
MK
418.TP
419.B PERF_COUNT_HW_CACHE_REFERENCES
420Cache accesses.
421Usually this indicates Last Level Cache accesses but this may
422vary depending on your CPU.
423This may include prefetches and coherency messages; again this
424depends on the design of your CPU.
425.TP
426.B PERF_COUNT_HW_CACHE_MISSES
427Cache misses.
428Usually this indicates Last Level Cache misses; this is intended to be
429used in conjunction with the
430.B PERF_COUNT_HW_CACHE_REFERENCES
431event to calculate cache miss rates.
432.TP
433.B PERF_COUNT_HW_BRANCH_INSTRUCTIONS
434Retired branch instructions.
747a6e7c 435Prior to Linux 2.6.35, this used
f2b1d720 436the wrong event on AMD processors.
747a6e7c 437.\" commit f287d332ce835f77a4f5077d2c0ef1e3f9ea42d2
f2b1d720
MK
438.TP
439.B PERF_COUNT_HW_BRANCH_MISSES
440Mispredicted branch instructions.
441.TP
442.B PERF_COUNT_HW_BUS_CYCLES
443Bus cycles, which can be different from total cycles.
444.TP
31c1f2b0 445.BR PERF_COUNT_HW_STALLED_CYCLES_FRONTEND " (since Linux 3.0)"
747a6e7c 446.\" commit 8f62242246351b5a4bc0c1f00c0c7003edea128a
f2b1d720
MK
447Stalled cycles during issue.
448.TP
31c1f2b0 449.BR PERF_COUNT_HW_STALLED_CYCLES_BACKEND " (since Linux 3.0)"
747a6e7c 450.\" commit 8f62242246351b5a4bc0c1f00c0c7003edea128a
f2b1d720
MK
451Stalled cycles during retirement.
452.TP
31c1f2b0 453.BR PERF_COUNT_HW_REF_CPU_CYCLES " (since Linux 3.3)"
60dafbc1 454.\" commit c37e17497e01fc0f5d2d6feb5723b210b3ab8890
f2b1d720
MK
455Total cycles; not affected by CPU frequency scaling.
456.RE
457.IP
458If
459.I type
460is
461.BR PERF_TYPE_SOFTWARE ,
462we are measuring software events provided by the kernel.
463Set
464.I config
465to one of the following:
466.RS 12
467.TP
468.B PERF_COUNT_SW_CPU_CLOCK
469This reports the CPU clock, a high-resolution per-CPU timer.
470.TP
471.B PERF_COUNT_SW_TASK_CLOCK
472This reports a clock count specific to the task that is running.
473.TP
474.B PERF_COUNT_SW_PAGE_FAULTS
475This reports the number of page faults.
476.TP
477.B PERF_COUNT_SW_CONTEXT_SWITCHES
478This counts context switches.
479Until Linux 2.6.34, these were all reported as user-space
480events; after that they are reported as happening in the kernel.
747a6e7c 481.\" commit e49a5bd38159dfb1928fd25b173bc9de4bbadb21
f2b1d720
MK
482.TP
483.B PERF_COUNT_SW_CPU_MIGRATIONS
484This reports the number of times the process
485has migrated to a new CPU.
486.TP
487.B PERF_COUNT_SW_PAGE_FAULTS_MIN
488This counts the number of minor page faults.
489These did not require disk I/O to handle.
490.TP
491.B PERF_COUNT_SW_PAGE_FAULTS_MAJ
492This counts the number of major page faults.
493These required disk I/O to handle.
494.TP
31c1f2b0 495.BR PERF_COUNT_SW_ALIGNMENT_FAULTS " (since Linux 2.6.33)"
60dafbc1 496.\" commit f7d7986060b2890fc26db6ab5203efbd33aa2497
f2b1d720
MK
497This counts the number of alignment faults.
498These happen when unaligned memory accesses happen; the kernel
499can handle these but it reduces performance.
33a0ccb2 500This happens only on some architectures (never on x86).
f2b1d720 501.TP
31c1f2b0 502.BR PERF_COUNT_SW_EMULATION_FAULTS " (since Linux 2.6.33)"
60dafbc1 503.\" commit f7d7986060b2890fc26db6ab5203efbd33aa2497
f2b1d720
MK
504This counts the number of emulation faults.
505The kernel sometimes traps on unimplemented instructions
7db515ef 506and emulates them for user space.
f2b1d720 507This can negatively impact performance.
dab38455 508.TP
31c1f2b0 509.BR PERF_COUNT_SW_DUMMY " (since Linux 3.12)"
60dafbc1 510.\" commit fa0097ee690693006ab1aea6c01ad3c851b65c77
dab38455
VW
511This is a placeholder event that counts nothing.
512Informational sample record types such as mmap or comm
513must be associated with an active event.
514This dummy event allows gathering such records without requiring
515a counting event.
f2b1d720 516.RE
f2b1d720 517
f2b1d720
MK
518.RS
519If
520.I type
521is
522.BR PERF_TYPE_TRACEPOINT ,
523then we are measuring kernel tracepoints.
524The value to use in
525.I config
526can be obtained from under debugfs
527.I tracing/events/*/*/id
528if ftrace is enabled in the kernel.
f2b1d720 529.RE
1f22e274 530
f2b1d720
MK
531.RS
532If
533.I type
534is
535.BR PERF_TYPE_HW_CACHE ,
536then we are measuring a hardware CPU cache event.
537To calculate the appropriate
538.I config
539value use the following equation:
540.RS 4
541.nf
542
543 (perf_hw_cache_id) | (perf_hw_cache_op_id << 8) |
544 (perf_hw_cache_op_result_id << 16)
545.fi
546.P
547where
548.I perf_hw_cache_id
549is one of:
7db515ef 550.RS 4
f2b1d720
MK
551.TP
552.B PERF_COUNT_HW_CACHE_L1D
553for measuring Level 1 Data Cache
554.TP
555.B PERF_COUNT_HW_CACHE_L1I
556for measuring Level 1 Instruction Cache
557.TP
558.B PERF_COUNT_HW_CACHE_LL
559for measuring Last-Level Cache
560.TP
561.B PERF_COUNT_HW_CACHE_DTLB
562for measuring the Data TLB
563.TP
564.B PERF_COUNT_HW_CACHE_ITLB
565for measuring the Instruction TLB
566.TP
567.B PERF_COUNT_HW_CACHE_BPU
568for measuring the branch prediction unit
569.TP
5a69ce9c
MK
570.BR PERF_COUNT_HW_CACHE_NODE " (since Linux 3.1)"
571.\" commit 89d6c0b5bdbb1927775584dcf532d98b3efe1477
f2b1d720
MK
572for measuring local memory accesses
573.RE
f2b1d720
MK
574.P
575and
576.I perf_hw_cache_op_id
577is one of
7db515ef 578.RS 4
f2b1d720
MK
579.TP
580.B PERF_COUNT_HW_CACHE_OP_READ
581for read accesses
582.TP
583.B PERF_COUNT_HW_CACHE_OP_WRITE
584for write accesses
585.TP
586.B PERF_COUNT_HW_CACHE_OP_PREFETCH
587for prefetch accesses
588.RE
f2b1d720
MK
589.P
590and
591.I perf_hw_cache_op_result_id
592is one of
7db515ef 593.RS 4
f2b1d720
MK
594.TP
595.B PERF_COUNT_HW_CACHE_RESULT_ACCESS
596to measure accesses
597.TP
598.B PERF_COUNT_HW_CACHE_RESULT_MISS
599to measure misses
600.RE
601.RE
602
603If
604.I type
605is
606.BR PERF_TYPE_RAW ,
607then a custom "raw"
608.I config
609value is needed.
610Most CPUs support events that are not covered by the "generalized" events.
611These are implementation defined; see your CPU manual (for example
612the Intel Volume 3B documentation or the AMD BIOS and Kernel Developer
613Guide).
614The libpfm4 library can be used to translate from the name in the
615architectural manuals to the raw hex value
616.BR perf_event_open ()
617expects in this field.
618
619If
620.I type
621is
622.BR PERF_TYPE_BREAKPOINT ,
623then leave
624.I config
625set to zero.
626Its parameters are set in other places.
627.RE
628.TP
629.IR sample_period ", " sample_freq
21977c9d 630A "sampling" event is one that generates an overflow notification
f2b1d720
MK
631every N events, where N is given by
632.IR sample_period .
21977c9d 633A sampling event has
f2b1d720 634.IR sample_period " > 0."
21977c9d 635When an overflow occurs, requested data is recorded
f2b1d720
MK
636in the mmap buffer.
637The
638.I sample_type
21977c9d 639field controls what data is recorded on each overflow.
f2b1d720
MK
640
641.I sample_freq
642can be used if you wish to use frequency rather than period.
37bee118 643In this case, you set the
f2b1d720
MK
644.I freq
645flag.
646The kernel will adjust the sampling period
647to try to achieve the desired rate.
648The rate of adjustment is a
649timer tick.
f2b1d720
MK
650.TP
651.I "sample_type"
652The various bits in this field specify which values to include
653in the sample.
654They will be recorded in a ring-buffer,
ad73a2cc 655which is available to user space using
f2b1d720
MK
656.BR mmap (2).
657The order in which the values are saved in the
658sample is documented in the MMAP Layout subsection below;
659it is not the
660.I "enum perf_event_sample_format"
661order.
662.RS
663.TP
664.B PERF_SAMPLE_IP
665Records instruction pointer.
666.TP
667.B PERF_SAMPLE_TID
7db515ef 668Records the process and thread IDs.
f2b1d720
MK
669.TP
670.B PERF_SAMPLE_TIME
671Records a timestamp.
672.TP
673.B PERF_SAMPLE_ADDR
674Records an address, if applicable.
675.TP
676.B PERF_SAMPLE_READ
677Records counter values for all events in a group, not just the group leader.
678.TP
679.B PERF_SAMPLE_CALLCHAIN
680Records the callchain (stack backtrace).
681.TP
682.B PERF_SAMPLE_ID
683Records a unique ID for the opened event's group leader.
684.TP
685.B PERF_SAMPLE_CPU
686Records CPU number.
687.TP
688.B PERF_SAMPLE_PERIOD
689Records the current sampling period.
690.TP
691.B PERF_SAMPLE_STREAM_ID
692Records a unique ID for the opened event.
693Unlike
694.B PERF_SAMPLE_ID
695the actual ID is returned, not the group leader's.
8859d3a9
DP
696This ID is the same as the one returned by
697.BR PERF_FORMAT_ID .
f2b1d720
MK
698.TP
699.B PERF_SAMPLE_RAW
700Records additional data, if applicable.
701Usually returned by tracepoint events.
702.TP
31c1f2b0 703.BR PERF_SAMPLE_BRANCH_STACK " (since Linux 3.4)"
60dafbc1 704.\" commit bce38cd53e5ddba9cb6d708c4ef3d04a4016ec7e
045bf4d3
VW
705This provides a record of recent branches, as provided
706by CPU branch sampling hardware (such as Intel Last Branch Record).
707Not all hardware supports this feature.
708
709See the
710.I branch_sample_type
711field for how to filter which branches are reported.
f2b1d720 712.TP
31c1f2b0 713.BR PERF_SAMPLE_REGS_USER " (since Linux 3.7)"
60dafbc1 714.\" commit 4018994f3d8785275ef0e7391b75c3462c029e56
d1007d14
VW
715Records the current user-level CPU register state
716(the values in the process before the kernel was called).
f2b1d720 717.TP
31c1f2b0 718.BR PERF_SAMPLE_STACK_USER " (since Linux 3.7)"
60dafbc1 719.\" commit c5ebcedb566ef17bda7b02686e0d658a7bb42ee7
d1007d14
VW
720Records the user-level stack, allowing stack unwinding.
721.TP
31c1f2b0 722.BR PERF_SAMPLE_WEIGHT " (since Linux 3.10)"
60dafbc1 723.\" commit c3feedf2aaf9ac8bad6f19f5d21e4ee0b4b87e9c
d1007d14 724Records a hardware-provided weight value that expresses how
51700fd7 725costly the sampled event was.
d1007d14
VW
726This allows the hardware to highlight expensive events in
727a profile.
728.TP
31c1f2b0 729.BR PERF_SAMPLE_DATA_SRC " (since Linux 3.10)"
60dafbc1 730.\" commit d6be9ad6c960f43800a6f118932bc8a5a4eadcd1
d1007d14
VW
731Records the data source: where in the memory hierarchy
732the data associated with the sampled instruction came from.
6170255e 733This is available only if the underlying hardware
d1007d14 734supports this feature.
7480dabb 735.TP
31c1f2b0 736.BR PERF_SAMPLE_IDENTIFIER " (since Linux 3.12)"
60dafbc1 737.\" commit ff3d527cebc1fa3707c617bfe9e74f53fcfb0955
8859d3a9
DP
738Places the
739.B SAMPLE_ID
740value in a fixed position in the record,
7480dabb
VW
741either at the beginning (for sample events) or at the end
742(if a non-sample event).
743
744This was necessary because a sample stream may have
745records from various different event sources with different
746.I sample_type
747settings.
e9bd9b2c 748Parsing the event stream properly was not possible because the
8859d3a9
DP
749format of the record was needed to find
750.BR SAMPLE_ID ,
751but
27f52b52 752the format could not be found without knowing what
7480dabb
VW
753event the sample belonged to (causing a circular
754dependency).
755
e41c36b2 756The
7480dabb
VW
757.B PERF_SAMPLE_IDENTIFIER
758setting makes the event stream always parsable
8859d3a9
DP
759by putting
760.B SAMPLE_ID
761in a fixed location, even though
762it means having duplicate
763.B SAMPLE_ID
764values in records.
1e043959 765.TP
60dafbc1
MK
766.BR PERF_SAMPLE_TRANSACTION " (since Linux 3.13)"
767.\" commit fdfbbd07e91f8fe387140776f3fd94605f0c89e5
84fc2a6e 768Records reasons for transactional memory abort events
1e043959
VW
769(for example, from Intel TSX transactional memory support).
770
771The
772.I precise_ip
b3f39642 773setting must be greater than 0 and a transactional memory abort
1e043959 774event must be measured or no values will be recorded.
84fc2a6e
MK
775Also note that some perf_event measurements, such as sampled
776cycle counting, may cause extraneous aborts (by causing an
1e043959 777interrupt during a transaction).
f5281dfd
VW
778.TP
779.BR PERF_SAMPLE_REGS_INTR " (since Linux 3.19)"
780.\" commit 60e2364e60e86e81bc6377f49779779e6120977f
781Records a subset of the current CPU register state
782as specified by
783.IR sample_regs_intr .
784Unlike
785.B PERF_SAMPLE_REGS_USER
786the register values will return kernel register
787state if the overflow happened while kernel
788code is running.
789If the CPU supports hardware sampling of
790register state (i.e. PEBS on Intel x86) and
791.I precise_ip
792is set higher than zero then the register
793values returned are those captured by
794hardware at the time of the sampled
795instruction's retirement.
f2b1d720 796.RE
f2b1d720
MK
797.TP
798.IR "read_format"
799This field specifies the format of the data returned by
800.BR read (2)
801on a
7db515ef 802.BR perf_event_open ()
f2b1d720
MK
803file descriptor.
804.RS
805.TP
806.B PERF_FORMAT_TOTAL_TIME_ENABLED
7ede2f66
DP
807Adds the 64-bit
808.I time_enabled
809field.
f2b1d720
MK
810This can be used to calculate estimated totals if
811the PMU is overcommitted and multiplexing is happening.
812.TP
813.B PERF_FORMAT_TOTAL_TIME_RUNNING
7ede2f66
DP
814Adds the 64-bit
815.I time_running
816field.
f2b1d720 817This can be used to calculate estimated totals if
3d1ee497 818the PMU is overcommitted and multiplexing is happening.
f2b1d720
MK
819.TP
820.B PERF_FORMAT_ID
821Adds a 64-bit unique value that corresponds to the event group.
822.TP
823.B PERF_FORMAT_GROUP
824Allows all counter values in an event group to be read with one read.
825.RE
f2b1d720
MK
826.TP
827.IR "disabled"
828The
829.I disabled
830bit specifies whether the counter starts out disabled or enabled.
831If disabled, the event can later be enabled by
832.BR ioctl (2),
833.BR prctl (2),
834or
835.IR enable_on_exec .
406650db
VW
836
837When creating an event group, typically the group leader is initialized
838with
839.I disabled
840set to 1 and any child events are initialized with
841.I disabled
842set to 0.
843Despite
844.I disabled
845being 0, the child events will not start until the group leader
846is enabled.
f2b1d720
MK
847.TP
848.IR "inherit"
849The
850.I inherit
851bit specifies that this counter should count events of child
852tasks as well as the task specified.
33a0ccb2 853This applies only to new children, not to any existing children at
f2b1d720
MK
854the time the counter is created (nor to any new children of
855existing children).
856
857Inherit does not work for some combinations of
858.IR read_format s,
859such as
860.BR PERF_FORMAT_GROUP .
f2b1d720
MK
861.TP
862.IR "pinned"
863The
864.I pinned
865bit specifies that the counter should always be on the CPU if at all
866possible.
33a0ccb2 867It applies only to hardware counters and only to group leaders.
f2b1d720
MK
868If a pinned counter cannot be put onto the CPU (e.g., because there are
869not enough hardware counters or because of a conflict with some other
870event), then the counter goes into an 'error' state, where reads
871return end-of-file (i.e.,
872.BR read (2)
873returns 0) until the counter is subsequently enabled or disabled.
f2b1d720
MK
874.TP
875.IR "exclusive"
876The
877.I exclusive
878bit specifies that when this counter's group is on the CPU,
879it should be the only group using the CPU's counters.
880In the future this may allow monitoring programs to
881support PMU features that need to run alone so that they do not
882disrupt other hardware counters.
bea10c8c
VW
883
884Note that many unexpected situations may prevent events with the
885.I exclusive
d3532647 886bit set from ever running.
bea10c8c 887This includes any users running a system-wide
d3532647 888measurement as well as any kernel use of the performance counters
bea10c8c 889(including the commonly enabled NMI Watchdog Timer interface).
f2b1d720
MK
890.TP
891.IR "exclude_user"
ad73a2cc 892If this bit is set, the count excludes events that happen in user space.
f2b1d720
MK
893.TP
894.IR "exclude_kernel"
895If this bit is set, the count excludes events that happen in kernel space.
f2b1d720
MK
896.TP
897.IR "exclude_hv"
898If this bit is set, the count excludes events that happen in the
899hypervisor.
900This is mainly for PMUs that have built-in support for handling this
901(such as POWER).
902Extra support is needed for handling hypervisor measurements on most
903machines.
f2b1d720
MK
904.TP
905.IR "exclude_idle"
906If set, don't count when the CPU is idle.
f2b1d720
MK
907.TP
908.IR "mmap"
909The
910.I mmap
75ee11e5 911bit enables generation of
cd7c700a 912.B PERF_RECORD_MMAP
75ee11e5
VW
913samples for every
914.BR mmap (2)
915call that has
cd7c700a 916.B PROT_EXEC
75ee11e5
VW
917set.
918This allows tools to notice new executable code being mapped into
919a program (dynamic shared libraries for example)
920so that addresses can be mapped back to the original code.
f2b1d720
MK
921.TP
922.IR "comm"
923The
924.I comm
925bit enables tracking of process command name as modified by the
cd7c700a 926.BR exec (2)
f2b1d720 927and
cd7c700a 928.BR prctl (PR_SET_NAME)
49bc411c
VW
929system calls as well as writing to
930.IR /proc/self/comm .
790ee6d6 931If the
49bc411c 932.I comm_exec
790ee6d6 933flag is also successfully set (possible since Linux 3.16),
747a6e7c 934.\" commit 82b897782d10fcc4930c9d4a15b175348fdd2871
49bc411c
VW
935then the misc flag
936.B PERF_RECORD_MISC_COMM_EXEC
937can be used to differentiate the
938.BR exec (2)
939case from the others.
f2b1d720
MK
940.TP
941.IR "freq"
942If this bit is set, then
943.I sample_frequency
944not
945.I sample_period
946is used when setting up the sampling interval.
f2b1d720
MK
947.TP
948.IR "inherit_stat"
949This bit enables saving of event counts on context switch for
950inherited tasks.
33a0ccb2 951This is meaningful only if the
f2b1d720
MK
952.I inherit
953field is set.
f2b1d720
MK
954.TP
955.IR "enable_on_exec"
956If this bit is set, a counter is automatically
957enabled after a call to
958.BR exec (2).
f2b1d720
MK
959.TP
960.IR "task"
961If this bit is set, then
962fork/exit notifications are included in the ring buffer.
f2b1d720
MK
963.TP
964.IR "watermark"
21977c9d 965If set, have an overflow notification happen when we cross the
f2b1d720
MK
966.I wakeup_watermark
967boundary.
21977c9d 968Otherwise, overflow notifications happen after
f2b1d720
MK
969.I wakeup_events
970samples.
f2b1d720 971.TP
31c1f2b0 972.IR "precise_ip" " (since Linux 2.6.35)"
747a6e7c 973.\" commit ab608344bcbde4f55ec4cd911b686b0ce3eae076
f2b1d720
MK
974This controls the amount of skid.
975Skid is how many instructions
976execute between an event of interest happening and the kernel
977being able to stop and record the event.
978Smaller skid is
979better and allows more accurate reporting of which events
980correspond to which instructions, but hardware is often limited
981with how small this can be.
982
983The values of this are the following:
984.RS
985.TP
9860 -
987.B SAMPLE_IP
2b538c3e 988can have arbitrary skid.
f2b1d720
MK
989.TP
9901 -
991.B SAMPLE_IP
2b538c3e 992must have constant skid.
f2b1d720
MK
993.TP
9942 -
995.B SAMPLE_IP
2b538c3e 996requested to have 0 skid.
f2b1d720
MK
997.TP
9983 -
999.B SAMPLE_IP
1000must have 0 skid.
1001See also
1002.BR PERF_RECORD_MISC_EXACT_IP .
1003.RE
f2b1d720 1004.TP
31c1f2b0 1005.IR "mmap_data" " (since Linux 2.6.36)"
747a6e7c 1006.\" commit 3af9e859281bda7eb7c20b51879cf43aa788ac2e
f2b1d720
MK
1007The counterpart of the
1008.I mmap
75ee11e5
VW
1009field.
1010This enables generation of
cd7c700a 1011.B PERF_RECORD_MMAP
75ee11e5
VW
1012samples for
1013.BR mmap (2)
1014calls that do not have
cd7c700a 1015.B PROT_EXEC
75ee11e5 1016set (for example data and SysV shared memory).
f2b1d720 1017.TP
31c1f2b0 1018.IR "sample_id_all" " (since Linux 2.6.38)"
747a6e7c 1019.\" commit c980d1091810df13f21aabbce545fd98f545bbf7
7480dabb 1020If set, then TID, TIME, ID, STREAM_ID, and CPU can
f2b1d720
MK
1021additionally be included in
1022.RB non- PERF_RECORD_SAMPLE s
1023if the corresponding
1024.I sample_type
1025is selected.
7480dabb 1026
e9bd9b2c 1027If
7480dabb 1028.B PERF_SAMPLE_IDENTIFIER
37bee118 1029is specified, then an additional ID value is included
7480dabb
VW
1030as the last value to ease parsing the record stream.
1031This may lead to the
e9bd9b2c 1032.I id
7480dabb
VW
1033value appearing twice.
1034
1035The layout is described by this pseudo-structure:
1036.in +4n
1037.nf
1038struct sample_id {
1039 { u32 pid, tid; } /* if PERF_SAMPLE_TID set */
1040 { u64 time; } /* if PERF_SAMPLE_TIME set */
1041 { u64 id; } /* if PERF_SAMPLE_ID set */
1042 { u64 stream_id;} /* if PERF_SAMPLE_STREAM_ID set */
1043 { u32 cpu, res; } /* if PERF_SAMPLE_CPU set */
1044 { u64 id; } /* if PERF_SAMPLE_IDENTIFIER set */
1045};
1046.fi
f2b1d720 1047.TP
31c1f2b0 1048.IR "exclude_host" " (since Linux 3.2)"
747a6e7c 1049.\" commit a240f76165e6255384d4bdb8139895fac7988799
e38fb93e
VW
1050When conducting measurements that include processes running
1051VM instances (i.e. have executed a
1052.I KVM_RUN
1053.BR ioctl (2)),
1054only measure events happening inside a guest instance.
1055This is only meaningful outside the guests; this setting does
1056not change counts gathered inside of a guest.
34d4e61d 1057Currently, this functionality is x86 only.
f2b1d720 1058.TP
31c1f2b0 1059.IR "exclude_guest" " (since Linux 3.2)"
747a6e7c 1060.\" commit a240f76165e6255384d4bdb8139895fac7988799
e38fb93e
VW
1061When conducting measurements that include processes running
1062VM instances (i.e. have executed a
1063.I KVM_RUN
1064.BR ioctl (2)),
1065do not measure events happening inside guest instances.
1066This is only meaningful outside the guests; this setting does
1067not change counts gathered inside of a guest.
34d4e61d 1068Currently, this functionality is x86 only.
f2b1d720 1069.TP
31c1f2b0 1070.IR "exclude_callchain_kernel" " (since Linux 3.7)"
747a6e7c 1071.\" commit d077526485d5c9b12fe85d0b2b3b7041e6bc5f91
f2b1d720 1072Do not include kernel callchains.
f2b1d720 1073.TP
31c1f2b0 1074.IR "exclude_callchain_user" " (since Linux 3.7)"
747a6e7c 1075.\" commit d077526485d5c9b12fe85d0b2b3b7041e6bc5f91
f2b1d720 1076Do not include user callchains.
f2b1d720 1077.TP
9bfc542b 1078.IR "mmap2" " (since Linux 3.16)"
747a6e7c
VW
1079.\" commit 13d7a2410fa637f450a29ecb515ac318ee40c741
1080.\" This is tricky; was committed during 3.12 development
1081.\" but right before release was disabled.
1082.\" So while you could select mmap2 starting with 3.12
1083.\" it did not work until 3.16
1084.\" commit a5a5ba72843dd05f991184d6cb9a4471acce1005
9bfc542b
VW
1085Generate an extended executable mmap record that contains enough
1086additional information to uniquely identify shared mappings.
1087The
1088.I mmap
1089flag must also be set for this to work.
1090.TP
49bc411c 1091.IR "comm_exec" " (since Linux 3.16)"
747a6e7c 1092.\" commit 82b897782d10fcc4930c9d4a15b175348fdd2871
5ab35ae5 1093This is purely a feature-detection flag; it does not change
49bc411c 1094kernel behavior.
5ab35ae5 1095If this flag can successfully be set, then, when
49bc411c 1096.I comm
5ab35ae5 1097is enabled, the
49bc411c
VW
1098.B PERF_RECORD_MISC_COMM_EXEC
1099flag will be set in the
1100.I misc
1101field of a comm record header if the rename event being
1102reported was caused by a call to
1103.BR exec (2).
1104This allows tools to distinguish between the various
1105types of process renaming.
1106.TP
6bd5186a
VW
1107.IR "use_clockid" " (since Linux 4.1)"
1108.\" commit 34f439278cef7b1177f8ce24f9fc81dfc6221d3b
1109This allows selecting which internal Linux clock to use
1110when generating timestamps via the
1111.I clockid
1112field.
1113This can make it easier to correlate perf sample times with
1114timestamps generated by other tools.
1115.TP
f2b1d720
MK
1116.IR "wakeup_events" ", " "wakeup_watermark"
1117This union sets how many samples
1118.RI ( wakeup_events )
1119or bytes
1120.RI ( wakeup_watermark )
21977c9d 1121happen before an overflow notification happens.
f2b1d720
MK
1122Which one is used is selected by the
1123.I watermark
cb8a928f 1124bit flag.
751c0f1a
VW
1125
1126.I wakeup_events
6170255e 1127counts only
751c0f1a 1128.B PERF_RECORD_SAMPLE
51700fd7 1129record types.
21977c9d 1130To receive overflow notification for all
751c0f1a 1131.B PERF_RECORD
21977c9d 1132types choose watermark and set
751c0f1a
VW
1133.I wakeup_watermark
1134to 1.
21977c9d
VW
1135
1136Prior to Linux 3.0 setting
747a6e7c 1137.\" commit f506b3dc0ec454a16d40cab9ee5d75435b39dc50
21977c9d
VW
1138.I wakeup_events
1139to 0 resulted in no overflow notifications;
1140more recent kernels treat 0 the same as 1.
f2b1d720 1141.TP
31c1f2b0 1142.IR "bp_type" " (since Linux 2.6.33)"
747a6e7c 1143.\" commit 24f1e32c60c45c89a997c73395b69c8af6f0a84e
f2b1d720
MK
1144This chooses the breakpoint type.
1145It is one of:
1146.RS
1147.TP
1148.BR HW_BREAKPOINT_EMPTY
2b538c3e 1149No breakpoint.
f2b1d720
MK
1150.TP
1151.BR HW_BREAKPOINT_R
2b538c3e 1152Count when we read the memory location.
f2b1d720
MK
1153.TP
1154.BR HW_BREAKPOINT_W
2b538c3e 1155Count when we write the memory location.
f2b1d720
MK
1156.TP
1157.BR HW_BREAKPOINT_RW
2b538c3e 1158Count when we read or write the memory location.
f2b1d720
MK
1159.TP
1160.BR HW_BREAKPOINT_X
2b538c3e 1161Count when we execute code at the memory location.
f2b1d720 1162.LP
7db515ef 1163The values can be combined via a bitwise or, but the
f2b1d720
MK
1164combination of
1165.B HW_BREAKPOINT_R
1166or
1167.B HW_BREAKPOINT_W
1168with
1169.B HW_BREAKPOINT_X
1170is not allowed.
1171.RE
f2b1d720 1172.TP
31c1f2b0 1173.IR "bp_addr" " (since Linux 2.6.33)"
747a6e7c 1174.\" commit 24f1e32c60c45c89a997c73395b69c8af6f0a84e
f2b1d720
MK
1175.I bp_addr
1176is the address of the breakpoint.
1177For execution breakpoints this is the memory address of the instruction
1178of interest; for read and write breakpoints it is the memory address
1179of the memory location of interest.
f2b1d720 1180.TP
31c1f2b0 1181.IR "config1" " (since Linux 2.6.39)"
747a6e7c 1182.\" commit a7e3ed1e470116c9d12c2f778431a481a6be8ab6
f2b1d720
MK
1183.I config1
1184is used for setting events that need an extra register or otherwise
1185do not fit in the regular config field.
1186Raw OFFCORE_EVENTS on Nehalem/Westmere/SandyBridge use this field
1187on 3.3 and later kernels.
f2b1d720 1188.TP
31c1f2b0 1189.IR "bp_len" " (since Linux 2.6.33)"
747a6e7c 1190.\" commit 24f1e32c60c45c89a997c73395b69c8af6f0a84e
f2b1d720
MK
1191.I bp_len
1192is the length of the breakpoint being measured if
1193.I type
1194is
1195.BR PERF_TYPE_BREAKPOINT .
1196Options are
1197.BR HW_BREAKPOINT_LEN_1 ,
1198.BR HW_BREAKPOINT_LEN_2 ,
1199.BR HW_BREAKPOINT_LEN_4 ,
1200.BR HW_BREAKPOINT_LEN_8 .
1201For an execution breakpoint, set this to
1202.IR sizeof(long) .
f2b1d720 1203.TP
31c1f2b0 1204.IR "config2" " (since Linux 2.6.39)"
747a6e7c 1205.\" commit a7e3ed1e470116c9d12c2f778431a481a6be8ab6
f2b1d720
MK
1206
1207.I config2
1208is a further extension of the
1209.I config1
1210field.
f2b1d720 1211.TP
31c1f2b0 1212.IR "branch_sample_type" " (since Linux 3.4)"
747a6e7c 1213.\" commit bce38cd53e5ddba9cb6d708c4ef3d04a4016ec7e
8a94e783 1214If
045bf4d3
VW
1215.B PERF_SAMPLE_BRANCH_STACK
1216is enabled, then this specifies what branches to include
1217in the branch record.
e3c9782b
VW
1218
1219The first part of the value is the privilege level, which
1220is a combination of one of the following values.
045bf4d3
VW
1221If the user does not set privilege level explicitly, the kernel
1222will use the event's privilege level.
1223Event and branch privilege levels do not have to match.
f2b1d720
MK
1224.RS
1225.TP
1226.B PERF_SAMPLE_BRANCH_USER
33d6e2c7 1227Branch target is in user space.
f2b1d720
MK
1228.TP
1229.B PERF_SAMPLE_BRANCH_KERNEL
33d6e2c7 1230Branch target is in kernel space.
f2b1d720
MK
1231.TP
1232.B PERF_SAMPLE_BRANCH_HV
33d6e2c7 1233Branch target is in hypervisor.
e3c9782b
VW
1234.TP
1235.B PERF_SAMPLE_BRANCH_PLM_ALL
1236A convenience value that is the three preceding values ORed together.
e3c9782b
VW
1237.P
1238In addition to the privilege value, at least one of the
1239following bits must be set.
f2b1d720
MK
1240.TP
1241.B PERF_SAMPLE_BRANCH_ANY
33d6e2c7 1242Any branch type.
f2b1d720
MK
1243.TP
1244.B PERF_SAMPLE_BRANCH_ANY_CALL
33d6e2c7 1245Any call branch.
f2b1d720
MK
1246.TP
1247.B PERF_SAMPLE_BRANCH_ANY_RETURN
33d6e2c7 1248Any return branch.
f2b1d720 1249.TP
e3c9782b 1250.B PERF_SAMPLE_BRANCH_IND_CALL
33d6e2c7 1251Indirect calls.
f2b1d720 1252.TP
aea60aad 1253.BR PERF_SAMPLE_BRANCH_COND " (since Linux 3.16)"
60dafbc1 1254.\" commit bac52139f0b7ab31330e98fd87fc5a2664951050
aea60aad
VW
1255Conditional branches.
1256.TP
31c1f2b0 1257.BR PERF_SAMPLE_BRANCH_ABORT_TX " (since Linux 3.11)"
60dafbc1 1258.\" commit 135c5612c460f89657c4698fe2ea753f6f667963
33d6e2c7 1259Transactional memory aborts.
e3c9782b 1260.TP
31c1f2b0 1261.BR PERF_SAMPLE_BRANCH_IN_TX " (since Linux 3.11)"
60dafbc1 1262.\" commit 135c5612c460f89657c4698fe2ea753f6f667963
33d6e2c7 1263Branch in transactional memory transaction.
e3c9782b 1264.TP
31c1f2b0 1265.BR PERF_SAMPLE_BRANCH_NO_TX " (since Linux 3.11)"
60dafbc1 1266.\" commit 135c5612c460f89657c4698fe2ea753f6f667963
33d6e2c7 1267Branch not in transactional memory transaction.
bb7e6ff0
VW
.TP
1268.BR PERF_SAMPLE_BRANCH_CALL_STACK " (since Linux 4.1)"
1269.\" commit 2c44b1936bb3b135a3fac8b3493394d42e51cf70
95655a22 1270Branch is part of a hardware-generated call stack.
bb7e6ff0
VW
1271This requires hardware support, currently only found
1272on Intel x86 Haswell or newer.
f2b1d720 1273.RE
f2b1d720 1274.TP
31c1f2b0 1275.IR "sample_regs_user" " (since Linux 3.7)"
747a6e7c 1276.\" commit 4018994f3d8785275ef0e7391b75c3462c029e56
4651e412 1277This bit mask defines the set of user CPU registers to dump on samples.
76c637e1 1278The layout of the register mask is architecture-specific and
d1007d14
VW
1279described in the kernel header
1280.IR arch/ARCH/include/uapi/asm/perf_regs.h .
f2b1d720 1281.TP
31c1f2b0 1282.IR "sample_stack_user" " (since Linux 3.7)"
747a6e7c 1283.\" commit c5ebcedb566ef17bda7b02686e0d658a7bb42ee7
d1007d14
VW
1284This defines the size of the user stack to dump if
1285.B PERF_SAMPLE_STACK_USER
1286is specified.
6bd5186a
VW
1287.TP
1288.IR "clockid" " (since Linux 4.1)"
1289.\" commit 34f439278cef7b1177f8ce24f9fc81dfc6221d3b
1290If
1291.I use_clockid
1292is set, then this field selects which internal Linux timer to
1293use for timestamps.
1294The available timers are defined in
1295.IR linux/time.h ,
1296with
95655a22
MK
1297.BR CLOCK_MONOTONIC ,
1298.BR CLOCK_MONOTONIC_RAW ,
1299.BR CLOCK_REALTIME ,
1300.BR CLOCK_BOOTTIME ,
1301and
1302.B CLOCK_TAI
6bd5186a 1303currently supported.
cdc52f4a
VW
1304.TP
1305.IR "aux_watermark" " (since Linux 4.1)"
1306.\" commit 1a5941312414c71dece6717da9a0fa1303127afa
1307This specifies how much data is required to trigger a
1308.B PERF_RECORD_AUX
1309sample.
73d8cece 1310.SS Reading results
f2b1d720 1311Once a
7db515ef 1312.BR perf_event_open ()
3d1ee497 1313file descriptor has been opened, the values
f2b1d720
MK
1314of the events can be read from the file descriptor.
1315The values that are there are specified by the
1316.I read_format
7db515ef
MK
1317field in the
1318.I attr
1319structure at open time.
f2b1d720
MK
1320
1321If you attempt to read into a buffer that is not big enough to hold the
1322data,
1323.B ENOSPC
1324is returned.
1325
1326Here is the layout of the data returned by a read:
e525b89f 1327.IP * 2
f2b1d720
MK
1328If
1329.B PERF_FORMAT_GROUP
1330was specified to allow reading all events in a group at once:
1331
1332.in +4n
1333.nf
1334struct read_format {
e525b89f
MK
1335 u64 nr; /* The number of events */
1336 u64 time_enabled; /* if PERF_FORMAT_TOTAL_TIME_ENABLED */
1337 u64 time_running; /* if PERF_FORMAT_TOTAL_TIME_RUNNING */
e307112d 1338 struct {
e525b89f
MK
1339 u64 value; /* The value of the event */
1340 u64 id; /* if PERF_FORMAT_ID */
f2b1d720
MK
1341 } values[nr];
1342};
1343.fi
1344.in
e525b89f 1345.IP *
f2b1d720
MK
1346If
1347.B PERF_FORMAT_GROUP
1348was
1349.I not
e525b89f 1350specified:
f2b1d720
MK
1351
1352.in +4n
1353.nf
1354struct read_format {
1355 u64 value; /* The value of the event */
1356 u64 time_enabled; /* if PERF_FORMAT_TOTAL_TIME_ENABLED */
1357 u64 time_running; /* if PERF_FORMAT_TOTAL_TIME_RUNNING */
1358 u64 id; /* if PERF_FORMAT_ID */
1359};
1360.fi
1361.in
e525b89f
MK
1362.PP
1363The values read are as follows:
f2b1d720
MK
1364.TP
1365.I nr
1366The number of events in this file descriptor.
1367Only available if
1368.B PERF_FORMAT_GROUP
1369was specified.
f2b1d720
MK
1370.TP
1371.IR time_enabled ", " time_running
1372Total time the event was enabled and running.
1373Normally these are the same.
37bee118
MK
1374If more events are started
1375than available counter slots on the PMU, then multiplexing
33a0ccb2 1376happens and events run only part of the time.
37bee118 1377In that case, the
f2b1d720
MK
1378.I time_enabled
1379and
1380.I time_running
1381values can be used to scale an estimated value for the count.
f2b1d720
MK
1382.TP
1383.I value
1384An unsigned 64-bit value containing the counter result.
f2b1d720
MK
1385.TP
1386.I id
6170255e 1387A globally unique value for this particular event, only present if
f2b1d720 1388.B PERF_FORMAT_ID
e525b89f
MK
1389was specified in
1390.IR read_format .
73d8cece 1391.SS MMAP layout
f2b1d720 1392When using
7db515ef 1393.BR perf_event_open ()
f2b1d720
MK
1394in sampled mode, asynchronous events
1395(like counter overflow or
1396.B PROT_EXEC
1397mmap tracking)
1398are logged into a ring-buffer.
1399This ring-buffer is created and accessed through
1400.BR mmap (2).
1401
1402The mmap size should be 1+2^n pages, where the first page is a
1403metadata page
e525b89f 1404.RI ( "struct perf_event_mmap_page" )
f2b1d720
MK
1405that contains various
1406bits of information such as where the ring-buffer head is.
1407
95655a22 1408Before kernel 2.6.39, there is a bug that means you must allocate an mmap
f2b1d720
MK
1409ring buffer when sampling even if you do not plan to access it.
1410
1411The structure of the first metadata mmap page is as follows:
1412
1413.in +4n
1414.nf
1415struct perf_event_mmap_page {
ce88f77b
MK
1416 __u32 version; /* version number of this structure */
1417 __u32 compat_version; /* lowest version this is compat with */
1418 __u32 lock; /* seqlock for synchronization */
1419 __u32 index; /* hardware counter identifier */
1420 __s64 offset; /* add to hardware counter value */
1421 __u64 time_enabled; /* time event active */
1422 __u64 time_running; /* time event on CPU */
f2b1d720
MK
1423 union {
1424 __u64 capabilities;
135cba8b 1425 struct {
ce88f77b
MK
1426 __u64 cap_usr_time / cap_usr_rdpmc / cap_bit0 : 1,
1427 cap_bit0_is_deprecated : 1,
1428 cap_user_rdpmc : 1,
1429 cap_user_time : 1,
1430 cap_user_time_zero : 1,
135cba8b 1431 };
f2b1d720 1432 };
ce88f77b
MK
1433 __u16 pmc_width;
1434 __u16 time_shift;
1435 __u32 time_mult;
1436 __u64 time_offset;
1437 __u64 __reserved[120]; /* Pad to 1k */
1438 __u64 data_head; /* head in the data section */
1439 __u64 data_tail; /* user-space written tail */
21d9849a
VW
1440 __u64 data_offset; /* where the buffer starts */
1441 __u64 data_size; /* data buffer size */
4e47c6e5
VW
1442 __u64 aux_head;
1443 __u64 aux_tail;
1444 __u64 aux_offset;
1445 __u64 aux_size;
21d9849a 1446
f2b1d720
MK
1447};
1448.fi
1449.in
1450
ce88f77b 1451The following list describes the fields in the
f2b1d720 1452.I perf_event_mmap_page
e525b89f 1453structure in more detail:
f2b1d720
MK
1454.TP
1455.I version
1456Version number of this structure.
f2b1d720
MK
1457.TP
1458.I compat_version
1459The lowest version this is compatible with.
f2b1d720
MK
1460.TP
1461.I lock
1462A seqlock for synchronization.
f2b1d720
MK
1463.TP
1464.I index
1465A unique hardware counter identifier.
f2b1d720
MK
1466.TP
1467.I offset
135cba8b
VW
1468When using rdpmc for reads this offset value
1469must be added to the one returned by rdpmc to get
1470the current total event count.
f2b1d720
MK
1471.TP
1472.I time_enabled
1473Time the event was active.
f2b1d720
MK
1474.TP
1475.I time_running
1476Time the event was running.
f2b1d720 1477.TP
31c1f2b0 1478.IR cap_usr_time " / " cap_usr_rdpmc " / " cap_bit0 " (since Linux 3.4)"
747a6e7c 1479.\" commit c7206205d00ab375839bd6c7ddb247d600693c09
e9bd9b2c 1480There was a bug in the definition of
f2b1d720 1481.I cap_usr_time
135cba8b
VW
1482and
1483.I cap_usr_rdpmc
1484from Linux 3.4 until Linux 3.11.
1485Both bits were defined to point to the same location, so it was
e9bd9b2c 1486impossible to know if
135cba8b
VW
1487.I cap_usr_time
1488or
1489.I cap_usr_rdpmc
1490were actually set.
1491
4010bc07 1492Starting with Linux 3.12, these are renamed to
747a6e7c 1493.\" commit fa7315871046b9a4c48627905691dbde57e51033
135cba8b 1494.I cap_bit0
e41c36b2 1495and you should use the
135cba8b
VW
1496.I cap_user_time
1497and
1498.I cap_user_rdpmc
1499fields instead.
f2b1d720 1500.TP
31c1f2b0 1501.IR cap_bit0_is_deprecated " (since Linux 3.12)"
747a6e7c 1502.\" commit fa7315871046b9a4c48627905691dbde57e51033
37bee118 1503If set, this bit indicates that the kernel supports
135cba8b
VW
1504the properly separated
1505.I cap_user_time
1506and
1507.I cap_user_rdpmc
1508bits.
1509
1510If not set, it indicates an older kernel where
1511.I cap_usr_time
1512and
f2b1d720 1513.I cap_usr_rdpmc
135cba8b
VW
1514map to the same bit and thus both features should
1515be used with caution.
135cba8b 1516.TP
31c1f2b0 1517.IR cap_user_rdpmc " (since Linux 3.12)"
747a6e7c 1518.\" commit fa7315871046b9a4c48627905691dbde57e51033
f2b1d720
MK
1519If the hardware supports user-space read of performance counters
1520without syscall (this is the "rdpmc" instruction on x86), then
1521the following code can be used to do a read:
1522
1523.in +4n
1524.nf
1525u32 seq, time_mult, time_shift, idx, width;
1526u64 count, enabled, running;
1527u64 cyc, time_offset;
f2b1d720
MK
1528
1529do {
1530 seq = pc\->lock;
1531 barrier();
1532 enabled = pc\->time_enabled;
1533 running = pc\->time_running;
1534
1535 if (pc\->cap_usr_time && enabled != running) {
1536 cyc = rdtsc();
1537 time_offset = pc\->time_offset;
1538 time_mult = pc\->time_mult;
1539 time_shift = pc\->time_shift;
1540 }
1541
1542 idx = pc\->index;
1543 count = pc\->offset;
1544
1545 if (pc\->cap_usr_rdpmc && idx) {
1546 width = pc\->pmc_width;
135cba8b 1547 count += rdpmc(idx \- 1);
f2b1d720
MK
1548 }
1549
1550 barrier();
1551} while (pc\->lock != seq);
1552.fi
1553.in
f2b1d720 1554.TP
cc19ea28 1555.IR cap_user_time " (since Linux 3.12)"
747a6e7c 1556.\" commit fa7315871046b9a4c48627905691dbde57e51033
7d182bb6 1557This bit indicates the hardware has a constant, nonstop
135cba8b
VW
1558timestamp counter (TSC on x86).
1559.TP
31c1f2b0 1560.IR cap_user_time_zero " (since Linux 3.12)"
747a6e7c 1561.\" commit fa7315871046b9a4c48627905691dbde57e51033
135cba8b
VW
1562Indicates the presence of
1563.I time_zero
1564which allows mapping timestamp values to
1565the hardware clock.
1566.TP
f2b1d720
MK
1567.I pmc_width
1568If
1569.IR cap_usr_rdpmc ,
1570this field provides the bit-width of the value
1571read using the rdpmc or equivalent instruction.
1572This can be used to sign extend the result like:
1573
1574.in +4n
1575.nf
1576pmc <<= 64 \- pmc_width;
1577pmc >>= 64 \- pmc_width; // signed shift right
1578count += pmc;
1579.fi
1580.in
f2b1d720
MK
1581.TP
1582.IR time_shift ", " time_mult ", " time_offset
1583
1584If
1585.IR cap_usr_time ,
1586these fields can be used to compute the time
7db515ef 1587delta since time_enabled (in nanoseconds) using rdtsc or similar.
f2b1d720
MK
1588.nf
1589
1590 u64 quot, rem;
1591 u64 delta;
1592 quot = (cyc >> time_shift);
1593 rem = cyc & (((u64)1 << time_shift) \- 1);
1594 delta = time_offset + quot * time_mult +
1595 ((rem * time_mult) >> time_shift);
1596.fi
1597
7db515ef
MK
1598Where
1599.IR time_offset ,
1600.IR time_mult ,
1601.IR time_shift ,
1602and
1603.IR cyc
1604are read in the
f2b1d720
MK
1605seqcount loop described above.
1606This delta can then be added to
1607enabled and possibly running (if idx), improving the scaling:
1608.nf
1609
1610 enabled += delta;
1611 if (idx)
1612 running += delta;
1613 quot = count / running;
1614 rem = count % running;
1615 count = quot * enabled + (rem * enabled) / running;
1616.fi
f2b1d720 1617.TP
31c1f2b0 1618.IR time_zero " (since Linux 3.12)"
747a6e7c 1619.\" commit fa7315871046b9a4c48627905691dbde57e51033
135cba8b 1620
e9bd9b2c 1621If
135cba8b 1622.I cap_user_time_zero
37bee118 1623is set, then the hardware clock (the TSC timestamp counter on x86)
135cba8b
VW
1624can be calculated from the
1625.IR time_zero ", " time_mult ", and " time_shift " values:"
ce88f77b 1626
135cba8b
VW
1627.nf
1628 time = timestamp - time_zero;
1629 quot = time / time_mult;
1630 rem = time % time_mult;
1631 cyc = (quot << time_shift) + (rem << time_shift) / time_mult;
1632.fi
ce88f77b 1633
135cba8b 1634And vice versa:
ce88f77b 1635
135cba8b
VW
1636.nf
1637 quot = cyc >> time_shift;
1638 rem = cyc & (((u64)1 << time_shift) - 1);
1639 timestamp = time_zero + quot * time_mult +
1640 ((rem * time_mult) >> time_shift);
1641.fi
1642.TP
f2b1d720
MK
1643.I data_head
1644This points to the head of the data section.
7db515ef
MK
1645The value continuously increases; it does not wrap.
1646The value needs to be manually wrapped by the size of the mmap buffer
f2b1d720
MK
1647before accessing the samples.
1648
ce88f77b
MK
1649On SMP-capable platforms, after reading the
1650.I data_head
1651value,
ad73a2cc 1652user space should issue an rmb().
f2b1d720 1653.TP
fecd584f 1654.I data_tail
f2b1d720
MK
1655When the mapping is
1656.BR PROT_WRITE ,
7db515ef
MK
1657the
1658.I data_tail
1659value should be written by user space to reflect the last read data.
31020de9 1660In this case, the kernel will not overwrite unread data.
21d9849a
VW
1661.TP
1662.IR data_offset " (since Linux 4.1)"
1663.\" commit e8c6deac69629c0cb97c3d3272f8631ef17f8f0f
1664Contains the offset of the location in the mmap buffer
1665where perf sample data begins.
1666.TP
1667.IR data_size " (since Linux 4.1)"
1668.\" commit e8c6deac69629c0cb97c3d3272f8631ef17f8f0f
1669Contains the size of the perf sample region within
1670the mmap buffer.
4e47c6e5
VW
1671.TP
1672.IR aux_head ", " aux_tail ", " aux_offset ", " aux_size " (since Linux 4.1)"
1673.\" commit 45bfb2e50471abbbfd83d40d28c986078b0d24ff
95655a22
MK
1674The AUX region allows mmaping a separate sample buffer for
1675high-bandwidth data streams (separate from the main perf sample buffer).
1676An example of a high-bandwidth stream is instruction tracing support,
4e47c6e5
VW
1677as is found in newer Intel processors.
1678
1679To set up an AUX area, first
1680.I aux_offset
1681needs to be set with an offset greater than
1682.IR data_offset + data_size
1683and
1684.I aux_size
1685needs to be set to the desired buffer size.
1686The desired offset and size must be page aligned, and the size
1687must be a power of two.
1688These values are then passed to mmap in order to map the AUX buffer.
95655a22
MK
1689Pages in the AUX buffer are included as part of the
1690.BR RLIMIT_MEMLOCK
1691resource limit (see
1692.BR setrlimit (2)),
1693and also as part of the
4e47c6e5
VW
1694.I perf_event_mlock_kb
1695allowance.
1696
95655a22 1697By default, the AUX buffer will be truncated if it will not fit
b1355f6a
VW
1698in the available space in the ring buffer.
1699If the AUX buffer is mapped as a read-only buffer, then it will
1700operate in ring buffer mode where old data will be overwritten
1701by new.
95655a22 1702In overwrite mode, it might not be possible to infer where the
b1355f6a
VW
1703new data began, and it is the consumer's job to disable
1704measurement while reading to avoid possible data races.
1705
4e47c6e5
VW
1706The
1707.IR aux_head " and " aux_tail
1708ring buffer pointers have the same behavior and ordering
1709rules as the previously described
1710.IR data_head " and " data_tail .
e525b89f 1711.PP
f2b1d720
MK
1712The following 2^n ring-buffer pages have the layout described below.
1713
1714If
1715.I perf_event_attr.sample_id_all
1716is set, then all event types will
1717have the sample_type selected fields related to where/when (identity)
1718an event took place (TID, TIME, ID, CPU, STREAM_ID) described in
1719.B PERF_RECORD_SAMPLE
1720below; they will be stashed just after the
7db515ef
MK
1721.I perf_event_header
1722and the fields already present for the existing
3d1ee497 1723fields, that is, at the end of the payload.
f2b1d720
MK
1724That way a newer perf.data
1725file will be supported by older perf tools, with these new optional
1726fields being ignored.
1727
1728The mmap values start with a header:
1729
1730.in +4n
1731.nf
1732struct perf_event_header {
1733 __u32 type;
1734 __u16 misc;
1735 __u16 size;
1736};
1737.fi
1738.in
1739
1740Below, we describe the
1741.I perf_event_header
1742fields in more detail.
4047bc6c
MK
1743For ease of reading,
1744the fields with shorter descriptions are presented first.
1745.TP
1746.I size
1747This indicates the size of the record.
1748.TP
1749.I misc
1750The
1751.I misc
1752field contains additional information about the sample.
1753
1754The CPU mode can be determined from this value by masking with
1755.B PERF_RECORD_MISC_CPUMODE_MASK
1756and looking for one of the following (note these are not
1757bit masks, only one can be set at a time):
1758.RS
1759.TP
1760.B PERF_RECORD_MISC_CPUMODE_UNKNOWN
1761Unknown CPU mode.
1762.TP
1763.B PERF_RECORD_MISC_KERNEL
1764Sample happened in the kernel.
1765.TP
1766.B PERF_RECORD_MISC_USER
1767Sample happened in user code.
1768.TP
1769.B PERF_RECORD_MISC_HYPERVISOR
1770Sample happened in the hypervisor.
1771.TP
747a6e7c 1772.BR PERF_RECORD_MISC_GUEST_KERNEL " (since Linux 2.6.35)"
60dafbc1 1773.\" commit 39447b386c846bbf1c56f6403c5282837486200f
4047bc6c
MK
1774Sample happened in the guest kernel.
1775.TP
747a6e7c 1776.BR PERF_RECORD_MISC_GUEST_USER " (since Linux 2.6.35)"
60dafbc1 1777.\" commit 39447b386c846bbf1c56f6403c5282837486200f
4047bc6c
MK
1778Sample happened in guest user code.
1779.RE
1780
1781.RS
1782In addition, one of the following bits can be set:
1783.TP
60dafbc1
MK
1784.BR PERF_RECORD_MISC_MMAP_DATA " (since Linux 3.10)"
1785.\" commit 2fe85427e3bf65d791700d065132772fc26e4d75
4047bc6c
MK
1786This is set when the mapping is not executable;
1787otherwise the mapping is executable.
1788.TP
60dafbc1
MK
1789.BR PERF_RECORD_MISC_COMM_EXEC " (since Linux 3.16)"
1790.\" commit 82b897782d10fcc4930c9d4a15b175348fdd2871
49bc411c
VW
1791This is set for a
1792.B PERF_RECORD_COMM
1793record on kernels more recent than Linux 3.16
1794if a process name change was caused by an
1795.BR exec (2)
1796system call.
1797It is an alias for
1798.B PERF_RECORD_MISC_MMAP_DATA
1799since the two values would not be set in the same record.
1800.TP
4047bc6c
MK
1801.B PERF_RECORD_MISC_EXACT_IP
1802This indicates that the content of
1803.B PERF_SAMPLE_IP
1804points
1805to the actual instruction that triggered the event.
1806See also
1807.IR perf_event_attr.precise_ip .
1808.TP
60dafbc1
MK
1809.BR PERF_RECORD_MISC_EXT_RESERVED " (since Linux 2.6.35)"
1810.\" commit 1676b8a077c352085d52578fb4f29350b58b6e74
4047bc6c
MK
1811This indicates there is extended data available (currently not used).
1812.RE
f2b1d720
MK
1813.TP
1814.I type
1815The
1816.I type
1817value is one of the below.
1818The values in the corresponding record (that follows the header)
1819depend on the
1820.I type
1821selected as shown.
f2b1d720 1822.RS
7db515ef 1823.TP 4
f2b1d720
MK
1824.B PERF_RECORD_MMAP
1825The MMAP events record the
1826.B PROT_EXEC
1827mappings so that we can correlate
ad73a2cc 1828user-space IPs to code.
f2b1d720
MK
1829They have the following structure:
1830
1831.in +4n
1832.nf
1833struct {
1834 struct perf_event_header header;
1835 u32 pid, tid;
1836 u64 addr;
1837 u64 len;
1838 u64 pgoff;
1839 char filename[];
1840};
1841.fi
1842.in
9bfc542b
VW
1843.RS
1844.TP
1845.I pid
3a058284 1846is the process ID.
9bfc542b
VW
1847.TP
1848.I tid
3a058284 1849is the thread ID.
9bfc542b
VW
1850.TP
1851.I addr
1852is the address of the allocated memory.
.TP
1853.I len
1854is the length of the allocated memory.
.TP
1855.I pgoff
1856is the page offset of the allocated memory.
.TP
1857.I filename
1858is a string describing the backing of the allocated memory.
1859.RE
f2b1d720
MK
1860.TP
1861.B PERF_RECORD_LOST
1862This record indicates when events are lost.
1863
1864.in +4n
1865.nf
1866struct {
1867 struct perf_event_header header;
1868 u64 id;
1869 u64 lost;
7480dabb 1870 struct sample_id sample_id;
f2b1d720
MK
1871};
1872.fi
1873.in
f2b1d720
MK
1874.RS
1875.TP
1876.I id
1877is the unique event ID for the samples that were lost.
1878.TP
1879.I lost
1880is the number of events that were lost.
1881.RE
f2b1d720
MK
1882.TP
1883.B PERF_RECORD_COMM
1884This record indicates a change in the process name.
1885
1886.in +4n
1887.nf
1888struct {
1889 struct perf_event_header header;
5ab35ae5
MK
1890 u32 pid;
1891 u32 tid;
f2b1d720 1892 char comm[];
7480dabb 1893 struct sample_id sample_id;
f2b1d720
MK
1894};
1895.fi
1896.in
49bc411c
VW
1897.RS
1898.TP
1899.I pid
5ab35ae5 1900is the process ID.
49bc411c
VW
1901.TP
1902.I tid
5ab35ae5 1903is the thread ID.
49bc411c
VW
1904.TP
1905.I comm
1906is a string containing the new name of the process.
1907.RE
f2b1d720
MK
1908.TP
1909.B PERF_RECORD_EXIT
1910This record indicates a process exit event.
1911
1912.in +4n
1913.nf
1914struct {
1915 struct perf_event_header header;
1916 u32 pid, ppid;
1917 u32 tid, ptid;
1918 u64 time;
7480dabb 1919 struct sample_id sample_id;
f2b1d720
MK
1920};
1921.fi
1922.in
f2b1d720
MK
1923.TP
1924.BR PERF_RECORD_THROTTLE ", " PERF_RECORD_UNTHROTTLE
1925This record indicates a throttle/unthrottle event.
1926
1927.in +4n
1928.nf
1929struct {
1930 struct perf_event_header header;
1931 u64 time;
1932 u64 id;
1933 u64 stream_id;
7480dabb 1934 struct sample_id sample_id;
f2b1d720
MK
1935};
1936.fi
1937.in
f2b1d720
MK
1938.TP
1939.B PERF_RECORD_FORK
1940This record indicates a fork event.
1941
1942.in +4n
1943.nf
1944struct {
1945 struct perf_event_header header;
1946 u32 pid, ppid;
1947 u32 tid, ptid;
1948 u64 time;
7480dabb 1949 struct sample_id sample_id;
f2b1d720
MK
1950};
1951.fi
1952.in
f2b1d720
MK
1953.TP
1954.B PERF_RECORD_READ
1955This record indicates a read event.
1956
1957.in +4n
1958.nf
1959struct {
1960 struct perf_event_header header;
1961 u32 pid, tid;
1962 struct read_format values;
7480dabb 1963 struct sample_id sample_id;
f2b1d720
MK
1964};
1965.fi
1966.in
f2b1d720
MK
1967.TP
1968.B PERF_RECORD_SAMPLE
1969This record indicates a sample.
1970
1971.in +4n
1972.nf
1973struct {
1974 struct perf_event_header header;
7480dabb 1975 u64 sample_id; /* if PERF_SAMPLE_IDENTIFIER */
7db515ef
MK
1976 u64 ip; /* if PERF_SAMPLE_IP */
1977 u32 pid, tid; /* if PERF_SAMPLE_TID */
1978 u64 time; /* if PERF_SAMPLE_TIME */
1979 u64 addr; /* if PERF_SAMPLE_ADDR */
1980 u64 id; /* if PERF_SAMPLE_ID */
1981 u64 stream_id; /* if PERF_SAMPLE_STREAM_ID */
1982 u32 cpu, res; /* if PERF_SAMPLE_CPU */
1983 u64 period; /* if PERF_SAMPLE_PERIOD */
f2b1d720 1984 struct read_format v; /* if PERF_SAMPLE_READ */
7db515ef
MK
1985 u64 nr; /* if PERF_SAMPLE_CALLCHAIN */
1986 u64 ips[nr]; /* if PERF_SAMPLE_CALLCHAIN */
1987 u32 size; /* if PERF_SAMPLE_RAW */
1988 char data[size]; /* if PERF_SAMPLE_RAW */
1989 u64 bnr; /* if PERF_SAMPLE_BRANCH_STACK */
1990 struct perf_branch_entry lbr[bnr];
1991 /* if PERF_SAMPLE_BRANCH_STACK */
1992 u64 abi; /* if PERF_SAMPLE_REGS_USER */
1993 u64 regs[weight(mask)];
1994 /* if PERF_SAMPLE_REGS_USER */
1995 u64 size; /* if PERF_SAMPLE_STACK_USER */
1996 char data[size]; /* if PERF_SAMPLE_STACK_USER */
4dc411dd 1997 u64 dyn_size; /* if PERF_SAMPLE_STACK_USER && size != 0 */
d1007d14
VW
1998 u64 weight; /* if PERF_SAMPLE_WEIGHT */
1999 u64 data_src; /* if PERF_SAMPLE_DATA_SRC */
1e043959 2000 u64 transaction;/* if PERF_SAMPLE_TRANSACTION */
f5281dfd
VW
2001 u64 abi; /* if PERF_SAMPLE_REGS_INTR */
2002 u64 regs[weight(mask)];
2003 /* if PERF_SAMPLE_REGS_INTR */
f2b1d720
MK
2004};
2005.fi
4047bc6c
MK
2006.RS 4
2007.TP 4
7480dabb
VW
2008.I sample_id
2009If
2010.B PERF_SAMPLE_IDENTIFIER
2011is enabled, a 64-bit unique ID is included.
e9bd9b2c 2012This is a duplication of the
7480dabb
VW
2013.B PERF_SAMPLE_ID
2014.I id
2015value, but included at the beginning of the sample
2016so parsers can easily obtain the value.
2017.TP
f2b1d720 2018.I ip
7db515ef
MK
2019If
2020.B PERF_SAMPLE_IP
2021is enabled, then a 64-bit instruction
f2b1d720 2022pointer value is included.
f2b1d720 2023.TP
7db515ef
MK
2024.IR pid ", " tid
2025If
2026.B PERF_SAMPLE_TID
2027is enabled, then a 32-bit process ID
2028and 32-bit thread ID are included.
f2b1d720
MK
2029.TP
2030.I time
7db515ef
MK
2031If
2032.B PERF_SAMPLE_TIME
2033is enabled, then a 64-bit timestamp
f2b1d720
MK
2034is included.
2035This is obtained via local_clock() which is a hardware timestamp
2036if available and the jiffies value if not.
f2b1d720
MK
2037.TP
2038.I addr
7db515ef
MK
2039If
2040.B PERF_SAMPLE_ADDR
2041is enabled, then a 64-bit address is included.
f2b1d720
MK
2042This is usually the address of a tracepoint,
2043breakpoint, or software event; otherwise the value is 0.
f2b1d720
MK
2044.TP
2045.I id
7db515ef
MK
2046If
2047.B PERF_SAMPLE_ID
2048is enabled, a 64-bit unique ID is included.
f2b1d720 2049If the event is a member of an event group, the group leader ID is returned.
7db515ef
MK
2050This ID is the same as the one returned by
2051.BR PERF_FORMAT_ID .
f2b1d720
MK
2052.TP
2053.I stream_id
7db515ef
MK
2054If
2055.B PERF_SAMPLE_STREAM_ID
2056is enabled, a 64-bit unique ID is included.
f2b1d720
MK
2057Unlike
2058.B PERF_SAMPLE_ID
2059the actual ID is returned, not that of the group leader.
7db515ef
MK
2060This ID is the same as the one returned by
2061.BR PERF_FORMAT_ID .
f2b1d720 2062.TP
7db515ef
MK
2063.IR cpu ", " res
2064If
2065.B PERF_SAMPLE_CPU
2066is enabled, this is a 32-bit value indicating
f2b1d720
MK
2067which CPU was being used, in addition to a reserved (unused)
206832-bit value.
f2b1d720
MK
2069.TP
2070.I period
7db515ef
MK
2071If
2072.B PERF_SAMPLE_PERIOD
2073is enabled, a 64-bit value indicating
f2b1d720 2074the current sampling period is written.
f2b1d720
MK
2075.TP
2076.I v
7db515ef
MK
2077If
2078.B PERF_SAMPLE_READ
2079is enabled, a structure of type read_format
f2b1d720
MK
2080is included which has values for all events in the event group.
2081The values included depend on the
2082.I read_format
7db515ef
MK
2083value used at
2084.BR perf_event_open ()
2085time.
f2b1d720 2086.TP
7db515ef
MK
2087.IR nr ", " ips[nr]
2088If
2089.B PERF_SAMPLE_CALLCHAIN
2090is enabled, then a 64-bit number is included
f2b1d720 2091which indicates how many 64-bit instruction pointers will
7db515ef
MK
2092follow.
2093This is the current callchain.
f2b1d720 2094.TP
7ede2f66 2095.IR size ", " data[size]
7db515ef
MK
2096If
2097.B PERF_SAMPLE_RAW
2098is enabled, then a 32-bit value indicating size
f2b1d720
MK
2099is included followed by an array of 8-bit values of length size.
2100The values are padded with 0 to have 64-bit alignment.
2101
2102This RAW record data is opaque with respect to the ABI.
2103The ABI doesn't make any promises with respect to the stability
2104of its content; it may vary depending
2105on event, hardware, and kernel version.
f2b1d720 2106.TP
7db515ef
MK
2107.IR bnr ", " lbr[bnr]
2108If
2109.B PERF_SAMPLE_BRANCH_STACK
2110is enabled, then a 64-bit value indicating
2111the number of records is included, followed by
2112.I bnr
2113.I perf_branch_entry
045bf4d3
VW
2114structures which each include the fields:
2115.RS
2116.TP
2117.I from
2b538c3e 2118This indicates the source instruction (may not be a branch).
045bf4d3
VW
2119.TP
2120.I to
2b538c3e 2121The branch target.
045bf4d3
VW
2122.TP
2123.I mispred
2b538c3e 2124The branch target was mispredicted.
045bf4d3
VW
2125.TP
2126.I predicted
2b538c3e 2127The branch target was predicted.
e3c9782b 2128.TP
31c1f2b0 2129.IR in_tx " (since Linux 3.11)"
747a6e7c 2130.\" commit 135c5612c460f89657c4698fe2ea753f6f667963
2b538c3e 2131The branch was in a transactional memory transaction.
e3c9782b 2132.TP
31c1f2b0 2133.IR abort " (since Linux 3.11)"
747a6e7c 2134.\" commit 135c5612c460f89657c4698fe2ea753f6f667963
2b538c3e 2135The branch was in an aborted transactional memory transaction.
e3c9782b 2136.P
045bf4d3
VW
2137The entries are from most to least recent, so the first entry
2138has the most recent branch.
2139
8a94e783
MK
2140Support for
2141.I mispred
2142and
2143.I predicted
baf7029b 2144is optional; if not supported, both
045bf4d3
VW
2145values will be 0.
2146
e3c9782b
VW
2147The type of branches recorded is specified by the
2148.I branch_sample_type
2149field.
2150.RE
f2b1d720 2151.TP
7db515ef
MK
2152.IR abi ", " regs[weight(mask)]
2153If
2154.B PERF_SAMPLE_REGS_USER
d1007d14 2155is enabled, then the user CPU registers are recorded.
f2b1d720
MK
2156
2157The
2158.I abi
2159field is one of
2160.BR PERF_SAMPLE_REGS_ABI_NONE ", " PERF_SAMPLE_REGS_ABI_32 " or "
7db515ef 2161.BR PERF_SAMPLE_REGS_ABI_64 .
d1007d14
VW
2162
2163The
2164.I regs
2165field is an array of the CPU registers that were specified by
2166the
2167.I sample_regs_user
2168attr field.
2169The number of values is the number of bits set in the
51700fd7 2170.I sample_regs_user
4651e412 2171bit mask.
f2b1d720 2172.TP
7db515ef
MK
2173.IR size ", " data[size] ", " dyn_size
2174If
2175.B PERF_SAMPLE_STACK_USER
02ca78a0
VW
2176is enabled, then the user stack is recorded.
2177This can be used to generate stack backtraces.
d1007d14
VW
2178.I size
2179is the size requested by the user in
02ca78a0 2180.I sample_stack_user
d1007d14
VW
2181or else the maximum record size.
2182.I data
02ca78a0
VW
2183is the stack data (a raw dump of the memory pointed to by the
2184stack pointer at the time of sampling).
d1007d14
VW
2185.I dyn_size
2186is the amount of data actually dumped (can be less than
460e3d7a 2187.IR size ).
4dc411dd
KF
2188Note that
2189.I dyn_size
2190is omitted if
2191.I size
2192is 0.
d1007d14 2193.TP
51700fd7 2194.I weight
d1007d14
VW
2195If
2196.B PERF_SAMPLE_WEIGHT
7de4a1e3 2197is enabled, then a 64-bit value provided by the hardware
d1007d14
VW
2198is recorded that indicates how costly the event was.
2199This allows expensive events to stand out more clearly
2200in profiles.
2201.TP
2202.I data_src
51700fd7 2203If
d1007d14 2204.B PERF_SAMPLE_DATA_SRC
7de4a1e3 2205is enabled, then a 64-bit value is recorded that is made up of
d1007d14
VW
2206the following fields:
2207.RS
2b538c3e 2208.TP 4
d1007d14 2209.I mem_op
2b538c3e
MK
2210Type of opcode, a bitwise combination of:
2211
2212.PD 0
2213.RS
2214.TP 24
d1007d14 2215.B PERF_MEM_OP_NA
2b538c3e
MK
2216Not available
2217.TP
d1007d14 2218.B PERF_MEM_OP_LOAD
2b538c3e
MK
2219Load instruction
2220.TP
d1007d14 2221.B PERF_MEM_OP_STORE
2b538c3e
MK
2222Store instruction
2223.TP
d1007d14 2224.B PERF_MEM_OP_PFETCH
2b538c3e
MK
2225Prefetch
2226.TP
d1007d14 2227.B PERF_MEM_OP_EXEC
2b538c3e
MK
2228Executable code
2229.RE
2230.PD
d1007d14
VW
2231.TP
2232.I mem_lvl
bc9d90b5 2233Memory hierarchy level hit or miss, a bitwise combination of
ef4f4031 2234the following, shifted left by
bc9d90b5 2235.BR PERF_MEM_LVL_SHIFT :
2b538c3e
MK
2236
2237.PD 0
2238.RS
2239.TP 24
d1007d14 2240.B PERF_MEM_LVL_NA
2b538c3e
MK
2241Not available
2242.TP
d1007d14 2243.B PERF_MEM_LVL_HIT
2b538c3e
MK
2244Hit
2245.TP
d1007d14 2246.B PERF_MEM_LVL_MISS
2b538c3e
MK
2247Miss
2248.TP
d1007d14 2249.B PERF_MEM_LVL_L1
2b538c3e
MK
2250Level 1 cache
2251.TP
d1007d14 2252.B PERF_MEM_LVL_LFB
2b538c3e
MK
2253Line fill buffer
2254.TP
d1007d14 2255.B PERF_MEM_LVL_L2
2b538c3e
MK
2256Level 2 cache
2257.TP
d1007d14 2258.B PERF_MEM_LVL_L3
2b538c3e
MK
2259Level 3 cache
2260.TP
d1007d14 2261.B PERF_MEM_LVL_LOC_RAM
2b538c3e
MK
2262Local DRAM
2263.TP
d1007d14 2264.B PERF_MEM_LVL_REM_RAM1
2b538c3e
MK
2265Remote DRAM 1 hop
2266.TP
d1007d14 2267.B PERF_MEM_LVL_REM_RAM2
2b538c3e
MK
2268Remote DRAM 2 hops
2269.TP
d1007d14 2270.B PERF_MEM_LVL_REM_CCE1
2b538c3e
MK
2271Remote cache 1 hop
2272.TP
d1007d14 2273.B PERF_MEM_LVL_REM_CCE2
2b538c3e
MK
2274Remote cache 2 hops
2275.TP
d1007d14 2276.B PERF_MEM_LVL_IO
2b538c3e
MK
2277I/O memory
2278.TP
d1007d14 2279.B PERF_MEM_LVL_UNC
2b538c3e
MK
2280Uncached memory
2281.RE
2282.PD
d1007d14
VW
2283.TP
2284.I mem_snoop
bc9d90b5
VW
2285Snoop mode, a bitwise combination of the following, shifted left by
2286.BR PERF_MEM_SNOOP_SHIFT :
2b538c3e
MK
2287
2288.PD 0
2289.RS
2290.TP 24
d1007d14 2291.B PERF_MEM_SNOOP_NA
2b538c3e
MK
2292Not available
2293.TP
d1007d14 2294.B PERF_MEM_SNOOP_NONE
2b538c3e
MK
2295No snoop
2296.TP
d1007d14 2297.B PERF_MEM_SNOOP_HIT
2b538c3e
MK
2298Snoop hit
2299.TP
d1007d14 2300.B PERF_MEM_SNOOP_MISS
2b538c3e
MK
2301Snoop miss
2302.TP
d1007d14 2303.B PERF_MEM_SNOOP_HITM
2b538c3e
MK
2304Snoop hit modified
2305.RE
2306.PD
d1007d14
VW
2307.TP
2308.I mem_lock
bc9d90b5
VW
2309Lock instruction, a bitwise combination of the following, shifted left by
2310.BR PERF_MEM_LOCK_SHIFT :
2b538c3e
MK
2311
2312.PD 0
2313.RS
2314.TP 24
d1007d14 2315.B PERF_MEM_LOCK_NA
2b538c3e
MK
2316Not available
2317.TP
d1007d14 2318.B PERF_MEM_LOCK_LOCKED
2b538c3e
MK
2319Locked transaction
2320.RE
2321.PD
d1007d14
VW
2322.TP
2323.I mem_dtlb
bc9d90b5
VW
2324TLB access hit or miss, a bitwise combination of the following, shifted
2325left by
2326.BR PERF_MEM_TLB_SHIFT :
2b538c3e
MK
2327
2328.PD 0
2329.RS
2330.TP 24
d1007d14 2331.B PERF_MEM_TLB_NA
2b538c3e
MK
2332Not available
2333.TP
d1007d14 2334.B PERF_MEM_TLB_HIT
2b538c3e
MK
2335Hit
2336.TP
d1007d14 2337.B PERF_MEM_TLB_MISS
2b538c3e
MK
2338Miss
2339.TP
d1007d14 2340.B PERF_MEM_TLB_L1
2b538c3e
MK
2341Level 1 TLB
2342.TP
d1007d14 2343.B PERF_MEM_TLB_L2
2b538c3e
MK
2344Level 2 TLB
2345.TP
d1007d14 2346.B PERF_MEM_TLB_WK
2b538c3e
MK
2347Hardware walker
2348.TP
d1007d14 2349.B PERF_MEM_TLB_OS
2b538c3e
MK
2350OS fault handler
2351.RE
2352.PD
d1007d14 2353.RE
1e043959
VW
2354.TP
2355.I transaction
2356If the
2357.B PERF_SAMPLE_TRANSACTION
37bee118 2358flag is set, then a 64-bit field is recorded describing
1e043959
VW
2359the sources of any transactional memory aborts.
2360
2361The field is a bitwise combination of the following values:
2362.RS
2363.TP
2364.B PERF_TXN_ELISION
b3f39642 2365Abort from an elision type transaction (Intel-CPU-specific).
1e043959
VW
2366.TP
2367.B PERF_TXN_TRANSACTION
b3f39642 2368Abort from a generic transaction.
1e043959
VW
2369.TP
2370.B PERF_TXN_SYNC
b3f39642 2371Synchronous abort (related to the reported instruction).
1e043959
VW
2372.TP
2373.B PERF_TXN_ASYNC
b3f39642 2374Asynchronous abort (not related to the reported instruction).
1e043959
VW
2375.TP
2376.B PERF_TXN_RETRY
053a3e08 2377Retryable abort (retrying the transaction may have succeeded).
1e043959
VW
2378.TP
2379.B PERF_TXN_CONFLICT
b3f39642 2380Abort due to memory conflicts with other threads.
1e043959
VW
2381.TP
2382.B PERF_TXN_CAPACITY_WRITE
b3f39642 2383Abort due to write capacity overflow.
1e043959
VW
2384.TP
2385.B PERF_TXN_CAPACITY_READ
b3f39642 2386Abort due to read capacity overflow.
1e043959 2387.RE
b3f39642
MK
2388.IP
2389In addition, a user-specified abort code can be obtained from
2390the high 32 bits of the field by shifting right by
1e043959
VW
2391.B PERF_TXN_ABORT_SHIFT
2392and masking with
2393.BR PERF_TXN_ABORT_MASK .
f5281dfd
VW
2394.TP
2395.IR abi ", " regs[weight(mask)]
2396If
2397.B PERF_SAMPLE_REGS_INTR
2398is enabled, then the CPU registers at the time of the sample interrupt are recorded.
2399
2400The
2401.I abi
2402field is one of
2403.BR PERF_SAMPLE_REGS_ABI_NONE ", " PERF_SAMPLE_REGS_ABI_32 " or "
2404.BR PERF_SAMPLE_REGS_ABI_64 .
2405
2406The
2407.I regs
2408field is an array of the CPU registers that were specified by
2409the
2410.I sample_regs_intr
2411attr field.
2412The number of values is the number of bits set in the
2413.I sample_regs_intr
2414bit mask.
f2b1d720 2415.RE
9bfc542b
VW
2416.TP
2417.B PERF_RECORD_MMAP2
2418This record includes extended information on
2419.BR mmap (2)
2420calls returning executable mappings.
2421The format is similar to that of the
2422.B PERF_RECORD_MMAP
3a058284 2423record, but includes extra values that allow uniquely identifying
9bfc542b 2424shared mappings.
3a058284 2425
9bfc542b
VW
2426.in +4n
2427.nf
2428struct {
2429 struct perf_event_header header;
3a058284
MK
2430 u32 pid;
2431 u32 tid;
9bfc542b
VW
2432 u64 addr;
2433 u64 len;
2434 u64 pgoff;
2435 u32 maj;
2436 u32 min;
2437 u64 ino;
2438 u64 ino_generation;
3a058284
MK
2439 u32 prot;
2440 u32 flags;
9bfc542b
VW
2441 char filename[];
2442 struct sample_id sample_id;
2443};
2444.fi
2445.RS
2446.TP
2447.I pid
3a058284 2448is the process ID.
9bfc542b
VW
2449.TP
2450.I tid
3a058284 2451is the thread ID.
9bfc542b
VW
2452.TP
2453.I addr
2454is the address of the allocated memory.
2455.TP
2456.I len
2457is the length of the allocated memory.
2458.TP
2459.I pgoff
2460is the page offset of the allocated memory.
2461.TP
2462.I maj
3a058284 2463is the major ID of the underlying device.
9bfc542b
VW
2464.TP
2465.I min
3a058284 2466is the minor ID of the underlying device.
9bfc542b
VW
2467.TP
2468.I ino
3a058284 2469is the inode number.
9bfc542b
VW
2470.TP
2471.I ino_generation
2472is the inode generation.
2473.TP
2474.I prot
2475is the protection information.
2476.TP
2477.I flags
2478is the flags information.
2479.TP
2480.I filename
2481is a string describing the backing of the allocated memory.
2482.RE
1fda209c
VW
2483.TP
2484.BR PERF_RECORD_AUX " (since Linux 4.1)"
2485.\" commit 68db7e98c3a6ebe7284b6cf14906ed7c55f3f7f0
2486This record reports that new data is available in the separate
2487AUX buffer region.
2488
2489.in +4n
2490.nf
2491struct {
2492 struct perf_event_header header;
2493 u64 aux_offset;
2494 u64 aux_size;
2495 u64 flags;
2496 struct sample_id sample_id;
2497};
2498.fi
2499.RS
2500.TP
2501.I aux_offset
2502offset in the AUX mmap region where the new data begins.
2503.TP
2504.I aux_size
2505size of the data made available.
2506.TP
2507.I flags
95655a22 2508describes the AUX update.
1fda209c
VW
2509.RS
2510.TP
2511.B PERF_AUX_FLAG_TRUNCATED
95655a22 2512if set, then the data returned was truncated to fit the available
1fda209c 2513buffer size.
b1355f6a
VW
2514.TP
2515.B PERF_AUX_FLAG_OVERWRITE
2516.\" commit 2023a0d2829e521fe6ad6b9907f3f90bfbf57142
95655a22 2517if set, then the data returned has overwritten previous data.
1fda209c
VW
2518.RE
2519.RE
6932aac3
VW
2520.TP
2521.BR PERF_RECORD_ITRACE_START " (since Linux 4.1)"
2522.\" commit ec0d7729bbaed4b9d2d3fada693278e13a3d1368
2523This record indicates which process has initiated an instruction
2524trace event, allowing tools to properly correlate the instruction
2525addresses in the AUX buffer with the proper executable.
2526
2527.in +4n
2528.nf
2529struct {
2530 struct perf_event_header header;
2531 u32 pid;
2532 u32 tid;
2533};
2534.fi
2535.RS
2536.TP
2537.I pid
95655a22 2538process ID of the thread starting an instruction trace.
6932aac3
VW
2539.TP
2540.I tid
95655a22 2541thread ID of the thread starting an instruction trace.
6932aac3 2542.RE
f2b1d720 2543.RE
21977c9d
VW
2544.SS Overflow handling
2545Events can be set to notify when a threshold is crossed,
2546indicating an overflow.
2547Overflow conditions can be captured by monitoring the
2548event file descriptor with
f2b1d720
MK
2549.BR poll (2),
2550.BR select (2),
21977c9d
VW
2551or
2552.BR epoll (2).
2553Alternately, a SIGIO signal handler can be created and
2554the event configured with
2555.BR fcntl (2)
2556to generate SIGIO signals.
f2b1d720 2557
6170255e 2558Overflows are generated only by sampling events
f2b1d720 2559.RI ( sample_period
7d182bb6 2560must have a nonzero value).
f2b1d720 2561
21977c9d 2562There are two ways to generate overflow notifications.
f2b1d720
MK
2563
2564The first is to set a
2565.I wakeup_events
2566or
2567.I wakeup_watermark
21977c9d 2568value that will trigger if a certain number of samples
f2b1d720 2569or bytes have been written to the mmap ring buffer.
21977c9d 2570In this case
7db515ef 2571.B POLL_IN
21977c9d 2572is indicated.
f2b1d720
MK
2573
2574The other way is by use of the
7db515ef 2575.B PERF_EVENT_IOC_REFRESH
f2b1d720
MK
2576ioctl.
2577This ioctl adds to a counter that decrements each time the event overflows.
21977c9d 2578When nonzero,
7db515ef 2579.B POLL_IN
21977c9d
VW
2580is indicated, but
2581once the counter reaches 0
7db515ef 2582.B POLL_HUP
21977c9d 2583is indicated and
f2b1d720
MK
2584the underlying event is disabled.
2585
50e4319c
VW
2586Refreshing an event group leader refreshes all siblings and
2587refreshing with a parameter of 0 currently enables infinite
2588refreshes;
2589these behaviors are unsupported and should not be relied on.
2590.\" See https://lkml.org/lkml/2011/5/24/337
2591
4010bc07 2592Starting with Linux 3.18,
747a6e7c 2593.\" commit 179033b3e064d2cd3f5f9945e76b0a0f0fbf4883
21977c9d
VW
2594.B POLL_HUP
2595is indicated if the event being monitored is attached to a different
2596process and that process exits.
73d8cece 2597.SS rdpmc instruction
f2b1d720 2598Starting with Linux 3.4 on x86, you can use the
747a6e7c 2599.\" commit c7206205d00ab375839bd6c7ddb247d600693c09
f2b1d720
MK
2600.I rdpmc
2601instruction to get low-latency reads without having to enter the kernel.
2602Note that using
2603.I rdpmc
2604is not necessarily faster than other methods for reading event values.
2605
2606Support for this can be detected with the
2607.I cap_usr_rdpmc
2608field in the mmap page; documentation on how
2609to calculate event values can be found in that section.
562c69f6
VW
2610
2611Originally, when rdpmc support was enabled, any process (not just ones
2612with an active perf event) could use the rdpmc instruction to access
2613the counters.
2614Starting with Linux 4.0,
2615.\" 7911d3f7af14a614617e38245fedf98a724e46a9
2616rdpmc support is only allowed if an event is currently enabled
95655a22 2617in a process's context.
562c69f6
VW
2618To restore the old behavior, write the value 2 to
2619.IR /sys/devices/cpu/rdpmc .
73d8cece 2620.SS perf_event ioctl calls
f2b1d720
MK
2621.PP
2622Various ioctls act on
7db515ef 2623.BR perf_event_open ()
ce88f77b 2624file descriptors:
f2b1d720
MK
2625.TP
2626.B PERF_EVENT_IOC_ENABLE
ce88f77b 2627This enables the individual event or event group specified by the
7db515ef 2628file descriptor argument.
f2b1d720 2629
51700fd7 2630If the
8cc8b90d 2631.B PERF_IOC_FLAG_GROUP
51700fd7 2632bit is set in the ioctl argument, then all events in a group are
dbc01ecd
VW
2633enabled, even if the event specified is not the group leader
2634(but see BUGS).
f2b1d720
MK
2635.TP
2636.B PERF_EVENT_IOC_DISABLE
ce88f77b 2637This disables the individual counter or event group specified by the
7db515ef 2638file descriptor argument.
f2b1d720
MK
2639
2640Enabling or disabling the leader of a group enables or disables the
2641entire group; that is, while the group leader is disabled, none of the
2642counters in the group will count.
33a0ccb2
MK
2643Enabling or disabling a member of a group other than the leader
2644affects only that counter; disabling a non-leader
f2b1d720
MK
2645stops that counter from counting but doesn't affect any other counter.
2646
51700fd7 2647If the
8cc8b90d 2648.B PERF_IOC_FLAG_GROUP
51700fd7 2649bit is set in the ioctl argument, then all events in a group are
dbc01ecd
VW
2650disabled, even if the event specified is not the group leader
2651(but see BUGS).
f2b1d720
MK
2652.TP
2653.B PERF_EVENT_IOC_REFRESH
2654Non-inherited overflow counters can use this
2655to enable a counter for a number of overflows specified by the argument,
2656after which it is disabled.
2657Subsequent calls of this ioctl add the argument value to the current
2658count.
21977c9d 2659An overflow notification with
7db515ef
MK
2660.B POLL_IN
2661set will happen on each overflow until the
21977c9d
VW
2662count reaches 0; when that happens a notification with
2663.B POLL_HUP
7db515ef 2664set is sent and the event is disabled.
f2b1d720 2665Using an argument of 0 is considered undefined behavior.
f2b1d720
MK
2666.TP
2667.B PERF_EVENT_IOC_RESET
36127c0e 2668Reset the event count specified by the
6061d29f 2669file descriptor argument to zero.
33a0ccb2 2670This resets only the counts; there is no way to reset the
f2b1d720
MK
2671multiplexing
2672.I time_enabled
2673or
2674.I time_running
2675values.
f2b1d720 2676
51700fd7 2677If the
8cc8b90d 2678.B PERF_IOC_FLAG_GROUP
51700fd7 2679bit is set in the ioctl argument, then all events in a group are
dbc01ecd
VW
2680reset, even if the event specified is not the group leader
2681(but see BUGS).
f2b1d720
MK
2682.TP
2683.B PERF_EVENT_IOC_PERIOD
e6cf5694 2684This updates the overflow period for the event.
3f118a29 2685
747a6e7c
VW
2686Since Linux 3.7 (on ARM)
2687.\" commit 3581fe0ef37ce12ac7a4f74831168352ae848edc
2688and Linux 3.14 (all other architectures),
2689.\" commit bad7192b842c83e580747ca57104dd51fe08c223
3f118a29 2690the new period takes effect immediately.
ed81fdd9 2691On older kernels, the new period did not take effect until
3f118a29 2692after the next overflow.
f2b1d720
MK
2693
2694The argument is a pointer to a 64-bit value containing the
2695desired new period.
e6cf5694 2696
747a6e7c
VW
2697Prior to Linux 2.6.36,
2698.\" commit ad0cf3478de8677f720ee06393b3147819568d6a
2699this ioctl always failed due to a bug
e6cf5694 2700in the kernel.
f2b1d720
MK
2701.TP
2702.B PERF_EVENT_IOC_SET_OUTPUT
2703This tells the kernel to report event notifications to the specified
2704file descriptor rather than the default one.
2705The file descriptors must all be on the same CPU.
2706
2707The argument specifies the desired file descriptor, or \-1 if
2708output should be ignored.
f2b1d720 2709.TP
31c1f2b0 2710.BR PERF_EVENT_IOC_SET_FILTER " (since Linux 2.6.33)"
60dafbc1 2711.\" commit 6fb2915df7f0747d9044da9dbff5b46dc2e20830
f2b1d720
MK
2712This adds an ftrace filter to this event.
2713
2714The argument is a pointer to the desired ftrace filter.
a0dcc8dd 2715.TP
31c1f2b0 2716.BR PERF_EVENT_IOC_ID " (since Linux 3.12)"
60dafbc1 2717.\" commit cf4957f17f2a89984915ea808876d9c82225b862
bec6277e 2718This returns the event ID value for the given event file descriptor.
a0dcc8dd
VW
2719
2720The argument is a pointer to a 64-bit unsigned integer
2721to hold the result.
b0f7b411
VW
2722.TP
2723.BR PERF_EVENT_IOC_SET_BPF " (since Linux 4.1)"
2724.\" commit 2541517c32be2531e0da59dfd7efc1ce844644f5
2725This allows attaching a Berkeley Packet Filter (BPF)
2726program to an existing kprobe tracepoint event.
2727You need
2728.B CAP_SYS_ADMIN
2729privileges to use this ioctl.
2730
2731The argument is a BPF program file descriptor that was created by
2732a previous
2733.BR bpf (2)
2734system call.
73d8cece 2735.SS Using prctl
f2b1d720
MK
2736A process can enable or disable all the event groups that are
2737attached to it using the
2738.BR prctl (2)
2739.B PR_TASK_PERF_EVENTS_ENABLE
2740and
2741.B PR_TASK_PERF_EVENTS_DISABLE
2742operations.
ee7b0cbf 2743This applies to all counters on the calling process, whether created by
f2b1d720
MK
2744this process or by another, and does not affect any counters that this
2745process has created on other processes.
33a0ccb2 2746It enables or disables only
f2b1d720 2747the group leaders, not any other members in the groups.
f2b1d720 2748.SS perf_event related configuration files
7db515ef
MK
2749Files in
2750.I /proc/sys/kernel/
7db515ef 2751.RS 4
f2b1d720 2752.TP
7db515ef 2753.I /proc/sys/kernel/perf_event_paranoid
f2b1d720 2754
3eb95192 2755.\" default changed in commit 0161028b7c8aebef64194d3d73e43bc3b53b5c66
f2b1d720
MK
2756The
2757.I perf_event_paranoid
2758file can be set to restrict access to the performance counters.
2b538c3e
MK
2759.RS
2760.IP 2 4
3eb95192 2761allow only user-space measurements (default since Linux 4.6).
2b538c3e 2762.IP 1
3eb95192 2763allow both kernel and user measurements (default before Linux 4.6).
2b538c3e
MK
2764.IP 0
2765allow access to CPU-specific data but not raw tracepoint samples.
2766.IP \-1
2767no restrictions.
2768.RE
2769.IP
f2b1d720
MK
2770The existence of the
2771.I perf_event_paranoid
2772file is the official method for determining if a kernel supports
7db515ef 2773.BR perf_event_open ().
f2b1d720
MK
2774.TP
2775.I /proc/sys/kernel/perf_event_max_sample_rate
2776
7db515ef
MK
2777This sets the maximum sample rate.
2778Setting this too high can allow
f2b1d720 2779users to sample at a rate that impacts overall machine performance
7db515ef
MK
2780and potentially lock up the machine.
2781The default value is
f2b1d720 2782100000 (samples per second).
f2b1d720
MK
2783.TP
2784.I /proc/sys/kernel/perf_event_mlock_kb
2785
ce88f77b
MK
2786Maximum number of pages an unprivileged user can
2787.BR mlock (2).
f2b1d720 2788The default is 516 (kB).
e30dc77f 2789
f2b1d720 2790.RE
7db515ef
MK
2791Files in
2792.I /sys/bus/event_source/devices/
7db515ef 2793.RS 4
ce88f77b 2794Since Linux 2.6.34, the kernel supports having multiple PMUs
f2b1d720
MK
2795available for monitoring.
2796Information on how to program these PMUs can be found under
2797.IR /sys/bus/event_source/devices/ .
2798Each subdirectory corresponds to a different PMU.
f2b1d720 2799.TP
31c1f2b0 2800.IR /sys/bus/event_source/devices/*/type " (since Linux 2.6.38)"
747a6e7c 2801.\" commit abe43400579d5de0078c2d3a760e6598e183f871
f2b1d720
MK
2802This contains an integer that can be used in the
2803.I type
ce88f77b
MK
2804field of
2805.I perf_event_attr
2806to indicate that you wish to use this PMU.
f2b1d720 2807.TP
562c69f6 2808.IR /sys/bus/event_source/devices/cpu/rdpmc " (since Linux 3.4)"
747a6e7c 2809.\" commit 0c9d42ed4cee2aa1dfc3a260b741baae8615744f
8a94e783 2810If this file is 1, then direct user-space access to the
e30dc77f
VW
2811performance counter registers is allowed via the rdpmc instruction.
2812This can be disabled by echoing 0 to the file.
562c69f6
VW
2813
2814As of Linux 4.0,
2815.\" a66734297f78707ce39d756b656bfae861d53f62
2816.\" 7911d3f7af14a614617e38245fedf98a724e46a9
2817the behavior has changed, so that 1 now means only allow access
2818to processes with active perf events, with 2 indicating the old
2819allow-anyone-access behavior.
f2b1d720 2820.TP
31c1f2b0 2821.IR /sys/bus/event_source/devices/*/format/ " (since Linux 3.4)"
747a6e7c 2822.\" commit 641cc938815dfd09f8fa1ec72deb814f0938ac33
7d182bb6
MK
2823This subdirectory contains information on the architecture-specific
2824subfields available for programming the various
f2b1d720 2825.I config
ce88f77b
MK
2826fields in the
2827.I perf_event_attr
2828struct.
e30dc77f
VW
2829
2830The content of each file is the name of the config field, followed
2831by a colon, followed by a series of integer bit ranges separated by
2832commas.
8a94e783 2833For example, the file
e30dc77f
VW
2834.I event
2835may contain the value
2836.I config1:1,6-10,44
2837which indicates that event is an attribute that occupies bits 1, 6-10, and 44
ce88f77b
MK
2838of
2839.IR perf_event_attr::config1 .
e30dc77f 2840.TP
31c1f2b0 2841.IR /sys/bus/event_source/devices/*/events/ " (since Linux 3.4)"
747a6e7c 2842.\" commit 641cc938815dfd09f8fa1ec72deb814f0938ac33
7d182bb6 2843This subdirectory contains files with predefined events.
f2b1d720 2844The contents are strings describing the event settings
e30dc77f 2845expressed in terms of the fields found in the previously mentioned
f2b1d720
MK
2846.I ./format/
2847directory.
2848These are not necessarily complete lists of all events supported by
2849a PMU, but usually a subset of events deemed useful or interesting.
e30dc77f
VW
2850
2851The content of each file is a list of attribute names
8a94e783
MK
2852separated by commas.
2853Each entry has an optional value (either hex or decimal).
37bee118 2854If no value is specified, then it is assumed to be a single-bit
e30dc77f
VW
2855field with a value of 1.
2856An example entry may look like this:
699893d8 2857.IR event=0x2,inv,ldlat=3 .
f2b1d720
MK
2858.TP
2859.I /sys/bus/event_source/devices/*/uevent
e30dc77f
VW
2860This file is the standard kernel device interface
2861for injecting hotplug events.
2862.TP
31c1f2b0 2863.IR /sys/bus/event_source/devices/*/cpumask " (since Linux 3.7)"
747a6e7c 2864.\" commit 314d9f63f385096580e9e2a06eaa0745d92fe4ac
699893d8
DP
2865The
2866.I cpumask
2867file contains a comma-separated list of integers that
2868indicate a representative CPU number for each socket (package)
e30dc77f
VW
2869on the motherboard.
2870This is needed when setting up uncore or northbridge events, as
2871those PMUs present socket-wide events.
f2b1d720 2872.RE
47297adb 2873.SH RETURN VALUE
f2b1d720
MK
2874.BR perf_event_open ()
2875returns the new file descriptor, or \-1 if an error occurred
2876(in which case,
2877.I errno
2878is set appropriately).
2879.SH ERRORS
d8b7d950
VW
2880The errors returned by
2881.BR perf_event_open ()
2882can be inconsistent, and may
2883vary across processor architectures and performance monitoring units.
f2b1d720 2884.TP
82b09254 2885.B E2BIG
ce88f77b
MK
2886Returned if the
2887.I perf_event_attr
82b09254
VW
2888.I size
2889value is too small
2890(smaller than
2891.BR PERF_ATTR_SIZE_VER0 ),
2892too big (larger than the page size),
2893or larger than the kernel supports and the extra bytes are not zero.
2894When
2895.B E2BIG
ce88f77b
MK
2896is returned, the
2897.I perf_event_attr
e9bd9b2c 2898.I size
d6af98f8 2899field is overwritten by the kernel to be the size of the structure
82b09254
VW
2900it was expecting.
2901.TP
d8b7d950 2902.B EACCES
27f0af8e
VW
2903Returned when the requested event requires
2904.B CAP_SYS_ADMIN
2905permissions (or a more permissive perf_event paranoid setting).
2906Some common cases where an unprivileged process
2907may encounter this error:
2908attaching to a process owned by a different user;
2b23ecbd
MK
2909monitoring all processes on a given CPU (i.e., specifying the
2910.I pid
2911argument as \-1);
079928f3 2912and not setting
accec051 2913.I exclude_kernel
079928f3 2914when the paranoid setting requires it.
d8b7d950
VW
2915.TP
2916.B EBADF
2917Returned if the
2918.I group_fd
accec051
MK
2919file descriptor is not valid, or, if
2920.B PERF_FLAG_PID_CGROUP
2921is set,
d8b7d950
VW
2922the cgroup file descriptor in
2923.I pid
2924is not valid.
2925.TP
f27486cb
VW
2926.BR EBUSY " (since Linux 4.1)"
2927.\" bed5b25ad9c8a2f5d735ef0bc746ec870c01c1b0
2928Returned if another event already has exclusive
2929access to the PMU.
2930.TP
d8b7d950
VW
2931.B EFAULT
2932Returned if the
2933.I attr
2934pointer points at an invalid memory address.
2935.TP
f2b1d720 2936.B EINVAL
d8b7d950
VW
2937Returned if the specified event is invalid.
2938There are many possible reasons for this.
2939A non-exhaustive list:
2940.I sample_freq
accec051 2941is higher than the maximum setting;
d8b7d950
VW
2942the
2943.I cpu
accec051 2944to monitor does not exist;
d8b7d950 2945.I read_format
accec051 2946is out of range;
d8b7d950 2947.I sample_type
accec051 2948is out of range;
d8b7d950
VW
2949the
2950.I flags
accec051 2951value is out of range;
d8b7d950
VW
2952.I exclusive
2953or
2954.I pinned
accec051 2955set and the event is not a group leader;
d8b7d950
VW
2956the event
2957.I config
accec051
MK
2958values are out of range or set reserved bits;
2959the generic event selected is not supported; or
d8b7d950
VW
2960there is not enough room to add the selected event.
2961.TP
2962.B EMFILE
2963Each opened event uses one file descriptor.
26c32fab
MK
2964If a large number of events are opened,
2965the per-process limit on the number of open file descriptors will be reached,
2966and no more events can be created.
d8b7d950
VW
2967.TP
2968.B ENODEV
2969Returned when the event involves a feature not supported
accec051 2970by the current CPU.
d8b7d950
VW
2971.TP
2972.B ENOENT
2973Returned if the
2974.I type
2975setting is not valid.
accec051 2976This error is also returned for
d8b7d950 2977some unsupported generic events.
f2b1d720
MK
2978.TP
2979.B ENOSPC
2980Prior to Linux 3.3, if there was not enough room for the event,
747a6e7c 2981.\" commit aa2bc1ade59003a379ffc485d6da2d92ea3370a6
f2b1d720
MK
2982.B ENOSPC
2983was returned.
accec051 2984In Linux 3.3, this was changed to
f2b1d720
MK
2985.BR EINVAL .
2986.B ENOSPC
d8b7d950 2987is still returned if you try to add more breakpoint events
accec051 2988than supported by the hardware.
d8b7d950
VW
2989.TP
2990.B ENOSYS
2991Returned if
2992.B PERF_SAMPLE_STACK_USER
2993is set in
2994.I sample_type
2995and it is not supported by hardware.
2996.TP
2997.B EOPNOTSUPP
2998Returned if an event requiring a specific hardware feature is
2999requested but there is no hardware support.
3000This includes requesting low-skid events if not supported,
3001branch tracing if it is not available, sampling if no PMU
3002interrupt is available, and branch stacks for software events.
3003.TP
3004.B EPERM
27f0af8e
VW
3005Returned on many (but not all) architectures when an unsupported
3006.IR exclude_hv ", " exclude_idle ", " exclude_user ", or " exclude_kernel
3007setting is specified.
3008
3009It can also happen, as with
3010.BR EACCES ,
3011when the requested event requires
3012.B CAP_SYS_ADMIN
3013permissions (or a more permissive perf_event paranoid setting).
3014This includes setting a breakpoint on a kernel address,
3015and (since Linux 3.13) setting a kernel function-trace tracepoint.
747a6e7c 3016.\" commit a4e95fc2cbb31d70a65beffeaf8773f881328c34
d8b7d950
VW
3017.TP
3018.B ESRCH
3019Returned if attempting to attach to a process that does not exist.
f2b1d720 3020.SH VERSION
f2b1d720
MK
3021.BR perf_event_open ()
3022was introduced in Linux 2.6.31 but was called
747a6e7c 3023.\" commit 0793a61d4df8daeac6492dbf8d2f3e5713caae5e
ffd4dec0 3024.BR perf_counter_open ().
f2b1d720 3025It was renamed in Linux 2.6.32.
747a6e7c 3026.\" commit cdd6c482c9ff9c55475ee7392ec8f672eddb7be6
f2b1d720 3027.SH CONFORMING TO
7db515ef
MK
3028This
3029.BR perf_event_open ()
3030system call is Linux-specific
f2b1d720 3031and should not be used in programs intended to be portable.
f2b1d720
MK
3032.SH NOTES
3033Glibc does not provide a wrapper for this system call; call it using
3034.BR syscall (2).
7db515ef 3035See the example below.
f2b1d720
MK
3036
3037The official way of knowing if
7db515ef 3038.BR perf_event_open ()
f2b1d720
MK
3039support is enabled is checking
3040for the existence of the file
7db515ef 3041.IR /proc/sys/kernel/perf_event_paranoid .
f2b1d720 3042.SH BUGS
f2b1d720
MK
3043The
3044.B F_SETOWN_EX
3045option to
7db515ef 3046.BR fcntl (2)
f2b1d720
MK
3047is needed to properly get overflow signals in threads.
3048This was introduced in Linux 2.6.32.
747a6e7c 3049.\" commit ba0a6c9f6fceed11c6a99e8326f0477fe383e6b5
f2b1d720 3050
747a6e7c
VW
3051Prior to Linux 2.6.33 (at least for x86),
3052.\" commit b690081d4d3f6a23541493f1682835c3cd5c54a1
3053the kernel did not check
f2b1d720
MK
3054if events could be scheduled together until read time.
3055The same happens on all known kernels if the NMI watchdog is enabled.
3056This means that, to see if a given set of events works, you have to call
3057.BR perf_event_open (),
3058start the events, and then read them before you know for sure that you
3059can get valid measurements.
3060
ce88f77b 3061Prior to Linux 2.6.34, event constraints were not enforced by the kernel.
f2b1d720
MK
3062In that case, some events would silently return "0" if the kernel
3063scheduled them in an improper counter slot.
747a6e7c 3064.\" FIXME: cannot find a kernel commit for this one
f2b1d720 3065
ce88f77b 3066Prior to Linux 2.6.34, there was a bug when multiplexing where the
f2b1d720 3067wrong results could be returned.
747a6e7c 3068.\" commit 45e16a6834b6af098702e5ea6c9a40de42ff77d8
f2b1d720
MK
3069
3070From Linux 2.6.35 to Linux 2.6.39, the kernel can quickly crash if
3071"inherit" is enabled and many threads are started.
747a6e7c 3072.\" commit 38b435b16c36b0d863efcf3f07b34a6fac9873fd
f2b1d720
MK
3073
3074Prior to Linux 2.6.35,
747a6e7c 3075.\" commit 050735b08ca8a016bbace4445fa025b88fee770b
f2b1d720
MK
3076.B PERF_FORMAT_GROUP
3077did not work with attached processes.
3078
f2b1d720
MK
3079Between Linux 2.6.36 and Linux 3.0, a bug in the kernel code
3080causes the
3081"watermark" field to be ignored; the kernel acts as if a wakeup_event
3082was chosen if the union has a
7d182bb6 3083nonzero value in it.
747a6e7c 3084.\" commit 4ec8363dfc1451f8c8f86825731fe712798ada02
f2b1d720 3085
8a94e783 3086From Linux 2.6.31 to Linux 3.4, the
dbc01ecd
VW
3087.B PERF_IOC_FLAG_GROUP
3088ioctl argument was broken and would repeatedly operate
3089on the event specified rather than iterating across
3090all sibling events in a group.
747a6e7c 3091.\" commit 724b6daa13e100067c30cfc4d1ad06629609dc4e
dbc01ecd 3092
7205b8df 3093From Linux 3.4 to Linux 3.11, the mmap
747a6e7c 3094.\" commit fa7315871046b9a4c48627905691dbde57e51033
135cba8b
VW
3095.I cap_usr_rdpmc
3096and
3097.I cap_usr_time
3098bits mapped to the same location.
3099Code should migrate to the new
3100.I cap_user_rdpmc
3101and
3102.I cap_user_time
3103fields instead.
3104
7db515ef
MK
3105Always double-check your results!
3106Various generalized events have had wrong values.
f2b1d720
MK
3107For example, retired branches measured
3108the wrong thing on AMD machines until Linux 2.6.35.
747a6e7c 3109.\" commit f287d332ce835f77a4f5077d2c0ef1e3f9ea42d2
f2b1d720
MK
3110.SH EXAMPLE
3111The following is a short example that measures the total
7db515ef
MK
3112instruction count of a call to
3113.BR printf (3).
f2b1d720
MK
3114.nf
3115
3116#include <stdlib.h>
3117#include <stdio.h>
3118#include <unistd.h>
3119#include <string.h>
3120#include <sys/ioctl.h>
3121#include <linux/perf_event.h>
3122#include <asm/unistd.h>
3123
571767ca 3124static long
7db515ef
MK
3125perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
3126 int cpu, int group_fd, unsigned long flags)
f2b1d720
MK
3127{
3128 int ret;
3129
7db515ef
MK
3130 ret = syscall(__NR_perf_event_open, hw_event, pid, cpu,
3131 group_fd, flags);
f2b1d720
MK
3132 return ret;
3133}
3134
f2b1d720
MK
3135int
3136main(int argc, char **argv)
3137{
f2b1d720
MK
3138 struct perf_event_attr pe;
3139 long long count;
3140 int fd;
3141
3142 memset(&pe, 0, sizeof(struct perf_event_attr));
3143 pe.type = PERF_TYPE_HARDWARE;
3144 pe.size = sizeof(struct perf_event_attr);
3145 pe.config = PERF_COUNT_HW_INSTRUCTIONS;
3146 pe.disabled = 1;
3147 pe.exclude_kernel = 1;
3148 pe.exclude_hv = 1;
3149
3150 fd = perf_event_open(&pe, 0, \-1, \-1, 0);
7db515ef 3151 if (fd == \-1) {
f2b1d720 3152 fprintf(stderr, "Error opening leader %llx\\n", pe.config);
7db515ef 3153 exit(EXIT_FAILURE);
f2b1d720
MK
3154 }
3155
3156 ioctl(fd, PERF_EVENT_IOC_RESET, 0);
3157 ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
3158
3159 printf("Measuring instruction count for this printf\\n");
3160
3161 ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
3162 read(fd, &count, sizeof(long long));
3163
3164 printf("Used %lld instructions\\n", count);
3165
3166 close(fd);
3167}
3168.fi
47297adb 3169.SH SEE ALSO
f2b1d720
MK
3170.BR fcntl (2),
3171.BR mmap (2),
3172.BR open (2),
3173.BR prctl (2),
3174.BR read (2)