]> git.ipfire.org Git - thirdparty/man-pages.git/blame - man2/perf_event_open.2
fgetgrent.3, getgrent.3, getgrent_r.3, getgrnam.3: Clarify that 'gr_mem' is a NULL...
[thirdparty/man-pages.git] / man2 / perf_event_open.2
CommitLineData
f2b1d720
MK
1.\" Copyright (c) 2012, Vincent Weaver
2.\"
1dd72f9c 3.\" %%%LICENSE_START(GPLv2+_DOC_FULL)
f2b1d720
MK
4.\" This is free documentation; you can redistribute it and/or
5.\" modify it under the terms of the GNU General Public License as
6.\" published by the Free Software Foundation; either version 2 of
7.\" the License, or (at your option) any later version.
8.\"
9.\" The GNU General Public License's references to "object code"
10.\" and "executables" are to be interpreted as the output of any
11.\" document formatting or typesetting system, including
12.\" intermediate and printed output.
13.\"
14.\" This manual is distributed in the hope that it will be useful,
15.\" but WITHOUT ANY WARRANTY; without even the implied warranty of
16.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17.\" GNU General Public License for more details.
18.\"
19.\" You should have received a copy of the GNU General Public
20.\" License along with this manual; if not, see
21.\" <http://www.gnu.org/licenses/>.
6a8d8745 22.\" %%%LICENSE_END
f2b1d720
MK
23.\"
24.\" This document is based on the perf_event.h header file, the
25.\" tools/perf/design.txt file, and a lot of bitter experience.
26.\"
2b23ecbd 27.TH PERF_EVENT_OPEN 2 2014-04-17 "Linux" "Linux Programmer's Manual"
f2b1d720
MK
28.SH NAME
29perf_event_open \- set up performance monitoring
30.SH SYNOPSIS
31.nf
32.B #include <linux/perf_event.h>
33.B #include <linux/hw_breakpoint.h>
34.sp
35.BI "int perf_event_open(struct perf_event_attr *" attr ,
36.BI " pid_t " pid ", int " cpu ", int " group_fd ,
37.BI " unsigned long " flags );
38.fi
39
40.IR Note :
41There is no glibc wrapper for this system call; see NOTES.
42.SH DESCRIPTION
43Given a list of parameters,
44.BR perf_event_open ()
45returns a file descriptor, for use in subsequent system calls
46.RB ( read "(2), " mmap "(2), " prctl "(2), " fcntl "(2), etc.)."
47.PP
48A call to
49.BR perf_event_open ()
50creates a file descriptor that allows measuring performance
51information.
52Each file descriptor corresponds to one
53event that is measured; these can be grouped together
54to measure multiple events simultaneously.
55.PP
56Events can be enabled and disabled in two ways: via
57.BR ioctl (2)
58and via
0fe9e4b1 59.BR prctl (2).
f2b1d720
MK
60When an event is disabled it does not count or generate overflows but does
61continue to exist and maintain its count value.
62.PP
63Events come in two flavors: counting and sampled.
64A
65.I counting
66event is one that is used for counting the aggregate number of events
67that occur.
68In general, counting event results are gathered with a
69.BR read (2)
70call.
71A
72.I sampling
73event periodically writes measurements to a buffer that can then
74be accessed via
0fe9e4b1 75.BR mmap (2).
f2b1d720
MK
76.SS Arguments
77.P
f2b1d720 78The
a02a1737 79.I pid
f2b1d720 80and
a02a1737
VW
81.I cpu
82arguments allow specifying which process and CPU to monitor:
83.TP
f2d15dc9 84.BR "pid == 0" " and " "cpu == \-1"
ee7b0cbf 85This measures the calling process/thread on any CPU.
a02a1737 86.TP
f2d15dc9 87.BR "pid == 0" " and " "cpu >= 0"
ee7b0cbf 88This measures the calling process/thread only
a02a1737
VW
89when running on the specified CPU.
90.TP
f2d15dc9 91.BR "pid > 0" " and " "cpu == \-1"
a02a1737
VW
92This measures the specified process/thread on any CPU.
93.TP
f2d15dc9 94.BR "pid > 0" " and " "cpu >= 0"
a02a1737
VW
95This measures the specified process/thread only
96when running on the specified CPU.
97.TP
f2d15dc9 98.BR "pid == \-1" " and " "cpu >= 0"
a02a1737 99This measures all processes/threads on the specified CPU.
ce88f77b 100This requires
f2b1d720
MK
101.B CAP_SYS_ADMIN
102capability or a
103.I /proc/sys/kernel/perf_event_paranoid
104value of less than 1.
a02a1737 105.TP
ce88f77b 106.BR "pid == \-1" " and " "cpu == \-1"
a02a1737 107This setting is invalid and will return an error.
f2b1d720
MK
108.P
109The
110.I group_fd
111argument allows event groups to be created.
112An event group has one event which is the group leader.
113The leader is created first, with
114.IR group_fd " = \-1."
115The rest of the group members are created with subsequent
116.BR perf_event_open ()
117calls with
118.IR group_fd
bec6277e 119being set to the file descriptor of the group leader.
f2b1d720
MK
120(A single event on its own is created with
121.IR group_fd " = \-1"
122and is considered to be a group with only 1 member.)
33a0ccb2 123An event group is scheduled onto the CPU as a unit: it will
d1007d14 124be put onto the CPU only if all of the events in the group can be put onto
f2b1d720
MK
125the CPU.
126This means that the values of the member events can be
ce88f77b 127meaningfully compared\(emadded, divided (to get ratios), and so on\(emwith each
f2b1d720
MK
128other, since they have counted events for the same set of executed
129instructions.
130.P
131The
132.I flags
08e325e8 133argument is formed by ORing together zero or more of the following values:
f2b1d720 134.TP
e9b1ab78
MK
135.BR PERF_FLAG_FD_CLOEXEC " (since Linux 3.14)"
136This flag enables the close-on-exec flag for the created
137event file descriptor,
138so that the file descriptor is automatically closed on
139.BR execve (2).
8bad22e5
MK
140Setting the close-on-exec flag at creation time, rather than later with
141.BR fcntl (2),
e9b1ab78
MK
142avoids potential race conditions where the calling thread invokes
143.BR perf_event_open ()
a61dba34
MK
144and
145.BR fcntl (2)
e9b1ab78
MK
146at the same time as another thread calls
147.BR fork (2)
148then
149.BR execve (2).
150.TP
f2b1d720
MK
151.BR PERF_FLAG_FD_NO_GROUP
152.\" FIXME The following sentence is unclear
153This flag allows creating an event as part of an event group but
154having no group leader.
155It is unclear why this is useful.
156.\" FIXME So, why is it useful?
157.TP
158.BR PERF_FLAG_FD_OUTPUT
7d182bb6 159This flag reroutes the output from an event to the group leader.
f2b1d720 160.TP
31c1f2b0 161.BR PERF_FLAG_PID_CGROUP " (since Linux 2.6.39)"
f2b1d720
MK
162This flag activates per-container system-wide monitoring.
163A container
ce88f77b 164is an abstraction that isolates a set of resources for finer-grained
699893d8 165control (CPUs, memory, etc.).
f2b1d720
MK
166In this mode, the event is measured
167only if the thread running on the monitored CPU belongs to the designated
168container (cgroup).
169The cgroup is identified by passing a file descriptor
170opened on its directory in the cgroupfs filesystem.
171For instance, if the
172cgroup to monitor is called
173.IR test ,
174then a file descriptor opened on
175.I /dev/cgroup/test
176(assuming cgroupfs is mounted on
177.IR /dev/cgroup )
178must be passed as the
179.I pid
180parameter.
33a0ccb2 181cgroup monitoring is available only
f2b1d720
MK
182for system-wide events and may therefore require extra permissions.
183.P
184The
185.I perf_event_attr
186structure provides detailed configuration information
187for the event being created.
188
189.in +4n
190.nf
191struct perf_event_attr {
ce88f77b
MK
192 __u32 type; /* Type of event */
193 __u32 size; /* Size of attribute structure */
194 __u64 config; /* Type-specific configuration */
f2b1d720
MK
195
196 union {
197 __u64 sample_period; /* Period of sampling */
198 __u64 sample_freq; /* Frequency of sampling */
199 };
200
ce88f77b
MK
201 __u64 sample_type; /* Specifies values included in sample */
202 __u64 read_format; /* Specifies values returned in read */
203
204 __u64 disabled : 1, /* off by default */
205 inherit : 1, /* children inherit it */
206 pinned : 1, /* must always be on PMU */
207 exclusive : 1, /* only group on PMU */
208 exclude_user : 1, /* don't count user */
209 exclude_kernel : 1, /* don't count kernel */
210 exclude_hv : 1, /* don't count hypervisor */
211 exclude_idle : 1, /* don't count when idle */
212 mmap : 1, /* include mmap data */
213 comm : 1, /* include comm data */
214 freq : 1, /* use freq, not period */
215 inherit_stat : 1, /* per task counts */
216 enable_on_exec : 1, /* next exec enables */
217 task : 1, /* trace fork/exit */
218 watermark : 1, /* wakeup_watermark */
219 precise_ip : 2, /* skid constraint */
220 mmap_data : 1, /* non-exec mmap data */
221 sample_id_all : 1, /* sample_type all events */
222 exclude_host : 1, /* don't count in host */
223 exclude_guest : 1, /* don't count in guest */
224 exclude_callchain_kernel : 1,
225 /* exclude kernel callchains */
226 exclude_callchain_user : 1,
227 /* exclude user callchains */
228 __reserved_1 : 41;
f2b1d720
MK
229
230 union {
231 __u32 wakeup_events; /* wakeup every n events */
7db515ef 232 __u32 wakeup_watermark; /* bytes before wakeup */
f2b1d720
MK
233 };
234
235 __u32 bp_type; /* breakpoint type */
236
237 union {
238 __u64 bp_addr; /* breakpoint address */
239 __u64 config1; /* extension of config */
240 };
241
242 union {
243 __u64 bp_len; /* breakpoint length */
244 __u64 config2; /* extension of config1 */
245 };
ce88f77b
MK
246 __u64 branch_sample_type; /* enum perf_branch_sample_type */
247 __u64 sample_regs_user; /* user regs to dump on samples */
248 __u32 sample_stack_user; /* size of stack to dump on
7db515ef 249 samples */
ce88f77b 250 __u32 __reserved_2; /* Align to u64 */
f2b1d720
MK
251
252};
253.fi
254.in
255
256The fields of the
257.I perf_event_attr
258structure are described in more detail below:
f2b1d720
MK
259.TP
260.I type
261This field specifies the overall event type.
262It has one of the following values:
263.RS
264.TP
265.B PERF_TYPE_HARDWARE
266This indicates one of the "generalized" hardware events provided
267by the kernel.
268See the
269.I config
270field definition for more details.
271.TP
272.B PERF_TYPE_SOFTWARE
273This indicates one of the software-defined events provided by the kernel
274(even if no hardware support is available).
275.TP
276.B PERF_TYPE_TRACEPOINT
277This indicates a tracepoint
278provided by the kernel tracepoint infrastructure.
279.TP
280.B PERF_TYPE_HW_CACHE
281This indicates a hardware cache event.
282This has a special encoding, described in the
283.I config
284field definition.
285.TP
286.B PERF_TYPE_RAW
287This indicates a "raw" implementation-specific event in the
288.IR config " field."
289.TP
31c1f2b0 290.BR PERF_TYPE_BREAKPOINT " (since Linux 2.6.33)"
f2b1d720
MK
291This indicates a hardware breakpoint as provided by the CPU.
292Breakpoints can be read/write accesses to an address as well as
293execution of an instruction address.
294.TP
295.RB "dynamic PMU"
296Since Linux 2.6.39,
7db515ef 297.BR perf_event_open ()
f2b1d720
MK
298can support multiple PMUs.
299To enable this, a value exported by the kernel can be used in the
300.I type
301field to indicate which PMU to use.
302The value to use can be found in the sysfs filesystem:
303there is a subdirectory per PMU instance under
304.IR /sys/bus/event_source/devices .
7d182bb6 305In each subdirectory there is a
f2b1d720
MK
306.I type
307file whose content is an integer that can be used in the
308.I type
309field.
310For instance,
311.I /sys/bus/event_source/devices/cpu/type
312contains the value for the core CPU PMU, which is usually 4.
313.RE
f2b1d720
MK
314.TP
315.I "size"
316The size of the
317.I perf_event_attr
318structure for forward/backward compatibility.
319Set this using
320.I sizeof(struct perf_event_attr)
321to allow the kernel to see
322the struct size at the time of compilation.
323
324The related define
325.B PERF_ATTR_SIZE_VER0
326is set to 64; this was the size of the first published struct.
327.B PERF_ATTR_SIZE_VER1
328is 72, corresponding to the addition of breakpoints in Linux 2.6.33.
329.B PERF_ATTR_SIZE_VER2
330is 80, corresponding to the addition of branch sampling in Linux 3.4.
331.B PERF_ATTR_SIZE_VER3
332is 96, corresponding to the addition
7ede2f66
DP
333of
334.I sample_regs_user
335and
336.I sample_stack_user
337in Linux 3.7.
f2b1d720
MK
338.TP
339.I "config"
340This specifies which event you want, in conjunction with
341the
342.I type
343field.
344The
345.IR config1 " and " config2
346fields are also taken into account in cases where 64 bits is not
347enough to fully specify the event.
348The encoding of these fields is event dependent.
349
350The most significant bit (bit 63) of
351.I config
352signifies CPU-specific (raw) counter configuration data;
353if the most significant bit is unset, the next 7 bits are an event
354type and the rest of the bits are the event identifier.
355
356There are various ways to set the
357.I config
358field that are dependent on the value of the previously
359described
360.I type
361field.
362What follows are various possible settings for
363.I config
364separated out by
365.IR type .
366
367If
368.I type
369is
370.BR PERF_TYPE_HARDWARE ,
371we are measuring one of the generalized hardware CPU events.
372Not all of these are available on all platforms.
373Set
374.I config
375to one of the following:
376.RS 12
377.TP
378.B PERF_COUNT_HW_CPU_CYCLES
379Total cycles.
2b538c3e 380Be wary of what happens during CPU frequency scaling.
f2b1d720
MK
381.TP
382.B PERF_COUNT_HW_INSTRUCTIONS
383Retired instructions.
384Be careful, these can be affected by various
2b538c3e 385issues, most notably hardware interrupt counts.
f2b1d720
MK
386.TP
387.B PERF_COUNT_HW_CACHE_REFERENCES
388Cache accesses.
389Usually this indicates Last Level Cache accesses but this may
390vary depending on your CPU.
391This may include prefetches and coherency messages; again this
392depends on the design of your CPU.
393.TP
394.B PERF_COUNT_HW_CACHE_MISSES
395Cache misses.
396Usually this indicates Last Level Cache misses; this is intended to be
397used in conjunction with the
398.B PERF_COUNT_HW_CACHE_REFERENCES
399event to calculate cache miss rates.
400.TP
401.B PERF_COUNT_HW_BRANCH_INSTRUCTIONS
402Retired branch instructions.
403Prior to Linux 2.6.34, this used
404the wrong event on AMD processors.
405.TP
406.B PERF_COUNT_HW_BRANCH_MISSES
407Mispredicted branch instructions.
408.TP
409.B PERF_COUNT_HW_BUS_CYCLES
410Bus cycles, which can be different from total cycles.
411.TP
31c1f2b0 412.BR PERF_COUNT_HW_STALLED_CYCLES_FRONTEND " (since Linux 3.0)"
f2b1d720
MK
413Stalled cycles during issue.
414.TP
31c1f2b0 415.BR PERF_COUNT_HW_STALLED_CYCLES_BACKEND " (since Linux 3.0)"
f2b1d720
MK
416Stalled cycles during retirement.
417.TP
31c1f2b0 418.BR PERF_COUNT_HW_REF_CPU_CYCLES " (since Linux 3.3)"
f2b1d720
MK
419Total cycles; not affected by CPU frequency scaling.
420.RE
421.IP
422If
423.I type
424is
425.BR PERF_TYPE_SOFTWARE ,
426we are measuring software events provided by the kernel.
427Set
428.I config
429to one of the following:
430.RS 12
431.TP
432.B PERF_COUNT_SW_CPU_CLOCK
433This reports the CPU clock, a high-resolution per-CPU timer.
434.TP
435.B PERF_COUNT_SW_TASK_CLOCK
436This reports a clock count specific to the task that is running.
437.TP
438.B PERF_COUNT_SW_PAGE_FAULTS
439This reports the number of page faults.
440.TP
441.B PERF_COUNT_SW_CONTEXT_SWITCHES
442This counts context switches.
443Until Linux 2.6.34, these were all reported as user-space
444events; after that they are reported as happening in the kernel.
445.TP
446.B PERF_COUNT_SW_CPU_MIGRATIONS
447This reports the number of times the process
448has migrated to a new CPU.
449.TP
450.B PERF_COUNT_SW_PAGE_FAULTS_MIN
451This counts the number of minor page faults.
452These did not require disk I/O to handle.
453.TP
454.B PERF_COUNT_SW_PAGE_FAULTS_MAJ
455This counts the number of major page faults.
456These required disk I/O to handle.
457.TP
31c1f2b0 458.BR PERF_COUNT_SW_ALIGNMENT_FAULTS " (since Linux 2.6.33)"
f2b1d720
MK
459This counts the number of alignment faults.
460These happen when unaligned memory accesses happen; the kernel
461can handle these but it reduces performance.
33a0ccb2 462This happens only on some architectures (never on x86).
f2b1d720 463.TP
31c1f2b0 464.BR PERF_COUNT_SW_EMULATION_FAULTS " (since Linux 2.6.33)"
f2b1d720
MK
465This counts the number of emulation faults.
466The kernel sometimes traps on unimplemented instructions
7db515ef 467and emulates them for user space.
f2b1d720 468This can negatively impact performance.
dab38455 469.TP
31c1f2b0 470.BR PERF_COUNT_SW_DUMMY " (since Linux 3.12)"
dab38455
VW
471This is a placeholder event that counts nothing.
472Informational sample record types such as mmap or comm
473must be associated with an active event.
474This dummy event allows gathering such records without requiring
475a counting event.
f2b1d720 476.RE
f2b1d720 477
f2b1d720
MK
478.RS
479If
480.I type
481is
482.BR PERF_TYPE_TRACEPOINT ,
483then we are measuring kernel tracepoints.
484The value to use in
485.I config
486can be obtained from under debugfs
487.I tracing/events/*/*/id
488if ftrace is enabled in the kernel.
f2b1d720 489.RE
1f22e274 490
f2b1d720
MK
491.RS
492If
493.I type
494is
495.BR PERF_TYPE_HW_CACHE ,
496then we are measuring a hardware CPU cache event.
497To calculate the appropriate
498.I config
499value use the following equation:
500.RS 4
501.nf
502
503 (perf_hw_cache_id) | (perf_hw_cache_op_id << 8) |
504 (perf_hw_cache_op_result_id << 16)
505.fi
506.P
507where
508.I perf_hw_cache_id
509is one of:
7db515ef 510.RS 4
f2b1d720
MK
511.TP
512.B PERF_COUNT_HW_CACHE_L1D
513for measuring Level 1 Data Cache
514.TP
515.B PERF_COUNT_HW_CACHE_L1I
516for measuring Level 1 Instruction Cache
517.TP
518.B PERF_COUNT_HW_CACHE_LL
519for measuring Last-Level Cache
520.TP
521.B PERF_COUNT_HW_CACHE_DTLB
522for measuring the Data TLB
523.TP
524.B PERF_COUNT_HW_CACHE_ITLB
525for measuring the Instruction TLB
526.TP
527.B PERF_COUNT_HW_CACHE_BPU
528for measuring the branch prediction unit
529.TP
31c1f2b0 530.BR PERF_COUNT_HW_CACHE_NODE " (since Linux 3.0)"
f2b1d720
MK
531for measuring local memory accesses
532.RE
f2b1d720
MK
533.P
534and
535.I perf_hw_cache_op_id
536is one of:
7db515ef 537.RS 4
f2b1d720
MK
538.TP
539.B PERF_COUNT_HW_CACHE_OP_READ
540for read accesses
541.TP
542.B PERF_COUNT_HW_CACHE_OP_WRITE
543for write accesses
544.TP
545.B PERF_COUNT_HW_CACHE_OP_PREFETCH
546for prefetch accesses
547.RE
f2b1d720
MK
548.P
549and
550.I perf_hw_cache_op_result_id
551is one of:
7db515ef 552.RS 4
f2b1d720
MK
553.TP
554.B PERF_COUNT_HW_CACHE_RESULT_ACCESS
555to measure accesses
556.TP
557.B PERF_COUNT_HW_CACHE_RESULT_MISS
558to measure misses
559.RE
560.RE
561
562If
563.I type
564is
565.BR PERF_TYPE_RAW ,
566then a custom "raw"
567.I config
568value is needed.
569Most CPUs support events that are not covered by the "generalized" events.
570These are implementation defined; see your CPU manual (for example
571the Intel Volume 3B documentation or the AMD BIOS and Kernel Developer
572Guide).
573The libpfm4 library can be used to translate from the name in the
574architectural manuals to the raw hex value
575.BR perf_event_open ()
576expects in this field.
577
578If
579.I type
580is
581.BR PERF_TYPE_BREAKPOINT ,
582then leave
583.I config
584set to zero.
585Its parameters are set in other places.
586.RE
587.TP
588.IR sample_period ", " sample_freq
589A "sampling" counter is one that generates an interrupt
590every N events, where N is given by
591.IR sample_period .
592A sampling counter has
593.IR sample_period " > 0."
594When an overflow interrupt occurs, requested data is recorded
595in the mmap buffer.
596The
597.I sample_type
598field controls what data is recorded on each interrupt.
599
600.I sample_freq
601can be used if you wish to use frequency rather than period.
37bee118 602In this case, you set the
f2b1d720
MK
603.I freq
604flag.
605The kernel will adjust the sampling period
606to try and achieve the desired rate.
607The rate of adjustment is a
608timer tick.
f2b1d720
MK
609.TP
610.I "sample_type"
611The various bits in this field specify which values to include
612in the sample.
613They will be recorded in a ring-buffer,
ad73a2cc 614which is available to user space using
f2b1d720
MK
615.BR mmap (2).
616The order in which the values are saved in the
617sample is documented in the MMAP Layout subsection below;
618it is not the
619.I "enum perf_event_sample_format"
620order.
621.RS
622.TP
623.B PERF_SAMPLE_IP
624Records instruction pointer.
625.TP
626.B PERF_SAMPLE_TID
7db515ef 627Records the process and thread IDs.
f2b1d720
MK
628.TP
629.B PERF_SAMPLE_TIME
630Records a timestamp.
631.TP
632.B PERF_SAMPLE_ADDR
633Records an address, if applicable.
634.TP
635.B PERF_SAMPLE_READ
636Records counter values for all events in a group, not just the group leader.
637.TP
638.B PERF_SAMPLE_CALLCHAIN
639Records the callchain (stack backtrace).
640.TP
641.B PERF_SAMPLE_ID
642Records a unique ID for the opened event's group leader.
643.TP
644.B PERF_SAMPLE_CPU
645Records CPU number.
646.TP
647.B PERF_SAMPLE_PERIOD
648Records the current sampling period.
649.TP
650.B PERF_SAMPLE_STREAM_ID
651Records a unique ID for the opened event.
652Unlike
653.B PERF_SAMPLE_ID
654the actual ID is returned, not the group leader.
8859d3a9
DP
655This ID is the same as the one returned by
656.BR PERF_FORMAT_ID .
f2b1d720
MK
657.TP
658.B PERF_SAMPLE_RAW
659Records additional data, if applicable.
660Usually returned by tracepoint events.
661.TP
31c1f2b0 662.BR PERF_SAMPLE_BRANCH_STACK " (since Linux 3.4)"
045bf4d3
VW
663This provides a record of recent branches, as provided
664by CPU branch sampling hardware (such as Intel Last Branch Record).
665Not all hardware supports this feature.
666
667See the
668.I branch_sample_type
669field for how to filter which branches are reported.
f2b1d720 670.TP
31c1f2b0 671.BR PERF_SAMPLE_REGS_USER " (since Linux 3.7)"
d1007d14
VW
672Records the current user-level CPU register state
673(the values in the process before the kernel was called).
f2b1d720 674.TP
31c1f2b0 675.BR PERF_SAMPLE_STACK_USER " (since Linux 3.7)"
d1007d14
VW
676Records the user-level stack, allowing stack unwinding.
677.TP
31c1f2b0 678.BR PERF_SAMPLE_WEIGHT " (since Linux 3.10)"
d1007d14 679Records a hardware provided weight value that expresses how
51700fd7 680costly the sampled event was.
d1007d14
VW
681This allows the hardware to highlight expensive events in
682a profile.
683.TP
31c1f2b0 684.BR PERF_SAMPLE_DATA_SRC " (since Linux 3.10)"
d1007d14
VW
685Records the data source: where in the memory hierarchy
686the data associated with the sampled instruction came from.
687This is only available if the underlying hardware
688supports this feature.
7480dabb 689.TP
31c1f2b0 690.BR PERF_SAMPLE_IDENTIFIER " (since Linux 3.12)"
8859d3a9
DP
691Places the
692.B SAMPLE_ID
693value in a fixed position in the record,
7480dabb
VW
694either at the beginning (for sample events) or at the end
695(if a non-sample event).
696
697This was necessary because a sample stream may have
698records from various different event sources with different
699.I sample_type
700settings.
e9bd9b2c 701Parsing the event stream properly was not possible because the
8859d3a9
DP
702format of the record was needed to find
703.BR SAMPLE_ID ,
704but
27f52b52 705the format could not be found without knowing what
7480dabb
VW
706event the sample belonged to (causing a circular
707dependency).
708
709This new
710.B PERF_SAMPLE_IDENTIFIER
711setting makes the event stream always parsable
8859d3a9
DP
712by putting
713.B SAMPLE_ID
714in a fixed location, even though
715it means having duplicate
716.B SAMPLE_ID
717values in records.
1e043959
VW
718.TP
719.BR PERF_SAMPLE_TRANSACTION " (since Linux 3.13)"
84fc2a6e 720Records reasons for transactional memory abort events
1e043959
VW
721(for example, from Intel TSX transactional memory support).
722
723The
724.I precise_ip
b3f39642 725setting must be greater than 0 and a transactional memory abort
1e043959 726event must be measured or no values will be recorded.
84fc2a6e
MK
727Also note that some perf_event measurements, such as sampled
728cycle counting, may cause extraneous aborts (by causing an
1e043959 729interrupt during a transaction).
f2b1d720 730.RE
f2b1d720
MK
731.TP
732.IR "read_format"
733This field specifies the format of the data returned by
734.BR read (2)
735on a
7db515ef 736.BR perf_event_open ()
f2b1d720
MK
737file descriptor.
738.RS
739.TP
740.B PERF_FORMAT_TOTAL_TIME_ENABLED
7ede2f66
DP
741Adds the 64-bit
742.I time_enabled
743field.
f2b1d720
MK
744This can be used to calculate estimated totals if
745the PMU is overcommitted and multiplexing is happening.
746.TP
747.B PERF_FORMAT_TOTAL_TIME_RUNNING
7ede2f66
DP
748Adds the 64-bit
749.I time_running
750field.
f2b1d720 751This can be used to calculate estimated totals if
3d1ee497 752the PMU is overcommitted and multiplexing is happening.
f2b1d720
MK
753.TP
754.B PERF_FORMAT_ID
755Adds a 64-bit unique value that corresponds to the event group.
756.TP
757.B PERF_FORMAT_GROUP
758Allows all counter values in an event group to be read with one read.
759.RE
f2b1d720
MK
760.TP
761.IR "disabled"
762The
763.I disabled
764bit specifies whether the counter starts out disabled or enabled.
765If disabled, the event can later be enabled by
766.BR ioctl (2),
767.BR prctl (2),
768or
769.IR enable_on_exec .
406650db
VW
770
771When creating an event group, typically the group leader is initialized
772with
773.I disabled
774set to 1 and any child events are initialized with
775.I disabled
776set to 0.
777Despite
778.I disabled
779being 0, the child events will not start until the group leader
780is enabled.
f2b1d720
MK
781.TP
782.IR "inherit"
783The
784.I inherit
785bit specifies that this counter should count events of child
786tasks as well as the task specified.
33a0ccb2 787This applies only to new children, not to any existing children at
f2b1d720
MK
788the time the counter is created (nor to any new children of
789existing children).
790
791Inherit does not work for some combinations of
792.IR read_format s,
793such as
794.BR PERF_FORMAT_GROUP .
f2b1d720
MK
795.TP
796.IR "pinned"
797The
798.I pinned
799bit specifies that the counter should always be on the CPU if at all
800possible.
33a0ccb2 801It applies only to hardware counters and only to group leaders.
f2b1d720
MK
802If a pinned counter cannot be put onto the CPU (e.g., because there are
803not enough hardware counters or because of a conflict with some other
804event), then the counter goes into an 'error' state, where reads
805return end-of-file (i.e.,
806.BR read (2)
807returns 0) until the counter is subsequently enabled or disabled.
f2b1d720
MK
808.TP
809.IR "exclusive"
810The
811.I exclusive
812bit specifies that when this counter's group is on the CPU,
813it should be the only group using the CPU's counters.
814In the future this may allow monitoring programs to
815support PMU features that need to run alone so that they do not
816disrupt other hardware counters.
bea10c8c
VW
817
818Note that many unexpected situations may prevent events with the
819.I exclusive
d3532647 820bit set from ever running.
bea10c8c 821This includes any users running a system-wide
d3532647 822measurement as well as any kernel use of the performance counters
bea10c8c 823(including the commonly enabled NMI Watchdog Timer interface).
f2b1d720
MK
824.TP
825.IR "exclude_user"
ad73a2cc 826If this bit is set, the count excludes events that happen in user space.
f2b1d720
MK
827.TP
828.IR "exclude_kernel"
829If this bit is set, the count excludes events that happen in kernel space.
f2b1d720
MK
830.TP
831.IR "exclude_hv"
832If this bit is set, the count excludes events that happen in the
833hypervisor.
834This is mainly for PMUs that have built-in support for handling this
835(such as POWER).
836Extra support is needed for handling hypervisor measurements on most
837machines.
f2b1d720
MK
838.TP
839.IR "exclude_idle"
840If set, don't count when the CPU is idle.
f2b1d720
MK
841.TP
842.IR "mmap"
843The
844.I mmap
75ee11e5 845bit enables generation of
cd7c700a 846.B PERF_RECORD_MMAP
75ee11e5
VW
847samples for every
848.BR mmap (2)
849call that has
cd7c700a 850.B PROT_EXEC
75ee11e5
VW
851set.
852This allows tools to notice new executable code being mapped into
853a program (dynamic shared libraries for example)
854so that addresses can be mapped back to the original code.
f2b1d720
MK
855.TP
856.IR "comm"
857The
858.I comm
859bit enables tracking of process command name as modified by the
cd7c700a 860.BR exec (2)
f2b1d720 861and
cd7c700a 862.BR prctl (PR_SET_NAME)
f2b1d720
MK
863system calls.
864Unfortunately for tools,
865there is no way to distinguish one system call from the other.
f2b1d720
MK
866.TP
867.IR "freq"
868If this bit is set, then
869.I sample_freq
870not
871.I sample_period
872is used when setting up the sampling interval.
f2b1d720
MK
873.TP
874.IR "inherit_stat"
875This bit enables saving of event counts on context switch for
876inherited tasks.
33a0ccb2 877This is meaningful only if the
f2b1d720
MK
878.I inherit
879field is set.
f2b1d720
MK
880.TP
881.IR "enable_on_exec"
882If this bit is set, a counter is automatically
883enabled after a call to
884.BR exec (2).
f2b1d720
MK
885.TP
886.IR "task"
887If this bit is set, then
888fork/exit notifications are included in the ring buffer.
f2b1d720
MK
889.TP
890.IR "watermark"
891If set, have a sampling interrupt happen when we cross the
892.I wakeup_watermark
893boundary.
2b9b829d 894Otherwise, interrupts happen after
f2b1d720
MK
895.I wakeup_events
896samples.
f2b1d720 897.TP
31c1f2b0 898.IR "precise_ip" " (since Linux 2.6.35)"
f2b1d720
MK
899This controls the amount of skid.
900Skid is how many instructions
901execute between an event of interest happening and the kernel
902being able to stop and record the event.
903Smaller skid is
904better and allows more accurate reporting of which events
905correspond to which instructions, but hardware is often limited
906with how small this can be.
907
908The values of this are the following:
909.RS
910.TP
9110 -
912.B SAMPLE_IP
2b538c3e 913can have arbitrary skid.
f2b1d720
MK
914.TP
9151 -
916.B SAMPLE_IP
2b538c3e 917must have constant skid.
f2b1d720
MK
918.TP
9192 -
920.B SAMPLE_IP
2b538c3e 921requested to have 0 skid.
f2b1d720
MK
922.TP
9233 -
924.B SAMPLE_IP
925must have 0 skid.
926See also
927.BR PERF_RECORD_MISC_EXACT_IP .
928.RE
f2b1d720 929.TP
31c1f2b0 930.IR "mmap_data" " (since Linux 2.6.36)"
f2b1d720
MK
931The counterpart of the
932.I mmap
75ee11e5
VW
933field.
934This enables generation of
cd7c700a 935.B PERF_RECORD_MMAP
75ee11e5
VW
936samples for
937.BR mmap (2)
938calls that do not have
cd7c700a 939.B PROT_EXEC
75ee11e5 940set (for example data and SysV shared memory).
f2b1d720 941.TP
31c1f2b0 942.IR "sample_id_all" " (since Linux 2.6.38)"
7480dabb 943If set, then TID, TIME, ID, STREAM_ID, and CPU can
f2b1d720
MK
944additionally be included in
945.RB non- PERF_RECORD_SAMPLE s
946if the corresponding
947.I sample_type
948is selected.
7480dabb 949
e9bd9b2c 950If
7480dabb 951.B PERF_SAMPLE_IDENTIFIER
37bee118 952is specified, then an additional ID value is included
7480dabb
VW
953as the last value to ease parsing the record stream.
954This may lead to the
e9bd9b2c 955.I id
7480dabb
VW
956value appearing twice.
957
958The layout is described by this pseudo-structure:
959.in +4n
960.nf
961struct sample_id {
962 { u32 pid, tid; } /* if PERF_SAMPLE_TID set */
963 { u64 time; } /* if PERF_SAMPLE_TIME set */
964 { u64 id; } /* if PERF_SAMPLE_ID set */
965 { u64 stream_id;} /* if PERF_SAMPLE_STREAM_ID set */
966 { u32 cpu, res; } /* if PERF_SAMPLE_CPU set */
967 { u64 id; } /* if PERF_SAMPLE_IDENTIFIER set */
968};
969.fi
f2b1d720 970.TP
31c1f2b0 971.IR "exclude_host" " (since Linux 3.2)"
33d6e2c7 972Do not measure time spent in VM host.
f2b1d720 973.TP
31c1f2b0 974.IR "exclude_guest" " (since Linux 3.2)"
33d6e2c7 975Do not measure time spent in VM guest.
f2b1d720 976.TP
31c1f2b0 977.IR "exclude_callchain_kernel" " (since Linux 3.7)"
f2b1d720 978Do not include kernel callchains.
f2b1d720 979.TP
31c1f2b0 980.IR "exclude_callchain_user" " (since Linux 3.7)"
f2b1d720 981Do not include user callchains.
f2b1d720
MK
982.TP
983.IR "wakeup_events" ", " "wakeup_watermark"
984This union sets how many samples
985.RI ( wakeup_events )
986or bytes
987.RI ( wakeup_watermark )
988happen before an overflow signal happens.
989Which one is used is selected by the
990.I watermark
cb8a928f 991bit flag.
751c0f1a
VW
992
993.I wakeup_events
994only counts
995.B PERF_RECORD_SAMPLE
51700fd7 996record types.
3d1ee497 997To receive a signal for every incoming
751c0f1a
VW
998.B PERF_RECORD
999type set
1000.I wakeup_watermark
1001to 1.
f2b1d720 1002.TP
31c1f2b0 1003.IR "bp_type" " (since Linux 2.6.33)"
f2b1d720
MK
1004This chooses the breakpoint type.
1005It is one of:
1006.RS
1007.TP
1008.BR HW_BREAKPOINT_EMPTY
2b538c3e 1009No breakpoint.
f2b1d720
MK
1010.TP
1011.BR HW_BREAKPOINT_R
2b538c3e 1012Count when we read the memory location.
f2b1d720
MK
1013.TP
1014.BR HW_BREAKPOINT_W
2b538c3e 1015Count when we write the memory location.
f2b1d720
MK
1016.TP
1017.BR HW_BREAKPOINT_RW
2b538c3e 1018Count when we read or write the memory location.
f2b1d720
MK
1019.TP
1020.BR HW_BREAKPOINT_X
2b538c3e 1021Count when we execute code at the memory location.
f2b1d720 1022.LP
7db515ef 1023The values can be combined via a bitwise or, but the
f2b1d720
MK
1024combination of
1025.B HW_BREAKPOINT_R
1026or
1027.B HW_BREAKPOINT_W
1028with
1029.B HW_BREAKPOINT_X
1030is not allowed.
1031.RE
f2b1d720 1032.TP
31c1f2b0 1033.IR "bp_addr" " (since Linux 2.6.33)"
f2b1d720
MK
1034.I bp_addr
1035is the address of the breakpoint.
1036For execution breakpoints this is the memory address of the instruction
1037of interest; for read and write breakpoints it is the memory address
1038of the memory location of interest.
f2b1d720 1039.TP
31c1f2b0 1040.IR "config1" " (since Linux 2.6.39)"
f2b1d720
MK
1041.I config1
1042is used for setting events that need an extra register or otherwise
1043do not fit in the regular config field.
1044Raw OFFCORE_EVENTS on Nehalem/Westmere/SandyBridge use this field
1045on 3.3 and later kernels.
f2b1d720 1046.TP
31c1f2b0 1047.IR "bp_len" " (since Linux 2.6.33)"
f2b1d720
MK
1048.I bp_len
1049is the length of the breakpoint being measured if
1050.I type
1051is
1052.BR PERF_TYPE_BREAKPOINT .
1053Options are
1054.BR HW_BREAKPOINT_LEN_1 ,
1055.BR HW_BREAKPOINT_LEN_2 ,
1056.BR HW_BREAKPOINT_LEN_4 ,
1057.BR HW_BREAKPOINT_LEN_8 .
1058For an execution breakpoint, set this to
1059.IR sizeof(long) .
f2b1d720 1060.TP
31c1f2b0 1061.IR "config2" " (since Linux 2.6.39)"
f2b1d720
MK
1062
1063.I config2
1064is a further extension of the
1065.I config1
1066field.
f2b1d720 1067.TP
31c1f2b0 1068.IR "branch_sample_type" " (since Linux 3.4)"
8a94e783 1069If
045bf4d3
VW
1070.B PERF_SAMPLE_BRANCH_STACK
1071is enabled, then this specifies what branches to include
1072in the branch record.
e3c9782b
VW
1073
1074The first part of the value is the privilege level, which
1075is a combination of one or more of the following values.
045bf4d3
VW
1076If the user does not set privilege level explicitly, the kernel
1077will use the event's privilege level.
1078Event and branch privilege levels do not have to match.
f2b1d720
MK
1079.RS
1080.TP
1081.B PERF_SAMPLE_BRANCH_USER
33d6e2c7 1082Branch target is in user space.
f2b1d720
MK
1083.TP
1084.B PERF_SAMPLE_BRANCH_KERNEL
33d6e2c7 1085Branch target is in kernel space.
f2b1d720
MK
1086.TP
1087.B PERF_SAMPLE_BRANCH_HV
33d6e2c7 1088Branch target is in hypervisor.
e3c9782b
VW
1089.TP
1090.B PERF_SAMPLE_BRANCH_PLM_ALL
1091A convenience value that is the three preceding values ORed together.
1092
1093.P
1094In addition to the privilege value, at least one of the
1095following bits must be set.
1096
f2b1d720
MK
1097.TP
1098.B PERF_SAMPLE_BRANCH_ANY
33d6e2c7 1099Any branch type.
f2b1d720
MK
1100.TP
1101.B PERF_SAMPLE_BRANCH_ANY_CALL
33d6e2c7 1102Any call branch.
f2b1d720
MK
1103.TP
1104.B PERF_SAMPLE_BRANCH_ANY_RETURN
33d6e2c7 1105Any return branch.
f2b1d720 1106.TP
e3c9782b 1107.B PERF_SAMPLE_BRANCH_IND_CALL
33d6e2c7 1108Indirect calls.
f2b1d720 1109.TP
31c1f2b0 1110.BR PERF_SAMPLE_BRANCH_ABORT_TX " (since Linux 3.11)"
33d6e2c7 1111Transactional memory aborts.
e3c9782b 1112.TP
31c1f2b0 1113.BR PERF_SAMPLE_BRANCH_IN_TX " (since Linux 3.11)"
33d6e2c7 1114Branch in transactional memory transaction.
e3c9782b 1115.TP
31c1f2b0 1116.BR PERF_SAMPLE_BRANCH_NO_TX " (since Linux 3.11)"
33d6e2c7 1117Branch not in transactional memory transaction.
f2b1d720 1118.RE
e3c9782b 1119
f2b1d720 1120.TP
31c1f2b0 1121.IR "sample_regs_user" " (since Linux 3.7)"
4651e412 1122This bit mask defines the set of user CPU registers to dump on samples.
76c637e1 1123The layout of the register mask is architecture-specific and
d1007d14
VW
1124described in the kernel header
1125.IR arch/ARCH/include/uapi/asm/perf_regs.h .
f2b1d720 1126.TP
31c1f2b0 1127.IR "sample_stack_user" " (since Linux 3.7)"
d1007d14
VW
1128This defines the size of the user stack to dump if
1129.B PERF_SAMPLE_STACK_USER
1130is specified.
73d8cece 1131.SS Reading results
f2b1d720 1132Once a
7db515ef 1133.BR perf_event_open ()
3d1ee497 1134file descriptor has been opened, the values
f2b1d720
MK
1135of the events can be read from the file descriptor.
1136The values that are there are specified by the
1137.I read_format
7db515ef
MK
1138field in the
1139.I attr
1140structure at open time.
f2b1d720
MK
1141
1142If you attempt to read into a buffer that is not big enough to hold the
1143data,
1144.B ENOSPC
1145is returned.
1146
1147Here is the layout of the data returned by a read:
e525b89f 1148.IP * 2
f2b1d720
MK
1149If
1150.B PERF_FORMAT_GROUP
1151was specified to allow reading all events in a group at once:
1152
1153.in +4n
1154.nf
1155struct read_format {
e525b89f
MK
1156 u64 nr; /* The number of events */
1157 u64 time_enabled; /* if PERF_FORMAT_TOTAL_TIME_ENABLED */
1158 u64 time_running; /* if PERF_FORMAT_TOTAL_TIME_RUNNING */
50b2aa27 1159 struct {
e525b89f
MK
1160 u64 value; /* The value of the event */
1161 u64 id; /* if PERF_FORMAT_ID */
f2b1d720
MK
1162 } values[nr];
1163};
1164.fi
1165.in
e525b89f 1166.IP *
f2b1d720
MK
1167If
1168.B PERF_FORMAT_GROUP
1169was
1170.I not
e525b89f 1171specified:
f2b1d720
MK
1172
1173.in +4n
1174.nf
1175struct read_format {
1176 u64 value; /* The value of the event */
1177 u64 time_enabled; /* if PERF_FORMAT_TOTAL_TIME_ENABLED */
1178 u64 time_running; /* if PERF_FORMAT_TOTAL_TIME_RUNNING */
1179 u64 id; /* if PERF_FORMAT_ID */
1180};
1181.fi
1182.in
e525b89f
MK
1183.PP
1184The values read are as follows:
f2b1d720
MK
1185.TP
1186.I nr
1187The number of events in this file descriptor.
1188Only available if
1189.B PERF_FORMAT_GROUP
1190was specified.
f2b1d720
MK
1191.TP
1192.IR time_enabled ", " time_running
1193Total time the event was enabled and running.
1194Normally these are the same.
37bee118
MK
1195If more events are started
1196than available counter slots on the PMU, then multiplexing
33a0ccb2 1197happens and events run only part of the time.
37bee118 1198In that case, the
f2b1d720
MK
1199.I time_enabled
1200and
1201.I time_running
1202values can be used to scale an estimated value for the count.
f2b1d720
MK
1203.TP
1204.I value
1205An unsigned 64-bit value containing the counter result.
f2b1d720
MK
1206.TP
1207.I id
1208A globally unique value for this particular event, only there if
1209.B PERF_FORMAT_ID
e525b89f
MK
1210was specified in
1211.IR read_format .
73d8cece 1212.SS MMAP layout
f2b1d720 1213When using
7db515ef 1214.BR perf_event_open ()
f2b1d720
MK
1215in sampled mode, asynchronous events
1216(like counter overflow or
1217.B PROT_EXEC
1218mmap tracking)
1219are logged into a ring-buffer.
1220This ring-buffer is created and accessed through
1221.BR mmap (2).
1222
1223The mmap size should be 1+2^n pages, where the first page is a
1224metadata page
e525b89f 1225.RI ( "struct perf_event_mmap_page" )
f2b1d720
MK
1226that contains various
1227bits of information such as where the ring-buffer head is.
1228
1229Before kernel 2.6.39, there is a bug that means you must allocate a mmap
1230ring buffer when sampling even if you do not plan to access it.
1231
1232The structure of the first metadata mmap page is as follows:
1233
1234.in +4n
1235.nf
1236struct perf_event_mmap_page {
ce88f77b
MK
1237 __u32 version; /* version number of this structure */
1238 __u32 compat_version; /* lowest version this is compat with */
1239 __u32 lock; /* seqlock for synchronization */
1240 __u32 index; /* hardware counter identifier */
1241 __s64 offset; /* add to hardware counter value */
1242 __u64 time_enabled; /* time event active */
1243 __u64 time_running; /* time event on CPU */
f2b1d720
MK
1244 union {
1245 __u64 capabilities;
135cba8b 1246 struct {
ce88f77b
MK
1247 __u64 cap_usr_time / cap_usr_rdpmc / cap_bit0 : 1,
1248 cap_bit0_is_deprecated : 1,
1249 cap_user_rdpmc : 1,
1250 cap_user_time : 1,
1251 cap_user_time_zero : 1,
135cba8b 1252 };
f2b1d720 1253 };
ce88f77b
MK
1254 __u16 pmc_width;
1255 __u16 time_shift;
1256 __u32 time_mult;
1257 __u64 time_offset;
1258 __u64 __reserved[120]; /* Pad to 1k */
1259 __u64 data_head; /* head in the data section */
1260 __u64 data_tail; /* user-space written tail */
f2b1d720
MK
1261};
1262.fi
1263.in
1264
ce88f77b 1265The following list describes the fields in the
f2b1d720 1266.I perf_event_mmap_page
e525b89f 1267structure in more detail:
f2b1d720
MK
1268.TP
1269.I version
1270Version number of this structure.
f2b1d720
MK
1271.TP
1272.I compat_version
1273The lowest version this is compatible with.
f2b1d720
MK
1274.TP
1275.I lock
1276A seqlock for synchronization.
f2b1d720
MK
1277.TP
1278.I index
1279A unique hardware counter identifier.
f2b1d720
MK
1280.TP
1281.I offset
135cba8b
VW
1282When using rdpmc for reads this offset value
1283must be added to the one returned by rdpmc to get
1284the current total event count.
f2b1d720
MK
1285.TP
1286.I time_enabled
1287Time the event was active.
f2b1d720
MK
1288.TP
1289.I time_running
1290Time the event was running.
f2b1d720 1291.TP
31c1f2b0 1292.IR cap_usr_time " / " cap_usr_rdpmc " / " cap_bit0 " (since Linux 3.4)"
e9bd9b2c 1293There was a bug in the definition of
f2b1d720 1294.I cap_usr_time
135cba8b
VW
1295and
1296.I cap_usr_rdpmc
1297from Linux 3.4 until Linux 3.11.
1298Both bits were defined to point to the same location, so it was
e9bd9b2c 1299impossible to know if
135cba8b
VW
1300.I cap_usr_time
1301or
1302.I cap_usr_rdpmc
1303were actually set.
1304
1305Starting with 3.12 these are renamed to
1306.I cap_bit0
1307and you should use the new
1308.I cap_user_time
1309and
1310.I cap_user_rdpmc
1311fields instead.
1312
f2b1d720 1313.TP
31c1f2b0 1314.IR cap_bit0_is_deprecated " (since Linux 3.12)"
37bee118 1315If set, this bit indicates that the kernel supports
135cba8b
VW
1316the properly separated
1317.I cap_user_time
1318and
1319.I cap_user_rdpmc
1320bits.
1321
1322If not set, it indicates an older kernel where
1323.I cap_usr_time
1324and
f2b1d720 1325.I cap_usr_rdpmc
135cba8b
VW
1326map to the same bit and thus both features should
1327be used with caution.
1328
1329.TP
31c1f2b0 1330.IR cap_user_rdpmc " (since Linux 3.12)"
f2b1d720
MK
1331If the hardware supports user-space read of performance counters
1332without syscall (this is the "rdpmc" instruction on x86), then
1333the following code can be used to do a read:
1334
1335.in +4n
1336.nf
1337u32 seq, time_mult, time_shift, idx, width;
1338u64 count, enabled, running;
1339u64 cyc, time_offset;
f2b1d720
MK
1340
1341do {
1342 seq = pc\->lock;
1343 barrier();
1344 enabled = pc\->time_enabled;
1345 running = pc\->time_running;
1346
1347 if (pc\->cap_usr_time && enabled != running) {
1348 cyc = rdtsc();
1349 time_offset = pc\->time_offset;
1350 time_mult = pc\->time_mult;
1351 time_shift = pc\->time_shift;
1352 }
1353
1354 idx = pc\->index;
1355 count = pc\->offset;
1356
1357 if (pc\->cap_usr_rdpmc && idx) {
1358 width = pc\->pmc_width;
135cba8b 1359 count += rdpmc(idx \- 1);
f2b1d720
MK
1360 }
1361
1362 barrier();
1363} while (pc\->lock != seq);
1364.fi
1365.in
f2b1d720 1366.TP
31c1f2b0 1367.IR cap_user_time " (since Linux 3.12)"
7d182bb6 1368This bit indicates the hardware has a constant, nonstop
135cba8b
VW
1369timestamp counter (TSC on x86).
1370.TP
31c1f2b0 1371.IR cap_user_time_zero " (since Linux 3.12)"
135cba8b
VW
1372Indicates the presence of
1373.I time_zero
1374which allows mapping timestamp values to
1375the hardware clock.
1376.TP
f2b1d720
MK
1377.I pmc_width
1378If
1379.IR cap_usr_rdpmc ,
1380this field provides the bit-width of the value
1381read using the rdpmc or equivalent instruction.
1382This can be used to sign extend the result like:
1383
1384.in +4n
1385.nf
1386pmc <<= 64 \- pmc_width;
1387pmc >>= 64 \- pmc_width; // signed shift right
1388count += pmc;
1389.fi
1390.in
f2b1d720
MK
1391.TP
1392.IR time_shift ", " time_mult ", " time_offset
1393
1394If
1395.IR cap_usr_time ,
1396these fields can be used to compute the time
7db515ef 1397delta since time_enabled (in nanoseconds) using rdtsc or similar.
f2b1d720
MK
1398.nf
1399
1400 u64 quot, rem;
1401 u64 delta;
1402 quot = (cyc >> time_shift);
1403 rem = cyc & ((1 << time_shift) \- 1);
1404 delta = time_offset + quot * time_mult +
1405 ((rem * time_mult) >> time_shift);
1406.fi
1407
7db515ef
MK
1408Where
1409.IR time_offset ,
1410.IR time_mult ,
1411.IR time_shift ,
1412and
1413.IR cyc
1414are read in the
f2b1d720
MK
1415seqcount loop described above.
1416This delta can then be added to
1417enabled and possibly running (if idx), improving the scaling:
1418.nf
1419
1420 enabled += delta;
1421 if (idx)
1422 running += delta;
1423 quot = count / running;
1424 rem = count % running;
1425 count = quot * enabled + (rem * enabled) / running;
1426.fi
f2b1d720 1427.TP
31c1f2b0 1428.IR time_zero " (since Linux 3.12)"
135cba8b 1429
e9bd9b2c 1430If
135cba8b 1431.I cap_user_time_zero
37bee118 1432is set, then the hardware clock (the TSC timestamp counter on x86)
135cba8b
VW
1433can be calculated from the
1434.IR time_zero ", " time_mult ", and " time_shift " values:"
ce88f77b 1435
135cba8b
VW
1436.nf
1437 time = timestamp \- time_zero;
1438 quot = time / time_mult;
1439 rem = time % time_mult;
1440 cyc = (quot << time_shift) + (rem << time_shift) / time_mult;
1441.fi
ce88f77b 1442
135cba8b 1443And vice versa:
ce88f77b 1444
135cba8b
VW
1445.nf
1446 quot = cyc >> time_shift;
1447 rem = cyc & ((1 << time_shift) \- 1);
1448 timestamp = time_zero + quot * time_mult +
1449 ((rem * time_mult) >> time_shift);
1450.fi
1451.TP
f2b1d720
MK
1452.I data_head
1453This points to the head of the data section.
7db515ef
MK
1454The value continuously increases; it does not wrap.
1455The value needs to be manually wrapped by the size of the mmap buffer
f2b1d720
MK
1456before accessing the samples.
1457
ce88f77b
MK
1458On SMP-capable platforms, after reading the
1459.I data_head
1460value,
ad73a2cc 1461user space should issue an rmb().
f2b1d720 1462.TP
fecd584f 1463.I data_tail
f2b1d720
MK
1464When the mapping is
1465.BR PROT_WRITE ,
7db515ef
MK
1466the
1467.I data_tail
1468value should be written by user space to reflect the last read data.
31020de9 1469In this case, the kernel will not overwrite unread data.
e525b89f 1470.PP
f2b1d720
MK
1471The following 2^n ring-buffer pages have the layout described below.
1472
1473If
1474.I perf_event_attr.sample_id_all
1475is set, then all event types will
1476have the sample_type selected fields related to where/when (identity)
1477an event took place (TID, TIME, ID, CPU, STREAM_ID) described in
1478.B PERF_RECORD_SAMPLE
1479below; these fields will be stashed just after the
7db515ef
MK
1480.I perf_event_header
1481and the fields already present for the existing
3d1ee497 1482fields, that is, at the end of the payload.
f2b1d720
MK
1483That way a newer perf.data
1484file will be supported by older perf tools, with these new optional
1485fields being ignored.
1486
1487The mmap values start with a header:
1488
1489.in +4n
1490.nf
1491struct perf_event_header {
1492 __u32 type;
1493 __u16 misc;
1494 __u16 size;
1495};
1496.fi
1497.in
1498
1499Below, we describe the
1500.I perf_event_header
1501fields in more detail.
4047bc6c
MK
1502For ease of reading,
1503the fields with shorter descriptions are presented first.
1504.TP
1505.I size
1506This indicates the size of the record.
1507.TP
1508.I misc
1509The
1510.I misc
1511field contains additional information about the sample.
1512
1513The CPU mode can be determined from this value by masking with
1514.B PERF_RECORD_MISC_CPUMODE_MASK
1515and looking for one of the following (note these are not
1516bit masks, only one can be set at a time):
1517.RS
1518.TP
1519.B PERF_RECORD_MISC_CPUMODE_UNKNOWN
1520Unknown CPU mode.
1521.TP
1522.B PERF_RECORD_MISC_KERNEL
1523Sample happened in the kernel.
1524.TP
1525.B PERF_RECORD_MISC_USER
1526Sample happened in user code.
1527.TP
1528.B PERF_RECORD_MISC_HYPERVISOR
1529Sample happened in the hypervisor.
1530.TP
1531.B PERF_RECORD_MISC_GUEST_KERNEL
1532Sample happened in the guest kernel.
1533.TP
1534.B PERF_RECORD_MISC_GUEST_USER
1535Sample happened in guest user code.
1536.RE
1537
1538.RS
1539In addition, one of the following bits can be set:
1540.TP
1541.B PERF_RECORD_MISC_MMAP_DATA
1542This is set when the mapping is not executable;
1543otherwise the mapping is executable.
1544.TP
1545.B PERF_RECORD_MISC_EXACT_IP
1546This indicates that the content of
1547.B PERF_SAMPLE_IP
1548points
1549to the actual instruction that triggered the event.
1550See also
1551.IR perf_event_attr.precise_ip .
1552.TP
1553.B PERF_RECORD_MISC_EXT_RESERVED
1554This indicates there is extended data available (currently not used).
1555.RE
f2b1d720
MK
1556.TP
1557.I type
1558The
1559.I type
1560value is one of the below.
1561The values in the corresponding record (that follows the header)
1562depend on the
1563.I type
1564selected as shown.
7480dabb 1565
f2b1d720 1566.RS
7db515ef 1567.TP 4
f2b1d720
MK
1568.B PERF_RECORD_MMAP
1569The MMAP events record the
1570.B PROT_EXEC
1571mappings so that we can correlate
ad73a2cc 1572user-space IPs to code.
f2b1d720
MK
1573They have the following structure:
1574
1575.in +4n
1576.nf
1577struct {
1578 struct perf_event_header header;
1579 u32 pid, tid;
1580 u64 addr;
1581 u64 len;
1582 u64 pgoff;
1583 char filename[];
1584};
1585.fi
1586.in
f2b1d720
MK
1587.TP
1588.B PERF_RECORD_LOST
1589This record indicates when events are lost.
1590
1591.in +4n
1592.nf
1593struct {
1594 struct perf_event_header header;
1595 u64 id;
1596 u64 lost;
7480dabb 1597 struct sample_id sample_id;
f2b1d720
MK
1598};
1599.fi
1600.in
f2b1d720
MK
1601.RS
1602.TP
1603.I id
1604is the unique event ID for the samples that were lost.
1605.TP
1606.I lost
1607is the number of events that were lost.
1608.RE
f2b1d720
MK
1609.TP
1610.B PERF_RECORD_COMM
1611This record indicates a change in the process name.
1612
1613.in +4n
1614.nf
1615struct {
1616 struct perf_event_header header;
1617 u32 pid, tid;
1618 char comm[];
7480dabb 1619 struct sample_id sample_id;
f2b1d720
MK
1620};
1621.fi
1622.in
f2b1d720
MK
1623.TP
1624.B PERF_RECORD_EXIT
1625This record indicates a process exit event.
1626
1627.in +4n
1628.nf
1629struct {
1630 struct perf_event_header header;
1631 u32 pid, ppid;
1632 u32 tid, ptid;
1633 u64 time;
7480dabb 1634 struct sample_id sample_id;
f2b1d720
MK
1635};
1636.fi
1637.in
f2b1d720
MK
1638.TP
1639.BR PERF_RECORD_THROTTLE ", " PERF_RECORD_UNTHROTTLE
1640This record indicates a throttle/unthrottle event.
1641
1642.in +4n
1643.nf
1644struct {
1645 struct perf_event_header header;
1646 u64 time;
1647 u64 id;
1648 u64 stream_id;
7480dabb 1649 struct sample_id sample_id;
f2b1d720
MK
1650};
1651.fi
1652.in
f2b1d720
MK
1653.TP
1654.B PERF_RECORD_FORK
1655This record indicates a fork event.
1656
1657.in +4n
1658.nf
1659struct {
1660 struct perf_event_header header;
1661 u32 pid, ppid;
1662 u32 tid, ptid;
1663 u64 time;
7480dabb 1664 struct sample_id sample_id;
f2b1d720
MK
1665};
1666.fi
1667.in
f2b1d720
MK
1668.TP
1669.B PERF_RECORD_READ
1670This record indicates a read event.
1671
1672.in +4n
1673.nf
1674struct {
1675 struct perf_event_header header;
1676 u32 pid, tid;
1677 struct read_format values;
7480dabb 1678 struct sample_id sample_id;
f2b1d720
MK
1679};
1680.fi
1681.in
f2b1d720
MK
1682.TP
1683.B PERF_RECORD_SAMPLE
1684This record indicates a sample.
1685
1686.in +4n
1687.nf
1688struct {
1689 struct perf_event_header header;
7480dabb 1690 u64 sample_id; /* if PERF_SAMPLE_IDENTIFIER */
7db515ef
MK
1691 u64 ip; /* if PERF_SAMPLE_IP */
1692 u32 pid, tid; /* if PERF_SAMPLE_TID */
1693 u64 time; /* if PERF_SAMPLE_TIME */
1694 u64 addr; /* if PERF_SAMPLE_ADDR */
1695 u64 id; /* if PERF_SAMPLE_ID */
1696 u64 stream_id; /* if PERF_SAMPLE_STREAM_ID */
1697 u32 cpu, res; /* if PERF_SAMPLE_CPU */
1698 u64 period; /* if PERF_SAMPLE_PERIOD */
f2b1d720 1699 struct read_format v; /* if PERF_SAMPLE_READ */
7db515ef
MK
1700 u64 nr; /* if PERF_SAMPLE_CALLCHAIN */
1701 u64 ips[nr]; /* if PERF_SAMPLE_CALLCHAIN */
1702 u32 size; /* if PERF_SAMPLE_RAW */
1703 char data[size]; /* if PERF_SAMPLE_RAW */
1704 u64 bnr; /* if PERF_SAMPLE_BRANCH_STACK */
1705 struct perf_branch_entry lbr[bnr];
1706 /* if PERF_SAMPLE_BRANCH_STACK */
1707 u64 abi; /* if PERF_SAMPLE_REGS_USER */
1708 u64 regs[weight(mask)];
1709 /* if PERF_SAMPLE_REGS_USER */
1710 u64 size; /* if PERF_SAMPLE_STACK_USER */
1711 char data[size]; /* if PERF_SAMPLE_STACK_USER */
1712 u64 dyn_size; /* if PERF_SAMPLE_STACK_USER */
d1007d14
VW
1713 u64 weight; /* if PERF_SAMPLE_WEIGHT */
1714 u64 data_src; /* if PERF_SAMPLE_DATA_SRC */
1e043959 1715 u64 transaction;/* if PERF_SAMPLE_TRANSACTION */
f2b1d720
MK
1716};
1717.fi
4047bc6c
MK
1718.RS 4
1719.TP 4
7480dabb
VW
1720.I sample_id
1721If
1722.B PERF_SAMPLE_IDENTIFIER
1723is enabled, a 64-bit unique ID is included.
e9bd9b2c 1724This is a duplication of the
7480dabb
VW
1725.B PERF_SAMPLE_ID
1726.I id
1727value, but included at the beginning of the sample
1728so parsers can easily obtain the value.
1729.TP
f2b1d720 1730.I ip
7db515ef
MK
1731If
1732.B PERF_SAMPLE_IP
1733is enabled, then a 64-bit instruction
f2b1d720 1734pointer value is included.
f2b1d720 1735.TP
7db515ef
MK
1736.IR pid ", " tid
1737If
1738.B PERF_SAMPLE_TID
1739is enabled, then a 32-bit process ID
1740and 32-bit thread ID are included.
f2b1d720
MK
1741.TP
1742.I time
7db515ef
MK
1743If
1744.B PERF_SAMPLE_TIME
1745is enabled, then a 64-bit timestamp
f2b1d720
MK
1746is included.
1747This is obtained via local_clock() which is a hardware timestamp
1748if available and the jiffies value if not.
f2b1d720
MK
1749.TP
1750.I addr
7db515ef
MK
1751If
1752.B PERF_SAMPLE_ADDR
1753is enabled, then a 64-bit address is included.
f2b1d720
MK
1754This is usually the address of a tracepoint,
1755breakpoint, or software event; otherwise the value is 0.
f2b1d720
MK
1756.TP
1757.I id
7db515ef
MK
1758If
1759.B PERF_SAMPLE_ID
1760is enabled, a 64-bit unique ID is included.
f2b1d720 1761If the event is a member of an event group, the group leader ID is returned.
7db515ef
MK
1762This ID is the same as the one returned by
1763.BR PERF_FORMAT_ID .
f2b1d720
MK
1764.TP
1765.I stream_id
7db515ef
MK
1766If
1767.B PERF_SAMPLE_STREAM_ID
1768is enabled, a 64-bit unique ID is included.
f2b1d720
MK
1769Unlike
1770.B PERF_SAMPLE_ID
1771the actual ID is returned, not the group leader.
7db515ef
MK
1772This ID is the same as the one returned by
1773.BR PERF_FORMAT_ID .
f2b1d720 1774.TP
7db515ef
MK
1775.IR cpu ", " res
1776If
1777.B PERF_SAMPLE_CPU
1778is enabled, this is a 32-bit value indicating
f2b1d720
MK
1779which CPU was being used, in addition to a reserved (unused)
178032-bit value.
f2b1d720
MK
1781.TP
1782.I period
7db515ef
MK
1783If
1784.B PERF_SAMPLE_PERIOD
1785is enabled, a 64-bit value indicating
f2b1d720 1786the current sampling period is written.
f2b1d720
MK
1787.TP
1788.I v
7db515ef
MK
1789If
1790.B PERF_SAMPLE_READ
1791is enabled, a structure of type read_format
f2b1d720
MK
1792is included which has values for all events in the event group.
1793The values included depend on the
1794.I read_format
7db515ef
MK
1795value used at
1796.BR perf_event_open ()
1797time.
f2b1d720 1798.TP
7db515ef
MK
1799.IR nr ", " ips[nr]
1800If
1801.B PERF_SAMPLE_CALLCHAIN
1802is enabled, then a 64-bit number is included
f2b1d720 1803which indicates how many 64-bit instruction pointers will
7db515ef
MK
1804follow.
1805This is the current callchain.
f2b1d720 1806.TP
7ede2f66 1807.IR size ", " data[size]
7db515ef
MK
1808If
1809.B PERF_SAMPLE_RAW
1810is enabled, then a 32-bit value indicating size
f2b1d720
MK
1811is included followed by an array of 8-bit values of length size.
1812The values are padded with 0 to have 64-bit alignment.
1813
1814This RAW record data is opaque with respect to the ABI.
1815The ABI doesn't make any promises with respect to the stability
1816of its content; it may vary depending
1817on event, hardware, and kernel version.
f2b1d720 1818.TP
7db515ef
MK
1819.IR bnr ", " lbr[bnr]
1820If
1821.B PERF_SAMPLE_BRANCH_STACK
1822is enabled, then a 64-bit value indicating
1823the number of records is included, followed by
1824.I bnr
1825.I perf_branch_entry
045bf4d3
VW
1826structures which each include the fields:
1827.RS
1828.TP
1829.I from
2b538c3e 1830This indicates the source instruction (may not be a branch).
045bf4d3
VW
1831.TP
1832.I to
2b538c3e 1833The branch target.
045bf4d3
VW
1834.TP
1835.I mispred
2b538c3e 1836The branch target was mispredicted.
045bf4d3
VW
1837.TP
1838.I predicted
2b538c3e 1839The branch target was predicted.
e3c9782b 1840.TP
31c1f2b0 1841.IR in_tx " (since Linux 3.11)"
2b538c3e 1842The branch was in a transactional memory transaction.
e3c9782b 1843.TP
31c1f2b0 1844.IR abort " (since Linux 3.11)"
2b538c3e 1845The branch was in an aborted transactional memory transaction.
e3c9782b
VW
1846
1847.P
045bf4d3
VW
1848The entries are from most to least recent, so the first entry
1849has the most recent branch.
1850
8a94e783
MK
1851Support for
1852.I mispred
1853and
1854.I predicted
baf7029b 1855is optional; if not supported, both
045bf4d3
VW
1856values will be 0.
1857
e3c9782b
VW
1858The type of branches recorded is specified by the
1859.I branch_sample_type
1860field.
1861.RE
1862
f2b1d720 1863.TP
7db515ef
MK
1864.IR abi ", " regs[weight(mask)]
1865If
1866.B PERF_SAMPLE_REGS_USER
d1007d14 1867is enabled, then the user CPU registers are recorded.
f2b1d720
MK
1868
1869The
1870.I abi
1871field is one of
1872.BR PERF_SAMPLE_REGS_ABI_NONE ", " PERF_SAMPLE_REGS_ABI_32 " or "
7db515ef 1873.BR PERF_SAMPLE_REGS_ABI_64 .
d1007d14
VW
1874
1875The
1876.I regs
1877field is an array of the CPU registers that were specified by
1878the
1879.I sample_regs_user
1880attr field.
1881The number of values is the number of bits set in the
51700fd7 1882.I sample_regs_user
4651e412 1883bit mask.
f2b1d720 1884.TP
7db515ef
MK
1885.IR size ", " data[size] ", " dyn_size
1886If
1887.B PERF_SAMPLE_STACK_USER
d1007d14
VW
1888is enabled, then the user stack is recorded to enable backtracing.
1889.I size
1890is the size requested by the user in
1891.I sample_stack_user
1892or else the maximum record size.
1893.I data
1894is the stack data.
1895.I dyn_size
1896is the amount of data actually dumped (can be less than
460e3d7a 1897.IR size ).
d1007d14 1898.TP
51700fd7 1899.I weight
d1007d14
VW
1900If
1901.B PERF_SAMPLE_WEIGHT
7de4a1e3 1902is enabled, then a 64-bit value provided by the hardware
d1007d14
VW
1903is recorded that indicates how costly the event was.
1904This allows expensive events to stand out more clearly
1905in profiles.
1906.TP
1907.I data_src
51700fd7 1908If
d1007d14 1909.B PERF_SAMPLE_DATA_SRC
7de4a1e3 1910is enabled, then a 64-bit value is recorded that is made up of
d1007d14
VW
1911the following fields:
1912.RS
2b538c3e 1913.TP 4
d1007d14 1914.I mem_op
2b538c3e
MK
1915Type of opcode, a bitwise combination of:
1916
1917.PD 0
1918.RS
1919.TP 24
d1007d14 1920.B PERF_MEM_OP_NA
2b538c3e
MK
1921Not available
1922.TP
d1007d14 1923.B PERF_MEM_OP_LOAD
2b538c3e
MK
1924Load instruction
1925.TP
d1007d14 1926.B PERF_MEM_OP_STORE
2b538c3e
MK
1927Store instruction
1928.TP
d1007d14 1929.B PERF_MEM_OP_PFETCH
2b538c3e
MK
1930Prefetch
1931.TP
d1007d14 1932.B PERF_MEM_OP_EXEC
2b538c3e
MK
1933Executable code
1934.RE
1935.PD
d1007d14
VW
1936.TP
1937.I mem_lvl
2b538c3e
MK
1938Memory hierarchy level hit or miss, a bitwise combination of:
1939
1940.PD 0
1941.RS
1942.TP 24
d1007d14 1943.B PERF_MEM_LVL_NA
2b538c3e
MK
1944Not available
1945.TP
d1007d14 1946.B PERF_MEM_LVL_HIT
2b538c3e
MK
1947Hit
1948.TP
d1007d14 1949.B PERF_MEM_LVL_MISS
2b538c3e
MK
1950Miss
1951.TP
d1007d14 1952.B PERF_MEM_LVL_L1
2b538c3e
MK
1953Level 1 cache
1954.TP
d1007d14 1955.B PERF_MEM_LVL_LFB
2b538c3e
MK
1956Line fill buffer
1957.TP
d1007d14 1958.B PERF_MEM_LVL_L2
2b538c3e
MK
1959Level 2 cache
1960.TP
d1007d14 1961.B PERF_MEM_LVL_L3
2b538c3e
MK
1962Level 3 cache
1963.TP
d1007d14 1964.B PERF_MEM_LVL_LOC_RAM
2b538c3e
MK
1965Local DRAM
1966.TP
d1007d14 1967.B PERF_MEM_LVL_REM_RAM1
2b538c3e
MK
1968Remote DRAM 1 hop
1969.TP
d1007d14 1970.B PERF_MEM_LVL_REM_RAM2
2b538c3e
MK
1971Remote DRAM 2 hops
1972.TP
d1007d14 1973.B PERF_MEM_LVL_REM_CCE1
2b538c3e
MK
1974Remote cache 1 hop
1975.TP
d1007d14 1976.B PERF_MEM_LVL_REM_CCE2
2b538c3e
MK
1977Remote cache 2 hops
1978.TP
d1007d14 1979.B PERF_MEM_LVL_IO
2b538c3e
MK
1980I/O memory
1981.TP
d1007d14 1982.B PERF_MEM_LVL_UNC
2b538c3e
MK
1983Uncached memory
1984.RE
1985.PD
d1007d14
VW
1986.TP
1987.I mem_snoop
2b538c3e
MK
1988Snoop mode, a bitwise combination of:
1989
1990.PD 0
1991.RS
1992.TP 24
d1007d14 1993.B PERF_MEM_SNOOP_NA
2b538c3e
MK
1994Not available
1995.TP
d1007d14 1996.B PERF_MEM_SNOOP_NONE
2b538c3e
MK
1997No snoop
1998.TP
d1007d14 1999.B PERF_MEM_SNOOP_HIT
2b538c3e
MK
2000Snoop hit
2001.TP
d1007d14 2002.B PERF_MEM_SNOOP_MISS
2b538c3e
MK
2003Snoop miss
2004.TP
d1007d14 2005.B PERF_MEM_SNOOP_HITM
2b538c3e
MK
2006Snoop hit modified
2007.RE
2008.PD
d1007d14
VW
2009.TP
2010.I mem_lock
2b538c3e
MK
2011Lock instruction, a bitwise combination of:
2012
2013.PD 0
2014.RS
2015.TP 24
d1007d14 2016.B PERF_MEM_LOCK_NA
2b538c3e
MK
2017Not available
2018.TP
d1007d14 2019.B PERF_MEM_LOCK_LOCKED
2b538c3e
MK
2020Locked transaction
2021.RE
2022.PD
d1007d14
VW
2023.TP
2024.I mem_dtlb
2b538c3e
MK
2025TLB access hit or miss, a bitwise combination of:
2026
2027.PD 0
2028.RS
2029.TP 24
d1007d14 2030.B PERF_MEM_TLB_NA
2b538c3e
MK
2031Not available
2032.TP
d1007d14 2033.B PERF_MEM_TLB_HIT
2b538c3e
MK
2034Hit
2035.TP
d1007d14 2036.B PERF_MEM_TLB_MISS
2b538c3e
MK
2037Miss
2038.TP
d1007d14 2039.B PERF_MEM_TLB_L1
2b538c3e
MK
2040Level 1 TLB
2041.TP
d1007d14 2042.B PERF_MEM_TLB_L2
2b538c3e
MK
2043Level 2 TLB
2044.TP
d1007d14 2045.B PERF_MEM_TLB_WK
2b538c3e
MK
2046Hardware walker
2047.TP
d1007d14 2048.B PERF_MEM_TLB_OS
2b538c3e
MK
2049OS fault handler
2050.RE
2051.PD
d1007d14 2052.RE
1e043959
VW
2053.TP
2054.I transaction
2055If the
2056.B PERF_SAMPLE_TRANSACTION
37bee118 2057flag is set, then a 64-bit field is recorded describing
1e043959
VW
2058the sources of any transactional memory aborts.
2059
2060The field is a bitwise combination of the following values:
2061.RS
2062.TP
2063.B PERF_TXN_ELISION
b3f39642 2064Abort from an elision type transaction (Intel-CPU-specific).
1e043959
VW
2065.TP
2066.B PERF_TXN_TRANSACTION
b3f39642 2067Abort from a generic transaction.
1e043959
VW
2068.TP
2069.B PERF_TXN_SYNC
b3f39642 2070Synchronous abort (related to the reported instruction).
1e043959
VW
2071.TP
2072.B PERF_TXN_ASYNC
b3f39642 2073Asynchronous abort (not related to the reported instruction).
1e043959
VW
2074.TP
2075.B PERF_TXN_RETRY
053a3e08 2076Retryable abort (retrying the transaction may have succeeded).
1e043959
VW
2077.TP
2078.B PERF_TXN_CONFLICT
b3f39642 2079Abort due to memory conflicts with other threads.
1e043959
VW
2080.TP
2081.B PERF_TXN_CAPACITY_WRITE
b3f39642 2082Abort due to write capacity overflow.
1e043959
VW
2083.TP
2084.B PERF_TXN_CAPACITY_READ
b3f39642 2085Abort due to read capacity overflow.
1e043959 2086.RE
b3f39642
MK
2087.IP
2088In addition, a user-specified abort code can be obtained from
2089the high 32 bits of the field by shifting right by
1e043959
VW
2090.B PERF_TXN_ABORT_SHIFT
2091and masking with
2092.BR PERF_TXN_ABORT_MASK .
f2b1d720 2093.RE
f2b1d720 2094.RE
73d8cece 2095.SS Signal overflow
f2b1d720
MK
2096Events can be set to deliver a signal when a threshold is crossed.
2097The signal handler is set up using the
2098.BR poll (2),
2099.BR select (2),
2100.BR epoll (7)
2101and
2102.BR fcntl (2)
2103system calls.
2104
2105To generate signals, sampling must be enabled
2106.RI ( sample_period
7d182bb6 2107must have a nonzero value).
f2b1d720
MK
2108
2109There are two ways to generate signals.
2110
2111The first is to set a
2112.I wakeup_events
2113or
2114.I wakeup_watermark
2115value that will generate a signal if a certain number of samples
2116or bytes have been written to the mmap ring buffer.
31020de9 2117In this case, a signal of type
7db515ef
MK
2118.B POLL_IN
2119is sent.
f2b1d720
MK
2120
2121The other way is by use of the
7db515ef 2122.B PERF_EVENT_IOC_REFRESH
f2b1d720
MK
2123ioctl.
2124This ioctl adds to a counter that decrements each time the event overflows.
7d182bb6 2125When nonzero, a
7db515ef
MK
2126.B POLL_IN
2127signal is sent on overflow, but
2128once the value reaches 0, a signal is sent of type
2129.B POLL_HUP
2130and
f2b1d720
MK
2131the underlying event is disabled.
2132
2133Note: on newer kernels (definitely noticed with 3.2)
7db515ef 2134.\" FIXME(Vince) : Find out when this was introduced
f2b1d720
MK
2135a signal is provided for every overflow, even if
2136.I wakeup_events
2137is not set.
73d8cece 2138.SS rdpmc instruction
f2b1d720
MK
2139Starting with Linux 3.4 on x86, you can use the
2140.I rdpmc
2141instruction to get low-latency reads without having to enter the kernel.
2142Note that using
2143.I rdpmc
2144is not necessarily faster than other methods for reading event values.
2145
2146Support for this can be detected with the
2147.I cap_usr_rdpmc
2148field in the mmap page; documentation on how
2149to calculate event values can be found in that section.
73d8cece 2150.SS perf_event ioctl calls
f2b1d720
MK
2151.PP
2152Various ioctls act on
7db515ef 2153.BR perf_event_open ()
ce88f77b 2154file descriptors:
f2b1d720
MK
2155.TP
2156.B PERF_EVENT_IOC_ENABLE
ce88f77b 2157This enables the individual event or event group specified by the
7db515ef 2158file descriptor argument.
f2b1d720 2159
51700fd7 2160If the
8cc8b90d 2161.B PERF_IOC_FLAG_GROUP
51700fd7 2162bit is set in the ioctl argument, then all events in a group are
dbc01ecd
VW
2163enabled, even if the event specified is not the group leader
2164(but see BUGS).
f2b1d720
MK
2165.TP
2166.B PERF_EVENT_IOC_DISABLE
ce88f77b 2167This disables the individual counter or event group specified by the
7db515ef 2168file descriptor argument.
f2b1d720
MK
2169
2170Enabling or disabling the leader of a group enables or disables the
2171entire group; that is, while the group leader is disabled, none of the
2172counters in the group will count.
33a0ccb2
MK
2173Enabling or disabling a member of a group other than the leader
2174affects only that counter; disabling a non-leader
f2b1d720
MK
2175stops that counter from counting but doesn't affect any other counter.
2176
51700fd7 2177If the
8cc8b90d 2178.B PERF_IOC_FLAG_GROUP
51700fd7 2179bit is set in the ioctl argument, then all events in a group are
dbc01ecd
VW
2180disabled, even if the event specified is not the group leader
2181(but see BUGS).
f2b1d720
MK
2182.TP
2183.B PERF_EVENT_IOC_REFRESH
2184Non-inherited overflow counters can use this
2185to enable a counter for a number of overflows specified by the argument,
2186after which it is disabled.
2187Subsequent calls of this ioctl add the argument value to the current
2188count.
7db515ef
MK
2189A signal with
2190.B POLL_IN
2191set will happen on each overflow until the
2192count reaches 0; when that happens a signal with
2193.B POLL_HUP
2194set is sent and the event is disabled.
f2b1d720 2195Using an argument of 0 is considered undefined behavior.
f2b1d720
MK
2196.TP
2197.B PERF_EVENT_IOC_RESET
36127c0e 2198Reset the event count specified by the
6061d29f 2199file descriptor argument to zero.
33a0ccb2 2200This resets only the counts; there is no way to reset the
f2b1d720
MK
2201multiplexing
2202.I time_enabled
2203or
2204.I time_running
2205values.
f2b1d720 2206
51700fd7 2207If the
8cc8b90d 2208.B PERF_IOC_FLAG_GROUP
51700fd7 2209bit is set in the ioctl argument, then all events in a group are
dbc01ecd
VW
2210reset, even if the event specified is not the group leader
2211(but see BUGS).
f2b1d720
MK
2212.TP
2213.B PERF_EVENT_IOC_PERIOD
e6cf5694 2214This updates the overflow period for the event.
3f118a29 2215
ed81fdd9 2216Since Linux 3.7 (on ARM) and Linux 3.14 (all other architectures),
3f118a29 2217the new period takes effect immediately.
ed81fdd9 2218On older kernels, the new period did not take effect until
3f118a29 2219after the next overflow.
f2b1d720
MK
2220
2221The argument is a pointer to a 64-bit value containing the
2222desired new period.
e6cf5694
VW
2223
2224Prior to Linux 2.6.36 this ioctl always failed due to a bug
2225in the kernel.
2226
f2b1d720
MK
2227.TP
2228.B PERF_EVENT_IOC_SET_OUTPUT
2229This tells the kernel to report event notifications to the specified
2230file descriptor rather than the default one.
2231The file descriptors must all be on the same CPU.
2232
2233The argument specifies the desired file descriptor, or \-1 if
2234output should be ignored.
f2b1d720 2235.TP
31c1f2b0 2236.BR PERF_EVENT_IOC_SET_FILTER " (since Linux 2.6.33)"
f2b1d720
MK
2237This adds an ftrace filter to this event.
2238
2239The argument is a pointer to the desired ftrace filter.
a0dcc8dd 2240.TP
31c1f2b0 2241.BR PERF_EVENT_IOC_ID " (since Linux 3.12)"
bec6277e 2242This returns the event ID value for the given event file descriptor.
a0dcc8dd
VW
2243
2244The argument is a pointer to a 64-bit unsigned integer
2245to hold the result.
73d8cece 2246.SS Using prctl
f2b1d720
MK
2247A process can enable or disable all the event groups that are
2248attached to it using the
2249.BR prctl (2)
2250.B PR_TASK_PERF_EVENTS_ENABLE
2251and
2252.B PR_TASK_PERF_EVENTS_DISABLE
2253operations.
ee7b0cbf 2254This applies to all counters on the calling process, whether created by
f2b1d720
MK
2255this process or by another, and does not affect any counters that this
2256process has created on other processes.
33a0ccb2 2257It enables or disables only
f2b1d720 2258the group leaders, not any other members in the groups.
f2b1d720 2259.SS perf_event related configuration files
7db515ef
MK
2260Files in
2261.I /proc/sys/kernel/
7db515ef 2262.RS 4
f2b1d720 2263.TP
7db515ef 2264.I /proc/sys/kernel/perf_event_paranoid
f2b1d720
MK
2265
2266The
2267.I perf_event_paranoid
2268file can be set to restrict access to the performance counters.
2b538c3e
MK
2269.RS
2270.IP 2 4
2271only allow user-space measurements.
2272.IP 1
2273allow both kernel and user measurements (default).
2274.IP 0
2275allow access to CPU-specific data but not raw tracepoint samples.
2276.IP \-1
2277no restrictions.
2278.RE
2279.IP
f2b1d720
MK
2280The existence of the
2281.I perf_event_paranoid
2282file is the official method for determining if a kernel supports
7db515ef 2283.BR perf_event_open ().
f2b1d720
MK
2284.TP
2285.I /proc/sys/kernel/perf_event_max_sample_rate
2286
7db515ef
MK
2287This sets the maximum sample rate.
2288Setting this too high can allow
f2b1d720 2289users to sample at a rate that impacts overall machine performance
7db515ef
MK
2290and potentially lock up the machine.
2291The default value is
f2b1d720 2292100000 (samples per second).
f2b1d720
MK
2293.TP
2294.I /proc/sys/kernel/perf_event_mlock_kb
2295
ce88f77b
MK
2296Maximum amount of memory (in kilobytes) that an unprivileged user can
2297.BR mlock (2).
f2b1d720 2298The default is 516 (kB).
e30dc77f 2299
f2b1d720 2300.RE
7db515ef
MK
2301Files in
2302.I /sys/bus/event_source/devices/
7db515ef 2303.RS 4
ce88f77b 2304Since Linux 2.6.34, the kernel supports having multiple PMUs
f2b1d720
MK
2305available for monitoring.
2306Information on how to program these PMUs can be found under
2307.IR /sys/bus/event_source/devices/ .
2308Each subdirectory corresponds to a different PMU.
f2b1d720 2309.TP
31c1f2b0 2310.IR /sys/bus/event_source/devices/*/type " (since Linux 2.6.38)"
f2b1d720
MK
2311This contains an integer that can be used in the
2312.I type
ce88f77b
MK
2313field of
2314.I perf_event_attr
2315to indicate that you wish to use this PMU.
f2b1d720 2316.TP
31c1f2b0 2317.IR /sys/bus/event_source/devices/*/rdpmc " (since Linux 3.4)"
8a94e783 2318If this file is 1, then direct user-space access to the
e30dc77f
VW
2319performance counter registers is allowed via the rdpmc instruction.
2320This can be disabled by echoing 0 to the file.
f2b1d720 2321.TP
31c1f2b0 2322.IR /sys/bus/event_source/devices/*/format/ " (since Linux 3.4)"
7d182bb6
MK
2323This subdirectory contains information on the architecture-specific
2324subfields available for programming the various
f2b1d720 2325.I config
ce88f77b
MK
2326fields in the
2327.I perf_event_attr
2328struct.
e30dc77f
VW
2329
2330The content of each file is the name of the config field, followed
2331by a colon, followed by a series of integer bit ranges separated by
2332commas.
8a94e783 2333For example, the file
e30dc77f
VW
2334.I event
2335may contain the value
2336.I config1:1,6-10,44
2337which indicates that event is an attribute that occupies bits 1, 6-10, and 44
ce88f77b
MK
2338of
2339.IR perf_event_attr::config1 .
e30dc77f 2340.TP
31c1f2b0 2341.IR /sys/bus/event_source/devices/*/events/ " (since Linux 3.4)"
7d182bb6 2342This subdirectory contains files with predefined events.
f2b1d720 2343The contents are strings describing the event settings
e30dc77f 2344expressed in terms of the fields found in the previously mentioned
f2b1d720
MK
2345.I ./format/
2346directory.
2347These are not necessarily complete lists of all events supported by
2348a PMU, but usually a subset of events deemed useful or interesting.
e30dc77f
VW
2349
2350The content of each file is a list of attribute names
8a94e783
MK
2351separated by commas.
2352Each entry has an optional value (either hex or decimal).
37bee118 2353If no value is specified, then it is assumed to be a single-bit
e30dc77f
VW
2354field with a value of 1.
2355An example entry may look like this:
699893d8 2356.IR event=0x2,inv,ldlat=3 .
f2b1d720
MK
2357.TP
2358.I /sys/bus/event_source/devices/*/uevent
e30dc77f
VW
2359This file is the standard kernel device interface
2360for injecting hotplug events.
2361.TP
31c1f2b0 2362.IR /sys/bus/event_source/devices/*/cpumask " (since Linux 3.7)"
699893d8
DP
2363The
2364.I cpumask
2365file contains a comma-separated list of integers that
2366indicate a representative CPU number for each socket (package)
e30dc77f
VW
2367on the motherboard.
2368This is needed when setting up uncore or northbridge events, as
2369those PMUs present socket-wide events.
f2b1d720 2370.RE
47297adb 2371.SH RETURN VALUE
f2b1d720
MK
2372.BR perf_event_open ()
2373returns the new file descriptor, or \-1 if an error occurred
2374(in which case,
2375.I errno
2376is set appropriately).
2377.SH ERRORS
d8b7d950
VW
2378The errors returned by
2379.BR perf_event_open ()
2380can be inconsistent, and may
2381vary across processor architectures and performance monitoring units.
f2b1d720 2382.TP
82b09254 2383.B E2BIG
ce88f77b
MK
2384Returned if the
2385.I perf_event_attr
82b09254
VW
2386.I size
2387value is too small
2388(smaller than
2389.BR PERF_ATTR_SIZE_VER0 ),
2390too big (larger than the page size),
2391or larger than the kernel supports and the extra bytes are not zero.
2392When
2393.B E2BIG
ce88f77b
MK
2394is returned, the
2395.I perf_event_attr
e9bd9b2c 2396.I size
d6af98f8 2397field is overwritten by the kernel to be the size of the structure
82b09254
VW
2398it was expecting.
2399.TP
d8b7d950 2400.B EACCES
27f0af8e
VW
2401Returned when the requested event requires
2402.B CAP_SYS_ADMIN
2403permissions (or a more permissive perf_event paranoid setting).
2404Some common cases where an unprivileged process
2405may encounter this error:
2406attaching to a process owned by a different user;
2b23ecbd
MK
2407monitoring all processes on a given CPU (i.e., specifying the
2408.I pid
2409argument as \-1);
079928f3 2410and not setting
accec051 2411.I exclude_kernel
079928f3 2412when the paranoid setting requires it.
d8b7d950
VW
2413.TP
2414.B EBADF
2415Returned if the
2416.I group_fd
accec051
MK
2417file descriptor is not valid, or, if
2418.B PERF_FLAG_PID_CGROUP
2419is set,
d8b7d950
VW
2420the cgroup file descriptor in
2421.I pid
2422is not valid.
2423.TP
2424.B EFAULT
2425Returned if the
2426.I attr
2427pointer points at an invalid memory address.
2428.TP
f2b1d720 2429.B EINVAL
d8b7d950
VW
2430Returned if the specified event is invalid.
2431There are many possible reasons for this.
2432A non-exhaustive list:
2433.I sample_freq
accec051 2434is higher than the maximum setting;
d8b7d950
VW
2435the
2436.I cpu
accec051 2437to monitor does not exist;
d8b7d950 2438.I read_format
accec051 2439is out of range;
d8b7d950 2440.I sample_type
accec051 2441is out of range;
d8b7d950
VW
2442the
2443.I flags
accec051 2444value is out of range;
d8b7d950
VW
2445.I exclusive
2446or
2447.I pinned
accec051 2448is set and the event is not a group leader;
d8b7d950
VW
2449the event
2450.I config
accec051
MK
2451values are out of range or set reserved bits;
2452the generic event selected is not supported; or
d8b7d950
VW
2453there is not enough room to add the selected event.
2454.TP
2455.B EMFILE
2456Each opened event uses one file descriptor.
2457If a large number of events are opened the per-user file
2458descriptor limit (often 1024) will be hit and no more
2459events can be created.
2460.TP
2461.B ENODEV
2462Returned when the event involves a feature not supported
accec051 2463by the current CPU.
d8b7d950
VW
2464.TP
2465.B ENOENT
2466Returned if the
2467.I type
2468setting is not valid.
accec051 2469This error is also returned for
d8b7d950 2470some unsupported generic events.
f2b1d720
MK
2471.TP
2472.B ENOSPC
2473Prior to Linux 3.3, if there was not enough room for the event,
2474.B ENOSPC
2475was returned.
accec051 2476In Linux 3.3, this was changed to
f2b1d720
MK
2477.BR EINVAL .
2478.B ENOSPC
d8b7d950 2479is still returned if you try to add more breakpoint events
accec051 2480than supported by the hardware.
d8b7d950
VW
2481.TP
2482.B ENOSYS
2483Returned if
2484.B PERF_SAMPLE_STACK_USER
2485is set in
2486.I sample_type
2487and it is not supported by hardware.
2488.TP
2489.B EOPNOTSUPP
2490Returned if an event requiring a specific hardware feature is
2491requested but there is no hardware support.
2492This includes requesting low-skid events if not supported,
2493branch tracing if it is not available, sampling if no PMU
2494interrupt is available, and branch stacks for software events.
2495.TP
2496.B EPERM
27f0af8e
VW
2497Returned on many (but not all) architectures when an unsupported
2498.IR exclude_hv ", " exclude_idle ", " exclude_user ", or " exclude_kernel
2499setting is specified.
2500
2501It can also happen, as with
2502.BR EACCES ,
2503when the requested event requires
2504.B CAP_SYS_ADMIN
2505permissions (or a more permissive perf_event paranoid setting).
2506This includes setting a breakpoint on a kernel address,
2507and (since Linux 3.13) setting a kernel function-trace tracepoint.
d8b7d950
VW
2508.TP
2509.B ESRCH
2510Returned if attempting to attach to a process that does not exist.
f2b1d720 2511.SH VERSION
f2b1d720
MK
2512.BR perf_event_open ()
2513was introduced in Linux 2.6.31 but was called
ffd4dec0 2514.BR perf_counter_open ().
f2b1d720 2515It was renamed in Linux 2.6.32.
f2b1d720 2516.SH CONFORMING TO
7db515ef
MK
2517This
2518.BR perf_event_open ()
2519system call is Linux-specific
f2b1d720 2520and should not be used in programs intended to be portable.
f2b1d720
MK
2521.SH NOTES
2522Glibc does not provide a wrapper for this system call; call it using
2523.BR syscall (2).
7db515ef 2524See the example below.
f2b1d720
MK
2525
2526The official way of knowing if
7db515ef 2527.BR perf_event_open ()
f2b1d720
MK
2528support is enabled is checking
2529for the existence of the file
7db515ef 2530.IR /proc/sys/kernel/perf_event_paranoid .
f2b1d720 2531.SH BUGS
f2b1d720
MK
2532The
2533.B F_SETOWN_EX
2534option to
7db515ef 2535.BR fcntl (2)
f2b1d720
MK
2536is needed to properly get overflow signals in threads.
2537This was introduced in Linux 2.6.32.
2538
ce88f77b 2539Prior to Linux 2.6.33 (at least for x86), the kernel did not check
f2b1d720
MK
2540if events could be scheduled together until read time.
2541The same happens on all known kernels if the NMI watchdog is enabled.
2542This means that, to see if a given set of events works, you have to call
2543.BR perf_event_open (),
2544start, then read before you know for sure you
2545can get valid measurements.
2546
ce88f77b 2547Prior to Linux 2.6.34, event constraints were not enforced by the kernel.
f2b1d720
MK
2548In that case, some events would silently return "0" if the kernel
2549scheduled them in an improper counter slot.
2550
ce88f77b 2551Prior to Linux 2.6.34, there was a bug when multiplexing where the
f2b1d720
MK
2552wrong results could be returned.
2553
2554Kernels from Linux 2.6.35 to Linux 2.6.39 can quickly crash the kernel if
2555"inherit" is enabled and many threads are started.
2556
2557Prior to Linux 2.6.35,
2558.B PERF_FORMAT_GROUP
2559did not work with attached processes.
2560
2561In older Linux 2.6 versions,
2562refreshing an event group leader refreshed all siblings,
2563and refreshing with a parameter of 0 enabled infinite refresh.
2564This behavior is unsupported and should not be relied on.
2565
2566There is a bug in the kernel code between
2567Linux 2.6.36 and Linux 3.0 that ignores the
2568"watermark" field and acts as if a wakeup_event
2569was chosen if the union has a
7d182bb6 2570nonzero value in it.
f2b1d720 2571
8a94e783 2572From Linux 2.6.31 to Linux 3.4, the
dbc01ecd
VW
2573.B PERF_IOC_FLAG_GROUP
2574ioctl argument was broken and would repeatedly operate
2575on the event specified rather than iterating across
2576all sibling events in a group.
2577
7205b8df 2578From Linux 3.4 to Linux 3.11, the mmap
135cba8b
VW
2579.I cap_usr_rdpmc
2580and
2581.I cap_usr_time
2582bits mapped to the same location.
2583Code should migrate to the new
2584.I cap_user_rdpmc
2585and
2586.I cap_user_time
2587fields instead.
2588
7db515ef
MK
2589Always double-check your results!
2590Various generalized events have had wrong values.
f2b1d720
MK
2591For example, retired branches measured
2592the wrong thing on AMD machines until Linux 2.6.35.
f2b1d720
MK
2593.SH EXAMPLE
2594The following is a short example that measures the total
7db515ef
MK
2595instruction count of a call to
2596.BR printf (3).
f2b1d720
MK
2597.nf
2598
2599#include <stdlib.h>
2600#include <stdio.h>
2601#include <unistd.h>
2602#include <string.h>
2603#include <sys/ioctl.h>
2604#include <linux/perf_event.h>
2605#include <asm/unistd.h>
2606
571767ca 2607static long
7db515ef
MK
2608perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
2609 int cpu, int group_fd, unsigned long flags)
f2b1d720
MK
2610{
2611 int ret;
2612
7db515ef
MK
2613 ret = syscall(__NR_perf_event_open, hw_event, pid, cpu,
2614 group_fd, flags);
f2b1d720
MK
2615 return ret;
2616}
2617
f2b1d720
MK
2618int
2619main(int argc, char **argv)
2620{
f2b1d720
MK
2621 struct perf_event_attr pe;
2622 long long count;
2623 int fd;
2624
2625 memset(&pe, 0, sizeof(struct perf_event_attr));
2626 pe.type = PERF_TYPE_HARDWARE;
2627 pe.size = sizeof(struct perf_event_attr);
2628 pe.config = PERF_COUNT_HW_INSTRUCTIONS;
2629 pe.disabled = 1;
2630 pe.exclude_kernel = 1;
2631 pe.exclude_hv = 1;
2632
2633 fd = perf_event_open(&pe, 0, \-1, \-1, 0);
7db515ef 2634 if (fd == \-1) {
f2b1d720 2635 fprintf(stderr, "Error opening leader %llx\\n", pe.config);
7db515ef 2636 exit(EXIT_FAILURE);
f2b1d720
MK
2637 }
2638
2639 ioctl(fd, PERF_EVENT_IOC_RESET, 0);
2640 ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
2641
2642 printf("Measuring instruction count for this printf\\n");
2643
2644 ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
2645 read(fd, &count, sizeof(long long));
2646
2647 printf("Used %lld instructions\\n", count);
2648
2649 close(fd);
2650}
2651.fi
47297adb 2652.SH SEE ALSO
f2b1d720
MK
2653.BR fcntl (2),
2654.BR mmap (2),
2655.BR open (2),
2656.BR prctl (2),
2657.BR read (2)