]> git.ipfire.org Git - thirdparty/man-pages.git/blob - man2/eventfd.2
6574e23c98e2bd52ea4d9a803b2ea8e2a2c87f4d
[thirdparty/man-pages.git] / man2 / eventfd.2
1 .\" Copyright (C) 2008 Michael Kerrisk <mtk.manpages@gmail.com>
2 .\" starting from a version by Davide Libenzi <davidel@xmailserver.org>
3 .\"
4 .\" SPDX-License-Identifier: GPL-2.0-or-later
5 .\"
6 .\" 2008-10-10, mtk: describe eventfd2(), and EFD_NONBLOCK and EFD_CLOEXEC
7 .\"
8 .TH EVENTFD 2 2022-09-17 "Linux man-pages (unreleased)"
9 .SH NAME
10 eventfd \- create a file descriptor for event notification
11 .SH LIBRARY
12 Standard C library
13 .RI ( libc ", " \-lc )
14 .SH SYNOPSIS
15 .nf
16 .B #include <sys/eventfd.h>
17 .PP
18 .BI "int eventfd(unsigned int " initval ", int " flags );
19 .fi
20 .SH DESCRIPTION
21 .BR eventfd ()
22 creates an "eventfd object" that can be used as
23 an event wait/notify mechanism by user-space applications,
24 and by the kernel to notify user-space applications of events.
25 The object contains an unsigned 64-bit integer
26 .RI ( uint64_t )
27 counter that is maintained by the kernel.
28 This counter is initialized with the value specified in the argument
29 .IR initval .
30 .PP
31 As its return value,
32 .BR eventfd ()
33 returns a new file descriptor that can be used to refer to the
34 eventfd object.
35 .PP
36 The following values may be bitwise ORed in
37 .I flags
38 to change the behavior of
39 .BR eventfd ():
40 .TP
41 .BR EFD_CLOEXEC " (since Linux 2.6.27)"
42 Set the close-on-exec
43 .RB ( FD_CLOEXEC )
44 flag on the new file descriptor.
45 See the description of the
46 .B O_CLOEXEC
47 flag in
48 .BR open (2)
49 for reasons why this may be useful.
50 .TP
51 .BR EFD_NONBLOCK " (since Linux 2.6.27)"
52 Set the
53 .B O_NONBLOCK
54 file status flag on the open file description (see
55 .BR open (2))
56 referred to by the new file descriptor.
57 Using this flag saves extra calls to
58 .BR fcntl (2)
59 to achieve the same result.
60 .TP
61 .BR EFD_SEMAPHORE " (since Linux 2.6.30)"
62 Provide semaphore-like semantics for reads from the new file descriptor.
63 See below.
64 .PP
65 In Linux up to version 2.6.26, the
66 .I flags
67 argument is unused, and must be specified as zero.
68 .PP
69 The following operations can be performed on the file descriptor returned by
70 .BR eventfd ():
71 .TP
72 .BR read (2)
73 Each successful
74 .BR read (2)
75 returns an 8-byte integer.
76 A
77 .BR read (2)
78 fails with the error
79 .B EINVAL
80 if the size of the supplied buffer is less than 8 bytes.
81 .IP
82 The value returned by
83 .BR read (2)
84 is in host byte order\(emthat is,
85 the native byte order for integers on the host machine.
86 .IP
87 The semantics of
88 .BR read (2)
89 depend on whether the eventfd counter currently has a nonzero value
90 and whether the
91 .B EFD_SEMAPHORE
92 flag was specified when creating the eventfd file descriptor:
93 .RS
94 .IP * 3
95 If
96 .B EFD_SEMAPHORE
97 was not specified and the eventfd counter has a nonzero value, then a
98 .BR read (2)
99 returns 8 bytes containing that value,
100 and the counter's value is reset to zero.
101 .IP *
102 If
103 .B EFD_SEMAPHORE
104 was specified and the eventfd counter has a nonzero value, then a
105 .BR read (2)
106 returns 8 bytes containing the value 1,
107 and the counter's value is decremented by 1.
108 .IP *
109 If the eventfd counter is zero at the time of the call to
110 .BR read (2),
111 then the call either blocks until the counter becomes nonzero
112 (at which time, the
113 .BR read (2)
114 proceeds as described above)
115 or fails with the error
116 .B EAGAIN
117 if the file descriptor has been made nonblocking.
118 .RE
119 .TP
120 .BR write (2)
121 A
122 .BR write (2)
123 call adds the 8-byte integer value supplied in its
124 buffer to the counter.
125 The maximum value that may be stored in the counter is the largest
126 unsigned 64-bit value minus 1 (i.e., 0xfffffffffffffffe).
127 If the addition would cause the counter's value to exceed
128 the maximum, then the
129 .BR write (2)
130 either blocks until a
131 .BR read (2)
132 is performed on the file descriptor,
133 or fails with the error
134 .B EAGAIN
135 if the file descriptor has been made nonblocking.
136 .IP
137 A
138 .BR write (2)
139 fails with the error
140 .B EINVAL
141 if the size of the supplied buffer is less than 8 bytes,
142 or if an attempt is made to write the value 0xffffffffffffffff.
143 .TP
144 .BR poll "(2), " select "(2) (and similar)"
145 The returned file descriptor supports
146 .BR poll (2)
147 (and analogously
148 .BR epoll (7))
149 and
150 .BR select (2),
151 as follows:
152 .RS
153 .IP * 3
154 The file descriptor is readable
155 (the
156 .BR select (2)
157 .I readfds
158 argument; the
159 .BR poll (2)
160 .B POLLIN
161 flag)
162 if the counter has a value greater than 0.
163 .IP *
164 The file descriptor is writable
165 (the
166 .BR select (2)
167 .I writefds
168 argument; the
169 .BR poll (2)
170 .B POLLOUT
171 flag)
172 if it is possible to write a value of at least "1" without blocking.
173 .IP *
174 If an overflow of the counter value was detected,
175 then
176 .BR select (2)
177 indicates the file descriptor as being both readable and writable, and
178 .BR poll (2)
179 returns a
180 .B POLLERR
181 event.
182 As noted above,
183 .BR write (2)
184 can never overflow the counter.
185 However, an overflow can occur if 2^64
186 eventfd "signal posts" were performed by the KAIO
187 subsystem (theoretically possible, but practically unlikely).
188 If an overflow has occurred, then
189 .BR read (2)
190 will return the maximum
191 .I uint64_t
192 value (i.e., 0xffffffffffffffff).
193 .RE
194 .IP
195 The eventfd file descriptor also supports the other file-descriptor
196 multiplexing APIs:
197 .BR pselect (2)
198 and
199 .BR ppoll (2).
200 .TP
201 .BR close (2)
202 When the file descriptor is no longer required, it should be closed.
203 When all file descriptors associated with the same eventfd object
204 have been closed, the resources for the object are freed by the kernel.
205 .PP
206 A copy of the file descriptor created by
207 .BR eventfd ()
208 is inherited by the child produced by
209 .BR fork (2).
210 The duplicate file descriptor is associated with the same
211 eventfd object.
212 File descriptors created by
213 .BR eventfd ()
214 are preserved across
215 .BR execve (2),
216 unless the close-on-exec flag has been set.
217 .SH RETURN VALUE
218 On success,
219 .BR eventfd ()
220 returns a new eventfd file descriptor.
221 On error, \-1 is returned and
222 .I errno
223 is set to indicate the error.
224 .SH ERRORS
225 .TP
226 .B EINVAL
227 An unsupported value was specified in
228 .IR flags .
229 .TP
230 .B EMFILE
231 The per-process limit on the number of open file descriptors has been reached.
232 .TP
233 .B ENFILE
234 The system-wide limit on the total number of open files has been
235 reached.
236 .TP
237 .B ENODEV
238 .\" Note from Davide:
239 .\" The ENODEV error is basically never going to happen if
240 .\" the kernel boots correctly. That error happens only if, during
241 .\" kernel initialization, some error occurs in the anonymous
242 .\" inode source initialization.
243 Could not mount (internal) anonymous inode device.
244 .TP
245 .B ENOMEM
246 There was insufficient memory to create a new
247 eventfd file descriptor.
248 .SH VERSIONS
249 .BR eventfd ()
250 is available on Linux since kernel 2.6.22.
251 Working support is provided in glibc since version 2.8.
252 .\" eventfd() is in glibc 2.7, but reportedly does not build
253 The
254 .BR eventfd2 ()
255 system call (see NOTES) is available on Linux since kernel 2.6.27.
256 Since version 2.9, the glibc
257 .BR eventfd ()
258 wrapper will employ the
259 .BR eventfd2 ()
260 system call, if it is supported by the kernel.
261 .SH ATTRIBUTES
262 For an explanation of the terms used in this section, see
263 .BR attributes (7).
264 .ad l
265 .nh
266 .TS
267 allbox;
268 lbx lb lb
269 l l l.
270 Interface Attribute Value
271 T{
272 .BR eventfd ()
273 T} Thread safety MT-Safe
274 .TE
275 .hy
276 .ad
277 .sp 1
278 .SH STANDARDS
279 .BR eventfd ()
280 and
281 .BR eventfd2 ()
282 are Linux-specific.
283 .SH NOTES
284 Applications can use an eventfd file descriptor instead of a pipe (see
285 .BR pipe (2))
286 in all cases where a pipe is used simply to signal events.
287 The kernel overhead of an eventfd file descriptor
288 is much lower than that of a pipe,
289 and only one file descriptor is
290 required (versus the two required for a pipe).
291 .PP
292 When used in the kernel, an eventfd
293 file descriptor can provide a bridge from kernel to user space, allowing,
294 for example, functionalities like KAIO (kernel AIO)
295 .\" or eventually syslets/threadlets
296 to signal to a file descriptor that some operation is complete.
297 .PP
298 A key point about an eventfd file descriptor is that it can be
299 monitored just like any other file descriptor using
300 .BR select (2),
301 .BR poll (2),
302 or
303 .BR epoll (7).
304 This means that an application can simultaneously monitor the
305 readiness of "traditional" files and the readiness of other
306 kernel mechanisms that support the eventfd interface.
307 (Without the
308 .BR eventfd ()
309 interface, these mechanisms could not be multiplexed via
310 .BR select (2),
311 .BR poll (2),
312 or
313 .BR epoll (7).)
314 .PP
315 The current value of an eventfd counter can be viewed
316 via the entry for the corresponding file descriptor in the process's
317 .IR /proc/ pid /fdinfo
318 directory.
319 See
320 .BR proc (5)
321 for further details.
322 .\"
323 .SS C library/kernel differences
324 There are two underlying Linux system calls:
325 .BR eventfd ()
326 and the more recent
327 .BR eventfd2 ().
328 The former system call does not implement a
329 .I flags
330 argument.
331 The latter system call implements the
332 .I flags
333 values described above.
334 The glibc wrapper function will use
335 .BR eventfd2 ()
336 where it is available.
337 .SS Additional glibc features
338 The GNU C library defines an additional type,
339 and two functions that attempt to abstract some of the details of
340 reading and writing on an eventfd file descriptor:
341 .PP
342 .in +4n
343 .EX
344 typedef uint64_t eventfd_t;
345
346 int eventfd_read(int fd, eventfd_t *value);
347 int eventfd_write(int fd, eventfd_t value);
348 .EE
349 .in
350 .PP
351 The functions perform the read and write operations on an
352 eventfd file descriptor,
353 returning 0 if the correct number of bytes was transferred,
354 or \-1 otherwise.
355 .SH EXAMPLES
356 The following program creates an eventfd file descriptor
357 and then forks to create a child process.
358 While the parent briefly sleeps,
359 the child writes each of the integers supplied in the program's
360 command-line arguments to the eventfd file descriptor.
361 When the parent has finished sleeping,
362 it reads from the eventfd file descriptor.
363 .PP
364 The following shell session shows a sample run of the program:
365 .PP
366 .in +4n
367 .EX
368 .RB "$" " ./a.out 1 2 4 7 14"
369 Child writing 1 to efd
370 Child writing 2 to efd
371 Child writing 4 to efd
372 Child writing 7 to efd
373 Child writing 14 to efd
374 Child completed write loop
375 Parent about to read
376 Parent read 28 (0x1c) from efd
377 .EE
378 .in
379 .SS Program source
380 \&
381 .\" SRC BEGIN (eventfd.c)
382 .EX
383 #include <err.h>
384 #include <inttypes.h>
385 #include <stdio.h>
386 #include <stdlib.h>
387 #include <sys/eventfd.h>
388 #include <unistd.h>
389
/* Demonstrate an eventfd counter shared between a parent and its child: */
/* the child adds each command-line number to the counter with write(), */
/* and the parent then collects the accumulated total with one read(). */
390 int
391 main(int argc, char *argv[])
392 {
393 int efd;
394 uint64_t u;
395 ssize_t s;
396
/* At least one number to write is required. */
397 if (argc < 2) {
398 fprintf(stderr, "Usage: %s <num>...\en", argv[0]);
399 exit(EXIT_FAILURE);
400 }
401
/* Counter starts at 0; no flags, so reads are blocking and */
/* EFD_SEMAPHORE semantics are not in effect. */
402 efd = eventfd(0, 0);
403 if (efd == \-1)
404 err(EXIT_FAILURE, "eventfd");
405
406 switch (fork()) {
/* Child: inherits efd and writes every argument to it. */
407 case 0:
408 for (size_t j = 1; j < argc; j++) {
409 printf("Child writing %s to efd\en", argv[j]);
410 u = strtoull(argv[j], NULL, 0);
411 /* strtoull() allows various bases */
/* Each write must transfer exactly 8 bytes; the value is added */
/* to the eventfd counter. */
412 s = write(efd, &u, sizeof(uint64_t));
413 if (s != sizeof(uint64_t))
414 err(EXIT_FAILURE, "write");
415 }
416 printf("Child completed write loop\en");
417
418 exit(EXIT_SUCCESS);
419
/* Parent: the sleep is the only thing giving the child time to */
/* finish its writes before the read below. */
420 default:
421 sleep(2);
422
423 printf("Parent about to read\en");
/* Without EFD_SEMAPHORE, a single read returns the whole counter */
/* value (the sum of the writes so far) and resets it to zero. */
424 s = read(efd, &u, sizeof(uint64_t));
425 if (s != sizeof(uint64_t))
426 err(EXIT_FAILURE, "read");
427 printf("Parent read %"PRIu64" (%#"PRIx64") from efd\en", u, u);
428 exit(EXIT_SUCCESS);
429
/* fork() failed; no child was created. */
430 case \-1:
431 err(EXIT_FAILURE, "fork");
432 }
433 }
434 .EE
435 .\" SRC END
436 .SH SEE ALSO
437 .BR futex (2),
438 .BR pipe (2),
439 .BR poll (2),
440 .BR read (2),
441 .BR select (2),
442 .BR signalfd (2),
443 .BR timerfd_create (2),
444 .BR write (2),
445 .BR epoll (7),
446 .BR sem_overview (7)