]>
Commit | Line | Data |
---|---|---|
8b428090 MK |
1 | .\" Copyright (C) 2008 Michael Kerrisk <mtk.manpages@gmail.com> |
2 | .\" starting from a version by Davide Libenzi <davidel@xmailserver.org> | |
3 | .\" | |
e4a74ca8 | 4 | .\" SPDX-License-Identifier: GPL-2.0-or-later |
8b428090 | 5 | .\" |
c22cb204 MK |
6 | .\" 2008-10-10, mtk: describe eventfd2(), and EFD_NONBLOCK and EFD_CLOEXEC |
7 | .\" | |
1d767b55 | 8 | .TH EVENTFD 2 2021-03-22 Linux "Linux Programmer's Manual" |
8b428090 MK |
9 | .SH NAME |
10 | eventfd \- create a file descriptor for event notification | |
f8b67e8e AC |
11 | .SH LIBRARY |
12 | Standard C library | |
8fc3b2cf | 13 | .RI ( libc ", " \-lc ) |
8b428090 | 14 | .SH SYNOPSIS |
c7db92b9 | 15 | .nf |
8b428090 | 16 | .B #include <sys/eventfd.h> |
68e4db0a | 17 | .PP |
8b428090 | 18 | .BI "int eventfd(unsigned int " initval ", int " flags ); |
c7db92b9 | 19 | .fi |
8b428090 MK |
20 | .SH DESCRIPTION |
21 | .BR eventfd () | |
22 | creates an "eventfd object" that can be used as | |
7fac88a9 MK |
23 | an event wait/notify mechanism by user-space applications, |
24 | and by the kernel to notify user-space applications of events. | |
8b428090 MK |
25 | The object contains an unsigned 64-bit integer |
26 | .RI ( uint64_t ) | |
27 | counter that is maintained by the kernel. | |
28 | This counter is initialized with the value specified in the argument | |
29 | .IR initval . | |
efeece04 | 30 | .PP |
735e2912 MK |
31 | As its return value, |
32 | .BR eventfd () | |
33 | returns a new file descriptor that can be used to refer to the | |
34 | eventfd object. | |
35 | .PP | |
0986cb57 | 36 | The following values may be bitwise ORed in |
1ae6b2c7 | 37 | .I flags |
a1fa36af | 38 | to change the behavior of |
e64b5482 | 39 | .BR eventfd (): |
e64b5482 | 40 | .TP |
0986cb57 | 41 | .BR EFD_CLOEXEC " (since Linux 2.6.27)" |
e64b5482 MK |
42 | Set the close-on-exec |
43 | .RB ( FD_CLOEXEC ) | |
44 | flag on the new file descriptor. | |
c5571b61 | 45 | See the description of the |
e64b5482 MK |
46 | .B O_CLOEXEC |
47 | flag in | |
48 | .BR open (2) | |
49 | for reasons why this may be useful. | |
4eb6d333 | 50 | .TP |
0986cb57 | 51 | .BR EFD_NONBLOCK " (since Linux 2.6.27)" |
4eb6d333 | 52 | Set the |
1ae6b2c7 | 53 | .B O_NONBLOCK |
7f11e32c MK |
54 | file status flag on the open file description (see |
55 | .BR open (2)) | |
56 | referred to by the new file descriptor. | |
4eb6d333 MK |
57 | Using this flag saves extra calls to |
58 | .BR fcntl (2) | |
59 | to achieve the same result. | |
0986cb57 MK |
60 | .TP |
61 | .BR EFD_SEMAPHORE " (since Linux 2.6.30)" | |
62 | Provide semaphore-like semantics for reads from the new file descriptor. | |
63 | See below. | |
e64b5482 MK |
64 | .PP |
65 | In Linux up to version 2.6.26, the | |
8b428090 | 66 | .I flags |
e64b5482 | 67 | argument is unused, and must be specified as zero. |
efeece04 | 68 | .PP |
735e2912 MK |
69 | The following operations can be performed on the file descriptor returned by |
70 | .BR eventfd (): | |
8b428090 MK |
71 | .TP |
72 | .BR read (2) | |
0986cb57 MK |
73 | Each successful |
74 | .BR read (2) | |
75 | returns an 8-byte integer. | |
76 | A | |
77 | .BR read (2) | |
26cd31fd | 78 | fails with the error |
0986cb57 MK |
79 | .B EINVAL |
80 | if the size of the supplied buffer is less than 8 bytes. | |
81 | .IP | |
82 | The value returned by | |
83 | .BR read (2) | |
88879aeb MK |
84 | is in host byte order\(emthat is, |
85 | the native byte order for integers on the host machine. | |
0986cb57 MK |
86 | .IP |
87 | The semantics of | |
88 | .BR read (2) | |
89 | depend on whether the eventfd counter currently has a nonzero value | |
90 | and whether the | |
1ae6b2c7 | 91 | .B EFD_SEMAPHORE |
0986cb57 MK |
92 | flag was specified when creating the eventfd file descriptor: |
93 | .RS | |
94 | .IP * 3 | |
95 | If | |
1ae6b2c7 | 96 | .B EFD_SEMAPHORE |
0986cb57 | 97 | was not specified and the eventfd counter has a nonzero value, then a |
8b428090 MK |
98 | .BR read (2) |
99 | returns 8 bytes containing that value, | |
100 | and the counter's value is reset to zero. | |
0986cb57 MK |
101 | .IP * |
102 | If | |
1ae6b2c7 | 103 | .B EFD_SEMAPHORE |
0986cb57 MK |
104 | was specified and the eventfd counter has a nonzero value, then a |
105 | .BR read (2) | |
106 | returns 8 bytes containing the value 1, | |
107 | and the counter's value is decremented by 1. | |
108 | .IP * | |
109 | If the eventfd counter is zero at the time of the call to | |
8b428090 | 110 | .BR read (2), |
0986cb57 MK |
111 | then the call either blocks until the counter becomes nonzero |
112 | (at which time, the | |
113 | .BR read (2) | |
114 | proceeds as described above) | |
8b428090 MK |
115 | or fails with the error |
116 | .B EAGAIN | |
ff40dbb3 | 117 | if the file descriptor has been made nonblocking. |
0986cb57 | 118 | .RE |
8b428090 MK |
119 | .TP |
120 | .BR write (2) | |
121 | A | |
122 | .BR write (2) | |
123 | call adds the 8-byte integer value supplied in its | |
124 | buffer to the counter. | |
125 | The maximum value that may be stored in the counter is the largest | |
126 | unsigned 64-bit value minus 1 (i.e., 0xfffffffffffffffe). | |
127 | If the addition would cause the counter's value to exceed | |
128 | the maximum, then the | |
129 | .BR write (2) | |
130 | either blocks until a | |
131 | .BR read (2) | |
132 | is performed on the file descriptor, | |
133 | or fails with the error | |
134 | .B EAGAIN | |
ff40dbb3 | 135 | if the file descriptor has been made nonblocking. |
8b428090 MK |
136 | .IP |
137 | A | |
138 | .BR write (2) | |
26cd31fd | 139 | fails with the error |
8b428090 MK |
140 | .B EINVAL |
141 | if the size of the supplied buffer is less than 8 bytes, | |
142 | or if an attempt is made to write the value 0xffffffffffffffff. | |
143 | .TP | |
144 | .BR poll "(2), " select "(2) (and similar)" | |
145 | The returned file descriptor supports | |
146 | .BR poll (2) | |
147 | (and analogously | |
148 | .BR epoll (7)) | |
149 | and | |
150 | .BR select (2), | |
151 | as follows: | |
152 | .RS | |
153 | .IP * 3 | |
154 | The file descriptor is readable | |
155 | (the | |
156 | .BR select (2) | |
157 | .I readfds | |
158 | argument; the | |
159 | .BR poll (2) | |
160 | .B POLLIN | |
161 | flag) | |
162 | if the counter has a value greater than 0. | |
163 | .IP * | |
164 | The file descriptor is writable | |
165 | (the | |
166 | .BR select (2) | |
167 | .I writefds | |
168 | argument; the | |
169 | .BR poll (2) | |
170 | .B POLLOUT | |
171 | flag) | |
172 | if it is possible to write a value of at least "1" without blocking. | |
173 | .IP * | |
20e5fb78 MK |
174 | If an overflow of the counter value was detected, |
175 | then | |
8b428090 | 176 | .BR select (2) |
20e5fb78 | 177 | indicates the file descriptor as being both readable and writable, and |
8b428090 | 178 | .BR poll (2) |
20e5fb78 | 179 | returns a |
8b428090 | 180 | .B POLLERR |
20e5fb78 | 181 | event. |
8b428090 MK |
182 | As noted above, |
183 | .BR write (2) | |
184 | can never overflow the counter. | |
185 | However, an overflow can occur if 2^64 | |
186 | eventfd "signal posts" were performed by the KAIO | |
187 | subsystem (theoretically possible, but practically unlikely). | |
188 | If an overflow has occurred, then | |
189 | .BR read (2) | |
190 | will return that maximum | |
191 | .I uint64_t | |
192 | value (i.e., 0xffffffffffffffff). | |
193 | .RE | |
194 | .IP | |
195 | The eventfd file descriptor also supports the other file-descriptor | |
196 | multiplexing APIs: | |
531b15bc | 197 | .BR pselect (2) |
8b428090 | 198 | and |
531b15bc | 199 | .BR ppoll (2). |
8b428090 MK |
200 | .TP |
201 | .BR close (2) | |
202 | When the file descriptor is no longer required, it should be closed. | |
203 | When all file descriptors associated with the same eventfd object | |
204 | have been closed, the resources for the object are freed by the kernel. | |
205 | .PP | |
206 | A copy of the file descriptor created by | |
207 | .BR eventfd () | |
208 | is inherited by the child produced by | |
209 | .BR fork (2). | |
210 | The duplicate file descriptor is associated with the same | |
211 | eventfd object. | |
212 | File descriptors created by | |
213 | .BR eventfd () | |
214 | are preserved across | |
d45105d9 AS |
215 | .BR execve (2), |
216 | unless the close-on-exec flag has been set. | |
47297adb | 217 | .SH RETURN VALUE |
8b428090 MK |
218 | On success, |
219 | .BR eventfd () | |
220 | returns a new eventfd file descriptor. | |
221 | On error, \-1 is returned and | |
222 | .I errno | |
223 | is set to indicate the error. | |
224 | .SH ERRORS | |
40725279 MK |
225 | .TP |
226 | .B EINVAL | |
0986cb57 MK |
227 | An unsupported value was specified in |
228 | .IR flags . | |
8b428090 MK |
229 | .TP |
230 | .B EMFILE | |
26c32fab | 231 | The per-process limit on the number of open file descriptors has been reached. |
8b428090 MK |
232 | .TP |
233 | .B ENFILE | |
234 | The system-wide limit on the total number of open files has been | |
235 | reached. | |
236 | .TP | |
237 | .B ENODEV | |
238 | .\" Note from Davide: | |
239 | .\" The ENODEV error is basically never going to happen if | |
240 | .\" the kernel boots correctly. That error happens only if, during | |
241 | .\" the kernel initialization, some error occurs in the anonymous | |
242 | .\" inode source initialization. | |
243 | Could not mount (internal) anonymous inode device. | |
244 | .TP | |
245 | .B ENOMEM | |
246 | There was insufficient memory to create a new | |
247 | eventfd file descriptor. | |
248 | .SH VERSIONS | |
249 | .BR eventfd () | |
250 | is available on Linux since kernel 2.6.22. | |
251 | Working support is provided in glibc since version 2.8. | |
252 | .\" eventfd() is in glibc 2.7, but reportedly does not build | |
e64b5482 MK |
253 | The |
254 | .BR eventfd2 () | |
255 | system call (see NOTES) is available on Linux since kernel 2.6.27. | |
66bbce00 MK |
256 | Since version 2.9, the glibc |
257 | .BR eventfd () | |
258 | wrapper will employ the | |
259 | .BR eventfd2 () | |
260 | system call, if it is supported by the kernel. | |
288c1a09 ZL |
261 | .SH ATTRIBUTES |
262 | For an explanation of the terms used in this section, see | |
263 | .BR attributes (7). | |
c466875e MK |
264 | .ad l |
265 | .nh | |
288c1a09 ZL |
266 | .TS |
267 | allbox; | |
c466875e | 268 | lbx lb lb |
288c1a09 ZL |
269 | l l l. |
270 | Interface Attribute Value | |
271 | T{ | |
272 | .BR eventfd () | |
273 | T} Thread safety MT-Safe | |
274 | .TE | |
c466875e MK |
275 | .hy |
276 | .ad | |
efeece04 | 277 | .sp 1 |
8b428090 MK |
278 | .SH CONFORMING TO |
279 | .BR eventfd () | |
e64b5482 | 280 | and |
2aa3fb2d | 281 | .BR eventfd2 () |
e64b5482 | 282 | are Linux-specific. |
8b428090 MK |
283 | .SH NOTES |
284 | Applications can use an eventfd file descriptor instead of a pipe (see | |
285 | .BR pipe (2)) | |
286 | in all cases where a pipe is used simply to signal events. | |
287 | The kernel overhead of an eventfd file descriptor | |
288 | is much lower than that of a pipe, | |
289 | and only one file descriptor is | |
290 | required (versus the two required for a pipe). | |
efeece04 | 291 | .PP |
8b428090 | 292 | When used in the kernel, an eventfd |
7fac88a9 | 293 | file descriptor can provide a bridge from kernel to user space, allowing, |
8b428090 MK |
294 | for example, functionalities like KAIO (kernel AIO) |
295 | .\" or eventually syslets/threadlets | |
296 | to signal to a file descriptor that some operation is complete. | |
efeece04 | 297 | .PP |
8b428090 MK |
298 | A key point about an eventfd file descriptor is that it can be |
299 | monitored just like any other file descriptor using | |
300 | .BR select (2), | |
301 | .BR poll (2), | |
302 | or | |
303 | .BR epoll (7). | |
304 | This means that an application can simultaneously monitor the | |
305 | readiness of "traditional" files and the readiness of other | |
306 | kernel mechanisms that support the eventfd interface. | |
307 | (Without the | |
308 | .BR eventfd () | |
309 | interface, these mechanisms could not be multiplexed via | |
310 | .BR select (2), | |
311 | .BR poll (2), | |
312 | or | |
313 | .BR epoll (7).) | |
efeece04 | 314 | .PP |
9764a9ff MK |
315 | The current value of an eventfd counter can be viewed |
316 | via the entry for the corresponding file descriptor in the process's | |
1ae6b2c7 | 317 | .IR /proc/ pid /fdinfo |
9764a9ff MK |
318 | directory. |
319 | See | |
320 | .BR proc (5) | |
321 | for further details. | |
322 | .\" | |
0722a578 | 323 | .SS C library/kernel differences |
e64b5482 MK |
324 | There are two underlying Linux system calls: |
325 | .BR eventfd () | |
326 | and the more recent | |
327 | .BR eventfd2 (). | |
328 | The former system call does not implement a | |
329 | .I flags | |
330 | argument. | |
331 | The latter system call implements the | |
332 | .I flags | |
333 | values described above. | |
e64b5482 MK |
334 | The glibc wrapper function will use |
335 | .BR eventfd2 () | |
336 | where it is available. | |
8b428090 MK |
337 | .SS Additional glibc features |
338 | The GNU C library defines an additional type, | |
339 | and two functions that attempt to abstract some of the details of | |
340 | reading and writing on an eventfd file descriptor: | |
0ffeaeae | 341 | .PP |
8b428090 | 342 | .in +4n |
0ffeaeae | 343 | .EX |
8b428090 MK |
344 | typedef uint64_t eventfd_t; |
345 | ||
ffaecbc1 MK |
346 | int eventfd_read(int fd, eventfd_t *value); |
347 | int eventfd_write(int fd, eventfd_t value); | |
0ffeaeae | 348 | .EE |
8b428090 | 349 | .in |
efeece04 | 350 | .PP |
8b428090 MK |
351 | The functions perform the read and write operations on an |
352 | eventfd file descriptor, | |
353 | returning 0 if the correct number of bytes was transferred, | |
354 | or \-1 otherwise. | |
a14af333 | 355 | .SH EXAMPLES |
8b428090 MK |
356 | The following program creates an eventfd file descriptor |
357 | and then forks to create a child process. | |
358 | While the parent briefly sleeps, | |
359 | the child writes each of the integers supplied in the program's | |
360 | command-line arguments to the eventfd file descriptor. | |
361 | When the parent has finished sleeping, | |
362 | it reads from the eventfd file descriptor. | |
efeece04 | 363 | .PP |
8b428090 | 364 | The following shell session shows a sample run of the program: |
0ffeaeae | 365 | .PP |
8b428090 | 366 | .in +4n |
0ffeaeae | 367 | .EX |
b43a3b30 | 368 | .RB "$" " ./a.out 1 2 4 7 14" |
8b428090 MK |
369 | Child writing 1 to efd |
370 | Child writing 2 to efd | |
371 | Child writing 4 to efd | |
372 | Child writing 7 to efd | |
373 | Child writing 14 to efd | |
374 | Child completed write loop | |
375 | Parent about to read | |
376 | Parent read 28 (0x1c) from efd | |
0ffeaeae | 377 | .EE |
1c32ee47 | 378 | .in |
9c330504 | 379 | .SS Program source |
d84d0300 | 380 | \& |
e7d0bb47 | 381 | .EX |
8b428090 MK |
382 | #include <sys/eventfd.h> |
383 | #include <unistd.h> | |
77de385d | 384 | #include <inttypes.h> /* Definition of PRIu64 & PRIx64 */ |
8b428090 MK |
385 | #include <stdlib.h> |
386 | #include <stdio.h> | |
387 | #include <stdint.h> /* Definition of uint64_t */ | |
388 | ||
d1a71985 | 389 | #define handle_error(msg) \e |
8b428090 MK |
390 | do { perror(msg); exit(EXIT_FAILURE); } while (0) |
391 | ||
392 | int | |
393 | main(int argc, char *argv[]) | |
394 | { | |
88893a77 | 395 | int efd; |
8b428090 MK |
396 | uint64_t u; |
397 | ssize_t s; | |
398 | ||
399 | if (argc < 2) { | |
d1a71985 | 400 | fprintf(stderr, "Usage: %s <num>...\en", argv[0]); |
8b428090 MK |
401 | exit(EXIT_FAILURE); |
402 | } | |
403 | ||
404 | efd = eventfd(0, 0); | |
405 | if (efd == \-1) | |
406 | handle_error("eventfd"); | |
407 | ||
408 | switch (fork()) { | |
409 | case 0: | |
88893a77 | 410 | for (int j = 1; j < argc; j++) { |
d1a71985 | 411 | printf("Child writing %s to efd\en", argv[j]); |
8b428090 MK |
412 | u = strtoull(argv[j], NULL, 0); |
413 | /* strtoull() allows various bases */ | |
414 | s = write(efd, &u, sizeof(uint64_t)); | |
415 | if (s != sizeof(uint64_t)) | |
416 | handle_error("write"); | |
417 | } | |
d1a71985 | 418 | printf("Child completed write loop\en"); |
8b428090 MK |
419 | |
420 | exit(EXIT_SUCCESS); | |
421 | ||
422 | default: | |
423 | sleep(2); | |
424 | ||
d1a71985 | 425 | printf("Parent about to read\en"); |
8b428090 MK |
426 | s = read(efd, &u, sizeof(uint64_t)); |
427 | if (s != sizeof(uint64_t)) | |
428 | handle_error("read"); | |
dc97703b | 429 | printf("Parent read %"PRIu64" (%#"PRIx64") from efd\en", u, u); |
8b428090 MK |
430 | exit(EXIT_SUCCESS); |
431 | ||
432 | case \-1: | |
433 | handle_error("fork"); | |
434 | } | |
435 | } | |
e7d0bb47 | 436 | .EE |
47297adb | 437 | .SH SEE ALSO |
8b428090 MK |
438 | .BR futex (2), |
439 | .BR pipe (2), | |
440 | .BR poll (2), | |
441 | .BR read (2), | |
442 | .BR select (2), | |
443 | .BR signalfd (2), | |
444 | .BR timerfd_create (2), | |
445 | .BR write (2), | |
446 | .BR epoll (7), | |
447 | .BR sem_overview (7) |