]>
Commit | Line | Data |
---|---|---|
8b428090 MK |
1 | .\" Copyright (C) 2008 Michael Kerrisk <mtk.manpages@gmail.com> |
2 | .\" starting from a version by Davide Libenzi <davidel@xmailserver.org> | |
3 | .\" | |
4 | .\" This program is free software; you can redistribute it and/or modify | |
5 | .\" it under the terms of the GNU General Public License as published by | |
6 | .\" the Free Software Foundation; either version 2 of the License, or | |
7 | .\" (at your option) any later version. | |
8 | .\" | |
9 | .\" This program is distributed in the hope that it will be useful, | |
10 | .\" but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | .\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | .\" GNU General Public License for more details. | |
13 | .\" | |
14 | .\" You should have received a copy of the GNU General Public License | |
15 | .\" along with this program; if not, write to the Free Software | |
16 | .\" Foundation, Inc., 59 Temple Place, Suite 330, Boston, | |
17 | .\" MA 02111-1307 USA | |
18 | .\" | |
c22cb204 MK |
19 | .\" 2008-10-10, mtk: describe eventfd2(), and EFD_NONBLOCK and EFD_CLOEXEC |
20 | .\" | |
e64b5482 | 21 | .TH EVENTFD 2 2008-10-10 Linux "Linux Programmer's Manual" |
8b428090 MK |
22 | .SH NAME |
23 | eventfd \- create a file descriptor for event notification | |
24 | .SH SYNOPSIS | |
25 | .B #include <sys/eventfd.h> | |
26 | .sp | |
27 | .BI "int eventfd(unsigned int " initval ", int " flags ); | |
28 | .SH DESCRIPTION | |
29 | .BR eventfd () | |
30 | creates an "eventfd object" that can be used as | |
31 | an event wait/notify mechanism by userspace applications, | |
32 | and by the kernel to notify userspace applications of events. | |
33 | The object contains an unsigned 64-bit integer | |
34 | .RI ( uint64_t ) | |
35 | counter that is maintained by the kernel. | |
36 | This counter is initialized with the value specified in the argument | |
37 | .IR initval . | |
38 | ||
e64b5482 MK |
39 | Starting with Linux 2.6.27, the following values may be bitwise ORed in |
40 | .IR flags | |
41 | to change the behaviour of | |
42 | .BR eventfd (): | |
43 | .TP 14 | |
44 | .B EFD_NONBLOCK | |
45 | Set the | |
46 | .BR O_NONBLOCK | |
47 | file status flag on the new open file description. | |
48 | Using this flag saves extra calls to | |
49 | .BR fcntl (2) | |
50 | to achieve the same result. | |
51 | .TP | |
52 | .B EFD_CLOEXEC | |
53 | Set the close-on-exec | |
54 | .RB ( FD_CLOEXEC ) | |
55 | flag on the new file descriptor. | |
c5571b61 | 56 | See the description of the |
e64b5482 MK |
57 | .B O_CLOEXEC |
58 | flag in | |
59 | .BR open (2) | |
60 | for reasons why this may be useful. | |
61 | .PP | |
62 | In Linux up to version 2.6.26, the | |
8b428090 | 63 | .I flags |
e64b5482 | 64 | argument is unused, and must be specified as zero. |
8b428090 MK |
65 | |
66 | As its return value, | |
67 | .BR eventfd () | |
68 | returns a new file descriptor that can be used to refer to the | |
69 | eventfd object. | |
70 | The following operations can be performed on the file descriptor: | |
71 | .TP | |
72 | .BR read (2) | |
eba72288 | 73 | If the eventfd counter has a non-zero value, then a |
8b428090 MK |
74 | .BR read (2) |
75 | returns 8 bytes containing that value, | |
76 | and the counter's value is reset to zero. | |
77 | (The returned value is in host byte order, | |
78 | i.e., the native byte order for integers on the host machine.) | |
79 | .IP | |
80 | If the counter is zero at the time of the | |
81 | .BR read (2), | |
eba72288 | 82 | then the call either blocks until the counter becomes non-zero, |
8b428090 MK |
83 | or fails with the error |
84 | .B EAGAIN | |
e64b5482 | 85 | if the file descriptor has been made non-blocking. |
8b428090 MK |
86 | .IP |
87 | A | |
88 | .BR read (2) | |
89 | will fail with the error | |
90 | .B EINVAL | |
91 | if the size of the supplied buffer is less than 8 bytes. | |
92 | .TP | |
93 | .BR write (2) | |
94 | A | |
95 | .BR write (2) | |
96 | call adds the 8-byte integer value supplied in its | |
97 | buffer to the counter. | |
98 | The maximum value that may be stored in the counter is the largest | |
99 | unsigned 64-bit value minus 1 (i.e., 0xfffffffffffffffe). | |
100 | If the addition would cause the counter's value to exceed | |
101 | the maximum, then the | |
102 | .BR write (2) | |
103 | either blocks until a | |
104 | .BR read (2) | |
105 | is performed on the file descriptor, | |
106 | or fails with the error | |
107 | .B EAGAIN | |
108 | if the file descriptor has been made non-blocking. | |
109 | .IP | |
110 | A | |
111 | .BR write (2) | |
112 | will fail with the error | |
113 | .B EINVAL | |
114 | if the size of the supplied buffer is less than 8 bytes, | |
115 | or if an attempt is made to write the value 0xffffffffffffffff. | |
116 | .TP | |
117 | .BR poll "(2), " select "(2) (and similar)" | |
118 | The returned file descriptor supports | |
119 | .BR poll (2) | |
120 | (and analogously | |
121 | .BR epoll (7)) | |
122 | and | |
123 | .BR select (2), | |
124 | as follows: | |
125 | .RS | |
126 | .IP * 3 | |
127 | The file descriptor is readable | |
128 | (the | |
129 | .BR select (2) | |
130 | .I readfds | |
131 | argument; the | |
132 | .BR poll (2) | |
133 | .B POLLIN | |
134 | flag) | |
135 | if the counter has a value greater than 0. | |
136 | .IP * | |
137 | The file descriptor is writable | |
138 | (the | |
139 | .BR select (2) | |
140 | .I writefds | |
141 | argument; the | |
142 | .BR poll (2) | |
143 | .B POLLOUT | |
144 | flag) | |
145 | if it is possible to write a value of at least "1" without blocking. | |
146 | .IP * | |
20e5fb78 MK |
147 | If an overflow of the counter value was detected, |
148 | then | |
8b428090 | 149 | .BR select (2) |
20e5fb78 | 150 | indicates the file descriptor as being both readable and writable, and |
8b428090 | 151 | .BR poll (2) |
20e5fb78 | 152 | returns a |
8b428090 | 153 | .B POLLERR |
20e5fb78 | 154 | event. |
8b428090 MK |
155 | As noted above, |
156 | .BR write (2) | |
157 | can never overflow the counter. | |
158 | However, an overflow can occur if 2^64 | |
159 | eventfd "signal posts" were performed by the KAIO | |
160 | subsystem (theoretically possible, but practically unlikely). | |
161 | If an overflow has occurred, then | |
162 | .BR read (2) | |
163 | will return that maximum | |
164 | .I uint64_t | |
165 | value (i.e., 0xffffffffffffffff). | |
166 | .RE | |
167 | .IP | |
168 | The eventfd file descriptor also supports the other file-descriptor | |
169 | multiplexing APIs: | |
170 | .BR pselect (2), | |
171 | .BR ppoll (2), | |
172 | and | |
173 | .BR epoll (7). | |
174 | .TP | |
175 | .BR close (2) | |
176 | When the file descriptor is no longer required, it should be closed. | |
177 | When all file descriptors associated with the same eventfd object | |
178 | have been closed, the resources for the object are freed by the kernel. | |
179 | .PP | |
180 | A copy of the file descriptor created by | |
181 | .BR eventfd () | |
182 | is inherited by the child produced by | |
183 | .BR fork (2). | |
184 | The duplicate file descriptor is associated with the same | |
185 | eventfd object. | |
186 | Unless the close-on-exec flag has been set, file descriptors created by | |
187 | .BR eventfd () | |
188 | are preserved across | |
189 | .BR execve (2). | |
190 | .SH "RETURN VALUE" | |
191 | On success, | |
192 | .BR eventfd () | |
193 | returns a new eventfd file descriptor. | |
194 | On error, \-1 is returned and | |
195 | .I errno | |
196 | is set to indicate the error. | |
197 | .SH ERRORS | |
40725279 MK |
198 | .TP |
199 | .B EINVAL | |
8b428090 | 200 | .I flags |
e64b5482 MK |
201 | is invalid; |
202 | or, in Linux 2.6.26 or earlier, | |
203 | .I flags | |
eba72288 | 204 | is non-zero. |
8b428090 MK |
205 | .TP |
206 | .B EMFILE | |
207 | The per-process limit on open file descriptors has been reached. | |
208 | .TP | |
209 | .B ENFILE | |
210 | The system-wide limit on the total number of open files has been | |
211 | reached. | |
212 | .TP | |
213 | .B ENODEV | |
214 | .\" Note from Davide: | |
215 | .\" The ENODEV error is basically never going to happen if | |
216 | .\" the kernel boots correctly. That error happens only if, during | |
217 | .\" the kernel initialization, some error occurs in the anonymous | |
218 | .\" inode source initialization. | |
219 | Could not mount (internal) anonymous inode device. | |
220 | .TP | |
221 | .B ENOMEM | |
222 | There was insufficient memory to create a new | |
223 | eventfd file descriptor. | |
224 | .SH VERSIONS | |
225 | .BR eventfd () | |
226 | is available on Linux since kernel 2.6.22. | |
227 | Working support is provided in glibc since version 2.8. | |
228 | .\" eventfd() is in glibc 2.7, but reportedly does not build | |
e64b5482 MK |
229 | The |
230 | .BR eventfd2 () | |
231 | system call (see NOTES) is available on Linux since kernel 2.6.27. | |
8b428090 MK |
232 | .SH CONFORMING TO |
233 | .BR eventfd () | |
e64b5482 | 234 | and |
2aa3fb2d | 235 | .BR eventfd2 () |
e64b5482 | 236 | are Linux-specific. |
8b428090 MK |
237 | .SH NOTES |
238 | Applications can use an eventfd file descriptor instead of a pipe (see | |
239 | .BR pipe (2)) | |
240 | in all cases where a pipe is used simply to signal events. | |
241 | The kernel overhead of an eventfd file descriptor | |
242 | is much lower than that of a pipe, | |
243 | and only one file descriptor is | |
244 | required (versus the two required for a pipe). | |
245 | ||
246 | When used in the kernel, an eventfd | |
247 | file descriptor can provide a kernel-userspace bridge allowing, | |
248 | for example, functionalities like KAIO (kernel AIO) | |
249 | .\" or eventually syslets/threadlets | |
250 | to signal to a file descriptor that some operation is complete. | |
251 | ||
252 | A key point about an eventfd file descriptor is that it can be | |
253 | monitored just like any other file descriptor using | |
254 | .BR select (2), | |
255 | .BR poll (2), | |
256 | or | |
257 | .BR epoll (7). | |
258 | This means that an application can simultaneously monitor the | |
259 | readiness of "traditional" files and the readiness of other | |
260 | kernel mechanisms that support the eventfd interface. | |
261 | (Without the | |
262 | .BR eventfd () | |
263 | interface, these mechanisms could not be multiplexed via | |
264 | .BR select (2), | |
265 | .BR poll (2), | |
266 | or | |
267 | .BR epoll (7).) | |
e64b5482 MK |
268 | .SS Underlying Linux system calls |
269 | There are two underlying Linux system calls: | |
270 | .BR eventfd () | |
271 | and the more recent | |
272 | .BR eventfd2 (). | |
273 | The former system call does not implement a | |
274 | .I flags | |
275 | argument. | |
276 | The latter system call implements the | |
277 | .I flags | |
278 | values described above. | |
279 | .\" FIXME as at 2008-10-10, the glibc snapshot doesn't seem | |
280 | .\" to make use of eventfd2() yet. Check later to see what | |
281 | .\" glibc version adds support for this syscall. | |
282 | The glibc wrapper function will use | |
283 | .BR eventfd2 () | |
284 | where it is available. | |
8b428090 MK |
285 | .SS Additional glibc features |
286 | The GNU C library defines an additional type, | |
287 | and two functions that attempt to abstract some of the details of | |
288 | reading and writing on an eventfd file descriptor: | |
289 | .in +4n | |
290 | .nf | |
291 | ||
292 | typedef uint64_t eventfd_t; | |
293 | ||
ffaecbc1 MK |
294 | int eventfd_read(int fd, eventfd_t *value); |
295 | int eventfd_write(int fd, eventfd_t value); | |
8b428090 MK |
296 | .fi |
297 | .in | |
298 | ||
299 | The functions perform the read and write operations on an | |
300 | eventfd file descriptor, | |
301 | returning 0 if the correct number of bytes was transferred, | |
302 | or \-1 otherwise. | |
303 | .SH EXAMPLE | |
304 | .PP | |
305 | The following program creates an eventfd file descriptor | |
306 | and then forks to create a child process. | |
307 | While the parent briefly sleeps, | |
308 | the child writes each of the integers supplied in the program's | |
309 | command-line arguments to the eventfd file descriptor. | |
310 | When the parent has finished sleeping, | |
311 | it reads from the eventfd file descriptor. | |
312 | ||
313 | The following shell session shows a sample run of the program: | |
314 | .in +4n | |
315 | .nf | |
316 | ||
b43a3b30 | 317 | .RB "$" " ./a.out 1 2 4 7 14" |
8b428090 MK |
318 | Child writing 1 to efd |
319 | Child writing 2 to efd | |
320 | Child writing 4 to efd | |
321 | Child writing 7 to efd | |
322 | Child writing 14 to efd | |
323 | Child completed write loop | |
324 | Parent about to read | |
325 | Parent read 28 (0x1c) from efd | |
326 | .fi | |
1c32ee47 | 327 | .in |
9c330504 | 328 | .SS Program source |
d84d0300 | 329 | \& |
8b428090 | 330 | .nf |
8b428090 MK |
331 | #include <sys/eventfd.h> |
332 | #include <unistd.h> | |
333 | #include <stdlib.h> | |
334 | #include <stdio.h> | |
335 | #include <stdint.h> /* Definition of uint64_t */ | |
336 | ||
337 | #define handle_error(msg) \\ | |
338 | do { perror(msg); exit(EXIT_FAILURE); } while (0) | |
339 | ||
340 | int | |
341 | main(int argc, char *argv[]) | |
342 | { | |
343 | int efd, j; | |
344 | uint64_t u; | |
345 | ssize_t s; | |
346 | ||
347 | if (argc < 2) { | |
348 | fprintf(stderr, "Usage: %s <num>...\\n", argv[0]); | |
349 | exit(EXIT_FAILURE); | |
350 | } | |
351 | ||
352 | efd = eventfd(0, 0); | |
353 | if (efd == \-1) | |
354 | handle_error("eventfd"); | |
355 | ||
356 | switch (fork()) { | |
357 | case 0: | |
358 | for (j = 1; j < argc; j++) { | |
359 | printf("Child writing %s to efd\\n", argv[j]); | |
360 | u = strtoull(argv[j], NULL, 0); | |
361 | /* strtoull() allows various bases */ | |
362 | s = write(efd, &u, sizeof(uint64_t)); | |
363 | if (s != sizeof(uint64_t)) | |
364 | handle_error("write"); | |
365 | } | |
366 | printf("Child completed write loop\\n"); | |
367 | ||
368 | exit(EXIT_SUCCESS); | |
369 | ||
370 | default: | |
371 | sleep(2); | |
372 | ||
373 | printf("Parent about to read\\n"); | |
374 | s = read(efd, &u, sizeof(uint64_t)); | |
375 | if (s != sizeof(uint64_t)) | |
376 | handle_error("read"); | |
377 | printf("Parent read %llu (0x%llx) from efd\\n", | |
378 | (unsigned long long) u, (unsigned long long) u); | |
379 | exit(EXIT_SUCCESS); | |
380 | ||
381 | case \-1: | |
382 | handle_error("fork"); | |
383 | } | |
384 | } | |
385 | .fi | |
386 | .SH "SEE ALSO" | |
387 | .BR futex (2), | |
388 | .BR pipe (2), | |
389 | .BR poll (2), | |
390 | .BR read (2), | |
391 | .BR select (2), | |
392 | .BR signalfd (2), | |
393 | .BR timerfd_create (2), | |
394 | .BR write (2), | |
395 | .BR epoll (7), | |
396 | .BR sem_overview (7) |