]> git.ipfire.org Git - thirdparty/man-pages.git/blob - man2/select.2
select.2: Show correspondence between select() and poll() readiness notifications
[thirdparty/man-pages.git] / man2 / select.2
1 .\" This manpage is copyright (C) 1992 Drew Eckhardt,
2 .\" copyright (C) 1995 Michael Shields.
3 .\"
4 .\" %%%LICENSE_START(VERBATIM)
5 .\" Permission is granted to make and distribute verbatim copies of this
6 .\" manual provided the copyright notice and this permission notice are
7 .\" preserved on all copies.
8 .\"
9 .\" Permission is granted to copy and distribute modified versions of this
10 .\" manual under the conditions for verbatim copying, provided that the
11 .\" entire resulting derived work is distributed under the terms of a
12 .\" permission notice identical to this one.
13 .\"
14 .\" Since the Linux kernel and libraries are constantly changing, this
15 .\" manual page may be incorrect or out-of-date. The author(s) assume no
16 .\" responsibility for errors or omissions, or for damages resulting from
17 .\" the use of the information contained herein. The author(s) may not
18 .\" have taken the same level of care in the production of this manual,
19 .\" which is licensed free of charge, as they might when working
20 .\" professionally.
21 .\"
22 .\" Formatted or processed versions of this manual, if unaccompanied by
23 .\" the source, must acknowledge the copyright and authors of this work.
24 .\" %%%LICENSE_END
25 .\"
26 .\" Modified 1993-07-24 by Rik Faith <faith@cs.unc.edu>
27 .\" Modified 1995-05-18 by Jim Van Zandt <jrv@vanzandt.mv.com>
28 .\" Sun Feb 11 14:07:00 MET 1996 Martin Schulze <joey@linux.de>
29 .\" * layout slightly modified
30 .\"
31 .\" Modified Mon Oct 21 23:05:29 EDT 1996 by Eric S. Raymond <esr@thyrsus.com>
32 .\" Modified Thu Feb 24 01:41:09 CET 2000 by aeb
33 .\" Modified Thu Feb 9 22:32:09 CET 2001 by bert hubert <ahu@ds9a.nl>, aeb
34 .\" Modified Mon Nov 11 14:35:00 PST 2002 by Ben Woodard <ben@zork.net>
35 .\" 2005-03-11, mtk, modified pselect() text (it is now a system
36 .\" call in 2.6.16).
37 .\"
38 .TH SELECT 2 2017-03-13 "Linux" "Linux Programmer's Manual"
39 .SH NAME
40 select, pselect, FD_CLR, FD_ISSET, FD_SET, FD_ZERO \-
41 synchronous I/O multiplexing
42 .SH SYNOPSIS
43 .nf
44 /* According to POSIX.1-2001, POSIX.1-2008 */
45 .br
46 .B #include <sys/select.h>
47 .sp
48 /* According to earlier standards */
49 .br
50 .B #include <sys/time.h>
51 .br
52 .B #include <sys/types.h>
53 .br
54 .B #include <unistd.h>
55 .sp
56 .BI "int select(int " nfds ", fd_set *" readfds ", fd_set *" writefds ,
57 .BI " fd_set *" exceptfds ", struct timeval *" timeout );
58 .sp
59 .BI "void FD_CLR(int " fd ", fd_set *" set );
60 .br
61 .BI "int FD_ISSET(int " fd ", fd_set *" set );
62 .br
63 .BI "void FD_SET(int " fd ", fd_set *" set );
64 .br
65 .BI "void FD_ZERO(fd_set *" set );
66 .sp
67 .B #include <sys/select.h>
68 .sp
69 .BI "int pselect(int " nfds ", fd_set *" readfds ", fd_set *" writefds ,
70 .BI " fd_set *" exceptfds ", const struct timespec *" timeout ,
71 .BI " const sigset_t *" sigmask );
72 .fi
73 .sp
74 .in -4n
75 Feature Test Macro Requirements for glibc (see
76 .BR feature_test_macros (7)):
77 .in
78 .sp
79 .BR pselect ():
80 _POSIX_C_SOURCE\ >=\ 200112L
81 .SH DESCRIPTION
82 .BR select ()
83 and
84 .BR pselect ()
85 allow a program to monitor multiple file descriptors,
86 waiting until one or more of the file descriptors become "ready"
87 for some class of I/O operation (e.g., input possible).
88 A file descriptor is considered ready if it is possible to
89 perform a corresponding I/O operation (e.g.,
90 .BR read (2)
91 without blocking, or a sufficiently small
92 .BR write (2)).
93 .PP
94 .BR select ()
95 can monitor only file descriptor numbers that are less than
96 .BR FD_SETSIZE ;
97 .BR poll (2)
98 does not have this limitation.
99 See BUGS.
100 .PP
101 The operation of
102 .BR select ()
103 and
104 .BR pselect ()
105 is identical, other than these three differences:
106 .TP
107 (i)
108 .BR select ()
109 uses a timeout that is a
110 .I struct timeval
111 (with seconds and microseconds), while
112 .BR pselect ()
113 uses a
114 .I struct timespec
115 (with seconds and nanoseconds).
116 .TP
117 (ii)
118 .BR select ()
119 may update the
120 .I timeout
121 argument to indicate how much time was left.
122 .BR pselect ()
123 does not change this argument.
124 .TP
125 (iii)
126 .BR select ()
127 has no
128 .I sigmask
129 argument, and behaves as
130 .BR pselect ()
131 called with NULL
132 .IR sigmask .
133 .PP
134 Three independent sets of file descriptors are watched.
135 Those listed in
136 .I readfds
137 will be watched to see if characters become
138 available for reading (more precisely, to see if a read will not
139 block; in particular, a file descriptor is also ready on end-of-file),
140 those in
141 .I writefds
142 will be watched to see if space is available for write (though a large
143 write may still block), and those in
144 .I exceptfds
145 will be watched for exceptions.
146 On exit, the sets are modified in place
147 to indicate which file descriptors actually changed status.
148 Each of the three file descriptor sets may be specified as NULL
149 if no file descriptors are to be watched for the corresponding class
150 of events.
151 .PP
152 Four macros are provided to manipulate the sets.
153 .BR FD_ZERO ()
154 clears a set.
155 .BR FD_SET ()
156 and
157 .BR FD_CLR ()
158 respectively add and remove a given file descriptor from a set.
159 .BR FD_ISSET ()
160 tests to see if a file descriptor is part of the set;
161 this is useful after
162 .BR select ()
163 returns.
164 .PP
165 .I nfds
166 should be set to the highest-numbered file descriptor in any
167 of the three sets, plus 1.
168 The indicated file descriptors in each set are checked, up to this limit
169 (but see BUGS).
170 .PP
171 The
172 .I timeout
173 argument specifies the interval that
174 .BR select ()
175 should block waiting for a file descriptor to become ready.
176 The call will block until either:
177 .IP * 3
178 a file descriptor becomes ready;
179 .IP *
180 the call is interrupted by a signal handler; or
181 .IP *
182 the timeout expires.
183 .PP
184 Note that the
185 .I timeout
186 interval will be rounded up to the system clock granularity,
187 and kernel scheduling delays mean that the blocking interval
188 may overrun by a small amount.
189 If both fields of the
190 .I timeval
191 structure are zero, then
192 .BR select ()
193 returns immediately.
194 (This is useful for polling.)
195 If
196 .I timeout
197 is NULL (no timeout),
198 .BR select ()
199 can block indefinitely.
200 .PP
201 .I sigmask
202 is a pointer to a signal mask (see
203 .BR sigprocmask (2));
204 if it is not NULL, then
205 .BR pselect ()
206 first replaces the current signal mask by the one pointed to by
207 .IR sigmask ,
208 then does the "select" function, and then restores the original
209 signal mask.
210 .PP
211 Other than the difference in the precision of the
212 .I timeout
213 argument, the following
214 .BR pselect ()
215 call:
216 .nf
217
218 ready = pselect(nfds, &readfds, &writefds, &exceptfds,
219 timeout, &sigmask);
220
221 .fi
222 is equivalent to
223 .I atomically
224 executing the following calls:
225 .nf
226
227 sigset_t origmask;
228
229 pthread_sigmask(SIG_SETMASK, &sigmask, &origmask);
230 ready = select(nfds, &readfds, &writefds, &exceptfds, timeout);
231 pthread_sigmask(SIG_SETMASK, &origmask, NULL);
232 .fi
233 .PP
234 The reason that
235 .BR pselect ()
236 is needed is that if one wants to wait for either a signal
237 or for a file descriptor to become ready, then
238 an atomic test is needed to prevent race conditions.
239 (Suppose the signal handler sets a global flag and
240 returns.
241 Then a test of this global flag followed by a call of
242 .BR select ()
243 could hang indefinitely if the signal arrived just after the test
244 but just before the call.
245 By contrast,
246 .BR pselect ()
247 allows one to first block signals, handle the signals that have come in,
248 then call
249 .BR pselect ()
250 with the desired
251 .IR sigmask ,
252 avoiding the race.)
253 .SS The timeout
254 The time structures involved are defined in
255 .I <sys/time.h>
256 and look like
257
258 .in +4n
259 .nf
260 struct timeval {
261 long tv_sec; /* seconds */
262 long tv_usec; /* microseconds */
263 };
264 .fi
265 .in
266
267 and
268
269 .in +4n
270 .nf
271 struct timespec {
272 long tv_sec; /* seconds */
273 long tv_nsec; /* nanoseconds */
274 };
275 .fi
276 .in
277
278 (However, see below on the POSIX.1 versions.)
279 .PP
280 Some code calls
281 .BR select ()
282 with all three sets empty,
283 .I nfds
284 zero, and a non-NULL
285 .I timeout
286 as a fairly portable way to sleep with subsecond precision.
287 .PP
288 On Linux,
289 .BR select ()
290 modifies
291 .I timeout
292 to reflect the amount of time not slept; most other implementations
293 do not do this.
294 (POSIX.1 permits either behavior.)
295 This causes problems both when Linux code which reads
296 .I timeout
297 is ported to other operating systems, and when code is ported to Linux
298 that reuses a \fIstruct timeval\fP for multiple
299 .BR select ()s
300 in a loop without reinitializing it.
301 Consider
302 .I timeout
303 to be undefined after
304 .BR select ()
305 returns.
306 .\" .PP - it is rumored that:
307 .\" On BSD, when a timeout occurs, the file descriptor bits are not changed.
308 .\" - it is certainly true that:
309 .\" Linux follows SUSv2 and sets the bit masks to zero upon a timeout.
310 .SH RETURN VALUE
311 On success,
312 .BR select ()
313 and
314 .BR pselect ()
315 return the number of file descriptors contained in the three returned
316 descriptor sets (that is, the total number of bits that are set in
317 .IR readfds ,
318 .IR writefds ,
319 .IR exceptfds )
320 which may be zero if the timeout expires before anything interesting happens.
321 On error, \-1 is returned, and
322 .I errno
323 is set to indicate the error;
324 the file descriptor sets are unmodified,
325 and
326 .I timeout
327 becomes undefined.
328 .SH ERRORS
329 .TP
330 .B EBADF
331 An invalid file descriptor was given in one of the sets.
332 (Perhaps a file descriptor that was already closed,
333 or one on which an error has occurred.)
334 However, see BUGS.
335 .TP
336 .B EINTR
337 A signal was caught; see
338 .BR signal (7).
339 .TP
340 .B EINVAL
341 .I nfds
342 is negative or exceeds the
343 .BR RLIMIT_NOFILE
344 resource limit (see
345 .BR getrlimit (2)).
346 .TP
347 .B EINVAL
348 The value contained within
349 .I timeout
350 is invalid.
351 .TP
352 .B ENOMEM
353 Unable to allocate memory for internal tables.
354 .SH VERSIONS
355 .BR pselect ()
356 was added to Linux in kernel 2.6.16.
357 Prior to this,
358 .BR pselect ()
359 was emulated in glibc (but see BUGS).
360 .SH CONFORMING TO
361 .BR select ()
362 conforms to POSIX.1-2001, POSIX.1-2008, and
363 4.4BSD
364 .RB ( select ()
365 first appeared in 4.2BSD).
366 Generally portable to/from
367 non-BSD systems supporting clones of the BSD socket layer (including
368 System\ V variants).
369 However, note that the System\ V variant typically
370 sets the timeout variable before exit, but the BSD variant does not.
371 .PP
372 .BR pselect ()
373 is defined in POSIX.1g, and in
374 POSIX.1-2001 and POSIX.1-2008.
375 .SH NOTES
376 An
377 .I fd_set
378 is a fixed size buffer.
379 Executing
380 .BR FD_CLR ()
381 or
382 .BR FD_SET ()
383 with a value of
384 .I fd
385 that is negative or is equal to or larger than
386 .B FD_SETSIZE
387 will result
388 in undefined behavior.
389 Moreover, POSIX requires
390 .I fd
391 to be a valid file descriptor.
392
393 On some other UNIX systems,
394 .\" Darwin, according to a report by Jeremy Sequoia, relayed by Josh Triplett
395 .BR select ()
396 can fail with the error
397 .B EAGAIN
398 if the system fails to allocate kernel-internal resources, rather than
399 .B ENOMEM
400 as Linux does.
401 POSIX specifies this error for
402 .BR poll (2),
403 but not for
404 .BR select ().
405 Portable programs may wish to check for
406 .B EAGAIN
407 and loop, just as with
408 .BR EINTR .
409
410 On systems that lack
411 .BR pselect (),
412 reliable (and more portable) signal trapping can be achieved
413 using the self-pipe trick.
414 In this technique,
415 a signal handler writes a byte to a pipe whose other end
416 is monitored by
417 .BR select ()
418 in the main program.
419 (To avoid possibly blocking when writing to a pipe that may be full
420 or reading from a pipe that may be empty,
421 nonblocking I/O is used when reading from and writing to the pipe.)
422
423 Concerning the types involved, the classical situation is that
424 the two fields of a
425 .I timeval
426 structure are typed as
427 .I long
428 (as shown above), and the structure is defined in
429 .IR <sys/time.h> .
430 The POSIX.1 situation is
431
432 .in +4n
433 .nf
434 struct timeval {
435 time_t tv_sec; /* seconds */
436 suseconds_t tv_usec; /* microseconds */
437 };
438 .fi
439 .in
440
441 where the structure is defined in
442 .I <sys/select.h>
443 and the data types
444 .I time_t
445 and
446 .I suseconds_t
447 are defined in
448 .IR <sys/types.h> .
449 .LP
450 Concerning prototypes, the classical situation is that one should
451 include
452 .I <time.h>
453 for
454 .BR select ().
455 The POSIX.1 situation is that one should include
456 .I <sys/select.h>
457 for
458 .BR select ()
459 and
460 .BR pselect ().
461
462 Under glibc 2.0,
463 .I <sys/select.h>
464 gives the wrong prototype for
465 .BR pselect ().
466 Under glibc 2.1 to 2.2.1, it gives
467 .BR pselect ()
468 when
469 .B _GNU_SOURCE
470 is defined.
471 Since glibc 2.2.2, the requirements are as shown in the SYNOPSIS.
472 .\"
473 .SS Correspondence between select() and poll() notifications
474 Within the Linux kernel source,
475 .\" fs/select.c
476 we find the following definitions which show the correspondence
477 between the readable, writable, and exceptional condition notifications of
478 .BR select ()
479 and the event notifications provided by
480 .BR poll (2)
481 (and
482 .BR epoll (7)):
483
484 .nf
485 .in +4n
486 #define POLLIN_SET (POLLRDNORM | POLLRDBAND | POLLIN | POLLHUP |
487 POLLERR)
488 /* Ready for reading */
489 #define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR)
490 /* Ready for writing */
491 #define POLLEX_SET (POLLPRI)
492 /* Exceptional condition */
493 .in
494 .fi
495 .\"
496 .SS Multithreaded applications
497 If a file descriptor being monitored by
498 .BR select ()
499 is closed in another thread, the result is unspecified.
500 On some UNIX systems,
501 .BR select ()
502 unblocks and returns, with an indication that the file descriptor is ready
503 (a subsequent I/O operation will likely fail with an error,
504 unless the file descriptor is reopened between the time
505 .BR select ()
506 returned and the I/O operation is performed).
507 On Linux (and some other systems),
508 closing the file descriptor in another thread has no effect on
509 .BR select ().
510 In summary, any application that relies on a particular behavior
511 in this scenario must be considered buggy.
512 .\"
513 .SS C library/kernel differences
514 The Linux kernel allows file descriptor sets of arbitrary size,
515 determining the length of the sets to be checked from the value of
516 .IR nfds .
517 However, in the glibc implementation, the
518 .IR fd_set
519 type is fixed in size.
520 See also BUGS.
521
522 The
523 .BR pselect ()
524 interface described in this page is implemented by glibc.
525 The underlying Linux system call is named
526 .BR pselect6 ().
527 This system call has somewhat different behavior from the glibc
528 wrapper function.
529
530 The Linux
531 .BR pselect6 ()
532 system call modifies its
533 .I timeout
534 argument.
535 However, the glibc wrapper function hides this behavior
536 by using a local variable for the timeout argument that
537 is passed to the system call.
538 Thus, the glibc
539 .BR pselect ()
540 function does not modify its
541 .I timeout
542 argument;
543 this is the behavior required by POSIX.1-2001.
544
545 The final argument of the
546 .BR pselect6 ()
547 system call is not a
548 .I "sigset_t\ *"
549 pointer, but is instead a structure of the form:
550 .in +4
551 .nf
552
553 struct {
554 const kernel_sigset_t *ss; /* Pointer to signal set */
555 size_t ss_len; /* Size (in bytes) of object
556 pointed to by 'ss' */
557 };
558
559 .fi
560 .in
561 This allows the system call to obtain both
562 a pointer to the signal set and its size,
563 while allowing for the fact that most architectures
564 support a maximum of 6 arguments to a system call.
565 See
566 .BR sigprocmask (2)
567 for a discussion of the difference between the kernel and libc
568 notion of the signal set.
569 .SH BUGS
570 POSIX allows an implementation to define an upper limit,
571 advertised via the constant
572 .BR FD_SETSIZE ,
573 on the range of file descriptors that can be specified
574 in a file descriptor set.
575 The Linux kernel imposes no fixed limit, but the glibc implementation makes
576 .IR fd_set
577 a fixed-size type, with
578 .BR FD_SETSIZE
579 defined as 1024, and the
580 .BR FD_* ()
581 macros operating according to that limit.
582 To monitor file descriptors greater than 1023, use
583 .BR poll (2)
584 instead.
585
586 According to POSIX,
587 .BR select ()
588 should check all specified file descriptors in the three file descriptor sets,
589 up to the limit
590 .IR nfds\-1 .
591 However, the current implementation ignores any file descriptor in
592 these sets that is greater than the maximum file descriptor number
593 that the process currently has open.
594 According to POSIX, any such file descriptor that is specified in one
595 of the sets should result in the error
596 .BR EBADF .
597
598 Glibc 2.0 provided a version of
599 .BR pselect ()
600 that did not take a
601 .I sigmask
602 argument.
603
604 Starting with version 2.1, glibc provided an emulation of
605 .BR pselect ()
606 that was implemented using
607 .BR sigprocmask (2)
608 and
609 .BR select ().
610 This implementation remained vulnerable to the very race condition that
611 .BR pselect ()
612 was designed to prevent.
613 Modern versions of glibc use the (race-free)
614 .BR pselect ()
615 system call on kernels where it is provided.
616
617 Under Linux,
618 .BR select ()
619 may report a socket file descriptor as "ready for reading", while
620 nevertheless a subsequent read blocks.
621 This could for example
622 happen when data has arrived but upon examination has the wrong
623 checksum and is discarded.
624 There may be other circumstances
625 in which a file descriptor is spuriously reported as ready.
626 .\" Stevens discusses a case where accept can block after select
627 .\" returns successfully because of an intervening RST from the client.
628 Thus it may be safer to use
629 .B O_NONBLOCK
630 on sockets that should not block.
631 .\" Maybe the kernel should have returned EIO in such a situation?
632
633 On Linux,
634 .BR select ()
635 also modifies
636 .I timeout
637 if the call is interrupted by a signal handler (i.e., the
638 .B EINTR
639 error return).
640 This is not permitted by POSIX.1.
641 The Linux
642 .BR pselect ()
643 system call has the same behavior,
644 but the glibc wrapper hides this behavior by internally copying the
645 .I timeout
646 to a local variable and passing that variable to the system call.
647 .SH EXAMPLE
648 .nf
649 #include <stdio.h>
650 #include <stdlib.h>
651 #include <sys/time.h>
652 #include <sys/types.h>
653 #include <unistd.h>
654
655 int
656 main(void)
657 {
658 fd_set rfds;
659 struct timeval tv;
660 int retval;
661
662 /* Watch stdin (fd 0) to see when it has input. */
663
664 FD_ZERO(&rfds);
665 FD_SET(0, &rfds);
666
667 /* Wait up to five seconds. */
668
669 tv.tv_sec = 5;
670 tv.tv_usec = 0;
671
672 retval = select(1, &rfds, NULL, NULL, &tv);
673 /* Don't rely on the value of tv now! */
674
675 if (retval == \-1)
676 perror("select()");
677 else if (retval)
678 printf("Data is available now.\\n");
679 /* FD_ISSET(0, &rfds) will be true. */
680 else
681 printf("No data within five seconds.\\n");
682
683 exit(EXIT_SUCCESS);
684 }
685 .fi
686 .SH SEE ALSO
687 .BR accept (2),
688 .BR connect (2),
689 .BR poll (2),
690 .BR read (2),
691 .BR recv (2),
692 .BR restart_syscall (2),
693 .BR send (2),
694 .BR sigprocmask (2),
695 .BR write (2),
696 .BR epoll (7),
697 .BR time (7)
698
699 For a tutorial with discussion and examples, see
700 .BR select_tut (2).