]> git.ipfire.org Git - thirdparty/man-pages.git/blob - man2/select.2
dist.mk, All pages: .TH: Generate date at 'make dist'
[thirdparty/man-pages.git] / man2 / select.2
1 .\" This manpage is copyright (C) 1992 Drew Eckhardt,
2 .\" copyright (C) 1995 Michael Shields,
3 .\" copyright (C) 2001 Paul Sheer,
4 .\" copyright (C) 2006, 2019 Michael Kerrisk <mtk.manpages@gmail.com>
5 .\"
6 .\" SPDX-License-Identifier: Linux-man-pages-copyleft
7 .\"
8 .\" Modified 1993-07-24 by Rik Faith <faith@cs.unc.edu>
9 .\" Modified 1995-05-18 by Jim Van Zandt <jrv@vanzandt.mv.com>
10 .\" Sun Feb 11 14:07:00 MET 1996 Martin Schulze <joey@linux.de>
11 .\" * layout slightly modified
12 .\"
13 .\" Modified Mon Oct 21 23:05:29 EDT 1996 by Eric S. Raymond <esr@thyrsus.com>
14 .\" Modified Thu Feb 24 01:41:09 CET 2000 by aeb
15 .\" Modified Thu Feb 9 22:32:09 CET 2001 by bert hubert <ahu@ds9a.nl>, aeb
16 .\" Modified Mon Nov 11 14:35:00 PST 2002 by Ben Woodard <ben@zork.net>
17 .\" 2005-03-11, mtk, modified pselect() text (it is now a system
18 .\" call in 2.6.16).
19 .\"
20 .TH SELECT 2 (date) "Linux man-pages (unreleased)"
21 .SH NAME
22 select, pselect, FD_CLR, FD_ISSET, FD_SET, FD_ZERO, fd_set \-
23 synchronous I/O multiplexing
24 .SH LIBRARY
25 Standard C library
26 .RI ( libc ", " \-lc )
27 .SH SYNOPSIS
28 .nf
29 .B #include <sys/select.h>
30 .PP
31 .BR typedef " /* ... */ " fd_set;
32 .PP
33 .BI "int select(int " nfds ", fd_set *restrict " readfds ,
34 .BI " fd_set *restrict " writefds ", fd_set *restrict " exceptfds ,
35 .BI " struct timeval *restrict " timeout );
36 .PP
37 .BI "void FD_CLR(int " fd ", fd_set *" set );
38 .BI "int FD_ISSET(int " fd ", fd_set *" set );
39 .BI "void FD_SET(int " fd ", fd_set *" set );
40 .BI "void FD_ZERO(fd_set *" set );
41 .PP
42 .BI "int pselect(int " nfds ", fd_set *restrict " readfds ,
43 .BI " fd_set *restrict " writefds ", fd_set *restrict " exceptfds ,
44 .BI " const struct timespec *restrict " timeout ,
45 .BI " const sigset_t *restrict " sigmask );
46 .fi
47 .PP
48 .RS -4
49 Feature Test Macro Requirements for glibc (see
50 .BR feature_test_macros (7)):
51 .RE
52 .PP
53 .BR pselect ():
54 .nf
55 _POSIX_C_SOURCE >= 200112L
56 .fi
57 .SH DESCRIPTION
58 .BR "WARNING" :
59 .BR select ()
60 can monitor only file descriptor numbers that are less than
61 .B FD_SETSIZE
62 (1024)\(eman unreasonably low limit for many modern applications\(emand
63 this limitation will not change.
64 All modern applications should instead use
65 .BR poll (2)
66 or
67 .BR epoll (7),
68 which do not suffer this limitation.
69 .PP
70 .BR select ()
71 allows a program to monitor multiple file descriptors,
72 waiting until one or more of the file descriptors become "ready"
73 for some class of I/O operation (e.g., input possible).
74 A file descriptor is considered ready if it is possible to
75 perform a corresponding I/O operation (e.g.,
76 .BR read (2),
77 or a sufficiently small
78 .BR write (2))
79 without blocking.
80 .\"
81 .SS fd_set
82 A structure type that can represent a set of file descriptors.
83 According to POSIX,
84 the maximum number of file descriptors in an
85 .I fd_set
86 structure is the value of the macro
87 .BR FD_SETSIZE .
88 .\"
89 .SS File descriptor sets
90 The principal arguments of
91 .BR select ()
92 are three "sets" of file descriptors (declared with the type
93 .IR fd_set ),
94 which allow the caller to wait for three classes of events
95 on the specified set of file descriptors.
96 Each of the
97 .I fd_set
98 arguments may be specified as NULL if no file descriptors are
99 to be watched for the corresponding class of events.
100 .PP
101 .BR "Note well" :
102 Upon return, each of the file descriptor sets is modified in place
103 to indicate which file descriptors are currently "ready".
104 Thus, if using
105 .BR select ()
106 within a loop, the sets \fImust be reinitialized\fP before each call.
107 .PP
108 The contents of a file descriptor set can be manipulated
109 using the following macros:
110 .TP
111 .BR FD_ZERO ()
112 This macro clears (removes all file descriptors from)
113 .IR set .
114 It should be employed as the first step in initializing a file descriptor set.
115 .TP
116 .BR FD_SET ()
117 This macro adds the file descriptor
118 .I fd
119 to
120 .IR set .
121 Adding a file descriptor that is already present in the set is a no-op,
122 and does not produce an error.
123 .TP
124 .BR FD_CLR ()
125 This macro removes the file descriptor
126 .I fd
127 from
128 .IR set .
129 Removing a file descriptor that is not present in the set is a no-op,
130 and does not produce an error.
131 .TP
132 .BR FD_ISSET ()
133 .BR select ()
134 modifies the contents of the sets according to the rules
135 described below.
136 After calling
137 .BR select (),
138 the
139 .BR FD_ISSET ()
140 macro
141 can be used to test if a file descriptor is still present in a set.
142 .BR FD_ISSET ()
143 returns nonzero if the file descriptor
144 .I fd
145 is present in
146 .IR set ,
147 and zero if it is not.
148 .\"
149 .SS Arguments
150 The arguments of
151 .BR select ()
152 are as follows:
153 .TP
154 .I readfds
155 The file descriptors in this set are watched to see if they are
156 ready for reading.
157 A file descriptor is ready for reading if a read operation will not
158 block; in particular, a file descriptor is also ready on end-of-file.
159 .IP
160 After
161 .BR select ()
162 has returned, \fIreadfds\fP will be
163 cleared of all file descriptors except for those that are ready for reading.
164 .TP
165 .I writefds
166 The file descriptors in this set are watched to see if they are
167 ready for writing.
168 A file descriptor is ready for writing if a write operation will not block.
169 However, even if a file descriptor is indicated as writable,
170 a large write may still block.
171 .IP
172 After
173 .BR select ()
174 has returned, \fIwritefds\fP will be
175 cleared of all file descriptors except for those that are ready for writing.
176 .TP
177 .I exceptfds
178 The file descriptors in this set are watched for "exceptional conditions".
179 For examples of some exceptional conditions, see the discussion of
180 .B POLLPRI
181 in
182 .BR poll (2).
183 .IP
184 After
185 .BR select ()
186 has returned,
187 \fIexceptfds\fP will be cleared of all file descriptors except for those
188 for which an exceptional condition has occurred.
189 .TP
190 .I nfds
191 This argument should be set to the highest-numbered file descriptor in any
192 of the three sets, plus 1.
193 The indicated file descriptors in each set are checked, up to this limit
194 (but see BUGS).
195 .TP
196 .I timeout
197 The
198 .I timeout
199 argument is a
200 .I timeval
201 structure (shown below) that specifies the interval that
202 .BR select ()
203 should block waiting for a file descriptor to become ready.
204 The call will block until either:
205 .RS
206 .IP \(bu 2
207 a file descriptor becomes ready;
208 .IP \(bu
209 the call is interrupted by a signal handler; or
210 .IP \(bu
211 the timeout expires.
212 .RE
213 .IP
214 Note that the
215 .I timeout
216 interval will be rounded up to the system clock granularity,
217 and kernel scheduling delays mean that the blocking interval
218 may overrun by a small amount.
219 .IP
220 If both fields of the
221 .I timeval
222 structure are zero, then
223 .BR select ()
224 returns immediately.
225 (This is useful for polling.)
226 .IP
227 If
228 .I timeout
229 is specified as NULL,
230 .BR select ()
231 blocks indefinitely waiting for a file descriptor to become ready.
232 .\"
233 .SS pselect()
234 The
235 .BR pselect ()
236 system call allows an application to safely wait until either
237 a file descriptor becomes ready or until a signal is caught.
238 .PP
239 The operation of
240 .BR select ()
241 and
242 .BR pselect ()
243 is identical, other than these three differences:
244 .IP \(bu 2
245 .BR select ()
246 uses a timeout that is a
247 .I struct timeval
248 (with seconds and microseconds), while
249 .BR pselect ()
250 uses a
251 .I struct timespec
252 (with seconds and nanoseconds).
253 .IP \(bu
254 .BR select ()
255 may update the
256 .I timeout
257 argument to indicate how much time was left.
258 .BR pselect ()
259 does not change this argument.
260 .IP \(bu
261 .BR select ()
262 has no
263 .I sigmask
264 argument, and behaves as
265 .BR pselect ()
266 called with NULL
267 .IR sigmask .
268 .PP
269 .I sigmask
270 is a pointer to a signal mask (see
271 .BR sigprocmask (2));
272 if it is not NULL, then
273 .BR pselect ()
274 first replaces the current signal mask by the one pointed to by
275 .IR sigmask ,
276 then does the "select" function, and then restores the original
277 signal mask.
278 (If
279 .I sigmask
280 is NULL,
281 the signal mask is not modified during the
282 .BR pselect ()
283 call.)
284 .PP
285 Other than the difference in the precision of the
286 .I timeout
287 argument, the following
288 .BR pselect ()
289 call:
290 .PP
291 .in +4n
292 .EX
293 ready = pselect(nfds, &readfds, &writefds, &exceptfds,
294 timeout, &sigmask);
295 .EE
296 .in
297 .PP
298 is equivalent to
299 .I atomically
300 executing the following calls:
301 .PP
302 .in +4n
303 .EX
304 sigset_t origmask;
305
306 pthread_sigmask(SIG_SETMASK, &sigmask, &origmask);
307 ready = select(nfds, &readfds, &writefds, &exceptfds, timeout);
308 pthread_sigmask(SIG_SETMASK, &origmask, NULL);
309 .EE
310 .in
311 .PP
312 The reason that
313 .BR pselect ()
314 is needed is that if one wants to wait for either a signal
315 or for a file descriptor to become ready, then
316 an atomic test is needed to prevent race conditions.
317 (Suppose the signal handler sets a global flag and
318 returns.
319 Then a test of this global flag followed by a call of
320 .BR select ()
321 could hang indefinitely if the signal arrived just after the test
322 but just before the call.
323 By contrast,
324 .BR pselect ()
325 allows one to first block signals, handle the signals that have come in,
326 then call
327 .BR pselect ()
328 with the desired
329 .IR sigmask ,
330 avoiding the race.)
331 .SS The timeout
332 The
333 .I timeout
334 argument for
335 .BR select ()
336 is a structure of the following type:
337 .PP
338 .in +4n
339 .EX
340 struct timeval {
341 time_t tv_sec; /* seconds */
342 suseconds_t tv_usec; /* microseconds */
343 };
344 .EE
345 .in
346 .PP
347 The corresponding argument for
348 .BR pselect ()
349 is a
350 .BR timespec (3)
351 structure.
352 .PP
353 On Linux,
354 .BR select ()
355 modifies
356 .I timeout
357 to reflect the amount of time not slept; most other implementations
358 do not do this.
359 (POSIX.1 permits either behavior.)
360 This causes problems both when Linux code which reads
361 .I timeout
362 is ported to other operating systems, and when code is ported to Linux
363 that reuses a \fIstruct timeval\fP for multiple
364 .BR select ()s
365 in a loop without reinitializing it.
366 Consider
367 .I timeout
368 to be undefined after
369 .BR select ()
370 returns.
371 .\" .PP - it is rumored that:
372 .\" On BSD, when a timeout occurs, the file descriptor bits are not changed.
373 .\" - it is certainly true that:
374 .\" Linux follows SUSv2 and sets the bit masks to zero upon a timeout.
375 .SH RETURN VALUE
376 On success,
377 .BR select ()
378 and
379 .BR pselect ()
380 return the number of file descriptors contained in the three returned
381 descriptor sets (that is, the total number of bits that are set in
382 .IR readfds ,
383 .IR writefds ,
384 .IR exceptfds ).
385 The return value may be zero if the timeout expired before any
386 file descriptors became ready.
387 .PP
388 On error, \-1 is returned, and
389 .I errno
390 is set to indicate the error;
391 the file descriptor sets are unmodified,
392 and
393 .I timeout
394 becomes undefined.
395 .SH ERRORS
396 .TP
397 .B EBADF
398 An invalid file descriptor was given in one of the sets.
399 (Perhaps a file descriptor that was already closed,
400 or one on which an error has occurred.)
401 However, see BUGS.
402 .TP
403 .B EINTR
404 A signal was caught; see
405 .BR signal (7).
406 .TP
407 .B EINVAL
408 .I nfds
409 is negative or exceeds the
410 .B RLIMIT_NOFILE
411 resource limit (see
412 .BR getrlimit (2)).
413 .TP
414 .B EINVAL
415 The value contained within
416 .I timeout
417 is invalid.
418 .TP
419 .B ENOMEM
420 Unable to allocate memory for internal tables.
421 .SH VERSIONS
422 .BR pselect ()
423 was added to Linux in kernel 2.6.16.
424 Prior to this,
425 .BR pselect ()
426 was emulated in glibc (but see BUGS).
427 .SH STANDARDS
428 .BR select ()
429 conforms to POSIX.1-2001, POSIX.1-2008, and
430 4.4BSD
431 .RB ( select ()
432 first appeared in 4.2BSD).
433 Generally portable to/from
434 non-BSD systems supporting clones of the BSD socket layer (including
435 System\ V variants).
436 However, note that the System\ V variant typically
437 sets the timeout variable before returning, but the BSD variant does not.
438 .PP
439 .BR pselect ()
440 is defined in POSIX.1g, and in
441 POSIX.1-2001 and POSIX.1-2008.
442 .PP
443 .I fd_set
444 is defined in POSIX.1-2001 and later.
445 .SH NOTES
446 The following header also provides the
447 .I fd_set
448 type:
449 .IR <sys/time.h> .
450 .PP
451 An
452 .I fd_set
453 is a fixed-size buffer.
454 Executing
455 .BR FD_CLR ()
456 or
457 .BR FD_SET ()
458 with a value of
459 .I fd
460 that is negative or is equal to or larger than
461 .B FD_SETSIZE
462 will result
463 in undefined behavior.
464 Moreover, POSIX requires
465 .I fd
466 to be a valid file descriptor.
467 .PP
468 The operation of
469 .BR select ()
470 and
471 .BR pselect ()
472 is not affected by the
473 .B O_NONBLOCK
474 flag.
475 .PP
476 On some other UNIX systems,
477 .\" Darwin, according to a report by Jeremy Sequoia, relayed by Josh Triplett
478 .BR select ()
479 can fail with the error
480 .B EAGAIN
481 if the system fails to allocate kernel-internal resources, rather than
482 .B ENOMEM
483 as Linux does.
484 POSIX specifies this error for
485 .BR poll (2),
486 but not for
487 .BR select ().
488 Portable programs may wish to check for
489 .B EAGAIN
490 and loop, just as with
491 .BR EINTR .
492 .\"
493 .SS The self-pipe trick
494 On systems that lack
495 .BR pselect (),
496 reliable (and more portable) signal trapping can be achieved
497 using the self-pipe trick.
498 In this technique,
499 a signal handler writes a byte to a pipe whose other end
500 is monitored by
501 .BR select ()
502 in the main program.
503 (To avoid possibly blocking when writing to a pipe that may be full
504 or reading from a pipe that may be empty,
505 nonblocking I/O is used when reading from and writing to the pipe.)
506 .\"
507 .SS Emulating usleep(3)
508 Before the advent of
509 .BR usleep (3),
510 some code employed a call to
511 .BR select ()
512 with all three sets empty,
513 .I nfds
514 zero, and a non-NULL
515 .I timeout
516 as a fairly portable way to sleep with subsecond precision.
517 .\"
518 .SS Correspondence between select() and poll() notifications
519 Within the Linux kernel source,
520 .\" fs/select.c
521 we find the following definitions which show the correspondence
522 between the readable, writable, and exceptional condition notifications of
523 .BR select ()
524 and the event notifications provided by
525 .BR poll (2)
526 and
527 .BR epoll (7):
528 .PP
529 .in +4n
530 .EX
531 #define POLLIN_SET (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN |
532 EPOLLHUP | EPOLLERR)
533 /* Ready for reading */
534 #define POLLOUT_SET (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT |
535 EPOLLERR)
536 /* Ready for writing */
537 #define POLLEX_SET (EPOLLPRI)
538 /* Exceptional condition */
539 .EE
540 .in
541 .\"
542 .SS Multithreaded applications
543 If a file descriptor being monitored by
544 .BR select ()
545 is closed in another thread, the result is unspecified.
546 On some UNIX systems,
547 .BR select ()
548 unblocks and returns, with an indication that the file descriptor is ready
549 (a subsequent I/O operation will likely fail with an error,
550 unless another process reopens the file descriptor between the time
551 .BR select ()
552 returned and the I/O operation is performed).
553 On Linux (and some other systems),
554 closing the file descriptor in another thread has no effect on
555 .BR select ().
556 In summary, any application that relies on a particular behavior
557 in this scenario must be considered buggy.
558 .\"
559 .SS C library/kernel differences
560 The Linux kernel allows file descriptor sets of arbitrary size,
561 determining the length of the sets to be checked from the value of
562 .IR nfds .
563 However, in the glibc implementation, the
564 .I fd_set
565 type is fixed in size.
566 See also BUGS.
567 .PP
568 The
569 .BR pselect ()
570 interface described in this page is implemented by glibc.
571 The underlying Linux system call is named
572 .BR pselect6 ().
573 This system call has somewhat different behavior from the glibc
574 wrapper function.
575 .PP
576 The Linux
577 .BR pselect6 ()
578 system call modifies its
579 .I timeout
580 argument.
581 However, the glibc wrapper function hides this behavior
582 by using a local variable for the timeout argument that
583 is passed to the system call.
584 Thus, the glibc
585 .BR pselect ()
586 function does not modify its
587 .I timeout
588 argument;
589 this is the behavior required by POSIX.1-2001.
590 .PP
591 The final argument of the
592 .BR pselect6 ()
593 system call is not a
594 .I "sigset_t\ *"
595 pointer, but is instead a structure of the form:
596 .PP
597 .in +4n
598 .EX
599 struct {
600 const kernel_sigset_t *ss; /* Pointer to signal set */
601 size_t ss_len; /* Size (in bytes) of object
602 pointed to by \(aqss\(aq */
603 };
604 .EE
605 .in
606 .PP
607 This allows the system call to obtain both
608 a pointer to the signal set and its size,
609 while allowing for the fact that most architectures
610 support a maximum of 6 arguments to a system call.
611 See
612 .BR sigprocmask (2)
613 for a discussion of the difference between the kernel and libc
614 notion of the signal set.
615 .\"
616 .SS Historical glibc details
617 Glibc 2.0 provided an incorrect version of
618 .BR pselect ()
619 that did not take a
620 .I sigmask
621 argument.
622 .PP
623 In glibc versions 2.1 to 2.2.1,
624 one must define
625 .B _GNU_SOURCE
626 in order to obtain the declaration of
627 .BR pselect ()
628 from
629 .IR <sys/select.h> .
630 .SH BUGS
631 POSIX allows an implementation to define an upper limit,
632 advertised via the constant
633 .BR FD_SETSIZE ,
634 on the range of file descriptors that can be specified
635 in a file descriptor set.
636 The Linux kernel imposes no fixed limit, but the glibc implementation makes
637 .I fd_set
638 a fixed-size type, with
639 .B FD_SETSIZE
640 defined as 1024, and the
641 .BR FD_* ()
642 macros operating according to that limit.
643 To monitor file descriptors greater than 1023, use
644 .BR poll (2)
645 or
646 .BR epoll (7)
647 instead.
648 .PP
649 The implementation of the
650 .I fd_set
651 arguments as value-result arguments is a design error that is avoided in
652 .BR poll (2)
653 and
654 .BR epoll (7).
655 .PP
656 According to POSIX,
657 .BR select ()
658 should check all specified file descriptors in the three file descriptor sets,
659 up to the limit
660 .IR nfds\-1 .
661 However, the current implementation ignores any file descriptor in
662 these sets that is greater than the maximum file descriptor number
663 that the process currently has open.
664 According to POSIX, any such file descriptor that is specified in one
665 of the sets should result in the error
666 .BR EBADF .
667 .PP
668 Starting with version 2.1, glibc provided an emulation of
669 .BR pselect ()
670 that was implemented using
671 .BR sigprocmask (2)
672 and
673 .BR select ().
674 This implementation remained vulnerable to the very race condition that
675 .BR pselect ()
676 was designed to prevent.
677 Modern versions of glibc use the (race-free)
678 .BR pselect ()
679 system call on kernels where it is provided.
680 .PP
681 On Linux,
682 .BR select ()
683 may report a socket file descriptor as "ready for reading", while
684 nevertheless a subsequent read blocks.
685 This could for example
686 happen when data has arrived but upon examination has the wrong
687 checksum and is discarded.
688 There may be other circumstances
689 in which a file descriptor is spuriously reported as ready.
690 .\" Stevens discusses a case where accept can block after select
691 .\" returns successfully because of an intervening RST from the client.
692 Thus it may be safer to use
693 .B O_NONBLOCK
694 on sockets that should not block.
695 .\" Maybe the kernel should have returned EIO in such a situation?
696 .PP
697 On Linux,
698 .BR select ()
699 also modifies
700 .I timeout
701 if the call is interrupted by a signal handler (i.e., the
702 .B EINTR
703 error return).
704 This is not permitted by POSIX.1.
705 The Linux
706 .BR pselect ()
707 system call has the same behavior,
708 but the glibc wrapper hides this behavior by internally copying the
709 .I timeout
710 to a local variable and passing that variable to the system call.
711 .SH EXAMPLES
712 .\" SRC BEGIN (select.c)
713 .EX
714 #include <stdio.h>
715 #include <stdlib.h>
716 #include <sys/select.h>
717
718 int
719 main(void)
720 {
721 int retval;
722 fd_set rfds;
723 struct timeval tv;
724
725 /* Watch stdin (fd 0) to see when it has input. */
726
727 FD_ZERO(&rfds);
728 FD_SET(0, &rfds);
729
730 /* Wait up to five seconds. */
731
732 tv.tv_sec = 5;
733 tv.tv_usec = 0;
734
735 retval = select(1, &rfds, NULL, NULL, &tv);
736 /* Don\(aqt rely on the value of tv now! */
737
738 if (retval == \-1)
739 perror("select()");
740 else if (retval)
741 printf("Data is available now.\en");
742 /* FD_ISSET(0, &rfds) will be true. */
743 else
744 printf("No data within five seconds.\en");
745
746 exit(EXIT_SUCCESS);
747 }
748 .EE
749 .\" SRC END
750 .SH SEE ALSO
751 .BR accept (2),
752 .BR connect (2),
753 .BR poll (2),
754 .BR read (2),
755 .BR recv (2),
756 .BR restart_syscall (2),
757 .BR send (2),
758 .BR sigprocmask (2),
759 .BR write (2),
760 .BR timespec (3),
761 .BR epoll (7),
762 .BR time (7)
763 .PP
764 For a tutorial with discussion and examples, see
765 .BR select_tut (2).