]>
Commit | Line | Data |
---|---|---|
fea681da MK |
1 | .\" This manpage is copyright (C) 2001 Paul Sheer. |
2 | .\" | |
3 | .\" Permission is granted to make and distribute verbatim copies of this | |
4 | .\" manual provided the copyright notice and this permission notice are | |
5 | .\" preserved on all copies. | |
6 | .\" | |
7 | .\" Permission is granted to copy and distribute modified versions of this | |
8 | .\" manual under the conditions for verbatim copying, provided that the | |
9 | .\" entire resulting derived work is distributed under the terms of a | |
10 | .\" permission notice identical to this one. | |
c13182ef | 11 | .\" |
fea681da MK |
12 | .\" Since the Linux kernel and libraries are constantly changing, this |
13 | .\" manual page may be incorrect or out-of-date. The author(s) assume no | |
14 | .\" responsibility for errors or omissions, or for damages resulting from | |
15 | .\" the use of the information contained herein. The author(s) may not | |
16 | .\" have taken the same level of care in the production of this manual, | |
17 | .\" which is licensed free of charge, as they might when working | |
18 | .\" professionally. | |
c13182ef | 19 | .\" |
fea681da MK |
20 | .\" Formatted or processed versions of this manual, if unaccompanied by |
21 | .\" the source, must acknowledge the copyright and authors of this work. | |
22 | .\" | |
23 | .\" very minor changes, aeb | |
24 | .\" | |
305a0578 | 25 | .\" Modified 5 June 2002, Michael Kerrisk <mtk-manpages@gmx.net> |
c8e01c78 | 26 | .\" 2006-05-13, mtk, removed much material that is redundant with select.2 |
c13182ef | 27 | .\" various other changes |
fea681da | 28 | .\" |
c8e01c78 | 29 | .TH SELECT_TUT 2 2006-05-13 "Linux" "Linux Programmer's Manual" |
fea681da | 30 | .SH NAME |
c13182ef | 31 | select, pselect, FD_CLR, FD_ISSET, FD_SET, FD_ZERO \- |
35478399 | 32 | synchronous I/O multiplexing |
fea681da | 33 | .SH SYNOPSIS |
b9208776 MK |
34 | .nf |
35 | /* According to POSIX.1-2001 */ | |
36 | .br | |
37 | .B #include <sys/select.h> | |
38 | .sp | |
39 | /* According to earlier standards */ | |
40 | .br | |
fea681da MK |
41 | .B #include <sys/time.h> |
42 | .br | |
43 | .B #include <sys/types.h> | |
44 | .br | |
45 | .B #include <unistd.h> | |
46 | .sp | |
c13182ef | 47 | \fBint select(int \fInfds\fB, fd_set *\fIreadfds\fB, fd_set *\fIwritefds\fB, |
b9208776 | 48 | fd_set *\fIexceptfds\fB, struct timeval *\fItimeout\fB); |
fea681da | 49 | .sp |
b9208776 | 50 | .BI "void FD_CLR(int " fd ", fd_set *" set ); |
fea681da | 51 | .br |
b9208776 | 52 | .BI "int FD_ISSET(int " fd ", fd_set *" set ); |
fea681da | 53 | .br |
b9208776 | 54 | .BI "void FD_SET(int " fd ", fd_set *" set ); |
fea681da | 55 | .br |
b9208776 MK |
56 | .BI "void FD_ZERO(fd_set *" set ); |
57 | .sp | |
58 | .B #define _XOPEN_SOURCE 600 | |
59 | .B #include <sys/select.h> | |
60 | .sp | |
c13182ef MK |
61 | \fBint pselect(int \fInfds\fB, fd_set *\fIreadfds\fB, fd_set *\fIwritefds\fB, |
62 | fd_set *\fIexceptfds\fB, const struct timespec *\fItimeout\fB, | |
b9208776 | 63 | const sigset_t *\fIsigmask\fB); |
fea681da MK |
64 | .fi |
65 | .SH DESCRIPTION | |
66 | ||
c13182ef | 67 | \fBselect\fP() (or \fBpselect\fP()) is the pivot function of |
8e5f22f8 | 68 | most C programs that |
c13182ef | 69 | handle more than one simultaneous file descriptor (or socket handle) |
8e5f22f8 | 70 | in an efficient |
c13182ef MK |
71 | manner. |
72 | Its principal arguments are three arrays of file descriptors: | |
73 | \fIreadfds\fP, \fIwritefds\fP, and \fIexceptfds\fP. | |
74 | The way that | |
e511ffb6 | 75 | \fBselect\fP() is usually used is to block while waiting for a "change of |
c13182ef MK |
76 | status" on one or more of the file descriptors. |
77 | A "change of status" is | |
fea681da MK |
78 | when more characters become available from the file descriptor, \fIor\fP |
79 | when space becomes available within the kernel's internal buffers for | |
80 | more to be written to the file descriptor, \fIor\fP when a file | |
81 | descriptor goes into error (in the case of a socket or pipe this is | |
82 | when the other end of the connection is closed). | |
83 | ||
e511ffb6 | 84 | In summary, \fBselect\fP() just watches multiple file descriptors, |
fea681da MK |
85 | and is the standard Unix call to do so. |
86 | ||
87 | The arrays of file descriptors are called \fIfile descriptor sets\fP. | |
88 | Each set is declared as type \fBfd_set\fP, and its contents can be | |
e511ffb6 MK |
89 | altered with the macros \fBFD_CLR\fP(), \fBFD_ISSET\fP(), \fBFD_SET\fP(), and |
90 | \fBFD_ZERO\fP(). \fBFD_ZERO\fP() is usually the first function to be used on | |
c13182ef MK |
91 | a newly declared set. |
92 | Thereafter, the individual file descriptors that | |
e511ffb6 MK |
93 | you are interested in can be added one by one with \fBFD_SET\fP(). |
94 | \fBselect\fP() modifies the contents of the sets according to the rules | |
95 | described below; after calling \fBselect\fP() you can test if your file | |
96 | descriptor is still present in the set with the \fBFD_ISSET\fP() macro. | |
97 | \fBFD_ISSET\fP() returns non-zero if the descriptor is present and zero if | |
c8e01c78 | 98 | it is not. \fBFD_CLR\fP() removes a file descriptor from the set. |
fea681da MK |
99 | .SH ARGUMENTS |
100 | .TP | |
101 | \fIreadfds\fP | |
102 | This set is watched to see if data is available for reading from any of | |
c13182ef MK |
103 | its file descriptors. |
104 | After \fBselect\fP() has returned, \fIreadfds\fP will be | |
fea681da | 105 | cleared of all file descriptors except for those file descriptors that |
63aa9df0 MK |
106 | are immediately available for reading with a \fBrecv\fP() (for sockets) or |
107 | \fBread\fP() (for pipes, files, and sockets) call. | |
fea681da MK |
108 | .TP |
109 | \fIwritefds\fP | |
110 | This set is watched to see if there is space to write data to any of | |
c13182ef | 111 | its file descriptors. |
c8e01c78 | 112 | After \fBselect\fP() has returned, \fIwritefds\fP will be |
fea681da | 113 | cleared of all file descriptors except for those file descriptors that |
63aa9df0 MK |
114 | are immediately available for writing with a \fBsend\fP() (for sockets) or |
115 | \fBwrite\fP() (for pipes, files, and sockets) call. | |
fea681da MK |
116 | .TP |
117 | \fIexceptfds\fP | |
118 | This set is watched for exceptions or errors on any of the file | |
c13182ef MK |
119 | descriptors. |
120 | However, that is actually just a rumor. | |
121 | How you use | |
122 | \fIexceptfds\fP is to watch for \fIout\-of\-band\fP (OOB) data. | |
123 | OOB data | |
fea681da | 124 | is data sent on a socket using the \fBMSG_OOB\fP flag, and hence |
c13182ef MK |
125 | \fIexceptfds\fP only really applies to sockets. |
126 | See \fBrecv\fP(2) and | |
127 | \fBsend\fP(2) about this. | |
128 | After \fBselect\fP() has returned, | |
fea681da | 129 | \fIexceptfds\fP will be cleared of all file descriptors except for those |
c13182ef MK |
130 | descriptors that are available for reading OOB data. |
131 | You can only ever | |
63aa9df0 | 132 | read one byte of OOB data though (which is done with \fBrecv\fP()), and |
3382bd94 | 133 | writing OOB data (done with \fBsend\fP()) can be done at any time and will |
c13182ef MK |
134 | not block. |
135 | Hence there is no need for a fourth set to check if a socket | |
fea681da MK |
136 | is available for writing OOB data. |
137 | .TP | |
138 | \fInfds\fP | |
139 | This is an integer one more than the maximum of any file descriptor in | |
c13182ef MK |
140 | any of the sets. |
141 | In other words, while you are busy adding file descriptors | |
fea681da MK |
142 | to your sets, you must calculate the maximum integer value of all of |
143 | them, then increment this value by one, and then pass this as \fInfds\fP to | |
e511ffb6 | 144 | \fBselect\fP(). |
fea681da MK |
145 | .TP |
146 | \fIutimeout\fP | |
147 | .RS | |
e511ffb6 | 148 | This is the longest time \fBselect\fP() must wait before returning, even |
c13182ef MK |
149 | if nothing interesting happened. |
150 | If this value is passed as NULL, | |
e511ffb6 MK |
151 | then \fBselect\fP() blocks indefinitely waiting for an event. |
152 | \fIutimeout\fP can be set to zero seconds, which causes \fBselect\fP() to | |
c13182ef MK |
153 | return immediately. |
154 | The structure \fBstruct timeval\fP is defined as, | |
fea681da MK |
155 | .PP |
156 | .nf | |
157 | struct timeval { | |
158 | time_t tv_sec; /* seconds */ | |
159 | long tv_usec; /* microseconds */ | |
160 | }; | |
161 | .fi | |
162 | .RE | |
163 | .TP | |
164 | \fIntimeout\fP | |
165 | .RS | |
c8e01c78 | 166 | This argument has the same meaning as \fIutimeout\fP but \fIstruct timespec\fP |
fea681da MK |
167 | has nanosecond precision as follows, |
168 | .PP | |
169 | .nf | |
170 | struct timespec { | |
171 | long tv_sec; /* seconds */ | |
172 | long tv_nsec; /* nanoseconds */ | |
173 | }; | |
174 | .fi | |
175 | .RE | |
176 | .TP | |
177 | \fIsigmask\fP | |
c13182ef MK |
178 | This argument holds a set of signals to allow while performing a |
179 | \fBpselect\fP() call (see \fBsigaddset\fP(3) and \fBsigprocmask\fP(2)). | |
35478399 | 180 | It can be passed |
fea681da | 181 | as NULL, in which case it does not modify the set of allowed signals on |
c13182ef MK |
182 | entry and exit to the function. |
183 | It will then behave just like \fBselect\fP(). | |
fea681da | 184 | .SH COMBINING SIGNAL AND DATA EVENTS |
e511ffb6 | 185 | \fBpselect\fP() must be used if you are waiting for a signal as well as |
c13182ef MK |
186 | data from a file descriptor. |
187 | Programs that receive signals as events | |
188 | normally use the signal handler only to raise a global flag. | |
189 | The global | |
fea681da | 190 | flag will indicate that the event must be processed in the main loop of |
c13182ef MK |
191 | the program. |
192 | A signal will cause the \fBselect\fP() (or \fBpselect\fP()) | |
193 | call to return with \fIerrno\fP set to \fBEINTR\fP. | |
194 | This behavior is | |
fea681da | 195 | essential so that signals can be processed in the main loop of the |
c13182ef MK |
196 | program, otherwise \fBselect\fP() would block indefinitely. |
197 | Now, somewhere | |
198 | in the main loop will be a conditional to check the global flag. | |
199 | So we | |
fea681da | 200 | must ask: what if a signal arrives after the conditional, but before the |
e511ffb6 | 201 | \fBselect\fP() call? The answer is that \fBselect\fP() would block |
c13182ef MK |
202 | indefinitely, even though an event is actually pending. |
203 | This race | |
204 | condition is solved by the \fBpselect\fP() call. | |
205 | This call can be used to | |
fea681da | 206 | mask out signals that are not to be received except within the |
c13182ef MK |
207 | \fBpselect\fP() call. |
208 | For instance, let us say that the event in question | |
209 | was the exit of a child process. | |
210 | Before the start of the main loop, we | |
211 | would block \fBSIGCHLD\fP using \fBsigprocmask\fP(). | |
212 | Our \fBpselect\fP() | |
213 | call would enable \fBSIGCHLD\fP by using the virgin signal mask. | |
214 | Our | |
fea681da MK |
215 | program would look like: |
216 | .PP | |
217 | .nf | |
218 | int child_events = 0; | |
219 | ||
c13182ef MK |
220 | void |
221 | child_sig_handler(int x) | |
cf0a9ace | 222 | { |
fea681da | 223 | child_events++; |
cf0a9ace | 224 | signal(SIGCHLD, child_sig_handler); |
fea681da MK |
225 | } |
226 | ||
c13182ef MK |
227 | int |
228 | main(int argc, char **argv) | |
cf0a9ace | 229 | { |
fea681da MK |
230 | sigset_t sigmask, orig_sigmask; |
231 | ||
cf0a9ace MK |
232 | sigemptyset(&sigmask); |
233 | sigaddset(&sigmask, SIGCHLD); | |
234 | sigprocmask(SIG_BLOCK, &sigmask, &orig_sigmask); | |
fea681da | 235 | |
cf0a9ace | 236 | signal(SIGCHLD, child_sig_handler); |
fea681da MK |
237 | |
238 | for (;;) { /* main loop */ | |
2bc2f479 | 239 | for (; child_events > 0; child_events\-\-) { |
fea681da MK |
240 | /* do event work here */ |
241 | } | |
cf0a9ace | 242 | r = pselect(nfds, &rd, &wr, &er, 0, &orig_sigmask); |
fea681da MK |
243 | |
244 | /* main body of program */ | |
245 | } | |
246 | } | |
247 | .fi | |
fea681da | 248 | .SH PRACTICAL |
3382bd94 | 249 | So what is the point of \fBselect\fP()? Can't I just read and write to my |
c13182ef | 250 | descriptors whenever I want? |
c8e01c78 | 251 | The point of \fBselect\fP() is that it watches |
fea681da | 252 | multiple descriptors at the same time and properly puts the process to |
c13182ef MK |
253 | sleep if there is no activity. |
254 | It does this while enabling you to handle | |
255 | multiple simultaneous pipes and sockets. | |
256 | Unix programmers often find | |
c8e01c78 | 257 | themselves in a position where they have to handle I/O from more than one |
c13182ef MK |
258 | file descriptor where the data flow may be intermittent. |
259 | If you were to | |
3382bd94 | 260 | merely create a sequence of \fBread\fP() and \fBwrite\fP() calls, you would |
fea681da MK |
261 | find that one of your calls may block waiting for data from/to a file |
262 | descriptor, while another file descriptor is unused though available | |
e511ffb6 | 263 | for data. \fBselect\fP() efficiently copes with this situation. |
fea681da | 264 | |
c13182ef | 265 | A simple example of the use of |
c8e01c78 MK |
266 | .BR select () |
267 | can be found in the | |
268 | .BR select (2) | |
269 | manual page. | |
fea681da | 270 | .SH PORT FORWARDING EXAMPLE |
fea681da | 271 | Here is an example that better demonstrates the true utility of |
e511ffb6 | 272 | \fBselect\fP(). |
dde7d1a9 | 273 | The listing below is a TCP forwarding program that forwards |
fea681da MK |
274 | from one TCP port to another. |
275 | .PP | |
276 | .nf | |
277 | #include <stdlib.h> | |
278 | #include <stdio.h> | |
279 | #include <unistd.h> | |
280 | #include <sys/time.h> | |
281 | #include <sys/types.h> | |
282 | #include <string.h> | |
283 | #include <signal.h> | |
284 | #include <sys/socket.h> | |
285 | #include <netinet/in.h> | |
286 | #include <arpa/inet.h> | |
287 | #include <errno.h> | |
288 | ||
289 | static int forward_port; | |
290 | ||
291 | #undef max | |
292 | #define max(x,y) ((x) > (y) ? (x) : (y)) | |
293 | ||
c13182ef MK |
294 | static int |
295 | listen_socket(int listen_port) | |
cf0a9ace | 296 | { |
fea681da MK |
297 | struct sockaddr_in a; |
298 | int s; | |
299 | int yes; | |
cf0a9ace MK |
300 | if ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0) { |
301 | perror("socket"); | |
2bc2f479 | 302 | return \-1; |
fea681da MK |
303 | } |
304 | yes = 1; | |
cf0a9ace MK |
305 | if (setsockopt(s, SOL_SOCKET, SO_REUSEADDR, |
306 | (char *) &yes, sizeof(yes)) < 0) { | |
307 | perror("setsockopt"); | |
308 | close(s); | |
2bc2f479 | 309 | return \-1; |
fea681da | 310 | } |
cf0a9ace MK |
311 | memset(&a, 0, sizeof(a)); |
312 | a.sin_port = htons(listen_port); | |
fea681da | 313 | a.sin_family = AF_INET; |
cf0a9ace MK |
314 | if (bind(s, (struct sockaddr *) &a, sizeof(a)) < 0) { |
315 | perror("bind"); | |
316 | close(s); | |
2bc2f479 | 317 | return \-1; |
fea681da | 318 | } |
cf0a9ace MK |
319 | printf("accepting connections on port %d\\n", listen_port); |
320 | listen(s, 10); | |
fea681da MK |
321 | return s; |
322 | } | |
323 | ||
c13182ef MK |
324 | static int |
325 | connect_socket(int connect_port, char *address) | |
cf0a9ace | 326 | { |
fea681da MK |
327 | struct sockaddr_in a; |
328 | int s; | |
cf0a9ace MK |
329 | if ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0) { |
330 | perror("socket"); | |
331 | close(s); | |
2bc2f479 | 332 | return \-1; |
fea681da MK |
333 | } |
334 | ||
cf0a9ace MK |
335 | memset(&a, 0, sizeof(a)); |
336 | a.sin_port = htons(connect_port); | |
fea681da MK |
337 | a.sin_family = AF_INET; |
338 | ||
cf0a9ace MK |
339 | if (!inet_aton(address, (struct in_addr *) &a.sin_addr.s_addr)) { |
340 | perror("bad IP address format"); | |
341 | close(s); | |
2bc2f479 | 342 | return \-1; |
fea681da MK |
343 | } |
344 | ||
cf0a9ace MK |
345 | if (connect(s, (struct sockaddr *) &a, sizeof(a)) < 0) { |
346 | perror("connect()"); | |
347 | shutdown(s, SHUT_RDWR); | |
348 | close(s); | |
2bc2f479 | 349 | return \-1; |
fea681da MK |
350 | } |
351 | return s; | |
352 | } | |
353 | ||
354 | #define SHUT_FD1 { \\ | |
355 | if (fd1 >= 0) { \\ | |
cf0a9ace MK |
356 | shutdown(fd1, SHUT_RDWR); \\ |
357 | close(fd1); \\ | |
2bc2f479 | 358 | fd1 = \-1; \\ |
fea681da MK |
359 | } \\ |
360 | } | |
361 | ||
362 | #define SHUT_FD2 { \\ | |
363 | if (fd2 >= 0) { \\ | |
cf0a9ace MK |
364 | shutdown(fd2, SHUT_RDWR); \\ |
365 | close(fd2); \\ | |
2bc2f479 | 366 | fd2 = \-1; \\ |
fea681da MK |
367 | } \\ |
368 | } | |
369 | ||
370 | #define BUF_SIZE 1024 | |
371 | ||
c13182ef MK |
372 | int |
373 | main(int argc, char **argv) | |
cf0a9ace | 374 | { |
fea681da | 375 | int h; |
2bc2f479 | 376 | int fd1 = \-1, fd2 = \-1; |
fea681da MK |
377 | char buf1[BUF_SIZE], buf2[BUF_SIZE]; |
378 | int buf1_avail, buf1_written; | |
379 | int buf2_avail, buf2_written; | |
380 | ||
381 | if (argc != 4) { | |
cf0a9ace MK |
382 | fprintf(stderr, |
383 | "Usage\\n\\tfwd <listen-port> " | |
384 | "<forward-to-port> <forward-to-ip-address>\\n"); | |
385 | exit(1); | |
fea681da MK |
386 | } |
387 | ||
cf0a9ace | 388 | signal(SIGPIPE, SIG_IGN); |
fea681da | 389 | |
cf0a9ace | 390 | forward_port = atoi(argv[2]); |
fea681da | 391 | |
cf0a9ace | 392 | h = listen_socket(atoi(argv[1])); |
fea681da | 393 | if (h < 0) |
cf0a9ace | 394 | exit(1); |
fea681da MK |
395 | |
396 | for (;;) { | |
397 | int r, nfds = 0; | |
398 | fd_set rd, wr, er; | |
cf0a9ace MK |
399 | FD_ZERO(&rd); |
400 | FD_ZERO(&wr); | |
401 | FD_ZERO(&er); | |
402 | FD_SET(h, &rd); | |
403 | nfds = max(nfds, h); | |
fea681da | 404 | if (fd1 > 0 && buf1_avail < BUF_SIZE) { |
cf0a9ace MK |
405 | FD_SET(fd1, &rd); |
406 | nfds = max(nfds, fd1); | |
fea681da MK |
407 | } |
408 | if (fd2 > 0 && buf2_avail < BUF_SIZE) { | |
cf0a9ace MK |
409 | FD_SET(fd2, &rd); |
410 | nfds = max(nfds, fd2); | |
fea681da MK |
411 | } |
412 | if (fd1 > 0 | |
2bc2f479 | 413 | && buf2_avail \- buf2_written > 0) { |
cf0a9ace MK |
414 | FD_SET(fd1, &wr); |
415 | nfds = max(nfds, fd1); | |
fea681da MK |
416 | } |
417 | if (fd2 > 0 | |
2bc2f479 | 418 | && buf1_avail \- buf1_written > 0) { |
cf0a9ace MK |
419 | FD_SET(fd2, &wr); |
420 | nfds = max(nfds, fd2); | |
fea681da MK |
421 | } |
422 | if (fd1 > 0) { | |
cf0a9ace MK |
423 | FD_SET(fd1, &er); |
424 | nfds = max(nfds, fd1); | |
fea681da MK |
425 | } |
426 | if (fd2 > 0) { | |
cf0a9ace MK |
427 | FD_SET(fd2, &er); |
428 | nfds = max(nfds, fd2); | |
fea681da MK |
429 | } |
430 | ||
cf0a9ace | 431 | r = select(nfds + 1, &rd, &wr, &er, NULL); |
fea681da | 432 | |
2bc2f479 | 433 | if (r == \-1 && errno == EINTR) |
fea681da MK |
434 | continue; |
435 | if (r < 0) { | |
cf0a9ace MK |
436 | perror("select()"); |
437 | exit(1); | |
fea681da | 438 | } |
cf0a9ace | 439 | if (FD_ISSET(h, &rd)) { |
fea681da MK |
440 | unsigned int l; |
441 | struct sockaddr_in client_address; | |
cf0a9ace MK |
442 | memset(&client_address, 0, l = sizeof(client_address)); |
443 | r = accept(h, (struct sockaddr *) &client_address, &l); | |
fea681da | 444 | if (r < 0) { |
cf0a9ace | 445 | perror("accept()"); |
fea681da MK |
446 | } else { |
447 | SHUT_FD1; | |
448 | SHUT_FD2; | |
449 | buf1_avail = buf1_written = 0; | |
450 | buf2_avail = buf2_written = 0; | |
451 | fd1 = r; | |
452 | fd2 = | |
cf0a9ace | 453 | connect_socket(forward_port, argv[3]); |
fea681da MK |
454 | if (fd2 < 0) { |
455 | SHUT_FD1; | |
456 | } else | |
cf0a9ace MK |
457 | printf("connect from %s\\n", |
458 | inet_ntoa(client_address.sin_addr)); | |
fea681da MK |
459 | } |
460 | } | |
461 | /* NB: read oob data before normal reads */ | |
462 | if (fd1 > 0) | |
cf0a9ace | 463 | if (FD_ISSET(fd1, &er)) { |
fea681da MK |
464 | char c; |
465 | errno = 0; | |
cf0a9ace | 466 | r = recv(fd1, &c, 1, MSG_OOB); |
fea681da MK |
467 | if (r < 1) { |
468 | SHUT_FD1; | |
469 | } else | |
cf0a9ace | 470 | send(fd2, &c, 1, MSG_OOB); |
fea681da MK |
471 | } |
472 | if (fd2 > 0) | |
cf0a9ace | 473 | if (FD_ISSET(fd2, &er)) { |
fea681da MK |
474 | char c; |
475 | errno = 0; | |
cf0a9ace | 476 | r = recv(fd2, &c, 1, MSG_OOB); |
fea681da MK |
477 | if (r < 1) { |
478 | SHUT_FD1; | |
479 | } else | |
cf0a9ace | 480 | send(fd1, &c, 1, MSG_OOB); |
fea681da MK |
481 | } |
482 | if (fd1 > 0) | |
cf0a9ace | 483 | if (FD_ISSET(fd1, &rd)) { |
fea681da | 484 | r = |
cf0a9ace | 485 | read(fd1, buf1 + buf1_avail, |
2bc2f479 | 486 | BUF_SIZE \- buf1_avail); |
fea681da MK |
487 | if (r < 1) { |
488 | SHUT_FD1; | |
489 | } else | |
490 | buf1_avail += r; | |
491 | } | |
492 | if (fd2 > 0) | |
cf0a9ace | 493 | if (FD_ISSET(fd2, &rd)) { |
fea681da | 494 | r = |
cf0a9ace | 495 | read(fd2, buf2 + buf2_avail, |
2bc2f479 | 496 | BUF_SIZE \- buf2_avail); |
fea681da MK |
497 | if (r < 1) { |
498 | SHUT_FD2; | |
499 | } else | |
500 | buf2_avail += r; | |
501 | } | |
502 | if (fd1 > 0) | |
cf0a9ace | 503 | if (FD_ISSET(fd1, &wr)) { |
fea681da | 504 | r = |
cf0a9ace MK |
505 | write(fd1, buf2 + buf2_written, |
506 | buf2_avail \- buf2_written); | |
fea681da MK |
507 | if (r < 1) { |
508 | SHUT_FD1; | |
509 | } else | |
510 | buf2_written += r; | |
511 | } | |
512 | if (fd2 > 0) | |
cf0a9ace | 513 | if (FD_ISSET(fd2, &wr)) { |
fea681da | 514 | r = |
cf0a9ace MK |
515 | write(fd2, buf1 + buf1_written, |
516 | buf1_avail \- buf1_written); | |
fea681da MK |
517 | if (r < 1) { |
518 | SHUT_FD2; | |
519 | } else | |
520 | buf1_written += r; | |
521 | } | |
522 | /* check if write data has caught read data */ | |
523 | if (buf1_written == buf1_avail) | |
524 | buf1_written = buf1_avail = 0; | |
525 | if (buf2_written == buf2_avail) | |
526 | buf2_written = buf2_avail = 0; | |
527 | /* one side has closed the connection, keep | |
528 | writing to the other side until empty */ | |
cf0a9ace | 529 | if (fd1 < 0 && buf1_avail \- buf1_written == 0) { |
fea681da MK |
530 | SHUT_FD2; |
531 | } | |
cf0a9ace | 532 | if (fd2 < 0 && buf2_avail \- buf2_written == 0) { |
fea681da MK |
533 | SHUT_FD1; |
534 | } | |
535 | } | |
536 | return 0; | |
537 | } | |
538 | .fi | |
539 | .PP | |
540 | The above program properly forwards most kinds of TCP connections | |
c13182ef MK |
541 | including OOB signal data transmitted by \fBtelnet\fP servers. |
542 | It | |
fea681da | 543 | handles the tricky problem of having data flow in both directions |
c13182ef MK |
544 | simultaneously. |
545 | You might think it more efficient to use a \fBfork\fP() | |
546 | call and devote a process to each stream. | |
547 | This becomes more tricky than | |
548 | you might suspect. | |
549 | Another idea is to set non-blocking I/O using an | |
550 | \fBioctl\fP() call. | |
551 | This also has its problems because you end up having | |
fea681da MK |
552 | to have inefficient timeouts. |
553 | ||
554 | The program does not handle more than one connection at a | |
555 | time, although it could easily be extended to do this with a linked list | |
c13182ef MK |
556 | of buffers \(em one for each connection. |
557 | At the moment, new | |
fea681da | 558 | connections cause the current connection to be dropped. |
fea681da | 559 | .SH SELECT LAW |
e511ffb6 | 560 | Many people who try to use \fBselect\fP() come across behavior that is |
fea681da | 561 | difficult to understand and produces non-portable or borderline |
c13182ef MK |
562 | results. |
563 | For instance, the above program is carefully written not to | |
fea681da | 564 | block at any point, even though it does not set its file descriptors to |
c13182ef MK |
565 | non-blocking mode at all (see \fBioctl\fP(2)). |
566 | It is easy to introduce | |
e511ffb6 | 567 | subtle errors that will remove the advantage of using \fBselect\fP(), |
fea681da | 568 | hence I will present a list of essentials to watch for when using the |
e511ffb6 | 569 | \fBselect\fP() call. |
fea681da MK |
570 | .TP |
571 | \fB1.\fP | |
c13182ef MK |
572 | You should always try to use \fBselect\fP() without a timeout. |
573 | Your program | |
574 | should have nothing to do if there is no data available. | |
575 | Code that | |
c8e01c78 | 576 | depends on timeouts is not usually portable and is difficult to debug. |
fea681da MK |
577 | .TP |
578 | \fB2.\fP | |
579 | The value \fInfds\fP must be properly calculated for efficiency as | |
580 | explained above. | |
581 | .TP | |
582 | \fB3.\fP | |
583 | No file descriptor must be added to any set if you do not intend | |
e511ffb6 | 584 | to check its result after the \fBselect\fP() call, and respond |
c13182ef MK |
585 | appropriately. |
586 | See next rule. | |
fea681da MK |
587 | .TP |
588 | \fB4.\fP | |
e511ffb6 | 589 | After \fBselect\fP() returns, all file descriptors in all sets |
c13182ef | 590 | should be checked to see if they are ready. |
c8e01c78 MK |
591 | .\" mtk, May 2006: the following isn't really true. |
592 | .\" Any file descriptor that is available | |
593 | .\" for writing \fImust\fP be written to, and any file descriptor | |
594 | .\" available for reading \fImust\fP be read, etc. | |
fea681da MK |
595 | .TP |
596 | \fB5.\fP | |
63aa9df0 MK |
597 | The functions \fBread\fP(), \fBrecv\fP(), \fBwrite\fP(), and |
598 | \fBsend\fP() do \fInot\fP necessarily read/write the full amount of data | |
c13182ef MK |
599 | that you have requested. |
600 | If they do read/write the full amount, it's | |
601 | because you have a low traffic load and a fast stream. | |
602 | This is not | |
603 | always going to be the case. | |
604 | You should cope with the case of your | |
fea681da MK |
605 | functions only managing to send or receive a single byte. |
606 | .TP | |
607 | \fB6.\fP | |
608 | Never read/write only in single bytes at a time unless you are really | |
c13182ef MK |
609 | sure that you have a small amount of data to process. |
610 | It is extremely | |
fea681da MK |
611 | inefficient not to read/write as much data as you can buffer each time. |
612 | The buffers in the example above are 1024 bytes although they could | |
c8e01c78 | 613 | easily be made larger. |
fea681da MK |
614 | .TP |
615 | \fB7.\fP | |
63aa9df0 | 616 | The functions \fBread\fP(), \fBrecv\fP(), \fBwrite\fP(), and |
c8e01c78 | 617 | \fBsend\fP() as well as the \fBselect\fP() call can return \-1 with |
c13182ef MK |
618 | .I errno |
619 | set to \fBEINTR\fP, | |
620 | or with | |
c8e01c78 MK |
621 | .I errno |
622 | set to \fBEAGAIN\fP (\fBEWOULDBLOCK\fP). | |
623 | These results must be properly managed (not done properly | |
c13182ef MK |
624 | above). |
625 | If your program is not going to receive any signals then | |
626 | it is unlikely you will get \fBEINTR\fP. | |
627 | If your program does not | |
628 | set non-blocking I/O, you will not get \fBEAGAIN\fP. | |
629 | Nonetheless | |
fea681da MK |
630 | you should still cope with these errors for completeness. |
631 | .TP | |
632 | \fB8.\fP | |
63aa9df0 | 633 | Never call \fBread\fP(), \fBrecv\fP(), \fBwrite\fP(), or \fBsend\fP() |
fea681da MK |
634 | with a buffer length of zero. |
635 | .TP | |
636 | \fB9.\fP | |
c8e01c78 MK |
637 | If the functions \fBread\fP(), |
638 | \fBrecv\fP(), \fBwrite\fP(), and \fBsend\fP() fail | |
c13182ef MK |
639 | with errors other than those listed in \fB7.\fP, |
640 | or one of the input functions returns 0, indicating end of file, | |
c8e01c78 MK |
641 | then you should \fInot\fP pass that descriptor to |
642 | .BR select () | |
c13182ef | 643 | again. |
c8e01c78 | 644 | In the above example, |
fea681da MK |
645 | I close the descriptor immediately, and then set it to \-1 |
646 | to prevent it being included in a set. | |
647 | .TP | |
648 | \fB10.\fP | |
e511ffb6 MK |
649 | The timeout value must be initialized with each new call to \fBselect\fP(), |
650 | since some operating systems modify the structure. \fBpselect\fP() | |
fea681da MK |
651 | however does not modify its timeout structure. |
652 | .TP | |
653 | \fB11.\fP | |
654 | I have heard that the Windows socket layer does not cope with OOB data | |
c13182ef MK |
655 | properly. |
656 | It also does not cope with \fBselect\fP() calls when no file | |
657 | descriptors are set at all. | |
658 | Having no file descriptors set is a useful | |
fea681da MK |
659 | way to sleep the process with sub-second precision by using the timeout. |
660 | (See further on.) | |
fea681da | 661 | .SH USLEEP EMULATION |
3382bd94 | 662 | On systems that do not have a \fBusleep\fP() function, you can call |
e511ffb6 | 663 | \fBselect\fP() with a finite timeout and no file descriptors as |
fea681da MK |
664 | follows: |
665 | .PP | |
666 | .nf | |
667 | struct timeval tv; | |
668 | tv.tv_sec = 0; | |
669 | tv.tv_usec = 200000; /* 0.2 seconds */ | |
cf0a9ace | 670 | select(0, NULL, NULL, NULL, &tv); |
fea681da MK |
671 | .fi |
672 | .PP | |
d301ee6c | 673 | This is only guaranteed to work on Unix systems, however. |
fea681da | 674 | .SH RETURN VALUE |
e511ffb6 | 675 | On success, \fBselect\fP() returns the total number of file descriptors |
fea681da MK |
676 | still present in the file descriptor sets. |
677 | ||
c13182ef | 678 | If \fBselect\fP() timed out, then |
c8e01c78 MK |
679 | the return value will be zero. |
680 | The file descriptor sets should all be | |
c13182ef | 681 | empty (but may not be on some systems). |
fea681da | 682 | |
dcec8eb5 | 683 | A return value of \-1 indicates an error, with \fIerrno\fP being |
c13182ef MK |
684 | set appropriately. |
685 | In the case of an error, the returned sets and | |
fea681da | 686 | the timeout struct contents are undefined and should not be used. |
e511ffb6 | 687 | \fBpselect\fP() however never modifies \fIntimeout\fP. |
fea681da MK |
688 | .SH NOTES |
689 | Generally speaking, all operating systems that support sockets also | |
c13182ef | 690 | support \fBselect\fP(). |
c8e01c78 MK |
691 | Many types of programs become |
692 | extremely complicated without the use of | |
c13182ef | 693 | .BR select (). |
c8e01c78 | 694 | \fBselect\fP() can be used to solve |
fea681da | 695 | many problems in a portable and efficient way that naive programmers try |
c13182ef | 696 | to solve in a more complicated manner using |
c8e01c78 | 697 | threads, forking, IPCs, signals, memory sharing, and so on. |
fea681da MK |
698 | .PP |
699 | The | |
700 | .BR poll (2) | |
e511ffb6 | 701 | system call has the same functionality as \fBselect\fP(), |
c13182ef | 702 | and is somewhat more efficient when monitoring sparse |
c8e01c78 | 703 | file descriptor sets. |
c13182ef | 704 | It is nowadays widely available, |
c8e01c78 | 705 | but historically was less portable than \fBselect\fP(). |
fea681da | 706 | .PP |
c13182ef | 707 | The Linux-specific |
c8e01c78 | 708 | .BR epoll (7) |
1954b6a9 | 709 | API provides an interface that is more efficient than |
c8e01c78 MK |
710 | .BR select (2) |
711 | and | |
712 | .BR poll (2) | |
713 | when monitoring large numbers of file descriptors. | |
fea681da MK |
714 | .SH SEE ALSO |
715 | .BR accept (2), | |
716 | .BR connect (2), | |
717 | .BR ioctl (2), | |
718 | .BR poll (2), | |
719 | .BR read (2), | |
720 | .BR recv (2), | |
721 | .BR select (2), | |
722 | .BR send (2), | |
723 | .BR sigprocmask (2), | |
724 | .BR write (2), | |
725 | .BR sigaddset (3), | |
726 | .BR sigdelset (3), | |
727 | .BR sigemptyset (3), | |
728 | .BR sigfillset (3), | |
8e5f22f8 MK |
729 | .BR sigismember (3), |
730 | .BR epoll (7) | |
c8e01c78 MK |
731 | .\" .SH AUTHORS |
732 | .\" This man page was written by Paul Sheer. |