]>
Commit | Line | Data |
---|---|---|
0d3eb016 | 1 | /* Guts of both `select' and `poll' for Hurd. |
80081a0a RM |
2 | Copyright (C) 1991,92,93,94,95,96,97,98,99,2001 |
3 | Free Software Foundation, Inc. | |
0d3eb016 RM |
4 | This file is part of the GNU C Library. |
5 | ||
6 | The GNU C Library is free software; you can redistribute it and/or | |
41bdb6e2 AJ |
7 | modify it under the terms of the GNU Lesser General Public |
8 | License as published by the Free Software Foundation; either | |
9 | version 2.1 of the License, or (at your option) any later version. | |
0d3eb016 RM |
10 | |
11 | The GNU C Library is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
41bdb6e2 | 14 | Lesser General Public License for more details. |
0d3eb016 | 15 | |
41bdb6e2 AJ |
16 | You should have received a copy of the GNU Lesser General Public |
17 | License along with the GNU C Library; if not, write to the Free | |
18 | Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
19 | 02111-1307 USA. */ | |
0d3eb016 RM |
20 | |
21 | #include <sys/types.h> | |
22 | #include <sys/poll.h> | |
23 | #include <hurd.h> | |
24 | #include <hurd/fd.h> | |
25 | #include <stdlib.h> | |
26 | #include <string.h> | |
27 | #include <assert.h> | |
28 | #include <stdint.h> | |
29 | ||
30 | /* Mask of every select type a caller may ask for: read, write, urgent. */ | |
31 | #define SELECT_ALL (SELECT_READ | SELECT_WRITE | SELECT_URG) | |
32 | ||
33 | /* Internal flag recording that a particular select RPC has returned. It must be | |
34 | distinct from SELECT_ALL, so it is formed from the bits of (SELECT_ALL << 1) that lie outside SELECT_ALL; this requires that SELECT_ALL not have the high bit set. */ | |
35 | #define SELECT_RETURNED ((SELECT_ALL << 1) & ~SELECT_ALL) | |
36 | ||
37 | /* Check the first NFDS descriptors either in POLLFDS (if nonnnull) or in | |
38 | each of READFDS, WRITEFDS, EXCEPTFDS that is nonnull. If TIMEOUT is not | |
39 | NULL, time out after waiting the interval specified therein. Returns | |
40 | the number of ready descriptors, or -1 for errors. */ | |
41 | int | |
42 | _hurd_select (int nfds, | |
43 | struct pollfd *pollfds, | |
44 | fd_set *readfds, fd_set *writefds, fd_set *exceptfds, | |
45 | const struct timespec *timeout, const sigset_t *sigmask) | |
46 | { | |
47 | int i; | |
48 | mach_port_t portset; | |
49 | int got; | |
50 | error_t err; | |
51 | fd_set rfds, wfds, xfds; | |
52 | int firstfd, lastfd; | |
53 | mach_msg_timeout_t to = (timeout != NULL ? | |
54 | (timeout->tv_sec * 1000 + | |
55 | timeout->tv_nsec / 1000000) : | |
56 | 0); | |
57 | struct | |
58 | { | |
59 | struct hurd_userlink ulink; | |
60 | struct hurd_fd *cell; | |
61 | mach_port_t io_port; | |
62 | int type; | |
63 | mach_port_t reply_port; | |
64 | } d[nfds]; | |
65 | sigset_t oset; | |
66 | ||
db6b51ad RM |
67 | union typeword /* Use this to avoid unkosher casts. */ |
68 | { | |
69 | mach_msg_type_t type; | |
70 | uint32_t word; | |
71 | }; | |
72 | assert (sizeof (union typeword) == sizeof (mach_msg_type_t)); | |
73 | assert (sizeof (uint32_t) == sizeof (mach_msg_type_t)); | |
74 | ||
0d3eb016 RM |
75 | if (sigmask && __sigprocmask (SIG_SETMASK, sigmask, &oset)) |
76 | return -1; | |
77 | ||
78 | if (pollfds) | |
79 | { | |
80 | /* Collect interesting descriptors from the user's `pollfd' array. | |
81 | We do a first pass that reads the user's array before taking | |
82 | any locks. The second pass then only touches our own stack, | |
83 | and gets the port references. */ | |
84 | ||
85 | for (i = 0; i < nfds; ++i) | |
86 | if (pollfds[i].fd >= 0) | |
87 | { | |
88 | int type = 0; | |
89 | if (pollfds[i].events & POLLIN) | |
90 | type |= SELECT_READ; | |
91 | if (pollfds[i].events & POLLOUT) | |
92 | type |= SELECT_WRITE; | |
93 | if (pollfds[i].events & POLLPRI) | |
94 | type |= SELECT_URG; | |
95 | ||
96 | d[i].io_port = pollfds[i].fd; | |
97 | d[i].type = type; | |
98 | } | |
99 | else | |
100 | d[i].type = 0; | |
101 | ||
102 | HURD_CRITICAL_BEGIN; | |
103 | __mutex_lock (&_hurd_dtable_lock); | |
104 | ||
105 | for (i = 0; i < nfds; ++i) | |
106 | if (d[i].type != 0) | |
107 | { | |
108 | const int fd = (int) d[i].io_port; | |
109 | ||
110 | if (fd < _hurd_dtablesize) | |
111 | { | |
112 | d[i].cell = _hurd_dtable[fd]; | |
113 | d[i].io_port = _hurd_port_get (&d[i].cell->port, &d[i].ulink); | |
114 | if (d[i].io_port != MACH_PORT_NULL) | |
115 | continue; | |
116 | } | |
117 | ||
118 | /* If one descriptor is bogus, we fail completely. */ | |
119 | while (i-- > 0) | |
120 | if (d[i].type != 0) | |
121 | _hurd_port_free (&d[i].cell->port, | |
122 | &d[i].ulink, d[i].io_port); | |
123 | break; | |
124 | } | |
125 | ||
126 | __mutex_unlock (&_hurd_dtable_lock); | |
127 | HURD_CRITICAL_END; | |
128 | ||
129 | if (i < nfds) | |
130 | { | |
131 | if (sigmask) | |
132 | __sigprocmask (SIG_SETMASK, &oset, NULL); | |
133 | errno = EBADF; | |
134 | return -1; | |
135 | } | |
136 | ||
137 | lastfd = i - 1; | |
138 | firstfd = i == 0 ? lastfd : 0; | |
139 | } | |
140 | else | |
141 | { | |
142 | /* Collect interested descriptors from the user's fd_set arguments. | |
143 | Use local copies so we can't crash from user bogosity. */ | |
144 | ||
145 | if (readfds == NULL) | |
146 | FD_ZERO (&rfds); | |
147 | else | |
148 | rfds = *readfds; | |
149 | if (writefds == NULL) | |
150 | FD_ZERO (&wfds); | |
151 | else | |
152 | wfds = *writefds; | |
153 | if (exceptfds == NULL) | |
154 | FD_ZERO (&xfds); | |
155 | else | |
156 | xfds = *exceptfds; | |
157 | ||
158 | HURD_CRITICAL_BEGIN; | |
159 | __mutex_lock (&_hurd_dtable_lock); | |
160 | ||
161 | if (nfds > _hurd_dtablesize) | |
162 | nfds = _hurd_dtablesize; | |
163 | ||
164 | /* Collect the ports for interesting FDs. */ | |
165 | firstfd = lastfd = -1; | |
166 | for (i = 0; i < nfds; ++i) | |
167 | { | |
168 | int type = 0; | |
169 | if (readfds != NULL && FD_ISSET (i, &rfds)) | |
170 | type |= SELECT_READ; | |
171 | if (writefds != NULL && FD_ISSET (i, &wfds)) | |
172 | type |= SELECT_WRITE; | |
173 | if (exceptfds != NULL && FD_ISSET (i, &xfds)) | |
174 | type |= SELECT_URG; | |
175 | d[i].type = type; | |
176 | if (type) | |
177 | { | |
178 | d[i].cell = _hurd_dtable[i]; | |
179 | d[i].io_port = _hurd_port_get (&d[i].cell->port, &d[i].ulink); | |
180 | if (d[i].io_port == MACH_PORT_NULL) | |
181 | { | |
182 | /* If one descriptor is bogus, we fail completely. */ | |
183 | while (i-- > 0) | |
c3aba1be RM |
184 | if (d[i].type != 0) |
185 | _hurd_port_free (&d[i].cell->port, &d[i].ulink, | |
186 | d[i].io_port); | |
0d3eb016 RM |
187 | break; |
188 | } | |
189 | lastfd = i; | |
190 | if (firstfd == -1) | |
191 | firstfd = i; | |
192 | } | |
193 | } | |
194 | ||
195 | __mutex_unlock (&_hurd_dtable_lock); | |
196 | HURD_CRITICAL_END; | |
197 | ||
198 | if (i < nfds) | |
199 | { | |
200 | if (sigmask) | |
201 | __sigprocmask (SIG_SETMASK, &oset, NULL); | |
202 | errno = EBADF; | |
203 | return -1; | |
204 | } | |
205 | } | |
206 | ||
207 | ||
208 | err = 0; | |
209 | got = 0; | |
210 | ||
211 | /* Send them all io_select request messages. */ | |
212 | ||
213 | if (firstfd == -1) | |
214 | /* But not if there were no ports to deal with at all. | |
215 | We are just a pure timeout. */ | |
216 | portset = __mach_reply_port (); | |
217 | else | |
218 | { | |
219 | portset = MACH_PORT_NULL; | |
220 | ||
221 | for (i = firstfd; i <= lastfd; ++i) | |
222 | if (d[i].type) | |
223 | { | |
224 | int type = d[i].type; | |
225 | d[i].reply_port = __mach_reply_port (); | |
226 | err = __io_select (d[i].io_port, d[i].reply_port, | |
227 | /* Poll only if there's a single descriptor. */ | |
228 | (firstfd == lastfd) ? to : 0, | |
229 | &type); | |
230 | switch (err) | |
231 | { | |
232 | case MACH_RCV_TIMED_OUT: | |
233 | /* No immediate response. This is normal. */ | |
234 | err = 0; | |
235 | if (firstfd == lastfd) | |
236 | /* When there's a single descriptor, we don't need a | |
237 | portset, so just pretend we have one, but really | |
238 | use the single reply port. */ | |
239 | portset = d[i].reply_port; | |
240 | else if (got == 0) | |
241 | /* We've got multiple reply ports, so we need a port set to | |
242 | multiplex them. */ | |
243 | { | |
244 | /* We will wait again for a reply later. */ | |
245 | if (portset == MACH_PORT_NULL) | |
246 | /* Create the portset to receive all the replies on. */ | |
247 | err = __mach_port_allocate (__mach_task_self (), | |
248 | MACH_PORT_RIGHT_PORT_SET, | |
249 | &portset); | |
250 | if (! err) | |
251 | /* Put this reply port in the port set. */ | |
252 | __mach_port_move_member (__mach_task_self (), | |
253 | d[i].reply_port, portset); | |
254 | } | |
255 | break; | |
256 | ||
257 | default: | |
258 | /* No other error should happen. Callers of select | |
259 | don't expect to see errors, so we simulate | |
260 | readiness of the erring object and the next call | |
261 | hopefully will get the error again. */ | |
262 | type = SELECT_ALL; | |
263 | /* FALLTHROUGH */ | |
264 | ||
265 | case 0: | |
266 | /* We got an answer. */ | |
267 | if ((type & SELECT_ALL) == 0) | |
268 | /* Bogus answer; treat like an error, as a fake positive. */ | |
269 | type = SELECT_ALL; | |
270 | ||
271 | /* This port is already ready already. */ | |
272 | d[i].type &= type; | |
273 | d[i].type |= SELECT_RETURNED; | |
274 | ++got; | |
275 | break; | |
276 | } | |
277 | _hurd_port_free (&d[i].cell->port, &d[i].ulink, d[i].io_port); | |
278 | } | |
279 | } | |
280 | ||
281 | /* Now wait for reply messages. */ | |
282 | if (!err && got == 0) | |
283 | { | |
284 | /* Now wait for io_select_reply messages on PORT, | |
285 | timing out as appropriate. */ | |
286 | ||
287 | union | |
288 | { | |
289 | mach_msg_header_t head; | |
f22a77e1 RM |
290 | #ifdef MACH_MSG_TRAILER_MINIMUM_SIZE |
291 | struct | |
292 | { | |
293 | mach_msg_header_t head; | |
294 | NDR_record_t ndr; | |
295 | error_t err; | |
296 | } error; | |
297 | struct | |
298 | { | |
299 | mach_msg_header_t head; | |
300 | NDR_record_t ndr; | |
301 | error_t err; | |
302 | int result; | |
303 | mach_msg_trailer_t trailer; | |
304 | } success; | |
305 | #else | |
0d3eb016 RM |
306 | struct |
307 | { | |
308 | mach_msg_header_t head; | |
db6b51ad | 309 | union typeword err_type; |
0d3eb016 RM |
310 | error_t err; |
311 | } error; | |
312 | struct | |
313 | { | |
314 | mach_msg_header_t head; | |
db6b51ad | 315 | union typeword err_type; |
0d3eb016 | 316 | error_t err; |
db6b51ad | 317 | union typeword result_type; |
0d3eb016 RM |
318 | int result; |
319 | } success; | |
f22a77e1 | 320 | #endif |
0d3eb016 RM |
321 | } msg; |
322 | mach_msg_option_t options = (timeout == NULL ? 0 : MACH_RCV_TIMEOUT); | |
323 | error_t msgerr; | |
324 | while ((msgerr = __mach_msg (&msg.head, | |
325 | MACH_RCV_MSG | options, | |
326 | 0, sizeof msg, portset, to, | |
327 | MACH_PORT_NULL)) == MACH_MSG_SUCCESS) | |
328 | { | |
329 | /* We got a message. Decode it. */ | |
330 | #define IO_SELECT_REPLY_MSGID (21012 + 100) /* XXX */ | |
f22a77e1 | 331 | #ifdef MACH_MSG_TYPE_BIT |
db6b51ad RM |
332 | const union typeword inttype = |
333 | { type: | |
334 | { MACH_MSG_TYPE_INTEGER_T, sizeof (integer_t) * 8, 1, 1, 0, 0 } | |
335 | }; | |
f22a77e1 | 336 | #endif |
db6b51ad RM |
337 | if (msg.head.msgh_id == IO_SELECT_REPLY_MSGID && |
338 | msg.head.msgh_size >= sizeof msg.error && | |
339 | !(msg.head.msgh_bits & MACH_MSGH_BITS_COMPLEX) && | |
f22a77e1 | 340 | #ifdef MACH_MSG_TYPE_BIT |
db6b51ad | 341 | msg.error.err_type.word == inttype.word |
f22a77e1 RM |
342 | #endif |
343 | ) | |
0d3eb016 RM |
344 | { |
345 | /* This is a properly formatted message so far. | |
346 | See if it is a success or a failure. */ | |
347 | if (msg.error.err == EINTR && | |
348 | msg.head.msgh_size == sizeof msg.error) | |
349 | { | |
350 | /* EINTR response; poll for further responses | |
351 | and then return quickly. */ | |
352 | err = EINTR; | |
353 | goto poll; | |
354 | } | |
355 | if (msg.error.err || | |
356 | msg.head.msgh_size != sizeof msg.success || | |
f22a77e1 | 357 | #ifdef MACH_MSG_TYPE_BIT |
db6b51ad | 358 | msg.success.result_type.word != inttype.word || |
f22a77e1 | 359 | #endif |
0d3eb016 RM |
360 | (msg.success.result & SELECT_ALL) == 0) |
361 | { | |
362 | /* Error or bogus reply. Simulate readiness. */ | |
363 | __mach_msg_destroy (&msg.head); | |
364 | msg.success.result = SELECT_ALL; | |
365 | } | |
366 | ||
367 | /* Look up the respondent's reply port and record its | |
368 | readiness. */ | |
369 | { | |
370 | int had = got; | |
371 | if (firstfd != -1) | |
372 | for (i = firstfd; i <= lastfd; ++i) | |
373 | if (d[i].type | |
374 | && d[i].reply_port == msg.head.msgh_local_port) | |
375 | { | |
376 | d[i].type &= msg.success.result; | |
377 | d[i].type |= SELECT_RETURNED; | |
378 | ++got; | |
379 | } | |
380 | assert (got > had); | |
381 | } | |
382 | } | |
383 | ||
384 | if (msg.head.msgh_remote_port != MACH_PORT_NULL) | |
385 | __mach_port_deallocate (__mach_task_self (), | |
386 | msg.head.msgh_remote_port); | |
387 | ||
388 | if (got) | |
389 | poll: | |
390 | { | |
391 | /* Poll for another message. */ | |
392 | to = 0; | |
393 | options |= MACH_RCV_TIMEOUT; | |
394 | } | |
395 | } | |
396 | ||
397 | if (err == MACH_RCV_TIMED_OUT) | |
398 | /* This is the normal value for ERR. We might have timed out and | |
399 | read no messages. Otherwise, after receiving the first message, | |
400 | we poll for more messages. We receive with a timeout of 0 to | |
401 | effect a poll, so ERR is MACH_RCV_TIMED_OUT when the poll finds no | |
402 | message waiting. */ | |
403 | err = 0; | |
404 | ||
405 | if (got) | |
406 | /* At least one descriptor is known to be ready now, so we will | |
407 | return success. */ | |
408 | err = 0; | |
409 | } | |
410 | ||
411 | if (firstfd != -1) | |
412 | for (i = firstfd; i <= lastfd; ++i) | |
413 | if (d[i].type) | |
414 | __mach_port_destroy (__mach_task_self (), d[i].reply_port); | |
415 | if (firstfd == -1 || (firstfd != lastfd && portset != MACH_PORT_NULL)) | |
416 | /* Destroy PORTSET, but only if it's not actually the reply port for a | |
417 | single descriptor (in which case it's destroyed in the previous loop; | |
418 | not doing it here is just a bit more efficient). */ | |
419 | __mach_port_destroy (__mach_task_self (), portset); | |
420 | ||
421 | if (err) | |
422 | { | |
423 | if (sigmask) | |
424 | __sigprocmask (SIG_SETMASK, &oset, NULL); | |
425 | return __hurd_fail (err); | |
426 | } | |
427 | ||
428 | if (pollfds) | |
429 | /* Fill in the `revents' members of the user's array. */ | |
430 | for (i = 0; i < nfds; ++i) | |
431 | { | |
80081a0a | 432 | int type = d[i].type; |
0d3eb016 RM |
433 | int_fast16_t revents = 0; |
434 | ||
80081a0a RM |
435 | if (type & SELECT_RETURNED) |
436 | { | |
437 | if (type & SELECT_READ) | |
438 | revents |= POLLIN; | |
439 | if (type & SELECT_WRITE) | |
440 | revents |= POLLOUT; | |
441 | if (type & SELECT_URG) | |
442 | revents |= POLLPRI; | |
443 | } | |
0d3eb016 RM |
444 | |
445 | pollfds[i].revents = revents; | |
446 | } | |
447 | else | |
448 | { | |
449 | /* Below we recalculate GOT to include an increment for each operation | |
450 | allowed on each fd. */ | |
451 | got = 0; | |
452 | ||
453 | /* Set the user bitarrays. We only ever have to clear bits, as all | |
454 | desired ones are initially set. */ | |
455 | if (firstfd != -1) | |
456 | for (i = firstfd; i <= lastfd; ++i) | |
457 | { | |
458 | int type = d[i].type; | |
459 | ||
460 | if ((type & SELECT_RETURNED) == 0) | |
461 | type = 0; | |
462 | ||
463 | if (type & SELECT_READ) | |
464 | got++; | |
465 | else if (readfds) | |
466 | FD_CLR (i, readfds); | |
467 | if (type & SELECT_WRITE) | |
468 | got++; | |
469 | else if (writefds) | |
470 | FD_CLR (i, writefds); | |
471 | if (type & SELECT_URG) | |
472 | got++; | |
473 | else if (exceptfds) | |
474 | FD_CLR (i, exceptfds); | |
475 | } | |
476 | } | |
477 | ||
478 | if (sigmask && __sigprocmask (SIG_SETMASK, &oset, NULL)) | |
479 | return -1; | |
480 | ||
481 | return got; | |
482 | } |