]> git.ipfire.org Git - thirdparty/glibc.git/blob - hurd/hurdselect.c
hurd: Fix pselect atomicity
[thirdparty/glibc.git] / hurd / hurdselect.c
1 /* Guts of both `select' and `poll' for Hurd.
2 Copyright (C) 1991-2020 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
18
19 #include <sys/time.h>
20 #include <sys/types.h>
21 #include <sys/poll.h>
22 #include <hurd.h>
23 #include <hurd/fd.h>
24 #include <hurd/io_request.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <assert.h>
28 #include <stdint.h>
29 #include <limits.h>
30 #include <time.h>
31
/* All user select types.  This is the set of request bits a caller may
   ask for on a descriptor.  */
#define SELECT_ALL (SELECT_READ | SELECT_WRITE | SELECT_URG)

/* Used to record that a particular select rpc returned.  Must be distinct
   from SELECT_ALL (which better not have the high bit set).  The value is
   whichever bits of SELECT_ALL, shifted left by one, fall outside
   SELECT_ALL itself.  */
#define SELECT_RETURNED ((SELECT_ALL << 1) & ~SELECT_ALL)
/* Marks a descriptor for which an error was recorded instead of a
   readiness result; it is the next bit above SELECT_RETURNED.  */
#define SELECT_ERROR (SELECT_RETURNED << 1)
39
40 /* Check the first NFDS descriptors either in POLLFDS (if nonnnull) or in
41 each of READFDS, WRITEFDS, EXCEPTFDS that is nonnull. If TIMEOUT is not
42 NULL, time out after waiting the interval specified therein. Returns
43 the number of ready descriptors, or -1 for errors. */
44 int
45 _hurd_select (int nfds,
46 struct pollfd *pollfds,
47 fd_set *readfds, fd_set *writefds, fd_set *exceptfds,
48 const struct timespec *timeout, const sigset_t *sigmask)
49 {
50 int i;
51 mach_port_t portset, sigport;
52 int got, ready;
53 error_t err;
54 fd_set rfds, wfds, xfds;
55 int firstfd, lastfd;
56 mach_msg_id_t reply_msgid;
57 mach_msg_timeout_t to;
58 struct timespec ts;
59 struct
60 {
61 struct hurd_userlink ulink;
62 struct hurd_fd *cell;
63 mach_port_t io_port;
64 int type;
65 mach_port_t reply_port;
66 int error;
67 } d[nfds];
68 sigset_t oset;
69 struct hurd_sigstate *ss;
70
71 union typeword /* Use this to avoid unkosher casts. */
72 {
73 mach_msg_type_t type;
74 uint32_t word;
75 };
76 assert (sizeof (union typeword) == sizeof (mach_msg_type_t));
77 assert (sizeof (uint32_t) == sizeof (mach_msg_type_t));
78
79 if (nfds < 0 || (pollfds == NULL && nfds > FD_SETSIZE))
80 {
81 errno = EINVAL;
82 return -1;
83 }
84
85 #define IO_SELECT_REPLY_MSGID (21012 + 100) /* XXX */
86 #define IO_SELECT_TIMEOUT_REPLY_MSGID (21031 + 100) /* XXX */
87
88 if (timeout == NULL)
89 reply_msgid = IO_SELECT_REPLY_MSGID;
90 else
91 {
92 struct timespec now;
93
94 if (timeout->tv_sec < 0 || ! valid_nanoseconds (timeout->tv_nsec))
95 {
96 errno = EINVAL;
97 return -1;
98 }
99
100 err = __clock_gettime (CLOCK_REALTIME, &now);
101 if (err)
102 return -1;
103
104 ts.tv_sec = now.tv_sec + timeout->tv_sec;
105 ts.tv_nsec = now.tv_nsec + timeout->tv_nsec;
106
107 if (ts.tv_nsec >= 1000000000)
108 {
109 ts.tv_sec++;
110 ts.tv_nsec -= 1000000000;
111 }
112
113 if (ts.tv_sec < 0)
114 ts.tv_sec = LONG_MAX; /* XXX */
115
116 reply_msgid = IO_SELECT_TIMEOUT_REPLY_MSGID;
117 }
118
119 if (sigmask)
120 {
121 /* Add a port to the portset for the case when we get the signal even
122 before calling __mach_msg. */
123
124 sigport = __mach_reply_port ();
125
126 ss = _hurd_self_sigstate ();
127 _hurd_sigstate_lock (ss);
128 /* And tell the signal thread to message us when a signal arrives. */
129 ss->suspended = sigport;
130 _hurd_sigstate_unlock (ss);
131
132 if (__sigprocmask (SIG_SETMASK, sigmask, &oset))
133 {
134 _hurd_sigstate_lock (ss);
135 ss->suspended = MACH_PORT_NULL;
136 _hurd_sigstate_unlock (ss);
137 __mach_port_destroy (__mach_task_self (), sigport);
138 return -1;
139 }
140 }
141 else
142 sigport = MACH_PORT_NULL;
143
144 if (pollfds)
145 {
146 int error = 0;
147 /* Collect interesting descriptors from the user's `pollfd' array.
148 We do a first pass that reads the user's array before taking
149 any locks. The second pass then only touches our own stack,
150 and gets the port references. */
151
152 for (i = 0; i < nfds; ++i)
153 if (pollfds[i].fd >= 0)
154 {
155 int type = 0;
156 if (pollfds[i].events & POLLIN)
157 type |= SELECT_READ;
158 if (pollfds[i].events & POLLOUT)
159 type |= SELECT_WRITE;
160 if (pollfds[i].events & POLLPRI)
161 type |= SELECT_URG;
162
163 d[i].io_port = pollfds[i].fd;
164 d[i].type = type;
165 }
166 else
167 d[i].type = 0;
168
169 HURD_CRITICAL_BEGIN;
170 __mutex_lock (&_hurd_dtable_lock);
171
172 for (i = 0; i < nfds; ++i)
173 if (d[i].type != 0)
174 {
175 const int fd = (int) d[i].io_port;
176
177 if (fd < _hurd_dtablesize)
178 {
179 d[i].cell = _hurd_dtable[fd];
180 if (d[i].cell != NULL)
181 {
182 d[i].io_port = _hurd_port_get (&d[i].cell->port,
183 &d[i].ulink);
184 if (d[i].io_port != MACH_PORT_NULL)
185 continue;
186 }
187 }
188
189 /* Bogus descriptor, make it EBADF already. */
190 d[i].error = EBADF;
191 d[i].type = SELECT_ERROR;
192 error = 1;
193 }
194
195 __mutex_unlock (&_hurd_dtable_lock);
196 HURD_CRITICAL_END;
197
198 if (error)
199 {
200 /* Set timeout to 0. */
201 err = __clock_gettime (CLOCK_REALTIME, &ts);
202 if (err)
203 {
204 /* Really bad luck. */
205 err = errno;
206 HURD_CRITICAL_BEGIN;
207 __mutex_lock (&_hurd_dtable_lock);
208 while (i-- > 0)
209 if (d[i].type & ~SELECT_ERROR != 0)
210 _hurd_port_free (&d[i].cell->port, &d[i].ulink,
211 d[i].io_port);
212 __mutex_unlock (&_hurd_dtable_lock);
213 HURD_CRITICAL_END;
214 if (sigmask)
215 __sigprocmask (SIG_SETMASK, &oset, NULL);
216 errno = err;
217 return -1;
218 }
219 reply_msgid = IO_SELECT_TIMEOUT_REPLY_MSGID;
220 }
221
222 lastfd = i - 1;
223 firstfd = i == 0 ? lastfd : 0;
224 }
225 else
226 {
227 /* Collect interested descriptors from the user's fd_set arguments.
228 Use local copies so we can't crash from user bogosity. */
229
230 if (readfds == NULL)
231 FD_ZERO (&rfds);
232 else
233 rfds = *readfds;
234 if (writefds == NULL)
235 FD_ZERO (&wfds);
236 else
237 wfds = *writefds;
238 if (exceptfds == NULL)
239 FD_ZERO (&xfds);
240 else
241 xfds = *exceptfds;
242
243 HURD_CRITICAL_BEGIN;
244 __mutex_lock (&_hurd_dtable_lock);
245
246 /* Collect the ports for interesting FDs. */
247 firstfd = lastfd = -1;
248 for (i = 0; i < nfds; ++i)
249 {
250 int type = 0;
251 if (readfds != NULL && FD_ISSET (i, &rfds))
252 type |= SELECT_READ;
253 if (writefds != NULL && FD_ISSET (i, &wfds))
254 type |= SELECT_WRITE;
255 if (exceptfds != NULL && FD_ISSET (i, &xfds))
256 type |= SELECT_URG;
257 d[i].type = type;
258 if (type)
259 {
260 if (i < _hurd_dtablesize)
261 {
262 d[i].cell = _hurd_dtable[i];
263 if (d[i].cell != NULL)
264 d[i].io_port = _hurd_port_get (&d[i].cell->port,
265 &d[i].ulink);
266 }
267 if (i >= _hurd_dtablesize || d[i].cell == NULL ||
268 d[i].io_port == MACH_PORT_NULL)
269 {
270 /* If one descriptor is bogus, we fail completely. */
271 while (i-- > 0)
272 if (d[i].type != 0)
273 _hurd_port_free (&d[i].cell->port, &d[i].ulink,
274 d[i].io_port);
275 break;
276 }
277 lastfd = i;
278 if (firstfd == -1)
279 firstfd = i;
280 }
281 }
282
283 __mutex_unlock (&_hurd_dtable_lock);
284 HURD_CRITICAL_END;
285
286 if (i < nfds)
287 {
288 if (sigmask)
289 __sigprocmask (SIG_SETMASK, &oset, NULL);
290 errno = EBADF;
291 return -1;
292 }
293
294 if (nfds > _hurd_dtablesize)
295 nfds = _hurd_dtablesize;
296 }
297
298
299 err = 0;
300 got = 0;
301
302 /* Send them all io_select request messages. */
303
304 if (firstfd == -1)
305 {
306 if (sigport == MACH_PORT_NULL)
307 /* But not if there were no ports to deal with at all.
308 We are just a pure timeout. */
309 portset = __mach_reply_port ();
310 else
311 portset = sigport;
312 }
313 else
314 {
315 portset = MACH_PORT_NULL;
316
317 for (i = firstfd; i <= lastfd; ++i)
318 if (!(d[i].type & ~SELECT_ERROR))
319 d[i].reply_port = MACH_PORT_NULL;
320 else
321 {
322 int type = d[i].type;
323 d[i].reply_port = __mach_reply_port ();
324 if (timeout == NULL)
325 err = __io_select_request (d[i].io_port, d[i].reply_port, type);
326 else
327 err = __io_select_timeout_request (d[i].io_port, d[i].reply_port,
328 ts, type);
329 if (!err)
330 {
331 if (firstfd == lastfd && sigport == MACH_PORT_NULL)
332 /* When there's a single descriptor, we don't need a
333 portset, so just pretend we have one, but really
334 use the single reply port. */
335 portset = d[i].reply_port;
336 else if (got == 0)
337 /* We've got multiple reply ports, so we need a port set to
338 multiplex them. */
339 {
340 /* We will wait again for a reply later. */
341 if (portset == MACH_PORT_NULL)
342 /* Create the portset to receive all the replies on. */
343 err = __mach_port_allocate (__mach_task_self (),
344 MACH_PORT_RIGHT_PORT_SET,
345 &portset);
346 if (! err)
347 /* Put this reply port in the port set. */
348 __mach_port_move_member (__mach_task_self (),
349 d[i].reply_port, portset);
350 }
351 }
352 else
353 {
354 /* No error should happen, but record it for later
355 processing. */
356 d[i].error = err;
357 d[i].type |= SELECT_ERROR;
358 ++got;
359 }
360 _hurd_port_free (&d[i].cell->port, &d[i].ulink, d[i].io_port);
361 }
362
363 if (got == 0 && sigport != MACH_PORT_NULL)
364 {
365 if (portset == MACH_PORT_NULL)
366 /* Create the portset to receive the signal message on. */
367 __mach_port_allocate (__mach_task_self (), MACH_PORT_RIGHT_PORT_SET,
368 &portset);
369 /* Put the signal reply port in the port set. */
370 __mach_port_move_member (__mach_task_self (), sigport, portset);
371 }
372 }
373
374 /* GOT is the number of replies (or errors), while READY is the number of
375 replies with at least one type bit set. */
376 ready = 0;
377
378 /* Now wait for reply messages. */
379 if (!err && got == 0)
380 {
381 /* Now wait for io_select_reply messages on PORT,
382 timing out as appropriate. */
383
384 union
385 {
386 mach_msg_header_t head;
387 #ifdef MACH_MSG_TRAILER_MINIMUM_SIZE
388 struct
389 {
390 mach_msg_header_t head;
391 NDR_record_t ndr;
392 error_t err;
393 } error;
394 struct
395 {
396 mach_msg_header_t head;
397 NDR_record_t ndr;
398 error_t err;
399 int result;
400 mach_msg_trailer_t trailer;
401 } success;
402 #else
403 struct
404 {
405 mach_msg_header_t head;
406 union typeword err_type;
407 error_t err;
408 } error;
409 struct
410 {
411 mach_msg_header_t head;
412 union typeword err_type;
413 error_t err;
414 union typeword result_type;
415 int result;
416 } success;
417 #endif
418 } msg;
419 mach_msg_option_t options;
420 error_t msgerr;
421
422 /* We rely on servers to implement the timeout, but when there are none,
423 do it on the client side. */
424 if (timeout != NULL && firstfd == -1)
425 {
426 options = MACH_RCV_TIMEOUT;
427 to = timeout->tv_sec * 1000 + (timeout->tv_nsec + 999999) / 1000000;
428 }
429 else
430 {
431 options = 0;
432 to = MACH_MSG_TIMEOUT_NONE;
433 }
434
435 while ((msgerr = __mach_msg (&msg.head,
436 MACH_RCV_MSG | MACH_RCV_INTERRUPT | options,
437 0, sizeof msg, portset, to,
438 MACH_PORT_NULL)) == MACH_MSG_SUCCESS)
439 {
440 /* We got a message. Decode it. */
441 #ifdef MACH_MSG_TYPE_BIT
442 const union typeword inttype =
443 { type:
444 { MACH_MSG_TYPE_INTEGER_T, sizeof (integer_t) * 8, 1, 1, 0, 0 }
445 };
446 #endif
447
448 if (sigport != MACH_PORT_NULL && sigport == msg.head.msgh_local_port)
449 {
450 /* We actually got interrupted by a signal before
451 __mach_msg; poll for further responses and then
452 return quickly. */
453 err = EINTR;
454 goto poll;
455 }
456
457 if (msg.head.msgh_id == reply_msgid
458 && msg.head.msgh_size >= sizeof msg.error
459 && !(msg.head.msgh_bits & MACH_MSGH_BITS_COMPLEX)
460 #ifdef MACH_MSG_TYPE_BIT
461 && msg.error.err_type.word == inttype.word
462 #endif
463 )
464 {
465 /* This is a properly formatted message so far.
466 See if it is a success or a failure. */
467 if (msg.error.err == EINTR
468 && msg.head.msgh_size == sizeof msg.error)
469 {
470 /* EINTR response; poll for further responses
471 and then return quickly. */
472 err = EINTR;
473 goto poll;
474 }
475 /* Keep in mind msg.success.result can be 0 if a timeout
476 occurred. */
477 if (msg.error.err
478 #ifdef MACH_MSG_TYPE_BIT
479 || msg.success.result_type.word != inttype.word
480 #endif
481 || msg.head.msgh_size != sizeof msg.success)
482 {
483 /* Error or bogus reply. */
484 if (!msg.error.err)
485 msg.error.err = EIO;
486 __mach_msg_destroy (&msg.head);
487 }
488
489 /* Look up the respondent's reply port and record its
490 readiness. */
491 {
492 int had = got;
493 if (firstfd != -1)
494 for (i = firstfd; i <= lastfd; ++i)
495 if (d[i].type
496 && d[i].reply_port == msg.head.msgh_local_port)
497 {
498 if (msg.error.err)
499 {
500 d[i].error = msg.error.err;
501 d[i].type = SELECT_ERROR;
502 ++ready;
503 }
504 else
505 {
506 d[i].type &= msg.success.result;
507 if (d[i].type)
508 ++ready;
509 }
510
511 d[i].type |= SELECT_RETURNED;
512 ++got;
513 }
514 assert (got > had);
515 }
516 }
517
518 if (msg.head.msgh_remote_port != MACH_PORT_NULL)
519 __mach_port_deallocate (__mach_task_self (),
520 msg.head.msgh_remote_port);
521
522 if (got)
523 poll:
524 {
525 /* Poll for another message. */
526 to = 0;
527 options |= MACH_RCV_TIMEOUT;
528 }
529 }
530
531 if (msgerr == MACH_RCV_INTERRUPTED)
532 /* Interruption on our side (e.g. signal reception). */
533 err = EINTR;
534
535 if (ready)
536 /* At least one descriptor is known to be ready now, so we will
537 return success. */
538 err = 0;
539 }
540
541 if (firstfd != -1)
542 for (i = firstfd; i <= lastfd; ++i)
543 if (d[i].reply_port != MACH_PORT_NULL)
544 __mach_port_destroy (__mach_task_self (), d[i].reply_port);
545
546 if (sigport != MACH_PORT_NULL)
547 {
548 _hurd_sigstate_lock (ss);
549 ss->suspended = MACH_PORT_NULL;
550 _hurd_sigstate_unlock (ss);
551 __mach_port_destroy (__mach_task_self (), sigport);
552 }
553
554 if ((firstfd == -1 && sigport == MACH_PORT_NULL)
555 || ((firstfd != lastfd || sigport != MACH_PORT_NULL) && portset != MACH_PORT_NULL))
556 /* Destroy PORTSET, but only if it's not actually the reply port for a
557 single descriptor (in which case it's destroyed in the previous loop;
558 not doing it here is just a bit more efficient). */
559 __mach_port_destroy (__mach_task_self (), portset);
560
561 if (err)
562 {
563 if (sigmask)
564 __sigprocmask (SIG_SETMASK, &oset, NULL);
565 return __hurd_fail (err);
566 }
567
568 if (pollfds)
569 /* Fill in the `revents' members of the user's array. */
570 for (i = 0; i < nfds; ++i)
571 {
572 int type = d[i].type;
573 int_fast16_t revents = 0;
574
575 if (type & SELECT_ERROR)
576 switch (d[i].error)
577 {
578 case EPIPE:
579 revents = POLLHUP;
580 break;
581 case EBADF:
582 revents = POLLNVAL;
583 break;
584 default:
585 revents = POLLERR;
586 break;
587 }
588 else
589 if (type & SELECT_RETURNED)
590 {
591 if (type & SELECT_READ)
592 revents |= POLLIN;
593 if (type & SELECT_WRITE)
594 revents |= POLLOUT;
595 if (type & SELECT_URG)
596 revents |= POLLPRI;
597 }
598
599 pollfds[i].revents = revents;
600 }
601 else
602 {
603 /* Below we recalculate READY to include an increment for each operation
604 allowed on each fd. */
605 ready = 0;
606
607 /* Set the user bitarrays. We only ever have to clear bits, as all
608 desired ones are initially set. */
609 if (firstfd != -1)
610 for (i = firstfd; i <= lastfd; ++i)
611 {
612 int type = d[i].type;
613
614 if ((type & SELECT_RETURNED) == 0)
615 type = 0;
616
617 /* Callers of select don't expect to see errors, so we simulate
618 readiness of the erring object and the next call hopefully
619 will get the error again. */
620 if (type & SELECT_ERROR)
621 {
622 type = 0;
623 if (readfds != NULL && FD_ISSET (i, readfds))
624 type |= SELECT_READ;
625 if (writefds != NULL && FD_ISSET (i, writefds))
626 type |= SELECT_WRITE;
627 if (exceptfds != NULL && FD_ISSET (i, exceptfds))
628 type |= SELECT_URG;
629 }
630
631 if (type & SELECT_READ)
632 ready++;
633 else if (readfds)
634 FD_CLR (i, readfds);
635 if (type & SELECT_WRITE)
636 ready++;
637 else if (writefds)
638 FD_CLR (i, writefds);
639 if (type & SELECT_URG)
640 ready++;
641 else if (exceptfds)
642 FD_CLR (i, exceptfds);
643 }
644 }
645
646 if (sigmask && __sigprocmask (SIG_SETMASK, &oset, NULL))
647 return -1;
648
649 return ready;
650 }