]> git.ipfire.org Git - thirdparty/glibc.git/blob - nscd/connections.c
Try harder to re-exec nscd in paranoia mode.
[thirdparty/glibc.git] / nscd / connections.c
1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2007, 2008, 2009 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
19
20 #include <alloca.h>
21 #include <assert.h>
22 #include <atomic.h>
23 #include <error.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <grp.h>
27 #include <libintl.h>
28 #include <pthread.h>
29 #include <pwd.h>
30 #include <resolv.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <unistd.h>
34 #include <arpa/inet.h>
35 #ifdef HAVE_EPOLL
36 # include <sys/epoll.h>
37 #endif
38 #ifdef HAVE_INOTIFY
39 # include <sys/inotify.h>
40 #endif
41 #include <sys/mman.h>
42 #include <sys/param.h>
43 #include <sys/poll.h>
44 #ifdef HAVE_SENDFILE
45 # include <sys/sendfile.h>
46 #endif
47 #include <sys/socket.h>
48 #include <sys/stat.h>
49 #include <sys/un.h>
50
51 #include "nscd.h"
52 #include "dbg_log.h"
53 #include "selinux.h"
54 #include <resolv/resolv.h>
55 #ifdef HAVE_SENDFILE
56 # include <kernel-features.h>
57 #endif
58
59
/* Wrapper functions with error checking for standard functions.
   Defined elsewhere in nscd; presumably they abort on allocation
   failure (TODO confirm), so callers need not check for NULL.  */
extern void *xmalloc (size_t n);
extern void *xcalloc (size_t n, size_t s);
extern void *xrealloc (void *o, size_t n);

/* Support to run nscd as an unprivileged user */
const char *server_user;
static uid_t server_uid;
static gid_t server_gid;
const char *stat_user;
uid_t stat_uid;
/* Supplementary group list for SERVER_USER; filled in during
   begin_drop_privileges/finish_drop_privileges.  */
static gid_t *server_groups;
#ifndef NGROUPS
# define NGROUPS 32
#endif
static int server_ngroups;

/* Thread attributes used when creating worker threads.  */
static pthread_attr_t attr;

static void begin_drop_privileges (void);
static void finish_drop_privileges (void);
/* Map request type to a string, used for debug logging.  Indexed by
   the request type enum; unlisted request types are NULL.  */
const char *const serv2str[LASTREQ] =
{
  [GETPWBYNAME] = "GETPWBYNAME",
  [GETPWBYUID] = "GETPWBYUID",
  [GETGRBYNAME] = "GETGRBYNAME",
  [GETGRBYGID] = "GETGRBYGID",
  [GETHOSTBYNAME] = "GETHOSTBYNAME",
  [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
  [GETHOSTBYADDR] = "GETHOSTBYADDR",
  [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
  [SHUTDOWN] = "SHUTDOWN",
  [GETSTAT] = "GETSTAT",
  [INVALIDATE] = "INVALIDATE",
  [GETFDPW] = "GETFDPW",
  [GETFDGR] = "GETFDGR",
  [GETFDHST] = "GETFDHST",
  [GETAI] = "GETAI",
  [INITGROUPS] = "INITGROUPS",
  [GETSERVBYNAME] = "GETSERVBYNAME",
  [GETSERVBYPORT] = "GETSERVBYPORT",
  [GETFDSERV] = "GETFDSERV"
};
105
/* The control data structures for the services.  One entry per cached
   database.  Timeouts are in seconds (postimeout for positive entries,
   negtimeout for negative ones — presumably matching the TTL settings
   in nscd.conf; confirm against nscd.conf parsing).  wr_fd/ro_fd start
   at -1 (no descriptor); most fields are overridden by the
   configuration file at startup.  */
struct database_dyn dbs[lastdb] =
{
  /* passwd database, backed by /etc/passwd.  */
  [pwddb] = {
    .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
    .prune_lock = PTHREAD_MUTEX_INITIALIZER,
    .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
    .enabled = 0,
    .check_file = 1,
    .persistent = 0,
    .propagate = 1,
    .shared = 0,
    .max_db_size = DEFAULT_MAX_DB_SIZE,
    .suggested_module = DEFAULT_SUGGESTED_MODULE,
    .reset_res = 0,
    .filename = "/etc/passwd",
    .db_filename = _PATH_NSCD_PASSWD_DB,
    .disabled_iov = &pwd_iov_disabled,
    .postimeout = 3600,
    .negtimeout = 20,
    .wr_fd = -1,
    .ro_fd = -1,
    .mmap_used = false
  },
  /* group database, backed by /etc/group.  */
  [grpdb] = {
    .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
    .prune_lock = PTHREAD_MUTEX_INITIALIZER,
    .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
    .enabled = 0,
    .check_file = 1,
    .persistent = 0,
    .propagate = 1,
    .shared = 0,
    .max_db_size = DEFAULT_MAX_DB_SIZE,
    .suggested_module = DEFAULT_SUGGESTED_MODULE,
    .reset_res = 0,
    .filename = "/etc/group",
    .db_filename = _PATH_NSCD_GROUP_DB,
    .disabled_iov = &grp_iov_disabled,
    .postimeout = 3600,
    .negtimeout = 60,
    .wr_fd = -1,
    .ro_fd = -1,
    .mmap_used = false
  },
  /* hosts database; reset_res = 1 so invalidation also reinitializes
     the resolver (see invalidate_cache).  */
  [hstdb] = {
    .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
    .prune_lock = PTHREAD_MUTEX_INITIALIZER,
    .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
    .enabled = 0,
    .check_file = 1,
    .persistent = 0,
    .propagate = 0,		/* Not used.  */
    .shared = 0,
    .max_db_size = DEFAULT_MAX_DB_SIZE,
    .suggested_module = DEFAULT_SUGGESTED_MODULE,
    .reset_res = 1,
    .filename = "/etc/hosts",
    .db_filename = _PATH_NSCD_HOSTS_DB,
    .disabled_iov = &hst_iov_disabled,
    .postimeout = 3600,
    .negtimeout = 20,
    .wr_fd = -1,
    .ro_fd = -1,
    .mmap_used = false
  },
  /* services database, backed by /etc/services.  */
  [servdb] = {
    .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
    .prune_lock = PTHREAD_MUTEX_INITIALIZER,
    .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
    .enabled = 0,
    .check_file = 1,
    .persistent = 0,
    .propagate = 0,		/* Not used.  */
    .shared = 0,
    .max_db_size = DEFAULT_MAX_DB_SIZE,
    .suggested_module = DEFAULT_SUGGESTED_MODULE,
    .reset_res = 0,
    .filename = "/etc/services",
    .db_filename = _PATH_NSCD_SERVICES_DB,
    .disabled_iov = &serv_iov_disabled,
    .postimeout = 28800,
    .negtimeout = 20,
    .wr_fd = -1,
    .ro_fd = -1,
    .mmap_used = false
  }
};
194
195
196 /* Mapping of request type to database. */
197 static struct
198 {
199 bool data_request;
200 struct database_dyn *db;
201 } const reqinfo[LASTREQ] =
202 {
203 [GETPWBYNAME] = { true, &dbs[pwddb] },
204 [GETPWBYUID] = { true, &dbs[pwddb] },
205 [GETGRBYNAME] = { true, &dbs[grpdb] },
206 [GETGRBYGID] = { true, &dbs[grpdb] },
207 [GETHOSTBYNAME] = { true, &dbs[hstdb] },
208 [GETHOSTBYNAMEv6] = { true, &dbs[hstdb] },
209 [GETHOSTBYADDR] = { true, &dbs[hstdb] },
210 [GETHOSTBYADDRv6] = { true, &dbs[hstdb] },
211 [SHUTDOWN] = { false, NULL },
212 [GETSTAT] = { false, NULL },
213 [SHUTDOWN] = { false, NULL },
214 [GETFDPW] = { false, &dbs[pwddb] },
215 [GETFDGR] = { false, &dbs[grpdb] },
216 [GETFDHST] = { false, &dbs[hstdb] },
217 [GETAI] = { true, &dbs[hstdb] },
218 [INITGROUPS] = { true, &dbs[grpdb] },
219 [GETSERVBYNAME] = { true, &dbs[servdb] },
220 [GETSERVBYPORT] = { true, &dbs[servdb] },
221 [GETFDSERV] = { false, &dbs[servdb] }
222 };
223
224
/* Initial number of threads to use.  -1 means "not configured"; a
   default is chosen in nscd_init.  */
int nthreads = -1;
/* Maximum number of threads to use.  */
int max_nthreads = 32;

/* Socket for incoming connections.  */
static int sock;

#ifdef HAVE_INOTIFY
/* Inotify descriptor; -1 while unavailable/uninitialized.  */
static int inotify_fd = -1;

/* Watch descriptor for resolver configuration file.  */
static int resolv_conf_descr = -1;
#endif

#ifndef __ASSUME_SOCK_CLOEXEC
/* Negative if SOCK_CLOEXEC is not supported, positive if it is, zero
   before we know the result.  */
static int have_sock_cloexec;
#endif
#ifndef __ASSUME_ACCEPT4
/* Same tri-state convention, for the accept4 call.  */
static int have_accept4;
#endif

/* Number of times clients had to wait.  */
unsigned long int client_queued;
252
253
/* Send all LEN bytes of BUF on socket FD, restarting after EINTR and
   continuing across short writes.  Returns the number of bytes
   actually sent (LEN on complete success) or a negative value if the
   first failing send reported an error.  */
ssize_t
writeall (int fd, const void *buf, size_t len)
{
  const char *p = buf;
  size_t remaining = len;
  ssize_t result;

  do
    {
      result = TEMP_FAILURE_RETRY (send (fd, p, remaining, MSG_NOSIGNAL));
      if (result <= 0)
	break;

      p += result;
      remaining -= result;
    }
  while (remaining > 0);

  return result < 0 ? result : (ssize_t) (len - remaining);
}
270
271
#ifdef HAVE_SENDFILE
/* Transfer LEN bytes starting at file offset OFF from FROMFD to the
   socket TOFD with sendfile, restarting after EINTR and continuing
   across short transfers.  Returns the number of bytes sent (LEN on
   complete success) or a negative value on error.  */
ssize_t
sendfileall (int tofd, int fromfd, off_t off, size_t len)
{
  ssize_t remaining = len;
  ssize_t result;

  do
    {
      result = TEMP_FAILURE_RETRY (sendfile (tofd, fromfd, &off, remaining));
      if (result <= 0)
	break;

      remaining -= result;
    }
  while (remaining > 0);

  return result < 0 ? result : (ssize_t) len - remaining;
}
#endif
290
291
/* Per-byte markers stored in the temporary use map built by
   verify_persistent_db: each byte of the database's data area is
   tagged with the kind of record occupying it, plus begin/end flags
   so overlapping or mis-referenced records can be detected.  */
enum usekey
{
  use_not = 0,
  /* The following three are not really used, they are symbolic constants.  */
  use_first = 16,
  use_begin = 32,
  use_end = 64,

  /* Byte belongs to a hash entry (struct hashentry).  */
  use_he = 1,
  use_he_begin = use_he | use_begin,
  use_he_end = use_he | use_end,
#if SEPARATE_KEY
  /* Byte belongs to a key stored outside the data record.  */
  use_key = 2,
  use_key_begin = use_key | use_begin,
  use_key_end = use_key | use_end,
  use_key_first = use_key_begin | use_first,
#endif
  /* Byte belongs to a data record (struct datahead plus payload).  */
  use_data = 3,
  use_data_begin = use_data | use_begin,
  use_data_end = use_data | use_end,
  use_data_first = use_data_begin | use_first
};
314
315
/* Validate and mark one record of the persistent database.  The
   record occupies [START, START+LEN) in the data area and is of kind
   USE (see enum usekey).  FIRST_FREE bounds the used part of the data
   area; USEMAP holds one marker byte per data byte.  Returns 1 when
   the record fits entirely in unused space (and marks it), or when it
   exactly matches a previously marked record of the same kind (data
   records may be referenced by several hash entries); 0 on any
   overlap, misalignment, or out-of-bounds reference.  */
static int
check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
	   enum usekey use, ref_t start, size_t len)
{
  assert (len >= 2);

  /* Record must lie inside the used data area and be block-aligned.  */
  if (start > first_free || start + len > first_free
      || (start & BLOCK_ALIGN_M1))
    return 0;

  if (usemap[start] == use_not)
    {
      /* Fresh region: mark begin, interior, and end bytes.  */
      /* Add the start marker.  */
      usemap[start] = use | use_begin;
      use &= ~use_first;

      while (--len > 0)
	if (usemap[++start] != use_not)
	  return 0;
	else
	  usemap[start] = use;

      /* Add the end marker.  */
      usemap[start] = use | use_end;
    }
  else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
    {
      /* Region already marked with the same kind: verify the whole
	 extent matches, byte for byte.  */
      /* Hash entries can't be shared.  */
      if (use == use_he)
	return 0;

      usemap[start] |= (use & use_first);
      use &= ~use_first;

      while (--len > 1)
	if (usemap[++start] != use)
	  return 0;

      if (usemap[++start] != (use | use_end))
	return 0;
    }
  else
    /* Points to a wrong object or somewhere in the middle.  */
    return 0;

  return 1;
}
363
364
/* Verify data in persistent database.  MEM is the writable mapping of
   the database file, READHEAD is the header as read from the file
   before mapping, DBNR identifies which database this is.  Returns 1
   if the database is internally consistent, 0 otherwise.  The check
   validates the header, then walks every hash chain marking each hash
   entry and data record in a temporary use map so overlaps, stray
   references, and cycles are detected.  */
static int
verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
{
  assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb || dbnr == servdb);

  time_t now = time (NULL);

  struct database_pers_head *head = mem;
  struct database_pers_head head_copy = *head;

  /* Check that the header that was read matches the head in the database.  */
  if (memcmp (head, readhead, sizeof (*head)) != 0)
    return 0;

  /* First some easy tests: make sure the database header is sane.  */
  if (head->version != DB_VERSION
      || head->header_size != sizeof (*head)
      /* We allow a timestamp to be one hour ahead of the current time.
	 This should cover daylight saving time changes.  */
      || head->timestamp > now + 60 * 60 + 60
      /* An odd gc_cycle means a garbage collection was in progress.  */
      || (head->gc_cycle & 1)
      || head->module == 0
      || (size_t) head->module > INT32_MAX / sizeof (ref_t)
      || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
      || head->first_free < 0
      || head->first_free > head->data_size
      || (head->first_free & BLOCK_ALIGN_M1) != 0
      || head->maxnentries < 0
      || head->maxnsearched < 0)
    return 0;

  /* One marker byte per used byte of the data area.  */
  uint8_t *usemap = calloc (head->first_free, 1);
  if (usemap == NULL)
    return 0;

  /* Data area starts after the hash table (MODULE buckets).  */
  const char *data = (char *) &head->array[roundup (head->module,
						    ALIGN / sizeof (ref_t))];

  nscd_ssize_t he_cnt = 0;
  for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
    {
      /* TRAIL advances at half the speed of WORK to detect cycles.  */
      ref_t trail = head->array[cnt];
      ref_t work = trail;
      int tick = 0;

      while (work != ENDREF)
	{
	  if (! check_use (data, head->first_free, usemap, use_he, work,
			   sizeof (struct hashentry)))
	    goto fail;

	  /* Now we know we can dereference the record.  */
	  struct hashentry *here = (struct hashentry *) (data + work);

	  ++he_cnt;

	  /* Make sure the record is for this type of service.  */
	  if (here->type >= LASTREQ
	      || reqinfo[here->type].db != &dbs[dbnr])
	    goto fail;

	  /* Validate boolean field value.  */
	  if (here->first != false && here->first != true)
	    goto fail;

	  if (here->len < 0)
	    goto fail;

	  /* Now the data.  */
	  if (here->packet < 0
	      || here->packet > head->first_free
	      || here->packet + sizeof (struct datahead) > head->first_free)
	    goto fail;

	  struct datahead *dh = (struct datahead *) (data + here->packet);

	  if (! check_use (data, head->first_free, usemap,
			   use_data | (here->first ? use_first : 0),
			   here->packet, dh->allocsize))
	    goto fail;

	  if (dh->allocsize < sizeof (struct datahead)
	      || dh->recsize > dh->allocsize
	      || (dh->notfound != false && dh->notfound != true)
	      || (dh->usable != false && dh->usable != true))
	    goto fail;

	  /* The key must lie inside the referenced data record.  */
	  if (here->key < here->packet + sizeof (struct datahead)
	      || here->key > here->packet + dh->allocsize
	      || here->key + here->len > here->packet + dh->allocsize)
	    {
#if SEPARATE_KEY
	      /* If keys can appear outside of data, this should be done
		 instead.  But gc doesn't mark the data in that case.  */
	      if (! check_use (data, head->first_free, usemap,
			       use_key | (here->first ? use_first : 0),
			       here->key, here->len))
#endif
		goto fail;
	    }

	  work = here->next;

	  if (work == trail)
	    /* A circular list, this must not happen.  */
	    goto fail;
	  if (tick)
	    trail = ((struct hashentry *) (data + trail))->next;
	  tick = 1 - tick;
	}
    }

  if (he_cnt != head->nentries)
    goto fail;

  /* See if all data and keys had at least one reference from
     he->first == true hashentry.  */
  for (ref_t idx = 0; idx < head->first_free; ++idx)
    {
#if SEPARATE_KEY
      if (usemap[idx] == use_key_begin)
	goto fail;
#endif
      if (usemap[idx] == use_data_begin)
	goto fail;
    }

  /* Finally, make sure the database hasn't changed since the first test.  */
  if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
    goto fail;

  free (usemap);
  return 1;

fail:
  free (usemap);
  return 0;
}
504
505
/* Extra flag ORed into every open of a database file: close-on-exec
   where available (a later fcntl fallback handles the rest — see the
   paranoia handling in nscd_init).  */
#ifdef O_CLOEXEC
# define EXTRA_O_FLAGS O_CLOEXEC
#else
# define EXTRA_O_FLAGS 0
#endif
511
512
513 /* Initialize database information structures. */
514 void
515 nscd_init (void)
516 {
517 /* Look up unprivileged uid/gid/groups before we start listening on the
518 socket */
519 if (server_user != NULL)
520 begin_drop_privileges ();
521
522 if (nthreads == -1)
523 /* No configuration for this value, assume a default. */
524 nthreads = 4;
525
526 #ifdef HAVE_INOTIFY
527 /* Use inotify to recognize changed files. */
528 inotify_fd = inotify_init1 (IN_NONBLOCK);
529 # ifndef __ASSUME_IN_NONBLOCK
530 if (inotify_fd == -1 && errno == ENOSYS)
531 {
532 inotify_fd = inotify_init ();
533 if (inotify_fd != -1)
534 fcntl (inotify_fd, F_SETFL, O_RDONLY | O_NONBLOCK);
535 }
536 # endif
537 #endif
538
539 for (size_t cnt = 0; cnt < lastdb; ++cnt)
540 if (dbs[cnt].enabled)
541 {
542 pthread_rwlock_init (&dbs[cnt].lock, NULL);
543 pthread_mutex_init (&dbs[cnt].memlock, NULL);
544
545 if (dbs[cnt].persistent)
546 {
547 /* Try to open the appropriate file on disk. */
548 int fd = open (dbs[cnt].db_filename, O_RDWR | EXTRA_O_FLAGS);
549 if (fd != -1)
550 {
551 char *msg = NULL;
552 struct stat64 st;
553 void *mem;
554 size_t total;
555 struct database_pers_head head;
556 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
557 sizeof (head)));
558 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
559 {
560 fail_db_errno:
561 /* The code is single-threaded at this point so
562 using strerror is just fine. */
563 msg = strerror (errno);
564 fail_db:
565 dbg_log (_("invalid persistent database file \"%s\": %s"),
566 dbs[cnt].db_filename, msg);
567 unlink (dbs[cnt].db_filename);
568 }
569 else if (head.module == 0 && head.data_size == 0)
570 {
571 /* The file has been created, but the head has not
572 been initialized yet. */
573 msg = _("uninitialized header");
574 goto fail_db;
575 }
576 else if (head.header_size != (int) sizeof (head))
577 {
578 msg = _("header size does not match");
579 goto fail_db;
580 }
581 else if ((total = (sizeof (head)
582 + roundup (head.module * sizeof (ref_t),
583 ALIGN)
584 + head.data_size))
585 > st.st_size
586 || total < sizeof (head))
587 {
588 msg = _("file size does not match");
589 goto fail_db;
590 }
591 /* Note we map with the maximum size allowed for the
592 database. This is likely much larger than the
593 actual file size. This is OK on most OSes since
594 extensions of the underlying file will
595 automatically translate more pages available for
596 memory access. */
597 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
598 PROT_READ | PROT_WRITE,
599 MAP_SHARED, fd, 0))
600 == MAP_FAILED)
601 goto fail_db_errno;
602 else if (!verify_persistent_db (mem, &head, cnt))
603 {
604 munmap (mem, total);
605 msg = _("verification failed");
606 goto fail_db;
607 }
608 else
609 {
610 /* Success. We have the database. */
611 dbs[cnt].head = mem;
612 dbs[cnt].memsize = total;
613 dbs[cnt].data = (char *)
614 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
615 ALIGN / sizeof (ref_t))];
616 dbs[cnt].mmap_used = true;
617
618 if (dbs[cnt].suggested_module > head.module)
619 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
620 dbnames[cnt]);
621
622 dbs[cnt].wr_fd = fd;
623 fd = -1;
624 /* We also need a read-only descriptor. */
625 if (dbs[cnt].shared)
626 {
627 dbs[cnt].ro_fd = open (dbs[cnt].db_filename,
628 O_RDONLY | EXTRA_O_FLAGS);
629 if (dbs[cnt].ro_fd == -1)
630 dbg_log (_("\
631 cannot create read-only descriptor for \"%s\"; no mmap"),
632 dbs[cnt].db_filename);
633 }
634
635 // XXX Shall we test whether the descriptors actually
636 // XXX point to the same file?
637 }
638
639 /* Close the file descriptors in case something went
640 wrong in which case the variable have not been
641 assigned -1. */
642 if (fd != -1)
643 close (fd);
644 }
645 else if (errno == EACCES)
646 error (EXIT_FAILURE, 0, _("cannot access '%s'"),
647 dbs[cnt].db_filename);
648 }
649
650 if (dbs[cnt].head == NULL)
651 {
652 /* No database loaded. Allocate the data structure,
653 possibly on disk. */
654 struct database_pers_head head;
655 size_t total = (sizeof (head)
656 + roundup (dbs[cnt].suggested_module
657 * sizeof (ref_t), ALIGN)
658 + (dbs[cnt].suggested_module
659 * DEFAULT_DATASIZE_PER_BUCKET));
660
661 /* Try to create the database. If we do not need a
662 persistent database create a temporary file. */
663 int fd;
664 int ro_fd = -1;
665 if (dbs[cnt].persistent)
666 {
667 fd = open (dbs[cnt].db_filename,
668 O_RDWR | O_CREAT | O_EXCL | O_TRUNC | EXTRA_O_FLAGS,
669 S_IRUSR | S_IWUSR);
670 if (fd != -1 && dbs[cnt].shared)
671 ro_fd = open (dbs[cnt].db_filename,
672 O_RDONLY | EXTRA_O_FLAGS);
673 }
674 else
675 {
676 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
677 fd = mkostemp (fname, EXTRA_O_FLAGS);
678
679 /* We do not need the file name anymore after we
680 opened another file descriptor in read-only mode. */
681 if (fd != -1)
682 {
683 if (dbs[cnt].shared)
684 ro_fd = open (fname, O_RDONLY | EXTRA_O_FLAGS);
685
686 unlink (fname);
687 }
688 }
689
690 if (fd == -1)
691 {
692 if (errno == EEXIST)
693 {
694 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
695 dbnames[cnt], dbs[cnt].db_filename);
696 // XXX Correct way to terminate?
697 exit (1);
698 }
699
700 if (dbs[cnt].persistent)
701 dbg_log (_("cannot create %s; no persistent database used"),
702 dbs[cnt].db_filename);
703 else
704 dbg_log (_("cannot create %s; no sharing possible"),
705 dbs[cnt].db_filename);
706
707 dbs[cnt].persistent = 0;
708 // XXX remember: no mmap
709 }
710 else
711 {
712 /* Tell the user if we could not create the read-only
713 descriptor. */
714 if (ro_fd == -1 && dbs[cnt].shared)
715 dbg_log (_("\
716 cannot create read-only descriptor for \"%s\"; no mmap"),
717 dbs[cnt].db_filename);
718
719 /* Before we create the header, initialiye the hash
720 table. So that if we get interrupted if writing
721 the header we can recognize a partially initialized
722 database. */
723 size_t ps = sysconf (_SC_PAGESIZE);
724 char tmpbuf[ps];
725 assert (~ENDREF == 0);
726 memset (tmpbuf, '\xff', ps);
727
728 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
729 off_t offset = sizeof (head);
730
731 size_t towrite;
732 if (offset % ps != 0)
733 {
734 towrite = MIN (remaining, ps - (offset % ps));
735 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
736 goto write_fail;
737 offset += towrite;
738 remaining -= towrite;
739 }
740
741 while (remaining > ps)
742 {
743 if (pwrite (fd, tmpbuf, ps, offset) == -1)
744 goto write_fail;
745 offset += ps;
746 remaining -= ps;
747 }
748
749 if (remaining > 0
750 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
751 goto write_fail;
752
753 /* Create the header of the file. */
754 struct database_pers_head head =
755 {
756 .version = DB_VERSION,
757 .header_size = sizeof (head),
758 .module = dbs[cnt].suggested_module,
759 .data_size = (dbs[cnt].suggested_module
760 * DEFAULT_DATASIZE_PER_BUCKET),
761 .first_free = 0
762 };
763 void *mem;
764
765 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
766 != sizeof (head))
767 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
768 != 0)
769 || (mem = mmap (NULL, dbs[cnt].max_db_size,
770 PROT_READ | PROT_WRITE,
771 MAP_SHARED, fd, 0)) == MAP_FAILED)
772 {
773 write_fail:
774 unlink (dbs[cnt].db_filename);
775 dbg_log (_("cannot write to database file %s: %s"),
776 dbs[cnt].db_filename, strerror (errno));
777 dbs[cnt].persistent = 0;
778 }
779 else
780 {
781 /* Success. */
782 dbs[cnt].head = mem;
783 dbs[cnt].data = (char *)
784 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
785 ALIGN / sizeof (ref_t))];
786 dbs[cnt].memsize = total;
787 dbs[cnt].mmap_used = true;
788
789 /* Remember the descriptors. */
790 dbs[cnt].wr_fd = fd;
791 dbs[cnt].ro_fd = ro_fd;
792 fd = -1;
793 ro_fd = -1;
794 }
795
796 if (fd != -1)
797 close (fd);
798 if (ro_fd != -1)
799 close (ro_fd);
800 }
801 }
802
803 #if !defined O_CLOEXEC || !defined __ASSUME_O_CLOEXEC
804 /* We do not check here whether the O_CLOEXEC provided to the
805 open call was successful or not. The two fcntl calls are
806 only performed once each per process start-up and therefore
807 is not noticeable at all. */
808 if (paranoia
809 && ((dbs[cnt].wr_fd != -1
810 && fcntl (dbs[cnt].wr_fd, F_SETFD, FD_CLOEXEC) == -1)
811 || (dbs[cnt].ro_fd != -1
812 && fcntl (dbs[cnt].ro_fd, F_SETFD, FD_CLOEXEC) == -1)))
813 {
814 dbg_log (_("\
815 cannot set socket to close on exec: %s; disabling paranoia mode"),
816 strerror (errno));
817 paranoia = 0;
818 }
819 #endif
820
821 if (dbs[cnt].head == NULL)
822 {
823 /* We do not use the persistent database. Just
824 create an in-memory data structure. */
825 assert (! dbs[cnt].persistent);
826
827 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
828 + (dbs[cnt].suggested_module
829 * sizeof (ref_t)));
830 memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
831 assert (~ENDREF == 0);
832 memset (dbs[cnt].head->array, '\xff',
833 dbs[cnt].suggested_module * sizeof (ref_t));
834 dbs[cnt].head->module = dbs[cnt].suggested_module;
835 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
836 * dbs[cnt].head->module);
837 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
838 dbs[cnt].head->first_free = 0;
839
840 dbs[cnt].shared = 0;
841 assert (dbs[cnt].ro_fd == -1);
842 }
843
844 dbs[cnt].inotify_descr = -1;
845 if (dbs[cnt].check_file)
846 {
847 #ifdef HAVE_INOTIFY
848 if (inotify_fd < 0
849 || (dbs[cnt].inotify_descr
850 = inotify_add_watch (inotify_fd, dbs[cnt].filename,
851 IN_DELETE_SELF | IN_MODIFY)) < 0)
852 /* We cannot notice changes in the main thread. */
853 #endif
854 {
855 /* We need the modification date of the file. */
856 struct stat64 st;
857
858 if (stat64 (dbs[cnt].filename, &st) < 0)
859 {
860 /* We cannot stat() the file, disable file checking. */
861 dbg_log (_("cannot stat() file `%s': %s"),
862 dbs[cnt].filename, strerror (errno));
863 dbs[cnt].check_file = 0;
864 }
865 else
866 dbs[cnt].file_mtime = st.st_mtime;
867 }
868 }
869
870 #ifdef HAVE_INOTIFY
871 if (cnt == hstdb && inotify_fd >= -1)
872 /* We also monitor the resolver configuration file. */
873 resolv_conf_descr = inotify_add_watch (inotify_fd,
874 _PATH_RESCONF,
875 IN_DELETE_SELF | IN_MODIFY);
876 #endif
877 }
878
879 /* Create the socket. */
880 #ifndef __ASSUME_SOCK_CLOEXEC
881 sock = -1;
882 if (have_sock_cloexec >= 0)
883 #endif
884 {
885 sock = socket (AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
886 #ifndef __ASSUME_SOCK_CLOEXEC
887 if (have_sock_cloexec == 0)
888 have_sock_cloexec = sock != -1 || errno != EINVAL ? 1 : -1;
889 #endif
890 }
891 #ifndef __ASSUME_SOCK_CLOEXEC
892 if (have_sock_cloexec < 0)
893 sock = socket (AF_UNIX, SOCK_STREAM, 0);
894 #endif
895 if (sock < 0)
896 {
897 dbg_log (_("cannot open socket: %s"), strerror (errno));
898 exit (errno == EACCES ? 4 : 1);
899 }
900 /* Bind a name to the socket. */
901 struct sockaddr_un sock_addr;
902 sock_addr.sun_family = AF_UNIX;
903 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
904 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
905 {
906 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
907 exit (errno == EACCES ? 4 : 1);
908 }
909
910 #ifndef __ASSUME_SOCK_CLOEXEC
911 if (have_sock_cloexec < 0)
912 {
913 /* We don't want to get stuck on accept. */
914 int fl = fcntl (sock, F_GETFL);
915 if (fl == -1 || fcntl (sock, F_SETFL, fl | O_NONBLOCK) == -1)
916 {
917 dbg_log (_("cannot change socket to nonblocking mode: %s"),
918 strerror (errno));
919 exit (1);
920 }
921
922 /* The descriptor needs to be closed on exec. */
923 if (paranoia && fcntl (sock, F_SETFD, FD_CLOEXEC) == -1)
924 {
925 dbg_log (_("cannot set socket to close on exec: %s"),
926 strerror (errno));
927 exit (1);
928 }
929 }
930 #endif
931
932 /* Set permissions for the socket. */
933 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
934
935 /* Set the socket up to accept connections. */
936 if (listen (sock, SOMAXCONN) < 0)
937 {
938 dbg_log (_("cannot enable socket to accept connections: %s"),
939 strerror (errno));
940 exit (1);
941 }
942
943 /* Change to unprivileged uid/gid/groups if specifed in config file */
944 if (server_user != NULL)
945 finish_drop_privileges ();
946 }
947
948
949 /* Close the connections. */
950 void
951 close_sockets (void)
952 {
953 close (sock);
954 }
955
956
957 static void
958 invalidate_cache (char *key, int fd)
959 {
960 dbtype number;
961 int32_t resp;
962
963 for (number = pwddb; number < lastdb; ++number)
964 if (strcmp (key, dbnames[number]) == 0)
965 {
966 if (dbs[number].reset_res)
967 res_init ();
968
969 break;
970 }
971
972 if (number == lastdb)
973 {
974 resp = EINVAL;
975 writeall (fd, &resp, sizeof (resp));
976 return;
977 }
978
979 if (dbs[number].enabled)
980 {
981 pthread_mutex_lock (&dbs[number].prune_run_lock);
982 prune_cache (&dbs[number], LONG_MAX, fd);
983 pthread_mutex_unlock (&dbs[number].prune_run_lock);
984 }
985 else
986 {
987 resp = 0;
988 writeall (fd, &resp, sizeof (resp));
989 }
990 }
991
992
#ifdef SCM_RIGHTS
/* Pass the read-only descriptor of database DB to the client on
   socket FD using an SCM_RIGHTS control message.  The payload carries
   the database name KEY and the total mapping size so the client can
   mmap the descriptor itself.  Failures are ignored; the client falls
   back to the socket protocol.  */
static void
send_ro_fd (struct database_dyn *db, char *key, int fd)
{
  /* If we do not have a read-only file descriptor do nothing.  */
  if (db->ro_fd == -1)
    return;

  /* We need to send some data along with the descriptor.  */
  uint64_t mapsize = (db->head->data_size
		      + roundup (db->head->module * sizeof (ref_t), ALIGN)
		      + sizeof (struct database_pers_head));
  struct iovec iov[2];
  iov[0].iov_base = key;
  iov[0].iov_len = strlen (key) + 1;
  iov[1].iov_base = &mapsize;
  iov[1].iov_len = sizeof (mapsize);

  /* Prepare the control message to transfer the descriptor.  */
  union
  {
    struct cmsghdr hdr;
    char bytes[CMSG_SPACE (sizeof (int))];
  } buf;
  struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
			.msg_control = buf.bytes,
			.msg_controllen = sizeof (buf) };
  struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);

  cmsg->cmsg_level = SOL_SOCKET;
  cmsg->cmsg_type = SCM_RIGHTS;
  cmsg->cmsg_len = CMSG_LEN (sizeof (int));

  /* Store the descriptor in the ancillary data.  */
  int *ip = (int *) CMSG_DATA (cmsg);
  *ip = db->ro_fd;

  msg.msg_controllen = cmsg->cmsg_len;

  /* Send the control message.  We repeat when we are interrupted but
     everything else is ignored.  */
#ifndef MSG_NOSIGNAL
# define MSG_NOSIGNAL 0
#endif
  (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));

  if (__builtin_expect (debug_level > 0, 0))
    dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
}
#endif	/* SCM_RIGHTS */
1042
1043
1044 /* Handle new request. */
1045 static void
1046 handle_request (int fd, request_header *req, void *key, uid_t uid, pid_t pid)
1047 {
1048 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
1049 {
1050 if (debug_level > 0)
1051 dbg_log (_("\
1052 cannot handle old request version %d; current version is %d"),
1053 req->version, NSCD_VERSION);
1054 return;
1055 }
1056
1057 /* Perform the SELinux check before we go on to the standard checks. */
1058 if (selinux_enabled && nscd_request_avc_has_perm (fd, req->type) != 0)
1059 {
1060 if (debug_level > 0)
1061 {
1062 #ifdef SO_PEERCRED
1063 # ifdef PATH_MAX
1064 char buf[PATH_MAX];
1065 # else
1066 char buf[4096];
1067 # endif
1068
1069 snprintf (buf, sizeof (buf), "/proc/%ld/exe", (long int) pid);
1070 ssize_t n = readlink (buf, buf, sizeof (buf) - 1);
1071
1072 if (n <= 0)
1073 dbg_log (_("\
1074 request from %ld not handled due to missing permission"), (long int) pid);
1075 else
1076 {
1077 buf[n] = '\0';
1078 dbg_log (_("\
1079 request from '%s' [%ld] not handled due to missing permission"),
1080 buf, (long int) pid);
1081 }
1082 #else
1083 dbg_log (_("request not handled due to missing permission"));
1084 #endif
1085 }
1086 return;
1087 }
1088
1089 struct database_dyn *db = reqinfo[req->type].db;
1090
1091 /* See whether we can service the request from the cache. */
1092 if (__builtin_expect (reqinfo[req->type].data_request, true))
1093 {
1094 if (__builtin_expect (debug_level, 0) > 0)
1095 {
1096 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
1097 {
1098 char buf[INET6_ADDRSTRLEN];
1099
1100 dbg_log ("\t%s (%s)", serv2str[req->type],
1101 inet_ntop (req->type == GETHOSTBYADDR
1102 ? AF_INET : AF_INET6,
1103 key, buf, sizeof (buf)));
1104 }
1105 else
1106 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1107 }
1108
1109 /* Is this service enabled? */
1110 if (__builtin_expect (!db->enabled, 0))
1111 {
1112 /* No, sent the prepared record. */
1113 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
1114 db->disabled_iov->iov_len,
1115 MSG_NOSIGNAL))
1116 != (ssize_t) db->disabled_iov->iov_len
1117 && __builtin_expect (debug_level, 0) > 0)
1118 {
1119 /* We have problems sending the result. */
1120 char buf[256];
1121 dbg_log (_("cannot write result: %s"),
1122 strerror_r (errno, buf, sizeof (buf)));
1123 }
1124
1125 return;
1126 }
1127
1128 /* Be sure we can read the data. */
1129 if (__builtin_expect (pthread_rwlock_tryrdlock (&db->lock) != 0, 0))
1130 {
1131 ++db->head->rdlockdelayed;
1132 pthread_rwlock_rdlock (&db->lock);
1133 }
1134
1135 /* See whether we can handle it from the cache. */
1136 struct datahead *cached;
1137 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
1138 db, uid);
1139 if (cached != NULL)
1140 {
1141 /* Hurray it's in the cache. */
1142 ssize_t nwritten;
1143
1144 #ifdef HAVE_SENDFILE
1145 if (__builtin_expect (db->mmap_used, 1))
1146 {
1147 assert (db->wr_fd != -1);
1148 assert ((char *) cached->data > (char *) db->data);
1149 assert ((char *) cached->data - (char *) db->head
1150 + cached->recsize
1151 <= (sizeof (struct database_pers_head)
1152 + db->head->module * sizeof (ref_t)
1153 + db->head->data_size));
1154 nwritten = sendfileall (fd, db->wr_fd,
1155 (char *) cached->data
1156 - (char *) db->head, cached->recsize);
1157 # ifndef __ASSUME_SENDFILE
1158 if (nwritten == -1 && errno == ENOSYS)
1159 goto use_write;
1160 # endif
1161 }
1162 else
1163 # ifndef __ASSUME_SENDFILE
1164 use_write:
1165 # endif
1166 #endif
1167 nwritten = writeall (fd, cached->data, cached->recsize);
1168
1169 if (nwritten != cached->recsize
1170 && __builtin_expect (debug_level, 0) > 0)
1171 {
1172 /* We have problems sending the result. */
1173 char buf[256];
1174 dbg_log (_("cannot write result: %s"),
1175 strerror_r (errno, buf, sizeof (buf)));
1176 }
1177
1178 pthread_rwlock_unlock (&db->lock);
1179
1180 return;
1181 }
1182
1183 pthread_rwlock_unlock (&db->lock);
1184 }
1185 else if (__builtin_expect (debug_level, 0) > 0)
1186 {
1187 if (req->type == INVALIDATE)
1188 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1189 else
1190 dbg_log ("\t%s", serv2str[req->type]);
1191 }
1192
1193 /* Handle the request. */
1194 switch (req->type)
1195 {
1196 case GETPWBYNAME:
1197 addpwbyname (db, fd, req, key, uid);
1198 break;
1199
1200 case GETPWBYUID:
1201 addpwbyuid (db, fd, req, key, uid);
1202 break;
1203
1204 case GETGRBYNAME:
1205 addgrbyname (db, fd, req, key, uid);
1206 break;
1207
1208 case GETGRBYGID:
1209 addgrbygid (db, fd, req, key, uid);
1210 break;
1211
1212 case GETHOSTBYNAME:
1213 addhstbyname (db, fd, req, key, uid);
1214 break;
1215
1216 case GETHOSTBYNAMEv6:
1217 addhstbynamev6 (db, fd, req, key, uid);
1218 break;
1219
1220 case GETHOSTBYADDR:
1221 addhstbyaddr (db, fd, req, key, uid);
1222 break;
1223
1224 case GETHOSTBYADDRv6:
1225 addhstbyaddrv6 (db, fd, req, key, uid);
1226 break;
1227
1228 case GETAI:
1229 addhstai (db, fd, req, key, uid);
1230 break;
1231
1232 case INITGROUPS:
1233 addinitgroups (db, fd, req, key, uid);
1234 break;
1235
1236 case GETSERVBYNAME:
1237 addservbyname (db, fd, req, key, uid);
1238 break;
1239
1240 case GETSERVBYPORT:
1241 addservbyport (db, fd, req, key, uid);
1242 break;
1243
1244 case GETSTAT:
1245 case SHUTDOWN:
1246 case INVALIDATE:
1247 {
1248 /* Get the callers credentials. */
1249 #ifdef SO_PEERCRED
1250 struct ucred caller;
1251 socklen_t optlen = sizeof (caller);
1252
1253 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1254 {
1255 char buf[256];
1256
1257 dbg_log (_("error getting caller's id: %s"),
1258 strerror_r (errno, buf, sizeof (buf)));
1259 break;
1260 }
1261
1262 uid = caller.uid;
1263 #else
1264 /* Some systems have no SO_PEERCRED implementation. They don't
1265 care about security so we don't as well. */
1266 uid = 0;
1267 #endif
1268 }
1269
1270 /* Accept shutdown, getstat and invalidate only from root. For
1271 the stat call also allow the user specified in the config file. */
1272 if (req->type == GETSTAT)
1273 {
1274 if (uid == 0 || uid == stat_uid)
1275 send_stats (fd, dbs);
1276 }
1277 else if (uid == 0)
1278 {
1279 if (req->type == INVALIDATE)
1280 invalidate_cache (key, fd);
1281 else
1282 termination_handler (0);
1283 }
1284 break;
1285
1286 case GETFDPW:
1287 case GETFDGR:
1288 case GETFDHST:
1289 case GETFDSERV:
1290 #ifdef SCM_RIGHTS
1291 send_ro_fd (reqinfo[req->type].db, key, fd);
1292 #endif
1293 break;
1294
1295 default:
1296 /* Ignore the command, it's nothing we know. */
1297 break;
1298 }
1299 }
1300
1301
/* Restart the process.  Called from the main loop in paranoia mode:
   the daemon re-execs itself so a long-running (possibly leaking)
   process is replaced by a fresh copy while the persistent databases
   survive in their mapped files.  On any failure the function
   disables paranoia mode and returns instead of exiting.  */
static void
restart (void)
{
  /* First determine the parameters.  We do not use the parameters
     passed to main() since in case nscd is started by running the
     dynamic linker this will not work.  Yes, this is not the usual
     case but nscd is part of glibc and we occasionally do this.  */
  size_t buflen = 1024;
  char *buf = alloca (buflen);
  size_t readlen = 0;
  int fd = open ("/proc/self/cmdline", O_RDONLY);
  if (fd == -1)
    {
      dbg_log (_("\
cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
	       strerror (errno));

      paranoia = 0;
      return;
    }

  /* Read the whole pseudo-file, growing the alloca'd buffer as
     needed.  */
  while (1)
    {
      ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
					    buflen - readlen));
      if (n == -1)
	{
	  dbg_log (_("\
cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
		   strerror (errno));

	  close (fd);
	  paranoia = 0;
	  return;
	}

      readlen += n;

      /* A short read means we have the whole command line.  */
      if (readlen < buflen)
	break;

      /* We might have to extend the buffer.  */
      size_t old_buflen = buflen;
      char *newp = extend_alloca (buf, buflen, 2 * buflen);
      buf = memmove (newp, buf, old_buflen);
    }

  close (fd);

  /* Parse the command line.  Worst case scenario: every two
     characters form one parameter (one character plus NUL).  */
  char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
  int argc = 0;

  /* /proc/self/cmdline is a sequence of NUL-terminated strings; split
     it into the argv array.  */
  char *cp = buf;
  while (cp < buf + readlen)
    {
      argv[argc++] = cp;
      cp = (char *) rawmemchr (cp, '\0') + 1;
    }
  argv[argc] = NULL;

  /* Second, change back to the old user if we changed it.  */
  if (server_user != NULL)
    {
      if (setresuid (old_uid, old_uid, old_uid) != 0)
	{
	  dbg_log (_("\
cannot change to old UID: %s; disabling paranoia mode"),
		   strerror (errno));

	  paranoia = 0;
	  return;
	}

      if (setresgid (old_gid, old_gid, old_gid) != 0)
	{
	  dbg_log (_("\
cannot change to old GID: %s; disabling paranoia mode"),
		   strerror (errno));

	  /* Undo the UID change before bailing out.  */
	  setuid (server_uid);
	  paranoia = 0;
	  return;
	}
    }

  /* Next change back to the old working directory.  */
  if (chdir (oldcwd) == -1)
    {
      dbg_log (_("\
cannot change to old working directory: %s; disabling paranoia mode"),
	       strerror (errno));

      /* Re-drop privileges since we will keep running.  */
      if (server_user != NULL)
	{
	  setuid (server_uid);
	  setgid (server_gid);
	}
      paranoia = 0;
      return;
    }

  /* Synchronize memory.  */
  int32_t certainly[lastdb];
  for (int cnt = 0; cnt < lastdb; ++cnt)
    if (dbs[cnt].enabled)
      {
	/* Make sure nobody keeps using the database.  */
	dbs[cnt].head->timestamp = 0;
	certainly[cnt] = dbs[cnt].head->nscd_certainly_running;
	dbs[cnt].head->nscd_certainly_running = 0;

	if (dbs[cnt].persistent)
	  // XXX async OK?
	  msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
      }

  /* The preparations are done.  */
#ifdef PATH_MAX
  char pathbuf[PATH_MAX];
#else
  char pathbuf[256];
#endif
  /* Try to exec the real nscd program so the process name (as reported
     in /proc/PID/status) will be 'nscd', but fall back to /proc/self/exe
     if readlink or the exec with the result of the readlink call fails.  */
  ssize_t n = readlink ("/proc/self/exe", pathbuf, sizeof (pathbuf) - 1);
  if (n != -1)
    {
      pathbuf[n] = '\0';
      execv (pathbuf, argv);
    }
  execv ("/proc/self/exe", argv);

  /* If we come here, we will never be able to re-exec.  */
  dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
	   strerror (errno));

  /* Keep running un-restarted: drop privileges again, go to a safe
     directory and switch paranoia off.  */
  if (server_user != NULL)
    {
      setuid (server_uid);
      setgid (server_gid);
    }
  if (chdir ("/") != 0)
    dbg_log (_("cannot change current working directory to \"/\": %s"),
	     strerror (errno));
  paranoia = 0;

  /* Reenable the databases.  */
  time_t now = time (NULL);
  for (int cnt = 0; cnt < lastdb; ++cnt)
    if (dbs[cnt].enabled)
      {
	dbs[cnt].head->timestamp = now;
	dbs[cnt].head->nscd_certainly_running = certainly[cnt];
      }
}
1461
1462
/* List of file descriptors.  */
struct fdlist
{
  int fd;			/* Accepted connection descriptor.  */
  struct fdlist *next;		/* Link; NULL means the record is free.  */
};
/* Memory allocated for the list: NCONNS records, allocated in
   start_threads.  */
static struct fdlist *fdlist;
/* List of currently ready-to-read file descriptors.  Points at the
   tail of a circular singly-linked list (tail->next is the oldest
   entry); NULL when the list is empty.  */
static struct fdlist *readylist;

/* Conditional variable and mutex to signal availability of entries in
   READYLIST.  The condvar is initialized dynamically since we might
   use a different clock depending on availability.  */
static pthread_cond_t readylist_cond = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;

/* The clock to use with the condvar.  */
static clockid_t timeout_clock = CLOCK_REALTIME;

/* Number of threads ready to handle the READYLIST.  Protected by
   READYLIST_LOCK.  */
static unsigned long int nready;
1485
1486
/* Function for the clean-up threads.  One such thread runs per
   enabled database (P encodes the database index): it periodically
   prunes expired cache entries, reacts to CLEAR_CACHE requests
   signalled via PRUNE_COND, and keeps the mapping timestamp fresh so
   clients can see the daemon is alive.  Never returns.  */
static void *
__attribute__ ((__noreturn__))
nscd_run_prune (void *p)
{
  const long int my_number = (long int) p;
  assert (dbs[my_number].enabled);

  int dont_need_update = setup_thread (&dbs[my_number]);

  time_t now = time (NULL);

  /* We are running.  */
  dbs[my_number].head->timestamp = now;

  struct timespec prune_ts;
  if (__builtin_expect (clock_gettime (timeout_clock, &prune_ts) == -1, 0))
    /* Should never happen.  */
    abort ();

  /* Compute the initial timeout time.  Prevent all the timers to go
     off at the same time by adding a db-based value.  */
  prune_ts.tv_sec += CACHE_PRUNE_INTERVAL + my_number;
  dbs[my_number].wakeup_time = now + CACHE_PRUNE_INTERVAL + my_number;

  pthread_mutex_t *prune_lock = &dbs[my_number].prune_lock;
  pthread_mutex_t *prune_run_lock = &dbs[my_number].prune_run_lock;
  pthread_cond_t *prune_cond = &dbs[my_number].prune_cond;

  pthread_mutex_lock (prune_lock);
  while (1)
    {
      /* Wait, but not forever.  Skip the wait entirely if a cache
	 clear was already requested.  */
      int e = 0;
      if (! dbs[my_number].clear_cache)
	e = pthread_cond_timedwait (prune_cond, prune_lock, &prune_ts);
      assert (__builtin_expect (e == 0 || e == ETIMEDOUT, 1));

      time_t next_wait;
      now = time (NULL);
      if (e == ETIMEDOUT || now >= dbs[my_number].wakeup_time
	  || dbs[my_number].clear_cache)
	{
	  /* We will determine the new timeout values based on the
	     cache content.  Should there be concurrent additions to
	     the cache which are not accounted for in the cache
	     pruning we want to know about it.  Therefore set the
	     timeout to the maximum.  It will be decreased when adding
	     new entries to the cache, if necessary.  */
	  if (sizeof (time_t) == sizeof (long int))
	    dbs[my_number].wakeup_time = LONG_MAX;
	  else
	    dbs[my_number].wakeup_time = INT_MAX;

	  /* Unconditionally reset the flag.  A PRUNE_NOW of LONG_MAX
	     presumably makes prune_cache treat every entry as expired,
	     implementing the full cache clear — confirm against
	     cache.c.  */
	  time_t prune_now = dbs[my_number].clear_cache ? LONG_MAX : now;
	  dbs[my_number].clear_cache = 0;

	  pthread_mutex_unlock (prune_lock);

	  /* We use a separate lock for running the prune function (instead
	     of keeping prune_lock locked) because this enables concurrent
	     invocations of cache_add which might modify the timeout value.  */
	  pthread_mutex_lock (prune_run_lock);
	  next_wait = prune_cache (&dbs[my_number], prune_now, -1);
	  pthread_mutex_unlock (prune_run_lock);

	  next_wait = MAX (next_wait, CACHE_PRUNE_INTERVAL);
	  /* If clients cannot determine for sure whether nscd is running
	     we need to wake up occasionally to update the timestamp.
	     Wait 90% of the update period.  */
#define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
	  if (__builtin_expect (! dont_need_update, 0))
	    {
	      next_wait = MIN (UPDATE_MAPPING_TIMEOUT, next_wait);
	      dbs[my_number].head->timestamp = now;
	    }

	  pthread_mutex_lock (prune_lock);

	  /* Make it known when we will wake up again.  */
	  if (now + next_wait < dbs[my_number].wakeup_time)
	    dbs[my_number].wakeup_time = now + next_wait;
	  else
	    next_wait = dbs[my_number].wakeup_time - now;
	}
      else
	/* The cache was just pruned.  Do not do it again now.  Just
	   use the new timeout value.  */
	next_wait = dbs[my_number].wakeup_time - now;

      if (clock_gettime (timeout_clock, &prune_ts) == -1)
	/* Should never happen.  */
	abort ();

      /* Compute next timeout time.  */
      prune_ts.tv_sec += next_wait;
    }
}
1586
1587
/* This is the main loop.  It is replicated in different threads but
   the use of the ready list makes sure only one thread handles an
   incoming connection.  Each worker repeatedly takes the oldest
   descriptor off READYLIST, reads and validates the request header
   and key, dispatches to handle_request, and closes the descriptor.
   The thread parameter P is unused.  Never returns.  */
static void *
__attribute__ ((__noreturn__))
nscd_run_worker (void *p)
{
  char buf[256];

  /* Initial locking.  */
  pthread_mutex_lock (&readylist_lock);

  /* One more thread available.  */
  ++nready;

  while (1)
    {
      while (readylist == NULL)
	pthread_cond_wait (&readylist_cond, &readylist_lock);

      /* READYLIST points at the tail of a circular list, so the
	 oldest entry is readylist->next.  */
      struct fdlist *it = readylist->next;
      if (readylist->next == readylist)
	/* Just one entry on the list.  */
	readylist = NULL;
      else
	readylist->next = it->next;

      /* Extract the information and mark the record ready to be used
	 again.  */
      int fd = it->fd;
      it->next = NULL;

      /* This thread is now busy handling FD; one fewer idle worker.  */
      --nready;

      /* We are done with the list.  */
      pthread_mutex_unlock (&readylist_lock);

#ifndef __ASSUME_ACCEPT4
      if (have_accept4 < 0)
	{
	  /* We do not want to block on a short read or so.  With
	     accept4 the descriptor is already non-blocking.  */
	  int fl = fcntl (fd, F_GETFL);
	  if (fl == -1 || fcntl (fd, F_SETFL, fl | O_NONBLOCK) == -1)
	    goto close_and_out;
	}
#endif

      /* Now read the request.  */
      request_header req;
      if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
			    != sizeof (req), 0))
	{
	  /* We failed to read data.  Note that this also might mean we
	     failed because we would have blocked.  */
	  if (debug_level > 0)
	    dbg_log (_("short read while reading request: %s"),
		     strerror_r (errno, buf, sizeof (buf)));
	  goto close_and_out;
	}

      /* Check whether this is a valid request type.  */
      if (req.type < GETPWBYNAME || req.type >= LASTREQ)
	goto close_and_out;

      /* Some systems have no SO_PEERCRED implementation.  They don't
	 care about security so we don't as well.  */
      uid_t uid = -1;
#ifdef SO_PEERCRED
      /* The caller's PID is only retrieved for debug output.  */
      pid_t pid = 0;

      if (__builtin_expect (debug_level > 0, 0))
	{
	  struct ucred caller;
	  socklen_t optlen = sizeof (caller);

	  if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
	    pid = caller.pid;
	}
#else
      const pid_t pid = 0;
#endif

      /* It should not be possible to crash the nscd with a silly
	 request (i.e., a terribly large key).  We limit the size to 1kb.  */
      if (__builtin_expect (req.key_len, 1) < 0
	  || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
	{
	  if (debug_level > 0)
	    dbg_log (_("key length in request too long: %d"), req.key_len);
	}
      else
	{
	  /* Get the key.  */
	  char keybuf[MAXKEYLEN];

	  if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
							  req.key_len))
				!= req.key_len, 0))
	    {
	      /* Again, this can also mean we would have blocked.  */
	      if (debug_level > 0)
		dbg_log (_("short read while reading request key: %s"),
			 strerror_r (errno, buf, sizeof (buf)));
	      goto close_and_out;
	    }

	  if (__builtin_expect (debug_level, 0) > 0)
	    {
#ifdef SO_PEERCRED
	      if (pid != 0)
		dbg_log (_("\
handle_request: request received (Version = %d) from PID %ld"),
			 req.version, (long int) pid);
	      else
#endif
		dbg_log (_("\
handle_request: request received (Version = %d)"), req.version);
	    }

	  /* Phew, we got all the data, now process it.  */
	  handle_request (fd, &req, keybuf, uid, pid);
	}

    close_and_out:
      /* We are done.  */
      close (fd);

      /* Re-locking.  */
      pthread_mutex_lock (&readylist_lock);

      /* One more thread available.  */
      ++nready;
    }
  /* NOTREACHED */
}
1724
1725
/* Number of slots in the connection bookkeeping arrays (FDLIST,
   STARTTIME); computed in start_threads from sysconf (_SC_OPEN_MAX),
   clamped to [MINCONN, MAXCONN].  */
static unsigned int nconns;
1727
1728 static void
1729 fd_ready (int fd)
1730 {
1731 pthread_mutex_lock (&readylist_lock);
1732
1733 /* Find an empty entry in FDLIST. */
1734 size_t inner;
1735 for (inner = 0; inner < nconns; ++inner)
1736 if (fdlist[inner].next == NULL)
1737 break;
1738 assert (inner < nconns);
1739
1740 fdlist[inner].fd = fd;
1741
1742 if (readylist == NULL)
1743 readylist = fdlist[inner].next = &fdlist[inner];
1744 else
1745 {
1746 fdlist[inner].next = readylist->next;
1747 readylist = readylist->next = &fdlist[inner];
1748 }
1749
1750 bool do_signal = true;
1751 if (__builtin_expect (nready == 0, 0))
1752 {
1753 ++client_queued;
1754 do_signal = false;
1755
1756 /* Try to start another thread to help out. */
1757 pthread_t th;
1758 if (nthreads < max_nthreads
1759 && pthread_create (&th, &attr, nscd_run_worker,
1760 (void *) (long int) nthreads) == 0)
1761 {
1762 /* We got another thread. */
1763 ++nthreads;
1764 /* The new thread might need a kick. */
1765 do_signal = true;
1766 }
1767
1768 }
1769
1770 pthread_mutex_unlock (&readylist_lock);
1771
1772 /* Tell one of the worker threads there is work to do. */
1773 if (do_signal)
1774 pthread_cond_signal (&readylist_cond);
1775 }
1776
1777
1778 /* Check whether restarting should happen. */
1779 static inline int
1780 restart_p (time_t now)
1781 {
1782 return (paranoia && readylist == NULL && nready == nthreads
1783 && now >= restart_time);
1784 }
1785
1786
/* Array for times a connection was accepted.  NCONNS entries,
   allocated in start_threads; indexed by descriptor in the epoll loop
   and by CONNS slot in the poll loop.  A value of 0 marks an unused
   slot.  */
static time_t *starttime;
1789
1790
/* Main-thread loop based on poll: accept new connections on SOCK,
   react to inotify events on watched configuration files, hand
   readable client descriptors to the worker threads via fd_ready,
   close accepted connections which sent no request in time, and
   trigger paranoia-mode restarts.  Never returns.  */
static void
__attribute__ ((__noreturn__))
main_loop_poll (void)
{
  struct pollfd *conns = (struct pollfd *) xmalloc (nconns
						    * sizeof (conns[0]));

  /* Slot 0 is always the listening socket.  */
  conns[0].fd = sock;
  conns[0].events = POLLRDNORM;
  size_t nused = 1;
  size_t firstfree = 1;

#ifdef HAVE_INOTIFY
  /* Slot 1, if present, is the inotify descriptor.  */
  if (inotify_fd != -1)
    {
      conns[1].fd = inotify_fd;
      conns[1].events = POLLRDNORM;
      nused = 2;
      firstfree = 2;
    }
#endif

  while (1)
    {
      /* Wait for any event.  We wait at most a couple of seconds so
	 that we can check whether we should close any of the accepted
	 connections since we have not received a request.  */
#define MAX_ACCEPT_TIMEOUT 30
#define MIN_ACCEPT_TIMEOUT 5
#define MAIN_THREAD_TIMEOUT \
  (MAX_ACCEPT_TIMEOUT * 1000						      \
   - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))

      int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);

      time_t now = time (NULL);

      /* If there is a descriptor ready for reading or there is a new
	 connection, process this now.  */
      if (n > 0)
	{
	  if (conns[0].revents != 0)
	    {
	      /* We have a new incoming connection.  Accept the connection.  */
	      int fd;

#ifndef __ASSUME_ACCEPT4
	      fd = -1;
	      if (have_accept4 >= 0)
#endif
		{
		  fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
						    SOCK_NONBLOCK));
#ifndef __ASSUME_ACCEPT4
		  /* Remember whether the kernel supports accept4 after
		     the first probe.  */
		  if (have_accept4 == 0)
		    have_accept4 = fd != -1 || errno != ENOSYS ? 1 : -1;
#endif
		}
#ifndef __ASSUME_ACCEPT4
	      if (have_accept4 < 0)
		fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
#endif

	      /* Use the descriptor if we have not reached the limit.  */
	      if (fd >= 0)
		{
		  if (firstfree < nconns)
		    {
		      conns[firstfree].fd = fd;
		      conns[firstfree].events = POLLRDNORM;
		      starttime[firstfree] = now;
		      if (firstfree >= nused)
			nused = firstfree + 1;

		      /* Advance FIRSTFREE to the next unused slot.  */
		      do
			++firstfree;
		      while (firstfree < nused && conns[firstfree].fd != -1);
		    }
		  else
		    /* We cannot use the connection so close it.  */
		    close (fd);
		}

	      --n;
	    }

	  size_t first = 1;
#ifdef HAVE_INOTIFY
	  if (inotify_fd != -1 && conns[1].fd == inotify_fd)
	    {
	      if (conns[1].revents != 0)
		{
		  bool to_clear[lastdb] = { false, };
		  union
		  {
# ifndef PATH_MAX
#  define PATH_MAX 1024
# endif
		    struct inotify_event i;
		    char buf[sizeof (struct inotify_event) + PATH_MAX];
		  } inev;

		  /* Drain all pending inotify events, collecting which
		     databases need to be flushed.  */
		  while (1)
		    {
		      ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd, &inev,
							     sizeof (inev)));
		      if (nb < (ssize_t) sizeof (struct inotify_event))
			{
			  if (__builtin_expect (nb == -1 && errno != EAGAIN,
						0))
			    {
			      /* Something went wrong when reading the inotify
				 data.  Better disable inotify.  */
			      dbg_log (_("\
disabled inotify after read error %d"),
				       errno);
			      conns[1].fd = -1;
			      firstfree = 1;
			      if (nused == 2)
				nused = 1;
			      close (inotify_fd);
			      inotify_fd = -1;
			    }
			  break;
			}

		      /* Check which of the files changed.  */
		      for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
			if (inev.i.wd == dbs[dbcnt].inotify_descr)
			  {
			    to_clear[dbcnt] = true;
			    goto next;
			  }

		      if (inev.i.wd == resolv_conf_descr)
			{
			  /* /etc/resolv.conf changed: reinitialize the
			     resolver and flush the host database.  */
			  res_init ();
			  to_clear[hstdb] = true;
			}
		    next:;
		    }

		  /* Actually perform the cache clearing.  */
		  for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
		    if (to_clear[dbcnt])
		      {
			pthread_mutex_lock (&dbs[dbcnt].prune_lock);
			dbs[dbcnt].clear_cache = 1;
			pthread_mutex_unlock (&dbs[dbcnt].prune_lock);
			pthread_cond_signal (&dbs[dbcnt].prune_cond);
		      }

		  --n;
		}

	      first = 2;
	    }
#endif

	  /* Hand every other readable descriptor over to a worker
	     thread and free its slot.  */
	  for (size_t cnt = first; cnt < nused && n > 0; ++cnt)
	    if (conns[cnt].revents != 0)
	      {
		fd_ready (conns[cnt].fd);

		/* Clean up the CONNS array.  */
		conns[cnt].fd = -1;
		if (cnt < firstfree)
		  firstfree = cnt;
		if (cnt == nused - 1)
		  do
		    --nused;
		  while (conns[nused - 1].fd == -1);

		--n;
	      }
	}

      /* Now find entries which have timed out.  */
      assert (nused > 0);

      /* We make the timeout length depend on the number of file
	 descriptors currently used.  */
#define ACCEPT_TIMEOUT \
  (MAX_ACCEPT_TIMEOUT							      \
   - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
      time_t laststart = now - ACCEPT_TIMEOUT;

      for (size_t cnt = nused - 1; cnt > 0; --cnt)
	{
	  if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
	    {
	      /* Remove the entry, it timed out.  */
	      (void) close (conns[cnt].fd);
	      conns[cnt].fd = -1;

	      if (cnt < firstfree)
		firstfree = cnt;
	      if (cnt == nused - 1)
		do
		  --nused;
		while (conns[nused - 1].fd == -1);
	    }
	}

      if (restart_p (now))
	restart ();
    }
}
1999
2000
2001 #ifdef HAVE_EPOLL
/* Main-thread loop based on epoll.  Mirrors main_loop_poll but keys
   the bookkeeping by file descriptor instead of array slot.  If epoll
   cannot be set up the function returns so the caller falls back to
   main_loop_poll.  */
static void
main_loop_epoll (int efd)
{
  struct epoll_event ev = { 0, };
  int nused = 1;
  size_t highest = 0;

  /* Add the socket.  */
  ev.events = EPOLLRDNORM;
  ev.data.fd = sock;
  if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
    /* We cannot use epoll.  */
    return;

# ifdef HAVE_INOTIFY
  if (inotify_fd != -1)
    {
      ev.events = EPOLLRDNORM;
      ev.data.fd = inotify_fd;
      if (epoll_ctl (efd, EPOLL_CTL_ADD, inotify_fd, &ev) == -1)
	/* We cannot use epoll.  */
	return;
      nused = 2;
    }
# endif

  while (1)
    {
      struct epoll_event revs[100];
# define nrevs (sizeof (revs) / sizeof (revs[0]))

      int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);

      time_t now = time (NULL);

      for (int cnt = 0; cnt < n; ++cnt)
	if (revs[cnt].data.fd == sock)
	  {
	    /* A new connection.  */
	    int fd;

# ifndef __ASSUME_ACCEPT4
	    fd = -1;
	    if (have_accept4 >= 0)
# endif
	      {
		fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
						  SOCK_NONBLOCK));
# ifndef __ASSUME_ACCEPT4
		/* Remember whether the kernel supports accept4 after
		   the first probe.  */
		if (have_accept4 == 0)
		  have_accept4 = fd != -1 || errno != ENOSYS ? 1 : -1;
# endif
	      }
# ifndef __ASSUME_ACCEPT4
	    if (have_accept4 < 0)
	      fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
# endif

	    /* Use the descriptor if we have not reached the limit.  */
	    if (fd >= 0)
	      {
		/* Try to add the new descriptor.  STARTTIME has only
		   NCONNS entries, so larger descriptors cannot be
		   tracked.  */
		ev.data.fd = fd;
		if (fd >= nconns
		    || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
		  /* The descriptor is too large or something went
		     wrong.  Close the descriptor.  */
		  close (fd);
		else
		  {
		    /* Remember when we accepted the connection.  */
		    starttime[fd] = now;

		    if (fd > highest)
		      highest = fd;

		    ++nused;
		  }
	      }
	  }
# ifdef HAVE_INOTIFY
	else if (revs[cnt].data.fd == inotify_fd)
	  {
	    bool to_clear[lastdb] = { false, };
	    union
	    {
	      struct inotify_event i;
	      char buf[sizeof (struct inotify_event) + PATH_MAX];
	    } inev;

	    /* Drain all pending inotify events, collecting which
	       databases need to be flushed.  */
	    while (1)
	      {
		ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd, &inev,
						       sizeof (inev)));
		if (nb < (ssize_t) sizeof (struct inotify_event))
		  {
		    if (__builtin_expect (nb == -1 && errno != EAGAIN, 0))
		      {
			/* Something went wrong when reading the inotify
			   data.  Better disable inotify.  */
			dbg_log (_("disabled inotify after read error %d"),
				 errno);
			(void) epoll_ctl (efd, EPOLL_CTL_DEL, inotify_fd,
					  NULL);
			close (inotify_fd);
			inotify_fd = -1;
		      }
		    break;
		  }

		/* Check which of the files changed.  */
		for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
		  if (inev.i.wd == dbs[dbcnt].inotify_descr)
		    {
		      to_clear[dbcnt] = true;
		      goto next;
		    }

		if (inev.i.wd == resolv_conf_descr)
		  {
		    /* /etc/resolv.conf changed: reinitialize the
		       resolver and flush the host database.  */
		    res_init ();
		    to_clear[hstdb] = true;
		  }
	      next:;
	      }

	    /* Actually perform the cache clearing.  */
	    for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
	      if (to_clear[dbcnt])
		{
		  pthread_mutex_lock (&dbs[dbcnt].prune_lock);
		  dbs[dbcnt].clear_cache = 1;
		  pthread_mutex_unlock (&dbs[dbcnt].prune_lock);
		  pthread_cond_signal (&dbs[dbcnt].prune_cond);
		}
	  }
# endif
	else
	  {
	    /* Remove the descriptor from the epoll descriptor.  */
	    (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);

	    /* Get a worker to handle the request.  */
	    fd_ready (revs[cnt].data.fd);

	    /* Reset the time.  */
	    starttime[revs[cnt].data.fd] = 0;
	    if (revs[cnt].data.fd == highest)
	      do
		--highest;
	      while (highest > 0 && starttime[highest] == 0);

	    --nused;
	  }

      /* Now look for descriptors for accepted connections which have
	 no reply in too long of a time.  */
      time_t laststart = now - ACCEPT_TIMEOUT;
      assert (starttime[sock] == 0);
      assert (inotify_fd == -1 || starttime[inotify_fd] == 0);
      for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
	if (starttime[cnt] != 0 && starttime[cnt] < laststart)
	  {
	    /* We are waiting for this one for too long.  Close it.  */
	    (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);

	    (void) close (cnt);

	    starttime[cnt] = 0;
	    if (cnt == highest)
	      --highest;
	  }
	else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
	  --highest;

      if (restart_p (now))
	restart ();
    }
}
2181 #endif
2182
2183
/* Start all the threads we want.  The initial process is thread no. 1.
   Creates one prune thread per enabled database and the initial set of
   worker threads, allocates the connection bookkeeping arrays, and
   then runs the accept loop (epoll if available, otherwise poll) in
   the calling thread.  Does not return under normal operation.  */
void
start_threads (void)
{
  /* Initialize the conditional variable we will use.  The only
     non-standard attribute we might use is the clock selection.  */
  pthread_condattr_t condattr;
  pthread_condattr_init (&condattr);

#if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
    && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
  /* Determine whether the monotonous clock is available.  */
  struct timespec dummy;
# if _POSIX_MONOTONIC_CLOCK == 0
  if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
# endif
# if _POSIX_CLOCK_SELECTION == 0
    if (sysconf (_SC_CLOCK_SELECTION) > 0)
# endif
      if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
	  && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
	timeout_clock = CLOCK_MONOTONIC;
#endif

  /* Create the attribute for the threads.  They are all created
     detached.  */
  pthread_attr_init (&attr);
  pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
  /* Use 1MB stacks, twice as much for 64-bit architectures.  */
  pthread_attr_setstacksize (&attr, NSCD_THREAD_STACKSIZE);

  /* We allow less than LASTDB threads only for debugging.  */
  if (debug_level == 0)
    nthreads = MAX (nthreads, lastdb);

  /* Create the threads which prune the databases.  */
  // XXX Ideally this work would be done by some of the worker threads.
  // XXX But this is problematic since we would need to be able to wake
  // XXX them up explicitly as well as part of the group handling the
  // XXX ready-list.  This requires an operation where we can wait on
  // XXX two conditional variables at the same time.  This operation
  // XXX does not exist (yet).
  for (long int i = 0; i < lastdb; ++i)
    {
      /* Initialize the conditional variable.  */
      if (pthread_cond_init (&dbs[i].prune_cond, &condattr) != 0)
	{
	  dbg_log (_("could not initialize conditional variable"));
	  exit (1);
	}

      pthread_t th;
      if (dbs[i].enabled
	  && pthread_create (&th, &attr, nscd_run_prune, (void *) i) != 0)
	{
	  dbg_log (_("could not start clean-up thread; terminating"));
	  exit (1);
	}
    }

  pthread_condattr_destroy (&condattr);

  /* Start the worker threads; a partial failure is tolerated as long
     as at least one worker could be created.  */
  for (long int i = 0; i < nthreads; ++i)
    {
      pthread_t th;
      if (pthread_create (&th, &attr, nscd_run_worker, NULL) != 0)
	{
	  if (i == 0)
	    {
	      dbg_log (_("could not start any worker thread; terminating"));
	      exit (1);
	    }

	  break;
	}
    }

  /* Determine how much room for descriptors we should initially
     allocate.  This might need to change later if we cap the number
     with MAXCONN.  */
  const long int nfds = sysconf (_SC_OPEN_MAX);
#define MINCONN 32
#define MAXCONN 16384
  if (nfds == -1 || nfds > MAXCONN)
    nconns = MAXCONN;
  else if (nfds < MINCONN)
    nconns = MINCONN;
  else
    nconns = nfds;

  /* We need memory to pass descriptors on to the worker threads.  */
  fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
  /* Array to keep track when connection was accepted.  */
  starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));

  /* In the main thread we execute the loop which handles incoming
     connections.  */
#ifdef HAVE_EPOLL
  int efd = epoll_create (100);
  if (efd != -1)
    {
      main_loop_epoll (efd);
      close (efd);
    }
#endif

  /* Fall back to (or start with) the poll loop; it never returns.  */
  main_loop_poll ();
}
2292
2293
2294 /* Look up the uid, gid, and supplementary groups to run nscd as. When
2295 this function is called, we are not listening on the nscd socket yet so
2296 we can just use the ordinary lookup functions without causing a lockup */
2297 static void
2298 begin_drop_privileges (void)
2299 {
2300 struct passwd *pwd = getpwnam (server_user);
2301
2302 if (pwd == NULL)
2303 {
2304 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2305 error (EXIT_FAILURE, 0, _("Failed to run nscd as user '%s'"),
2306 server_user);
2307 }
2308
2309 server_uid = pwd->pw_uid;
2310 server_gid = pwd->pw_gid;
2311
2312 /* Save the old UID/GID if we have to change back. */
2313 if (paranoia)
2314 {
2315 old_uid = getuid ();
2316 old_gid = getgid ();
2317 }
2318
2319 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
2320 {
2321 /* This really must never happen. */
2322 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2323 error (EXIT_FAILURE, errno, _("initial getgrouplist failed"));
2324 }
2325
2326 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
2327
2328 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
2329 == -1)
2330 {
2331 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2332 error (EXIT_FAILURE, errno, _("getgrouplist failed"));
2333 }
2334 }
2335
2336
2337 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
2338 run nscd as the user specified in the configuration file. */
2339 static void
2340 finish_drop_privileges (void)
2341 {
2342 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2343 /* We need to preserve the capabilities to connect to the audit daemon. */
2344 cap_t new_caps = preserve_capabilities ();
2345 #endif
2346
2347 if (setgroups (server_ngroups, server_groups) == -1)
2348 {
2349 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2350 error (EXIT_FAILURE, errno, _("setgroups failed"));
2351 }
2352
2353 int res;
2354 if (paranoia)
2355 res = setresgid (server_gid, server_gid, old_gid);
2356 else
2357 res = setgid (server_gid);
2358 if (res == -1)
2359 {
2360 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2361 perror ("setgid");
2362 exit (4);
2363 }
2364
2365 if (paranoia)
2366 res = setresuid (server_uid, server_uid, old_uid);
2367 else
2368 res = setuid (server_uid);
2369 if (res == -1)
2370 {
2371 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2372 perror ("setuid");
2373 exit (4);
2374 }
2375
2376 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2377 /* Remove the temporary capabilities. */
2378 install_real_capabilities (new_caps);
2379 #endif
2380 }