1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2007, 2008 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
19
20 #include <alloca.h>
21 #include <assert.h>
22 #include <atomic.h>
23 #include <error.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <grp.h>
27 #include <libintl.h>
28 #include <pthread.h>
29 #include <pwd.h>
30 #include <resolv.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <unistd.h>
34 #include <arpa/inet.h>
35 #ifdef HAVE_EPOLL
36 # include <sys/epoll.h>
37 #endif
38 #ifdef HAVE_INOTIFY
39 # include <sys/inotify.h>
40 #endif
41 #include <sys/mman.h>
42 #include <sys/param.h>
43 #include <sys/poll.h>
44 #ifdef HAVE_SENDFILE
45 # include <sys/sendfile.h>
46 #endif
47 #include <sys/socket.h>
48 #include <sys/stat.h>
49 #include <sys/un.h>
50
51 #include "nscd.h"
52 #include "dbg_log.h"
53 #include "selinux.h"
54 #include <resolv/resolv.h>
55 #ifdef HAVE_SENDFILE
56 # include <kernel-features.h>
57 #endif
58
59
60 /* Wrapper functions with error checking for standard functions. */
61 extern void *xmalloc (size_t n);
62 extern void *xcalloc (size_t n, size_t s);
63 extern void *xrealloc (void *o, size_t n);
64
65 /* Support to run nscd as an unprivileged user */
66 const char *server_user;
67 static uid_t server_uid;
68 static gid_t server_gid;
69 const char *stat_user;
70 uid_t stat_uid;
71 static gid_t *server_groups;
72 #ifndef NGROUPS
73 # define NGROUPS 32
74 #endif
75 static int server_ngroups;
76
77 static pthread_attr_t attr;
78
79 static void begin_drop_privileges (void);
80 static void finish_drop_privileges (void);
81
82 /* Map request type to a string. */
83 const char *const serv2str[LASTREQ] =
84 {
85 [GETPWBYNAME] = "GETPWBYNAME",
86 [GETPWBYUID] = "GETPWBYUID",
87 [GETGRBYNAME] = "GETGRBYNAME",
88 [GETGRBYGID] = "GETGRBYGID",
89 [GETHOSTBYNAME] = "GETHOSTBYNAME",
90 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
91 [GETHOSTBYADDR] = "GETHOSTBYADDR",
92 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
93 [SHUTDOWN] = "SHUTDOWN",
94 [GETSTAT] = "GETSTAT",
95 [INVALIDATE] = "INVALIDATE",
96 [GETFDPW] = "GETFDPW",
97 [GETFDGR] = "GETFDGR",
98 [GETFDHST] = "GETFDHST",
99 [GETAI] = "GETAI",
100 [INITGROUPS] = "INITGROUPS",
101 [GETSERVBYNAME] = "GETSERVBYNAME",
102 [GETSERVBYPORT] = "GETSERVBYPORT",
103 [GETFDSERV] = "GETFDSERV"
104 };
105
106 /* The control data structures for the services. */
107 struct database_dyn dbs[lastdb] =
108 {
109 [pwddb] = {
110 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
111 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
112 .enabled = 0,
113 .check_file = 1,
114 .persistent = 0,
115 .propagate = 1,
116 .shared = 0,
117 .max_db_size = DEFAULT_MAX_DB_SIZE,
118 .suggested_module = DEFAULT_SUGGESTED_MODULE,
119 .reset_res = 0,
120 .filename = "/etc/passwd",
121 .db_filename = _PATH_NSCD_PASSWD_DB,
122 .disabled_iov = &pwd_iov_disabled,
123 .postimeout = 3600,
124 .negtimeout = 20,
125 .wr_fd = -1,
126 .ro_fd = -1,
127 .mmap_used = false
128 },
129 [grpdb] = {
130 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
131 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
132 .enabled = 0,
133 .check_file = 1,
134 .persistent = 0,
135 .propagate = 1,
136 .shared = 0,
137 .max_db_size = DEFAULT_MAX_DB_SIZE,
138 .suggested_module = DEFAULT_SUGGESTED_MODULE,
139 .reset_res = 0,
140 .filename = "/etc/group",
141 .db_filename = _PATH_NSCD_GROUP_DB,
142 .disabled_iov = &grp_iov_disabled,
143 .postimeout = 3600,
144 .negtimeout = 60,
145 .wr_fd = -1,
146 .ro_fd = -1,
147 .mmap_used = false
148 },
149 [hstdb] = {
150 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
151 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
152 .enabled = 0,
153 .check_file = 1,
154 .persistent = 0,
155 .propagate = 0, /* Not used. */
156 .shared = 0,
157 .max_db_size = DEFAULT_MAX_DB_SIZE,
158 .suggested_module = DEFAULT_SUGGESTED_MODULE,
159 .reset_res = 1,
160 .filename = "/etc/hosts",
161 .db_filename = _PATH_NSCD_HOSTS_DB,
162 .disabled_iov = &hst_iov_disabled,
163 .postimeout = 3600,
164 .negtimeout = 20,
165 .wr_fd = -1,
166 .ro_fd = -1,
167 .mmap_used = false
168 },
169 [servdb] = {
170 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
171 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
172 .enabled = 0,
173 .check_file = 1,
174 .persistent = 0,
175 .propagate = 0, /* Not used. */
176 .shared = 0,
177 .max_db_size = DEFAULT_MAX_DB_SIZE,
178 .suggested_module = DEFAULT_SUGGESTED_MODULE,
179 .reset_res = 0,
180 .filename = "/etc/services",
181 .db_filename = _PATH_NSCD_SERVICES_DB,
182 .disabled_iov = &serv_iov_disabled,
183 .postimeout = 28800,
184 .negtimeout = 20,
185 .wr_fd = -1,
186 .ro_fd = -1,
187 .mmap_used = false
188 }
189 };
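
/* The per-database defaults above are what /etc/nscd.conf overrides at
   startup (see nscd.conf(5)).  A minimal sketch of the corresponding
   configuration lines -- the values are only examples, not the shipped
   defaults:

	enable-cache		passwd		yes
	positive-time-to-live	passwd		600
	negative-time-to-live	passwd		20
	suggested-size		passwd		211
	check-files		passwd		yes
	persistent		passwd		yes
	shared			passwd		yes
	max-db-size		passwd		33554432  */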
190
191
192 /* Mapping of request type to database. */
193 static struct
194 {
195 bool data_request;
196 struct database_dyn *db;
197 } const reqinfo[LASTREQ] =
198 {
199 [GETPWBYNAME] = { true, &dbs[pwddb] },
200 [GETPWBYUID] = { true, &dbs[pwddb] },
201 [GETGRBYNAME] = { true, &dbs[grpdb] },
202 [GETGRBYGID] = { true, &dbs[grpdb] },
203 [GETHOSTBYNAME] = { true, &dbs[hstdb] },
204 [GETHOSTBYNAMEv6] = { true, &dbs[hstdb] },
205 [GETHOSTBYADDR] = { true, &dbs[hstdb] },
206 [GETHOSTBYADDRv6] = { true, &dbs[hstdb] },
207 [SHUTDOWN] = { false, NULL },
208 [GETSTAT] = { false, NULL },
209 [INVALIDATE] = { false, NULL },
210 [GETFDPW] = { false, &dbs[pwddb] },
211 [GETFDGR] = { false, &dbs[grpdb] },
212 [GETFDHST] = { false, &dbs[hstdb] },
213 [GETAI] = { true, &dbs[hstdb] },
214 [INITGROUPS] = { true, &dbs[grpdb] },
215 [GETSERVBYNAME] = { true, &dbs[servdb] },
216 [GETSERVBYPORT] = { true, &dbs[servdb] },
217 [GETFDSERV] = { false, &dbs[servdb] }
218 };
219
220
221 /* Initial number of threads to use. */
222 int nthreads = -1;
223 /* Maximum number of threads to use. */
224 int max_nthreads = 32;
225
226 /* Socket for incoming connections. */
227 static int sock;
228
229 #ifdef HAVE_INOTIFY
230 /* Inotify descriptor. */
231 static int inotify_fd = -1;
232
233 /* Watch descriptor for resolver configuration file. */
234 static int resolv_conf_descr = -1;
235 #endif
236
237 #ifndef __ASSUME_SOCK_CLOEXEC
238 /* Negative if SOCK_CLOEXEC is not supported, positive if it is, zero
239 before we know the result. */
240 static int have_sock_cloexec;
241 /* The paccept syscall was introduced at the same time as SOCK_CLOEXEC. */
242 # define have_paccept -1 // XXX For the time being there is no such call
243 #endif
244
245 /* Number of times clients had to wait. */
246 unsigned long int client_queued;
247
248 /* Data structure for recording in-flight memory allocation. */
249 __thread struct mem_in_flight mem_in_flight attribute_tls_model_ie;
250 /* Global list of the mem_in_flight variables of all the threads. */
251 struct mem_in_flight *mem_in_flight_list;
252
253
254 ssize_t
255 writeall (int fd, const void *buf, size_t len)
256 {
257 size_t n = len;
258 ssize_t ret;
259 do
260 {
261 ret = TEMP_FAILURE_RETRY (send (fd, buf, n, MSG_NOSIGNAL));
262 if (ret <= 0)
263 break;
264 buf = (const char *) buf + ret;
265 n -= ret;
266 }
267 while (n > 0);
268 return ret < 0 ? ret : len - n;
269 }
270
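/* Illustrative sketch (not part of this file): how a caller might use
   writeall above to ship a fixed-size reply.  writeall retries EINTR
   and short sends internally; anything other than the full length back
   means a real error or a vanished peer.  The reply value is made up.  */
#if 0
static void
example_send_reply (int fd)
{
  int32_t resp = 0;		/* hypothetical reply payload */
  if (writeall (fd, &resp, sizeof (resp)) != sizeof (resp))
    dbg_log (_("cannot write result: %s"), strerror (errno));
}
#endif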
271
272 #ifdef HAVE_SENDFILE
273 ssize_t
274 sendfileall (int tofd, int fromfd, off_t off, size_t len)
275 {
276 ssize_t n = len;
277 ssize_t ret;
278
279 do
280 {
281 ret = TEMP_FAILURE_RETRY (sendfile (tofd, fromfd, &off, n));
282 if (ret <= 0)
283 break;
284 n -= ret;
285 }
286 while (n > 0);
287 return ret < 0 ? ret : len - n;
288 }
289 #endif
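
/* Illustrative sketch (not part of this file, and only meaningful when
   HAVE_SENDFILE is defined): sendfileall copies LEN bytes starting at
   file offset OFF of FROMFD directly to TOFD.  For a record in the
   mmaped database the offset is just its distance from the start of
   the mapping, exactly as the cache reply path in handle_request
   computes it.  The names below are hypothetical.  */
#if 0
static ssize_t
example_ship_record (int client_fd, struct database_dyn *db,
                     const struct datahead *dh)
{
  off_t off = (const char *) dh->data - (const char *) db->head;
  return sendfileall (client_fd, db->wr_fd, off, dh->recsize);
}
#endif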
290
291
292 enum usekey
293 {
294 use_not = 0,
295 /* The following three are not really used, they are symbolic constants. */
296 use_first = 16,
297 use_begin = 32,
298 use_end = 64,
299
300 use_he = 1,
301 use_he_begin = use_he | use_begin,
302 use_he_end = use_he | use_end,
303 #if SEPARATE_KEY
304 use_key = 2,
305 use_key_begin = use_key | use_begin,
306 use_key_end = use_key | use_end,
307 use_key_first = use_key_begin | use_first,
308 #endif
309 use_data = 3,
310 use_data_begin = use_data | use_begin,
311 use_data_end = use_data | use_end,
312 use_data_first = use_data_begin | use_first
313 };
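
/* Worked example of the marker arithmetic (the values follow directly
   from the enum above): a datahead record of N bytes at offset S gets
   usemap[S] = use_data | use_begin = 3 | 32 = 35, usemap[S+1 .. S+N-2]
   = use_data = 3, and usemap[S+N-1] = use_data | use_end = 3 | 64 = 67.
   If the record is additionally referenced by a hashentry with
   first == true, the start byte becomes 35 | use_first = 51.  */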
314
315
316 static int
317 check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
318 enum usekey use, ref_t start, size_t len)
319 {
320 assert (len >= 2);
321
322 if (start > first_free || start + len > first_free
323 || (start & BLOCK_ALIGN_M1))
324 return 0;
325
326 if (usemap[start] == use_not)
327 {
328 /* Add the start marker. */
329 usemap[start] = use | use_begin;
330 use &= ~use_first;
331
332 while (--len > 0)
333 if (usemap[++start] != use_not)
334 return 0;
335 else
336 usemap[start] = use;
337
338 /* Add the end marker. */
339 usemap[start] = use | use_end;
340 }
341 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
342 {
343 /* Hash entries can't be shared. */
344 if (use == use_he)
345 return 0;
346
347 usemap[start] |= (use & use_first);
348 use &= ~use_first;
349
350 while (--len > 1)
351 if (usemap[++start] != use)
352 return 0;
353
354 if (usemap[++start] != (use | use_end))
355 return 0;
356 }
357 else
358 /* Points to a wrong object or somewhere in the middle. */
359 return 0;
360
361 return 1;
362 }
363
364
365 /* Verify data in persistent database. */
366 static int
367 verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
368 {
369 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb || dbnr == servdb);
370
371 time_t now = time (NULL);
372
373 struct database_pers_head *head = mem;
374 struct database_pers_head head_copy = *head;
375
376 /* Check that the header that was read matches the head in the database. */
377 if (memcmp (head, readhead, sizeof (*head)) != 0)
378 return 0;
379
380 /* First some easy tests: make sure the database header is sane. */
381 if (head->version != DB_VERSION
382 || head->header_size != sizeof (*head)
383 /* We allow a timestamp to be one hour ahead of the current time.
384 This should cover daylight saving time changes. */
385 || head->timestamp > now + 60 * 60 + 60
386 || (head->gc_cycle & 1)
387 || head->module == 0
388 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
389 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
390 || head->first_free < 0
391 || head->first_free > head->data_size
392 || (head->first_free & BLOCK_ALIGN_M1) != 0
393 || head->maxnentries < 0
394 || head->maxnsearched < 0)
395 return 0;
396
397 uint8_t *usemap = calloc (head->first_free, 1);
398 if (usemap == NULL)
399 return 0;
400
401 const char *data = (char *) &head->array[roundup (head->module,
402 ALIGN / sizeof (ref_t))];
403
404 nscd_ssize_t he_cnt = 0;
405 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
406 {
407 ref_t trail = head->array[cnt];
408 ref_t work = trail;
409 int tick = 0;
410
411 while (work != ENDREF)
412 {
413 if (! check_use (data, head->first_free, usemap, use_he, work,
414 sizeof (struct hashentry)))
415 goto fail;
416
417 /* Now we know we can dereference the record. */
418 struct hashentry *here = (struct hashentry *) (data + work);
419
420 ++he_cnt;
421
422 /* Make sure the record is for this type of service. */
423 if (here->type >= LASTREQ
424 || reqinfo[here->type].db != &dbs[dbnr])
425 goto fail;
426
427 /* Validate boolean field value. */
428 if (here->first != false && here->first != true)
429 goto fail;
430
431 if (here->len < 0)
432 goto fail;
433
434 /* Now the data. */
435 if (here->packet < 0
436 || here->packet > head->first_free
437 || here->packet + sizeof (struct datahead) > head->first_free)
438 goto fail;
439
440 struct datahead *dh = (struct datahead *) (data + here->packet);
441
442 if (! check_use (data, head->first_free, usemap,
443 use_data | (here->first ? use_first : 0),
444 here->packet, dh->allocsize))
445 goto fail;
446
447 if (dh->allocsize < sizeof (struct datahead)
448 || dh->recsize > dh->allocsize
449 || (dh->notfound != false && dh->notfound != true)
450 || (dh->usable != false && dh->usable != true))
451 goto fail;
452
453 if (here->key < here->packet + sizeof (struct datahead)
454 || here->key > here->packet + dh->allocsize
455 || here->key + here->len > here->packet + dh->allocsize)
456 {
457 #if SEPARATE_KEY
458 /* If keys can appear outside of data, this should be done
459 instead. But gc doesn't mark the data in that case. */
460 if (! check_use (data, head->first_free, usemap,
461 use_key | (here->first ? use_first : 0),
462 here->key, here->len))
463 #endif
464 goto fail;
465 }
466
467 work = here->next;
468
469 if (work == trail)
470 /* A circular list, this must not happen. */
471 goto fail;
472 if (tick)
473 trail = ((struct hashentry *) (data + trail))->next;
474 tick = 1 - tick;
475 }
476 }
477
478 if (he_cnt != head->nentries)
479 goto fail;
480
481 /* See if all data and keys had at least one reference from
482 he->first == true hashentry. */
483 for (ref_t idx = 0; idx < head->first_free; ++idx)
484 {
485 #if SEPARATE_KEY
486 if (usemap[idx] == use_key_begin)
487 goto fail;
488 #endif
489 if (usemap[idx] == use_data_begin)
490 goto fail;
491 }
492
493 /* Finally, make sure the database hasn't changed since the first test. */
494 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
495 goto fail;
496
497 free (usemap);
498 return 1;
499
500 fail:
501 free (usemap);
502 return 0;
503 }
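
/* Worked example of the layout verified above, with made-up numbers:
   the mapping is the header, then the hash array, then the data area.
   With a 4-byte ref_t, module == 211 and ALIGN == 16 the hash array
   occupies roundup (211 * 4, 16) = 848 bytes, so the data area starts
   at sizeof (struct database_pers_head) + 848 and first_free is an
   offset into it; the file on disk must be at least header + array +
   data_size bytes long.  */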
504
505
506 #ifdef O_CLOEXEC
507 # define EXTRA_O_FLAGS O_CLOEXEC
508 #else
509 # define EXTRA_O_FLAGS 0
510 #endif
511
512
513 /* Initialize database information structures. */
514 void
515 nscd_init (void)
516 {
517 /* Look up unprivileged uid/gid/groups before we start listening on the
518 socket */
519 if (server_user != NULL)
520 begin_drop_privileges ();
521
522 if (nthreads == -1)
523 /* No configuration for this value, assume a default. */
524 nthreads = 4;
525
526 #ifdef HAVE_INOTIFY
527 /* Use inotify to recognize changed files. */
528 inotify_fd = inotify_init1 (IN_NONBLOCK);
529 # ifndef __ASSUME_IN_NONBLOCK
530 if (inotify_fd == -1 && errno == ENOSYS)
531 {
532 inotify_fd = inotify_init ();
533 if (inotify_fd != -1)
534 fcntl (inotify_fd, F_SETFL, O_RDONLY | O_NONBLOCK);
535 }
536 # endif
537 #endif
538
539 for (size_t cnt = 0; cnt < lastdb; ++cnt)
540 if (dbs[cnt].enabled)
541 {
542 pthread_rwlock_init (&dbs[cnt].lock, NULL);
543 pthread_mutex_init (&dbs[cnt].memlock, NULL);
544
545 if (dbs[cnt].persistent)
546 {
547 /* Try to open the appropriate file on disk. */
548 int fd = open (dbs[cnt].db_filename, O_RDWR | EXTRA_O_FLAGS);
549 if (fd != -1)
550 {
551 char *msg = NULL;
552 struct stat64 st;
553 void *mem;
554 size_t total;
555 struct database_pers_head head;
556 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
557 sizeof (head)));
558 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
559 {
560 fail_db_errno:
561 /* The code is single-threaded at this point so
562 using strerror is just fine. */
563 msg = strerror (errno);
564 fail_db:
565 dbg_log (_("invalid persistent database file \"%s\": %s"),
566 dbs[cnt].db_filename, msg);
567 unlink (dbs[cnt].db_filename);
568 }
569 else if (head.module == 0 && head.data_size == 0)
570 {
571 /* The file has been created, but the head has not
572 been initialized yet. */
573 msg = _("uninitialized header");
574 goto fail_db;
575 }
576 else if (head.header_size != (int) sizeof (head))
577 {
578 msg = _("header size does not match");
579 goto fail_db;
580 }
581 else if ((total = (sizeof (head)
582 + roundup (head.module * sizeof (ref_t),
583 ALIGN)
584 + head.data_size))
585 > st.st_size
586 || total < sizeof (head))
587 {
588 msg = _("file size does not match");
589 goto fail_db;
590 }
591 /* Note we map with the maximum size allowed for the
592 database. This is likely much larger than the
593 actual file size. This is OK on most OSes since
594 extensions of the underlying file will
595 automatically make more pages available for
596 memory access. */
597 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
598 PROT_READ | PROT_WRITE,
599 MAP_SHARED, fd, 0))
600 == MAP_FAILED)
601 goto fail_db_errno;
602 else if (!verify_persistent_db (mem, &head, cnt))
603 {
604 munmap (mem, total);
605 msg = _("verification failed");
606 goto fail_db;
607 }
608 else
609 {
610 /* Success. We have the database. */
611 dbs[cnt].head = mem;
612 dbs[cnt].memsize = total;
613 dbs[cnt].data = (char *)
614 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
615 ALIGN / sizeof (ref_t))];
616 dbs[cnt].mmap_used = true;
617
618 if (dbs[cnt].suggested_module > head.module)
619 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
620 dbnames[cnt]);
621
622 dbs[cnt].wr_fd = fd;
623 fd = -1;
624 /* We also need a read-only descriptor. */
625 if (dbs[cnt].shared)
626 {
627 dbs[cnt].ro_fd = open (dbs[cnt].db_filename,
628 O_RDONLY | EXTRA_O_FLAGS);
629 if (dbs[cnt].ro_fd == -1)
630 dbg_log (_("\
631 cannot create read-only descriptor for \"%s\"; no mmap"),
632 dbs[cnt].db_filename);
633 }
634
635 // XXX Shall we test whether the descriptors actually
636 // XXX point to the same file?
637 }
638
639 /* Close the file descriptors in case something went
640 wrong in which case the variable has not been
641 assigned -1. */
642 if (fd != -1)
643 close (fd);
644 }
645 }
646
647 if (dbs[cnt].head == NULL)
648 {
649 /* No database loaded. Allocate the data structure,
650 possibly on disk. */
651 struct database_pers_head head;
652 size_t total = (sizeof (head)
653 + roundup (dbs[cnt].suggested_module
654 * sizeof (ref_t), ALIGN)
655 + (dbs[cnt].suggested_module
656 * DEFAULT_DATASIZE_PER_BUCKET));
657
658 /* Try to create the database. If we do not need a
659 persistent database create a temporary file. */
660 int fd;
661 int ro_fd = -1;
662 if (dbs[cnt].persistent)
663 {
664 fd = open (dbs[cnt].db_filename,
665 O_RDWR | O_CREAT | O_EXCL | O_TRUNC | EXTRA_O_FLAGS,
666 S_IRUSR | S_IWUSR);
667 if (fd != -1 && dbs[cnt].shared)
668 ro_fd = open (dbs[cnt].db_filename,
669 O_RDONLY | EXTRA_O_FLAGS);
670 }
671 else
672 {
673 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
674 fd = mkostemp (fname, EXTRA_O_FLAGS);
675
676 /* We do not need the file name anymore after we
677 opened another file descriptor in read-only mode. */
678 if (fd != -1)
679 {
680 if (dbs[cnt].shared)
681 ro_fd = open (fname, O_RDONLY | EXTRA_O_FLAGS);
682
683 unlink (fname);
684 }
685 }
686
687 if (fd == -1)
688 {
689 if (errno == EEXIST)
690 {
691 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
692 dbnames[cnt], dbs[cnt].db_filename);
693 // XXX Correct way to terminate?
694 exit (1);
695 }
696
697 if (dbs[cnt].persistent)
698 dbg_log (_("cannot create %s; no persistent database used"),
699 dbs[cnt].db_filename);
700 else
701 dbg_log (_("cannot create %s; no sharing possible"),
702 dbs[cnt].db_filename);
703
704 dbs[cnt].persistent = 0;
705 // XXX remember: no mmap
706 }
707 else
708 {
709 /* Tell the user if we could not create the read-only
710 descriptor. */
711 if (ro_fd == -1 && dbs[cnt].shared)
712 dbg_log (_("\
713 cannot create read-only descriptor for \"%s\"; no mmap"),
714 dbs[cnt].db_filename);
715
716 /* Before we create the header, initialize the hash
717 table, so that if we get interrupted while writing
718 the header we can recognize a partially initialized
719 database. */
720 size_t ps = sysconf (_SC_PAGESIZE);
721 char tmpbuf[ps];
722 assert (~ENDREF == 0);
723 memset (tmpbuf, '\xff', ps);
724
725 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
726 off_t offset = sizeof (head);
727
728 size_t towrite;
729 if (offset % ps != 0)
730 {
731 towrite = MIN (remaining, ps - (offset % ps));
732 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
733 goto write_fail;
734 offset += towrite;
735 remaining -= towrite;
736 }
737
738 while (remaining > ps)
739 {
740 if (pwrite (fd, tmpbuf, ps, offset) == -1)
741 goto write_fail;
742 offset += ps;
743 remaining -= ps;
744 }
745
746 if (remaining > 0
747 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
748 goto write_fail;
749
750 /* Create the header of the file. */
751 struct database_pers_head head =
752 {
753 .version = DB_VERSION,
754 .header_size = sizeof (head),
755 .module = dbs[cnt].suggested_module,
756 .data_size = (dbs[cnt].suggested_module
757 * DEFAULT_DATASIZE_PER_BUCKET),
758 .first_free = 0
759 };
760 void *mem;
761
762 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
763 != sizeof (head))
764 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
765 != 0)
766 || (mem = mmap (NULL, dbs[cnt].max_db_size,
767 PROT_READ | PROT_WRITE,
768 MAP_SHARED, fd, 0)) == MAP_FAILED)
769 {
770 write_fail:
771 unlink (dbs[cnt].db_filename);
772 dbg_log (_("cannot write to database file %s: %s"),
773 dbs[cnt].db_filename, strerror (errno));
774 dbs[cnt].persistent = 0;
775 }
776 else
777 {
778 /* Success. */
779 dbs[cnt].head = mem;
780 dbs[cnt].data = (char *)
781 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
782 ALIGN / sizeof (ref_t))];
783 dbs[cnt].memsize = total;
784 dbs[cnt].mmap_used = true;
785
786 /* Remember the descriptors. */
787 dbs[cnt].wr_fd = fd;
788 dbs[cnt].ro_fd = ro_fd;
789 fd = -1;
790 ro_fd = -1;
791 }
792
793 if (fd != -1)
794 close (fd);
795 if (ro_fd != -1)
796 close (ro_fd);
797 }
798 }
799
800 #if !defined O_CLOEXEC || !defined __ASSUME_O_CLOEXEC
801 /* We do not check here whether the O_CLOEXEC provided to the
802 open call was successful or not. The two fcntl calls are
803 only performed once each per process start-up and are
804 therefore not noticeable at all. */
805 if (paranoia
806 && ((dbs[cnt].wr_fd != -1
807 && fcntl (dbs[cnt].wr_fd, F_SETFD, FD_CLOEXEC) == -1)
808 || (dbs[cnt].ro_fd != -1
809 && fcntl (dbs[cnt].ro_fd, F_SETFD, FD_CLOEXEC) == -1)))
810 {
811 dbg_log (_("\
812 cannot set socket to close on exec: %s; disabling paranoia mode"),
813 strerror (errno));
814 paranoia = 0;
815 }
816 #endif
817
818 if (dbs[cnt].head == NULL)
819 {
820 /* We do not use the persistent database. Just
821 create an in-memory data structure. */
822 assert (! dbs[cnt].persistent);
823
824 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
825 + (dbs[cnt].suggested_module
826 * sizeof (ref_t)));
827 memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
828 assert (~ENDREF == 0);
829 memset (dbs[cnt].head->array, '\xff',
830 dbs[cnt].suggested_module * sizeof (ref_t));
831 dbs[cnt].head->module = dbs[cnt].suggested_module;
832 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
833 * dbs[cnt].head->module);
834 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
835 dbs[cnt].head->first_free = 0;
836
837 dbs[cnt].shared = 0;
838 assert (dbs[cnt].ro_fd == -1);
839 }
840
841 dbs[cnt].inotify_descr = -1;
842 if (dbs[cnt].check_file)
843 {
844 #ifdef HAVE_INOTIFY
845 if (inotify_fd < 0
846 || (dbs[cnt].inotify_descr
847 = inotify_add_watch (inotify_fd, dbs[cnt].filename,
848 IN_DELETE_SELF | IN_MODIFY)) < 0)
849 /* We cannot notice changes in the main thread. */
850 #endif
851 {
852 /* We need the modification date of the file. */
853 struct stat64 st;
854
855 if (stat64 (dbs[cnt].filename, &st) < 0)
856 {
857 /* We cannot stat() the file, disable file checking. */
858 dbg_log (_("cannot stat() file `%s': %s"),
859 dbs[cnt].filename, strerror (errno));
860 dbs[cnt].check_file = 0;
861 }
862 else
863 dbs[cnt].file_mtime = st.st_mtime;
864 }
865 }
866
867 #ifdef HAVE_INOTIFY
868 if (cnt == hstdb && inotify_fd >= 0)
869 /* We also monitor the resolver configuration file. */
870 resolv_conf_descr = inotify_add_watch (inotify_fd,
871 _PATH_RESCONF,
872 IN_DELETE_SELF | IN_MODIFY);
873 #endif
874 }
875
876 /* Create the socket. */
877 #ifndef __ASSUME_SOCK_CLOEXEC
878 sock = -1;
879 if (have_sock_cloexec >= 0)
880 #endif
881 {
882 sock = socket (AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
883 #ifndef __ASSUME_SOCK_CLOEXEC
884 if (have_sock_cloexec == 0)
885 have_sock_cloexec = sock != -1 || errno != EINVAL ? 1 : -1;
886 #endif
887 }
888 #ifndef __ASSUME_SOCK_CLOEXEC
889 if (have_sock_cloexec < 0)
890 sock = socket (AF_UNIX, SOCK_STREAM, 0);
891 #endif
892 if (sock < 0)
893 {
894 dbg_log (_("cannot open socket: %s"), strerror (errno));
895 exit (errno == EACCES ? 4 : 1);
896 }
897 /* Bind a name to the socket. */
898 struct sockaddr_un sock_addr;
899 sock_addr.sun_family = AF_UNIX;
900 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
901 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
902 {
903 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
904 exit (errno == EACCES ? 4 : 1);
905 }
906
907 #ifndef __ASSUME_SOCK_CLOEXEC
908 if (have_sock_cloexec < 0)
909 {
910 /* We don't want to get stuck on accept. */
911 int fl = fcntl (sock, F_GETFL);
912 if (fl == -1 || fcntl (sock, F_SETFL, fl | O_NONBLOCK) == -1)
913 {
914 dbg_log (_("cannot change socket to nonblocking mode: %s"),
915 strerror (errno));
916 exit (1);
917 }
918
919 /* The descriptor needs to be closed on exec. */
920 if (paranoia && fcntl (sock, F_SETFD, FD_CLOEXEC) == -1)
921 {
922 dbg_log (_("cannot set socket to close on exec: %s"),
923 strerror (errno));
924 exit (1);
925 }
926 }
927 #endif
928
929 /* Set permissions for the socket. */
930 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
931
932 /* Set the socket up to accept connections. */
933 if (listen (sock, SOMAXCONN) < 0)
934 {
935 dbg_log (_("cannot enable socket to accept connections: %s"),
936 strerror (errno));
937 exit (1);
938 }
939
940 /* Change to unprivileged uid/gid/groups if specified in config file. */
941 if (server_user != NULL)
942 finish_drop_privileges ();
943 }
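
/* Illustrative sketch (not part of this file): the client side of the
   socket created above.  A caller connects to _PATH_NSCDSOCKET, writes
   a request_header followed by the key, and then reads the reply.  The
   real client helpers live in nscd_helper.c; this is only a sketch
   with most error handling elided.  */
#if 0
static int
example_query (const char *name)
{
  int fd = socket (AF_UNIX, SOCK_STREAM, 0);
  if (fd < 0)
    return -1;

  struct sockaddr_un addr;
  addr.sun_family = AF_UNIX;
  strcpy (addr.sun_path, _PATH_NSCDSOCKET);
  if (connect (fd, (struct sockaddr *) &addr, sizeof (addr)) < 0)
    goto fail;

  request_header req = { .version = NSCD_VERSION, .type = GETPWBYNAME,
                         .key_len = strlen (name) + 1 };
  if (TEMP_FAILURE_RETRY (send (fd, &req, sizeof (req), MSG_NOSIGNAL))
      != sizeof (req)
      || TEMP_FAILURE_RETRY (send (fd, name, req.key_len, MSG_NOSIGNAL))
      != req.key_len)
    goto fail;

  /* The caller would now read a pw_response_header and, on success,
     the packed passwd record that follows it.  */
  return fd;

 fail:
  close (fd);
  return -1;
}
#endif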
944
945
946 /* Close the connections. */
947 void
948 close_sockets (void)
949 {
950 close (sock);
951 }
952
953
954 static void
955 invalidate_cache (char *key, int fd)
956 {
957 dbtype number;
958 int32_t resp;
959
960 for (number = pwddb; number < lastdb; ++number)
961 if (strcmp (key, dbnames[number]) == 0)
962 {
963 if (dbs[number].reset_res)
964 res_init ();
965
966 break;
967 }
968
969 if (number == lastdb)
970 {
971 resp = EINVAL;
972 writeall (fd, &resp, sizeof (resp));
973 return;
974 }
975
976 if (dbs[number].enabled)
977 {
978 pthread_mutex_lock (&dbs[number].prune_lock);
979 prune_cache (&dbs[number], LONG_MAX, fd);
980 pthread_mutex_unlock (&dbs[number].prune_lock);
981 }
982 else
983 {
984 resp = 0;
985 writeall (fd, &resp, sizeof (resp));
986 }
987 }
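
/* Usage note (not part of this file): this is the handler behind
   "nscd -i <database>" (e.g. "nscd -i passwd"), which sends an
   INVALIDATE request naming the table; prune_cache is then run with
   LONG_MAX so every entry is treated as expired.  */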
988
989
990 #ifdef SCM_RIGHTS
991 static void
992 send_ro_fd (struct database_dyn *db, char *key, int fd)
993 {
994 /* If we do not have a read-only file descriptor do nothing. */
995 if (db->ro_fd == -1)
996 return;
997
998 /* We need to send some data along with the descriptor. */
999 uint64_t mapsize = (db->head->data_size
1000 + roundup (db->head->module * sizeof (ref_t), ALIGN)
1001 + sizeof (struct database_pers_head));
1002 struct iovec iov[2];
1003 iov[0].iov_base = key;
1004 iov[0].iov_len = strlen (key) + 1;
1005 iov[1].iov_base = &mapsize;
1006 iov[1].iov_len = sizeof (mapsize);
1007
1008 /* Prepare the control message to transfer the descriptor. */
1009 union
1010 {
1011 struct cmsghdr hdr;
1012 char bytes[CMSG_SPACE (sizeof (int))];
1013 } buf;
1014 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
1015 .msg_control = buf.bytes,
1016 .msg_controllen = sizeof (buf) };
1017 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
1018
1019 cmsg->cmsg_level = SOL_SOCKET;
1020 cmsg->cmsg_type = SCM_RIGHTS;
1021 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
1022
1023 *(int *) CMSG_DATA (cmsg) = db->ro_fd;
1024
1025 msg.msg_controllen = cmsg->cmsg_len;
1026
1027 /* Send the control message. We repeat when we are interrupted but
1028 everything else is ignored. */
1029 #ifndef MSG_NOSIGNAL
1030 # define MSG_NOSIGNAL 0
1031 #endif
1032 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
1033
1034 if (__builtin_expect (debug_level > 0, 0))
1035 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
1036 }
1037 #endif /* SCM_RIGHTS */
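
/* Illustrative sketch (not part of this file): the receiving end of
   the SCM_RIGHTS message built in send_ro_fd above.  After sending a
   GETFD* request the client reads the key and the mapping size through
   the iovecs and picks the read-only descriptor out of the control
   message.  Names are hypothetical; the real code is in nscd_helper.c.  */
#if 0
static int
example_recv_ro_fd (int sock_fd, char *keybuf, size_t keylen,
                    uint64_t *mapsizep)
{
  struct iovec iov[2] = { { keybuf, keylen },
                          { mapsizep, sizeof (*mapsizep) } };
  union
  {
    struct cmsghdr hdr;
    char bytes[CMSG_SPACE (sizeof (int))];
  } buf;
  struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
                        .msg_control = buf.bytes,
                        .msg_controllen = sizeof (buf) };

  if (TEMP_FAILURE_RETRY (recvmsg (sock_fd, &msg, 0)) <= 0)
    return -1;

  struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
  if (cmsg == NULL || cmsg->cmsg_type != SCM_RIGHTS)
    return -1;

  int ro_fd;
  memcpy (&ro_fd, CMSG_DATA (cmsg), sizeof (ro_fd));
  return ro_fd;		/* read-only descriptor for the database */
}
#endif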
1038
1039
1040 /* Handle new request. */
1041 static void
1042 handle_request (int fd, request_header *req, void *key, uid_t uid, pid_t pid)
1043 {
1044 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
1045 {
1046 if (debug_level > 0)
1047 dbg_log (_("\
1048 cannot handle old request version %d; current version is %d"),
1049 req->version, NSCD_VERSION);
1050 return;
1051 }
1052
1053 /* Perform the SELinux check before we go on to the standard checks. */
1054 if (selinux_enabled && nscd_request_avc_has_perm (fd, req->type) != 0)
1055 {
1056 if (debug_level > 0)
1057 {
1058 #ifdef SO_PEERCRED
1059 # ifdef PATH_MAX
1060 char buf[PATH_MAX];
1061 # else
1062 char buf[4096];
1063 # endif
1064
1065 snprintf (buf, sizeof (buf), "/proc/%ld/exe", (long int) pid);
1066 ssize_t n = readlink (buf, buf, sizeof (buf) - 1);
1067
1068 if (n <= 0)
1069 dbg_log (_("\
1070 request from %ld not handled due to missing permission"), (long int) pid);
1071 else
1072 {
1073 buf[n] = '\0';
1074 dbg_log (_("\
1075 request from '%s' [%ld] not handled due to missing permission"),
1076 buf, (long int) pid);
1077 }
1078 #else
1079 dbg_log (_("request not handled due to missing permission"));
1080 #endif
1081 }
1082 return;
1083 }
1084
1085 struct database_dyn *db = reqinfo[req->type].db;
1086
1087 /* See whether we can service the request from the cache. */
1088 if (__builtin_expect (reqinfo[req->type].data_request, true))
1089 {
1090 if (__builtin_expect (debug_level, 0) > 0)
1091 {
1092 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
1093 {
1094 char buf[INET6_ADDRSTRLEN];
1095
1096 dbg_log ("\t%s (%s)", serv2str[req->type],
1097 inet_ntop (req->type == GETHOSTBYADDR
1098 ? AF_INET : AF_INET6,
1099 key, buf, sizeof (buf)));
1100 }
1101 else
1102 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1103 }
1104
1105 /* Is this service enabled? */
1106 if (__builtin_expect (!db->enabled, 0))
1107 {
1108 /* No, send the prepared record. */
1109 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
1110 db->disabled_iov->iov_len,
1111 MSG_NOSIGNAL))
1112 != (ssize_t) db->disabled_iov->iov_len
1113 && __builtin_expect (debug_level, 0) > 0)
1114 {
1115 /* We have problems sending the result. */
1116 char buf[256];
1117 dbg_log (_("cannot write result: %s"),
1118 strerror_r (errno, buf, sizeof (buf)));
1119 }
1120
1121 return;
1122 }
1123
1124 /* Be sure we can read the data. */
1125 if (__builtin_expect (pthread_rwlock_tryrdlock (&db->lock) != 0, 0))
1126 {
1127 ++db->head->rdlockdelayed;
1128 pthread_rwlock_rdlock (&db->lock);
1129 }
1130
1131 /* See whether we can handle it from the cache. */
1132 struct datahead *cached;
1133 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
1134 db, uid);
1135 if (cached != NULL)
1136 {
1137 /* Hurray it's in the cache. */
1138 ssize_t nwritten;
1139
1140 #ifdef HAVE_SENDFILE
1141 if (__builtin_expect (db->mmap_used, 1))
1142 {
1143 assert (db->wr_fd != -1);
1144 assert ((char *) cached->data > (char *) db->data);
1145 assert ((char *) cached->data - (char *) db->head
1146 + cached->recsize
1147 <= (sizeof (struct database_pers_head)
1148 + db->head->module * sizeof (ref_t)
1149 + db->head->data_size));
1150 nwritten = sendfileall (fd, db->wr_fd,
1151 (char *) cached->data
1152 - (char *) db->head, cached->recsize);
1153 # ifndef __ASSUME_SENDFILE
1154 if (nwritten == -1 && errno == ENOSYS)
1155 goto use_write;
1156 # endif
1157 }
1158 else
1159 # ifndef __ASSUME_SENDFILE
1160 use_write:
1161 # endif
1162 #endif
1163 nwritten = writeall (fd, cached->data, cached->recsize);
1164
1165 if (nwritten != cached->recsize
1166 && __builtin_expect (debug_level, 0) > 0)
1167 {
1168 /* We have problems sending the result. */
1169 char buf[256];
1170 dbg_log (_("cannot write result: %s"),
1171 strerror_r (errno, buf, sizeof (buf)));
1172 }
1173
1174 pthread_rwlock_unlock (&db->lock);
1175
1176 return;
1177 }
1178
1179 pthread_rwlock_unlock (&db->lock);
1180 }
1181 else if (__builtin_expect (debug_level, 0) > 0)
1182 {
1183 if (req->type == INVALIDATE)
1184 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1185 else
1186 dbg_log ("\t%s", serv2str[req->type]);
1187 }
1188
1189 /* Handle the request. */
1190 switch (req->type)
1191 {
1192 case GETPWBYNAME:
1193 addpwbyname (db, fd, req, key, uid);
1194 break;
1195
1196 case GETPWBYUID:
1197 addpwbyuid (db, fd, req, key, uid);
1198 break;
1199
1200 case GETGRBYNAME:
1201 addgrbyname (db, fd, req, key, uid);
1202 break;
1203
1204 case GETGRBYGID:
1205 addgrbygid (db, fd, req, key, uid);
1206 break;
1207
1208 case GETHOSTBYNAME:
1209 addhstbyname (db, fd, req, key, uid);
1210 break;
1211
1212 case GETHOSTBYNAMEv6:
1213 addhstbynamev6 (db, fd, req, key, uid);
1214 break;
1215
1216 case GETHOSTBYADDR:
1217 addhstbyaddr (db, fd, req, key, uid);
1218 break;
1219
1220 case GETHOSTBYADDRv6:
1221 addhstbyaddrv6 (db, fd, req, key, uid);
1222 break;
1223
1224 case GETAI:
1225 addhstai (db, fd, req, key, uid);
1226 break;
1227
1228 case INITGROUPS:
1229 addinitgroups (db, fd, req, key, uid);
1230 break;
1231
1232 case GETSERVBYNAME:
1233 addservbyname (db, fd, req, key, uid);
1234 break;
1235
1236 case GETSERVBYPORT:
1237 addservbyport (db, fd, req, key, uid);
1238 break;
1239
1240 case GETSTAT:
1241 case SHUTDOWN:
1242 case INVALIDATE:
1243 {
1244 /* Get the caller's credentials. */
1245 #ifdef SO_PEERCRED
1246 struct ucred caller;
1247 socklen_t optlen = sizeof (caller);
1248
1249 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1250 {
1251 char buf[256];
1252
1253 dbg_log (_("error getting caller's id: %s"),
1254 strerror_r (errno, buf, sizeof (buf)));
1255 break;
1256 }
1257
1258 uid = caller.uid;
1259 #else
1260 /* Some systems have no SO_PEERCRED implementation. They don't
1261 care about security, so neither do we. */
1262 uid = 0;
1263 #endif
1264 }
1265
1266 /* Accept shutdown, getstat and invalidate only from root. For
1267 the stat call also allow the user specified in the config file. */
1268 if (req->type == GETSTAT)
1269 {
1270 if (uid == 0 || uid == stat_uid)
1271 send_stats (fd, dbs);
1272 }
1273 else if (uid == 0)
1274 {
1275 if (req->type == INVALIDATE)
1276 invalidate_cache (key, fd);
1277 else
1278 termination_handler (0);
1279 }
1280 break;
1281
1282 case GETFDPW:
1283 case GETFDGR:
1284 case GETFDHST:
1285 case GETFDSERV:
1286 #ifdef SCM_RIGHTS
1287 send_ro_fd (reqinfo[req->type].db, key, fd);
1288 #endif
1289 break;
1290
1291 default:
1292 /* Ignore the command, it's nothing we know. */
1293 break;
1294 }
1295 }
1296
1297
1298 /* Restart the process. */
1299 static void
1300 restart (void)
1301 {
1302 /* First determine the parameters. We do not use the parameters
1303 passed to main() since in case nscd is started by running the
1304 dynamic linker this will not work. Yes, this is not the usual
1305 case but nscd is part of glibc and we occasionally do this. */
1306 size_t buflen = 1024;
1307 char *buf = alloca (buflen);
1308 size_t readlen = 0;
1309 int fd = open ("/proc/self/cmdline", O_RDONLY);
1310 if (fd == -1)
1311 {
1312 dbg_log (_("\
1313 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1314 strerror (errno));
1315
1316 paranoia = 0;
1317 return;
1318 }
1319
1320 while (1)
1321 {
1322 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
1323 buflen - readlen));
1324 if (n == -1)
1325 {
1326 dbg_log (_("\
1327 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1328 strerror (errno));
1329
1330 close (fd);
1331 paranoia = 0;
1332 return;
1333 }
1334
1335 readlen += n;
1336
1337 if (readlen < buflen)
1338 break;
1339
1340 /* We might have to extend the buffer. */
1341 size_t old_buflen = buflen;
1342 char *newp = extend_alloca (buf, buflen, 2 * buflen);
1343 buf = memmove (newp, buf, old_buflen);
1344 }
1345
1346 close (fd);
1347
1348 /* Parse the command line. Worst case scenario: every two
1349 characters form one parameter (one character plus NUL). */
1350 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1351 int argc = 0;
1352
1353 char *cp = buf;
1354 while (cp < buf + readlen)
1355 {
1356 argv[argc++] = cp;
1357 cp = (char *) rawmemchr (cp, '\0') + 1;
1358 }
1359 argv[argc] = NULL;
1360
1361 /* Second, change back to the old user if we changed it. */
1362 if (server_user != NULL)
1363 {
1364 if (setresuid (old_uid, old_uid, old_uid) != 0)
1365 {
1366 dbg_log (_("\
1367 cannot change to old UID: %s; disabling paranoia mode"),
1368 strerror (errno));
1369
1370 paranoia = 0;
1371 return;
1372 }
1373
1374 if (setresgid (old_gid, old_gid, old_gid) != 0)
1375 {
1376 dbg_log (_("\
1377 cannot change to old GID: %s; disabling paranoia mode"),
1378 strerror (errno));
1379
1380 setuid (server_uid);
1381 paranoia = 0;
1382 return;
1383 }
1384 }
1385
1386 /* Next change back to the old working directory. */
1387 if (chdir (oldcwd) == -1)
1388 {
1389 dbg_log (_("\
1390 cannot change to old working directory: %s; disabling paranoia mode"),
1391 strerror (errno));
1392
1393 if (server_user != NULL)
1394 {
1395 setuid (server_uid);
1396 setgid (server_gid);
1397 }
1398 paranoia = 0;
1399 return;
1400 }
1401
1402 /* Synchronize memory. */
1403 int32_t certainly[lastdb];
1404 for (int cnt = 0; cnt < lastdb; ++cnt)
1405 if (dbs[cnt].enabled)
1406 {
1407 /* Make sure nobody keeps using the database. */
1408 dbs[cnt].head->timestamp = 0;
1409 certainly[cnt] = dbs[cnt].head->nscd_certainly_running;
1410 dbs[cnt].head->nscd_certainly_running = 0;
1411
1412 if (dbs[cnt].persistent)
1413 // XXX async OK?
1414 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1415 }
1416
1417 /* The preparations are done. */
1418 execv ("/proc/self/exe", argv);
1419
1420 /* If we come here, we will never be able to re-exec. */
1421 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1422 strerror (errno));
1423
1424 if (server_user != NULL)
1425 {
1426 setuid (server_uid);
1427 setgid (server_gid);
1428 }
1429 if (chdir ("/") != 0)
1430 dbg_log (_("cannot change current working directory to \"/\": %s"),
1431 strerror (errno));
1432 paranoia = 0;
1433
1434 /* Reenable the databases. */
1435 time_t now = time (NULL);
1436 for (int cnt = 0; cnt < lastdb; ++cnt)
1437 if (dbs[cnt].enabled)
1438 {
1439 dbs[cnt].head->timestamp = now;
1440 dbs[cnt].head->nscd_certainly_running = certainly[cnt];
1441 }
1442 }
1443
1444
1445 /* List of file descriptors. */
1446 struct fdlist
1447 {
1448 int fd;
1449 struct fdlist *next;
1450 };
1451 /* Memory allocated for the list. */
1452 static struct fdlist *fdlist;
1453 /* List of currently ready-to-read file descriptors. */
1454 static struct fdlist *readylist;
1455
1456 /* Conditional variable and mutex to signal availability of entries in
1457 READYLIST. The condvar is statically initialized and only ever
1458 waited on without a timeout, so the clock it uses does not matter. */
1459 static pthread_cond_t readylist_cond = PTHREAD_COND_INITIALIZER;
1460 static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;
1461
1462 /* The clock to use with the condvar. */
1463 static clockid_t timeout_clock = CLOCK_REALTIME;
1464
1465 /* Number of threads ready to handle the READYLIST. */
1466 static unsigned long int nready;
1467
1468
1469 /* Function for the clean-up threads. */
1470 static void *
1471 __attribute__ ((__noreturn__))
1472 nscd_run_prune (void *p)
1473 {
1474 const long int my_number = (long int) p;
1475 assert (dbs[my_number].enabled);
1476
1477 int dont_need_update = setup_thread (&dbs[my_number]);
1478
1479 time_t now = time (NULL);
1480
1481 /* We are running. */
1482 dbs[my_number].head->timestamp = now;
1483
1484 struct timespec prune_ts;
1485 if (__builtin_expect (clock_gettime (timeout_clock, &prune_ts) == -1, 0))
1486 /* Should never happen. */
1487 abort ();
1488
1489 /* Compute the initial timeout time. Prevent all the timers from
1490 going off at the same time by adding a db-based value. */
1491 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL + my_number;
1492 dbs[my_number].wakeup_time = now + CACHE_PRUNE_INTERVAL + my_number;
1493
1494 pthread_mutex_t *prune_lock = &dbs[my_number].prune_lock;
1495 pthread_cond_t *prune_cond = &dbs[my_number].prune_cond;
1496
1497 pthread_mutex_lock (prune_lock);
1498 while (1)
1499 {
1500 /* Wait, but not forever. */
1501 int e = 0;
1502 if (! dbs[my_number].clear_cache)
1503 e = pthread_cond_timedwait (prune_cond, prune_lock, &prune_ts);
1504 assert (__builtin_expect (e == 0 || e == ETIMEDOUT, 1));
1505
1506 time_t next_wait;
1507 now = time (NULL);
1508 if (e == ETIMEDOUT || now >= dbs[my_number].wakeup_time
1509 || dbs[my_number].clear_cache)
1510 {
1511 /* We will determine the new timeout values based on the
1512 cache content. Should there be concurrent additions to
1513 the cache which are not accounted for in the cache
1514 pruning we want to know about it. Therefore set the
1515 timeout to the maximum. It will be decreased when adding
1516 new entries to the cache, if necessary. */
1517 if (sizeof (time_t) == sizeof (long int))
1518 dbs[my_number].wakeup_time = LONG_MAX;
1519 else
1520 dbs[my_number].wakeup_time = INT_MAX;
1521
1522 /* Unconditionally reset the flag. */
1523 time_t prune_now = dbs[my_number].clear_cache ? LONG_MAX : now;
1524 dbs[my_number].clear_cache = 0;
1525
1526 pthread_mutex_unlock (prune_lock);
1527
1528 next_wait = prune_cache (&dbs[my_number], prune_now, -1);
1529
1530 next_wait = MAX (next_wait, CACHE_PRUNE_INTERVAL);
1531 /* If clients cannot determine for sure whether nscd is running
1532 we need to wake up occasionally to update the timestamp.
1533 Wait 90% of the update period. */
1534 #define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
1535 if (__builtin_expect (! dont_need_update, 0))
1536 {
1537 next_wait = MIN (UPDATE_MAPPING_TIMEOUT, next_wait);
1538 dbs[my_number].head->timestamp = now;
1539 }
1540
1541 pthread_mutex_lock (prune_lock);
1542
1543 /* Make it known when we will wake up again. */
1544 if (now + next_wait < dbs[my_number].wakeup_time)
1545 dbs[my_number].wakeup_time = now + next_wait;
1546 else
1547 next_wait = dbs[my_number].wakeup_time - now;
1548 }
1549 else
1550 /* The cache was just pruned. Do not do it again now. Just
1551 use the new timeout value. */
1552 next_wait = dbs[my_number].wakeup_time - now;
1553
1554 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1555 /* Should never happen. */
1556 abort ();
1557
1558 /* Compute next timeout time. */
1559 prune_ts.tv_sec += next_wait;
1560 }
1561 }
1562
1563
1564 /* This is the main loop. It is replicated in different threads but
1565 the use of the ready list makes sure only one thread handles an
1566 incoming connection. */
1567 static void *
1568 __attribute__ ((__noreturn__))
1569 nscd_run_worker (void *p)
1570 {
1571 char buf[256];
1572
1573 /* Initialize the memory-in-flight list. */
1574 for (enum in_flight idx = 0; idx < IDX_last; ++idx)
1575 mem_in_flight.block[idx].dbidx = -1;
1576 /* And queue this threads structure. */
1577 do
1578 mem_in_flight.next = mem_in_flight_list;
1579 while (atomic_compare_and_exchange_bool_acq (&mem_in_flight_list,
1580 &mem_in_flight,
1581 mem_in_flight.next) != 0);
1582
1583 /* Initial locking. */
1584 pthread_mutex_lock (&readylist_lock);
1585
1586 /* One more thread available. */
1587 ++nready;
1588
1589 while (1)
1590 {
1591 while (readylist == NULL)
1592 pthread_cond_wait (&readylist_cond, &readylist_lock);
1593
1594 struct fdlist *it = readylist->next;
1595 if (readylist->next == readylist)
1596 /* Just one entry on the list. */
1597 readylist = NULL;
1598 else
1599 readylist->next = it->next;
1600
1601 /* Extract the information and mark the record ready to be used
1602 again. */
1603 int fd = it->fd;
1604 it->next = NULL;
1605
1606 /* One fewer thread available. */
1607 --nready;
1608
1609 /* We are done with the list. */
1610 pthread_mutex_unlock (&readylist_lock);
1611
1612 #ifndef __ASSUME_SOCK_CLOEXEC
1613 if (have_sock_cloexec < 0)
1614 {
1615 /* We do not want to block on a short read or so. */
1616 int fl = fcntl (fd, F_GETFL);
1617 if (fl == -1 || fcntl (fd, F_SETFL, fl | O_NONBLOCK) == -1)
1618 goto close_and_out;
1619 }
1620 #endif
1621
1622 /* Now read the request. */
1623 request_header req;
1624 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1625 != sizeof (req), 0))
1626 {
1627 /* We failed to read data. Note that this also might mean we
1628 failed because we would have blocked. */
1629 if (debug_level > 0)
1630 dbg_log (_("short read while reading request: %s"),
1631 strerror_r (errno, buf, sizeof (buf)));
1632 goto close_and_out;
1633 }
1634
1635 /* Check whether this is a valid request type. */
1636 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1637 goto close_and_out;
1638
1639 /* Some systems have no SO_PEERCRED implementation. They don't
1640 care about security, so neither do we. */
1641 uid_t uid = -1;
1642 #ifdef SO_PEERCRED
1643 pid_t pid = 0;
1644
1645 if (__builtin_expect (debug_level > 0, 0))
1646 {
1647 struct ucred caller;
1648 socklen_t optlen = sizeof (caller);
1649
1650 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1651 pid = caller.pid;
1652 }
1653 #else
1654 const pid_t pid = 0;
1655 #endif
1656
1657 /* It should not be possible to crash the nscd with a silly
1658 request (i.e., a terribly large key). We limit the size to 1kb. */
1659 if (__builtin_expect (req.key_len, 1) < 0
1660 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1661 {
1662 if (debug_level > 0)
1663 dbg_log (_("key length in request too long: %d"), req.key_len);
1664 }
1665 else
1666 {
1667 /* Get the key. */
1668 char keybuf[MAXKEYLEN];
1669
1670 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1671 req.key_len))
1672 != req.key_len, 0))
1673 {
1674 /* Again, this can also mean we would have blocked. */
1675 if (debug_level > 0)
1676 dbg_log (_("short read while reading request key: %s"),
1677 strerror_r (errno, buf, sizeof (buf)));
1678 goto close_and_out;
1679 }
1680
1681 if (__builtin_expect (debug_level, 0) > 0)
1682 {
1683 #ifdef SO_PEERCRED
1684 if (pid != 0)
1685 dbg_log (_("\
1686 handle_request: request received (Version = %d) from PID %ld"),
1687 req.version, (long int) pid);
1688 else
1689 #endif
1690 dbg_log (_("\
1691 handle_request: request received (Version = %d)"), req.version);
1692 }
1693
1694 /* Phew, we got all the data, now process it. */
1695 handle_request (fd, &req, keybuf, uid, pid);
1696 }
1697
1698 close_and_out:
1699 /* We are done. */
1700 close (fd);
1701
1702 /* Re-locking. */
1703 pthread_mutex_lock (&readylist_lock);
1704
1705 /* One more thread available. */
1706 ++nready;
1707 }
1708 }
1709
1710
1711 static unsigned int nconns;
1712
1713 static void
1714 fd_ready (int fd)
1715 {
1716 pthread_mutex_lock (&readylist_lock);
1717
1718 /* Find an empty entry in FDLIST. */
1719 size_t inner;
1720 for (inner = 0; inner < nconns; ++inner)
1721 if (fdlist[inner].next == NULL)
1722 break;
1723 assert (inner < nconns);
1724
1725 fdlist[inner].fd = fd;
1726
1727 if (readylist == NULL)
1728 readylist = fdlist[inner].next = &fdlist[inner];
1729 else
1730 {
1731 fdlist[inner].next = readylist->next;
1732 readylist = readylist->next = &fdlist[inner];
1733 }
1734
1735 bool do_signal = true;
1736 if (__builtin_expect (nready == 0, 0))
1737 {
1738 ++client_queued;
1739 do_signal = false;
1740
1741 /* Try to start another thread to help out. */
1742 pthread_t th;
1743 if (nthreads < max_nthreads
1744 && pthread_create (&th, &attr, nscd_run_worker,
1745 (void *) (long int) nthreads) == 0)
1746 {
1747 /* We got another thread. */
1748 ++nthreads;
1749 /* The new thread might need a kick. */
1750 do_signal = true;
1751 }
1752
1753 }
1754
1755 pthread_mutex_unlock (&readylist_lock);
1756
1757 /* Tell one of the worker threads there is work to do. */
1758 if (do_signal)
1759 pthread_cond_signal (&readylist_cond);
1760 }
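
/* Shape of the ready list built above: READYLIST always points at the
   newest entry of a circular, singly linked ring, so READYLIST->next
   is the oldest entry and is what nscd_run_worker pops first (FIFO
   order).  With three queued descriptors A, B and C added in that
   order the ring is C -> A -> B -> C with READYLIST == C; popping
   takes A and leaves C -> B -> C.  */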
1761
1762
1763 /* Check whether restarting should happen. */
1764 static inline int
1765 restart_p (time_t now)
1766 {
1767 return (paranoia && readylist == NULL && nready == nthreads
1768 && now >= restart_time);
1769 }
1770
1771
1772 /* Array for times a connection was accepted. */
1773 static time_t *starttime;
1774
1775
1776 static void
1777 __attribute__ ((__noreturn__))
1778 main_loop_poll (void)
1779 {
1780 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
1781 * sizeof (conns[0]));
1782
1783 conns[0].fd = sock;
1784 conns[0].events = POLLRDNORM;
1785 size_t nused = 1;
1786 size_t firstfree = 1;
1787
1788 #ifdef HAVE_INOTIFY
1789 if (inotify_fd != -1)
1790 {
1791 conns[1].fd = inotify_fd;
1792 conns[1].events = POLLRDNORM;
1793 nused = 2;
1794 firstfree = 2;
1795 }
1796 #endif
1797
1798 while (1)
1799 {
1800 /* Wait for any event. We wait at most a couple of seconds so
1801 that we can check whether we should close any of the accepted
1802 connections since we have not received a request. */
1803 #define MAX_ACCEPT_TIMEOUT 30
1804 #define MIN_ACCEPT_TIMEOUT 5
1805 #define MAIN_THREAD_TIMEOUT \
1806 (MAX_ACCEPT_TIMEOUT * 1000 \
1807 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
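
/* Worked example of the formula above: with MAX_ACCEPT_TIMEOUT 30 and
   MIN_ACCEPT_TIMEOUT 5 this is 30000 - (25000 * nused) / (2 * nconns)
   milliseconds, so the poll timeout shrinks from just under 30 s when
   almost no descriptors are in use down to 17500 ms when
   nused == nconns.  */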
1808
1809 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
1810
1811 time_t now = time (NULL);
1812
1813 /* If there is a descriptor ready for reading or there is a new
1814 connection, process this now. */
1815 if (n > 0)
1816 {
1817 if (conns[0].revents != 0)
1818 {
1819 /* We have a new incoming connection. Accept the connection. */
1820 int fd;
1821
1822 #ifndef __ASSUME_PACCEPT
1823 fd = -1;
1824 if (have_paccept >= 0)
1825 #endif
1826 {
1827 fd = TEMP_FAILURE_RETRY (paccept (sock, NULL, NULL, NULL,
1828 SOCK_NONBLOCK));
1829 #ifndef __ASSUME_PACCEPT
1830 if (have_paccept == 0)
1831 have_paccept = fd != -1 || errno != ENOSYS ? 1 : -1;
1832 #endif
1833 }
1834 #ifndef __ASSUME_PACCEPT
1835 if (have_paccept < 0)
1836 fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
1837 #endif
1838
1839 /* Use the descriptor if we have not reached the limit. */
1840 if (fd >= 0)
1841 {
1842 if (firstfree < nconns)
1843 {
1844 conns[firstfree].fd = fd;
1845 conns[firstfree].events = POLLRDNORM;
1846 starttime[firstfree] = now;
1847 if (firstfree >= nused)
1848 nused = firstfree + 1;
1849
1850 do
1851 ++firstfree;
1852 while (firstfree < nused && conns[firstfree].fd != -1);
1853 }
1854 else
1855 /* We cannot use the connection so close it. */
1856 close (fd);
1857 }
1858
1859 --n;
1860 }
1861
1862 size_t first = 1;
1863 #ifdef HAVE_INOTIFY
1864 if (inotify_fd != -1 && conns[1].fd == inotify_fd)
1865 {
1866 if (conns[1].revents != 0)
1867 {
1868 bool to_clear[lastdb] = { false, };
1869 union
1870 {
1871 # ifndef PATH_MAX
1872 # define PATH_MAX 1024
1873 # endif
1874 struct inotify_event i;
1875 char buf[sizeof (struct inotify_event) + PATH_MAX];
1876 } inev;
1877
1878 while (1)
1879 {
1880 ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd, &inev,
1881 sizeof (inev)));
1882 if (nb < (ssize_t) sizeof (struct inotify_event))
1883 {
1884 if (__builtin_expect (nb == -1 && errno != EAGAIN,
1885 0))
1886 {
1887 /* Something went wrong when reading the inotify
1888 data. Better disable inotify. */
1889 dbg_log (_("\
1890 disabled inotify after read error %d"),
1891 errno);
1892 conns[1].fd = -1;
1893 firstfree = 1;
1894 if (nused == 2)
1895 nused = 1;
1896 close (inotify_fd);
1897 inotify_fd = -1;
1898 }
1899 break;
1900 }
1901
1902 /* Check which of the files changed. */
1903 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
1904 if (inev.i.wd == dbs[dbcnt].inotify_descr)
1905 {
1906 to_clear[dbcnt] = true;
1907 goto next;
1908 }
1909
1910 if (inev.i.wd == resolv_conf_descr)
1911 {
1912 res_init ();
1913 to_clear[hstdb] = true;
1914 }
1915 next:;
1916 }
1917
1918 /* Actually perform the cache clearing. */
1919 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
1920 if (to_clear[dbcnt])
1921 {
1922 pthread_mutex_lock (&dbs[dbcnt].prune_lock);
1923 dbs[dbcnt].clear_cache = 1;
1924 pthread_mutex_unlock (&dbs[dbcnt].prune_lock);
1925 pthread_cond_signal (&dbs[dbcnt].prune_cond);
1926 }
1927
1928 --n;
1929 }
1930
1931 first = 2;
1932 }
1933 #endif
1934
1935 for (size_t cnt = first; cnt < nused && n > 0; ++cnt)
1936 if (conns[cnt].revents != 0)
1937 {
1938 fd_ready (conns[cnt].fd);
1939
1940 /* Clean up the CONNS array. */
1941 conns[cnt].fd = -1;
1942 if (cnt < firstfree)
1943 firstfree = cnt;
1944 if (cnt == nused - 1)
1945 do
1946 --nused;
1947 while (conns[nused - 1].fd == -1);
1948
1949 --n;
1950 }
1951 }
1952
1953 /* Now find entries which have timed out. */
1954 assert (nused > 0);
1955
1956 /* We make the timeout length depend on the number of file
1957 descriptors currently used. */
1958 #define ACCEPT_TIMEOUT \
1959 (MAX_ACCEPT_TIMEOUT \
1960 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
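
/* Worked example: ACCEPT_TIMEOUT falls linearly from 30 s when the
   descriptor table is nearly empty to MIN_ACCEPT_TIMEOUT = 5 s when
   nused == nconns, so idle accepted connections are dropped faster
   the busier the daemon gets.  */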
1961 time_t laststart = now - ACCEPT_TIMEOUT;
1962
1963 for (size_t cnt = nused - 1; cnt > 0; --cnt)
1964 {
1965 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
1966 {
1967 /* Remove the entry, it timed out. */
1968 (void) close (conns[cnt].fd);
1969 conns[cnt].fd = -1;
1970
1971 if (cnt < firstfree)
1972 firstfree = cnt;
1973 if (cnt == nused - 1)
1974 do
1975 --nused;
1976 while (conns[nused - 1].fd == -1);
1977 }
1978 }
1979
1980 if (restart_p (now))
1981 restart ();
1982 }
1983 }
1984
1985
1986 #ifdef HAVE_EPOLL
1987 static void
1988 main_loop_epoll (int efd)
1989 {
1990 struct epoll_event ev = { 0, };
1991 int nused = 1;
1992 size_t highest = 0;
1993
1994 /* Add the socket. */
1995 ev.events = EPOLLRDNORM;
1996 ev.data.fd = sock;
1997 if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
1998 /* We cannot use epoll. */
1999 return;
2000
2001 #ifdef HAVE_INOTIFY
2002 if (inotify_fd != -1)
2003 {
2004 ev.events = EPOLLRDNORM;
2005 ev.data.fd = inotify_fd;
2006 if (epoll_ctl (efd, EPOLL_CTL_ADD, inotify_fd, &ev) == -1)
2007 /* We cannot use epoll. */
2008 return;
2009 nused = 2;
2010 }
2011 #endif
2012
2013 while (1)
2014 {
2015 struct epoll_event revs[100];
2016 # define nrevs (sizeof (revs) / sizeof (revs[0]))
2017
2018 int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);
2019
2020 time_t now = time (NULL);
2021
2022 for (int cnt = 0; cnt < n; ++cnt)
2023 if (revs[cnt].data.fd == sock)
2024 {
2025 /* A new connection. */
2026 int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
2027
2028 if (fd >= 0)
2029 {
2030 /* Try to add the new descriptor. */
2031 ev.data.fd = fd;
2032 if (fd >= nconns
2033 || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
2034 /* The descriptor is too large or something went
2035 wrong. Close the descriptor. */
2036 close (fd);
2037 else
2038 {
2039 /* Remember when we accepted the connection. */
2040 starttime[fd] = now;
2041
2042 if (fd > highest)
2043 highest = fd;
2044
2045 ++nused;
2046 }
2047 }
2048 }
2049 #ifdef HAVE_INOTIFY
2050 else if (revs[cnt].data.fd == inotify_fd)
2051 {
2052 bool to_clear[lastdb] = { false, };
2053 union
2054 {
2055 struct inotify_event i;
2056 char buf[sizeof (struct inotify_event) + PATH_MAX];
2057 } inev;
2058
2059 while (1)
2060 {
2061 ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd, &inev,
2062 sizeof (inev)));
2063 if (nb < (ssize_t) sizeof (struct inotify_event))
2064 {
2065 if (__builtin_expect (nb == -1 && errno != EAGAIN, 0))
2066 {
2067 /* Something went wrong when reading the inotify
2068 data. Better disable inotify. */
2069 dbg_log (_("disabled inotify after read error %d"),
2070 errno);
2071 (void) epoll_ctl (efd, EPOLL_CTL_DEL, inotify_fd,
2072 NULL);
2073 close (inotify_fd);
2074 inotify_fd = -1;
2075 }
2076 break;
2077 }
2078
2079 /* Check which of the files changed. */
2080 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
2081 if (inev.i.wd == dbs[dbcnt].inotify_descr)
2082 {
2083 to_clear[dbcnt] = true;
2084 goto next;
2085 }
2086
2087 if (inev.i.wd == resolv_conf_descr)
2088 {
2089 res_init ();
2090 to_clear[hstdb] = true;
2091 }
2092 next:;
2093 }
2094
2095 /* Actually perform the cache clearing. */
2096 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
2097 if (to_clear[dbcnt])
2098 {
2099 pthread_mutex_lock (&dbs[dbcnt].prune_lock);
2100 dbs[dbcnt].clear_cache = 1;
2101 pthread_mutex_unlock (&dbs[dbcnt].prune_lock);
2102 pthread_cond_signal (&dbs[dbcnt].prune_cond);
2103 }
2104 }
2105 #endif
2106 else
2107 {
2108 /* Remove the descriptor from the epoll descriptor. */
2109 (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);
2110
2111 /* Get a worker to handle the request. */
2112 fd_ready (revs[cnt].data.fd);
2113
2114 /* Reset the start time; the descriptor has been handed off to a worker. */
2115 starttime[revs[cnt].data.fd] = 0;
2116 if (revs[cnt].data.fd == highest)
2117 do
2118 --highest;
2119 while (highest > 0 && starttime[highest] == 0);
2120
2121 --nused;
2122 }
2123
2124 /* Now look for descriptors of accepted connections which have
2125 gone unanswered for too long. */
2126 time_t laststart = now - ACCEPT_TIMEOUT;
2127 assert (starttime[sock] == 0);
2128 assert (inotify_fd == -1 || starttime[inotify_fd] == 0);
2129 for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
2130 if (starttime[cnt] != 0 && starttime[cnt] < laststart)
2131 {
2132 /* We are waiting for this one for too long. Close it. */
2133 (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);
2134
2135 (void) close (cnt);
2136
2137 starttime[cnt] = 0;
2138 if (cnt == highest)
2139 --highest;
2140 }
2141 else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
2142 --highest;
2143
2144 if (restart_p (now))
2145 restart ();
2146 }
2147 }
2148 #endif
2149
2150
2151 /* Start all the threads we want. The initial process is thread no. 1. */
2152 void
2153 start_threads (void)
2154 {
2155 /* Initialize the attribute for the condition variables we will use.
2156 The only non-default attribute we might set is the clock selection. */
2157 pthread_condattr_t condattr;
2158 pthread_condattr_init (&condattr);
2159
2160 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
2161 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
2162 /* Determine whether the monotonic clock is available. */
2163 struct timespec dummy;
2164 # if _POSIX_MONOTONIC_CLOCK == 0
2165 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
2166 # endif
2167 # if _POSIX_CLOCK_SELECTION == 0
2168 if (sysconf (_SC_CLOCK_SELECTION) > 0)
2169 # endif
2170 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
2171 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
2172 timeout_clock = CLOCK_MONOTONIC;
2173 #endif
2174
2175 /* Create the attribute for the threads. They are all created
2176 detached. */
2177 pthread_attr_init (&attr);
2178 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
2179 /* Use 1MB stacks, twice as much for 64-bit architectures. */
2180 pthread_attr_setstacksize (&attr, NSCD_THREAD_STACKSIZE);
2181
2182 /* We allow fewer than lastdb threads only for debugging. */
2183 if (debug_level == 0)
2184 nthreads = MAX (nthreads, lastdb);
2185
2186 /* Create the threads which prune the databases. */
2187 // XXX Ideally this work would be done by some of the worker threads.
2188 // XXX But this is problematic since we would need to be able to wake
2189 // XXX them up explicitly as well as part of the group handling the
2190 // XXX ready-list. This requires an operation where we can wait on
2191 // XXX two condition variables at the same time. This operation
2192 // XXX does not exist (yet).
2193 for (long int i = 0; i < lastdb; ++i)
2194 {
2195 /* Initialize the condition variable. */
2196 if (pthread_cond_init (&dbs[i].prune_cond, &condattr) != 0)
2197 {
2198 dbg_log (_("could not initialize condition variable"));
2199 exit (1);
2200 }
2201
2202 pthread_t th;
2203 if (dbs[i].enabled
2204 && pthread_create (&th, &attr, nscd_run_prune, (void *) i) != 0)
2205 {
2206 dbg_log (_("could not start clean-up thread; terminating"));
2207 exit (1);
2208 }
2209 }
2210
2211 pthread_condattr_destroy (&condattr);
2212
2213 for (long int i = 0; i < nthreads; ++i)
2214 {
2215 pthread_t th;
2216 if (pthread_create (&th, &attr, nscd_run_worker, NULL) != 0)
2217 {
2218 if (i == 0)
2219 {
2220 dbg_log (_("could not start any worker thread; terminating"));
2221 exit (1);
2222 }
2223
2224 break;
2225 }
2226 }
2227
2228 /* Determine how much room for descriptors we should initially
2229 allocate. This might need to change later if we cap the number
2230 with MAXCONN. */
2231 const long int nfds = sysconf (_SC_OPEN_MAX);
2232 #define MINCONN 32
2233 #define MAXCONN 16384
2234 if (nfds == -1 || nfds > MAXCONN)
2235 nconns = MAXCONN;
2236 else if (nfds < MINCONN)
2237 nconns = MINCONN;
2238 else
2239 nconns = nfds;
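/* For example, a typical open file limit of 1024 yields nconns == 1024;
   a limit below 32 is raised to MINCONN, and a limit above 16384 (or an
   unknown limit) is capped at MAXCONN.  */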
2240
2241 /* We need memory to pass descriptors on to the worker threads. */
2242 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
2243 /* Array to keep track of when each connection was accepted. */
2244 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
2245
2246 /* In the main thread we execute the loop which handles incoming
2247 connections. */
2248 #ifdef HAVE_EPOLL
2249 int efd = epoll_create (100);
2250 if (efd != -1)
2251 {
2252 main_loop_epoll (efd);
2253 close (efd);
2254 }
2255 #endif
2256
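/* main_loop_epoll returns only if the epoll descriptor could not be set
   up (epoll_create or the initial epoll_ctl calls failed); in that case,
   or if epoll is not available at all, fall back to the poll-based loop.  */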
2257 main_loop_poll ();
2258 }
2259
2260
2261 /* Look up the uid, gid, and supplementary groups to run nscd as. When
2262 this function is called, we are not listening on the nscd socket yet so
2263 we can just use the ordinary lookup functions without causing a lockup. */
2264 static void
2265 begin_drop_privileges (void)
2266 {
2267 struct passwd *pwd = getpwnam (server_user);
2268
2269 if (pwd == NULL)
2270 {
2271 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2272 error (EXIT_FAILURE, 0, _("Failed to run nscd as user '%s'"),
2273 server_user);
2274 }
2275
2276 server_uid = pwd->pw_uid;
2277 server_gid = pwd->pw_gid;
2278
2279 /* Save the old UID/GID if we have to change back. */
2280 if (paranoia)
2281 {
2282 old_uid = getuid ();
2283 old_gid = getgid ();
2284 }
2285
2286 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
2287 {
2288 /* This really must never happen. */
2289 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2290 error (EXIT_FAILURE, errno, _("initial getgrouplist failed"));
2291 }
2292
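/* The getgrouplist call above was made with a null buffer and a zero
   count; it is expected to fail and store the number of supplementary
   groups in server_ngroups, which is now used to size the buffer for
   the second, real call.  */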
2293 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
2294
2295 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
2296 == -1)
2297 {
2298 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2299 error (EXIT_FAILURE, errno, _("getgrouplist failed"));
2300 }
2301 }
2302
2303
2304 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
2305 run nscd as the user specified in the configuration file. */
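/* The order of the calls below matters: the supplementary groups and the
   gid can only be changed while the process still holds root privileges,
   so setuid()/setresuid() must come last.  */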
2306 static void
2307 finish_drop_privileges (void)
2308 {
2309 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2310 /* We need to preserve the capabilities to connect to the audit daemon. */
2311 cap_t new_caps = preserve_capabilities ();
2312 #endif
2313
2314 if (setgroups (server_ngroups, server_groups) == -1)
2315 {
2316 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2317 error (EXIT_FAILURE, errno, _("setgroups failed"));
2318 }
2319
2320 int res;
2321 if (paranoia)
2322 res = setresgid (server_gid, server_gid, old_gid);
2323 else
2324 res = setgid (server_gid);
2325 if (res == -1)
2326 {
2327 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2328 perror ("setgid");
2329 exit (4);
2330 }
2331
2332 if (paranoia)
2333 res = setresuid (server_uid, server_uid, old_uid);
2334 else
2335 res = setuid (server_uid);
2336 if (res == -1)
2337 {
2338 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2339 perror ("setuid");
2340 exit (4);
2341 }
2342
2343 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2344 /* Remove the temporary capabilities. */
2345 install_real_capabilities (new_caps);
2346 #endif
2347 }