]> git.ipfire.org Git - thirdparty/glibc.git/blame - nscd/connections.c
* sysdeps/unix/sysv/linux/timer_routines.c (timer_helper_thread):
[thirdparty/glibc.git] / nscd / connections.c
CommitLineData
67479a70 1/* Inner loops of cache daemon.
6cac6927 2 Copyright (C) 1998-2003, 2004, 2005, 2006 Free Software Foundation, Inc.
d67281a7 3 This file is part of the GNU C Library.
67479a70 4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
d67281a7 5
43bc8ac6
UD
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License version 2 as
8 published by the Free Software Foundation.
d67281a7 9
43bc8ac6 10 This program is distributed in the hope that it will be useful,
d67281a7 11 but WITHOUT ANY WARRANTY; without even the implied warranty of
43bc8ac6
UD
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
d67281a7 14
43bc8ac6
UD
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software Foundation,
17 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
d67281a7 18
4401d759 19#include <alloca.h>
67479a70 20#include <assert.h>
0fdb4f42 21#include <atomic.h>
d67281a7 22#include <error.h>
67479a70 23#include <errno.h>
d6db0975 24#include <fcntl.h>
057685e4 25#include <grp.h>
a95a08b4 26#include <libintl.h>
d67281a7 27#include <pthread.h>
057685e4 28#include <pwd.h>
482bbeb9 29#include <resolv.h>
057685e4 30#include <stdio.h>
d67281a7
UD
31#include <stdlib.h>
32#include <unistd.h>
8d8c6efa 33#include <arpa/inet.h>
fc03df7a
UD
34#ifdef HAVE_EPOLL
35# include <sys/epoll.h>
36#endif
a95a08b4 37#include <sys/mman.h>
67479a70 38#include <sys/param.h>
a53bad16 39#include <sys/poll.h>
eac10791
UD
40#ifdef HAVE_SENDFILE
41# include <sys/sendfile.h>
42#endif
d67281a7
UD
43#include <sys/socket.h>
44#include <sys/stat.h>
d67281a7
UD
45#include <sys/un.h>
46
47#include "nscd.h"
48#include "dbg_log.h"
74a30a58 49#include "selinux.h"
eac10791
UD
50#ifdef HAVE_SENDFILE
51# include <kernel-features.h>
52#endif
a334319f
UD
53
54
057685e4
UD
/* Allocation wrappers with built-in error checking; they are defined
   outside this file.  */
extern void *xmalloc (size_t n);
extern void *xcalloc (size_t n, size_t s);
extern void *xrealloc (void *o, size_t n);

/* Support for running nscd as an unprivileged user.  When SERVER_USER
   is non-NULL, nscd_init calls begin_drop_privileges before creating
   the socket and finish_drop_privileges once it is listening.  */
const char *server_user;
static uid_t server_uid;
static gid_t server_gid;
static gid_t *server_groups;
static int server_ngroups;
#ifndef NGROUPS
# define NGROUPS 32
#endif

/* User who, besides root, may issue GETSTAT requests (see
   handle_request), and the matching uid.  */
const char *stat_user;
uid_t stat_uid;

/* Thread attribute object; its users are not in this part of the
   file.  */
static pthread_attr_t attr;

static void begin_drop_privileges (void);
static void finish_drop_privileges (void);
76
67479a70
UD
77/* Map request type to a string. */
78const char *serv2str[LASTREQ] =
d67281a7 79{
67479a70
UD
80 [GETPWBYNAME] = "GETPWBYNAME",
81 [GETPWBYUID] = "GETPWBYUID",
82 [GETGRBYNAME] = "GETGRBYNAME",
83 [GETGRBYGID] = "GETGRBYGID",
84 [GETHOSTBYNAME] = "GETHOSTBYNAME",
85 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
86 [GETHOSTBYADDR] = "GETHOSTBYADDR",
87 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
88 [SHUTDOWN] = "SHUTDOWN",
756409c4 89 [GETSTAT] = "GETSTAT",
c207f23b
UD
90 [INVALIDATE] = "INVALIDATE",
91 [GETFDPW] = "GETFDPW",
92 [GETFDGR] = "GETFDGR",
d19687d6 93 [GETFDHST] = "GETFDHST",
f7e7a396
UD
94 [GETAI] = "GETAI",
95 [INITGROUPS] = "INITGROUPS"
67479a70
UD
96};
97
98/* The control data structures for the services. */
a95a08b4 99struct database_dyn dbs[lastdb] =
67479a70
UD
100{
101 [pwddb] = {
c2e13112
RM
102 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
103 .enabled = 0,
104 .check_file = 1,
a95a08b4 105 .persistent = 0,
797ed6f7 106 .propagate = 1,
c207f23b 107 .shared = 0,
2c210d1e 108 .max_db_size = DEFAULT_MAX_DB_SIZE,
c2e13112 109 .filename = "/etc/passwd",
a95a08b4 110 .db_filename = _PATH_NSCD_PASSWD_DB,
c2e13112
RM
111 .disabled_iov = &pwd_iov_disabled,
112 .postimeout = 3600,
a95a08b4
UD
113 .negtimeout = 20,
114 .wr_fd = -1,
115 .ro_fd = -1,
116 .mmap_used = false
67479a70
UD
117 },
118 [grpdb] = {
c2e13112
RM
119 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
120 .enabled = 0,
121 .check_file = 1,
a95a08b4 122 .persistent = 0,
797ed6f7 123 .propagate = 1,
c207f23b 124 .shared = 0,
2c210d1e 125 .max_db_size = DEFAULT_MAX_DB_SIZE,
c2e13112 126 .filename = "/etc/group",
a95a08b4 127 .db_filename = _PATH_NSCD_GROUP_DB,
c2e13112
RM
128 .disabled_iov = &grp_iov_disabled,
129 .postimeout = 3600,
a95a08b4
UD
130 .negtimeout = 60,
131 .wr_fd = -1,
132 .ro_fd = -1,
133 .mmap_used = false
67479a70
UD
134 },
135 [hstdb] = {
c2e13112
RM
136 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
137 .enabled = 0,
138 .check_file = 1,
a95a08b4 139 .persistent = 0,
797ed6f7 140 .propagate = 0, /* Not used. */
c207f23b 141 .shared = 0,
2c210d1e 142 .max_db_size = DEFAULT_MAX_DB_SIZE,
c2e13112 143 .filename = "/etc/hosts",
a95a08b4 144 .db_filename = _PATH_NSCD_HOSTS_DB,
c2e13112
RM
145 .disabled_iov = &hst_iov_disabled,
146 .postimeout = 3600,
a95a08b4
UD
147 .negtimeout = 20,
148 .wr_fd = -1,
149 .ro_fd = -1,
150 .mmap_used = false
67479a70
UD
151 }
152};
d67281a7 153
a95a08b4
UD
154
155/* Mapping of request type to database. */
c207f23b 156static struct database_dyn *const serv2db[LASTREQ] =
a95a08b4
UD
157{
158 [GETPWBYNAME] = &dbs[pwddb],
159 [GETPWBYUID] = &dbs[pwddb],
160 [GETGRBYNAME] = &dbs[grpdb],
161 [GETGRBYGID] = &dbs[grpdb],
162 [GETHOSTBYNAME] = &dbs[hstdb],
163 [GETHOSTBYNAMEv6] = &dbs[hstdb],
164 [GETHOSTBYADDR] = &dbs[hstdb],
c207f23b
UD
165 [GETHOSTBYADDRv6] = &dbs[hstdb],
166 [GETFDPW] = &dbs[pwddb],
167 [GETFDGR] = &dbs[grpdb],
168 [GETFDHST] = &dbs[hstdb],
d19687d6 169 [GETAI] = &dbs[hstdb],
f7e7a396 170 [INITGROUPS] = &dbs[grpdb]
a95a08b4
UD
171};
172
173
34489d95
UD
/* Interval, in seconds, between two cache pruning runs.  */
#define CACHE_PRUNE_INTERVAL	15


/* Number of threads to start with.  The value -1 means "not
   configured"; nscd_init then substitutes 2 * lastdb.  */
int nthreads = -1;

/* Upper limit for the number of threads.  */
int max_nthreads = 32;

/* The socket on which incoming connections arrive.  Created by
   nscd_init, closed by close_sockets.  */
static int sock;

/* How many times clients had to wait.  */
unsigned long int client_queued;
188
d67281a7 189
d2dc7d84
UD
/* Send the LEN bytes at BUF over the socket FD, continuing after short
   writes and restarting calls interrupted by a signal.  SIGPIPE is
   suppressed through MSG_NOSIGNAL.

   Returns the (negative) result of send if a call fails.  Otherwise
   returns the number of bytes sent, which is less than LEN only when
   send reported that zero bytes were written.  */
ssize_t
writeall (int fd, const void *buf, size_t len)
{
  const char *cursor = buf;
  size_t remaining = len;
  ssize_t sent;

  for (;;)
    {
      sent = TEMP_FAILURE_RETRY (send (fd, cursor, remaining, MSG_NOSIGNAL));
      if (sent <= 0)
        break;

      cursor += sent;
      remaining -= sent;
      if (remaining == 0)
        break;
    }

  if (sent < 0)
    return sent;

  return len - remaining;
}
206
207
bd547139
UD
#ifdef HAVE_SENDFILE
/* Copy LEN bytes, starting at offset OFF of FROMFD, to TOFD with
   sendfile.  Calls interrupted by a signal are restarted and short
   transfers are continued.

   Returns the (negative) result of sendfile if a call fails.
   Otherwise returns the number of bytes transferred, which is less
   than LEN only when sendfile reported zero bytes.  */
ssize_t
sendfileall (int tofd, int fromfd, off_t off, size_t len)
{
  ssize_t remaining = len;
  ssize_t moved;

  for (;;)
    {
      /* sendfile advances OFF by the amount it transferred.  */
      moved = TEMP_FAILURE_RETRY (sendfile (tofd, fromfd, &off, remaining));
      if (moved <= 0)
        break;

      remaining -= moved;
      if (remaining <= 0)
        break;
    }

  if (moved < 0)
    return moved;

  return len - remaining;
}
#endif
226
227
dc4bb1c2
UD
/* Marker values stored in the use map built by verify_persistent_db.
   Each byte of the map records what kind of object covers the
   corresponding byte of the data area (low bits) combined with
   position flags (use_begin, use_end, use_first).  */
enum usekey
  {
    /* Byte is not covered by any object yet.  */
    use_not = 0,

    /* Position flags.  These three are not really used on their own,
       they are symbolic constants OR-ed into the values below.  */
    use_first = 16,
    use_begin = 32,
    use_end = 64,

    /* Hash entry.  */
    use_he = 1,
    use_he_begin = use_he | use_begin,
    use_he_end = use_he | use_end,

#if SEPARATE_KEY
    /* Key stored outside of its data record.  */
    use_key = 2,
    use_key_begin = use_key | use_begin,
    use_key_end = use_key | use_end,
    use_key_first = use_key_begin | use_first,
#endif

    /* Data record.  */
    use_data = 3,
    use_data_begin = use_data | use_begin,
    use_data_end = use_data | use_end,
    use_data_first = use_data_begin | use_first
  };
250
251
252static int
253check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
254 enum usekey use, ref_t start, size_t len)
255{
256 assert (len >= 2);
257
258 if (start > first_free || start + len > first_free
259 || (start & BLOCK_ALIGN_M1))
260 return 0;
261
262 if (usemap[start] == use_not)
263 {
264 /* Add the start marker. */
265 usemap[start] = use | use_begin;
266 use &= ~use_first;
267
268 while (--len > 0)
269 if (usemap[++start] != use_not)
270 return 0;
271 else
272 usemap[start] = use;
273
274 /* Add the end marker. */
275 usemap[start] = use | use_end;
276 }
277 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
278 {
279 /* Hash entries can't be shared. */
280 if (use == use_he)
281 return 0;
282
283 usemap[start] |= (use & use_first);
284 use &= ~use_first;
285
286 while (--len > 1)
287 if (usemap[++start] != use)
288 return 0;
289
290 if (usemap[++start] != (use | use_end))
291 return 0;
292 }
293 else
294 /* Points to a wrong object or somewhere in the middle. */
295 return 0;
296
297 return 1;
298}
299
300
301/* Verify data in persistent database. */
302static int
303verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
304{
305 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb);
306
307 time_t now = time (NULL);
308
309 struct database_pers_head *head = mem;
310 struct database_pers_head head_copy = *head;
311
312 /* Check that the header that was read matches the head in the database. */
313 if (readhead != NULL && memcmp (head, readhead, sizeof (*head)) != 0)
314 return 0;
315
316 /* First some easy tests: make sure the database header is sane. */
317 if (head->version != DB_VERSION
318 || head->header_size != sizeof (*head)
319 /* We allow a timestamp to be one hour ahead of the current time.
320 This should cover daylight saving time changes. */
321 || head->timestamp > now + 60 * 60 + 60
322 || (head->gc_cycle & 1)
323 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
324 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
325 || head->first_free < 0
326 || head->first_free > head->data_size
327 || (head->first_free & BLOCK_ALIGN_M1) != 0
328 || head->maxnentries < 0
329 || head->maxnsearched < 0)
330 return 0;
331
332 uint8_t *usemap = calloc (head->first_free, 1);
333 if (usemap == NULL)
334 return 0;
335
336 const char *data = (char *) &head->array[roundup (head->module,
337 ALIGN / sizeof (ref_t))];
338
339 nscd_ssize_t he_cnt = 0;
340 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
341 {
342 ref_t work = head->array[cnt];
343
344 while (work != ENDREF)
345 {
346 if (! check_use (data, head->first_free, usemap, use_he, work,
347 sizeof (struct hashentry)))
348 goto fail;
349
350 /* Now we know we can dereference the record. */
351 struct hashentry *here = (struct hashentry *) (data + work);
352
353 ++he_cnt;
354
355 /* Make sure the record is for this type of service. */
356 if (here->type >= LASTREQ
357 || serv2db[here->type] != &dbs[dbnr])
358 goto fail;
359
360 /* Validate boolean field value. */
361 if (here->first != false && here->first != true)
362 goto fail;
363
364 if (here->len < 0)
365 goto fail;
366
367 /* Now the data. */
368 if (here->packet < 0
369 || here->packet > head->first_free
370 || here->packet + sizeof (struct datahead) > head->first_free)
371 goto fail;
372
373 struct datahead *dh = (struct datahead *) (data + here->packet);
374
375 if (! check_use (data, head->first_free, usemap,
376 use_data | (here->first ? use_first : 0),
377 here->packet, dh->allocsize))
378 goto fail;
379
380 if (dh->allocsize < sizeof (struct datahead)
381 || dh->recsize > dh->allocsize
382 || (dh->notfound != false && dh->notfound != true)
383 || (dh->usable != false && dh->usable != true))
384 goto fail;
385
386 if (here->key < here->packet + sizeof (struct datahead)
387 || here->key > here->packet + dh->allocsize
388 || here->key + here->len > here->packet + dh->allocsize)
389 {
390#if SEPARATE_KEY
391 /* If keys can appear outside of data, this should be done
392 instead. But gc doesn't mark the data in that case. */
393 if (! check_use (data, head->first_free, usemap,
394 use_key | (here->first ? use_first : 0),
395 here->key, here->len))
396#endif
397 goto fail;
398 }
399
400 work = here->next;
401 }
402 }
403
404 if (he_cnt != head->nentries)
405 goto fail;
406
407 /* See if all data and keys had at least one reference from
408 he->first == true hashentry. */
409 for (ref_t idx = 0; idx < head->first_free; ++idx)
410 {
411#if SEPARATE_KEY
412 if (usemap[idx] == use_key_begin)
413 goto fail;
414#endif
415 if (usemap[idx] == use_data_begin)
416 goto fail;
417 }
418
419 /* Finally, make sure the database hasn't changed since the first test. */
420 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
421 goto fail;
422
423 free (usemap);
424 return 1;
425
426fail:
427 free (usemap);
428 return 0;
429}
430
431
a334319f
UD
432/* Initialize database information structures. */
433void
434nscd_init (void)
0ecb606c 435{
057685e4
UD
436 /* Look up unprivileged uid/gid/groups before we start listening on the
437 socket */
438 if (server_user != NULL)
439 begin_drop_privileges ();
440
67479a70
UD
441 if (nthreads == -1)
442 /* No configuration for this value, assume a default. */
443 nthreads = 2 * lastdb;
d67281a7 444
d2dc7d84 445 for (size_t cnt = 0; cnt < lastdb; ++cnt)
67479a70 446 if (dbs[cnt].enabled)
9db29cde 447 {
67479a70 448 pthread_rwlock_init (&dbs[cnt].lock, NULL);
a95a08b4 449 pthread_mutex_init (&dbs[cnt].memlock, NULL);
264d5b94 450
a95a08b4 451 if (dbs[cnt].persistent)
e09edf23 452 {
a95a08b4
UD
453 /* Try to open the appropriate file on disk. */
454 int fd = open (dbs[cnt].db_filename, O_RDWR);
455 if (fd != -1)
456 {
457 struct stat64 st;
458 void *mem;
459 size_t total;
460 struct database_pers_head head;
461 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
462 sizeof (head)));
463 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
464 {
465 fail_db:
466 dbg_log (_("invalid persistent database file \"%s\": %s"),
467 dbs[cnt].db_filename, strerror (errno));
dc4bb1c2 468 unlink (dbs[cnt].db_filename);
a95a08b4
UD
469 }
470 else if (head.module == 0 && head.data_size == 0)
471 {
472 /* The file has been created, but the head has not been
473 initialized yet. Remove the old file. */
474 unlink (dbs[cnt].db_filename);
475 }
476 else if (head.header_size != (int) sizeof (head))
477 {
478 dbg_log (_("invalid persistent database file \"%s\": %s"),
479 dbs[cnt].db_filename,
480 _("header size does not match"));
dc4bb1c2 481 unlink (dbs[cnt].db_filename);
a95a08b4
UD
482 }
483 else if ((total = (sizeof (head)
c207f23b 484 + roundup (head.module * sizeof (ref_t),
a95a08b4
UD
485 ALIGN)
486 + head.data_size))
dc4bb1c2
UD
487 > st.st_size
488 || total < sizeof (head))
a95a08b4
UD
489 {
490 dbg_log (_("invalid persistent database file \"%s\": %s"),
491 dbs[cnt].db_filename,
492 _("file size does not match"));
dc4bb1c2 493 unlink (dbs[cnt].db_filename);
a95a08b4 494 }
2c210d1e
UD
495 /* Note we map with the maximum size allowed for the
496 database. This is likely much larger than the
497 actual file size. This is OK on most OSes since
498 extensions of the underlying file will
499 automatically translate more pages available for
500 memory access. */
501 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
502 PROT_READ | PROT_WRITE,
503 MAP_SHARED, fd, 0))
504 == MAP_FAILED)
a95a08b4 505 goto fail_db;
dc4bb1c2
UD
506 else if (!verify_persistent_db (mem, &head, cnt))
507 {
508 munmap (mem, total);
509 dbg_log (_("invalid persistent database file \"%s\": %s"),
510 dbs[cnt].db_filename,
511 _("verification failed"));
512 unlink (dbs[cnt].db_filename);
513 }
a95a08b4
UD
514 else
515 {
516 /* Success. We have the database. */
517 dbs[cnt].head = mem;
518 dbs[cnt].memsize = total;
519 dbs[cnt].data = (char *)
520 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
521 ALIGN / sizeof (ref_t))];
522 dbs[cnt].mmap_used = true;
523
524 if (dbs[cnt].suggested_module > head.module)
525 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
526 dbnames[cnt]);
527
528 dbs[cnt].wr_fd = fd;
529 fd = -1;
530 /* We also need a read-only descriptor. */
d13a3c57
UD
531 if (dbs[cnt].shared)
532 {
533 dbs[cnt].ro_fd = open (dbs[cnt].db_filename, O_RDONLY);
534 if (dbs[cnt].ro_fd == -1)
535 dbg_log (_("\
a95a08b4 536cannot create read-only descriptor for \"%s\"; no mmap"),
d13a3c57
UD
537 dbs[cnt].db_filename);
538 }
a95a08b4
UD
539
540 // XXX Shall we test whether the descriptors actually
541 // XXX point to the same file?
542 }
543
544 /* Close the file descriptors in case something went
545 wrong in which case the variable have not been
546 assigned -1. */
547 if (fd != -1)
548 close (fd);
549 }
550 }
551
552 if (dbs[cnt].head == NULL)
553 {
554 /* No database loaded. Allocate the data structure,
555 possibly on disk. */
556 struct database_pers_head head;
557 size_t total = (sizeof (head)
558 + roundup (dbs[cnt].suggested_module
559 * sizeof (ref_t), ALIGN)
560 + (dbs[cnt].suggested_module
561 * DEFAULT_DATASIZE_PER_BUCKET));
562
563 /* Try to create the database. If we do not need a
564 persistent database create a temporary file. */
565 int fd;
566 int ro_fd = -1;
567 if (dbs[cnt].persistent)
568 {
569 fd = open (dbs[cnt].db_filename,
570 O_RDWR | O_CREAT | O_EXCL | O_TRUNC,
571 S_IRUSR | S_IWUSR);
d13a3c57 572 if (fd != -1 && dbs[cnt].shared)
a95a08b4
UD
573 ro_fd = open (dbs[cnt].db_filename, O_RDONLY);
574 }
575 else
576 {
a8a58967 577 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
a95a08b4
UD
578 fd = mkstemp (fname);
579
580 /* We do not need the file name anymore after we
581 opened another file descriptor in read-only mode. */
5ca3d19c 582 if (fd != -1)
a95a08b4 583 {
5ca3d19c
UD
584 if (dbs[cnt].shared)
585 ro_fd = open (fname, O_RDONLY);
a95a08b4
UD
586
587 unlink (fname);
588 }
589 }
590
591 if (fd == -1)
592 {
593 if (errno == EEXIST)
594 {
595 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
596 dbnames[cnt], dbs[cnt].db_filename);
597 // XXX Correct way to terminate?
598 exit (1);
599 }
600
601 if (dbs[cnt].persistent)
602 dbg_log (_("cannot create %s; no persistent database used"),
603 dbs[cnt].db_filename);
604 else
605 dbg_log (_("cannot create %s; no sharing possible"),
606 dbs[cnt].db_filename);
607
608 dbs[cnt].persistent = 0;
609 // XXX remember: no mmap
610 }
611 else
612 {
613 /* Tell the user if we could not create the read-only
614 descriptor. */
d13a3c57 615 if (ro_fd == -1 && dbs[cnt].shared)
a95a08b4
UD
616 dbg_log (_("\
617cannot create read-only descriptor for \"%s\"; no mmap"),
618 dbs[cnt].db_filename);
619
620 /* Before we create the header, initialiye the hash
621 table. So that if we get interrupted if writing
622 the header we can recognize a partially initialized
623 database. */
624 size_t ps = sysconf (_SC_PAGESIZE);
625 char tmpbuf[ps];
626 assert (~ENDREF == 0);
627 memset (tmpbuf, '\xff', ps);
628
629 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
630 off_t offset = sizeof (head);
631
632 size_t towrite;
633 if (offset % ps != 0)
634 {
635 towrite = MIN (remaining, ps - (offset % ps));
233399bc
UD
636 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
637 goto write_fail;
a95a08b4
UD
638 offset += towrite;
639 remaining -= towrite;
640 }
641
642 while (remaining > ps)
643 {
233399bc
UD
644 if (pwrite (fd, tmpbuf, ps, offset) == -1)
645 goto write_fail;
a95a08b4
UD
646 offset += ps;
647 remaining -= ps;
648 }
649
233399bc
UD
650 if (remaining > 0
651 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
652 goto write_fail;
a95a08b4
UD
653
654 /* Create the header of the file. */
655 struct database_pers_head head =
656 {
657 .version = DB_VERSION,
658 .header_size = sizeof (head),
659 .module = dbs[cnt].suggested_module,
660 .data_size = (dbs[cnt].suggested_module
661 * DEFAULT_DATASIZE_PER_BUCKET),
662 .first_free = 0
663 };
664 void *mem;
665
666 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
667 != sizeof (head))
2c210d1e
UD
668 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
669 != 0)
670 || (mem = mmap (NULL, dbs[cnt].max_db_size,
671 PROT_READ | PROT_WRITE,
a95a08b4
UD
672 MAP_SHARED, fd, 0)) == MAP_FAILED)
673 {
233399bc 674 write_fail:
a95a08b4
UD
675 unlink (dbs[cnt].db_filename);
676 dbg_log (_("cannot write to database file %s: %s"),
677 dbs[cnt].db_filename, strerror (errno));
678 dbs[cnt].persistent = 0;
679 }
680 else
681 {
682 /* Success. */
683 dbs[cnt].head = mem;
684 dbs[cnt].data = (char *)
685 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
686 ALIGN / sizeof (ref_t))];
687 dbs[cnt].memsize = total;
688 dbs[cnt].mmap_used = true;
689
690 /* Remember the descriptors. */
691 dbs[cnt].wr_fd = fd;
692 dbs[cnt].ro_fd = ro_fd;
693 fd = -1;
694 ro_fd = -1;
695 }
696
697 if (fd != -1)
698 close (fd);
699 if (ro_fd != -1)
700 close (ro_fd);
701 }
702 }
703
4401d759
UD
704 if (paranoia
705 && ((dbs[cnt].wr_fd != -1
706 && fcntl (dbs[cnt].wr_fd, F_SETFD, FD_CLOEXEC) == -1)
707 || (dbs[cnt].ro_fd != -1
708 && fcntl (dbs[cnt].ro_fd, F_SETFD, FD_CLOEXEC) == -1)))
709 {
710 dbg_log (_("\
711cannot set socket to close on exec: %s; disabling paranoia mode"),
712 strerror (errno));
713 paranoia = 0;
714 }
715
a95a08b4
UD
716 if (dbs[cnt].head == NULL)
717 {
718 /* We do not use the persistent database. Just
719 create an in-memory data structure. */
720 assert (! dbs[cnt].persistent);
721
722 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
723 + (dbs[cnt].suggested_module
724 * sizeof (ref_t)));
a334319f 725 memset (dbs[cnt].head, '\0', sizeof (dbs[cnt].head));
a95a08b4
UD
726 assert (~ENDREF == 0);
727 memset (dbs[cnt].head->array, '\xff',
728 dbs[cnt].suggested_module * sizeof (ref_t));
729 dbs[cnt].head->module = dbs[cnt].suggested_module;
730 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
731 * dbs[cnt].head->module);
732 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
733 dbs[cnt].head->first_free = 0;
c207f23b
UD
734
735 dbs[cnt].shared = 0;
736 assert (dbs[cnt].ro_fd == -1);
e09edf23 737 }
d67281a7 738
67479a70 739 if (dbs[cnt].check_file)
d67281a7 740 {
67479a70 741 /* We need the modification date of the file. */
80ea3037 742 struct stat64 st;
d67281a7 743
80ea3037 744 if (stat64 (dbs[cnt].filename, &st) < 0)
d67281a7 745 {
67479a70
UD
746 /* We cannot stat() the file, disable file checking. */
747 dbg_log (_("cannot stat() file `%s': %s"),
e09edf23 748 dbs[cnt].filename, strerror (errno));
67479a70 749 dbs[cnt].check_file = 0;
d67281a7
UD
750 }
751 else
67479a70
UD
752 dbs[cnt].file_mtime = st.st_mtime;
753 }
754 }
d67281a7
UD
755
756 /* Create the socket. */
67479a70
UD
757 sock = socket (AF_UNIX, SOCK_STREAM, 0);
758 if (sock < 0)
d67281a7 759 {
67479a70 760 dbg_log (_("cannot open socket: %s"), strerror (errno));
64d64de6 761 exit (errno == EACCES ? 4 : 1);
d67281a7
UD
762 }
763 /* Bind a name to the socket. */
d2dc7d84 764 struct sockaddr_un sock_addr;
d67281a7
UD
765 sock_addr.sun_family = AF_UNIX;
766 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
67479a70 767 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
d67281a7
UD
768 {
769 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
64d64de6 770 exit (errno == EACCES ? 4 : 1);
d67281a7 771 }
67479a70 772
4401d759 773 /* We don't want to get stuck on accept. */
d6db0975 774 int fl = fcntl (sock, F_GETFL);
4401d759
UD
775 if (fl == -1 || fcntl (sock, F_SETFL, fl | O_NONBLOCK) == -1)
776 {
777 dbg_log (_("cannot change socket to nonblocking mode: %s"),
778 strerror (errno));
779 exit (1);
780 }
781
782 /* The descriptor needs to be closed on exec. */
783 if (paranoia && fcntl (sock, F_SETFD, FD_CLOEXEC) == -1)
784 {
785 dbg_log (_("cannot set socket to close on exec: %s"),
786 strerror (errno));
787 exit (1);
788 }
d6db0975 789
d67281a7 790 /* Set permissions for the socket. */
a95a08b4 791 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
d67281a7
UD
792
793 /* Set the socket up to accept connections. */
67479a70 794 if (listen (sock, SOMAXCONN) < 0)
d67281a7 795 {
67479a70
UD
796 dbg_log (_("cannot enable socket to accept connections: %s"),
797 strerror (errno));
d67281a7
UD
798 exit (1);
799 }
057685e4
UD
800
801 /* Change to unprivileged uid/gid/groups if specifed in config file */
802 if (server_user != NULL)
803 finish_drop_privileges ();
d67281a7
UD
804}
805
67479a70
UD
806
807/* Close the connections. */
d67281a7 808void
67479a70 809close_sockets (void)
d67281a7 810{
67479a70
UD
811 close (sock);
812}
d67281a7 813
a12ce44f 814
756409c4 815static void
a334319f 816invalidate_cache (char *key)
756409c4
UD
817{
818 dbtype number;
819
820 if (strcmp (key, "passwd") == 0)
821 number = pwddb;
822 else if (strcmp (key, "group") == 0)
823 number = grpdb;
23700036 824 else if (__builtin_expect (strcmp (key, "hosts"), 0) == 0)
482bbeb9
UD
825 {
826 number = hstdb;
827
828 /* Re-initialize the resolver. resolv.conf might have changed. */
829 res_init ();
830 }
23700036 831 else
a334319f 832 return;
756409c4 833
fd665070 834 if (dbs[number].enabled)
a334319f 835 prune_cache (&dbs[number], LONG_MAX);
756409c4
UD
836}
837
67479a70 838
c207f23b
UD
839#ifdef SCM_RIGHTS
840static void
841send_ro_fd (struct database_dyn *db, char *key, int fd)
842{
843 /* If we do not have an read-only file descriptor do nothing. */
844 if (db->ro_fd == -1)
845 return;
846
847 /* We need to send some data along with the descriptor. */
848 struct iovec iov[1];
849 iov[0].iov_base = key;
850 iov[0].iov_len = strlen (key) + 1;
851
852 /* Prepare the control message to transfer the descriptor. */
a08ab897
UD
853 union
854 {
855 struct cmsghdr hdr;
856 char bytes[CMSG_SPACE (sizeof (int))];
857 } buf;
c207f23b 858 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 1,
a08ab897
UD
859 .msg_control = buf.bytes,
860 .msg_controllen = sizeof (buf) };
c207f23b
UD
861 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
862
863 cmsg->cmsg_level = SOL_SOCKET;
864 cmsg->cmsg_type = SCM_RIGHTS;
865 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
866
867 *(int *) CMSG_DATA (cmsg) = db->ro_fd;
868
869 msg.msg_controllen = cmsg->cmsg_len;
870
871 /* Send the control message. We repeat when we are interrupted but
872 everything else is ignored. */
6925ef9a
UD
873#ifndef MSG_NOSIGNAL
874# define MSG_NOSIGNAL 0
875#endif
876 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
c207f23b
UD
877
878 if (__builtin_expect (debug_level > 0, 0))
879 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
880}
881#endif /* SCM_RIGHTS */
882
883
67479a70
UD
884/* Handle new request. */
885static void
a1c542bf 886handle_request (int fd, request_header *req, void *key, uid_t uid)
67479a70 887{
23700036 888 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
d67281a7 889 {
98e75a1c
UD
890 if (debug_level > 0)
891 dbg_log (_("\
67479a70 892cannot handle old request version %d; current version is %d"),
98e75a1c 893 req->version, NSCD_VERSION);
d67281a7
UD
894 return;
895 }
896
74a30a58
UD
897 /* Make the SELinux check before we go on to the standard checks. We
898 need to verify that the request type is valid, since it has not
899 yet been checked at this point. */
900 if (selinux_enabled
901 && __builtin_expect (req->type, GETPWBYNAME) >= GETPWBYNAME
902 && __builtin_expect (req->type, LASTREQ) < LASTREQ
903 && nscd_request_avc_has_perm (fd, req->type) != 0)
904 return;
905
a95a08b4
UD
906 struct database_dyn *db = serv2db[req->type];
907
f7e7a396
UD
908 // XXX Clean up so that each new command need not introduce a
909 // XXX new conditional.
d19687d6
UD
910 if ((__builtin_expect (req->type, GETPWBYNAME) >= GETPWBYNAME
911 && __builtin_expect (req->type, LASTDBREQ) <= LASTDBREQ)
f7e7a396 912 || req->type == GETAI || req->type == INITGROUPS)
d67281a7 913 {
23700036 914 if (__builtin_expect (debug_level, 0) > 0)
8d8c6efa
UD
915 {
916 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
917 {
918 char buf[INET6_ADDRSTRLEN];
919
920 dbg_log ("\t%s (%s)", serv2str[req->type],
921 inet_ntop (req->type == GETHOSTBYADDR
922 ? AF_INET : AF_INET6,
923 key, buf, sizeof (buf)));
924 }
925 else
a95a08b4 926 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
8d8c6efa 927 }
d67281a7 928
67479a70
UD
929 /* Is this service enabled? */
930 if (!db->enabled)
931 {
ce85d65b 932 /* No, sent the prepared record. */
2c210d1e
UD
933 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
934 db->disabled_iov->iov_len,
935 MSG_NOSIGNAL))
4c5dd2a2 936 != (ssize_t) db->disabled_iov->iov_len
23700036 937 && __builtin_expect (debug_level, 0) > 0)
67479a70
UD
938 {
939 /* We have problems sending the result. */
940 char buf[256];
941 dbg_log (_("cannot write result: %s"),
942 strerror_r (errno, buf, sizeof (buf)));
943 }
d67281a7 944
67479a70
UD
945 return;
946 }
d67281a7 947
67479a70 948 /* Be sure we can read the data. */
c86e6aec
UD
949 if (__builtin_expect (pthread_rwlock_tryrdlock (&db->lock) != 0, 0))
950 {
a95a08b4 951 ++db->head->rdlockdelayed;
c86e6aec
UD
952 pthread_rwlock_rdlock (&db->lock);
953 }
67479a70
UD
954
955 /* See whether we can handle it from the cache. */
a95a08b4
UD
956 struct datahead *cached;
957 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
958 db, uid);
67479a70
UD
959 if (cached != NULL)
960 {
961 /* Hurray it's in the cache. */
eac10791
UD
962 ssize_t nwritten;
963
964#ifdef HAVE_SENDFILE
965 if (db->mmap_used || !cached->notfound)
966 {
967 assert (db->wr_fd != -1);
968 assert ((char *) cached->data > (char *) db->data);
969 assert ((char *) cached->data - (char *) db->head
970 + cached->recsize
971 <= (sizeof (struct database_pers_head)
972 + db->head->module * sizeof (ref_t)
973 + db->head->data_size));
bd547139
UD
974 nwritten = sendfileall (fd, db->wr_fd,
975 (char *) cached->data
976 - (char *) db->head, cached->recsize);
eac10791
UD
977# ifndef __ASSUME_SENDFILE
978 if (nwritten == -1 && errno == ENOSYS)
979 goto use_write;
980# endif
981 }
982 else
983# ifndef __ASSUME_SENDFILE
984 use_write:
985# endif
986#endif
987 nwritten = writeall (fd, cached->data, cached->recsize);
988
989 if (nwritten != cached->recsize
23700036 990 && __builtin_expect (debug_level, 0) > 0)
67479a70
UD
991 {
992 /* We have problems sending the result. */
993 char buf[256];
994 dbg_log (_("cannot write result: %s"),
995 strerror_r (errno, buf, sizeof (buf)));
996 }
997
998 pthread_rwlock_unlock (&db->lock);
999
1000 return;
1001 }
1002
1003 pthread_rwlock_unlock (&db->lock);
d67281a7 1004 }
23700036 1005 else if (__builtin_expect (debug_level, 0) > 0)
756409c4
UD
1006 {
1007 if (req->type == INVALIDATE)
c207f23b 1008 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
0532e21b 1009 else
a95a08b4 1010 dbg_log ("\t%s", serv2str[req->type]);
756409c4 1011 }
67479a70
UD
1012
1013 /* Handle the request. */
1014 switch (req->type)
d67281a7 1015 {
67479a70 1016 case GETPWBYNAME:
a95a08b4 1017 addpwbyname (db, fd, req, key, uid);
67479a70
UD
1018 break;
1019
1020 case GETPWBYUID:
a95a08b4 1021 addpwbyuid (db, fd, req, key, uid);
67479a70
UD
1022 break;
1023
1024 case GETGRBYNAME:
a95a08b4 1025 addgrbyname (db, fd, req, key, uid);
67479a70
UD
1026 break;
1027
1028 case GETGRBYGID:
a95a08b4 1029 addgrbygid (db, fd, req, key, uid);
67479a70
UD
1030 break;
1031
1032 case GETHOSTBYNAME:
a95a08b4 1033 addhstbyname (db, fd, req, key, uid);
67479a70
UD
1034 break;
1035
1036 case GETHOSTBYNAMEv6:
a95a08b4 1037 addhstbynamev6 (db, fd, req, key, uid);
67479a70
UD
1038 break;
1039
1040 case GETHOSTBYADDR:
a95a08b4 1041 addhstbyaddr (db, fd, req, key, uid);
67479a70
UD
1042 break;
1043
1044 case GETHOSTBYADDRv6:
a95a08b4 1045 addhstbyaddrv6 (db, fd, req, key, uid);
67479a70
UD
1046 break;
1047
d19687d6
UD
1048 case GETAI:
1049 addhstai (db, fd, req, key, uid);
1050 break;
1051
f7e7a396
UD
1052 case INITGROUPS:
1053 addinitgroups (db, fd, req, key, uid);
1054 break;
1055
67479a70 1056 case GETSTAT:
67479a70 1057 case SHUTDOWN:
756409c4 1058 case INVALIDATE:
70e2ebba
UD
1059 {
1060 /* Get the callers credentials. */
cedc8559 1061#ifdef SO_PEERCRED
70e2ebba
UD
1062 struct ucred caller;
1063 socklen_t optlen = sizeof (caller);
be3c40b6 1064
70e2ebba
UD
1065 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1066 {
1067 char buf[256];
a1c542bf 1068
70e2ebba
UD
1069 dbg_log (_("error getting callers id: %s"),
1070 strerror_r (errno, buf, sizeof (buf)));
1071 break;
1072 }
a12ce44f 1073
70e2ebba 1074 uid = caller.uid;
a12ce44f 1075#else
70e2ebba
UD
1076 /* Some systems have no SO_PEERCRED implementation. They don't
1077 care about security so we don't as well. */
1078 uid = 0;
cedc8559 1079#endif
70e2ebba 1080 }
a12ce44f
UD
1081
1082 /* Accept shutdown, getstat and invalidate only from root. For
1083 the stat call also allow the user specified in the config file. */
1084 if (req->type == GETSTAT)
1085 {
1086 if (uid == 0 || uid == stat_uid)
1087 send_stats (fd, dbs);
1088 }
1089 else if (uid == 0)
1090 {
1091 if (req->type == INVALIDATE)
a334319f 1092 invalidate_cache (key);
a12ce44f
UD
1093 else
1094 termination_handler (0);
a1c542bf 1095 }
67479a70
UD
1096 break;
1097
c207f23b
UD
1098 case GETFDPW:
1099 case GETFDGR:
1100 case GETFDHST:
1101#ifdef SCM_RIGHTS
1102 send_ro_fd (serv2db[req->type], key, fd);
1103#endif
1104 break;
1105
67479a70 1106 default:
64acf8ed
UD
1107 /* Ignore the command, it's nothing we know. */
1108 break;
d67281a7 1109 }
67479a70
UD
1110}
1111
d67281a7 1112
4401d759
UD
1113/* Restart the process. */
1114static void
1115restart (void)
1116{
1117 /* First determine the parameters. We do not use the parameters
1118 passed to main() since in case nscd is started by running the
1119 dynamic linker this will not work. Yes, this is not the usual
1120 case but nscd is part of glibc and we occasionally do this. */
1121 size_t buflen = 1024;
1122 char *buf = alloca (buflen);
1123 size_t readlen = 0;
1124 int fd = open ("/proc/self/cmdline", O_RDONLY);
1125 if (fd == -1)
1126 {
1127 dbg_log (_("\
1128cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1129 strerror (errno));
1130
1131 paranoia = 0;
1132 return;
1133 }
1134
1135 while (1)
1136 {
1137 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
1138 buflen - readlen));
1139 if (n == -1)
1140 {
1141 dbg_log (_("\
1142cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1143 strerror (errno));
1144
1145 close (fd);
1146 paranoia = 0;
1147 return;
1148 }
1149
1150 readlen += n;
1151
1152 if (readlen < buflen)
1153 break;
1154
1155 /* We might have to extend the buffer. */
1156 size_t old_buflen = buflen;
1157 char *newp = extend_alloca (buf, buflen, 2 * buflen);
1158 buf = memmove (newp, buf, old_buflen);
1159 }
1160
1161 close (fd);
1162
1163 /* Parse the command line. Worst case scenario: every two
1164 characters form one parameter (one character plus NUL). */
1165 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1166 int argc = 0;
1167
1168 char *cp = buf;
1169 while (cp < buf + readlen)
1170 {
1171 argv[argc++] = cp;
1172 cp = (char *) rawmemchr (cp, '\0') + 1;
1173 }
1174 argv[argc] = NULL;
1175
1176 /* Second, change back to the old user if we changed it. */
1177 if (server_user != NULL)
1178 {
a334319f 1179 if (setuid (old_uid) != 0)
4401d759
UD
1180 {
1181 dbg_log (_("\
1182cannot change to old UID: %s; disabling paranoia mode"),
1183 strerror (errno));
1184
1185 paranoia = 0;
1186 return;
1187 }
1188
a334319f 1189 if (setgid (old_gid) != 0)
4401d759
UD
1190 {
1191 dbg_log (_("\
1192cannot change to old GID: %s; disabling paranoia mode"),
1193 strerror (errno));
1194
1195 setuid (server_uid);
1196 paranoia = 0;
1197 return;
1198 }
1199 }
1200
1201 /* Next change back to the old working directory. */
1202 if (chdir (oldcwd) == -1)
1203 {
1204 dbg_log (_("\
1205cannot change to old working directory: %s; disabling paranoia mode"),
1206 strerror (errno));
1207
1208 if (server_user != NULL)
1209 {
1210 setuid (server_uid);
1211 setgid (server_gid);
1212 }
1213 paranoia = 0;
1214 return;
1215 }
1216
1217 /* Synchronize memory. */
1218 for (int cnt = 0; cnt < lastdb; ++cnt)
1219 {
1220 /* Make sure nobody keeps using the database. */
1221 dbs[cnt].head->timestamp = 0;
1222
1223 if (dbs[cnt].persistent)
1224 // XXX async OK?
1225 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1226 }
1227
1228 /* The preparations are done. */
1229 execv ("/proc/self/exe", argv);
1230
1231 /* If we come here, we will never be able to re-exec. */
1232 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1233 strerror (errno));
1234
1235 if (server_user != NULL)
1236 {
1237 setuid (server_uid);
1238 setgid (server_gid);
1239 }
233399bc
UD
1240 if (chdir ("/") != 0)
1241 dbg_log (_("cannot change current working directory to \"/\": %s"),
1242 strerror (errno));
4401d759
UD
1243 paranoia = 0;
1244}
1245
1246
1945c96f
UD
1247/* List of file descriptors. */
1248struct fdlist
1249{
1250 int fd;
1251 struct fdlist *next;
1252};
1253/* Memory allocated for the list. */
1254static struct fdlist *fdlist;
1255/* List of currently ready-to-read file descriptors. */
1256static struct fdlist *readylist;
1257
1258/* Conditional variable and mutex to signal availability of entries in
1259 READYLIST. The condvar is initialized dynamically since we might
1260 use a different clock depending on availability. */
1261static pthread_cond_t readylist_cond;
1262static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;
1263
1264/* The clock to use with the condvar. */
1265static clockid_t timeout_clock = CLOCK_REALTIME;
1266
1267/* Number of threads ready to handle the READYLIST. */
1268static unsigned long int nready;
1269
1270
67479a70
UD
1271/* This is the main loop. It is replicated in different threads but the
1272 `poll' call makes sure only one thread handles an incoming connection. */
1273static void *
1274__attribute__ ((__noreturn__))
1275nscd_run (void *p)
1276{
1945c96f
UD
1277 const long int my_number = (long int) p;
1278 const int run_prune = my_number < lastdb && dbs[my_number].enabled;
1279 struct timespec prune_ts;
1280 int to = 0;
1281 char buf[256];
264d5b94 1282
72ae1e97 1283 if (run_prune)
1945c96f
UD
1284 {
1285 setup_thread (&dbs[my_number]);
081fc592 1286
1945c96f
UD
1287 /* We are running. */
1288 dbs[my_number].head->timestamp = time (NULL);
d67281a7 1289
1945c96f
UD
1290 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1291 /* Should never happen. */
1292 abort ();
0fdb4f42 1293
1945c96f
UD
1294 /* Compute timeout time. */
1295 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL;
1296 }
1297
1298 /* Initial locking. */
1299 pthread_mutex_lock (&readylist_lock);
1300
1301 /* One more thread available. */
1302 ++nready;
0fdb4f42 1303
1945c96f
UD
1304 while (1)
1305 {
1306 while (readylist == NULL)
d6db0975 1307 {
d6db0975
UD
1308 if (run_prune)
1309 {
1945c96f
UD
1310 /* Wait, but not forever. */
1311 to = pthread_cond_timedwait (&readylist_cond, &readylist_lock,
1312 &prune_ts);
1313
1314 /* If we were woken and there is no work to be done,
1315 just start pruning. */
1316 if (readylist == NULL && to == ETIMEDOUT)
1317 {
4401d759 1318 --nready;
1945c96f
UD
1319 pthread_mutex_unlock (&readylist_lock);
1320 goto only_prune;
1321 }
d6db0975 1322 }
1945c96f
UD
1323 else
1324 /* No need to timeout. */
1325 pthread_cond_wait (&readylist_cond, &readylist_lock);
1326 }
0fdb4f42 1327
1945c96f
UD
1328 struct fdlist *it = readylist->next;
1329 if (readylist->next == readylist)
1330 /* Just one entry on the list. */
1331 readylist = NULL;
1332 else
1333 readylist->next = it->next;
0fdb4f42 1334
1945c96f
UD
1335 /* Extract the information and mark the record ready to be used
1336 again. */
1337 int fd = it->fd;
1338 it->next = NULL;
0fdb4f42 1339
1945c96f
UD
1340 /* One more thread available. */
1341 --nready;
67479a70 1342
1945c96f
UD
1343 /* We are done with the list. */
1344 pthread_mutex_unlock (&readylist_lock);
67479a70 1345
1945c96f
UD
1346 /* We do not want to block on a short read or so. */
1347 int fl = fcntl (fd, F_GETFL);
1348 if (fl == -1 || fcntl (fd, F_SETFL, fl | O_NONBLOCK) == -1)
1349 goto close_and_out;
0fdb4f42
UD
1350
1351 /* Now read the request. */
1945c96f 1352 request_header req;
0fdb4f42
UD
1353 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1354 != sizeof (req), 0))
d67281a7 1355 {
1945c96f
UD
1356 /* We failed to read data. Note that this also might mean we
1357 failed because we would have blocked. */
0fdb4f42
UD
1358 if (debug_level > 0)
1359 dbg_log (_("short read while reading request: %s"),
1360 strerror_r (errno, buf, sizeof (buf)));
1945c96f 1361 goto close_and_out;
0fdb4f42
UD
1362 }
1363
3c82c131
UD
1364 /* Check whether this is a valid request type. */
1365 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1366 goto close_and_out;
1367
0fdb4f42
UD
1368 /* Some systems have no SO_PEERCRED implementation. They don't
1369 care about security so we don't as well. */
1945c96f 1370 uid_t uid = -1;
c86e6aec 1371#ifdef SO_PEERCRED
1945c96f
UD
1372 pid_t pid = 0;
1373
70e2ebba 1374 if (__builtin_expect (debug_level > 0, 0))
0fdb4f42
UD
1375 {
1376 struct ucred caller;
1377 socklen_t optlen = sizeof (caller);
c86e6aec 1378
0fdb4f42
UD
1379 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1380 pid = caller.pid;
1381 }
cedc8559 1382#endif
a1c542bf 1383
0fdb4f42
UD
1384 /* It should not be possible to crash the nscd with a silly
1385 request (i.e., a terribly large key). We limit the size to 1kb. */
1945c96f 1386#define MAXKEYLEN 1024
0fdb4f42 1387 if (__builtin_expect (req.key_len, 1) < 0
1945c96f 1388 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
0fdb4f42
UD
1389 {
1390 if (debug_level > 0)
1391 dbg_log (_("key length in request too long: %d"), req.key_len);
0fdb4f42
UD
1392 }
1393 else
1394 {
1395 /* Get the key. */
1945c96f 1396 char keybuf[MAXKEYLEN];
0fdb4f42
UD
1397
1398 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1399 req.key_len))
1400 != req.key_len, 0))
67479a70 1401 {
1945c96f 1402 /* Again, this can also mean we would have blocked. */
98e75a1c 1403 if (debug_level > 0)
0fdb4f42
UD
1404 dbg_log (_("short read while reading request key: %s"),
1405 strerror_r (errno, buf, sizeof (buf)));
1945c96f 1406 goto close_and_out;
67479a70 1407 }
0fdb4f42
UD
1408
1409 if (__builtin_expect (debug_level, 0) > 0)
67479a70 1410 {
c86e6aec 1411#ifdef SO_PEERCRED
0fdb4f42
UD
1412 if (pid != 0)
1413 dbg_log (_("\
c86e6aec 1414handle_request: request received (Version = %d) from PID %ld"),
0fdb4f42
UD
1415 req.version, (long int) pid);
1416 else
c86e6aec 1417#endif
0fdb4f42 1418 dbg_log (_("\
c86e6aec 1419handle_request: request received (Version = %d)"), req.version);
0fdb4f42 1420 }
c86e6aec 1421
0fdb4f42
UD
1422 /* Phew, we got all the data, now process it. */
1423 handle_request (fd, &req, keybuf, uid);
d67281a7 1424 }
264d5b94 1425
3c82c131
UD
1426 close_and_out:
1427 /* We are done. */
1428 close (fd);
1429
1945c96f
UD
1430 /* Check whether we should be pruning the cache. */
1431 assert (run_prune || to == 0);
1432 if (to == ETIMEDOUT)
1433 {
1434 only_prune:
1435 /* The pthread_cond_timedwait() call timed out. It is time
1436 to clean up the cache. */
1437 assert (my_number < lastdb);
75596b98 1438 prune_cache (&dbs[my_number], time (NULL));
1945c96f
UD
1439
1440 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1441 /* Should never happen. */
1442 abort ();
1443
1444 /* Compute next timeout time. */
1445 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL;
1446
1447 /* In case the list is emtpy we do not want to run the prune
1448 code right away again. */
1449 to = 0;
1450 }
1451
1452 /* Re-locking. */
1453 pthread_mutex_lock (&readylist_lock);
1454
1455 /* One more thread available. */
1456 ++nready;
1457 }
1458}
1459
1460
fc03df7a
UD
1461static unsigned int nconns;
1462
1945c96f 1463static void
fc03df7a 1464fd_ready (int fd)
1945c96f 1465{
fc03df7a
UD
1466 pthread_mutex_lock (&readylist_lock);
1467
1468 /* Find an empty entry in FDLIST. */
1469 size_t inner;
1470 for (inner = 0; inner < nconns; ++inner)
1471 if (fdlist[inner].next == NULL)
1472 break;
1473 assert (inner < nconns);
1474
1475 fdlist[inner].fd = fd;
1476
1477 if (readylist == NULL)
1478 readylist = fdlist[inner].next = &fdlist[inner];
1945c96f 1479 else
fc03df7a
UD
1480 {
1481 fdlist[inner].next = readylist->next;
1482 readylist = readylist->next = &fdlist[inner];
1483 }
1484
1485 bool do_signal = true;
1486 if (__builtin_expect (nready == 0, 0))
1487 {
1488 ++client_queued;
1489 do_signal = false;
27e82856
UD
1490
1491 /* Try to start another thread to help out. */
1492 pthread_t th;
1493 if (nthreads < max_nthreads
1494 && pthread_create (&th, &attr, nscd_run,
1495 (void *) (long int) nthreads) == 0)
1496 {
1497 /* We got another thread. */
1498 ++nthreads;
908c9e87 1499 /* The new thread might need a kick. */
27e82856
UD
1500 do_signal = true;
1501 }
1502
fc03df7a
UD
1503 }
1504
1505 pthread_mutex_unlock (&readylist_lock);
1506
1507 /* Tell one of the worker threads there is work to do. */
1508 if (do_signal)
1509 pthread_cond_signal (&readylist_cond);
1510}
1945c96f 1511
fc03df7a 1512
4401d759
UD
1513/* Check whether restarting should happen. */
1514static inline int
1515restart_p (time_t now)
1516{
1517 return (paranoia && readylist == NULL && nready == nthreads
1518 && now >= restart_time);
1519}
1520
1521
1522/* Array for times a connection was accepted. */
fc03df7a
UD
1523static time_t *starttime;
1524
1525
1526static void
1527__attribute__ ((__noreturn__))
1528main_loop_poll (void)
1529{
1945c96f
UD
1530 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
1531 * sizeof (conns[0]));
1532
1945c96f
UD
1533 conns[0].fd = sock;
1534 conns[0].events = POLLRDNORM;
1535 size_t nused = 1;
1536 size_t firstfree = 1;
1537
1538 while (1)
1539 {
1540 /* Wait for any event. We wait at most a couple of seconds so
1541 that we can check whether we should close any of the accepted
1542 connections since we have not received a request. */
1543#define MAX_ACCEPT_TIMEOUT 30
1544#define MIN_ACCEPT_TIMEOUT 5
1545#define MAIN_THREAD_TIMEOUT \
1546 (MAX_ACCEPT_TIMEOUT * 1000 \
1547 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
1548
1549 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
1550
1551 time_t now = time (NULL);
1552
1553 /* If there is a descriptor ready for reading or there is a new
1554 connection, process this now. */
1555 if (n > 0)
67479a70 1556 {
1945c96f
UD
1557 if (conns[0].revents != 0)
1558 {
1559 /* We have a new incoming connection. Accept the connection. */
1560 int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
1561
9388dcbb
UD
1562 /* Use the descriptor if we have not reached the limit. */
1563 if (fd >= 0)
1945c96f 1564 {
9388dcbb
UD
1565 if (firstfree < nconns)
1566 {
1567 conns[firstfree].fd = fd;
1568 conns[firstfree].events = POLLRDNORM;
1569 starttime[firstfree] = now;
1570 if (firstfree >= nused)
1571 nused = firstfree + 1;
1572
1573 do
1574 ++firstfree;
1575 while (firstfree < nused && conns[firstfree].fd != -1);
1576 }
1577 else
1578 /* We cannot use the connection so close it. */
1579 close (fd);
1945c96f
UD
1580 }
1581
1945c96f
UD
1582 --n;
1583 }
1584
1585 for (size_t cnt = 1; cnt < nused && n > 0; ++cnt)
1586 if (conns[cnt].revents != 0)
1587 {
fc03df7a 1588 fd_ready (conns[cnt].fd);
1945c96f
UD
1589
1590 /* Clean up the CONNS array. */
1591 conns[cnt].fd = -1;
1592 if (cnt < firstfree)
1593 firstfree = cnt;
1594 if (cnt == nused - 1)
1595 do
1596 --nused;
1597 while (conns[nused - 1].fd == -1);
1598
1599 --n;
1600 }
1601 }
1602
1603 /* Now find entries which have timed out. */
1604 assert (nused > 0);
fc03df7a
UD
1605
1606 /* We make the timeout length depend on the number of file
1607 descriptors currently used. */
1945c96f
UD
1608#define ACCEPT_TIMEOUT \
1609 (MAX_ACCEPT_TIMEOUT \
1610 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
fc03df7a 1611 time_t laststart = now - ACCEPT_TIMEOUT;
1945c96f 1612
fc03df7a
UD
1613 for (size_t cnt = nused - 1; cnt > 0; --cnt)
1614 {
1945c96f
UD
1615 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
1616 {
1617 /* Remove the entry, it timed out. */
1618 (void) close (conns[cnt].fd);
1619 conns[cnt].fd = -1;
1620
1621 if (cnt < firstfree)
1622 firstfree = cnt;
1623 if (cnt == nused - 1)
1624 do
1625 --nused;
1626 while (conns[nused - 1].fd == -1);
1627 }
67479a70 1628 }
4401d759
UD
1629
1630 if (restart_p (now))
1631 restart ();
67308730 1632 }
d67281a7
UD
1633}
1634
67479a70 1635
fc03df7a
UD
#ifdef HAVE_EPOLL
/* Accept loop of the main thread based on epoll.  EFD is the epoll
   descriptor.  The function only returns if the listening socket
   cannot be registered with EFD, in which case the caller is expected
   to fall back to another mechanism.

   STARTTIME is indexed by descriptor number here: a nonzero entry is
   the time the connection was accepted, zero means the descriptor is
   not being watched.  NUSED counts the listening socket plus all
   watched connections and feeds the MAIN_THREAD_TIMEOUT and
   ACCEPT_TIMEOUT computations.  */
static void
main_loop_epoll (int efd)
{
  struct epoll_event ev = { 0, };
  int nused = 1;
  size_t highest = 0;

  /* Add the socket.  */
  ev.events = EPOLLRDNORM;
  ev.data.fd = sock;
  if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
    /* We cannot use epoll.  */
    return;

  while (1)
    {
      struct epoll_event revs[100];
# define nrevs (sizeof (revs) / sizeof (revs[0]))

      int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);

      time_t now = time (NULL);

      for (int cnt = 0; cnt < n; ++cnt)
        if (revs[cnt].data.fd == sock)
          {
            /* A new connection.  */
            int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));

            if (fd >= 0)
              {
                /* Try to add the new descriptor.  */
                ev.data.fd = fd;
                if (fd >= nconns
                    || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
                  /* The descriptor is too large or something went
                     wrong.  Close the descriptor.  */
                  close (fd);
                else
                  {
                    /* Remember when we accepted the connection.  */
                    starttime[fd] = now;

                    if (fd > highest)
                      highest = fd;

                    ++nused;
                  }
              }
          }
        else
          {
            /* Remove the descriptor from the epoll descriptor.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);

            /* Get a worker to handle the request.  */
            fd_ready (revs[cnt].data.fd);

            /* Reset the time.  */
            starttime[revs[cnt].data.fd] = 0;
            if (revs[cnt].data.fd == highest)
              do
                --highest;
              while (highest > 0 && starttime[highest] == 0);

            --nused;
          }

      /* Now look for descriptors for accepted connections which have
         no reply in too long of a time.  */
      time_t laststart = now - ACCEPT_TIMEOUT;
      for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
        if (cnt != sock && starttime[cnt] != 0 && starttime[cnt] < laststart)
          {
            /* We are waiting for this one for too long.  Close it.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);

            (void) close (cnt);

            starttime[cnt] = 0;
            if (cnt == highest)
              --highest;

            /* The connection was counted when it was accepted, so it
               has to be uncounted here as well.  Otherwise NUSED only
               ever grows with each timed-out client and the timeouts
               derived from it keep shrinking.  */
            --nused;
          }
        else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
          --highest;

      if (restart_p (now))
        restart ();
    }
}
#endif
1728
1729
67479a70 1730/* Start all the threads we want. The initial process is thread no. 1. */
d67281a7 1731void
67479a70 1732start_threads (void)
d67281a7 1733{
1945c96f
UD
1734 /* Initialize the conditional variable we will use. The only
1735 non-standard attribute we might use is the clock selection. */
1736 pthread_condattr_t condattr;
1737 pthread_condattr_init (&condattr);
1738
3078cba2
UD
1739#if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
1740 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
1945c96f
UD
1741 /* Determine whether the monotonous clock is available. */
1742 struct timespec dummy;
3078cba2 1743# if _POSIX_MONOTONIC_CLOCK == 0
94d824f9 1744 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
3078cba2
UD
1745# endif
1746# if _POSIX_CLOCK_SELECTION == 0
94d824f9 1747 if (sysconf (_SC_CLOCK_SELECTION) > 0)
3078cba2 1748# endif
94d824f9
UD
1749 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
1750 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
1751 timeout_clock = CLOCK_MONOTONIC;
1945c96f 1752#endif
d67281a7 1753
1945c96f
UD
1754 pthread_cond_init (&readylist_cond, &condattr);
1755 pthread_condattr_destroy (&condattr);
1756
1757
1758 /* Create the attribute for the threads. They are all created
1759 detached. */
67479a70
UD
1760 pthread_attr_init (&attr);
1761 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
27e82856
UD
1762 /* Use 1MB stacks, twice as much for 64-bit architectures. */
1763 pthread_attr_setstacksize (&attr, 1024 * 1024 * (sizeof (void *) / 4));
d67281a7 1764
67479a70
UD
1765 /* We allow less than LASTDB threads only for debugging. */
1766 if (debug_level == 0)
1767 nthreads = MAX (nthreads, lastdb);
d67281a7 1768
27e82856 1769 int nfailed = 0;
1945c96f
UD
1770 for (long int i = 0; i < nthreads; ++i)
1771 {
1772 pthread_t th;
27e82856
UD
1773 if (pthread_create (&th, &attr, nscd_run, (void *) (i - nfailed)) != 0)
1774 ++nfailed;
1775 }
1776 if (nthreads - nfailed < lastdb)
1777 {
1778 /* We could not start enough threads. */
1779 dbg_log (_("could only start %d threads; terminating"),
1780 nthreads - nfailed);
1781 exit (1);
1945c96f 1782 }
d67281a7 1783
fc03df7a
UD
1784 /* Determine how much room for descriptors we should initially
1785 allocate. This might need to change later if we cap the number
1786 with MAXCONN. */
1787 const long int nfds = sysconf (_SC_OPEN_MAX);
1788#define MINCONN 32
1789#define MAXCONN 16384
1790 if (nfds == -1 || nfds > MAXCONN)
1791 nconns = MAXCONN;
1792 else if (nfds < MINCONN)
1793 nconns = MINCONN;
1794 else
1795 nconns = nfds;
1796
1797 /* We need memory to pass descriptors on to the worker threads. */
1798 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
1799 /* Array to keep track when connection was accepted. */
1800 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
1801
1945c96f
UD
1802 /* In the main thread we execute the loop which handles incoming
1803 connections. */
fc03df7a
UD
1804#ifdef HAVE_EPOLL
1805 int efd = epoll_create (100);
1806 if (efd != -1)
1807 {
1808 main_loop_epoll (efd);
1809 close (efd);
1810 }
1811#endif
1812
1813 main_loop_poll ();
d67281a7 1814}
057685e4
UD
1815
1816
1817/* Look up the uid, gid, and supplementary groups to run nscd as. When
1818 this function is called, we are not listening on the nscd socket yet so
1819 we can just use the ordinary lookup functions without causing a lockup */
1820static void
1821begin_drop_privileges (void)
1822{
a95a08b4 1823 struct passwd *pwd = getpwnam (server_user);
057685e4
UD
1824
1825 if (pwd == NULL)
1826 {
1827 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1828 error (EXIT_FAILURE, 0, _("Failed to run nscd as user '%s'"),
1829 server_user);
1830 }
1831
1832 server_uid = pwd->pw_uid;
1833 server_gid = pwd->pw_gid;
1834
4401d759
UD
1835 /* Save the old UID/GID if we have to change back. */
1836 if (paranoia)
1837 {
1838 old_uid = getuid ();
1839 old_gid = getgid ();
1840 }
1841
a95a08b4
UD
1842 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
1843 {
1844 /* This really must never happen. */
1845 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1846 error (EXIT_FAILURE, errno, _("initial getgrouplist failed"));
1847 }
057685e4 1848
a95a08b4 1849 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
057685e4
UD
1850
1851 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
1852 == -1)
1853 {
1854 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1855 error (EXIT_FAILURE, errno, _("getgrouplist failed"));
1856 }
1857}
1858
1859
1860/* Call setgroups(), setgid(), and setuid() to drop root privileges and
1861 run nscd as the user specified in the configuration file. */
1862static void
1863finish_drop_privileges (void)
1864{
1f063dca
UD
1865#if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
1866 /* We need to preserve the capabilities to connect to the audit daemon. */
1867 cap_t new_caps = preserve_capabilities ();
1868#endif
1869
057685e4
UD
1870 if (setgroups (server_ngroups, server_groups) == -1)
1871 {
1872 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1873 error (EXIT_FAILURE, errno, _("setgroups failed"));
1874 }
1875
a334319f 1876 if (setgid (server_gid) == -1)
057685e4
UD
1877 {
1878 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1879 perror ("setgid");
64d64de6 1880 exit (4);
057685e4
UD
1881 }
1882
a334319f 1883 if (setuid (server_uid) == -1)
057685e4
UD
1884 {
1885 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1886 perror ("setuid");
64d64de6 1887 exit (4);
057685e4 1888 }
1f063dca
UD
1889
1890#if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
1891 /* Remove the temporary capabilities. */
1892 install_real_capabilities (new_caps);
1893#endif
057685e4 1894}