]> git.ipfire.org Git - thirdparty/glibc.git/blob - nscd/connections.c
[BZ #5112]
[thirdparty/glibc.git] / nscd / connections.c
1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2003, 2004, 2005, 2006, 2007, 2008
3 Free Software Foundation, Inc.
4 This file is part of the GNU C Library.
5 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published
9 by the Free Software Foundation; version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software Foundation,
19 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
20
21 #include <alloca.h>
22 #include <assert.h>
23 #include <atomic.h>
24 #include <error.h>
25 #include <errno.h>
26 #include <fcntl.h>
27 #include <grp.h>
28 #include <libintl.h>
29 #include <pthread.h>
30 #include <pwd.h>
31 #include <resolv.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <unistd.h>
35 #include <arpa/inet.h>
36 #ifdef HAVE_EPOLL
37 # include <sys/epoll.h>
38 #endif
39 #include <sys/mman.h>
40 #include <sys/param.h>
41 #include <sys/poll.h>
42 #ifdef HAVE_SENDFILE
43 # include <sys/sendfile.h>
44 #endif
45 #include <sys/socket.h>
46 #include <sys/stat.h>
47 #include <sys/un.h>
48
49 #include "nscd.h"
50 #include "dbg_log.h"
51 #include "selinux.h"
52 #ifdef HAVE_SENDFILE
53 # include <kernel-features.h>
54 #endif
55
56
/* Error-checking wrappers around the standard allocation functions.
   They are only declared here; the definitions live outside this file.  */
extern void *xmalloc (size_t size);
extern void *xcalloc (size_t nmemb, size_t size);
extern void *xrealloc (void *ptr, size_t size);
61
/* State needed to run nscd as an unprivileged user.  nscd_init only
   drops privileges when SERVER_USER is non-NULL.  */
const char *server_user;
static uid_t server_uid;
static gid_t server_gid;
static gid_t *server_groups;
static int server_ngroups;
#if !defined NGROUPS
# define NGROUPS 32
#endif

/* User (besides root) whose GETSTAT requests are honoured.  */
const char *stat_user;
uid_t stat_uid;

static pthread_attr_t attr;

/* The privilege change is split in two halves: nscd_init calls the
   first before any database or socket is set up and the second once
   the socket is listening.  */
static void begin_drop_privileges (void);
static void finish_drop_privileges (void);
78
79 /* Map request type to a string. */
80 const char *const serv2str[LASTREQ] =
81 {
82 [GETPWBYNAME] = "GETPWBYNAME",
83 [GETPWBYUID] = "GETPWBYUID",
84 [GETGRBYNAME] = "GETGRBYNAME",
85 [GETGRBYGID] = "GETGRBYGID",
86 [GETHOSTBYNAME] = "GETHOSTBYNAME",
87 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
88 [GETHOSTBYADDR] = "GETHOSTBYADDR",
89 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
90 [SHUTDOWN] = "SHUTDOWN",
91 [GETSTAT] = "GETSTAT",
92 [INVALIDATE] = "INVALIDATE",
93 [GETFDPW] = "GETFDPW",
94 [GETFDGR] = "GETFDGR",
95 [GETFDHST] = "GETFDHST",
96 [GETAI] = "GETAI",
97 [INITGROUPS] = "INITGROUPS",
98 [GETSERVBYNAME] = "GETSERVBYNAME",
99 [GETSERVBYPORT] = "GETSERVBYPORT",
100 [GETFDSERV] = "GETFDSERV"
101 };
102
103 /* The control data structures for the services. */
104 struct database_dyn dbs[lastdb] =
105 {
106 [pwddb] = {
107 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
108 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
109 .enabled = 0,
110 .check_file = 1,
111 .persistent = 0,
112 .propagate = 1,
113 .shared = 0,
114 .max_db_size = DEFAULT_MAX_DB_SIZE,
115 .reset_res = 0,
116 .filename = "/etc/passwd",
117 .db_filename = _PATH_NSCD_PASSWD_DB,
118 .disabled_iov = &pwd_iov_disabled,
119 .postimeout = 3600,
120 .negtimeout = 20,
121 .wr_fd = -1,
122 .ro_fd = -1,
123 .mmap_used = false
124 },
125 [grpdb] = {
126 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
127 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
128 .enabled = 0,
129 .check_file = 1,
130 .persistent = 0,
131 .propagate = 1,
132 .shared = 0,
133 .max_db_size = DEFAULT_MAX_DB_SIZE,
134 .reset_res = 0,
135 .filename = "/etc/group",
136 .db_filename = _PATH_NSCD_GROUP_DB,
137 .disabled_iov = &grp_iov_disabled,
138 .postimeout = 3600,
139 .negtimeout = 60,
140 .wr_fd = -1,
141 .ro_fd = -1,
142 .mmap_used = false
143 },
144 [hstdb] = {
145 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
146 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
147 .enabled = 0,
148 .check_file = 1,
149 .persistent = 0,
150 .propagate = 0, /* Not used. */
151 .shared = 0,
152 .max_db_size = DEFAULT_MAX_DB_SIZE,
153 .reset_res = 1,
154 .filename = "/etc/hosts",
155 .db_filename = _PATH_NSCD_HOSTS_DB,
156 .disabled_iov = &hst_iov_disabled,
157 .postimeout = 3600,
158 .negtimeout = 20,
159 .wr_fd = -1,
160 .ro_fd = -1,
161 .mmap_used = false
162 },
163 [servdb] = {
164 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
165 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
166 .enabled = 0,
167 .check_file = 1,
168 .persistent = 0,
169 .propagate = 0, /* Not used. */
170 .shared = 0,
171 .max_db_size = DEFAULT_MAX_DB_SIZE,
172 .reset_res = 0,
173 .filename = "/etc/services",
174 .db_filename = _PATH_NSCD_SERVICES_DB,
175 .disabled_iov = &serv_iov_disabled,
176 .postimeout = 28800,
177 .negtimeout = 20,
178 .wr_fd = -1,
179 .ro_fd = -1,
180 .mmap_used = false
181 }
182 };
183
184
185 /* Mapping of request type to database. */
186 static struct
187 {
188 bool data_request;
189 struct database_dyn *db;
190 } const reqinfo[LASTREQ] =
191 {
192 [GETPWBYNAME] = { true, &dbs[pwddb] },
193 [GETPWBYUID] = { true, &dbs[pwddb] },
194 [GETGRBYNAME] = { true, &dbs[grpdb] },
195 [GETGRBYGID] = { true, &dbs[grpdb] },
196 [GETHOSTBYNAME] = { true, &dbs[hstdb] },
197 [GETHOSTBYNAMEv6] = { true, &dbs[hstdb] },
198 [GETHOSTBYADDR] = { true, &dbs[hstdb] },
199 [GETHOSTBYADDRv6] = { true, &dbs[hstdb] },
200 [SHUTDOWN] = { false, NULL },
201 [GETSTAT] = { false, NULL },
202 [SHUTDOWN] = { false, NULL },
203 [GETFDPW] = { false, &dbs[pwddb] },
204 [GETFDGR] = { false, &dbs[grpdb] },
205 [GETFDHST] = { false, &dbs[hstdb] },
206 [GETAI] = { true, &dbs[hstdb] },
207 [INITGROUPS] = { true, &dbs[grpdb] },
208 [GETSERVBYNAME] = { true, &dbs[servdb] },
209 [GETSERVBYPORT] = { true, &dbs[servdb] },
210 [GETFDSERV] = { false, &dbs[servdb] }
211 };
212
213
/* Thread pool sizing.  NTHREADS is the initial number of threads; the
   value -1 means "not configured" and makes nscd_init pick a default.
   MAX_NTHREADS is the maximum number of threads to use.  */
int nthreads = -1;
int max_nthreads = 32;

/* Listening socket for incoming connections.  */
static int sock;

/* How many times clients had to wait.  */
unsigned long int client_queued;
224
225
/* Send all LEN bytes of BUF over the socket FD, restarting after
   partial sends and after interruption by signals.  SIGPIPE is
   suppressed through MSG_NOSIGNAL.

   Returns the negative result of send() if it reported an error,
   otherwise the number of bytes actually sent (which is smaller than
   LEN if send() returned zero before everything was written).  */
ssize_t
writeall (int fd, const void *buf, size_t len)
{
  const char *cursor = buf;
  size_t remaining = len;
  ssize_t sent;

  /* send() is always attempted at least once, even for LEN == 0.  */
  for (;;)
    {
      sent = TEMP_FAILURE_RETRY (send (fd, cursor, remaining, MSG_NOSIGNAL));
      if (sent <= 0)
        break;

      cursor += sent;
      remaining -= sent;
      if (remaining == 0)
        break;
    }

  if (sent < 0)
    return sent;

  return len - remaining;
}
242
243
#ifdef HAVE_SENDFILE
/* Copy LEN bytes starting at offset OFF of the file FROMFD to TOFD
   using sendfile(), restarting after partial transfers and after
   interruption by signals.

   Returns the negative result of sendfile() if it reported an error,
   otherwise the number of bytes transferred (smaller than LEN if
   sendfile() returned zero early).  */
ssize_t
sendfileall (int tofd, int fromfd, off_t off, size_t len)
{
  ssize_t remaining = len;
  ssize_t copied;

  /* sendfile() advances OFF itself, so only the count needs updating.
     The call is always attempted at least once.  */
  for (;;)
    {
      copied = TEMP_FAILURE_RETRY (sendfile (tofd, fromfd, &off, remaining));
      if (copied <= 0)
        break;

      remaining -= copied;
      if (remaining <= 0)
        break;
    }

  if (copied < 0)
    return copied;

  return len - remaining;
}
#endif
262
263
/* Values stored in the use map built while verifying a persistent
   database.  The low bits name the kind of object occupying a byte;
   the higher bits are position markers that get or'ed in.  */
enum usekey
  {
    /* Byte not claimed by any object.  */
    use_not = 0,

    /* Marker bits.  The following three are not really used on their
       own, they are symbolic constants.  */
    use_first = 1 << 4,
    use_begin = 1 << 5,
    use_end = 1 << 6,

    /* Hash entries.  */
    use_he = 1,
    use_he_begin = use_he | use_begin,
    use_he_end = use_he | use_end,

#if SEPARATE_KEY
    /* Keys stored separately from their data.  */
    use_key = 2,
    use_key_begin = use_key | use_begin,
    use_key_end = use_key | use_end,
    use_key_first = use_key_begin | use_first,
#endif

    /* Data records.  */
    use_data = 3,
    use_data_begin = use_data | use_begin,
    use_data_end = use_data | use_end,
    use_data_first = use_data_begin | use_first
  };
286
287
288 static int
289 check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
290 enum usekey use, ref_t start, size_t len)
291 {
292 assert (len >= 2);
293
294 if (start > first_free || start + len > first_free
295 || (start & BLOCK_ALIGN_M1))
296 return 0;
297
298 if (usemap[start] == use_not)
299 {
300 /* Add the start marker. */
301 usemap[start] = use | use_begin;
302 use &= ~use_first;
303
304 while (--len > 0)
305 if (usemap[++start] != use_not)
306 return 0;
307 else
308 usemap[start] = use;
309
310 /* Add the end marker. */
311 usemap[start] = use | use_end;
312 }
313 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
314 {
315 /* Hash entries can't be shared. */
316 if (use == use_he)
317 return 0;
318
319 usemap[start] |= (use & use_first);
320 use &= ~use_first;
321
322 while (--len > 1)
323 if (usemap[++start] != use)
324 return 0;
325
326 if (usemap[++start] != (use | use_end))
327 return 0;
328 }
329 else
330 /* Points to a wrong object or somewhere in the middle. */
331 return 0;
332
333 return 1;
334 }
335
336
337 /* Verify data in persistent database. */
338 static int
339 verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
340 {
341 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb || dbnr == servdb);
342
343 time_t now = time (NULL);
344
345 struct database_pers_head *head = mem;
346 struct database_pers_head head_copy = *head;
347
348 /* Check that the header that was read matches the head in the database. */
349 if (readhead != NULL && memcmp (head, readhead, sizeof (*head)) != 0)
350 return 0;
351
352 /* First some easy tests: make sure the database header is sane. */
353 if (head->version != DB_VERSION
354 || head->header_size != sizeof (*head)
355 /* We allow a timestamp to be one hour ahead of the current time.
356 This should cover daylight saving time changes. */
357 || head->timestamp > now + 60 * 60 + 60
358 || (head->gc_cycle & 1)
359 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
360 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
361 || head->first_free < 0
362 || head->first_free > head->data_size
363 || (head->first_free & BLOCK_ALIGN_M1) != 0
364 || head->maxnentries < 0
365 || head->maxnsearched < 0)
366 return 0;
367
368 uint8_t *usemap = calloc (head->first_free, 1);
369 if (usemap == NULL)
370 return 0;
371
372 const char *data = (char *) &head->array[roundup (head->module,
373 ALIGN / sizeof (ref_t))];
374
375 nscd_ssize_t he_cnt = 0;
376 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
377 {
378 ref_t trail = head->array[cnt];
379 ref_t work = trail;
380 int tick = 0;
381
382 while (work != ENDREF)
383 {
384 if (! check_use (data, head->first_free, usemap, use_he, work,
385 sizeof (struct hashentry)))
386 goto fail;
387
388 /* Now we know we can dereference the record. */
389 struct hashentry *here = (struct hashentry *) (data + work);
390
391 ++he_cnt;
392
393 /* Make sure the record is for this type of service. */
394 if (here->type >= LASTREQ
395 || reqinfo[here->type].db != &dbs[dbnr])
396 goto fail;
397
398 /* Validate boolean field value. */
399 if (here->first != false && here->first != true)
400 goto fail;
401
402 if (here->len < 0)
403 goto fail;
404
405 /* Now the data. */
406 if (here->packet < 0
407 || here->packet > head->first_free
408 || here->packet + sizeof (struct datahead) > head->first_free)
409 goto fail;
410
411 struct datahead *dh = (struct datahead *) (data + here->packet);
412
413 if (! check_use (data, head->first_free, usemap,
414 use_data | (here->first ? use_first : 0),
415 here->packet, dh->allocsize))
416 goto fail;
417
418 if (dh->allocsize < sizeof (struct datahead)
419 || dh->recsize > dh->allocsize
420 || (dh->notfound != false && dh->notfound != true)
421 || (dh->usable != false && dh->usable != true))
422 goto fail;
423
424 if (here->key < here->packet + sizeof (struct datahead)
425 || here->key > here->packet + dh->allocsize
426 || here->key + here->len > here->packet + dh->allocsize)
427 {
428 #if SEPARATE_KEY
429 /* If keys can appear outside of data, this should be done
430 instead. But gc doesn't mark the data in that case. */
431 if (! check_use (data, head->first_free, usemap,
432 use_key | (here->first ? use_first : 0),
433 here->key, here->len))
434 #endif
435 goto fail;
436 }
437
438 work = here->next;
439
440 if (work == trail)
441 /* A circular list, this must not happen. */
442 goto fail;
443 if (tick)
444 trail = ((struct hashentry *) (data + trail))->next;
445 tick = 1 - tick;
446 }
447 }
448
449 if (he_cnt != head->nentries)
450 goto fail;
451
452 /* See if all data and keys had at least one reference from
453 he->first == true hashentry. */
454 for (ref_t idx = 0; idx < head->first_free; ++idx)
455 {
456 #if SEPARATE_KEY
457 if (usemap[idx] == use_key_begin)
458 goto fail;
459 #endif
460 if (usemap[idx] == use_data_begin)
461 goto fail;
462 }
463
464 /* Finally, make sure the database hasn't changed since the first test. */
465 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
466 goto fail;
467
468 free (usemap);
469 return 1;
470
471 fail:
472 free (usemap);
473 return 0;
474 }
475
476
/* Additional flags for open()/mkostemp(): request close-on-exec where
   the system headers provide O_CLOEXEC, nothing otherwise.  */
#ifndef O_CLOEXEC
# define EXTRA_O_FLAGS 0
#else
# define EXTRA_O_FLAGS O_CLOEXEC
#endif
482
483
484 /* Initialize database information structures. */
485 void
486 nscd_init (void)
487 {
488 /* Look up unprivileged uid/gid/groups before we start listening on the
489 socket */
490 if (server_user != NULL)
491 begin_drop_privileges ();
492
493 if (nthreads == -1)
494 /* No configuration for this value, assume a default. */
495 nthreads = 4;
496
497 for (size_t cnt = 0; cnt < lastdb; ++cnt)
498 if (dbs[cnt].enabled)
499 {
500 pthread_rwlock_init (&dbs[cnt].lock, NULL);
501 pthread_mutex_init (&dbs[cnt].memlock, NULL);
502
503 if (dbs[cnt].persistent)
504 {
505 /* Try to open the appropriate file on disk. */
506 int fd = open (dbs[cnt].db_filename, O_RDWR | EXTRA_O_FLAGS);
507 if (fd != -1)
508 {
509 struct stat64 st;
510 void *mem;
511 size_t total;
512 struct database_pers_head head;
513 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
514 sizeof (head)));
515 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
516 {
517 fail_db:
518 dbg_log (_("invalid persistent database file \"%s\": %s"),
519 dbs[cnt].db_filename, strerror (errno));
520 unlink (dbs[cnt].db_filename);
521 }
522 else if (head.module == 0 && head.data_size == 0)
523 {
524 /* The file has been created, but the head has not been
525 initialized yet. Remove the old file. */
526 unlink (dbs[cnt].db_filename);
527 }
528 else if (head.header_size != (int) sizeof (head))
529 {
530 dbg_log (_("invalid persistent database file \"%s\": %s"),
531 dbs[cnt].db_filename,
532 _("header size does not match"));
533 unlink (dbs[cnt].db_filename);
534 }
535 else if ((total = (sizeof (head)
536 + roundup (head.module * sizeof (ref_t),
537 ALIGN)
538 + head.data_size))
539 > st.st_size
540 || total < sizeof (head))
541 {
542 dbg_log (_("invalid persistent database file \"%s\": %s"),
543 dbs[cnt].db_filename,
544 _("file size does not match"));
545 unlink (dbs[cnt].db_filename);
546 }
547 /* Note we map with the maximum size allowed for the
548 database. This is likely much larger than the
549 actual file size. This is OK on most OSes since
550 extensions of the underlying file will
551 automatically translate more pages available for
552 memory access. */
553 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
554 PROT_READ | PROT_WRITE,
555 MAP_SHARED, fd, 0))
556 == MAP_FAILED)
557 goto fail_db;
558 else if (!verify_persistent_db (mem, &head, cnt))
559 {
560 munmap (mem, total);
561 dbg_log (_("invalid persistent database file \"%s\": %s"),
562 dbs[cnt].db_filename,
563 _("verification failed"));
564 unlink (dbs[cnt].db_filename);
565 }
566 else
567 {
568 /* Success. We have the database. */
569 dbs[cnt].head = mem;
570 dbs[cnt].memsize = total;
571 dbs[cnt].data = (char *)
572 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
573 ALIGN / sizeof (ref_t))];
574 dbs[cnt].mmap_used = true;
575
576 if (dbs[cnt].suggested_module > head.module)
577 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
578 dbnames[cnt]);
579
580 dbs[cnt].wr_fd = fd;
581 fd = -1;
582 /* We also need a read-only descriptor. */
583 if (dbs[cnt].shared)
584 {
585 dbs[cnt].ro_fd = open (dbs[cnt].db_filename,
586 O_RDONLY | EXTRA_O_FLAGS);
587 if (dbs[cnt].ro_fd == -1)
588 dbg_log (_("\
589 cannot create read-only descriptor for \"%s\"; no mmap"),
590 dbs[cnt].db_filename);
591 }
592
593 // XXX Shall we test whether the descriptors actually
594 // XXX point to the same file?
595 }
596
597 /* Close the file descriptors in case something went
598 wrong in which case the variable have not been
599 assigned -1. */
600 if (fd != -1)
601 close (fd);
602 }
603 }
604
605 if (dbs[cnt].head == NULL)
606 {
607 /* No database loaded. Allocate the data structure,
608 possibly on disk. */
609 struct database_pers_head head;
610 size_t total = (sizeof (head)
611 + roundup (dbs[cnt].suggested_module
612 * sizeof (ref_t), ALIGN)
613 + (dbs[cnt].suggested_module
614 * DEFAULT_DATASIZE_PER_BUCKET));
615
616 /* Try to create the database. If we do not need a
617 persistent database create a temporary file. */
618 int fd;
619 int ro_fd = -1;
620 if (dbs[cnt].persistent)
621 {
622 fd = open (dbs[cnt].db_filename,
623 O_RDWR | O_CREAT | O_EXCL | O_TRUNC | EXTRA_O_FLAGS,
624 S_IRUSR | S_IWUSR);
625 if (fd != -1 && dbs[cnt].shared)
626 ro_fd = open (dbs[cnt].db_filename,
627 O_RDONLY | EXTRA_O_FLAGS);
628 }
629 else
630 {
631 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
632 fd = mkostemp (fname, EXTRA_O_FLAGS);
633
634 /* We do not need the file name anymore after we
635 opened another file descriptor in read-only mode. */
636 if (fd != -1)
637 {
638 if (dbs[cnt].shared)
639 ro_fd = open (fname, O_RDONLY | EXTRA_O_FLAGS);
640
641 unlink (fname);
642 }
643 }
644
645 if (fd == -1)
646 {
647 if (errno == EEXIST)
648 {
649 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
650 dbnames[cnt], dbs[cnt].db_filename);
651 // XXX Correct way to terminate?
652 exit (1);
653 }
654
655 if (dbs[cnt].persistent)
656 dbg_log (_("cannot create %s; no persistent database used"),
657 dbs[cnt].db_filename);
658 else
659 dbg_log (_("cannot create %s; no sharing possible"),
660 dbs[cnt].db_filename);
661
662 dbs[cnt].persistent = 0;
663 // XXX remember: no mmap
664 }
665 else
666 {
667 /* Tell the user if we could not create the read-only
668 descriptor. */
669 if (ro_fd == -1 && dbs[cnt].shared)
670 dbg_log (_("\
671 cannot create read-only descriptor for \"%s\"; no mmap"),
672 dbs[cnt].db_filename);
673
674 /* Before we create the header, initialiye the hash
675 table. So that if we get interrupted if writing
676 the header we can recognize a partially initialized
677 database. */
678 size_t ps = sysconf (_SC_PAGESIZE);
679 char tmpbuf[ps];
680 assert (~ENDREF == 0);
681 memset (tmpbuf, '\xff', ps);
682
683 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
684 off_t offset = sizeof (head);
685
686 size_t towrite;
687 if (offset % ps != 0)
688 {
689 towrite = MIN (remaining, ps - (offset % ps));
690 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
691 goto write_fail;
692 offset += towrite;
693 remaining -= towrite;
694 }
695
696 while (remaining > ps)
697 {
698 if (pwrite (fd, tmpbuf, ps, offset) == -1)
699 goto write_fail;
700 offset += ps;
701 remaining -= ps;
702 }
703
704 if (remaining > 0
705 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
706 goto write_fail;
707
708 /* Create the header of the file. */
709 struct database_pers_head head =
710 {
711 .version = DB_VERSION,
712 .header_size = sizeof (head),
713 .module = dbs[cnt].suggested_module,
714 .data_size = (dbs[cnt].suggested_module
715 * DEFAULT_DATASIZE_PER_BUCKET),
716 .first_free = 0
717 };
718 void *mem;
719
720 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
721 != sizeof (head))
722 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
723 != 0)
724 || (mem = mmap (NULL, dbs[cnt].max_db_size,
725 PROT_READ | PROT_WRITE,
726 MAP_SHARED, fd, 0)) == MAP_FAILED)
727 {
728 write_fail:
729 unlink (dbs[cnt].db_filename);
730 dbg_log (_("cannot write to database file %s: %s"),
731 dbs[cnt].db_filename, strerror (errno));
732 dbs[cnt].persistent = 0;
733 }
734 else
735 {
736 /* Success. */
737 dbs[cnt].head = mem;
738 dbs[cnt].data = (char *)
739 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
740 ALIGN / sizeof (ref_t))];
741 dbs[cnt].memsize = total;
742 dbs[cnt].mmap_used = true;
743
744 /* Remember the descriptors. */
745 dbs[cnt].wr_fd = fd;
746 dbs[cnt].ro_fd = ro_fd;
747 fd = -1;
748 ro_fd = -1;
749 }
750
751 if (fd != -1)
752 close (fd);
753 if (ro_fd != -1)
754 close (ro_fd);
755 }
756 }
757
758 #if !defined O_CLOEXEC || !defined __ASSUME_O_CLOEXEC
759 /* We do not check here whether the O_CLOEXEC provided to the
760 open call was successful or not. The two fcntl calls are
761 only performed once each per process start-up and therefore
762 is not noticeable at all. */
763 if (paranoia
764 && ((dbs[cnt].wr_fd != -1
765 && fcntl (dbs[cnt].wr_fd, F_SETFD, FD_CLOEXEC) == -1)
766 || (dbs[cnt].ro_fd != -1
767 && fcntl (dbs[cnt].ro_fd, F_SETFD, FD_CLOEXEC) == -1)))
768 {
769 dbg_log (_("\
770 cannot set socket to close on exec: %s; disabling paranoia mode"),
771 strerror (errno));
772 paranoia = 0;
773 }
774 #endif
775
776 if (dbs[cnt].head == NULL)
777 {
778 /* We do not use the persistent database. Just
779 create an in-memory data structure. */
780 assert (! dbs[cnt].persistent);
781
782 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
783 + (dbs[cnt].suggested_module
784 * sizeof (ref_t)));
785 memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
786 assert (~ENDREF == 0);
787 memset (dbs[cnt].head->array, '\xff',
788 dbs[cnt].suggested_module * sizeof (ref_t));
789 dbs[cnt].head->module = dbs[cnt].suggested_module;
790 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
791 * dbs[cnt].head->module);
792 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
793 dbs[cnt].head->first_free = 0;
794
795 dbs[cnt].shared = 0;
796 assert (dbs[cnt].ro_fd == -1);
797 }
798
799 if (dbs[cnt].check_file)
800 {
801 /* We need the modification date of the file. */
802 struct stat64 st;
803
804 if (stat64 (dbs[cnt].filename, &st) < 0)
805 {
806 /* We cannot stat() the file, disable file checking. */
807 dbg_log (_("cannot stat() file `%s': %s"),
808 dbs[cnt].filename, strerror (errno));
809 dbs[cnt].check_file = 0;
810 }
811 else
812 dbs[cnt].file_mtime = st.st_mtime;
813 }
814 }
815
816 /* Create the socket. */
817 sock = socket (AF_UNIX, SOCK_STREAM, 0);
818 if (sock < 0)
819 {
820 dbg_log (_("cannot open socket: %s"), strerror (errno));
821 exit (errno == EACCES ? 4 : 1);
822 }
823 /* Bind a name to the socket. */
824 struct sockaddr_un sock_addr;
825 sock_addr.sun_family = AF_UNIX;
826 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
827 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
828 {
829 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
830 exit (errno == EACCES ? 4 : 1);
831 }
832
833 /* We don't want to get stuck on accept. */
834 int fl = fcntl (sock, F_GETFL);
835 if (fl == -1 || fcntl (sock, F_SETFL, fl | O_NONBLOCK) == -1)
836 {
837 dbg_log (_("cannot change socket to nonblocking mode: %s"),
838 strerror (errno));
839 exit (1);
840 }
841
842 /* The descriptor needs to be closed on exec. */
843 if (paranoia && fcntl (sock, F_SETFD, FD_CLOEXEC) == -1)
844 {
845 dbg_log (_("cannot set socket to close on exec: %s"),
846 strerror (errno));
847 exit (1);
848 }
849
850 /* Set permissions for the socket. */
851 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
852
853 /* Set the socket up to accept connections. */
854 if (listen (sock, SOMAXCONN) < 0)
855 {
856 dbg_log (_("cannot enable socket to accept connections: %s"),
857 strerror (errno));
858 exit (1);
859 }
860
861 /* Change to unprivileged uid/gid/groups if specifed in config file */
862 if (server_user != NULL)
863 finish_drop_privileges ();
864 }
865
866
867 /* Close the connections. */
868 void
869 close_sockets (void)
870 {
871 close (sock);
872 }
873
874
875 static void
876 invalidate_cache (char *key, int fd)
877 {
878 dbtype number;
879 int32_t resp;
880
881 for (number = pwddb; number < lastdb; ++number)
882 if (strcmp (key, dbnames[number]) == 0)
883 {
884 if (dbs[number].reset_res)
885 res_init ();
886
887 break;
888 }
889
890 if (number == lastdb)
891 {
892 resp = EINVAL;
893 writeall (fd, &resp, sizeof (resp));
894 return;
895 }
896
897 if (dbs[number].enabled)
898 {
899 pthread_mutex_lock (&dbs[number].prune_lock);
900 prune_cache (&dbs[number], LONG_MAX, fd);
901 pthread_mutex_unlock (&dbs[number].prune_lock);
902 }
903 else
904 {
905 resp = 0;
906 writeall (fd, &resp, sizeof (resp));
907 }
908 }
909
910
911 #ifdef SCM_RIGHTS
912 static void
913 send_ro_fd (struct database_dyn *db, char *key, int fd)
914 {
915 /* If we do not have an read-only file descriptor do nothing. */
916 if (db->ro_fd == -1)
917 return;
918
919 /* We need to send some data along with the descriptor. */
920 uint64_t mapsize = (db->head->data_size
921 + roundup (db->head->module * sizeof (ref_t), ALIGN)
922 + sizeof (struct database_pers_head));
923 struct iovec iov[2];
924 iov[0].iov_base = key;
925 iov[0].iov_len = strlen (key) + 1;
926 iov[1].iov_base = &mapsize;
927 iov[1].iov_len = sizeof (mapsize);
928
929 /* Prepare the control message to transfer the descriptor. */
930 union
931 {
932 struct cmsghdr hdr;
933 char bytes[CMSG_SPACE (sizeof (int))];
934 } buf;
935 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
936 .msg_control = buf.bytes,
937 .msg_controllen = sizeof (buf) };
938 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
939
940 cmsg->cmsg_level = SOL_SOCKET;
941 cmsg->cmsg_type = SCM_RIGHTS;
942 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
943
944 *(int *) CMSG_DATA (cmsg) = db->ro_fd;
945
946 msg.msg_controllen = cmsg->cmsg_len;
947
948 /* Send the control message. We repeat when we are interrupted but
949 everything else is ignored. */
950 #ifndef MSG_NOSIGNAL
951 # define MSG_NOSIGNAL 0
952 #endif
953 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
954
955 if (__builtin_expect (debug_level > 0, 0))
956 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
957 }
958 #endif /* SCM_RIGHTS */
959
960
961 /* Handle new request. */
962 static void
963 handle_request (int fd, request_header *req, void *key, uid_t uid)
964 {
965 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
966 {
967 if (debug_level > 0)
968 dbg_log (_("\
969 cannot handle old request version %d; current version is %d"),
970 req->version, NSCD_VERSION);
971 return;
972 }
973
974 /* Perform the SELinux check before we go on to the standard checks. */
975 if (selinux_enabled && nscd_request_avc_has_perm (fd, req->type) != 0)
976 {
977 if (debug_level > 0)
978 dbg_log (_("request not handled due to missing permission"));
979 return;
980 }
981
982 struct database_dyn *db = reqinfo[req->type].db;
983
984 /* See whether we can service the request from the cache. */
985 if (__builtin_expect (reqinfo[req->type].data_request, true))
986 {
987 if (__builtin_expect (debug_level, 0) > 0)
988 {
989 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
990 {
991 char buf[INET6_ADDRSTRLEN];
992
993 dbg_log ("\t%s (%s)", serv2str[req->type],
994 inet_ntop (req->type == GETHOSTBYADDR
995 ? AF_INET : AF_INET6,
996 key, buf, sizeof (buf)));
997 }
998 else
999 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1000 }
1001
1002 /* Is this service enabled? */
1003 if (__builtin_expect (!db->enabled, 0))
1004 {
1005 /* No, sent the prepared record. */
1006 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
1007 db->disabled_iov->iov_len,
1008 MSG_NOSIGNAL))
1009 != (ssize_t) db->disabled_iov->iov_len
1010 && __builtin_expect (debug_level, 0) > 0)
1011 {
1012 /* We have problems sending the result. */
1013 char buf[256];
1014 dbg_log (_("cannot write result: %s"),
1015 strerror_r (errno, buf, sizeof (buf)));
1016 }
1017
1018 return;
1019 }
1020
1021 /* Be sure we can read the data. */
1022 if (__builtin_expect (pthread_rwlock_tryrdlock (&db->lock) != 0, 0))
1023 {
1024 ++db->head->rdlockdelayed;
1025 pthread_rwlock_rdlock (&db->lock);
1026 }
1027
1028 /* See whether we can handle it from the cache. */
1029 struct datahead *cached;
1030 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
1031 db, uid);
1032 if (cached != NULL)
1033 {
1034 /* Hurray it's in the cache. */
1035 ssize_t nwritten;
1036
1037 #ifdef HAVE_SENDFILE
1038 if (__builtin_expect (db->mmap_used, 1))
1039 {
1040 assert (db->wr_fd != -1);
1041 assert ((char *) cached->data > (char *) db->data);
1042 assert ((char *) cached->data - (char *) db->head
1043 + cached->recsize
1044 <= (sizeof (struct database_pers_head)
1045 + db->head->module * sizeof (ref_t)
1046 + db->head->data_size));
1047 nwritten = sendfileall (fd, db->wr_fd,
1048 (char *) cached->data
1049 - (char *) db->head, cached->recsize);
1050 # ifndef __ASSUME_SENDFILE
1051 if (nwritten == -1 && errno == ENOSYS)
1052 goto use_write;
1053 # endif
1054 }
1055 else
1056 # ifndef __ASSUME_SENDFILE
1057 use_write:
1058 # endif
1059 #endif
1060 nwritten = writeall (fd, cached->data, cached->recsize);
1061
1062 if (nwritten != cached->recsize
1063 && __builtin_expect (debug_level, 0) > 0)
1064 {
1065 /* We have problems sending the result. */
1066 char buf[256];
1067 dbg_log (_("cannot write result: %s"),
1068 strerror_r (errno, buf, sizeof (buf)));
1069 }
1070
1071 pthread_rwlock_unlock (&db->lock);
1072
1073 return;
1074 }
1075
1076 pthread_rwlock_unlock (&db->lock);
1077 }
1078 else if (__builtin_expect (debug_level, 0) > 0)
1079 {
1080 if (req->type == INVALIDATE)
1081 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1082 else
1083 dbg_log ("\t%s", serv2str[req->type]);
1084 }
1085
1086 /* Handle the request. */
1087 switch (req->type)
1088 {
1089 case GETPWBYNAME:
1090 addpwbyname (db, fd, req, key, uid);
1091 break;
1092
1093 case GETPWBYUID:
1094 addpwbyuid (db, fd, req, key, uid);
1095 break;
1096
1097 case GETGRBYNAME:
1098 addgrbyname (db, fd, req, key, uid);
1099 break;
1100
1101 case GETGRBYGID:
1102 addgrbygid (db, fd, req, key, uid);
1103 break;
1104
1105 case GETHOSTBYNAME:
1106 addhstbyname (db, fd, req, key, uid);
1107 break;
1108
1109 case GETHOSTBYNAMEv6:
1110 addhstbynamev6 (db, fd, req, key, uid);
1111 break;
1112
1113 case GETHOSTBYADDR:
1114 addhstbyaddr (db, fd, req, key, uid);
1115 break;
1116
1117 case GETHOSTBYADDRv6:
1118 addhstbyaddrv6 (db, fd, req, key, uid);
1119 break;
1120
1121 case GETAI:
1122 addhstai (db, fd, req, key, uid);
1123 break;
1124
1125 case INITGROUPS:
1126 addinitgroups (db, fd, req, key, uid);
1127 break;
1128
1129 case GETSERVBYNAME:
1130 addservbyname (db, fd, req, key, uid);
1131 break;
1132
1133 case GETSERVBYPORT:
1134 addservbyport (db, fd, req, key, uid);
1135 break;
1136
1137 case GETSTAT:
1138 case SHUTDOWN:
1139 case INVALIDATE:
1140 {
1141 /* Get the callers credentials. */
1142 #ifdef SO_PEERCRED
1143 struct ucred caller;
1144 socklen_t optlen = sizeof (caller);
1145
1146 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1147 {
1148 char buf[256];
1149
1150 dbg_log (_("error getting caller's id: %s"),
1151 strerror_r (errno, buf, sizeof (buf)));
1152 break;
1153 }
1154
1155 uid = caller.uid;
1156 #else
1157 /* Some systems have no SO_PEERCRED implementation. They don't
1158 care about security so we don't as well. */
1159 uid = 0;
1160 #endif
1161 }
1162
1163 /* Accept shutdown, getstat and invalidate only from root. For
1164 the stat call also allow the user specified in the config file. */
1165 if (req->type == GETSTAT)
1166 {
1167 if (uid == 0 || uid == stat_uid)
1168 send_stats (fd, dbs);
1169 }
1170 else if (uid == 0)
1171 {
1172 if (req->type == INVALIDATE)
1173 invalidate_cache (key, fd);
1174 else
1175 termination_handler (0);
1176 }
1177 break;
1178
1179 case GETFDPW:
1180 case GETFDGR:
1181 case GETFDHST:
1182 case GETFDSERV:
1183 #ifdef SCM_RIGHTS
1184 send_ro_fd (reqinfo[req->type].db, key, fd);
1185 #endif
1186 break;
1187
1188 default:
1189 /* Ignore the command, it's nothing we know. */
1190 break;
1191 }
1192 }
1193
1194
1195 /* Restart the process. */
1196 static void
1197 restart (void)
1198 {
1199 /* First determine the parameters. We do not use the parameters
1200 passed to main() since in case nscd is started by running the
1201 dynamic linker this will not work. Yes, this is not the usual
1202 case but nscd is part of glibc and we occasionally do this. */
1203 size_t buflen = 1024;
1204 char *buf = alloca (buflen);
1205 size_t readlen = 0;
1206 int fd = open ("/proc/self/cmdline", O_RDONLY);
1207 if (fd == -1)
1208 {
1209 dbg_log (_("\
1210 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1211 strerror (errno));
1212
1213 paranoia = 0;
1214 return;
1215 }
1216
1217 while (1)
1218 {
1219 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
1220 buflen - readlen));
1221 if (n == -1)
1222 {
1223 dbg_log (_("\
1224 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1225 strerror (errno));
1226
1227 close (fd);
1228 paranoia = 0;
1229 return;
1230 }
1231
1232 readlen += n;
1233
1234 if (readlen < buflen)
1235 break;
1236
1237 /* We might have to extend the buffer. */
1238 size_t old_buflen = buflen;
1239 char *newp = extend_alloca (buf, buflen, 2 * buflen);
1240 buf = memmove (newp, buf, old_buflen);
1241 }
1242
1243 close (fd);
1244
1245 /* Parse the command line. Worst case scenario: every two
1246 characters form one parameter (one character plus NUL). */
1247 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1248 int argc = 0;
1249
1250 char *cp = buf;
1251 while (cp < buf + readlen)
1252 {
1253 argv[argc++] = cp;
1254 cp = (char *) rawmemchr (cp, '\0') + 1;
1255 }
1256 argv[argc] = NULL;
1257
1258 /* Second, change back to the old user if we changed it. */
1259 if (server_user != NULL)
1260 {
1261 if (setresuid (old_uid, old_uid, old_uid) != 0)
1262 {
1263 dbg_log (_("\
1264 cannot change to old UID: %s; disabling paranoia mode"),
1265 strerror (errno));
1266
1267 paranoia = 0;
1268 return;
1269 }
1270
1271 if (setresgid (old_gid, old_gid, old_gid) != 0)
1272 {
1273 dbg_log (_("\
1274 cannot change to old GID: %s; disabling paranoia mode"),
1275 strerror (errno));
1276
1277 setuid (server_uid);
1278 paranoia = 0;
1279 return;
1280 }
1281 }
1282
1283 /* Next change back to the old working directory. */
1284 if (chdir (oldcwd) == -1)
1285 {
1286 dbg_log (_("\
1287 cannot change to old working directory: %s; disabling paranoia mode"),
1288 strerror (errno));
1289
1290 if (server_user != NULL)
1291 {
1292 setuid (server_uid);
1293 setgid (server_gid);
1294 }
1295 paranoia = 0;
1296 return;
1297 }
1298
1299 /* Synchronize memory. */
1300 for (int cnt = 0; cnt < lastdb; ++cnt)
1301 if (dbs[cnt].enabled)
1302 {
1303 /* Make sure nobody keeps using the database. */
1304 dbs[cnt].head->timestamp = 0;
1305
1306 if (dbs[cnt].persistent)
1307 // XXX async OK?
1308 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1309 }
1310
1311 /* The preparations are done. */
1312 execv ("/proc/self/exe", argv);
1313
1314 /* If we come here, we will never be able to re-exec. */
1315 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1316 strerror (errno));
1317
1318 if (server_user != NULL)
1319 {
1320 setuid (server_uid);
1321 setgid (server_gid);
1322 }
1323 if (chdir ("/") != 0)
1324 dbg_log (_("cannot change current working directory to \"/\": %s"),
1325 strerror (errno));
1326 paranoia = 0;
1327 }
1328
1329
/* Node of the circular, singly linked list which is used to hand
   descriptors over to the worker threads.  */
struct fdlist
{
  int fd;
  struct fdlist *next;
};

/* Storage for all list nodes.  A node whose NEXT member is NULL is
   currently unused.  */
static struct fdlist *fdlist;

/* Ready-to-read descriptors.  Points to the node queued last; its
   NEXT member is the node the next worker picks up.  NULL if nothing
   is queued.  */
static struct fdlist *readylist;

/* Mutex held while READYLIST and NREADY are modified, and the condvar
   used to announce new entries in READYLIST.  The workers wait on the
   condvar without a timeout, therefore the static initializer is
   sufficient here.  */
static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t readylist_cond = PTHREAD_COND_INITIALIZER;

/* Clock the timed waits of the clean-up threads are based on.
   start_threads replaces the default if the monotonic clock can be
   used.  */
static clockid_t timeout_clock = CLOCK_REALTIME;

/* Number of worker threads currently idle, i.e., ready to take an
   entry from READYLIST.  */
static unsigned long int nready;
1352
1353
1354 /* Function for the clean-up threads. */
1355 static void *
1356 __attribute__ ((__noreturn__))
1357 nscd_run_prune (void *p)
1358 {
1359 const long int my_number = (long int) p;
1360 assert (dbs[my_number].enabled);
1361
1362 int dont_need_update = setup_thread (&dbs[my_number]);
1363
1364 /* We are running. */
1365 dbs[my_number].head->timestamp = time (NULL);
1366
1367 struct timespec prune_ts;
1368 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1369 /* Should never happen. */
1370 abort ();
1371
1372 /* Compute the initial timeout time. Prevent all the timers to go
1373 off at the same time by adding a db-based value. */
1374 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL + my_number;
1375
1376 pthread_mutex_lock (&dbs[my_number].prune_lock);
1377 while (1)
1378 {
1379 /* Wait, but not forever. */
1380 int e = pthread_cond_timedwait (&dbs[my_number].prune_cond,
1381 &dbs[my_number].prune_lock,
1382 &prune_ts);
1383 assert (e == 0 || e == ETIMEDOUT);
1384
1385 time_t next_wait;
1386 time_t now = time (NULL);
1387 if (e == ETIMEDOUT || now >= dbs[my_number].wakeup_time)
1388 {
1389 next_wait = prune_cache (&dbs[my_number], now, -1);
1390 next_wait = MAX (next_wait, CACHE_PRUNE_INTERVAL);
1391 /* If clients cannot determine for sure whether nscd is running
1392 we need to wake up occasionally to update the timestamp.
1393 Wait 90% of the update period. */
1394 #define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
1395 if (__builtin_expect (! dont_need_update, 0))
1396 next_wait = MIN (UPDATE_MAPPING_TIMEOUT, next_wait);
1397
1398 /* Make it known when we will wake up again. */
1399 dbs[my_number].wakeup_time = now + next_wait;
1400 }
1401 else
1402 /* The cache was just pruned. Do not do it again now. Just
1403 use the new timeout value. */
1404 next_wait = dbs[my_number].wakeup_time - now;
1405
1406 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1407 /* Should never happen. */
1408 abort ();
1409
1410 /* Compute next timeout time. */
1411 prune_ts.tv_sec += next_wait;
1412 }
1413 }
1414
1415
1416 /* This is the main loop. It is replicated in different threads but
1417 the the use of the ready list makes sure only one thread handles an
1418 incoming connection. */
1419 static void *
1420 __attribute__ ((__noreturn__))
1421 nscd_run_worker (void *p)
1422 {
1423 char buf[256];
1424
1425 /* Initial locking. */
1426 pthread_mutex_lock (&readylist_lock);
1427
1428 /* One more thread available. */
1429 ++nready;
1430
1431 while (1)
1432 {
1433 while (readylist == NULL)
1434 pthread_cond_wait (&readylist_cond, &readylist_lock);
1435
1436 struct fdlist *it = readylist->next;
1437 if (readylist->next == readylist)
1438 /* Just one entry on the list. */
1439 readylist = NULL;
1440 else
1441 readylist->next = it->next;
1442
1443 /* Extract the information and mark the record ready to be used
1444 again. */
1445 int fd = it->fd;
1446 it->next = NULL;
1447
1448 /* One more thread available. */
1449 --nready;
1450
1451 /* We are done with the list. */
1452 pthread_mutex_unlock (&readylist_lock);
1453
1454 /* We do not want to block on a short read or so. */
1455 int fl = fcntl (fd, F_GETFL);
1456 if (fl == -1 || fcntl (fd, F_SETFL, fl | O_NONBLOCK) == -1)
1457 goto close_and_out;
1458
1459 /* Now read the request. */
1460 request_header req;
1461 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1462 != sizeof (req), 0))
1463 {
1464 /* We failed to read data. Note that this also might mean we
1465 failed because we would have blocked. */
1466 if (debug_level > 0)
1467 dbg_log (_("short read while reading request: %s"),
1468 strerror_r (errno, buf, sizeof (buf)));
1469 goto close_and_out;
1470 }
1471
1472 /* Check whether this is a valid request type. */
1473 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1474 goto close_and_out;
1475
1476 /* Some systems have no SO_PEERCRED implementation. They don't
1477 care about security so we don't as well. */
1478 uid_t uid = -1;
1479 #ifdef SO_PEERCRED
1480 pid_t pid = 0;
1481
1482 if (__builtin_expect (debug_level > 0, 0))
1483 {
1484 struct ucred caller;
1485 socklen_t optlen = sizeof (caller);
1486
1487 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1488 pid = caller.pid;
1489 }
1490 #endif
1491
1492 /* It should not be possible to crash the nscd with a silly
1493 request (i.e., a terribly large key). We limit the size to 1kb. */
1494 if (__builtin_expect (req.key_len, 1) < 0
1495 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1496 {
1497 if (debug_level > 0)
1498 dbg_log (_("key length in request too long: %d"), req.key_len);
1499 }
1500 else
1501 {
1502 /* Get the key. */
1503 char keybuf[MAXKEYLEN];
1504
1505 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1506 req.key_len))
1507 != req.key_len, 0))
1508 {
1509 /* Again, this can also mean we would have blocked. */
1510 if (debug_level > 0)
1511 dbg_log (_("short read while reading request key: %s"),
1512 strerror_r (errno, buf, sizeof (buf)));
1513 goto close_and_out;
1514 }
1515
1516 if (__builtin_expect (debug_level, 0) > 0)
1517 {
1518 #ifdef SO_PEERCRED
1519 if (pid != 0)
1520 dbg_log (_("\
1521 handle_request: request received (Version = %d) from PID %ld"),
1522 req.version, (long int) pid);
1523 else
1524 #endif
1525 dbg_log (_("\
1526 handle_request: request received (Version = %d)"), req.version);
1527 }
1528
1529 /* Phew, we got all the data, now process it. */
1530 handle_request (fd, &req, keybuf, uid);
1531 }
1532
1533 close_and_out:
1534 /* We are done. */
1535 close (fd);
1536
1537 /* Re-locking. */
1538 pthread_mutex_lock (&readylist_lock);
1539
1540 /* One more thread available. */
1541 ++nready;
1542 }
1543 }
1544
1545
1546 static unsigned int nconns;
1547
1548 static void
1549 fd_ready (int fd)
1550 {
1551 pthread_mutex_lock (&readylist_lock);
1552
1553 /* Find an empty entry in FDLIST. */
1554 size_t inner;
1555 for (inner = 0; inner < nconns; ++inner)
1556 if (fdlist[inner].next == NULL)
1557 break;
1558 assert (inner < nconns);
1559
1560 fdlist[inner].fd = fd;
1561
1562 if (readylist == NULL)
1563 readylist = fdlist[inner].next = &fdlist[inner];
1564 else
1565 {
1566 fdlist[inner].next = readylist->next;
1567 readylist = readylist->next = &fdlist[inner];
1568 }
1569
1570 bool do_signal = true;
1571 if (__builtin_expect (nready == 0, 0))
1572 {
1573 ++client_queued;
1574 do_signal = false;
1575
1576 /* Try to start another thread to help out. */
1577 pthread_t th;
1578 if (nthreads < max_nthreads
1579 && pthread_create (&th, &attr, nscd_run_worker,
1580 (void *) (long int) nthreads) == 0)
1581 {
1582 /* We got another thread. */
1583 ++nthreads;
1584 /* The new thread might need a kick. */
1585 do_signal = true;
1586 }
1587
1588 }
1589
1590 pthread_mutex_unlock (&readylist_lock);
1591
1592 /* Tell one of the worker threads there is work to do. */
1593 if (do_signal)
1594 pthread_cond_signal (&readylist_cond);
1595 }
1596
1597
1598 /* Check whether restarting should happen. */
1599 static inline int
1600 restart_p (time_t now)
1601 {
1602 return (paranoia && readylist == NULL && nready == nthreads
1603 && now >= restart_time);
1604 }
1605
1606
1607 /* Array for times a connection was accepted. */
1608 static time_t *starttime;
1609
1610
1611 static void
1612 __attribute__ ((__noreturn__))
1613 main_loop_poll (void)
1614 {
1615 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
1616 * sizeof (conns[0]));
1617
1618 conns[0].fd = sock;
1619 conns[0].events = POLLRDNORM;
1620 size_t nused = 1;
1621 size_t firstfree = 1;
1622
1623 while (1)
1624 {
1625 /* Wait for any event. We wait at most a couple of seconds so
1626 that we can check whether we should close any of the accepted
1627 connections since we have not received a request. */
1628 #define MAX_ACCEPT_TIMEOUT 30
1629 #define MIN_ACCEPT_TIMEOUT 5
1630 #define MAIN_THREAD_TIMEOUT \
1631 (MAX_ACCEPT_TIMEOUT * 1000 \
1632 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
1633
1634 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
1635
1636 time_t now = time (NULL);
1637
1638 /* If there is a descriptor ready for reading or there is a new
1639 connection, process this now. */
1640 if (n > 0)
1641 {
1642 if (conns[0].revents != 0)
1643 {
1644 /* We have a new incoming connection. Accept the connection. */
1645 int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
1646
1647 /* Use the descriptor if we have not reached the limit. */
1648 if (fd >= 0)
1649 {
1650 if (firstfree < nconns)
1651 {
1652 conns[firstfree].fd = fd;
1653 conns[firstfree].events = POLLRDNORM;
1654 starttime[firstfree] = now;
1655 if (firstfree >= nused)
1656 nused = firstfree + 1;
1657
1658 do
1659 ++firstfree;
1660 while (firstfree < nused && conns[firstfree].fd != -1);
1661 }
1662 else
1663 /* We cannot use the connection so close it. */
1664 close (fd);
1665 }
1666
1667 --n;
1668 }
1669
1670 for (size_t cnt = 1; cnt < nused && n > 0; ++cnt)
1671 if (conns[cnt].revents != 0)
1672 {
1673 fd_ready (conns[cnt].fd);
1674
1675 /* Clean up the CONNS array. */
1676 conns[cnt].fd = -1;
1677 if (cnt < firstfree)
1678 firstfree = cnt;
1679 if (cnt == nused - 1)
1680 do
1681 --nused;
1682 while (conns[nused - 1].fd == -1);
1683
1684 --n;
1685 }
1686 }
1687
1688 /* Now find entries which have timed out. */
1689 assert (nused > 0);
1690
1691 /* We make the timeout length depend on the number of file
1692 descriptors currently used. */
1693 #define ACCEPT_TIMEOUT \
1694 (MAX_ACCEPT_TIMEOUT \
1695 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
1696 time_t laststart = now - ACCEPT_TIMEOUT;
1697
1698 for (size_t cnt = nused - 1; cnt > 0; --cnt)
1699 {
1700 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
1701 {
1702 /* Remove the entry, it timed out. */
1703 (void) close (conns[cnt].fd);
1704 conns[cnt].fd = -1;
1705
1706 if (cnt < firstfree)
1707 firstfree = cnt;
1708 if (cnt == nused - 1)
1709 do
1710 --nused;
1711 while (conns[nused - 1].fd == -1);
1712 }
1713 }
1714
1715 if (restart_p (now))
1716 restart ();
1717 }
1718 }
1719
1720
#ifdef HAVE_EPOLL
/* Main loop of the initial thread, based on the epoll descriptor EFD.

   The listening socket SOCK and every accepted connection are
   registered with EFD.  STARTTIME is indexed by descriptor number
   here; a zero entry means the descriptor is not a connection waiting
   for its request.  HIGHEST is the largest descriptor number which
   has to be examined by the timeout scan and NUSED the number of
   registered descriptors; the latter is used by the
   MAIN_THREAD_TIMEOUT and ACCEPT_TIMEOUT macros.

   The function returns only if SOCK cannot be registered with EFD, in
   which case the caller has to use a different main loop.  */
static void
main_loop_epoll (int efd)
{
  struct epoll_event ev = { 0, };
  int nused = 1;
  size_t highest = 0;

  /* Add the socket.  */
  ev.events = EPOLLRDNORM;
  ev.data.fd = sock;
  if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
    /* We cannot use epoll.  */
    return;

  while (1)
    {
      struct epoll_event revs[100];
# define nrevs (sizeof (revs) / sizeof (revs[0]))

      int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);

      time_t now = time (NULL);

      for (int cnt = 0; cnt < n; ++cnt)
        if (revs[cnt].data.fd == sock)
          {
            /* A new connection.  */
            int fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));

            if (fd >= 0)
              {
                /* Try to add the new descriptor.  */
                ev.data.fd = fd;
                if (fd >= nconns
                    || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
                  /* The descriptor is too large or something went
                     wrong.  Close the descriptor.  */
                  close (fd);
                else
                  {
                    /* Remember when we accepted the connection.  */
                    starttime[fd] = now;

                    if (fd > highest)
                      highest = fd;

                    ++nused;
                  }
              }
          }
        else
          {
            /* Remove the descriptor from the epoll descriptor.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);

            /* Get a worker to handle the request.  */
            fd_ready (revs[cnt].data.fd);

            /* Reset the time.  */
            starttime[revs[cnt].data.fd] = 0;
            if (revs[cnt].data.fd == highest)
              do
                --highest;
              while (highest > 0 && starttime[highest] == 0);

            --nused;
          }

      /* Now look for descriptors for accepted connections which have
         no reply in too long of a time.  */
      time_t laststart = now - ACCEPT_TIMEOUT;
      for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
        if (cnt != sock && starttime[cnt] != 0 && starttime[cnt] < laststart)
          {
            /* We are waiting for this one for too long.  Close it.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);

            (void) close (cnt);

            starttime[cnt] = 0;
            if (cnt == highest)
              --highest;

            /* The connection is gone, so it must not be counted any
               longer.  Otherwise NUSED only ever grows through
               timeouts and distorts the timeout computations.  */
            --nused;
          }
        else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
          --highest;

      if (restart_p (now))
        restart ();
    }
}
#endif
1813
1814
1815 /* Start all the threads we want. The initial process is thread no. 1. */
1816 void
1817 start_threads (void)
1818 {
1819 /* Initialize the conditional variable we will use. The only
1820 non-standard attribute we might use is the clock selection. */
1821 pthread_condattr_t condattr;
1822 pthread_condattr_init (&condattr);
1823
1824 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
1825 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
1826 /* Determine whether the monotonous clock is available. */
1827 struct timespec dummy;
1828 # if _POSIX_MONOTONIC_CLOCK == 0
1829 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
1830 # endif
1831 # if _POSIX_CLOCK_SELECTION == 0
1832 if (sysconf (_SC_CLOCK_SELECTION) > 0)
1833 # endif
1834 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
1835 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
1836 timeout_clock = CLOCK_MONOTONIC;
1837 #endif
1838
1839 /* Create the attribute for the threads. They are all created
1840 detached. */
1841 pthread_attr_init (&attr);
1842 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
1843 /* Use 1MB stacks, twice as much for 64-bit architectures. */
1844 pthread_attr_setstacksize (&attr, NSCD_THREAD_STACKSIZE);
1845
1846 /* We allow less than LASTDB threads only for debugging. */
1847 if (debug_level == 0)
1848 nthreads = MAX (nthreads, lastdb);
1849
1850 /* Create the threads which prune the databases. */
1851 // XXX Ideally this work would be done by some of the worker threads.
1852 // XXX But this is problematic since we would need to be able to wake
1853 // XXX them up explicitly as well as part of the group handling the
1854 // XXX ready-list. This requires an operation where we can wait on
1855 // XXX two conditional variables at the same time. This operation
1856 // XXX does not exist (yet).
1857 for (long int i = 0; i < lastdb; ++i)
1858 {
1859 /* Initialize the conditional variable. */
1860 if (pthread_cond_init (&dbs[i].prune_cond, &condattr) != 0)
1861 {
1862 dbg_log (_("could not initialize conditional variable"));
1863 exit (1);
1864 }
1865
1866 pthread_t th;
1867 if (dbs[i].enabled
1868 && pthread_create (&th, &attr, nscd_run_prune, (void *) i) != 0)
1869 {
1870 dbg_log (_("could not start clean-up thread; terminating"));
1871 exit (1);
1872 }
1873 }
1874
1875 pthread_condattr_destroy (&condattr);
1876
1877 for (long int i = 0; i < nthreads; ++i)
1878 {
1879 pthread_t th;
1880 if (pthread_create (&th, &attr, nscd_run_worker, NULL) != 0)
1881 {
1882 if (i == 0)
1883 {
1884 dbg_log (_("could not start any worker thread; terminating"));
1885 exit (1);
1886 }
1887
1888 break;
1889 }
1890 }
1891
1892 /* Determine how much room for descriptors we should initially
1893 allocate. This might need to change later if we cap the number
1894 with MAXCONN. */
1895 const long int nfds = sysconf (_SC_OPEN_MAX);
1896 #define MINCONN 32
1897 #define MAXCONN 16384
1898 if (nfds == -1 || nfds > MAXCONN)
1899 nconns = MAXCONN;
1900 else if (nfds < MINCONN)
1901 nconns = MINCONN;
1902 else
1903 nconns = nfds;
1904
1905 /* We need memory to pass descriptors on to the worker threads. */
1906 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
1907 /* Array to keep track when connection was accepted. */
1908 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
1909
1910 /* In the main thread we execute the loop which handles incoming
1911 connections. */
1912 #ifdef HAVE_EPOLL
1913 int efd = epoll_create (100);
1914 if (efd != -1)
1915 {
1916 main_loop_epoll (efd);
1917 close (efd);
1918 }
1919 #endif
1920
1921 main_loop_poll ();
1922 }
1923
1924
1925 /* Look up the uid, gid, and supplementary groups to run nscd as. When
1926 this function is called, we are not listening on the nscd socket yet so
1927 we can just use the ordinary lookup functions without causing a lockup */
1928 static void
1929 begin_drop_privileges (void)
1930 {
1931 struct passwd *pwd = getpwnam (server_user);
1932
1933 if (pwd == NULL)
1934 {
1935 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1936 error (EXIT_FAILURE, 0, _("Failed to run nscd as user '%s'"),
1937 server_user);
1938 }
1939
1940 server_uid = pwd->pw_uid;
1941 server_gid = pwd->pw_gid;
1942
1943 /* Save the old UID/GID if we have to change back. */
1944 if (paranoia)
1945 {
1946 old_uid = getuid ();
1947 old_gid = getgid ();
1948 }
1949
1950 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
1951 {
1952 /* This really must never happen. */
1953 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1954 error (EXIT_FAILURE, errno, _("initial getgrouplist failed"));
1955 }
1956
1957 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
1958
1959 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
1960 == -1)
1961 {
1962 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1963 error (EXIT_FAILURE, errno, _("getgrouplist failed"));
1964 }
1965 }
1966
1967
1968 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
1969 run nscd as the user specified in the configuration file. */
1970 static void
1971 finish_drop_privileges (void)
1972 {
1973 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
1974 /* We need to preserve the capabilities to connect to the audit daemon. */
1975 cap_t new_caps = preserve_capabilities ();
1976 #endif
1977
1978 if (setgroups (server_ngroups, server_groups) == -1)
1979 {
1980 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1981 error (EXIT_FAILURE, errno, _("setgroups failed"));
1982 }
1983
1984 int res;
1985 if (paranoia)
1986 res = setresgid (server_gid, server_gid, old_gid);
1987 else
1988 res = setgid (server_gid);
1989 if (res == -1)
1990 {
1991 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
1992 perror ("setgid");
1993 exit (4);
1994 }
1995
1996 if (paranoia)
1997 res = setresuid (server_uid, server_uid, old_uid);
1998 else
1999 res = setuid (server_uid);
2000 if (res == -1)
2001 {
2002 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2003 perror ("setuid");
2004 exit (4);
2005 }
2006
2007 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2008 /* Remove the temporary capabilities. */
2009 install_real_capabilities (new_caps);
2010 #endif
2011 }