/* Source: nscd/connections.c from the GNU C Library (glibc). */
1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2007, 2008 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
19
20 #include <alloca.h>
21 #include <assert.h>
22 #include <atomic.h>
23 #include <error.h>
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <grp.h>
27 #include <libintl.h>
28 #include <pthread.h>
29 #include <pwd.h>
30 #include <resolv.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <unistd.h>
34 #include <arpa/inet.h>
35 #ifdef HAVE_EPOLL
36 # include <sys/epoll.h>
37 #endif
38 #ifdef HAVE_INOTIFY
39 # include <sys/inotify.h>
40 #endif
41 #include <sys/mman.h>
42 #include <sys/param.h>
43 #include <sys/poll.h>
44 #ifdef HAVE_SENDFILE
45 # include <sys/sendfile.h>
46 #endif
47 #include <sys/socket.h>
48 #include <sys/stat.h>
49 #include <sys/un.h>
50
51 #include "nscd.h"
52 #include "dbg_log.h"
53 #include "selinux.h"
54 #include <resolv/resolv.h>
55 #ifdef HAVE_SENDFILE
56 # include <kernel-features.h>
57 #endif
58
59
/* Wrapper functions with error checking for standard functions.
   Defined elsewhere in nscd; they abort instead of returning NULL.  */
extern void *xmalloc (size_t n);
extern void *xcalloc (size_t n, size_t s);
extern void *xrealloc (void *o, size_t n);

/* Support to run nscd as an unprivileged user */
const char *server_user;        /* User name from the config file, or NULL.  */
static uid_t server_uid;        /* Resolved uid of server_user.  */
static gid_t server_gid;        /* Resolved primary gid of server_user.  */
const char *stat_user;          /* User allowed to request statistics.  */
uid_t stat_uid;                 /* Resolved uid of stat_user.  */
static gid_t *server_groups;    /* Supplementary groups of server_user.  */
#ifndef NGROUPS
# define NGROUPS 32
#endif
static int server_ngroups;      /* Number of entries in server_groups.  */

static pthread_attr_t attr;

/* Two-phase privilege dropping: the first part runs before the socket
   is created, the second after initialization is complete.  */
static void begin_drop_privileges (void);
static void finish_drop_privileges (void);
/* Map request type to a string.  Used for debug logging; indices are
   the request_type enum values, so any type not listed here is NULL.  */
const char *const serv2str[LASTREQ] =
{
  [GETPWBYNAME] = "GETPWBYNAME",
  [GETPWBYUID] = "GETPWBYUID",
  [GETGRBYNAME] = "GETGRBYNAME",
  [GETGRBYGID] = "GETGRBYGID",
  [GETHOSTBYNAME] = "GETHOSTBYNAME",
  [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
  [GETHOSTBYADDR] = "GETHOSTBYADDR",
  [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
  [SHUTDOWN] = "SHUTDOWN",
  [GETSTAT] = "GETSTAT",
  [INVALIDATE] = "INVALIDATE",
  [GETFDPW] = "GETFDPW",
  [GETFDGR] = "GETFDGR",
  [GETFDHST] = "GETFDHST",
  [GETAI] = "GETAI",
  [INITGROUPS] = "INITGROUPS",
  [GETSERVBYNAME] = "GETSERVBYNAME",
  [GETSERVBYPORT] = "GETSERVBYPORT",
  [GETFDSERV] = "GETFDSERV"
};
105
/* The control data structures for the services.  One entry per cache
   (passwd, group, hosts, services).  Fields of note:
     check_file       - re-read the backing /etc file when it changes
     persistent       - keep the cache in a disk file across restarts
     propagate        - on-disk changes invalidate the cache
     shared           - allow clients to mmap the database read-only
     postimeout       - TTL in seconds for positive entries
     negtimeout       - TTL in seconds for negative entries
     reset_res        - call res_init () when this cache is invalidated
   wr_fd/ro_fd start at -1 (no descriptor) and are filled in by
   nscd_init.  */
struct database_dyn dbs[lastdb] =
{
  [pwddb] = {
    .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
    .prune_lock = PTHREAD_MUTEX_INITIALIZER,
    .enabled = 0,
    .check_file = 1,
    .persistent = 0,
    .propagate = 1,
    .shared = 0,
    .max_db_size = DEFAULT_MAX_DB_SIZE,
    .suggested_module = DEFAULT_SUGGESTED_MODULE,
    .reset_res = 0,
    .filename = "/etc/passwd",
    .db_filename = _PATH_NSCD_PASSWD_DB,
    .disabled_iov = &pwd_iov_disabled,
    .postimeout = 3600,
    .negtimeout = 20,
    .wr_fd = -1,
    .ro_fd = -1,
    .mmap_used = false
  },
  /* Group entries keep negative results longer (60s vs 20s).  */
  [grpdb] = {
    .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
    .prune_lock = PTHREAD_MUTEX_INITIALIZER,
    .enabled = 0,
    .check_file = 1,
    .persistent = 0,
    .propagate = 1,
    .shared = 0,
    .max_db_size = DEFAULT_MAX_DB_SIZE,
    .suggested_module = DEFAULT_SUGGESTED_MODULE,
    .reset_res = 0,
    .filename = "/etc/group",
    .db_filename = _PATH_NSCD_GROUP_DB,
    .disabled_iov = &grp_iov_disabled,
    .postimeout = 3600,
    .negtimeout = 60,
    .wr_fd = -1,
    .ro_fd = -1,
    .mmap_used = false
  },
  /* Host lookups depend on resolver state, hence reset_res = 1.  */
  [hstdb] = {
    .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
    .prune_lock = PTHREAD_MUTEX_INITIALIZER,
    .enabled = 0,
    .check_file = 1,
    .persistent = 0,
    .propagate = 0,             /* Not used.  */
    .shared = 0,
    .max_db_size = DEFAULT_MAX_DB_SIZE,
    .suggested_module = DEFAULT_SUGGESTED_MODULE,
    .reset_res = 1,
    .filename = "/etc/hosts",
    .db_filename = _PATH_NSCD_HOSTS_DB,
    .disabled_iov = &hst_iov_disabled,
    .postimeout = 3600,
    .negtimeout = 20,
    .wr_fd = -1,
    .ro_fd = -1,
    .mmap_used = false
  },
  /* Services change rarely; positive TTL is 8 hours.  */
  [servdb] = {
    .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
    .prune_lock = PTHREAD_MUTEX_INITIALIZER,
    .enabled = 0,
    .check_file = 1,
    .persistent = 0,
    .propagate = 0,             /* Not used.  */
    .shared = 0,
    .max_db_size = DEFAULT_MAX_DB_SIZE,
    .suggested_module = DEFAULT_SUGGESTED_MODULE,
    .reset_res = 0,
    .filename = "/etc/services",
    .db_filename = _PATH_NSCD_SERVICES_DB,
    .disabled_iov = &serv_iov_disabled,
    .postimeout = 28800,
    .negtimeout = 20,
    .wr_fd = -1,
    .ro_fd = -1,
    .mmap_used = false
  }
};
190
191
192 /* Mapping of request type to database. */
193 static struct
194 {
195 bool data_request;
196 struct database_dyn *db;
197 } const reqinfo[LASTREQ] =
198 {
199 [GETPWBYNAME] = { true, &dbs[pwddb] },
200 [GETPWBYUID] = { true, &dbs[pwddb] },
201 [GETGRBYNAME] = { true, &dbs[grpdb] },
202 [GETGRBYGID] = { true, &dbs[grpdb] },
203 [GETHOSTBYNAME] = { true, &dbs[hstdb] },
204 [GETHOSTBYNAMEv6] = { true, &dbs[hstdb] },
205 [GETHOSTBYADDR] = { true, &dbs[hstdb] },
206 [GETHOSTBYADDRv6] = { true, &dbs[hstdb] },
207 [SHUTDOWN] = { false, NULL },
208 [GETSTAT] = { false, NULL },
209 [SHUTDOWN] = { false, NULL },
210 [GETFDPW] = { false, &dbs[pwddb] },
211 [GETFDGR] = { false, &dbs[grpdb] },
212 [GETFDHST] = { false, &dbs[hstdb] },
213 [GETAI] = { true, &dbs[hstdb] },
214 [INITGROUPS] = { true, &dbs[grpdb] },
215 [GETSERVBYNAME] = { true, &dbs[servdb] },
216 [GETSERVBYPORT] = { true, &dbs[servdb] },
217 [GETFDSERV] = { false, &dbs[servdb] }
218 };
219
220
/* Initial number of threads to use. */
int nthreads = -1;
/* Maximum number of threads to use. */
int max_nthreads = 32;

/* Socket for incoming connections. */
static int sock;

#ifdef HAVE_INOTIFY
/* Inotify descriptor. */
static int inotify_fd = -1;

/* Watch descriptor for resolver configuration file. */
static int resolv_conf_descr = -1;
#endif

#ifndef __ASSUME_SOCK_CLOEXEC
/* Negative if SOCK_CLOEXEC is not supported, positive if it is, zero
   before we know the result.  */
static int have_sock_cloexec;
#endif
#ifndef __ASSUME_ACCEPT4
/* Same tri-state convention as have_sock_cloexec, for accept4.  */
static int have_accept4;
#endif

/* Number of times clients had to wait. */
unsigned long int client_queued;

/* Data structure for recording in-flight memory allocation. */
__thread struct mem_in_flight mem_in_flight attribute_tls_model_ie;
/* Global list of the mem_in_flight variables of all the threads. */
struct mem_in_flight *mem_in_flight_list;
254
/* Send all LEN bytes of BUF to socket FD, retrying short writes.
   Returns LEN on full success, the number of bytes actually sent if
   the peer stopped accepting data, or the (negative) error return of
   send on failure.  MSG_NOSIGNAL suppresses SIGPIPE on a closed
   peer.  */
ssize_t
writeall (int fd, const void *buf, size_t len)
{
  const char *p = buf;
  size_t remaining = len;
  ssize_t ret;

  do
    {
      ret = TEMP_FAILURE_RETRY (send (fd, p, remaining, MSG_NOSIGNAL));
      if (ret <= 0)
	break;
      p += ret;
      remaining -= ret;
    }
  while (remaining > 0);

  if (ret < 0)
    return ret;
  return len - remaining;
}
271
272
#ifdef HAVE_SENDFILE
/* Zero-copy counterpart of writeall: transfer LEN bytes starting at
   offset OFF of FROMFD to socket TOFD, retrying short transfers.
   Returns LEN on full success, the number of bytes transferred if the
   transfer stalled, or the negative sendfile error.  Note sendfile
   advances OFF itself via the pointer argument, so only the remaining
   count needs updating here.  */
ssize_t
sendfileall (int tofd, int fromfd, off_t off, size_t len)
{
  ssize_t n = len;
  ssize_t ret;

  do
    {
      ret = TEMP_FAILURE_RETRY (sendfile (tofd, fromfd, &off, n));
      if (ret <= 0)
        break;
      n -= ret;
    }
  while (n > 0);
  return ret < 0 ? ret : len - n;
}
#endif
291
292
/* Per-byte usage markers for verifying the persistent database (see
   check_use / verify_persistent_db).  Each byte of the data area gets
   one marker byte: a type in the low bits plus begin/end flags on the
   first and last byte of each object, and use_first on objects that
   are referenced by a hashentry with first == true.  */
enum usekey
{
  use_not = 0,
  /* The following three are not really used, they are symbolic constants. */
  use_first = 16,
  use_begin = 32,
  use_end = 64,

  use_he = 1,
  use_he_begin = use_he | use_begin,
  use_he_end = use_he | use_end,
#if SEPARATE_KEY
  use_key = 2,
  use_key_begin = use_key | use_begin,
  use_key_end = use_key | use_end,
  use_key_first = use_key_begin | use_first,
#endif
  use_data = 3,
  use_data_begin = use_data | use_begin,
  use_data_end = use_data | use_end,
  use_data_first = use_data_begin | use_first
};
315
316
/* Record that the LEN bytes at offset START of the data area are used
   as an object of type USE in the per-byte map USEMAP, and verify the
   region is consistent: in bounds (below FIRST_FREE), block aligned,
   and either completely unmarked so far or already marked as a
   shareable object of the same type.  Returns 1 if valid, 0 on any
   conflict.  DATA is currently unreferenced in this function.  */
static int
check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
           enum usekey use, ref_t start, size_t len)
{
  assert (len >= 2);

  if (start > first_free || start + len > first_free
      || (start & BLOCK_ALIGN_M1))
    return 0;

  if (usemap[start] == use_not)
    {
      /* Region not seen before: claim it.  */
      /* Add the start marker. */
      usemap[start] = use | use_begin;
      use &= ~use_first;

      /* Interior bytes must all be unused; mark them as we go.  */
      while (--len > 0)
        if (usemap[++start] != use_not)
          return 0;
        else
          usemap[start] = use;

      /* Add the end marker. */
      usemap[start] = use | use_end;
    }
  else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
    {
      /* Region already claimed with the same type: this is a second
         reference to a shared object.  */
      /* Hash entries can't be shared. */
      if (use == use_he)
        return 0;

      /* Propagate the use_first flag onto the existing marking.  */
      usemap[start] |= (use & use_first);
      use &= ~use_first;

      /* Verify interior and end markers agree with the earlier pass.  */
      while (--len > 1)
        if (usemap[++start] != use)
          return 0;

      if (usemap[++start] != (use | use_end))
        return 0;
    }
  else
    /* Points to a wrong object or somewhere in the middle. */
    return 0;

  return 1;
}
364
365
/* Verify data in persistent database.  MEM is the mmapped database,
   READHEAD a copy of the header read from the file before mapping,
   DBNR the database index.  Returns 1 if the database is internally
   consistent and safe to reuse, 0 otherwise.  The check walks every
   hash chain, validating each hashentry and its datahead against a
   per-byte usage map, and detects cycles in the chains.  */
static int
verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
{
  assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb || dbnr == servdb);

  time_t now = time (NULL);

  struct database_pers_head *head = mem;
  struct database_pers_head head_copy = *head;

  /* Check that the header that was read matches the head in the database. */
  if (memcmp (head, readhead, sizeof (*head)) != 0)
    return 0;

  /* First some easy tests: make sure the database header is sane. */
  if (head->version != DB_VERSION
      || head->header_size != sizeof (*head)
      /* We allow a timestamp to be one hour ahead of the current time.
         This should cover daylight saving time changes. */
      || head->timestamp > now + 60 * 60 + 60
      || (head->gc_cycle & 1)
      || head->module == 0
      || (size_t) head->module > INT32_MAX / sizeof (ref_t)
      || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
      || head->first_free < 0
      || head->first_free > head->data_size
      || (head->first_free & BLOCK_ALIGN_M1) != 0
      || head->maxnentries < 0
      || head->maxnsearched < 0)
    return 0;

  /* One marker byte per byte of the allocated data area.  */
  uint8_t *usemap = calloc (head->first_free, 1);
  if (usemap == NULL)
    return 0;

  /* Data area starts after the (aligned) hash table.  */
  const char *data = (char *) &head->array[roundup (head->module,
                                                    ALIGN / sizeof (ref_t))];

  nscd_ssize_t he_cnt = 0;
  for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
    {
      /* Walk the chain with a second pointer advancing at half speed
         (trail/tick) so a corrupted circular chain is detected.  */
      ref_t trail = head->array[cnt];
      ref_t work = trail;
      int tick = 0;

      while (work != ENDREF)
        {
          if (! check_use (data, head->first_free, usemap, use_he, work,
                           sizeof (struct hashentry)))
            goto fail;

          /* Now we know we can dereference the record. */
          struct hashentry *here = (struct hashentry *) (data + work);

          ++he_cnt;

          /* Make sure the record is for this type of service. */
          if (here->type >= LASTREQ
              || reqinfo[here->type].db != &dbs[dbnr])
            goto fail;

          /* Validate boolean field value. */
          if (here->first != false && here->first != true)
            goto fail;

          if (here->len < 0)
            goto fail;

          /* Now the data. */
          if (here->packet < 0
              || here->packet > head->first_free
              || here->packet + sizeof (struct datahead) > head->first_free)
            goto fail;

          struct datahead *dh = (struct datahead *) (data + here->packet);

          if (! check_use (data, head->first_free, usemap,
                           use_data | (here->first ? use_first : 0),
                           here->packet, dh->allocsize))
            goto fail;

          if (dh->allocsize < sizeof (struct datahead)
              || dh->recsize > dh->allocsize
              || (dh->notfound != false && dh->notfound != true)
              || (dh->usable != false && dh->usable != true))
            goto fail;

          /* The key must lie inside the datahead's allocation.  */
          if (here->key < here->packet + sizeof (struct datahead)
              || here->key > here->packet + dh->allocsize
              || here->key + here->len > here->packet + dh->allocsize)
            {
#if SEPARATE_KEY
              /* If keys can appear outside of data, this should be done
                 instead.  But gc doesn't mark the data in that case. */
              if (! check_use (data, head->first_free, usemap,
                               use_key | (here->first ? use_first : 0),
                               here->key, here->len))
#endif
                goto fail;
            }

          work = here->next;

          if (work == trail)
            /* A circular list, this must not happen. */
            goto fail;
          if (tick)
            trail = ((struct hashentry *) (data + trail))->next;
          tick = 1 - tick;
        }
    }

  /* Every hash entry must have been visited exactly once.  */
  if (he_cnt != head->nentries)
    goto fail;

  /* See if all data and keys had at least one reference from
     he->first == true hashentry. */
  for (ref_t idx = 0; idx < head->first_free; ++idx)
    {
#if SEPARATE_KEY
      if (usemap[idx] == use_key_begin)
        goto fail;
#endif
      if (usemap[idx] == use_data_begin)
        goto fail;
    }

  /* Finally, make sure the database hasn't changed since the first test. */
  if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
    goto fail;

  free (usemap);
  return 1;

fail:
  free (usemap);
  return 0;
}
505
506
507 #ifdef O_CLOEXEC
508 # define EXTRA_O_FLAGS O_CLOEXEC
509 #else
510 # define EXTRA_O_FLAGS 0
511 #endif
512
513
514 /* Initialize database information structures. */
515 void
516 nscd_init (void)
517 {
518 /* Look up unprivileged uid/gid/groups before we start listening on the
519 socket */
520 if (server_user != NULL)
521 begin_drop_privileges ();
522
523 if (nthreads == -1)
524 /* No configuration for this value, assume a default. */
525 nthreads = 4;
526
527 #ifdef HAVE_INOTIFY
528 /* Use inotify to recognize changed files. */
529 inotify_fd = inotify_init1 (IN_NONBLOCK);
530 # ifndef __ASSUME_IN_NONBLOCK
531 if (inotify_fd == -1 && errno == ENOSYS)
532 {
533 inotify_fd = inotify_init ();
534 if (inotify_fd != -1)
535 fcntl (inotify_fd, F_SETFL, O_RDONLY | O_NONBLOCK);
536 }
537 # endif
538 #endif
539
540 for (size_t cnt = 0; cnt < lastdb; ++cnt)
541 if (dbs[cnt].enabled)
542 {
543 pthread_rwlock_init (&dbs[cnt].lock, NULL);
544 pthread_mutex_init (&dbs[cnt].memlock, NULL);
545
546 if (dbs[cnt].persistent)
547 {
548 /* Try to open the appropriate file on disk. */
549 int fd = open (dbs[cnt].db_filename, O_RDWR | EXTRA_O_FLAGS);
550 if (fd != -1)
551 {
552 char *msg = NULL;
553 struct stat64 st;
554 void *mem;
555 size_t total;
556 struct database_pers_head head;
557 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
558 sizeof (head)));
559 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
560 {
561 fail_db_errno:
562 /* The code is single-threaded at this point so
563 using strerror is just fine. */
564 msg = strerror (errno);
565 fail_db:
566 dbg_log (_("invalid persistent database file \"%s\": %s"),
567 dbs[cnt].db_filename, msg);
568 unlink (dbs[cnt].db_filename);
569 }
570 else if (head.module == 0 && head.data_size == 0)
571 {
572 /* The file has been created, but the head has not
573 been initialized yet. */
574 msg = _("uninitialized header");
575 goto fail_db;
576 }
577 else if (head.header_size != (int) sizeof (head))
578 {
579 msg = _("header size does not match");
580 goto fail_db;
581 }
582 else if ((total = (sizeof (head)
583 + roundup (head.module * sizeof (ref_t),
584 ALIGN)
585 + head.data_size))
586 > st.st_size
587 || total < sizeof (head))
588 {
589 msg = _("file size does not match");
590 goto fail_db;
591 }
592 /* Note we map with the maximum size allowed for the
593 database. This is likely much larger than the
594 actual file size. This is OK on most OSes since
595 extensions of the underlying file will
596 automatically translate more pages available for
597 memory access. */
598 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
599 PROT_READ | PROT_WRITE,
600 MAP_SHARED, fd, 0))
601 == MAP_FAILED)
602 goto fail_db_errno;
603 else if (!verify_persistent_db (mem, &head, cnt))
604 {
605 munmap (mem, total);
606 msg = _("verification failed");
607 goto fail_db;
608 }
609 else
610 {
611 /* Success. We have the database. */
612 dbs[cnt].head = mem;
613 dbs[cnt].memsize = total;
614 dbs[cnt].data = (char *)
615 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
616 ALIGN / sizeof (ref_t))];
617 dbs[cnt].mmap_used = true;
618
619 if (dbs[cnt].suggested_module > head.module)
620 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
621 dbnames[cnt]);
622
623 dbs[cnt].wr_fd = fd;
624 fd = -1;
625 /* We also need a read-only descriptor. */
626 if (dbs[cnt].shared)
627 {
628 dbs[cnt].ro_fd = open (dbs[cnt].db_filename,
629 O_RDONLY | EXTRA_O_FLAGS);
630 if (dbs[cnt].ro_fd == -1)
631 dbg_log (_("\
632 cannot create read-only descriptor for \"%s\"; no mmap"),
633 dbs[cnt].db_filename);
634 }
635
636 // XXX Shall we test whether the descriptors actually
637 // XXX point to the same file?
638 }
639
640 /* Close the file descriptors in case something went
641 wrong in which case the variable have not been
642 assigned -1. */
643 if (fd != -1)
644 close (fd);
645 }
646 }
647
648 if (dbs[cnt].head == NULL)
649 {
650 /* No database loaded. Allocate the data structure,
651 possibly on disk. */
652 struct database_pers_head head;
653 size_t total = (sizeof (head)
654 + roundup (dbs[cnt].suggested_module
655 * sizeof (ref_t), ALIGN)
656 + (dbs[cnt].suggested_module
657 * DEFAULT_DATASIZE_PER_BUCKET));
658
659 /* Try to create the database. If we do not need a
660 persistent database create a temporary file. */
661 int fd;
662 int ro_fd = -1;
663 if (dbs[cnt].persistent)
664 {
665 fd = open (dbs[cnt].db_filename,
666 O_RDWR | O_CREAT | O_EXCL | O_TRUNC | EXTRA_O_FLAGS,
667 S_IRUSR | S_IWUSR);
668 if (fd != -1 && dbs[cnt].shared)
669 ro_fd = open (dbs[cnt].db_filename,
670 O_RDONLY | EXTRA_O_FLAGS);
671 }
672 else
673 {
674 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
675 fd = mkostemp (fname, EXTRA_O_FLAGS);
676
677 /* We do not need the file name anymore after we
678 opened another file descriptor in read-only mode. */
679 if (fd != -1)
680 {
681 if (dbs[cnt].shared)
682 ro_fd = open (fname, O_RDONLY | EXTRA_O_FLAGS);
683
684 unlink (fname);
685 }
686 }
687
688 if (fd == -1)
689 {
690 if (errno == EEXIST)
691 {
692 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
693 dbnames[cnt], dbs[cnt].db_filename);
694 // XXX Correct way to terminate?
695 exit (1);
696 }
697
698 if (dbs[cnt].persistent)
699 dbg_log (_("cannot create %s; no persistent database used"),
700 dbs[cnt].db_filename);
701 else
702 dbg_log (_("cannot create %s; no sharing possible"),
703 dbs[cnt].db_filename);
704
705 dbs[cnt].persistent = 0;
706 // XXX remember: no mmap
707 }
708 else
709 {
710 /* Tell the user if we could not create the read-only
711 descriptor. */
712 if (ro_fd == -1 && dbs[cnt].shared)
713 dbg_log (_("\
714 cannot create read-only descriptor for \"%s\"; no mmap"),
715 dbs[cnt].db_filename);
716
717 /* Before we create the header, initialiye the hash
718 table. So that if we get interrupted if writing
719 the header we can recognize a partially initialized
720 database. */
721 size_t ps = sysconf (_SC_PAGESIZE);
722 char tmpbuf[ps];
723 assert (~ENDREF == 0);
724 memset (tmpbuf, '\xff', ps);
725
726 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
727 off_t offset = sizeof (head);
728
729 size_t towrite;
730 if (offset % ps != 0)
731 {
732 towrite = MIN (remaining, ps - (offset % ps));
733 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
734 goto write_fail;
735 offset += towrite;
736 remaining -= towrite;
737 }
738
739 while (remaining > ps)
740 {
741 if (pwrite (fd, tmpbuf, ps, offset) == -1)
742 goto write_fail;
743 offset += ps;
744 remaining -= ps;
745 }
746
747 if (remaining > 0
748 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
749 goto write_fail;
750
751 /* Create the header of the file. */
752 struct database_pers_head head =
753 {
754 .version = DB_VERSION,
755 .header_size = sizeof (head),
756 .module = dbs[cnt].suggested_module,
757 .data_size = (dbs[cnt].suggested_module
758 * DEFAULT_DATASIZE_PER_BUCKET),
759 .first_free = 0
760 };
761 void *mem;
762
763 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
764 != sizeof (head))
765 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
766 != 0)
767 || (mem = mmap (NULL, dbs[cnt].max_db_size,
768 PROT_READ | PROT_WRITE,
769 MAP_SHARED, fd, 0)) == MAP_FAILED)
770 {
771 write_fail:
772 unlink (dbs[cnt].db_filename);
773 dbg_log (_("cannot write to database file %s: %s"),
774 dbs[cnt].db_filename, strerror (errno));
775 dbs[cnt].persistent = 0;
776 }
777 else
778 {
779 /* Success. */
780 dbs[cnt].head = mem;
781 dbs[cnt].data = (char *)
782 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
783 ALIGN / sizeof (ref_t))];
784 dbs[cnt].memsize = total;
785 dbs[cnt].mmap_used = true;
786
787 /* Remember the descriptors. */
788 dbs[cnt].wr_fd = fd;
789 dbs[cnt].ro_fd = ro_fd;
790 fd = -1;
791 ro_fd = -1;
792 }
793
794 if (fd != -1)
795 close (fd);
796 if (ro_fd != -1)
797 close (ro_fd);
798 }
799 }
800
801 #if !defined O_CLOEXEC || !defined __ASSUME_O_CLOEXEC
802 /* We do not check here whether the O_CLOEXEC provided to the
803 open call was successful or not. The two fcntl calls are
804 only performed once each per process start-up and therefore
805 is not noticeable at all. */
806 if (paranoia
807 && ((dbs[cnt].wr_fd != -1
808 && fcntl (dbs[cnt].wr_fd, F_SETFD, FD_CLOEXEC) == -1)
809 || (dbs[cnt].ro_fd != -1
810 && fcntl (dbs[cnt].ro_fd, F_SETFD, FD_CLOEXEC) == -1)))
811 {
812 dbg_log (_("\
813 cannot set socket to close on exec: %s; disabling paranoia mode"),
814 strerror (errno));
815 paranoia = 0;
816 }
817 #endif
818
819 if (dbs[cnt].head == NULL)
820 {
821 /* We do not use the persistent database. Just
822 create an in-memory data structure. */
823 assert (! dbs[cnt].persistent);
824
825 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
826 + (dbs[cnt].suggested_module
827 * sizeof (ref_t)));
828 memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
829 assert (~ENDREF == 0);
830 memset (dbs[cnt].head->array, '\xff',
831 dbs[cnt].suggested_module * sizeof (ref_t));
832 dbs[cnt].head->module = dbs[cnt].suggested_module;
833 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
834 * dbs[cnt].head->module);
835 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
836 dbs[cnt].head->first_free = 0;
837
838 dbs[cnt].shared = 0;
839 assert (dbs[cnt].ro_fd == -1);
840 }
841
842 dbs[cnt].inotify_descr = -1;
843 if (dbs[cnt].check_file)
844 {
845 #ifdef HAVE_INOTIFY
846 if (inotify_fd < 0
847 || (dbs[cnt].inotify_descr
848 = inotify_add_watch (inotify_fd, dbs[cnt].filename,
849 IN_DELETE_SELF | IN_MODIFY)) < 0)
850 /* We cannot notice changes in the main thread. */
851 #endif
852 {
853 /* We need the modification date of the file. */
854 struct stat64 st;
855
856 if (stat64 (dbs[cnt].filename, &st) < 0)
857 {
858 /* We cannot stat() the file, disable file checking. */
859 dbg_log (_("cannot stat() file `%s': %s"),
860 dbs[cnt].filename, strerror (errno));
861 dbs[cnt].check_file = 0;
862 }
863 else
864 dbs[cnt].file_mtime = st.st_mtime;
865 }
866 }
867
868 #ifdef HAVE_INOTIFY
869 if (cnt == hstdb && inotify_fd >= -1)
870 /* We also monitor the resolver configuration file. */
871 resolv_conf_descr = inotify_add_watch (inotify_fd,
872 _PATH_RESCONF,
873 IN_DELETE_SELF | IN_MODIFY);
874 #endif
875 }
876
877 /* Create the socket. */
878 #ifndef __ASSUME_SOCK_CLOEXEC
879 sock = -1;
880 if (have_sock_cloexec >= 0)
881 #endif
882 {
883 sock = socket (AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
884 #ifndef __ASSUME_SOCK_CLOEXEC
885 if (have_sock_cloexec == 0)
886 have_sock_cloexec = sock != -1 || errno != EINVAL ? 1 : -1;
887 #endif
888 }
889 #ifndef __ASSUME_SOCK_CLOEXEC
890 if (have_sock_cloexec < 0)
891 sock = socket (AF_UNIX, SOCK_STREAM, 0);
892 #endif
893 if (sock < 0)
894 {
895 dbg_log (_("cannot open socket: %s"), strerror (errno));
896 exit (errno == EACCES ? 4 : 1);
897 }
898 /* Bind a name to the socket. */
899 struct sockaddr_un sock_addr;
900 sock_addr.sun_family = AF_UNIX;
901 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
902 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
903 {
904 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
905 exit (errno == EACCES ? 4 : 1);
906 }
907
908 #ifndef __ASSUME_SOCK_CLOEXEC
909 if (have_sock_cloexec < 0)
910 {
911 /* We don't want to get stuck on accept. */
912 int fl = fcntl (sock, F_GETFL);
913 if (fl == -1 || fcntl (sock, F_SETFL, fl | O_NONBLOCK) == -1)
914 {
915 dbg_log (_("cannot change socket to nonblocking mode: %s"),
916 strerror (errno));
917 exit (1);
918 }
919
920 /* The descriptor needs to be closed on exec. */
921 if (paranoia && fcntl (sock, F_SETFD, FD_CLOEXEC) == -1)
922 {
923 dbg_log (_("cannot set socket to close on exec: %s"),
924 strerror (errno));
925 exit (1);
926 }
927 }
928 #endif
929
930 /* Set permissions for the socket. */
931 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
932
933 /* Set the socket up to accept connections. */
934 if (listen (sock, SOMAXCONN) < 0)
935 {
936 dbg_log (_("cannot enable socket to accept connections: %s"),
937 strerror (errno));
938 exit (1);
939 }
940
941 /* Change to unprivileged uid/gid/groups if specifed in config file */
942 if (server_user != NULL)
943 finish_drop_privileges ();
944 }
945
946
947 /* Close the connections. */
948 void
949 close_sockets (void)
950 {
951 close (sock);
952 }
953
954
955 static void
956 invalidate_cache (char *key, int fd)
957 {
958 dbtype number;
959 int32_t resp;
960
961 for (number = pwddb; number < lastdb; ++number)
962 if (strcmp (key, dbnames[number]) == 0)
963 {
964 if (dbs[number].reset_res)
965 res_init ();
966
967 break;
968 }
969
970 if (number == lastdb)
971 {
972 resp = EINVAL;
973 writeall (fd, &resp, sizeof (resp));
974 return;
975 }
976
977 if (dbs[number].enabled)
978 {
979 pthread_mutex_lock (&dbs[number].prune_lock);
980 prune_cache (&dbs[number], LONG_MAX, fd);
981 pthread_mutex_unlock (&dbs[number].prune_lock);
982 }
983 else
984 {
985 resp = 0;
986 writeall (fd, &resp, sizeof (resp));
987 }
988 }
989
990
#ifdef SCM_RIGHTS
/* Pass the database's read-only file descriptor to the client on FD
   via an SCM_RIGHTS control message, so the client can mmap the cache
   directly.  KEY (the request payload) and the current mapping size
   are sent as ordinary data alongside the descriptor.  Errors other
   than EINTR are deliberately ignored; the client falls back to
   normal requests if it never receives the descriptor.  */
static void
send_ro_fd (struct database_dyn *db, char *key, int fd)
{
  /* If we do not have a read-only file descriptor do nothing.  */
  if (db->ro_fd == -1)
    return;

  /* We need to send some data along with the descriptor.  */
  uint64_t mapsize = (db->head->data_size
                      + roundup (db->head->module * sizeof (ref_t), ALIGN)
                      + sizeof (struct database_pers_head));
  struct iovec iov[2];
  iov[0].iov_base = key;
  iov[0].iov_len = strlen (key) + 1;
  iov[1].iov_base = &mapsize;
  iov[1].iov_len = sizeof (mapsize);

  /* Prepare the control message to transfer the descriptor.  The
     union guarantees correct alignment for the cmsghdr.  */
  union
  {
    struct cmsghdr hdr;
    char bytes[CMSG_SPACE (sizeof (int))];
  } buf;
  struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
                        .msg_control = buf.bytes,
                        .msg_controllen = sizeof (buf) };
  struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);

  cmsg->cmsg_level = SOL_SOCKET;
  cmsg->cmsg_type = SCM_RIGHTS;
  cmsg->cmsg_len = CMSG_LEN (sizeof (int));

  /* The kernel duplicates the descriptor into the receiving process.  */
  *(int *) CMSG_DATA (cmsg) = db->ro_fd;

  msg.msg_controllen = cmsg->cmsg_len;

  /* Send the control message.  We repeat when we are interrupted but
     everything else is ignored.  */
#ifndef MSG_NOSIGNAL
# define MSG_NOSIGNAL 0
#endif
  (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));

  if (__builtin_expect (debug_level > 0, 0))
    dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
}
#endif /* SCM_RIGHTS */
1039
1040
1041 /* Handle new request. */
1042 static void
1043 handle_request (int fd, request_header *req, void *key, uid_t uid, pid_t pid)
1044 {
1045 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
1046 {
1047 if (debug_level > 0)
1048 dbg_log (_("\
1049 cannot handle old request version %d; current version is %d"),
1050 req->version, NSCD_VERSION);
1051 return;
1052 }
1053
1054 /* Perform the SELinux check before we go on to the standard checks. */
1055 if (selinux_enabled && nscd_request_avc_has_perm (fd, req->type) != 0)
1056 {
1057 if (debug_level > 0)
1058 {
1059 #ifdef SO_PEERCRED
1060 # ifdef PATH_MAX
1061 char buf[PATH_MAX];
1062 # else
1063 char buf[4096];
1064 # endif
1065
1066 snprintf (buf, sizeof (buf), "/proc/%ld/exe", (long int) pid);
1067 ssize_t n = readlink (buf, buf, sizeof (buf) - 1);
1068
1069 if (n <= 0)
1070 dbg_log (_("\
1071 request from %ld not handled due to missing permission"), (long int) pid);
1072 else
1073 {
1074 buf[n] = '\0';
1075 dbg_log (_("\
1076 request from '%s' [%ld] not handled due to missing permission"),
1077 buf, (long int) pid);
1078 }
1079 #else
1080 dbg_log (_("request not handled due to missing permission"));
1081 #endif
1082 }
1083 return;
1084 }
1085
1086 struct database_dyn *db = reqinfo[req->type].db;
1087
1088 /* See whether we can service the request from the cache. */
1089 if (__builtin_expect (reqinfo[req->type].data_request, true))
1090 {
1091 if (__builtin_expect (debug_level, 0) > 0)
1092 {
1093 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
1094 {
1095 char buf[INET6_ADDRSTRLEN];
1096
1097 dbg_log ("\t%s (%s)", serv2str[req->type],
1098 inet_ntop (req->type == GETHOSTBYADDR
1099 ? AF_INET : AF_INET6,
1100 key, buf, sizeof (buf)));
1101 }
1102 else
1103 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1104 }
1105
1106 /* Is this service enabled? */
1107 if (__builtin_expect (!db->enabled, 0))
1108 {
1109 /* No, sent the prepared record. */
1110 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
1111 db->disabled_iov->iov_len,
1112 MSG_NOSIGNAL))
1113 != (ssize_t) db->disabled_iov->iov_len
1114 && __builtin_expect (debug_level, 0) > 0)
1115 {
1116 /* We have problems sending the result. */
1117 char buf[256];
1118 dbg_log (_("cannot write result: %s"),
1119 strerror_r (errno, buf, sizeof (buf)));
1120 }
1121
1122 return;
1123 }
1124
1125 /* Be sure we can read the data. */
1126 if (__builtin_expect (pthread_rwlock_tryrdlock (&db->lock) != 0, 0))
1127 {
1128 ++db->head->rdlockdelayed;
1129 pthread_rwlock_rdlock (&db->lock);
1130 }
1131
1132 /* See whether we can handle it from the cache. */
1133 struct datahead *cached;
1134 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
1135 db, uid);
1136 if (cached != NULL)
1137 {
1138 /* Hurray it's in the cache. */
1139 ssize_t nwritten;
1140
1141 #ifdef HAVE_SENDFILE
1142 if (__builtin_expect (db->mmap_used, 1))
1143 {
1144 assert (db->wr_fd != -1);
1145 assert ((char *) cached->data > (char *) db->data);
1146 assert ((char *) cached->data - (char *) db->head
1147 + cached->recsize
1148 <= (sizeof (struct database_pers_head)
1149 + db->head->module * sizeof (ref_t)
1150 + db->head->data_size));
1151 nwritten = sendfileall (fd, db->wr_fd,
1152 (char *) cached->data
1153 - (char *) db->head, cached->recsize);
1154 # ifndef __ASSUME_SENDFILE
1155 if (nwritten == -1 && errno == ENOSYS)
1156 goto use_write;
1157 # endif
1158 }
1159 else
1160 # ifndef __ASSUME_SENDFILE
1161 use_write:
1162 # endif
1163 #endif
1164 nwritten = writeall (fd, cached->data, cached->recsize);
1165
1166 if (nwritten != cached->recsize
1167 && __builtin_expect (debug_level, 0) > 0)
1168 {
1169 /* We have problems sending the result. */
1170 char buf[256];
1171 dbg_log (_("cannot write result: %s"),
1172 strerror_r (errno, buf, sizeof (buf)));
1173 }
1174
1175 pthread_rwlock_unlock (&db->lock);
1176
1177 return;
1178 }
1179
1180 pthread_rwlock_unlock (&db->lock);
1181 }
1182 else if (__builtin_expect (debug_level, 0) > 0)
1183 {
1184 if (req->type == INVALIDATE)
1185 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1186 else
1187 dbg_log ("\t%s", serv2str[req->type]);
1188 }
1189
1190 /* Handle the request. */
1191 switch (req->type)
1192 {
1193 case GETPWBYNAME:
1194 addpwbyname (db, fd, req, key, uid);
1195 break;
1196
1197 case GETPWBYUID:
1198 addpwbyuid (db, fd, req, key, uid);
1199 break;
1200
1201 case GETGRBYNAME:
1202 addgrbyname (db, fd, req, key, uid);
1203 break;
1204
1205 case GETGRBYGID:
1206 addgrbygid (db, fd, req, key, uid);
1207 break;
1208
1209 case GETHOSTBYNAME:
1210 addhstbyname (db, fd, req, key, uid);
1211 break;
1212
1213 case GETHOSTBYNAMEv6:
1214 addhstbynamev6 (db, fd, req, key, uid);
1215 break;
1216
1217 case GETHOSTBYADDR:
1218 addhstbyaddr (db, fd, req, key, uid);
1219 break;
1220
1221 case GETHOSTBYADDRv6:
1222 addhstbyaddrv6 (db, fd, req, key, uid);
1223 break;
1224
1225 case GETAI:
1226 addhstai (db, fd, req, key, uid);
1227 break;
1228
1229 case INITGROUPS:
1230 addinitgroups (db, fd, req, key, uid);
1231 break;
1232
1233 case GETSERVBYNAME:
1234 addservbyname (db, fd, req, key, uid);
1235 break;
1236
1237 case GETSERVBYPORT:
1238 addservbyport (db, fd, req, key, uid);
1239 break;
1240
1241 case GETSTAT:
1242 case SHUTDOWN:
1243 case INVALIDATE:
1244 {
1245 /* Get the callers credentials. */
1246 #ifdef SO_PEERCRED
1247 struct ucred caller;
1248 socklen_t optlen = sizeof (caller);
1249
1250 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1251 {
1252 char buf[256];
1253
1254 dbg_log (_("error getting caller's id: %s"),
1255 strerror_r (errno, buf, sizeof (buf)));
1256 break;
1257 }
1258
1259 uid = caller.uid;
1260 #else
1261 /* Some systems have no SO_PEERCRED implementation. They don't
1262 care about security so we don't as well. */
1263 uid = 0;
1264 #endif
1265 }
1266
1267 /* Accept shutdown, getstat and invalidate only from root. For
1268 the stat call also allow the user specified in the config file. */
1269 if (req->type == GETSTAT)
1270 {
1271 if (uid == 0 || uid == stat_uid)
1272 send_stats (fd, dbs);
1273 }
1274 else if (uid == 0)
1275 {
1276 if (req->type == INVALIDATE)
1277 invalidate_cache (key, fd);
1278 else
1279 termination_handler (0);
1280 }
1281 break;
1282
1283 case GETFDPW:
1284 case GETFDGR:
1285 case GETFDHST:
1286 case GETFDSERV:
1287 #ifdef SCM_RIGHTS
1288 send_ro_fd (reqinfo[req->type].db, key, fd);
1289 #endif
1290 break;
1291
1292 default:
1293 /* Ignore the command, it's nothing we know. */
1294 break;
1295 }
1296 }
1297
1298
/* Restart the process by re-exec'ing /proc/self/exe ("paranoia mode").
   Re-reads the original command line from /proc/self/cmdline, restores
   the original UID/GID and working directory, flushes the persistent
   databases, and calls execv.  On any failure the function disables
   paranoia mode (clears the global PARANOIA flag) and returns so the
   daemon keeps running un-restarted.  */
static void
restart (void)
{
  /* First determine the parameters.  We do not use the parameters
     passed to main() since in case nscd is started by running the
     dynamic linker this will not work.  Yes, this is not the usual
     case but nscd is part of glibc and we occasionally do this.  */
  size_t buflen = 1024;
  char *buf = alloca (buflen);
  size_t readlen = 0;
  int fd = open ("/proc/self/cmdline", O_RDONLY);
  if (fd == -1)
    {
      dbg_log (_("\
cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
	       strerror (errno));

      paranoia = 0;
      return;
    }

  while (1)
    {
      ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
					    buflen - readlen));
      if (n == -1)
	{
	  dbg_log (_("\
cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
		   strerror (errno));

	  close (fd);
	  paranoia = 0;
	  return;
	}

      readlen += n;

      /* A short read means we have the whole file.  */
      if (readlen < buflen)
	break;

      /* We might have to extend the buffer.  extend_alloca may return
	 a new block, so copy the data already read into it.  */
      size_t old_buflen = buflen;
      char *newp = extend_alloca (buf, buflen, 2 * buflen);
      buf = memmove (newp, buf, old_buflen);
    }

  close (fd);

  /* Parse the command line.  Worst case scenario: every two
     characters form one parameter (one character plus NUL).  */
  char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
  int argc = 0;

  /* /proc/self/cmdline is a sequence of NUL-terminated strings; split
     on the NULs.  */
  char *cp = buf;
  while (cp < buf + readlen)
    {
      argv[argc++] = cp;
      cp = (char *) rawmemchr (cp, '\0') + 1;
    }
  argv[argc] = NULL;

  /* Second, change back to the old user if we changed it.  The saved
     IDs were kept by finish_drop_privileges for exactly this purpose.  */
  if (server_user != NULL)
    {
      if (setresuid (old_uid, old_uid, old_uid) != 0)
	{
	  dbg_log (_("\
cannot change to old UID: %s; disabling paranoia mode"),
		   strerror (errno));

	  paranoia = 0;
	  return;
	}

      if (setresgid (old_gid, old_gid, old_gid) != 0)
	{
	  dbg_log (_("\
cannot change to old GID: %s; disabling paranoia mode"),
		   strerror (errno));

	  /* Undo the UID change before giving up.  */
	  setuid (server_uid);
	  paranoia = 0;
	  return;
	}
    }

  /* Next change back to the old working directory.  */
  if (chdir (oldcwd) == -1)
    {
      dbg_log (_("\
cannot change to old working directory: %s; disabling paranoia mode"),
	       strerror (errno));

      if (server_user != NULL)
	{
	  setuid (server_uid);
	  setgid (server_gid);
	}
      paranoia = 0;
      return;
    }

  /* Synchronize memory.  Zeroing the timestamp makes clients consider
     the mapping stale; NSCD_CERTAINLY_RUNNING values are saved so they
     can be restored if the exec fails.  */
  int32_t certainly[lastdb];
  for (int cnt = 0; cnt < lastdb; ++cnt)
    if (dbs[cnt].enabled)
      {
	/* Make sure nobody keeps using the database.  */
	dbs[cnt].head->timestamp = 0;
	certainly[cnt] = dbs[cnt].head->nscd_certainly_running;
	dbs[cnt].head->nscd_certainly_running = 0;

	if (dbs[cnt].persistent)
	  // XXX async OK?
	  msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
      }

  /* The preparations are done.  */
  execv ("/proc/self/exe", argv);

  /* If we come here, we will never be able to re-exec.  */
  dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
	   strerror (errno));

  /* Drop privileges again and fall back to normal operation.  */
  if (server_user != NULL)
    {
      setuid (server_uid);
      setgid (server_gid);
    }
  if (chdir ("/") != 0)
    dbg_log (_("cannot change current working directory to \"/\": %s"),
	     strerror (errno));
  paranoia = 0;

  /* Reenable the databases.  */
  time_t now = time (NULL);
  for (int cnt = 0; cnt < lastdb; ++cnt)
    if (dbs[cnt].enabled)
      {
	dbs[cnt].head->timestamp = now;
	dbs[cnt].head->nscd_certainly_running = certainly[cnt];
      }
}
1444
1445
/* List node carrying one accepted file descriptor from the main event
   loop to the worker threads.  A node whose NEXT pointer is NULL is
   considered free.  */
struct fdlist
{
  int fd;
  struct fdlist *next;
};
/* Memory allocated for the list; an array of NCONNS nodes, allocated
   in start_threads.  */
static struct fdlist *fdlist;
/* List of currently ready-to-read file descriptors.  This is a
   circular singly-linked list; the head pointer designates the most
   recently added element.  Protected by READYLIST_LOCK.  */
static struct fdlist *readylist;

/* Conditional variable and mutex to signal availability of entries in
   READYLIST.  NOTE(review): the condvar is statically initialized here;
   the clock-selection comment applies to the per-database prune
   condvars created in start_threads — confirm.  */
static pthread_cond_t readylist_cond = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;

/* The clock to use with the condvar; switched to CLOCK_MONOTONIC in
   start_threads when available.  */
static clockid_t timeout_clock = CLOCK_REALTIME;

/* Number of threads ready to handle the READYLIST.  Protected by
   READYLIST_LOCK.  */
static unsigned long int nready;
1468
1469
/* Body of the clean-up (prune) thread for one database.  P encodes the
   database index.  The thread sleeps on the database's prune condvar
   until either the wakeup time arrives or another thread sets the
   CLEAR_CACHE flag, then calls prune_cache and computes the next
   wakeup time.  Never returns.  */
static void *
__attribute__ ((__noreturn__))
nscd_run_prune (void *p)
{
  const long int my_number = (long int) p;
  assert (dbs[my_number].enabled);

  /* setup_thread tells us whether clients can detect on their own
     that nscd is running (then we need not refresh the timestamp).  */
  int dont_need_update = setup_thread (&dbs[my_number]);

  time_t now = time (NULL);

  /* We are running.  */
  dbs[my_number].head->timestamp = now;

  struct timespec prune_ts;
  if (__builtin_expect (clock_gettime (timeout_clock, &prune_ts) == -1, 0))
    /* Should never happen.  */
    abort ();

  /* Compute the initial timeout time.  Prevent all the timers to go
     off at the same time by adding a db-based value.  */
  prune_ts.tv_sec += CACHE_PRUNE_INTERVAL + my_number;
  dbs[my_number].wakeup_time = now + CACHE_PRUNE_INTERVAL + my_number;

  pthread_mutex_t *prune_lock = &dbs[my_number].prune_lock;
  pthread_cond_t *prune_cond = &dbs[my_number].prune_cond;

  pthread_mutex_lock (prune_lock);
  while (1)
    {
      /* Wait, but not forever.  Skip the wait entirely if a cache
	 clear was already requested.  */
      int e = 0;
      if (! dbs[my_number].clear_cache)
	e = pthread_cond_timedwait (prune_cond, prune_lock, &prune_ts);
      assert (__builtin_expect (e == 0 || e == ETIMEDOUT, 1));

      time_t next_wait;
      now = time (NULL);
      if (e == ETIMEDOUT || now >= dbs[my_number].wakeup_time
	  || dbs[my_number].clear_cache)
	{
	  /* We will determine the new timeout values based on the
	     cache content.  Should there be concurrent additions to
	     the cache which are not accounted for in the cache
	     pruning we want to know about it.  Therefore set the
	     timeout to the maximum.  It will be decreased when adding
	     new entries to the cache, if necessary.  */
	  if (sizeof (time_t) == sizeof (long int))
	    dbs[my_number].wakeup_time = LONG_MAX;
	  else
	    dbs[my_number].wakeup_time = INT_MAX;

	  /* Unconditionally reset the flag.  Passing LONG_MAX as "now"
	     makes prune_cache treat every entry as expired, i.e. a
	     full cache clear.  */
	  time_t prune_now = dbs[my_number].clear_cache ? LONG_MAX : now;
	  dbs[my_number].clear_cache = 0;

	  /* Drop the lock while pruning; prune_cache takes the
	     database's own locks.  */
	  pthread_mutex_unlock (prune_lock);

	  next_wait = prune_cache (&dbs[my_number], prune_now, -1);

	  next_wait = MAX (next_wait, CACHE_PRUNE_INTERVAL);
	  /* If clients cannot determine for sure whether nscd is running
	     we need to wake up occasionally to update the timestamp.
	     Wait 90% of the update period.  */
#define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
	  if (__builtin_expect (! dont_need_update, 0))
	    {
	      next_wait = MIN (UPDATE_MAPPING_TIMEOUT, next_wait);
	      dbs[my_number].head->timestamp = now;
	    }

	  pthread_mutex_lock (prune_lock);

	  /* Make it known when we will wake up again.  Another thread
	     may have lowered WAKEUP_TIME while we were pruning.  */
	  if (now + next_wait < dbs[my_number].wakeup_time)
	    dbs[my_number].wakeup_time = now + next_wait;
	  else
	    next_wait = dbs[my_number].wakeup_time - now;
	}
      else
	/* The cache was just pruned.  Do not do it again now.  Just
	   use the new timeout value.  */
	next_wait = dbs[my_number].wakeup_time - now;

      if (clock_gettime (timeout_clock, &prune_ts) == -1)
	/* Should never happen.  */
	abort ();

      /* Compute next timeout time.  */
      prune_ts.tv_sec += next_wait;
    }
}
1563
1564
/* This is the main loop.  It is replicated in different threads but
   the use of the ready list makes sure only one thread handles an
   incoming connection.  The thread pops a descriptor from READYLIST,
   reads and validates the request header and key, dispatches to
   handle_request, closes the descriptor, and loops.  The argument P
   is unused.  Never returns.  */
static void *
__attribute__ ((__noreturn__))
nscd_run_worker (void *p)
{
  char buf[256];

  /* Initialize the memory-in-flight list.  */
  for (enum in_flight idx = 0; idx < IDX_last; ++idx)
    mem_in_flight.block[idx].dbidx = -1;
  /* And queue this thread's structure.  Lock-free push onto the global
     list; retry the CAS until no other thread raced us.  */
  do
    mem_in_flight.next = mem_in_flight_list;
  while (atomic_compare_and_exchange_bool_acq (&mem_in_flight_list,
					       &mem_in_flight,
					       mem_in_flight.next) != 0);

  /* Initial locking.  */
  pthread_mutex_lock (&readylist_lock);

  /* One more thread available.  */
  ++nready;

  while (1)
    {
      while (readylist == NULL)
	pthread_cond_wait (&readylist_cond, &readylist_lock);

      /* READYLIST is circular with the head pointing at the newest
	 entry, so head->next is the oldest entry (FIFO order).  */
      struct fdlist *it = readylist->next;
      if (readylist->next == readylist)
	/* Just one entry on the list.  */
	readylist = NULL;
      else
	readylist->next = it->next;

      /* Extract the information and mark the record ready to be used
	 again (a NULL NEXT pointer marks the slot free for fd_ready).  */
      int fd = it->fd;
      it->next = NULL;

      /* This thread is now busy: one fewer thread available.  */
      --nready;

      /* We are done with the list.  */
      pthread_mutex_unlock (&readylist_lock);

#ifndef __ASSUME_ACCEPT4
      if (have_accept4 < 0)
	{
	  /* We do not want to block on a short read or so.  With
	     accept4 the descriptor is already non-blocking.  */
	  int fl = fcntl (fd, F_GETFL);
	  if (fl == -1 || fcntl (fd, F_SETFL, fl | O_NONBLOCK) == -1)
	    goto close_and_out;
	}
#endif

      /* Now read the request.  */
      request_header req;
      if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
			    != sizeof (req), 0))
	{
	  /* We failed to read data.  Note that this also might mean we
	     failed because we would have blocked.  */
	  if (debug_level > 0)
	    dbg_log (_("short read while reading request: %s"),
		     strerror_r (errno, buf, sizeof (buf)));
	  goto close_and_out;
	}

      /* Check whether this is a valid request type.  */
      if (req.type < GETPWBYNAME || req.type >= LASTREQ)
	goto close_and_out;

      /* Some systems have no SO_PEERCRED implementation.  They don't
	 care about security so we don't as well.  */
      uid_t uid = -1;
#ifdef SO_PEERCRED
      pid_t pid = 0;

      /* The caller's PID is only looked up for debug logging.  */
      if (__builtin_expect (debug_level > 0, 0))
	{
	  struct ucred caller;
	  socklen_t optlen = sizeof (caller);

	  if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
	    pid = caller.pid;
	}
#else
      const pid_t pid = 0;
#endif

      /* It should not be possible to crash the nscd with a silly
	 request (i.e., a terribly large key).  We limit the size to 1kb.  */
      if (__builtin_expect (req.key_len, 1) < 0
	  || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
	{
	  if (debug_level > 0)
	    dbg_log (_("key length in request too long: %d"), req.key_len);
	}
      else
	{
	  /* Get the key.  */
	  char keybuf[MAXKEYLEN];

	  if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
							  req.key_len))
				!= req.key_len, 0))
	    {
	      /* Again, this can also mean we would have blocked.  */
	      if (debug_level > 0)
		dbg_log (_("short read while reading request key: %s"),
			 strerror_r (errno, buf, sizeof (buf)));
	      goto close_and_out;
	    }

	  if (__builtin_expect (debug_level, 0) > 0)
	    {
#ifdef SO_PEERCRED
	      if (pid != 0)
		dbg_log (_("\
handle_request: request received (Version = %d) from PID %ld"),
			 req.version, (long int) pid);
	      else
#endif
		dbg_log (_("\
handle_request: request received (Version = %d)"), req.version);
	    }

	  /* Phew, we got all the data, now process it.  */
	  handle_request (fd, &req, keybuf, uid, pid);
	}

    close_and_out:
      /* We are done.  */
      close (fd);

      /* Re-locking.  */
      pthread_mutex_lock (&readylist_lock);

      /* One more thread available.  */
      ++nready;
    }
}
1710
1711
1712 static unsigned int nconns;
1713
1714 static void
1715 fd_ready (int fd)
1716 {
1717 pthread_mutex_lock (&readylist_lock);
1718
1719 /* Find an empty entry in FDLIST. */
1720 size_t inner;
1721 for (inner = 0; inner < nconns; ++inner)
1722 if (fdlist[inner].next == NULL)
1723 break;
1724 assert (inner < nconns);
1725
1726 fdlist[inner].fd = fd;
1727
1728 if (readylist == NULL)
1729 readylist = fdlist[inner].next = &fdlist[inner];
1730 else
1731 {
1732 fdlist[inner].next = readylist->next;
1733 readylist = readylist->next = &fdlist[inner];
1734 }
1735
1736 bool do_signal = true;
1737 if (__builtin_expect (nready == 0, 0))
1738 {
1739 ++client_queued;
1740 do_signal = false;
1741
1742 /* Try to start another thread to help out. */
1743 pthread_t th;
1744 if (nthreads < max_nthreads
1745 && pthread_create (&th, &attr, nscd_run_worker,
1746 (void *) (long int) nthreads) == 0)
1747 {
1748 /* We got another thread. */
1749 ++nthreads;
1750 /* The new thread might need a kick. */
1751 do_signal = true;
1752 }
1753
1754 }
1755
1756 pthread_mutex_unlock (&readylist_lock);
1757
1758 /* Tell one of the worker threads there is work to do. */
1759 if (do_signal)
1760 pthread_cond_signal (&readylist_cond);
1761 }
1762
1763
1764 /* Check whether restarting should happen. */
1765 static inline int
1766 restart_p (time_t now)
1767 {
1768 return (paranoia && readylist == NULL && nready == nthreads
1769 && now >= restart_time);
1770 }
1771
1772
/* Array recording when each connection was accepted, used to close
   connections that sent no request in time.  Indexed by CONNS slot in
   the poll loop and by file descriptor in the epoll loop; NCONNS
   elements, allocated in start_threads.  */
static time_t *starttime;
1775
1776
/* Main event loop based on poll().  Slot 0 of CONNS is the listening
   socket, slot 1 (when available) the inotify descriptor; further
   slots hold accepted connections.  FIRSTFREE tracks the lowest free
   slot and NUSED the number of slots in use.  Never returns.  */
static void
__attribute__ ((__noreturn__))
main_loop_poll (void)
{
  struct pollfd *conns = (struct pollfd *) xmalloc (nconns
						    * sizeof (conns[0]));

  conns[0].fd = sock;
  conns[0].events = POLLRDNORM;
  size_t nused = 1;
  size_t firstfree = 1;

#ifdef HAVE_INOTIFY
  if (inotify_fd != -1)
    {
      conns[1].fd = inotify_fd;
      conns[1].events = POLLRDNORM;
      nused = 2;
      firstfree = 2;
    }
#endif

  while (1)
    {
      /* Wait for any event.  We wait at most a couple of seconds so
	 that we can check whether we should close any of the accepted
	 connections since we have not received a request.  The timeout
	 shrinks as more descriptors are in use.  */
#define MAX_ACCEPT_TIMEOUT 30
#define MIN_ACCEPT_TIMEOUT 5
#define MAIN_THREAD_TIMEOUT \
  (MAX_ACCEPT_TIMEOUT * 1000						      \
   - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))

      int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);

      time_t now = time (NULL);

      /* If there is a descriptor ready for reading or there is a new
	 connection, process this now.  */
      if (n > 0)
	{
	  if (conns[0].revents != 0)
	    {
	      /* We have a new incoming connection.  Accept the connection.  */
	      int fd;

#ifndef __ASSUME_ACCEPT4
	      fd = -1;
	      if (have_accept4 >= 0)
#endif
		{
		  fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
						    SOCK_NONBLOCK));
#ifndef __ASSUME_ACCEPT4
		  /* Probe once whether the kernel supports accept4.  */
		  if (have_accept4 == 0)
		    have_accept4 = fd != -1 || errno != ENOSYS ? 1 : -1;
#endif
		}
#ifndef __ASSUME_ACCEPT4
	      if (have_accept4 < 0)
		fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
#endif

	      /* Use the descriptor if we have not reached the limit.  */
	      if (fd >= 0)
		{
		  if (firstfree < nconns)
		    {
		      conns[firstfree].fd = fd;
		      conns[firstfree].events = POLLRDNORM;
		      starttime[firstfree] = now;
		      if (firstfree >= nused)
			nused = firstfree + 1;

		      /* Advance FIRSTFREE to the next free slot.  */
		      do
			++firstfree;
		      while (firstfree < nused && conns[firstfree].fd != -1);
		    }
		  else
		    /* We cannot use the connection so close it.  */
		    close (fd);
		}

	      --n;
	    }

	  size_t first = 1;
#ifdef HAVE_INOTIFY
	  if (inotify_fd != -1 && conns[1].fd == inotify_fd)
	    {
	      if (conns[1].revents != 0)
		{
		  bool to_clear[lastdb] = { false, };
		  union
		  {
# ifndef PATH_MAX
#  define PATH_MAX 1024
# endif
		    struct inotify_event i;
		    char buf[sizeof (struct inotify_event) + PATH_MAX];
		  } inev;

		  /* Drain all queued inotify events; the descriptor is
		     presumably non-blocking so EAGAIN ends the loop —
		     TODO confirm how it is opened.  */
		  while (1)
		    {
		      ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd, &inev,
							     sizeof (inev)));
		      if (nb < (ssize_t) sizeof (struct inotify_event))
			{
			  if (__builtin_expect (nb == -1 && errno != EAGAIN,
						0))
			    {
			      /* Something went wrong when reading the inotify
				 data.  Better disable inotify.  */
			      dbg_log (_("\
disabled inotify after read error %d"),
				       errno);
			      conns[1].fd = -1;
			      firstfree = 1;
			      if (nused == 2)
				nused = 1;
			      close (inotify_fd);
			      inotify_fd = -1;
			    }
			  break;
			}

		      /* Check which of the files changed.  */
		      for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
			if (inev.i.wd == dbs[dbcnt].inotify_descr)
			  {
			    to_clear[dbcnt] = true;
			    goto next;
			  }

		      /* A change to resolv.conf also invalidates the
			 host database.  */
		      if (inev.i.wd == resolv_conf_descr)
			{
			  res_init ();
			  to_clear[hstdb] = true;
			}
		    next:;
		    }

		  /* Actually perform the cache clearing by waking the
		     prune threads with CLEAR_CACHE set.  */
		  for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
		    if (to_clear[dbcnt])
		      {
			pthread_mutex_lock (&dbs[dbcnt].prune_lock);
			dbs[dbcnt].clear_cache = 1;
			pthread_mutex_unlock (&dbs[dbcnt].prune_lock);
			pthread_cond_signal (&dbs[dbcnt].prune_cond);
		      }

		  --n;
		}

	      first = 2;
	    }
#endif

	  /* Hand every readable connection to a worker and free its
	     slot; the worker owns (and closes) the descriptor now.  */
	  for (size_t cnt = first; cnt < nused && n > 0; ++cnt)
	    if (conns[cnt].revents != 0)
	      {
		fd_ready (conns[cnt].fd);

		/* Clean up the CONNS array.  */
		conns[cnt].fd = -1;
		if (cnt < firstfree)
		  firstfree = cnt;
		if (cnt == nused - 1)
		  do
		    --nused;
		  while (conns[nused - 1].fd == -1);

		--n;
	      }
	}

      /* Now find entries which have timed out.  */
      assert (nused > 0);

      /* We make the timeout length depend on the number of file
	 descriptors currently used.  */
#define ACCEPT_TIMEOUT \
  (MAX_ACCEPT_TIMEOUT							      \
   - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
      time_t laststart = now - ACCEPT_TIMEOUT;

      for (size_t cnt = nused - 1; cnt > 0; --cnt)
	{
	  if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
	    {
	      /* Remove the entry, it timed out.  */
	      (void) close (conns[cnt].fd);
	      conns[cnt].fd = -1;

	      if (cnt < firstfree)
		firstfree = cnt;
	      if (cnt == nused - 1)
		do
		  --nused;
		while (conns[nused - 1].fd == -1);
	    }
	}

      if (restart_p (now))
	restart ();
    }
}
1985
1986
#ifdef HAVE_EPOLL
/* Main event loop based on epoll.  Unlike main_loop_poll, bookkeeping
   is indexed directly by file descriptor: STARTTIME[fd] records when
   the connection was accepted (0 = slot unused) and HIGHEST is the
   largest in-use descriptor.  Returns (instead of looping forever)
   only when epoll setup fails, so the caller can fall back to poll.  */
static void
main_loop_epoll (int efd)
{
  struct epoll_event ev = { 0, };
  int nused = 1;
  size_t highest = 0;

  /* Add the socket.  */
  ev.events = EPOLLRDNORM;
  ev.data.fd = sock;
  if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
    /* We cannot use epoll.  */
    return;

# ifdef HAVE_INOTIFY
  if (inotify_fd != -1)
    {
      ev.events = EPOLLRDNORM;
      ev.data.fd = inotify_fd;
      if (epoll_ctl (efd, EPOLL_CTL_ADD, inotify_fd, &ev) == -1)
	/* We cannot use epoll.  */
	return;
      nused = 2;
    }
# endif

  while (1)
    {
      struct epoll_event revs[100];
# define nrevs (sizeof (revs) / sizeof (revs[0]))

      int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);

      time_t now = time (NULL);

      for (int cnt = 0; cnt < n; ++cnt)
	if (revs[cnt].data.fd == sock)
	  {
	    /* A new connection.  */
	    int fd;

# ifndef __ASSUME_ACCEPT4
	    fd = -1;
	    if (have_accept4 >= 0)
# endif
	      {
		fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
						  SOCK_NONBLOCK));
# ifndef __ASSUME_ACCEPT4
		/* Probe once whether the kernel supports accept4.  */
		if (have_accept4 == 0)
		  have_accept4 = fd != -1 || errno != ENOSYS ? 1 : -1;
# endif
	      }
# ifndef __ASSUME_ACCEPT4
	    if (have_accept4 < 0)
	      fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
# endif

	    /* Use the descriptor if we have not reached the limit.  */
	    if (fd >= 0)
	      {
		/* Try to add the new descriptor.  STARTTIME has only
		   NCONNS elements, hence the FD >= NCONNS check.  */
		ev.data.fd = fd;
		if (fd >= nconns
		    || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
		  /* The descriptor is too large or something went
		     wrong.  Close the descriptor.  */
		  close (fd);
		else
		  {
		    /* Remember when we accepted the connection.  */
		    starttime[fd] = now;

		    if (fd > highest)
		      highest = fd;

		    ++nused;
		  }
	      }
	  }
# ifdef HAVE_INOTIFY
	else if (revs[cnt].data.fd == inotify_fd)
	  {
	    bool to_clear[lastdb] = { false, };
	    union
	    {
	      struct inotify_event i;
	      char buf[sizeof (struct inotify_event) + PATH_MAX];
	    } inev;

	    /* Drain all queued inotify events.  */
	    while (1)
	      {
		ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd, &inev,
						       sizeof (inev)));
		if (nb < (ssize_t) sizeof (struct inotify_event))
		  {
		    if (__builtin_expect (nb == -1 && errno != EAGAIN, 0))
		      {
			/* Something went wrong when reading the inotify
			   data.  Better disable inotify.  */
			dbg_log (_("disabled inotify after read error %d"),
				 errno);
			(void) epoll_ctl (efd, EPOLL_CTL_DEL, inotify_fd,
					  NULL);
			close (inotify_fd);
			inotify_fd = -1;
		      }
		    break;
		  }

		/* Check which of the files changed.  */
		for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
		  if (inev.i.wd == dbs[dbcnt].inotify_descr)
		    {
		      to_clear[dbcnt] = true;
		      goto next;
		    }

		/* A change to resolv.conf also invalidates the host
		   database.  */
		if (inev.i.wd == resolv_conf_descr)
		  {
		    res_init ();
		    to_clear[hstdb] = true;
		  }
	      next:;
	      }

	    /* Actually perform the cache clearing by waking the prune
	       threads with CLEAR_CACHE set.  */
	    for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
	      if (to_clear[dbcnt])
		{
		  pthread_mutex_lock (&dbs[dbcnt].prune_lock);
		  dbs[dbcnt].clear_cache = 1;
		  pthread_mutex_unlock (&dbs[dbcnt].prune_lock);
		  pthread_cond_signal (&dbs[dbcnt].prune_cond);
		}
	  }
# endif
	else
	  {
	    /* Remove the descriptor from the epoll descriptor.  The
	       worker thread owns (and closes) it from here on.  */
	    (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);

	    /* Get a worker to handle the request.  */
	    fd_ready (revs[cnt].data.fd);

	    /* Reset the time.  */
	    starttime[revs[cnt].data.fd] = 0;
	    if (revs[cnt].data.fd == highest)
	      do
		--highest;
	      while (highest > 0 && starttime[highest] == 0);

	    --nused;
	  }

      /* Now look for descriptors for accepted connections which have
	 no reply in too long of a time.  */
      time_t laststart = now - ACCEPT_TIMEOUT;
      assert (starttime[sock] == 0);
      assert (inotify_fd == -1 || starttime[inotify_fd] == 0);
      for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
	if (starttime[cnt] != 0 && starttime[cnt] < laststart)
	  {
	    /* We are waiting for this one for too long.  Close it.  */
	    (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);

	    (void) close (cnt);

	    starttime[cnt] = 0;
	    if (cnt == highest)
	      --highest;
	  }
	else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
	  --highest;

      if (restart_p (now))
	restart ();
    }
}
#endif
2168
2169
/* Start all the threads we want.  The initial process is thread no. 1.
   Creates one prune thread per enabled database and NTHREADS worker
   threads, sizes the connection tables, and then runs the event loop
   (epoll when available, poll otherwise) in the calling thread.  Does
   not return.  */
void
start_threads (void)
{
  /* Initialize the conditional variable we will use.  The only
     non-standard attribute we might use is the clock selection.  */
  pthread_condattr_t condattr;
  pthread_condattr_init (&condattr);

#if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
    && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
  /* Determine whether the monotonic clock is available; if so, use it
     for the prune condvars so wall-clock jumps do not disturb the
     timeouts.  */
  struct timespec dummy;
# if _POSIX_MONOTONIC_CLOCK == 0
  if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
# endif
# if _POSIX_CLOCK_SELECTION == 0
    if (sysconf (_SC_CLOCK_SELECTION) > 0)
# endif
      if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
	  && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
	timeout_clock = CLOCK_MONOTONIC;
#endif

  /* Create the attribute for the threads.  They are all created
     detached.  */
  pthread_attr_init (&attr);
  pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
  /* Use 1MB stacks, twice as much for 64-bit architectures.  */
  pthread_attr_setstacksize (&attr, NSCD_THREAD_STACKSIZE);

  /* We allow less than LASTDB threads only for debugging.  */
  if (debug_level == 0)
    nthreads = MAX (nthreads, lastdb);

  /* Create the threads which prune the databases.  */
  // XXX Ideally this work would be done by some of the worker threads.
  // XXX But this is problematic since we would need to be able to wake
  // XXX them up explicitly as well as part of the group handling the
  // XXX ready-list.  This requires an operation where we can wait on
  // XXX two conditional variables at the same time.  This operation
  // XXX does not exist (yet).
  for (long int i = 0; i < lastdb; ++i)
    {
      /* Initialize the conditional variable.  */
      if (pthread_cond_init (&dbs[i].prune_cond, &condattr) != 0)
	{
	  dbg_log (_("could not initialize conditional variable"));
	  exit (1);
	}

      pthread_t th;
      if (dbs[i].enabled
	  && pthread_create (&th, &attr, nscd_run_prune, (void *) i) != 0)
	{
	  dbg_log (_("could not start clean-up thread; terminating"));
	  exit (1);
	}
    }

  pthread_condattr_destroy (&condattr);

  /* Start the worker threads.  Failing to create some of them is
     tolerable as long as at least one exists.  */
  for (long int i = 0; i < nthreads; ++i)
    {
      pthread_t th;
      if (pthread_create (&th, &attr, nscd_run_worker, NULL) != 0)
	{
	  if (i == 0)
	    {
	      dbg_log (_("could not start any worker thread; terminating"));
	      exit (1);
	    }

	  break;
	}
    }

  /* Determine how much room for descriptors we should initially
     allocate.  This might need to change later if we cap the number
     with MAXCONN.  */
  const long int nfds = sysconf (_SC_OPEN_MAX);
#define MINCONN 32
#define MAXCONN 16384
  if (nfds == -1 || nfds > MAXCONN)
    nconns = MAXCONN;
  else if (nfds < MINCONN)
    nconns = MINCONN;
  else
    nconns = nfds;

  /* We need memory to pass descriptors on to the worker threads.  */
  fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
  /* Array to keep track when connection was accepted.  */
  starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));

  /* In the main thread we execute the loop which handles incoming
     connections.  main_loop_epoll returns only on setup failure, in
     which case we fall back to poll.  */
#ifdef HAVE_EPOLL
  int efd = epoll_create (100);
  if (efd != -1)
    {
      main_loop_epoll (efd);
      close (efd);
    }
#endif

  main_loop_poll ();
}
2278
2279
/* Look up the uid, gid, and supplementary groups to run nscd as.  When
   this function is called, we are not listening on the nscd socket yet so
   we can just use the ordinary lookup functions without causing a lockup.
   Fills in server_uid/server_gid/server_groups/server_ngroups; exits the
   process on any failure.  */
static void
begin_drop_privileges (void)
{
  struct passwd *pwd = getpwnam (server_user);

  if (pwd == NULL)
    {
      dbg_log (_("Failed to run nscd as user '%s'"), server_user);
      error (EXIT_FAILURE, 0, _("Failed to run nscd as user '%s'"),
	     server_user);
    }

  server_uid = pwd->pw_uid;
  server_gid = pwd->pw_gid;

  /* Save the old UID/GID if we have to change back.  */
  if (paranoia)
    {
      old_uid = getuid ();
      old_gid = getgid ();
    }

  /* First call with a NULL buffer: getgrouplist is expected to fail
     (return -1) and store the required group count in server_ngroups.
     A zero return here would mean the NULL buffer sufficed, which
     cannot legitimately happen.  */
  if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
    {
      /* This really must never happen.  */
      dbg_log (_("Failed to run nscd as user '%s'"), server_user);
      error (EXIT_FAILURE, errno, _("initial getgrouplist failed"));
    }

  server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));

  /* Second call with a properly sized buffer retrieves the list.  */
  if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
      == -1)
    {
      dbg_log (_("Failed to run nscd as user '%s'"), server_user);
      error (EXIT_FAILURE, errno, _("getgrouplist failed"));
    }
}
2321
2322
2323 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
2324 run nscd as the user specified in the configuration file. */
2325 static void
2326 finish_drop_privileges (void)
2327 {
2328 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2329 /* We need to preserve the capabilities to connect to the audit daemon. */
2330 cap_t new_caps = preserve_capabilities ();
2331 #endif
2332
2333 if (setgroups (server_ngroups, server_groups) == -1)
2334 {
2335 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2336 error (EXIT_FAILURE, errno, _("setgroups failed"));
2337 }
2338
2339 int res;
2340 if (paranoia)
2341 res = setresgid (server_gid, server_gid, old_gid);
2342 else
2343 res = setgid (server_gid);
2344 if (res == -1)
2345 {
2346 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2347 perror ("setgid");
2348 exit (4);
2349 }
2350
2351 if (paranoia)
2352 res = setresuid (server_uid, server_uid, old_uid);
2353 else
2354 res = setuid (server_uid);
2355 if (res == -1)
2356 {
2357 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2358 perror ("setuid");
2359 exit (4);
2360 }
2361
2362 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2363 /* Remove the temporary capabilities. */
2364 install_real_capabilities (new_caps);
2365 #endif
2366 }