/* glibc nscd/connections.c — snapshot including the change
   "Enhance nscd's inotify support" (Bug 14906).  */
1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2015 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>. */
18
19 #include <alloca.h>
20 #include <assert.h>
21 #include <atomic.h>
22 #include <error.h>
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <grp.h>
26 #include <ifaddrs.h>
27 #include <libintl.h>
28 #include <pthread.h>
29 #include <pwd.h>
30 #include <resolv.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <unistd.h>
34 #include <stdint.h>
35 #include <arpa/inet.h>
36 #ifdef HAVE_NETLINK
37 # include <linux/netlink.h>
38 # include <linux/rtnetlink.h>
39 #endif
40 #ifdef HAVE_EPOLL
41 # include <sys/epoll.h>
42 #endif
43 #ifdef HAVE_INOTIFY
44 # include <sys/inotify.h>
45 #endif
46 #include <sys/mman.h>
47 #include <sys/param.h>
48 #include <sys/poll.h>
49 #ifdef HAVE_SENDFILE
50 # include <sys/sendfile.h>
51 #endif
52 #include <sys/socket.h>
53 #include <sys/stat.h>
54 #include <sys/un.h>
55
56 #include "nscd.h"
57 #include "dbg_log.h"
58 #include "selinux.h"
59 #include <resolv/resolv.h>
60
61 #include <kernel-features.h>
62 #include <libc-internal.h>
63
64
/* Support to run nscd as an unprivileged user */
const char *server_user;
static uid_t server_uid;
static gid_t server_gid;
/* User allowed to request statistics (GETSTAT).  */
const char *stat_user;
uid_t stat_uid;
/* Supplementary group list for SERVER_USER, filled in when dropping
   privileges.  */
static gid_t *server_groups;
#ifndef NGROUPS
# define NGROUPS 32
#endif
static int server_ngroups;

/* Attributes used when creating the worker threads.  */
static pthread_attr_t attr;

static void begin_drop_privileges (void);
static void finish_drop_privileges (void);
/* Map request type to a string.  Indexed by the request_type values;
   used for logging and statistics output.  */
const char *const serv2str[LASTREQ] =
{
  [GETPWBYNAME] = "GETPWBYNAME",
  [GETPWBYUID] = "GETPWBYUID",
  [GETGRBYNAME] = "GETGRBYNAME",
  [GETGRBYGID] = "GETGRBYGID",
  [GETHOSTBYNAME] = "GETHOSTBYNAME",
  [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
  [GETHOSTBYADDR] = "GETHOSTBYADDR",
  [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
  [SHUTDOWN] = "SHUTDOWN",
  [GETSTAT] = "GETSTAT",
  [INVALIDATE] = "INVALIDATE",
  [GETFDPW] = "GETFDPW",
  [GETFDGR] = "GETFDGR",
  [GETFDHST] = "GETFDHST",
  [GETAI] = "GETAI",
  [INITGROUPS] = "INITGROUPS",
  [GETSERVBYNAME] = "GETSERVBYNAME",
  [GETSERVBYPORT] = "GETSERVBYPORT",
  [GETFDSERV] = "GETFDSERV",
  [GETNETGRENT] = "GETNETGRENT",
  [INNETGR] = "INNETGR",
  [GETFDNETGR] = "GETFDNETGR"
};
108
/* The control data structures for the services.  One entry per
   database.  All databases start out disabled, non-persistent and
   non-shared; the configuration file may override these defaults.
   postimeout/negtimeout are the default TTLs for positive/negative
   entries (presumably seconds, per nscd convention — confirm against
   nscd.conf handling).  */
struct database_dyn dbs[lastdb] =
{
  /* Password database.  */
  [pwddb] = {
    .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
    .prune_lock = PTHREAD_MUTEX_INITIALIZER,
    .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
    .enabled = 0,
    .check_file = 1,
    .persistent = 0,
    .propagate = 1,
    .shared = 0,
    .max_db_size = DEFAULT_MAX_DB_SIZE,
    .suggested_module = DEFAULT_SUGGESTED_MODULE,
    .db_filename = _PATH_NSCD_PASSWD_DB,
    .disabled_iov = &pwd_iov_disabled,
    .postimeout = 3600,
    .negtimeout = 20,
    .wr_fd = -1,
    .ro_fd = -1,
    .mmap_used = false
  },
  /* Group database.  */
  [grpdb] = {
    .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
    .prune_lock = PTHREAD_MUTEX_INITIALIZER,
    .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
    .enabled = 0,
    .check_file = 1,
    .persistent = 0,
    .propagate = 1,
    .shared = 0,
    .max_db_size = DEFAULT_MAX_DB_SIZE,
    .suggested_module = DEFAULT_SUGGESTED_MODULE,
    .db_filename = _PATH_NSCD_GROUP_DB,
    .disabled_iov = &grp_iov_disabled,
    .postimeout = 3600,
    .negtimeout = 60,
    .wr_fd = -1,
    .ro_fd = -1,
    .mmap_used = false
  },
  /* Hosts database.  */
  [hstdb] = {
    .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
    .prune_lock = PTHREAD_MUTEX_INITIALIZER,
    .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
    .enabled = 0,
    .check_file = 1,
    .persistent = 0,
    .propagate = 0,		/* Not used.  */
    .shared = 0,
    .max_db_size = DEFAULT_MAX_DB_SIZE,
    .suggested_module = DEFAULT_SUGGESTED_MODULE,
    .db_filename = _PATH_NSCD_HOSTS_DB,
    .disabled_iov = &hst_iov_disabled,
    .postimeout = 3600,
    .negtimeout = 20,
    .wr_fd = -1,
    .ro_fd = -1,
    .mmap_used = false
  },
  /* Services database.  */
  [servdb] = {
    .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
    .prune_lock = PTHREAD_MUTEX_INITIALIZER,
    .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
    .enabled = 0,
    .check_file = 1,
    .persistent = 0,
    .propagate = 0,		/* Not used.  */
    .shared = 0,
    .max_db_size = DEFAULT_MAX_DB_SIZE,
    .suggested_module = DEFAULT_SUGGESTED_MODULE,
    .db_filename = _PATH_NSCD_SERVICES_DB,
    .disabled_iov = &serv_iov_disabled,
    .postimeout = 28800,
    .negtimeout = 20,
    .wr_fd = -1,
    .ro_fd = -1,
    .mmap_used = false
  },
  /* Netgroup database.  */
  [netgrdb] = {
    .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
    .prune_lock = PTHREAD_MUTEX_INITIALIZER,
    .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
    .enabled = 0,
    .check_file = 1,
    .persistent = 0,
    .propagate = 0,		/* Not used.  */
    .shared = 0,
    .max_db_size = DEFAULT_MAX_DB_SIZE,
    .suggested_module = DEFAULT_SUGGESTED_MODULE,
    .db_filename = _PATH_NSCD_NETGROUP_DB,
    .disabled_iov = &netgroup_iov_disabled,
    .postimeout = 28800,
    .negtimeout = 20,
    .wr_fd = -1,
    .ro_fd = -1,
    .mmap_used = false
  }
};
208
209
210 /* Mapping of request type to database. */
211 static struct
212 {
213 bool data_request;
214 struct database_dyn *db;
215 } const reqinfo[LASTREQ] =
216 {
217 [GETPWBYNAME] = { true, &dbs[pwddb] },
218 [GETPWBYUID] = { true, &dbs[pwddb] },
219 [GETGRBYNAME] = { true, &dbs[grpdb] },
220 [GETGRBYGID] = { true, &dbs[grpdb] },
221 [GETHOSTBYNAME] = { true, &dbs[hstdb] },
222 [GETHOSTBYNAMEv6] = { true, &dbs[hstdb] },
223 [GETHOSTBYADDR] = { true, &dbs[hstdb] },
224 [GETHOSTBYADDRv6] = { true, &dbs[hstdb] },
225 [SHUTDOWN] = { false, NULL },
226 [GETSTAT] = { false, NULL },
227 [SHUTDOWN] = { false, NULL },
228 [GETFDPW] = { false, &dbs[pwddb] },
229 [GETFDGR] = { false, &dbs[grpdb] },
230 [GETFDHST] = { false, &dbs[hstdb] },
231 [GETAI] = { true, &dbs[hstdb] },
232 [INITGROUPS] = { true, &dbs[grpdb] },
233 [GETSERVBYNAME] = { true, &dbs[servdb] },
234 [GETSERVBYPORT] = { true, &dbs[servdb] },
235 [GETFDSERV] = { false, &dbs[servdb] },
236 [GETNETGRENT] = { true, &dbs[netgrdb] },
237 [INNETGR] = { true, &dbs[netgrdb] },
238 [GETFDNETGR] = { false, &dbs[netgrdb] }
239 };
240
241
/* Initial number of threads to use.  -1 means "not configured"; a
   default is chosen in nscd_init.  */
int nthreads = -1;
/* Maximum number of threads to use.  */
int max_nthreads = 32;

/* Socket for incoming connections.  */
static int sock;

#ifdef HAVE_INOTIFY
/* Inotify descriptor.  -1 while inotify is unavailable/uninitialized.  */
int inotify_fd = -1;
#endif

#ifdef HAVE_NETLINK
/* Descriptor for netlink status updates.  -1 when not open.  */
static int nl_status_fd = -1;
#endif

#ifndef __ASSUME_SOCK_CLOEXEC
/* Negative if SOCK_CLOEXEC is not supported, positive if it is, zero
   before we know the result.  */
static int have_sock_cloexec;
#endif
#ifndef __ASSUME_ACCEPT4
/* Same tri-state convention for the accept4 system call.  */
static int have_accept4;
#endif

/* Number of times clients had to wait.  */
unsigned long int client_queued;
271
272
/* Send LEN bytes starting at BUF over FD, restarting after EINTR and
   after short writes.  MSG_NOSIGNAL suppresses SIGPIPE if the peer is
   gone.  Returns LEN when everything was sent, the number of bytes
   actually transferred if the peer stopped accepting data, or a
   negative value (errno set by send) on error.  */
ssize_t
writeall (int fd, const void *buf, size_t len)
{
  const char *cp = buf;
  size_t remaining = len;
  ssize_t ret;

  do
    {
      /* Restart the call manually when interrupted by a signal.  */
      do
	ret = send (fd, cp, remaining, MSG_NOSIGNAL);
      while (ret < 0 && errno == EINTR);

      if (ret <= 0)
	break;

      cp += ret;
      remaining -= ret;
    }
  while (remaining > 0);

  return ret < 0 ? ret : (ssize_t) (len - remaining);
}
289
290
291 #ifdef HAVE_SENDFILE
/* Copy LEN bytes from FROMFD (starting at file offset OFF) to TOFD
   using sendfile, restarting after EINTR and short transfers.
   Returns the number of bytes transferred (LEN on full success) or a
   negative value (errno set by sendfile) on error.  FROMFD's file
   position is not changed; the offset is tracked locally.  */
ssize_t
sendfileall (int tofd, int fromfd, off_t off, size_t len)
{
  ssize_t remaining = len;
  ssize_t ret;

  do
    {
      /* Restart the call manually when interrupted by a signal.  */
      do
	ret = sendfile (tofd, fromfd, &off, remaining);
      while (ret < 0 && errno == EINTR);

      if (ret <= 0)
	break;

      remaining -= ret;
    }
  while (remaining > 0);

  return ret < 0 ? ret : (ssize_t) len - remaining;
}
308 #endif
309
310
/* Per-byte usage classification of the persistent database's data
   area, used by verify_persistent_db/check_use to detect overlapping
   or dangling records.  */
enum usekey
{
  use_not = 0,
  /* The following three are not really used, they are symbolic constants.  */
  use_first = 16,
  use_begin = 32,
  use_end = 64,

  /* A hash-table entry (check_use is called with
     sizeof (struct hashentry) for this kind).  */
  use_he = 1,
  use_he_begin = use_he | use_begin,
  use_he_end = use_he | use_end,
  /* A data record (struct datahead plus payload).  */
  use_data = 3,
  use_data_begin = use_data | use_begin,
  use_data_end = use_data | use_end,
  use_data_first = use_data_begin | use_first
};
327
328
/* Mark the region [START, START + LEN) of the data area as holding an
   object of kind USE in USEMAP (one classification byte per data
   byte) and verify that it does not conflict with objects seen
   earlier.  FIRST_FREE is the end of the allocated area.  Returns 1
   when the region is consistent, 0 on any violation.  A data record
   may be referenced by several hash entries; a hash entry must be
   referenced exactly once.  */
static int
check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
	   enum usekey use, ref_t start, size_t len)
{
  /* Every object needs at least a begin and an end marker byte.  */
  assert (len >= 2);

  /* The object must lie completely inside the allocated area and
     start on a block boundary.  */
  if (start > first_free || start + len > first_free
      || (start & BLOCK_ALIGN_M1))
    return 0;

  if (usemap[start] == use_not)
    {
      /* First time we see this object.  */
      /* Add the start marker.  */
      usemap[start] = use | use_begin;
      use &= ~use_first;

      /* All interior bytes must still be unclaimed.  */
      while (--len > 0)
	if (usemap[++start] != use_not)
	  return 0;
	else
	  usemap[start] = use;

      /* Add the end marker.  */
      usemap[start] = use | use_end;
    }
  else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
    {
      /* The object was seen before; only data records may be
	 referenced multiple times.  */
      /* Hash entries can't be shared.  */
      if (use == use_he)
	return 0;

      /* Accumulate the use_first flag if this reference carries it.  */
      usemap[start] |= (use & use_first);
      use &= ~use_first;

      /* The interior and the end marker must match the earlier
	 classification exactly, i.e. same kind and same length.  */
      while (--len > 1)
	if (usemap[++start] != use)
	  return 0;

      if (usemap[++start] != (use | use_end))
	return 0;
    }
  else
    /* Points to a wrong object or somewhere in the middle.  */
    return 0;

  return 1;
}
376
377
/* Verify data in persistent database.  MEM is the mmap'ed database,
   READHEAD is the header as read from the file before mapping, DBNR
   the database index.  Returns 1 if the database is consistent and
   can be used, 0 otherwise.  The check walks every hash chain,
   validates each hash entry and its data record, and uses a per-byte
   use map to detect overlaps.  */
static int
verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
{
  assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb || dbnr == servdb
	  || dbnr == netgrdb);

  time_t now = time (NULL);

  struct database_pers_head *head = mem;
  struct database_pers_head head_copy = *head;

  /* Check that the header that was read matches the head in the database.  */
  if (memcmp (head, readhead, sizeof (*head)) != 0)
    return 0;

  /* First some easy tests: make sure the database header is sane.  */
  if (head->version != DB_VERSION
      || head->header_size != sizeof (*head)
      /* We allow a timestamp to be one hour ahead of the current time.
	 This should cover daylight saving time changes.  */
      || head->timestamp > now + 60 * 60 + 60
      || (head->gc_cycle & 1)
      || head->module == 0
      || (size_t) head->module > INT32_MAX / sizeof (ref_t)
      || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
      || head->first_free < 0
      || head->first_free > head->data_size
      || (head->first_free & BLOCK_ALIGN_M1) != 0
      || head->maxnentries < 0
      || head->maxnsearched < 0)
    return 0;

  /* One classification byte per allocated data byte; see enum usekey.  */
  uint8_t *usemap = calloc (head->first_free, 1);
  if (usemap == NULL)
    return 0;

  const char *data = (char *) &head->array[roundup (head->module,
						    ALIGN / sizeof (ref_t))];

  nscd_ssize_t he_cnt = 0;
  for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
    {
      /* TRAIL advances every other step behind WORK; meeting again
	 means the chain is circular (tortoise-and-hare).  */
      ref_t trail = head->array[cnt];
      ref_t work = trail;
      int tick = 0;

      while (work != ENDREF)
	{
	  if (! check_use (data, head->first_free, usemap, use_he, work,
			   sizeof (struct hashentry)))
	    goto fail;

	  /* Now we know we can dereference the record.  */
	  struct hashentry *here = (struct hashentry *) (data + work);

	  ++he_cnt;

	  /* Make sure the record is for this type of service.  */
	  if (here->type >= LASTREQ
	      || reqinfo[here->type].db != &dbs[dbnr])
	    goto fail;

	  /* Validate boolean field value.  */
	  if (here->first != false && here->first != true)
	    goto fail;

	  if (here->len < 0)
	    goto fail;

	  /* Now the data.  */
	  if (here->packet < 0
	      || here->packet > head->first_free
	      || here->packet + sizeof (struct datahead) > head->first_free)
	    goto fail;

	  struct datahead *dh = (struct datahead *) (data + here->packet);

	  if (! check_use (data, head->first_free, usemap,
			   use_data | (here->first ? use_first : 0),
			   here->packet, dh->allocsize))
	    goto fail;

	  if (dh->allocsize < sizeof (struct datahead)
	      || dh->recsize > dh->allocsize
	      || (dh->notfound != false && dh->notfound != true)
	      || (dh->usable != false && dh->usable != true))
	    goto fail;

	  /* The key must lie inside this entry's data record.  */
	  if (here->key < here->packet + sizeof (struct datahead)
	      || here->key > here->packet + dh->allocsize
	      || here->key + here->len > here->packet + dh->allocsize)
	    goto fail;

	  work = here->next;

	  if (work == trail)
	    /* A circular list, this must not happen.  */
	    goto fail;
	  if (tick)
	    trail = ((struct hashentry *) (data + trail))->next;
	  tick = 1 - tick;
	}
    }

  /* The chain walk must account for exactly the recorded number of
     entries.  */
  if (he_cnt != head->nentries)
    goto fail;

  /* See if all data and keys had at least one reference from
     he->first == true hashentry.  */
  for (ref_t idx = 0; idx < head->first_free; ++idx)
    {
      if (usemap[idx] == use_data_begin)
	goto fail;
    }

  /* Finally, make sure the database hasn't changed since the first test.  */
  if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
    goto fail;

  free (usemap);
  return 1;

fail:
  free (usemap);
  return 0;
}
505
506
/* Open database files with close-on-exec when the platform provides
   O_CLOEXEC; otherwise add no extra open flags (a later fcntl sets
   FD_CLOEXEC in paranoia mode).  */
#ifdef O_CLOEXEC
# define EXTRA_O_FLAGS O_CLOEXEC
#else
# define EXTRA_O_FLAGS 0
#endif
512
513
/* Initialize database information structures.  For each enabled
   database: map or (re)create the persistent on-disk database (or an
   anonymous temporary file / plain heap allocation as fallback), then
   create and bind the UNIX-domain listening socket, and optionally a
   netlink socket for network-configuration change notification.
   Exits the process on fatal errors.  */
void
nscd_init (void)
{
  /* Look up unprivileged uid/gid/groups before we start listening on the
     socket  */
  if (server_user != NULL)
    begin_drop_privileges ();

  if (nthreads == -1)
    /* No configuration for this value, assume a default.  */
    nthreads = 4;

  for (size_t cnt = 0; cnt < lastdb; ++cnt)
    if (dbs[cnt].enabled)
      {
	pthread_rwlock_init (&dbs[cnt].lock, NULL);
	pthread_mutex_init (&dbs[cnt].memlock, NULL);

	if (dbs[cnt].persistent)
	  {
	    /* Try to open the appropriate file on disk.  */
	    int fd = open (dbs[cnt].db_filename, O_RDWR | EXTRA_O_FLAGS);
	    if (fd != -1)
	      {
		char *msg = NULL;
		struct stat64 st;
		void *mem;
		size_t total;
		struct database_pers_head head;
		ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
						      sizeof (head)));
		if (n != sizeof (head) || fstat64 (fd, &st) != 0)
		  {
		    /* Any validation failure below removes the file so
		       a fresh database is created further down.  */
		  fail_db_errno:
		    /* The code is single-threaded at this point so
		       using strerror is just fine.  */
		    msg = strerror (errno);
		  fail_db:
		    dbg_log (_("invalid persistent database file \"%s\": %s"),
			     dbs[cnt].db_filename, msg);
		    unlink (dbs[cnt].db_filename);
		  }
		else if (head.module == 0 && head.data_size == 0)
		  {
		    /* The file has been created, but the head has not
		       been initialized yet.  */
		    msg = _("uninitialized header");
		    goto fail_db;
		  }
		else if (head.header_size != (int) sizeof (head))
		  {
		    msg = _("header size does not match");
		    goto fail_db;
		  }
		else if ((total = (sizeof (head)
				   + roundup (head.module * sizeof (ref_t),
					      ALIGN)
				   + head.data_size))
			 > st.st_size
			 || total < sizeof (head))
		  {
		    msg = _("file size does not match");
		    goto fail_db;
		  }
		/* Note we map with the maximum size allowed for the
		   database.  This is likely much larger than the
		   actual file size.  This is OK on most OSes since
		   extensions of the underlying file will
		   automatically translate more pages available for
		   memory access.  */
		else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
				      PROT_READ | PROT_WRITE,
				      MAP_SHARED, fd, 0))
			 == MAP_FAILED)
		  goto fail_db_errno;
		else if (!verify_persistent_db (mem, &head, cnt))
		  {
		    munmap (mem, total);
		    msg = _("verification failed");
		    goto fail_db;
		  }
		else
		  {
		    /* Success.  We have the database.  */
		    dbs[cnt].head = mem;
		    dbs[cnt].memsize = total;
		    dbs[cnt].data = (char *)
		      &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
						     ALIGN / sizeof (ref_t))];
		    dbs[cnt].mmap_used = true;

		    if (dbs[cnt].suggested_module > head.module)
		      dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
			       dbnames[cnt]);

		    dbs[cnt].wr_fd = fd;
		    fd = -1;
		    /* We also need a read-only descriptor.  */
		    if (dbs[cnt].shared)
		      {
			dbs[cnt].ro_fd = open (dbs[cnt].db_filename,
					       O_RDONLY | EXTRA_O_FLAGS);
			if (dbs[cnt].ro_fd == -1)
			  dbg_log (_("\
cannot create read-only descriptor for \"%s\"; no mmap"),
				   dbs[cnt].db_filename);
		      }

		    // XXX Shall we test whether the descriptors actually
		    // XXX point to the same file?
		  }

		/* Close the file descriptors in case something went
		   wrong in which case the variable have not been
		   assigned -1.  */
		if (fd != -1)
		  close (fd);
	      }
	    else if (errno == EACCES)
	      do_exit (EXIT_FAILURE, 0, _("cannot access '%s'"),
		       dbs[cnt].db_filename);
	  }

	if (dbs[cnt].head == NULL)
	  {
	    /* No database loaded.  Allocate the data structure,
	       possibly on disk.  */
	    struct database_pers_head head;
	    size_t total = (sizeof (head)
			    + roundup (dbs[cnt].suggested_module
				       * sizeof (ref_t), ALIGN)
			    + (dbs[cnt].suggested_module
			       * DEFAULT_DATASIZE_PER_BUCKET));

	    /* Try to create the database.  If we do not need a
	       persistent database create a temporary file.  */
	    int fd;
	    int ro_fd = -1;
	    if (dbs[cnt].persistent)
	      {
		/* O_EXCL detects a stale or concurrently used database
		   file; see the EEXIST handling below.  */
		fd = open (dbs[cnt].db_filename,
			   O_RDWR | O_CREAT | O_EXCL | O_TRUNC | EXTRA_O_FLAGS,
			   S_IRUSR | S_IWUSR);
		if (fd != -1 && dbs[cnt].shared)
		  ro_fd = open (dbs[cnt].db_filename,
				O_RDONLY | EXTRA_O_FLAGS);
	      }
	    else
	      {
		char fname[] = _PATH_NSCD_XYZ_DB_TMP;
		fd = mkostemp (fname, EXTRA_O_FLAGS);

		/* We do not need the file name anymore after we
		   opened another file descriptor in read-only mode.  */
		if (fd != -1)
		  {
		    if (dbs[cnt].shared)
		      ro_fd = open (fname, O_RDONLY | EXTRA_O_FLAGS);

		    unlink (fname);
		  }
	      }

	    if (fd == -1)
	      {
		if (errno == EEXIST)
		  {
		    dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
			     dbnames[cnt], dbs[cnt].db_filename);
		    do_exit (1, 0, NULL);
		  }

		if (dbs[cnt].persistent)
		  dbg_log (_("cannot create %s; no persistent database used"),
			   dbs[cnt].db_filename);
		else
		  dbg_log (_("cannot create %s; no sharing possible"),
			   dbs[cnt].db_filename);

		dbs[cnt].persistent = 0;
		// XXX remember: no mmap
	      }
	    else
	      {
		/* Tell the user if we could not create the read-only
		   descriptor.  */
		if (ro_fd == -1 && dbs[cnt].shared)
		  dbg_log (_("\
cannot create read-only descriptor for \"%s\"; no mmap"),
			   dbs[cnt].db_filename);

		/* Before we create the header, initialize the hash
		   table.  That way if we get interrupted while writing
		   the header we can recognize a partially initialized
		   database.  */
		size_t ps = sysconf (_SC_PAGESIZE);
		char tmpbuf[ps];
		assert (~ENDREF == 0);
		memset (tmpbuf, '\xff', ps);

		size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
		off_t offset = sizeof (head);

		/* Write the 0xff fill page by page, aligning the first
		   write to a page boundary.  */
		size_t towrite;
		if (offset % ps != 0)
		  {
		    towrite = MIN (remaining, ps - (offset % ps));
		    if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
		      goto write_fail;
		    offset += towrite;
		    remaining -= towrite;
		  }

		while (remaining > ps)
		  {
		    if (pwrite (fd, tmpbuf, ps, offset) == -1)
		      goto write_fail;
		    offset += ps;
		    remaining -= ps;
		  }

		if (remaining > 0
		    && pwrite (fd, tmpbuf, remaining, offset) != remaining)
		  goto write_fail;

		/* Create the header of the file.  */
		struct database_pers_head head =
		  {
		    .version = DB_VERSION,
		    .header_size = sizeof (head),
		    .module = dbs[cnt].suggested_module,
		    .data_size = (dbs[cnt].suggested_module
				  * DEFAULT_DATASIZE_PER_BUCKET),
		    .first_free = 0
		  };
		void *mem;

		if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
		     != sizeof (head))
		    || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
			!= 0)
		    || (mem = mmap (NULL, dbs[cnt].max_db_size,
				    PROT_READ | PROT_WRITE,
				    MAP_SHARED, fd, 0)) == MAP_FAILED)
		  {
		  write_fail:
		    unlink (dbs[cnt].db_filename);
		    dbg_log (_("cannot write to database file %s: %s"),
			     dbs[cnt].db_filename, strerror (errno));
		    dbs[cnt].persistent = 0;
		  }
		else
		  {
		    /* Success.  */
		    dbs[cnt].head = mem;
		    dbs[cnt].data = (char *)
		      &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
						     ALIGN / sizeof (ref_t))];
		    dbs[cnt].memsize = total;
		    dbs[cnt].mmap_used = true;

		    /* Remember the descriptors.  */
		    dbs[cnt].wr_fd = fd;
		    dbs[cnt].ro_fd = ro_fd;
		    fd = -1;
		    ro_fd = -1;
		  }

		if (fd != -1)
		  close (fd);
		if (ro_fd != -1)
		  close (ro_fd);
	      }
	  }

#if !defined O_CLOEXEC || !defined __ASSUME_O_CLOEXEC
	/* We do not check here whether the O_CLOEXEC provided to the
	   open call was successful or not.  The two fcntl calls are
	   only performed once each per process start-up and therefore
	   is not noticeable at all.  */
	if (paranoia
	    && ((dbs[cnt].wr_fd != -1
		 && fcntl (dbs[cnt].wr_fd, F_SETFD, FD_CLOEXEC) == -1)
		|| (dbs[cnt].ro_fd != -1
		    && fcntl (dbs[cnt].ro_fd, F_SETFD, FD_CLOEXEC) == -1)))
	  {
	    dbg_log (_("\
cannot set socket to close on exec: %s; disabling paranoia mode"),
		     strerror (errno));
	    paranoia = 0;
	  }
#endif

	if (dbs[cnt].head == NULL)
	  {
	    /* We do not use the persistent database.  Just
	       create an in-memory data structure.  */
	    assert (! dbs[cnt].persistent);

	    dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
				     + (dbs[cnt].suggested_module
					* sizeof (ref_t)));
	    memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
	    assert (~ENDREF == 0);
	    memset (dbs[cnt].head->array, '\xff',
		    dbs[cnt].suggested_module * sizeof (ref_t));
	    dbs[cnt].head->module = dbs[cnt].suggested_module;
	    dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
					* dbs[cnt].head->module);
	    dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
	    dbs[cnt].head->first_free = 0;

	    dbs[cnt].shared = 0;
	    assert (dbs[cnt].ro_fd == -1);
	  }
      }

  /* Create the socket.  */
#ifndef __ASSUME_SOCK_CLOEXEC
  sock = -1;
  if (have_sock_cloexec >= 0)
#endif
    {
      sock = socket (AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
#ifndef __ASSUME_SOCK_CLOEXEC
      if (have_sock_cloexec == 0)
	have_sock_cloexec = sock != -1 || errno != EINVAL ? 1 : -1;
#endif
    }
#ifndef __ASSUME_SOCK_CLOEXEC
  /* Fall back to a plain socket; the flags are set with fcntl below.  */
  if (have_sock_cloexec < 0)
    sock = socket (AF_UNIX, SOCK_STREAM, 0);
#endif
  if (sock < 0)
    {
      dbg_log (_("cannot open socket: %s"), strerror (errno));
      do_exit (errno == EACCES ? 4 : 1, 0, NULL);
    }
  /* Bind a name to the socket.  */
  struct sockaddr_un sock_addr;
  sock_addr.sun_family = AF_UNIX;
  strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
  if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
    {
      dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
      do_exit (errno == EACCES ? 4 : 1, 0, NULL);
    }

#ifndef __ASSUME_SOCK_CLOEXEC
  if (have_sock_cloexec < 0)
    {
      /* We don't want to get stuck on accept.  */
      int fl = fcntl (sock, F_GETFL);
      if (fl == -1 || fcntl (sock, F_SETFL, fl | O_NONBLOCK) == -1)
	{
	  dbg_log (_("cannot change socket to nonblocking mode: %s"),
		   strerror (errno));
	  do_exit (1, 0, NULL);
	}

      /* The descriptor needs to be closed on exec.  */
      if (paranoia && fcntl (sock, F_SETFD, FD_CLOEXEC) == -1)
	{
	  dbg_log (_("cannot set socket to close on exec: %s"),
		   strerror (errno));
	  do_exit (1, 0, NULL);
	}
    }
#endif

  /* Set permissions for the socket.  */
  chmod (_PATH_NSCDSOCKET, DEFFILEMODE);

  /* Set the socket up to accept connections.  */
  if (listen (sock, SOMAXCONN) < 0)
    {
      dbg_log (_("cannot enable socket to accept connections: %s"),
	       strerror (errno));
      do_exit (1, 0, NULL);
    }

#ifdef HAVE_NETLINK
  if (dbs[hstdb].enabled)
    {
      /* Try to open netlink socket to monitor network setting changes.  */
      nl_status_fd = socket (AF_NETLINK,
			     SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK,
			     NETLINK_ROUTE);
      if (nl_status_fd != -1)
	{
	  struct sockaddr_nl snl;
	  memset (&snl, '\0', sizeof (snl));
	  snl.nl_family = AF_NETLINK;
	  /* XXX Is this the best set to use?  */
	  snl.nl_groups = (RTMGRP_IPV4_IFADDR | RTMGRP_TC | RTMGRP_IPV4_MROUTE
			   | RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_RULE
			   | RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_MROUTE
			   | RTMGRP_IPV6_ROUTE | RTMGRP_IPV6_IFINFO
			   | RTMGRP_IPV6_PREFIX);

	  if (bind (nl_status_fd, (struct sockaddr *) &snl, sizeof (snl)) != 0)
	    {
	      /* Monitoring is best-effort; continue without it.  */
	      close (nl_status_fd);
	      nl_status_fd = -1;
	    }
	  else
	    {
	      /* Start the timestamp process.  */
	      dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
		= __bump_nl_timestamp ();

# ifndef __ASSUME_SOCK_CLOEXEC
	      if (have_sock_cloexec < 0)
		{
		  /* We don't want to get stuck on accept.  */
		  int fl = fcntl (nl_status_fd, F_GETFL);
		  if (fl == -1
		      || fcntl (nl_status_fd, F_SETFL, fl | O_NONBLOCK) == -1)
		    {
		      dbg_log (_("\
cannot change socket to nonblocking mode: %s"),
			       strerror (errno));
		      do_exit (1, 0, NULL);
		    }

		  /* The descriptor needs to be closed on exec.  */
		  if (paranoia
		      && fcntl (nl_status_fd, F_SETFD, FD_CLOEXEC) == -1)
		    {
		      dbg_log (_("cannot set socket to close on exec: %s"),
			       strerror (errno));
		      do_exit (1, 0, NULL);
		    }
		}
# endif
	    }
	}
    }
#endif

  /* Change to unprivileged uid/gid/groups if specified in config file */
  if (server_user != NULL)
    finish_drop_privileges ();
}
959
#ifdef HAVE_INOTIFY
/* Events that invalidate the traced file itself: deleted, rewritten
   and closed, or moved away.  */
#define TRACED_FILE_MASK (IN_DELETE_SELF | IN_CLOSE_WRITE | IN_MOVE_SELF)
/* Events on the parent directory that may (re-)create the file.  */
#define TRACED_DIR_MASK (IN_DELETE_SELF | IN_CREATE | IN_MOVED_TO | IN_MOVE_SELF)
/* (Re-)install inotify watches for the traced file FINFO and its
   parent directory.  A watch already in place (descriptor >= 0) is
   left alone.  Failures are only logged; the caller keeps working
   without inotify for this file.  */
void
install_watches (struct traced_file *finfo)
{
  /* Use inotify support if we have it.  */
  if (finfo->inotify_descr[TRACED_FILE] < 0)
    finfo->inotify_descr[TRACED_FILE] = inotify_add_watch (inotify_fd,
							   finfo->fname,
							   TRACED_FILE_MASK);
  if (finfo->inotify_descr[TRACED_FILE] < 0)
    {
      dbg_log (_("disabled inotify-based monitoring for file `%s': %s"),
	       finfo->fname, strerror (errno));
      return;
    }
  dbg_log (_("monitoring file `%s` (%d)"),
	   finfo->fname, finfo->inotify_descr[TRACED_FILE]);
  /* Additionally listen for events in the file's parent directory.
     We do this because the file to be watched might be
     deleted and then added back again.  When it is added back again
     we must re-add the watch.  We must also cover IN_MOVED_TO to
     detect a file being moved into the directory.  */
  if (finfo->inotify_descr[TRACED_DIR] < 0)
    finfo->inotify_descr[TRACED_DIR] = inotify_add_watch (inotify_fd,
							  finfo->dname,
							  TRACED_DIR_MASK);
  if (finfo->inotify_descr[TRACED_DIR] < 0)
    {
      dbg_log (_("disabled inotify-based monitoring for directory `%s': %s"),
	       finfo->fname, strerror (errno));
      return;
    }
  dbg_log (_("monitoring directory `%s` (%d)"),
	   finfo->dname, finfo->inotify_descr[TRACED_DIR]);
}
#endif
998
999 /* Register the file in FINFO as a traced file for the database DBS[DBIX].
1000
1001 We support registering multiple files per database. Each call to
1002 register_traced_file adds to the list of registered files.
1003
1004 When we prune the database, either through timeout or a request to
1005 invalidate, we will check to see if any of the registered files has changed.
1006 When we accept new connections to handle a cache request we will also
1007 check to see if any of the registered files has changed.
1008
1009 If we have inotify support then we install an inotify fd to notify us of
1010 file deletion or modification, both of which will require we invalidate
1011 the cache for the database. Without inotify support we stat the file and
1012 store st_mtime to determine if the file has been modified. */
1013 void
1014 register_traced_file (size_t dbidx, struct traced_file *finfo)
1015 {
1016 /* If the database is disabled or file checking is disabled
1017 then ignore the registration. */
1018 if (! dbs[dbidx].enabled || ! dbs[dbidx].check_file)
1019 return;
1020
1021 if (__glibc_unlikely (debug_level > 0))
1022 dbg_log (_("monitoring file %s for database %s"),
1023 finfo->fname, dbnames[dbidx]);
1024
1025 #ifdef HAVE_INOTIFY
1026 install_watches (finfo);
1027 #endif
1028 struct stat64 st;
1029 if (stat64 (finfo->fname, &st) < 0)
1030 {
1031 /* We cannot stat() the file. Set mtime to zero and try again later. */
1032 dbg_log (_("stat failed for file `%s'; will try again later: %s"),
1033 finfo->fname, strerror (errno));
1034 finfo->mtime = 0;
1035 }
1036 else
1037 finfo->mtime = st.st_mtime;
1038
1039 /* Queue up the file name. */
1040 finfo->next = dbs[dbidx].traced_files;
1041 dbs[dbidx].traced_files = finfo;
1042 }
1043
1044
/* Close the connections.  Shuts down the UNIX-domain socket on which
   new client connections are accepted.  */
void
close_sockets (void)
{
  close (sock);
}
1051
1052
/* Handle an INVALIDATE request.  KEY is the name of the database to
   flush; the 32-bit reply code is written to FD: EINVAL for an
   unknown database name, 0 for a disabled database; for an enabled
   database prune_cache is run with an "everything is expired"
   timeout (presumably it sends the reply itself on FD — it receives
   the descriptor).  */
static void
invalidate_cache (char *key, int fd)
{
  dbtype number;
  int32_t resp;

  /* Find the database with the given name.  */
  for (number = pwddb; number < lastdb; ++number)
    if (strcmp (key, dbnames[number]) == 0)
      {
	struct traced_file *runp = dbs[number].traced_files;
	while (runp != NULL)
	  {
	    /* Make sure we reload from file when checking mtime.  */
	    runp->mtime = 0;
#ifdef HAVE_INOTIFY
	    /* During an invalidation we try to reload the traced
	       file watches.  This allows the user to re-sync if
	       inotify events were lost.  Similar to what we do during
	       pruning.  */
	    install_watches (runp);
#endif
	    if (runp->call_res_init)
	      {
		res_init ();
		break;
	      }
	    runp = runp->next;
	  }
	break;
      }

  if (number == lastdb)
    {
      /* No database by that name; report the error to the client.  */
      resp = EINVAL;
      writeall (fd, &resp, sizeof (resp));
      return;
    }

  if (dbs[number].enabled)
    {
      /* LONG_MAX as the current time makes every entry look expired,
	 i.e. flushes the whole cache.  The run lock serializes this
	 with the periodic pruning thread.  */
      pthread_mutex_lock (&dbs[number].prune_run_lock);
      prune_cache (&dbs[number], LONG_MAX, fd);
      pthread_mutex_unlock (&dbs[number].prune_run_lock);
    }
  else
    {
      resp = 0;
      writeall (fd, &resp, sizeof (resp));
    }
}
1103
1104
#ifdef SCM_RIGHTS
/* Pass the read-only descriptor of database DB to the client on FD
   via an SCM_RIGHTS ancillary message.  The normal payload carries
   the database name KEY (NUL-terminated) and the size of the mapping
   so the client knows how much to mmap.  Errors other than EINTR are
   deliberately ignored.  */
static void
send_ro_fd (struct database_dyn *db, char *key, int fd)
{
  /* If we do not have an read-only file descriptor do nothing.  */
  if (db->ro_fd == -1)
    return;

  /* We need to send some data along with the descriptor.  */
  uint64_t mapsize = (db->head->data_size
		      + roundup (db->head->module * sizeof (ref_t), ALIGN)
		      + sizeof (struct database_pers_head));
  struct iovec iov[2];
  iov[0].iov_base = key;
  iov[0].iov_len = strlen (key) + 1;
  iov[1].iov_base = &mapsize;
  iov[1].iov_len = sizeof (mapsize);

  /* Prepare the control message to transfer the descriptor.  */
  union
  {
    struct cmsghdr hdr;
    char bytes[CMSG_SPACE (sizeof (int))];
  } buf;
  struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
			.msg_control = buf.bytes,
			.msg_controllen = sizeof (buf) };
  struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);

  cmsg->cmsg_level = SOL_SOCKET;
  cmsg->cmsg_type = SCM_RIGHTS;
  cmsg->cmsg_len = CMSG_LEN (sizeof (int));

  /* Store the descriptor in the ancillary data.  */
  int *ip = (int *) CMSG_DATA (cmsg);
  *ip = db->ro_fd;

  msg.msg_controllen = cmsg->cmsg_len;

  /* Send the control message.  We repeat when we are interrupted but
     everything else is ignored.  */
#ifndef MSG_NOSIGNAL
# define MSG_NOSIGNAL 0
#endif
  (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));

  if (__glibc_unlikely (debug_level > 0))
    dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
}
#endif	/* SCM_RIGHTS */
1154
1155
/* Handle new request.  FD is the connected client socket, REQ the
   already-read request header, KEY the request key bytes (KEY_LEN of
   them), UID the client uid as far as known (-1 if not), and PID the
   client pid (0 if unknown; used only for logging).  Data requests are
   answered from the cache when possible, otherwise dispatched to the
   per-type add* functions.  Administrative requests (GETSTAT,
   SHUTDOWN, INVALIDATE) are permission-checked against the peer
   credentials first.  */
static void
handle_request (int fd, request_header *req, void *key, uid_t uid, pid_t pid)
{
  /* Reject clients speaking a different protocol version.  */
  if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
    {
      if (debug_level > 0)
	dbg_log (_("\
cannot handle old request version %d; current version is %d"),
		 req->version, NSCD_VERSION);
      return;
    }

  /* Perform the SELinux check before we go on to the standard checks.  */
  if (selinux_enabled && nscd_request_avc_has_perm (fd, req->type) != 0)
    {
      if (debug_level > 0)
	{
#ifdef SO_PEERCRED
# ifdef PATH_MAX
	  char buf[PATH_MAX];
# else
	  char buf[4096];
# endif

	  /* Resolve the client's executable name for the log message;
	     fall back to the bare pid if that fails.  */
	  snprintf (buf, sizeof (buf), "/proc/%ld/exe", (long int) pid);
	  ssize_t n = readlink (buf, buf, sizeof (buf) - 1);

	  if (n <= 0)
	    dbg_log (_("\
request from %ld not handled due to missing permission"), (long int) pid);
	  else
	    {
	      buf[n] = '\0';
	      dbg_log (_("\
request from '%s' [%ld] not handled due to missing permission"),
		       buf, (long int) pid);
	    }
#else
	  dbg_log (_("request not handled due to missing permission"));
#endif
	}
      return;
    }

  struct database_dyn *db = reqinfo[req->type].db;

  /* See whether we can service the request from the cache.  */
  if (__builtin_expect (reqinfo[req->type].data_request, true))
    {
      if (__builtin_expect (debug_level, 0) > 0)
	{
	  if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
	    {
	      char buf[INET6_ADDRSTRLEN];

	      dbg_log ("\t%s (%s)", serv2str[req->type],
		       inet_ntop (req->type == GETHOSTBYADDR
				  ? AF_INET : AF_INET6,
				  key, buf, sizeof (buf)));
	    }
	  else
	    dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
	}

      /* Is this service enabled?  */
      if (__glibc_unlikely (!db->enabled))
	{
	  /* No, sent the prepared record.  */
	  if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
					db->disabled_iov->iov_len,
					MSG_NOSIGNAL))
	      != (ssize_t) db->disabled_iov->iov_len
	      && __builtin_expect (debug_level, 0) > 0)
	    {
	      /* We have problems sending the result.  */
	      char buf[256];
	      dbg_log (_("cannot write result: %s"),
		       strerror_r (errno, buf, sizeof (buf)));
	    }

	  return;
	}

      /* Be sure we can read the data.  Count delayed acquisitions for
	 the statistics before blocking on the lock.  */
      if (__glibc_unlikely (pthread_rwlock_tryrdlock (&db->lock) != 0))
	{
	  ++db->head->rdlockdelayed;
	  pthread_rwlock_rdlock (&db->lock);
	}

      /* See whether we can handle it from the cache.  */
      struct datahead *cached;
      cached = (struct datahead *) cache_search (req->type, key, req->key_len,
						 db, uid);
      if (cached != NULL)
	{
	  /* Hurray it's in the cache.  */
	  ssize_t nwritten;

#ifdef HAVE_SENDFILE
	  /* If the cache is file-backed, ship the record straight from
	     the database file without copying through userspace.  */
	  if (__glibc_likely (db->mmap_used))
	    {
	      assert (db->wr_fd != -1);
	      assert ((char *) cached->data > (char *) db->data);
	      assert ((char *) cached->data - (char *) db->head
		      + cached->recsize
		      <= (sizeof (struct database_pers_head)
			  + db->head->module * sizeof (ref_t)
			  + db->head->data_size));
	      nwritten = sendfileall (fd, db->wr_fd,
				      (char *) cached->data
				      - (char *) db->head, cached->recsize);
# ifndef __ASSUME_SENDFILE
	      if (nwritten == -1 && errno == ENOSYS)
		goto use_write;
# endif
	    }
	  else
# ifndef __ASSUME_SENDFILE
	  use_write:
# endif
#endif
	    nwritten = writeall (fd, cached->data, cached->recsize);

	  if (nwritten != cached->recsize
	      && __builtin_expect (debug_level, 0) > 0)
	    {
	      /* We have problems sending the result.  */
	      char buf[256];
	      dbg_log (_("cannot write result: %s"),
		       strerror_r (errno, buf, sizeof (buf)));
	    }

	  pthread_rwlock_unlock (&db->lock);

	  return;
	}

      pthread_rwlock_unlock (&db->lock);
    }
  else if (__builtin_expect (debug_level, 0) > 0)
    {
      if (req->type == INVALIDATE)
	dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
      else
	dbg_log ("\t%s", serv2str[req->type]);
    }

  /* Handle the request.  Cache misses fall through to the add*
     functions, which perform the lookup and populate the cache.  */
  switch (req->type)
    {
    case GETPWBYNAME:
      addpwbyname (db, fd, req, key, uid);
      break;

    case GETPWBYUID:
      addpwbyuid (db, fd, req, key, uid);
      break;

    case GETGRBYNAME:
      addgrbyname (db, fd, req, key, uid);
      break;

    case GETGRBYGID:
      addgrbygid (db, fd, req, key, uid);
      break;

    case GETHOSTBYNAME:
      addhstbyname (db, fd, req, key, uid);
      break;

    case GETHOSTBYNAMEv6:
      addhstbynamev6 (db, fd, req, key, uid);
      break;

    case GETHOSTBYADDR:
      addhstbyaddr (db, fd, req, key, uid);
      break;

    case GETHOSTBYADDRv6:
      addhstbyaddrv6 (db, fd, req, key, uid);
      break;

    case GETAI:
      addhstai (db, fd, req, key, uid);
      break;

    case INITGROUPS:
      addinitgroups (db, fd, req, key, uid);
      break;

    case GETSERVBYNAME:
      addservbyname (db, fd, req, key, uid);
      break;

    case GETSERVBYPORT:
      addservbyport (db, fd, req, key, uid);
      break;

    case GETNETGRENT:
      addgetnetgrent (db, fd, req, key, uid);
      break;

    case INNETGR:
      addinnetgr (db, fd, req, key, uid);
      break;

    case GETSTAT:
    case SHUTDOWN:
    case INVALIDATE:
      {
	/* Get the callers credentials.  */
#ifdef SO_PEERCRED
	struct ucred caller;
	socklen_t optlen = sizeof (caller);

	if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
	  {
	    char buf[256];

	    dbg_log (_("error getting caller's id: %s"),
		     strerror_r (errno, buf, sizeof (buf)));
	    break;
	  }

	uid = caller.uid;
#else
	/* Some systems have no SO_PEERCRED implementation.  They don't
	   care about security so we don't as well.  */
	uid = 0;
#endif
      }

      /* Accept shutdown, getstat and invalidate only from root.  For
	 the stat call also allow the user specified in the config file.  */
      if (req->type == GETSTAT)
	{
	  if (uid == 0 || uid == stat_uid)
	    send_stats (fd, dbs);
	}
      else if (uid == 0)
	{
	  if (req->type == INVALIDATE)
	    invalidate_cache (key, fd);
	  else
	    termination_handler (0);
	}
      break;

    case GETFDPW:
    case GETFDGR:
    case GETFDHST:
    case GETFDSERV:
    case GETFDNETGR:
#ifdef SCM_RIGHTS
      send_ro_fd (reqinfo[req->type].db, key, fd);
#endif
      break;

    default:
      /* Ignore the command, it's nothing we know.  */
      break;
    }
}
1421
1422
/* Restart the process.  Used in paranoia mode: read the original
   command line back from /proc/self/cmdline, restore the original
   uid/gid and working directory, sync the persistent databases and
   re-exec ourselves.  Every failure path downgrades gracefully by
   clearing the global `paranoia' flag and returning instead of
   leaving the daemon in a half-restarted state.  */
static void
restart (void)
{
  /* First determine the parameters.  We do not use the parameters
     passed to main() since in case nscd is started by running the
     dynamic linker this will not work.  Yes, this is not the usual
     case but nscd is part of glibc and we occasionally do this.  */
  size_t buflen = 1024;
  char *buf = alloca (buflen);
  size_t readlen = 0;
  int fd = open ("/proc/self/cmdline", O_RDONLY);
  if (fd == -1)
    {
      dbg_log (_("\
cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
	       strerror (errno));

      paranoia = 0;
      return;
    }

  /* Read the whole cmdline, growing the alloca buffer as needed.  */
  while (1)
    {
      ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
					    buflen - readlen));
      if (n == -1)
	{
	  dbg_log (_("\
cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
		   strerror (errno));

	  close (fd);
	  paranoia = 0;
	  return;
	}

      readlen += n;

      /* A short read means we reached EOF.  */
      if (readlen < buflen)
	break;

      /* We might have to extend the buffer.  */
      size_t old_buflen = buflen;
      char *newp = extend_alloca (buf, buflen, 2 * buflen);
      buf = memmove (newp, buf, old_buflen);
    }

  close (fd);

  /* Parse the command line.  Worst case scenario: every two
     characters form one parameter (one character plus NUL).  */
  char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
  int argc = 0;

  /* /proc/self/cmdline is a sequence of NUL-terminated strings; split
     it in place into the argv vector.  */
  char *cp = buf;
  while (cp < buf + readlen)
    {
      argv[argc++] = cp;
      cp = (char *) rawmemchr (cp, '\0') + 1;
    }
  argv[argc] = NULL;

  /* Second, change back to the old user if we changed it.  */
  if (server_user != NULL)
    {
      if (setresuid (old_uid, old_uid, old_uid) != 0)
	{
	  dbg_log (_("\
cannot change to old UID: %s; disabling paranoia mode"),
		   strerror (errno));

	  paranoia = 0;
	  return;
	}

      if (setresgid (old_gid, old_gid, old_gid) != 0)
	{
	  dbg_log (_("\
cannot change to old GID: %s; disabling paranoia mode"),
		   strerror (errno));

	  /* Undo the uid change before giving up so we keep running
	     with consistent credentials.  */
	  ignore_value (setuid (server_uid));
	  paranoia = 0;
	  return;
	}
    }

  /* Next change back to the old working directory.  */
  if (chdir (oldcwd) == -1)
    {
      dbg_log (_("\
cannot change to old working directory: %s; disabling paranoia mode"),
	       strerror (errno));

      if (server_user != NULL)
	{
	  ignore_value (setuid (server_uid));
	  ignore_value (setgid (server_gid));
	}
      paranoia = 0;
      return;
    }

  /* Synchronize memory.  Save the "certainly running" markers so they
     can be restored if the exec fails below.  */
  int32_t certainly[lastdb];
  for (int cnt = 0; cnt < lastdb; ++cnt)
    if (dbs[cnt].enabled)
      {
	/* Make sure nobody keeps using the database.  */
	dbs[cnt].head->timestamp = 0;
	certainly[cnt] = dbs[cnt].head->nscd_certainly_running;
	dbs[cnt].head->nscd_certainly_running = 0;

	if (dbs[cnt].persistent)
	  // XXX async OK?
	  msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
      }

  /* The preparations are done.  */
#ifdef PATH_MAX
  char pathbuf[PATH_MAX];
#else
  char pathbuf[256];
#endif
  /* Try to exec the real nscd program so the process name (as reported
     in /proc/PID/status) will be 'nscd', but fall back to /proc/self/exe
     if readlink or the exec with the result of the readlink call fails.  */
  ssize_t n = readlink ("/proc/self/exe", pathbuf, sizeof (pathbuf) - 1);
  if (n != -1)
    {
      pathbuf[n] = '\0';
      execv (pathbuf, argv);
    }
  execv ("/proc/self/exe", argv);

  /* If we come here, we will never be able to re-exec.  */
  dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
	   strerror (errno));

  if (server_user != NULL)
    {
      ignore_value (setuid (server_uid));
      ignore_value (setgid (server_gid));
    }
  if (chdir ("/") != 0)
    dbg_log (_("cannot change current working directory to \"/\": %s"),
	     strerror (errno));
  paranoia = 0;

  /* Reenable the databases.  */
  time_t now = time (NULL);
  for (int cnt = 0; cnt < lastdb; ++cnt)
    if (dbs[cnt].enabled)
      {
	dbs[cnt].head->timestamp = now;
	dbs[cnt].head->nscd_certainly_running = certainly[cnt];
      }
}
1582
1583
/* List of file descriptors.  Entries form an intrusive singly-linked
   list; an entry with next == NULL is free (see fd_ready and
   nscd_run_worker).  */
struct fdlist
{
  int fd;
  struct fdlist *next;
};
/* Memory allocated for the list (a pool of entries, allocated
   elsewhere; fd_ready scans the first `nconns' slots).  */
static struct fdlist *fdlist;
/* List of currently ready-to-read file descriptors.  Circular list;
   READYLIST points at the most recently added entry, so
   READYLIST->next is the oldest.  */
static struct fdlist *readylist;

/* Condition variable and mutex to signal availability of entries in
   READYLIST.  (The condvar is statically initialized here; the old
   comment about dynamic initialization for a different clock appears
   outdated — NOTE(review): confirm against setup code.)  */
static pthread_cond_t readylist_cond = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;

/* The clock to use with the condvar.  */
static clockid_t timeout_clock = CLOCK_REALTIME;

/* Number of threads ready to handle the READYLIST.  */
static unsigned long int nready;
1606
1607
/* Function for the clean-up threads.  One such thread runs per enabled
   database; P carries the database index cast to a pointer.  The thread
   sleeps on the database's prune condvar and wakes either on timeout,
   on an explicit wakeup, or when clear_cache has been set, then runs
   prune_cache and computes the next wakeup time.  Never returns.  */
static void *
__attribute__ ((__noreturn__))
nscd_run_prune (void *p)
{
  const long int my_number = (long int) p;
  assert (dbs[my_number].enabled);

  int dont_need_update = setup_thread (&dbs[my_number]);

  time_t now = time (NULL);

  /* We are running.  */
  dbs[my_number].head->timestamp = now;

  struct timespec prune_ts;
  if (__glibc_unlikely (clock_gettime (timeout_clock, &prune_ts) == -1))
    /* Should never happen.  */
    abort ();

  /* Compute the initial timeout time.  Prevent all the timers to go
     off at the same time by adding a db-based value.  */
  prune_ts.tv_sec += CACHE_PRUNE_INTERVAL + my_number;
  dbs[my_number].wakeup_time = now + CACHE_PRUNE_INTERVAL + my_number;

  pthread_mutex_t *prune_lock = &dbs[my_number].prune_lock;
  pthread_mutex_t *prune_run_lock = &dbs[my_number].prune_run_lock;
  pthread_cond_t *prune_cond = &dbs[my_number].prune_cond;

  /* PRUNE_LOCK is held for the whole loop except while prune_cache
     itself runs.  */
  pthread_mutex_lock (prune_lock);
  while (1)
    {
      /* Wait, but not forever.  Skip the wait entirely if a cache
	 clear has already been requested.  */
      int e = 0;
      if (! dbs[my_number].clear_cache)
	e = pthread_cond_timedwait (prune_cond, prune_lock, &prune_ts);
      assert (__builtin_expect (e == 0 || e == ETIMEDOUT, 1));

      time_t next_wait;
      now = time (NULL);
      if (e == ETIMEDOUT || now >= dbs[my_number].wakeup_time
	  || dbs[my_number].clear_cache)
	{
	  /* We will determine the new timout values based on the
	     cache content.  Should there be concurrent additions to
	     the cache which are not accounted for in the cache
	     pruning we want to know about it.  Therefore set the
	     timeout to the maximum.  It will be descreased when adding
	     new entries to the cache, if necessary.  */
	  dbs[my_number].wakeup_time = MAX_TIMEOUT_VALUE;

	  /* Unconditionally reset the flag.  A requested clear prunes
	     with LONG_MAX as "now", expiring every entry.  */
	  time_t prune_now = dbs[my_number].clear_cache ? LONG_MAX : now;
	  dbs[my_number].clear_cache = 0;

	  pthread_mutex_unlock (prune_lock);

	  /* We use a separate lock for running the prune function (instead
	     of keeping prune_lock locked) because this enables concurrent
	     invocations of cache_add which might modify the timeout value.  */
	  pthread_mutex_lock (prune_run_lock);
	  next_wait = prune_cache (&dbs[my_number], prune_now, -1);
	  pthread_mutex_unlock (prune_run_lock);

	  next_wait = MAX (next_wait, CACHE_PRUNE_INTERVAL);
	  /* If clients cannot determine for sure whether nscd is running
	     we need to wake up occasionally to update the timestamp.
	     Wait 90% of the update period.  */
#define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
	  if (__glibc_unlikely (! dont_need_update))
	    {
	      next_wait = MIN (UPDATE_MAPPING_TIMEOUT, next_wait);
	      dbs[my_number].head->timestamp = now;
	    }

	  pthread_mutex_lock (prune_lock);

	  /* Make it known when we will wake up again.  */
	  if (now + next_wait < dbs[my_number].wakeup_time)
	    dbs[my_number].wakeup_time = now + next_wait;
	  else
	    next_wait = dbs[my_number].wakeup_time - now;
	}
      else
	/* The cache was just pruned.  Do not do it again now.  Just
	   use the new timeout value.  */
	next_wait = dbs[my_number].wakeup_time - now;

      if (clock_gettime (timeout_clock, &prune_ts) == -1)
	/* Should never happen.  */
	abort ();

      /* Compute next timeout time.  */
      prune_ts.tv_sec += next_wait;
    }
}
1704
1705
/* This is the main loop.  It is replicated in different threads but
   the use of the ready list makes sure only one thread handles an
   incoming connection.  Each iteration pops the oldest descriptor from
   READYLIST, reads and dispatches one request, and closes the
   connection.  NREADY counts idle worker threads.  Never returns.  */
static void *
__attribute__ ((__noreturn__))
nscd_run_worker (void *p)
{
  char buf[256];

  /* Initial locking.  */
  pthread_mutex_lock (&readylist_lock);

  /* One more thread available.  */
  ++nready;

  while (1)
    {
      while (readylist == NULL)
	pthread_cond_wait (&readylist_cond, &readylist_lock);

      /* Pop the head (oldest entry) off the circular list; READYLIST
	 points at the tail.  */
      struct fdlist *it = readylist->next;
      if (readylist->next == readylist)
	/* Just one entry on the list.  */
	readylist = NULL;
      else
	readylist->next = it->next;

      /* Extract the information and mark the record ready to be used
	 again.  */
      int fd = it->fd;
      it->next = NULL;

      /* This thread is now busy handling a request: one thread fewer
	 is available.  */
      --nready;

      /* We are done with the list.  */
      pthread_mutex_unlock (&readylist_lock);

#ifndef __ASSUME_ACCEPT4
      if (have_accept4 < 0)
	{
	  /* We do not want to block on a short read or so.  */
	  int fl = fcntl (fd, F_GETFL);
	  if (fl == -1 || fcntl (fd, F_SETFL, fl | O_NONBLOCK) == -1)
	    goto close_and_out;
	}
#endif

      /* Now read the request.  */
      request_header req;
      if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
			    != sizeof (req), 0))
	{
	  /* We failed to read data.  Note that this also might mean we
	     failed because we would have blocked.  */
	  if (debug_level > 0)
	    dbg_log (_("short read while reading request: %s"),
		     strerror_r (errno, buf, sizeof (buf)));
	  goto close_and_out;
	}

      /* Check whether this is a valid request type.  */
      if (req.type < GETPWBYNAME || req.type >= LASTREQ)
	goto close_and_out;

      /* Some systems have no SO_PEERCRED implementation.  They don't
	 care about security so we don't as well.  */
      uid_t uid = -1;
#ifdef SO_PEERCRED
      pid_t pid = 0;

      /* The pid is only fetched for logging purposes.  */
      if (__glibc_unlikely (debug_level > 0))
	{
	  struct ucred caller;
	  socklen_t optlen = sizeof (caller);

	  if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
	    pid = caller.pid;
	}
#else
      const pid_t pid = 0;
#endif

      /* It should not be possible to crash the nscd with a silly
	 request (i.e., a terribly large key).  We limit the size to 1kb.  */
      if (__builtin_expect (req.key_len, 1) < 0
	  || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
	{
	  if (debug_level > 0)
	    dbg_log (_("key length in request too long: %d"), req.key_len);
	}
      else
	{
	  /* Get the key.  */
	  char keybuf[MAXKEYLEN + 1];

	  if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
							  req.key_len))
				!= req.key_len, 0))
	    {
	      /* Again, this can also mean we would have blocked.  */
	      if (debug_level > 0)
		dbg_log (_("short read while reading request key: %s"),
			 strerror_r (errno, buf, sizeof (buf)));
	      goto close_and_out;
	    }
	  keybuf[req.key_len] = '\0';

	  if (__builtin_expect (debug_level, 0) > 0)
	    {
#ifdef SO_PEERCRED
	      if (pid != 0)
		dbg_log (_("\
handle_request: request received (Version = %d) from PID %ld"),
			 req.version, (long int) pid);
	      else
#endif
		dbg_log (_("\
handle_request: request received (Version = %d)"), req.version);
	    }

	  /* Phew, we got all the data, now process it.  */
	  handle_request (fd, &req, keybuf, uid, pid);
	}

    close_and_out:
      /* We are done.  */
      close (fd);

      /* Re-locking.  */
      pthread_mutex_lock (&readylist_lock);

      /* One more thread available.  */
      ++nready;
    }
  /* NOTREACHED */
}
1843
1844
/* Number of entries in the FDLIST pool (and the poll/epoll connection
   limit).  */
static unsigned int nconns;

/* Queue the ready-to-read descriptor FD for the worker threads: put it
   on the circular READYLIST and wake one worker.  If no worker is idle
   try to spawn an additional one (up to max_nthreads).  */
static void
fd_ready (int fd)
{
  pthread_mutex_lock (&readylist_lock);

  /* Find an empty entry in FDLIST.  A NULL next pointer marks a free
     slot; the caller guarantees at most nconns outstanding
     connections, hence the assert.  */
  size_t inner;
  for (inner = 0; inner < nconns; ++inner)
    if (fdlist[inner].next == NULL)
      break;
  assert (inner < nconns);

  fdlist[inner].fd = fd;

  /* Append at the tail of the circular list; READYLIST always points
     at the newest entry so READYLIST->next is the oldest.  */
  if (readylist == NULL)
    readylist = fdlist[inner].next = &fdlist[inner];
  else
    {
      fdlist[inner].next = readylist->next;
      readylist = readylist->next = &fdlist[inner];
    }

  bool do_signal = true;
  if (__glibc_unlikely (nready == 0))
    {
      /* All workers are busy: account the queued client and avoid a
	 useless signal unless we manage to start a new worker.  */
      ++client_queued;
      do_signal = false;

      /* Try to start another thread to help out.  */
      pthread_t th;
      if (nthreads < max_nthreads
	  && pthread_create (&th, &attr, nscd_run_worker,
			     (void *) (long int) nthreads) == 0)
	{
	  /* We got another thread.  */
	  ++nthreads;
	  /* The new thread might need a kick.  */
	  do_signal = true;
	}

    }

  pthread_mutex_unlock (&readylist_lock);

  /* Tell one of the worker threads there is work to do.  */
  if (do_signal)
    pthread_cond_signal (&readylist_cond);
}
1895
1896
1897 /* Check whether restarting should happen. */
1898 static bool
1899 restart_p (time_t now)
1900 {
1901 return (paranoia && readylist == NULL && nready == nthreads
1902 && now >= restart_time);
1903 }
1904
1905
/* Array for times a connection was accepted.  Indexed in parallel with
   the poll/epoll connection slots; used to time out idle clients.  */
static time_t *starttime;

#ifdef HAVE_INOTIFY
/* Inotify event for changed file.  The buffer is sized for one event
   header plus the longest possible name payload.  */
union __inev
{
  struct inotify_event i;
# ifndef PATH_MAX
#  define PATH_MAX 1024
# endif
  char buf[sizeof (struct inotify_event) + PATH_MAX];
};
1919
1920 /* Returns 0 if the file is there otherwise -1. */
1921 int
1922 check_file (struct traced_file *finfo)
1923 {
1924 struct stat64 st;
1925 /* We could check mtime and if different re-add
1926 the watches, and invalidate the database, but we
1927 don't because we are called from inotify_check_files
1928 which should be doing that work. If sufficient inotify
1929 events were lost then the next pruning or invalidation
1930 will do the stat and mtime check. We don't do it here to
1931 keep the logic simple. */
1932 if (stat64 (finfo->fname, &st) < 0)
1933 return -1;
1934 return 0;
1935 }
1936
/* Process the inotify event in INEV.  If the event matches any of the files
   registered with a database then mark that database as requiring its cache
   to be cleared.  We indicate the cache needs clearing by setting
   TO_CLEAR[DBCNT] to true for the matching database.  Events are matched
   either against the watch on the traced file itself
   (inotify_descr[TRACED_FILE]) or against the watch on its parent
   directory (inotify_descr[TRACED_DIR]).  */
static void
inotify_check_files (bool *to_clear, union __inev *inev)
{
  /* Check which of the files changed.  */
  for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
    {
      struct traced_file *finfo = dbs[dbcnt].traced_files;

      while (finfo != NULL)
	{
	  /* The configuration file was moved or deleted.
	     We stop watching it at that point, and reinitialize.  */
	  if (finfo->inotify_descr[TRACED_FILE] == inev->i.wd
	      && ((inev->i.mask & IN_MOVE_SELF)
		  || (inev->i.mask & IN_DELETE_SELF)
		  || (inev->i.mask & IN_IGNORED)))
	    {
	      int ret;
	      bool moved = (inev->i.mask & IN_MOVE_SELF) != 0;

	      /* A stale event for a file that is actually still
		 present is ignored outright.  */
	      if (check_file (finfo) == 0)
	        {
		  dbg_log (_("ignored inotify event for `%s` (file exists)"),
			   finfo->fname);
		  return;
	        }

	      dbg_log (_("monitored file `%s` was %s, removing watch"),
		       finfo->fname, moved ? "moved" : "deleted");
	      /* File was moved out, remove the watch.  Watches are
		 automatically removed when the file is deleted.  */
	      if (moved)
		{
		  ret = inotify_rm_watch (inotify_fd, inev->i.wd);
		  if (ret < 0)
		    dbg_log (_("failed to remove file watch `%s`: %s"),
			     finfo->fname, strerror (errno));
		}
	      finfo->inotify_descr[TRACED_FILE] = -1;
	      to_clear[dbcnt] = true;
	      if (finfo->call_res_init)
		res_init ();
	      return;
	    }
	  /* The configuration file was open for writing and has just closed.
	     We reset the cache and reinitialize.  */
	  if (finfo->inotify_descr[TRACED_FILE] == inev->i.wd
	      && inev->i.mask & IN_CLOSE_WRITE)
	    {
	      /* Mark cache as needing to be cleared and reinitialize.  */
	      dbg_log (_("monitored file `%s` was written to"), finfo->fname);
	      to_clear[dbcnt] = true;
	      if (finfo->call_res_init)
		res_init ();
	      return;
	    }
	  /* The parent directory was moved or deleted.  We trigger one last
	     invalidation.  At the next pruning or invalidation we may add
	     this watch back if the file is present again.  */
	  if (finfo->inotify_descr[TRACED_DIR] == inev->i.wd
	      && ((inev->i.mask & IN_DELETE_SELF)
		  || (inev->i.mask & IN_MOVE_SELF)
		  || (inev->i.mask & IN_IGNORED)))
	    {
	      bool moved = (inev->i.mask & IN_MOVE_SELF) != 0;
	      /* The directory watch may have already been removed
		 but we don't know so we just remove it again and
		 ignore the error.  Then we remove the file watch.
		 Note: watches are automatically removed for deleted
		 files.  */
	      if (moved)
		inotify_rm_watch (inotify_fd, inev->i.wd);
	      if (finfo->inotify_descr[TRACED_FILE] != -1)
		{
		  dbg_log (_("monitored parent directory `%s` was %s, removing watch on `%s`"),
			   finfo->dname, moved ? "moved" : "deleted", finfo->fname);
		  if (inotify_rm_watch (inotify_fd, finfo->inotify_descr[TRACED_FILE]) < 0)
		    dbg_log (_("failed to remove file watch `%s`: %s"),
			     finfo->dname, strerror (errno));
		}
	      finfo->inotify_descr[TRACED_FILE] = -1;
	      finfo->inotify_descr[TRACED_DIR] = -1;
	      to_clear[dbcnt] = true;
	      if (finfo->call_res_init)
		res_init ();
	      /* Continue to the next entry since this might be the
		 parent directory for multiple registered files and
		 we want to remove watches for all registered files.  */
	      continue;
	    }
	  /* The parent directory had a create or moved to event.  */
	  if (finfo->inotify_descr[TRACED_DIR] == inev->i.wd
	      && ((inev->i.mask & IN_MOVED_TO)
		  || (inev->i.mask & IN_CREATE))
	      && strcmp (inev->i.name, finfo->sfname) == 0)
	    {
	      /* We detected a directory change.  We look for the creation
		 of the file we are tracking or the move of the same file
		 into the directory.  */
	      int ret;
	      dbg_log (_("monitored file `%s` was %s, adding watch"),
		       finfo->fname,
		       inev->i.mask & IN_CREATE ? "created" : "moved into place");
	      /* File was moved in or created.  Regenerate the watch.  */
	      if (finfo->inotify_descr[TRACED_FILE] != -1)
		inotify_rm_watch (inotify_fd,
				  finfo->inotify_descr[TRACED_FILE]);

	      ret = inotify_add_watch (inotify_fd,
				       finfo->fname,
				       TRACED_FILE_MASK);
	      if (ret < 0)
		dbg_log (_("failed to add file watch `%s`: %s"),
			 finfo->fname, strerror (errno));

	      finfo->inotify_descr[TRACED_FILE] = ret;

	      /* The file is new or moved so mark cache as needing to
		 be cleared and reinitialize.  */
	      to_clear[dbcnt] = true;
	      if (finfo->call_res_init)
		res_init ();

	      /* Done re-adding the watch.  Don't return, we may still
		 have other files in this same directory, same watch
		 descriptor, and need to process them.  */
	    }
	  /* Other events are ignored, and we move on to the next file.  */
	  finfo = finfo->next;
	}
    }
}
2073
2074 /* If an entry in the array of booleans TO_CLEAR is TRUE then clear the cache
2075 for the associated database, otherwise do nothing. The TO_CLEAR array must
2076 have LASTDB entries. */
2077 static inline void
2078 clear_db_cache (bool *to_clear)
2079 {
2080 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
2081 if (to_clear[dbcnt])
2082 {
2083 pthread_mutex_lock (&dbs[dbcnt].prune_lock);
2084 dbs[dbcnt].clear_cache = 1;
2085 pthread_mutex_unlock (&dbs[dbcnt].prune_lock);
2086 pthread_cond_signal (&dbs[dbcnt].prune_cond);
2087 }
2088 }
2089
2090 int
2091 handle_inotify_events (void)
2092 {
2093 bool to_clear[lastdb] = { false, };
2094 union __inev inev;
2095
2096 /* Read all inotify events for files registered via
2097 register_traced_file(). */
2098 while (1)
2099 {
2100 /* Potentially read multiple events into buf. */
2101 ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd,
2102 &inev.buf,
2103 sizeof (inev)));
2104 if (nb < (ssize_t) sizeof (struct inotify_event))
2105 {
2106 /* Not even 1 event. */
2107 if (__glibc_unlikely (nb == -1 && errno != EAGAIN))
2108 return -1;
2109 /* Done reading events that are ready. */
2110 break;
2111 }
2112 /* Process all events. The normal inotify interface delivers
2113 complete events on a read and never a partial event. */
2114 char *eptr = &inev.buf[0];
2115 ssize_t count;
2116 while (1)
2117 {
2118 /* Check which of the files changed. */
2119 inotify_check_files (to_clear, &inev);
2120 count = sizeof (struct inotify_event) + inev.i.len;
2121 eptr += count;
2122 nb -= count;
2123 if (nb >= (ssize_t) sizeof (struct inotify_event))
2124 memcpy (&inev, eptr, nb);
2125 else
2126 break;
2127 }
2128 continue;
2129 }
2130 /* Actually perform the cache clearing. */
2131 clear_db_cache (to_clear);
2132 return 0;
2133 }
2134
2135 #endif
2136
2137 static void
2138 __attribute__ ((__noreturn__))
2139 main_loop_poll (void)
2140 {
2141 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
2142 * sizeof (conns[0]));
2143
2144 conns[0].fd = sock;
2145 conns[0].events = POLLRDNORM;
2146 size_t nused = 1;
2147 size_t firstfree = 1;
2148
2149 #ifdef HAVE_INOTIFY
2150 if (inotify_fd != -1)
2151 {
2152 conns[1].fd = inotify_fd;
2153 conns[1].events = POLLRDNORM;
2154 nused = 2;
2155 firstfree = 2;
2156 }
2157 #endif
2158
2159 #ifdef HAVE_NETLINK
2160 size_t idx_nl_status_fd = 0;
2161 if (nl_status_fd != -1)
2162 {
2163 idx_nl_status_fd = nused;
2164 conns[nused].fd = nl_status_fd;
2165 conns[nused].events = POLLRDNORM;
2166 ++nused;
2167 firstfree = nused;
2168 }
2169 #endif
2170
2171 while (1)
2172 {
2173 /* Wait for any event. We wait at most a couple of seconds so
2174 that we can check whether we should close any of the accepted
2175 connections since we have not received a request. */
2176 #define MAX_ACCEPT_TIMEOUT 30
2177 #define MIN_ACCEPT_TIMEOUT 5
2178 #define MAIN_THREAD_TIMEOUT \
2179 (MAX_ACCEPT_TIMEOUT * 1000 \
2180 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
2181
2182 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
2183
2184 time_t now = time (NULL);
2185
2186 /* If there is a descriptor ready for reading or there is a new
2187 connection, process this now. */
2188 if (n > 0)
2189 {
2190 if (conns[0].revents != 0)
2191 {
2192 /* We have a new incoming connection. Accept the connection. */
2193 int fd;
2194
2195 #ifndef __ASSUME_ACCEPT4
2196 fd = -1;
2197 if (have_accept4 >= 0)
2198 #endif
2199 {
2200 fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
2201 SOCK_NONBLOCK));
2202 #ifndef __ASSUME_ACCEPT4
2203 if (have_accept4 == 0)
2204 have_accept4 = fd != -1 || errno != ENOSYS ? 1 : -1;
2205 #endif
2206 }
2207 #ifndef __ASSUME_ACCEPT4
2208 if (have_accept4 < 0)
2209 fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
2210 #endif
2211
2212 /* Use the descriptor if we have not reached the limit. */
2213 if (fd >= 0)
2214 {
2215 if (firstfree < nconns)
2216 {
2217 conns[firstfree].fd = fd;
2218 conns[firstfree].events = POLLRDNORM;
2219 starttime[firstfree] = now;
2220 if (firstfree >= nused)
2221 nused = firstfree + 1;
2222
2223 do
2224 ++firstfree;
2225 while (firstfree < nused && conns[firstfree].fd != -1);
2226 }
2227 else
2228 /* We cannot use the connection so close it. */
2229 close (fd);
2230 }
2231
2232 --n;
2233 }
2234
2235 size_t first = 1;
2236 #ifdef HAVE_INOTIFY
2237 if (inotify_fd != -1 && conns[1].fd == inotify_fd)
2238 {
2239 if (conns[1].revents != 0)
2240 {
2241 int ret;
2242 ret = handle_inotify_events ();
2243 if (ret == -1)
2244 {
2245 /* Something went wrong when reading the inotify
2246 data. Better disable inotify. */
2247 dbg_log (_("disabled inotify-based monitoring after read error %d"), errno);
2248 conns[1].fd = -1;
2249 firstfree = 1;
2250 if (nused == 2)
2251 nused = 1;
2252 close (inotify_fd);
2253 inotify_fd = -1;
2254 }
2255 --n;
2256 }
2257
2258 first = 2;
2259 }
2260 #endif
2261
2262 #ifdef HAVE_NETLINK
2263 if (idx_nl_status_fd != 0 && conns[idx_nl_status_fd].revents != 0)
2264 {
2265 char buf[4096];
2266 /* Read all the data. We do not interpret it here. */
2267 while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
2268 sizeof (buf))) != -1)
2269 ;
2270
2271 dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
2272 = __bump_nl_timestamp ();
2273 }
2274 #endif
2275
2276 for (size_t cnt = first; cnt < nused && n > 0; ++cnt)
2277 if (conns[cnt].revents != 0)
2278 {
2279 fd_ready (conns[cnt].fd);
2280
2281 /* Clean up the CONNS array. */
2282 conns[cnt].fd = -1;
2283 if (cnt < firstfree)
2284 firstfree = cnt;
2285 if (cnt == nused - 1)
2286 do
2287 --nused;
2288 while (conns[nused - 1].fd == -1);
2289
2290 --n;
2291 }
2292 }
2293
2294 /* Now find entries which have timed out. */
2295 assert (nused > 0);
2296
2297 /* We make the timeout length depend on the number of file
2298 descriptors currently used. */
2299 #define ACCEPT_TIMEOUT \
2300 (MAX_ACCEPT_TIMEOUT \
2301 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
2302 time_t laststart = now - ACCEPT_TIMEOUT;
2303
2304 for (size_t cnt = nused - 1; cnt > 0; --cnt)
2305 {
2306 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
2307 {
2308 /* Remove the entry, it timed out. */
2309 (void) close (conns[cnt].fd);
2310 conns[cnt].fd = -1;
2311
2312 if (cnt < firstfree)
2313 firstfree = cnt;
2314 if (cnt == nused - 1)
2315 do
2316 --nused;
2317 while (conns[nused - 1].fd == -1);
2318 }
2319 }
2320
2321 if (restart_p (now))
2322 restart ();
2323 }
2324 }
2325
2326
#ifdef HAVE_EPOLL
/* Main accept/dispatch loop using epoll.  EFD is an already-created
   epoll instance.  Registers the listening socket (global SOCK) plus,
   when available, the inotify and netlink status descriptors, then
   loops forever dispatching ready descriptors to worker threads and
   reaping accepted connections that stayed silent too long.  Returns
   only if epoll setup fails, in which case the caller falls back to
   the poll()-based loop.  */
static void
main_loop_epoll (int efd)
{
  struct epoll_event ev = { 0, };
  /* Number of descriptors currently registered with EFD (the listening
     socket counts as one).  */
  int nused = 1;
  /* Highest client fd value seen; bounds the timeout sweep below.  */
  size_t highest = 0;

  /* Add the socket.  */
  ev.events = EPOLLRDNORM;
  ev.data.fd = sock;
  if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
    /* We cannot use epoll.  */
    return;

# ifdef HAVE_INOTIFY
  if (inotify_fd != -1)
    {
      ev.events = EPOLLRDNORM;
      ev.data.fd = inotify_fd;
      if (epoll_ctl (efd, EPOLL_CTL_ADD, inotify_fd, &ev) == -1)
	/* We cannot use epoll.  */
	return;
      nused = 2;
    }
# endif

# ifdef HAVE_NETLINK
  if (nl_status_fd != -1)
    {
      ev.events = EPOLLRDNORM;
      ev.data.fd = nl_status_fd;
      if (epoll_ctl (efd, EPOLL_CTL_ADD, nl_status_fd, &ev) == -1)
	/* We cannot use epoll.  */
	return;
    }
# endif

  while (1)
    {
      struct epoll_event revs[100];
# define nrevs (sizeof (revs) / sizeof (revs[0]))

      /* MAIN_THREAD_TIMEOUT shrinks as more descriptors are in use so
	 that stale connections are collected sooner under load.  */
      int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);

      time_t now = time (NULL);

      for (int cnt = 0; cnt < n; ++cnt)
	if (revs[cnt].data.fd == sock)
	  {
	    /* A new connection.  */
	    int fd;

# ifndef __ASSUME_ACCEPT4
	    fd = -1;
	    if (have_accept4 >= 0)
# endif
	      {
		fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
						  SOCK_NONBLOCK));
# ifndef __ASSUME_ACCEPT4
		/* First call decides whether the kernel supports
		   accept4; remember the verdict for later calls.  */
		if (have_accept4 == 0)
		  have_accept4 = fd != -1 || errno != ENOSYS ? 1 : -1;
# endif
	      }
# ifndef __ASSUME_ACCEPT4
	    if (have_accept4 < 0)
	      fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
# endif

	    /* Use the descriptor if we have not reached the limit.  */
	    if (fd >= 0)
	      {
		/* Try to add the new descriptor.  NB: ev.events is
		   still EPOLLRDNORM from the setup code above.  The fd
		   value itself indexes STARTTIME, so fds >= nconns
		   cannot be tracked and are refused.  */
		ev.data.fd = fd;
		if (fd >= nconns
		    || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
		  /* The descriptor is too large or something went
		     wrong.  Close the descriptor.  */
		  close (fd);
		else
		  {
		    /* Remember when we accepted the connection.  */
		    starttime[fd] = now;

		    if (fd > highest)
		      highest = fd;

		    ++nused;
		  }
	      }
	  }
# ifdef HAVE_INOTIFY
	else if (revs[cnt].data.fd == inotify_fd)
	  {
	    int ret;
	    ret = handle_inotify_events ();
	    if (ret == -1)
	      {
		/* Something went wrong when reading the inotify
		   data.  Better disable inotify.  */
		dbg_log (_("disabled inotify-based monitoring after read error %d"), errno);
		(void) epoll_ctl (efd, EPOLL_CTL_DEL, inotify_fd, NULL);
		close (inotify_fd);
		inotify_fd = -1;
		/* Skip the remaining ready events for this wakeup;
		   they are re-reported by the next epoll_wait.  */
		break;
	      }
	  }
# endif
# ifdef HAVE_NETLINK
	else if (revs[cnt].data.fd == nl_status_fd)
	  {
	    char buf[4096];
	    /* Read all the data.  We do not interpret it here.
	       NOTE(review): this loop only terminates via a read error
	       such as EAGAIN, so it assumes nl_status_fd is
	       non-blocking — confirm at the fd's setup site.  */
	    while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
					     sizeof (buf))) != -1)
	      ;

	    /* Any netlink traffic invalidates cached host config.  */
	    __bump_nl_timestamp ();
	  }
# endif
	else
	  {
	    /* A registered client connection became readable.  Remove
	       the descriptor from the epoll descriptor.  */
	    (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);

	    /* Get a worker to handle the request.  */
	    fd_ready (revs[cnt].data.fd);

	    /* Reset the time.  A zero STARTTIME marks the slot free,
	       which also lets HIGHEST shrink past it.  */
	    starttime[revs[cnt].data.fd] = 0;
	    if (revs[cnt].data.fd == highest)
	      do
		--highest;
	      while (highest > 0 && starttime[highest] == 0);

	    --nused;
	  }

      /* Now look for descriptors for accepted connections which have
	 no reply in too long of a time.  */
      time_t laststart = now - ACCEPT_TIMEOUT;
      assert (starttime[sock] == 0);
# ifdef HAVE_INOTIFY
      assert (inotify_fd == -1 || starttime[inotify_fd] == 0);
# endif
      assert (nl_status_fd == -1 || starttime[nl_status_fd] == 0);
      /* Sweep downward from HIGHEST; stop above the stdio fds which
	 can never be client connections.  */
      for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
	if (starttime[cnt] != 0 && starttime[cnt] < laststart)
	  {
	    /* We are waiting for this one for too long.  Close it.  */
	    (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);

	    (void) close (cnt);

	    starttime[cnt] = 0;
	    if (cnt == highest)
	      --highest;
	  }
	else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
	  /* Slot already free; let HIGHEST drop below it.  */
	  --highest;

      if (restart_p (now))
	restart ();
    }
}
#endif
2494
2495
/* Start all the threads we want.  The initial process is thread no. 1.
   Creates one detached prune thread per enabled database and NTHREADS
   detached worker threads, tells the parent process startup succeeded,
   sizes the connection tables, and then runs the incoming-connection
   loop in this (the main) thread.  Never returns under normal
   operation.  */
void
start_threads (void)
{
  /* Initialize the conditional variable we will use.  The only
     non-standard attribute we might use is the clock selection.  */
  pthread_condattr_t condattr;
  pthread_condattr_init (&condattr);

#if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
    && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
  /* Determine whether the monotonous clock is available.  A zero
     _POSIX_* macro means "ask at runtime via sysconf".  */
  struct timespec dummy;
# if _POSIX_MONOTONIC_CLOCK == 0
  if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
# endif
# if _POSIX_CLOCK_SELECTION == 0
    if (sysconf (_SC_CLOCK_SELECTION) > 0)
# endif
      if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
	  && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
	timeout_clock = CLOCK_MONOTONIC;
#endif

  /* Create the attribute for the threads.  They are all created
     detached.  */
  pthread_attr_init (&attr);
  pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
  /* Use 1MB stacks, twice as much for 64-bit architectures.  */
  pthread_attr_setstacksize (&attr, NSCD_THREAD_STACKSIZE);

  /* We allow less than LASTDB threads only for debugging.  */
  if (debug_level == 0)
    nthreads = MAX (nthreads, lastdb);

  /* Create the threads which prune the databases.  */
  // XXX Ideally this work would be done by some of the worker threads.
  // XXX But this is problematic since we would need to be able to wake
  // XXX them up explicitly as well as part of the group handling the
  // XXX ready-list.  This requires an operation where we can wait on
  // XXX two conditional variables at the same time.  This operation
  // XXX does not exist (yet).
  for (long int i = 0; i < lastdb; ++i)
    {
      /* Initialize the conditional variable.  Done even for disabled
	 databases since other code may wait on it unconditionally.  */
      if (pthread_cond_init (&dbs[i].prune_cond, &condattr) != 0)
	{
	  dbg_log (_("could not initialize conditional variable"));
	  do_exit (1, 0, NULL);
	}

      pthread_t th;
      if (dbs[i].enabled
	  && pthread_create (&th, &attr, nscd_run_prune, (void *) i) != 0)
	{
	  dbg_log (_("could not start clean-up thread; terminating"));
	  do_exit (1, 0, NULL);
	}
    }

  /* Safe now: pthread_cond_init copied what it needed.  */
  pthread_condattr_destroy (&condattr);

  for (long int i = 0; i < nthreads; ++i)
    {
      pthread_t th;
      if (pthread_create (&th, &attr, nscd_run_worker, NULL) != 0)
	{
	  /* Fatal only if not even one worker could be started;
	     otherwise run degraded with the threads we got.  */
	  if (i == 0)
	    {
	      dbg_log (_("could not start any worker thread; terminating"));
	      do_exit (1, 0, NULL);
	    }

	  break;
	}
    }

  /* Now it is safe to let the parent know that we're doing fine and it can
     exit.  */
  notify_parent (0);

  /* Determine how much room for descriptors we should initially
     allocate.  This might need to change later if we cap the number
     with MAXCONN.  */
  const long int nfds = sysconf (_SC_OPEN_MAX);
#define MINCONN 32
#define MAXCONN 16384
  if (nfds == -1 || nfds > MAXCONN)
    nconns = MAXCONN;
  else if (nfds < MINCONN)
    nconns = MINCONN;
  else
    nconns = nfds;

  /* We need memory to pass descriptors on to the worker threads.  */
  fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
  /* Array to keep track when connection was accepted.  */
  starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));

  /* In the main thread we execute the loop which handles incoming
     connections.  */
#ifdef HAVE_EPOLL
  int efd = epoll_create (100);
  if (efd != -1)
    {
      main_loop_epoll (efd);
      /* main_loop_epoll returns only when epoll setup failed; fall
	 back to poll below.  */
      close (efd);
    }
#endif

  main_loop_poll ();
}
2608
2609
2610 /* Look up the uid, gid, and supplementary groups to run nscd as. When
2611 this function is called, we are not listening on the nscd socket yet so
2612 we can just use the ordinary lookup functions without causing a lockup */
2613 static void
2614 begin_drop_privileges (void)
2615 {
2616 struct passwd *pwd = getpwnam (server_user);
2617
2618 if (pwd == NULL)
2619 {
2620 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2621 do_exit (EXIT_FAILURE, 0,
2622 _("Failed to run nscd as user '%s'"), server_user);
2623 }
2624
2625 server_uid = pwd->pw_uid;
2626 server_gid = pwd->pw_gid;
2627
2628 /* Save the old UID/GID if we have to change back. */
2629 if (paranoia)
2630 {
2631 old_uid = getuid ();
2632 old_gid = getgid ();
2633 }
2634
2635 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
2636 {
2637 /* This really must never happen. */
2638 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2639 do_exit (EXIT_FAILURE, errno,
2640 _("initial getgrouplist failed"));
2641 }
2642
2643 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
2644
2645 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
2646 == -1)
2647 {
2648 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2649 do_exit (EXIT_FAILURE, errno, _("getgrouplist failed"));
2650 }
2651 }
2652
2653
2654 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
2655 run nscd as the user specified in the configuration file. */
2656 static void
2657 finish_drop_privileges (void)
2658 {
2659 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2660 /* We need to preserve the capabilities to connect to the audit daemon. */
2661 cap_t new_caps = preserve_capabilities ();
2662 #endif
2663
2664 if (setgroups (server_ngroups, server_groups) == -1)
2665 {
2666 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2667 do_exit (EXIT_FAILURE, errno, _("setgroups failed"));
2668 }
2669
2670 int res;
2671 if (paranoia)
2672 res = setresgid (server_gid, server_gid, old_gid);
2673 else
2674 res = setgid (server_gid);
2675 if (res == -1)
2676 {
2677 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2678 do_exit (4, errno, "setgid");
2679 }
2680
2681 if (paranoia)
2682 res = setresuid (server_uid, server_uid, old_uid);
2683 else
2684 res = setuid (server_uid);
2685 if (res == -1)
2686 {
2687 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2688 do_exit (4, errno, "setuid");
2689 }
2690
2691 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2692 /* Remove the temporary capabilities. */
2693 install_real_capabilities (new_caps);
2694 #endif
2695 }