nscd/connections.c
1 /* Inner loops of cache daemon.
2 Copyright (C) 1998-2014 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, see <http://www.gnu.org/licenses/>. */
18
19 #include <alloca.h>
20 #include <assert.h>
21 #include <atomic.h>
22 #include <error.h>
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <grp.h>
26 #include <ifaddrs.h>
27 #include <libintl.h>
28 #include <pthread.h>
29 #include <pwd.h>
30 #include <resolv.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <unistd.h>
34 #include <stdint.h>
35 #include <arpa/inet.h>
36 #ifdef HAVE_NETLINK
37 # include <linux/netlink.h>
38 # include <linux/rtnetlink.h>
39 #endif
40 #ifdef HAVE_EPOLL
41 # include <sys/epoll.h>
42 #endif
43 #ifdef HAVE_INOTIFY
44 # include <sys/inotify.h>
45 #endif
46 #include <sys/mman.h>
47 #include <sys/param.h>
48 #include <sys/poll.h>
49 #ifdef HAVE_SENDFILE
50 # include <sys/sendfile.h>
51 #endif
52 #include <sys/socket.h>
53 #include <sys/stat.h>
54 #include <sys/un.h>
55
56 #include "nscd.h"
57 #include "dbg_log.h"
58 #include "selinux.h"
59 #include <resolv/resolv.h>
60
61 #include <kernel-features.h>
62
63
64 /* Support to run nscd as an unprivileged user */
65 const char *server_user;
66 static uid_t server_uid;
67 static gid_t server_gid;
68 const char *stat_user;
69 uid_t stat_uid;
70 static gid_t *server_groups;
71 #ifndef NGROUPS
72 # define NGROUPS 32
73 #endif
74 static int server_ngroups;
75
76 static pthread_attr_t attr;
77
78 static void begin_drop_privileges (void);
79 static void finish_drop_privileges (void);
80
81 /* Map request type to a string. */
82 const char *const serv2str[LASTREQ] =
83 {
84 [GETPWBYNAME] = "GETPWBYNAME",
85 [GETPWBYUID] = "GETPWBYUID",
86 [GETGRBYNAME] = "GETGRBYNAME",
87 [GETGRBYGID] = "GETGRBYGID",
88 [GETHOSTBYNAME] = "GETHOSTBYNAME",
89 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
90 [GETHOSTBYADDR] = "GETHOSTBYADDR",
91 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
92 [SHUTDOWN] = "SHUTDOWN",
93 [GETSTAT] = "GETSTAT",
94 [INVALIDATE] = "INVALIDATE",
95 [GETFDPW] = "GETFDPW",
96 [GETFDGR] = "GETFDGR",
97 [GETFDHST] = "GETFDHST",
98 [GETAI] = "GETAI",
99 [INITGROUPS] = "INITGROUPS",
100 [GETSERVBYNAME] = "GETSERVBYNAME",
101 [GETSERVBYPORT] = "GETSERVBYPORT",
102 [GETFDSERV] = "GETFDSERV",
103 [GETNETGRENT] = "GETNETGRENT",
104 [INNETGR] = "INNETGR",
105 [GETFDNETGR] = "GETFDNETGR"
106 };
107
108 /* The control data structures for the services. */
109 struct database_dyn dbs[lastdb] =
110 {
111 [pwddb] = {
112 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
113 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
114 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
115 .enabled = 0,
116 .check_file = 1,
117 .persistent = 0,
118 .propagate = 1,
119 .shared = 0,
120 .max_db_size = DEFAULT_MAX_DB_SIZE,
121 .suggested_module = DEFAULT_SUGGESTED_MODULE,
122 .db_filename = _PATH_NSCD_PASSWD_DB,
123 .disabled_iov = &pwd_iov_disabled,
124 .postimeout = 3600,
125 .negtimeout = 20,
126 .wr_fd = -1,
127 .ro_fd = -1,
128 .mmap_used = false
129 },
130 [grpdb] = {
131 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
132 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
133 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
134 .enabled = 0,
135 .check_file = 1,
136 .persistent = 0,
137 .propagate = 1,
138 .shared = 0,
139 .max_db_size = DEFAULT_MAX_DB_SIZE,
140 .suggested_module = DEFAULT_SUGGESTED_MODULE,
141 .db_filename = _PATH_NSCD_GROUP_DB,
142 .disabled_iov = &grp_iov_disabled,
143 .postimeout = 3600,
144 .negtimeout = 60,
145 .wr_fd = -1,
146 .ro_fd = -1,
147 .mmap_used = false
148 },
149 [hstdb] = {
150 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
151 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
152 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
153 .enabled = 0,
154 .check_file = 1,
155 .persistent = 0,
156 .propagate = 0, /* Not used. */
157 .shared = 0,
158 .max_db_size = DEFAULT_MAX_DB_SIZE,
159 .suggested_module = DEFAULT_SUGGESTED_MODULE,
160 .db_filename = _PATH_NSCD_HOSTS_DB,
161 .disabled_iov = &hst_iov_disabled,
162 .postimeout = 3600,
163 .negtimeout = 20,
164 .wr_fd = -1,
165 .ro_fd = -1,
166 .mmap_used = false
167 },
168 [servdb] = {
169 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
170 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
171 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
172 .enabled = 0,
173 .check_file = 1,
174 .persistent = 0,
175 .propagate = 0, /* Not used. */
176 .shared = 0,
177 .max_db_size = DEFAULT_MAX_DB_SIZE,
178 .suggested_module = DEFAULT_SUGGESTED_MODULE,
179 .db_filename = _PATH_NSCD_SERVICES_DB,
180 .disabled_iov = &serv_iov_disabled,
181 .postimeout = 28800,
182 .negtimeout = 20,
183 .wr_fd = -1,
184 .ro_fd = -1,
185 .mmap_used = false
186 },
187 [netgrdb] = {
188 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
189 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
190 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
191 .enabled = 0,
192 .check_file = 1,
193 .persistent = 0,
194 .propagate = 0, /* Not used. */
195 .shared = 0,
196 .max_db_size = DEFAULT_MAX_DB_SIZE,
197 .suggested_module = DEFAULT_SUGGESTED_MODULE,
198 .db_filename = _PATH_NSCD_NETGROUP_DB,
199 .disabled_iov = &netgroup_iov_disabled,
200 .postimeout = 28800,
201 .negtimeout = 20,
202 .wr_fd = -1,
203 .ro_fd = -1,
204 .mmap_used = false
205 }
206 };
207
208
209 /* Mapping of request type to database. */
210 static struct
211 {
212 bool data_request;
213 struct database_dyn *db;
214 } const reqinfo[LASTREQ] =
215 {
216 [GETPWBYNAME] = { true, &dbs[pwddb] },
217 [GETPWBYUID] = { true, &dbs[pwddb] },
218 [GETGRBYNAME] = { true, &dbs[grpdb] },
219 [GETGRBYGID] = { true, &dbs[grpdb] },
220 [GETHOSTBYNAME] = { true, &dbs[hstdb] },
221 [GETHOSTBYNAMEv6] = { true, &dbs[hstdb] },
222 [GETHOSTBYADDR] = { true, &dbs[hstdb] },
223 [GETHOSTBYADDRv6] = { true, &dbs[hstdb] },
224 [SHUTDOWN] = { false, NULL },
225 [GETSTAT] = { false, NULL },
226   [INVALIDATE] = { false, NULL },
227 [GETFDPW] = { false, &dbs[pwddb] },
228 [GETFDGR] = { false, &dbs[grpdb] },
229 [GETFDHST] = { false, &dbs[hstdb] },
230 [GETAI] = { true, &dbs[hstdb] },
231 [INITGROUPS] = { true, &dbs[grpdb] },
232 [GETSERVBYNAME] = { true, &dbs[servdb] },
233 [GETSERVBYPORT] = { true, &dbs[servdb] },
234 [GETFDSERV] = { false, &dbs[servdb] },
235 [GETNETGRENT] = { true, &dbs[netgrdb] },
236 [INNETGR] = { true, &dbs[netgrdb] },
237 [GETFDNETGR] = { false, &dbs[netgrdb] }
238 };
239
240
241 /* Initial number of threads to use. */
242 int nthreads = -1;
243 /* Maximum number of threads to use. */
244 int max_nthreads = 32;
245
246 /* Socket for incoming connections. */
247 static int sock;
248
249 #ifdef HAVE_INOTIFY
250 /* Inotify descriptor. */
251 int inotify_fd = -1;
252 #endif
253
254 #ifdef HAVE_NETLINK
255 /* Descriptor for netlink status updates. */
256 static int nl_status_fd = -1;
257 #endif
258
259 #ifndef __ASSUME_SOCK_CLOEXEC
260 /* Negative if SOCK_CLOEXEC is not supported, positive if it is, zero
261 before we know the result. */
262 static int have_sock_cloexec;
263 #endif
264 #ifndef __ASSUME_ACCEPT4
265 static int have_accept4;
266 #endif
267
268 /* Number of times clients had to wait. */
269 unsigned long int client_queued;
270
271
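/* Send all LEN bytes of BUF to FD, restarting after EINTR and continuing
   after short writes.  Returns LEN on success, the number of bytes actually
   sent if the connection was closed early, or the negative result of send
   on error.  */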
272 ssize_t
273 writeall (int fd, const void *buf, size_t len)
274 {
275 size_t n = len;
276 ssize_t ret;
277 do
278 {
279 ret = TEMP_FAILURE_RETRY (send (fd, buf, n, MSG_NOSIGNAL));
280 if (ret <= 0)
281 break;
282 buf = (const char *) buf + ret;
283 n -= ret;
284 }
285 while (n > 0);
286 return ret < 0 ? ret : len - n;
287 }
288
289
290 #ifdef HAVE_SENDFILE
291 ssize_t
292 sendfileall (int tofd, int fromfd, off_t off, size_t len)
293 {
294 ssize_t n = len;
295 ssize_t ret;
296
297 do
298 {
299 ret = TEMP_FAILURE_RETRY (sendfile (tofd, fromfd, &off, n));
300 if (ret <= 0)
301 break;
302 n -= ret;
303 }
304 while (n > 0);
305 return ret < 0 ? ret : len - n;
306 }
307 #endif
308
309
310 enum usekey
311 {
312 use_not = 0,
313 /* The following three are not really used; they are symbolic constants. */
314 use_first = 16,
315 use_begin = 32,
316 use_end = 64,
317
318 use_he = 1,
319 use_he_begin = use_he | use_begin,
320 use_he_end = use_he | use_end,
321 use_data = 3,
322 use_data_begin = use_data | use_begin,
323 use_data_end = use_data | use_end,
324 use_data_first = use_data_begin | use_first
325 };
326
327
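/* Mark the LEN bytes starting at offset START in USEMAP as used for an
   object of type USE, or, if the range was marked before, check that the
   earlier marking is consistent with USE.  Every byte of USEMAP shadows one
   byte of the data area; use_begin and use_end bracket each object.
   Returns 1 if the range is valid, 0 otherwise.  */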
328 static int
329 check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
330 enum usekey use, ref_t start, size_t len)
331 {
332 assert (len >= 2);
333
334 if (start > first_free || start + len > first_free
335 || (start & BLOCK_ALIGN_M1))
336 return 0;
337
338 if (usemap[start] == use_not)
339 {
340 /* Add the start marker. */
341 usemap[start] = use | use_begin;
342 use &= ~use_first;
343
344 while (--len > 0)
345 if (usemap[++start] != use_not)
346 return 0;
347 else
348 usemap[start] = use;
349
350 /* Add the end marker. */
351 usemap[start] = use | use_end;
352 }
353 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
354 {
355 /* Hash entries can't be shared. */
356 if (use == use_he)
357 return 0;
358
359 usemap[start] |= (use & use_first);
360 use &= ~use_first;
361
362 while (--len > 1)
363 if (usemap[++start] != use)
364 return 0;
365
366 if (usemap[++start] != (use | use_end))
367 return 0;
368 }
369 else
370 /* Points to a wrong object or somewhere in the middle. */
371 return 0;
372
373 return 1;
374 }
375
376
377 /* Verify data in persistent database. */
378 static int
379 verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
380 {
381 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb || dbnr == servdb
382 || dbnr == netgrdb);
383
384 time_t now = time (NULL);
385
386 struct database_pers_head *head = mem;
387 struct database_pers_head head_copy = *head;
388
389 /* Check that the header that was read matches the head in the database. */
390 if (memcmp (head, readhead, sizeof (*head)) != 0)
391 return 0;
392
393 /* First some easy tests: make sure the database header is sane. */
394 if (head->version != DB_VERSION
395 || head->header_size != sizeof (*head)
396 /* We allow a timestamp to be one hour ahead of the current time.
397 This should cover daylight saving time changes. */
398 || head->timestamp > now + 60 * 60 + 60
399 || (head->gc_cycle & 1)
400 || head->module == 0
401 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
402 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
403 || head->first_free < 0
404 || head->first_free > head->data_size
405 || (head->first_free & BLOCK_ALIGN_M1) != 0
406 || head->maxnentries < 0
407 || head->maxnsearched < 0)
408 return 0;
409
410 uint8_t *usemap = calloc (head->first_free, 1);
411 if (usemap == NULL)
412 return 0;
413
414 const char *data = (char *) &head->array[roundup (head->module,
415 ALIGN / sizeof (ref_t))];
416
417 nscd_ssize_t he_cnt = 0;
418 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
419 {
420 ref_t trail = head->array[cnt];
421 ref_t work = trail;
422 int tick = 0;
423
424 while (work != ENDREF)
425 {
426 if (! check_use (data, head->first_free, usemap, use_he, work,
427 sizeof (struct hashentry)))
428 goto fail;
429
430 /* Now we know we can dereference the record. */
431 struct hashentry *here = (struct hashentry *) (data + work);
432
433 ++he_cnt;
434
435 /* Make sure the record is for this type of service. */
436 if (here->type >= LASTREQ
437 || reqinfo[here->type].db != &dbs[dbnr])
438 goto fail;
439
440 /* Validate boolean field value. */
441 if (here->first != false && here->first != true)
442 goto fail;
443
444 if (here->len < 0)
445 goto fail;
446
447 /* Now the data. */
448 if (here->packet < 0
449 || here->packet > head->first_free
450 || here->packet + sizeof (struct datahead) > head->first_free)
451 goto fail;
452
453 struct datahead *dh = (struct datahead *) (data + here->packet);
454
455 if (! check_use (data, head->first_free, usemap,
456 use_data | (here->first ? use_first : 0),
457 here->packet, dh->allocsize))
458 goto fail;
459
460 if (dh->allocsize < sizeof (struct datahead)
461 || dh->recsize > dh->allocsize
462 || (dh->notfound != false && dh->notfound != true)
463 || (dh->usable != false && dh->usable != true))
464 goto fail;
465
466 if (here->key < here->packet + sizeof (struct datahead)
467 || here->key > here->packet + dh->allocsize
468 || here->key + here->len > here->packet + dh->allocsize)
469 goto fail;
470
471 work = here->next;
472
473 if (work == trail)
474 /* A circular list, this must not happen. */
475 goto fail;
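/* Advance TRAIL at half the speed of WORK: if the chain contains a
   cycle, WORK eventually catches up with TRAIL and the test above
   triggers.  */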
476 if (tick)
477 trail = ((struct hashentry *) (data + trail))->next;
478 tick = 1 - tick;
479 }
480 }
481
482 if (he_cnt != head->nentries)
483 goto fail;
484
485 /* See if all data and keys had at least one reference from
486 a hashentry with he->first == true. */
487 for (ref_t idx = 0; idx < head->first_free; ++idx)
488 {
489 if (usemap[idx] == use_data_begin)
490 goto fail;
491 }
492
493 /* Finally, make sure the database hasn't changed since the first test. */
494 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
495 goto fail;
496
497 free (usemap);
498 return 1;
499
500 fail:
501 free (usemap);
502 return 0;
503 }
504
505
506 #ifdef O_CLOEXEC
507 # define EXTRA_O_FLAGS O_CLOEXEC
508 #else
509 # define EXTRA_O_FLAGS 0
510 #endif
511
512
513 /* Initialize database information structures. */
514 void
515 nscd_init (void)
516 {
517 /* Look up unprivileged uid/gid/groups before we start listening on the
518 socket */
519 if (server_user != NULL)
520 begin_drop_privileges ();
521
522 if (nthreads == -1)
523 /* No configuration for this value, assume a default. */
524 nthreads = 4;
525
526 for (size_t cnt = 0; cnt < lastdb; ++cnt)
527 if (dbs[cnt].enabled)
528 {
529 pthread_rwlock_init (&dbs[cnt].lock, NULL);
530 pthread_mutex_init (&dbs[cnt].memlock, NULL);
531
532 if (dbs[cnt].persistent)
533 {
534 /* Try to open the appropriate file on disk. */
535 int fd = open (dbs[cnt].db_filename, O_RDWR | EXTRA_O_FLAGS);
536 if (fd != -1)
537 {
538 char *msg = NULL;
539 struct stat64 st;
540 void *mem;
541 size_t total;
542 struct database_pers_head head;
543 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
544 sizeof (head)));
545 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
546 {
547 fail_db_errno:
548 /* The code is single-threaded at this point so
549 using strerror is just fine. */
550 msg = strerror (errno);
551 fail_db:
552 dbg_log (_("invalid persistent database file \"%s\": %s"),
553 dbs[cnt].db_filename, msg);
554 unlink (dbs[cnt].db_filename);
555 }
556 else if (head.module == 0 && head.data_size == 0)
557 {
558 /* The file has been created, but the head has not
559 been initialized yet. */
560 msg = _("uninitialized header");
561 goto fail_db;
562 }
563 else if (head.header_size != (int) sizeof (head))
564 {
565 msg = _("header size does not match");
566 goto fail_db;
567 }
568 else if ((total = (sizeof (head)
569 + roundup (head.module * sizeof (ref_t),
570 ALIGN)
571 + head.data_size))
572 > st.st_size
573 || total < sizeof (head))
574 {
575 msg = _("file size does not match");
576 goto fail_db;
577 }
578 /* Note we map with the maximum size allowed for the
579 database. This is likely much larger than the
580 actual file size. This is OK on most OSes since
581 extensions of the underlying file will
582 automatically make more pages available for
583 memory access. */
584 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
585 PROT_READ | PROT_WRITE,
586 MAP_SHARED, fd, 0))
587 == MAP_FAILED)
588 goto fail_db_errno;
589 else if (!verify_persistent_db (mem, &head, cnt))
590 {
591 munmap (mem, total);
592 msg = _("verification failed");
593 goto fail_db;
594 }
595 else
596 {
597 /* Success. We have the database. */
598 dbs[cnt].head = mem;
599 dbs[cnt].memsize = total;
600 dbs[cnt].data = (char *)
601 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
602 ALIGN / sizeof (ref_t))];
603 dbs[cnt].mmap_used = true;
604
605 if (dbs[cnt].suggested_module > head.module)
606 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
607 dbnames[cnt]);
608
609 dbs[cnt].wr_fd = fd;
610 fd = -1;
611 /* We also need a read-only descriptor. */
612 if (dbs[cnt].shared)
613 {
614 dbs[cnt].ro_fd = open (dbs[cnt].db_filename,
615 O_RDONLY | EXTRA_O_FLAGS);
616 if (dbs[cnt].ro_fd == -1)
617 dbg_log (_("\
618 cannot create read-only descriptor for \"%s\"; no mmap"),
619 dbs[cnt].db_filename);
620 }
621
622 // XXX Shall we test whether the descriptors actually
623 // XXX point to the same file?
624 }
625
626 /* Close the file descriptor if something went
627 wrong, in which case the variable has not been
628 assigned -1. */
629 if (fd != -1)
630 close (fd);
631 }
632 else if (errno == EACCES)
633 do_exit (EXIT_FAILURE, 0, _("cannot access '%s'"),
634 dbs[cnt].db_filename);
635 }
636
637 if (dbs[cnt].head == NULL)
638 {
639 /* No database loaded. Allocate the data structure,
640 possibly on disk. */
641 struct database_pers_head head;
642 size_t total = (sizeof (head)
643 + roundup (dbs[cnt].suggested_module
644 * sizeof (ref_t), ALIGN)
645 + (dbs[cnt].suggested_module
646 * DEFAULT_DATASIZE_PER_BUCKET));
647
648 /* Try to create the database. If we do not need a
649 persistent database create a temporary file. */
650 int fd;
651 int ro_fd = -1;
652 if (dbs[cnt].persistent)
653 {
654 fd = open (dbs[cnt].db_filename,
655 O_RDWR | O_CREAT | O_EXCL | O_TRUNC | EXTRA_O_FLAGS,
656 S_IRUSR | S_IWUSR);
657 if (fd != -1 && dbs[cnt].shared)
658 ro_fd = open (dbs[cnt].db_filename,
659 O_RDONLY | EXTRA_O_FLAGS);
660 }
661 else
662 {
663 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
664 fd = mkostemp (fname, EXTRA_O_FLAGS);
665
666 /* We do not need the file name anymore after we
667 opened another file descriptor in read-only mode. */
668 if (fd != -1)
669 {
670 if (dbs[cnt].shared)
671 ro_fd = open (fname, O_RDONLY | EXTRA_O_FLAGS);
672
673 unlink (fname);
674 }
675 }
676
677 if (fd == -1)
678 {
679 if (errno == EEXIST)
680 {
681 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
682 dbnames[cnt], dbs[cnt].db_filename);
683 do_exit (1, 0, NULL);
684 }
685
686 if (dbs[cnt].persistent)
687 dbg_log (_("cannot create %s; no persistent database used"),
688 dbs[cnt].db_filename);
689 else
690 dbg_log (_("cannot create %s; no sharing possible"),
691 dbs[cnt].db_filename);
692
693 dbs[cnt].persistent = 0;
694 // XXX remember: no mmap
695 }
696 else
697 {
698 /* Tell the user if we could not create the read-only
699 descriptor. */
700 if (ro_fd == -1 && dbs[cnt].shared)
701 dbg_log (_("\
702 cannot create read-only descriptor for \"%s\"; no mmap"),
703 dbs[cnt].db_filename);
704
705 /* Before we create the header, initialize the hash
706 table. That way if we get interrupted while writing
707 the header we can recognize a partially initialized
708 database. */
709 size_t ps = sysconf (_SC_PAGESIZE);
710 char tmpbuf[ps];
711 assert (~ENDREF == 0);
712 memset (tmpbuf, '\xff', ps);
713
714 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
715 off_t offset = sizeof (head);
716
717 size_t towrite;
718 if (offset % ps != 0)
719 {
720 towrite = MIN (remaining, ps - (offset % ps));
721 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
722 goto write_fail;
723 offset += towrite;
724 remaining -= towrite;
725 }
726
727 while (remaining > ps)
728 {
729 if (pwrite (fd, tmpbuf, ps, offset) == -1)
730 goto write_fail;
731 offset += ps;
732 remaining -= ps;
733 }
734
735 if (remaining > 0
736 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
737 goto write_fail;
738
739 /* Create the header of the file. */
740 struct database_pers_head head =
741 {
742 .version = DB_VERSION,
743 .header_size = sizeof (head),
744 .module = dbs[cnt].suggested_module,
745 .data_size = (dbs[cnt].suggested_module
746 * DEFAULT_DATASIZE_PER_BUCKET),
747 .first_free = 0
748 };
749 void *mem;
750
751 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
752 != sizeof (head))
753 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
754 != 0)
755 || (mem = mmap (NULL, dbs[cnt].max_db_size,
756 PROT_READ | PROT_WRITE,
757 MAP_SHARED, fd, 0)) == MAP_FAILED)
758 {
759 write_fail:
760 unlink (dbs[cnt].db_filename);
761 dbg_log (_("cannot write to database file %s: %s"),
762 dbs[cnt].db_filename, strerror (errno));
763 dbs[cnt].persistent = 0;
764 }
765 else
766 {
767 /* Success. */
768 dbs[cnt].head = mem;
769 dbs[cnt].data = (char *)
770 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
771 ALIGN / sizeof (ref_t))];
772 dbs[cnt].memsize = total;
773 dbs[cnt].mmap_used = true;
774
775 /* Remember the descriptors. */
776 dbs[cnt].wr_fd = fd;
777 dbs[cnt].ro_fd = ro_fd;
778 fd = -1;
779 ro_fd = -1;
780 }
781
782 if (fd != -1)
783 close (fd);
784 if (ro_fd != -1)
785 close (ro_fd);
786 }
787 }
788
789 #if !defined O_CLOEXEC || !defined __ASSUME_O_CLOEXEC
790 /* We do not check here whether the O_CLOEXEC provided to the
791 open call was successful or not. The two fcntl calls are
792 only performed once each per process start-up and therefore
793 are not noticeable at all. */
794 if (paranoia
795 && ((dbs[cnt].wr_fd != -1
796 && fcntl (dbs[cnt].wr_fd, F_SETFD, FD_CLOEXEC) == -1)
797 || (dbs[cnt].ro_fd != -1
798 && fcntl (dbs[cnt].ro_fd, F_SETFD, FD_CLOEXEC) == -1)))
799 {
800 dbg_log (_("\
801 cannot set socket to close on exec: %s; disabling paranoia mode"),
802 strerror (errno));
803 paranoia = 0;
804 }
805 #endif
806
807 if (dbs[cnt].head == NULL)
808 {
809 /* We do not use the persistent database. Just
810 create an in-memory data structure. */
811 assert (! dbs[cnt].persistent);
812
813 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
814 + (dbs[cnt].suggested_module
815 * sizeof (ref_t)));
816 memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
817 assert (~ENDREF == 0);
818 memset (dbs[cnt].head->array, '\xff',
819 dbs[cnt].suggested_module * sizeof (ref_t));
820 dbs[cnt].head->module = dbs[cnt].suggested_module;
821 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
822 * dbs[cnt].head->module);
823 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
824 dbs[cnt].head->first_free = 0;
825
826 dbs[cnt].shared = 0;
827 assert (dbs[cnt].ro_fd == -1);
828 }
829 }
830
831 /* Create the socket. */
832 #ifndef __ASSUME_SOCK_CLOEXEC
833 sock = -1;
834 if (have_sock_cloexec >= 0)
835 #endif
836 {
837 sock = socket (AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
838 #ifndef __ASSUME_SOCK_CLOEXEC
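/* Remember the outcome of the probe: success, or any failure other than
   EINVAL (the error older kernels report for unrecognized type flags),
   means SOCK_CLOEXEC and SOCK_NONBLOCK are supported.  */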
839 if (have_sock_cloexec == 0)
840 have_sock_cloexec = sock != -1 || errno != EINVAL ? 1 : -1;
841 #endif
842 }
843 #ifndef __ASSUME_SOCK_CLOEXEC
844 if (have_sock_cloexec < 0)
845 sock = socket (AF_UNIX, SOCK_STREAM, 0);
846 #endif
847 if (sock < 0)
848 {
849 dbg_log (_("cannot open socket: %s"), strerror (errno));
850 do_exit (errno == EACCES ? 4 : 1, 0, NULL);
851 }
852 /* Bind a name to the socket. */
853 struct sockaddr_un sock_addr;
854 sock_addr.sun_family = AF_UNIX;
855 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
856 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
857 {
858 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
859 do_exit (errno == EACCES ? 4 : 1, 0, NULL);
860 }
861
862 #ifndef __ASSUME_SOCK_CLOEXEC
863 if (have_sock_cloexec < 0)
864 {
865 /* We don't want to get stuck on accept. */
866 int fl = fcntl (sock, F_GETFL);
867 if (fl == -1 || fcntl (sock, F_SETFL, fl | O_NONBLOCK) == -1)
868 {
869 dbg_log (_("cannot change socket to nonblocking mode: %s"),
870 strerror (errno));
871 do_exit (1, 0, NULL);
872 }
873
874 /* The descriptor needs to be closed on exec. */
875 if (paranoia && fcntl (sock, F_SETFD, FD_CLOEXEC) == -1)
876 {
877 dbg_log (_("cannot set socket to close on exec: %s"),
878 strerror (errno));
879 do_exit (1, 0, NULL);
880 }
881 }
882 #endif
883
884 /* Set permissions for the socket. */
885 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
886
887 /* Set the socket up to accept connections. */
888 if (listen (sock, SOMAXCONN) < 0)
889 {
890 dbg_log (_("cannot enable socket to accept connections: %s"),
891 strerror (errno));
892 do_exit (1, 0, NULL);
893 }
894
895 #ifdef HAVE_NETLINK
896 if (dbs[hstdb].enabled)
897 {
898 /* Try to open netlink socket to monitor network setting changes. */
899 nl_status_fd = socket (AF_NETLINK,
900 SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK,
901 NETLINK_ROUTE);
902 if (nl_status_fd != -1)
903 {
904 struct sockaddr_nl snl;
905 memset (&snl, '\0', sizeof (snl));
906 snl.nl_family = AF_NETLINK;
907 /* XXX Is this the best set to use? */
908 snl.nl_groups = (RTMGRP_IPV4_IFADDR | RTMGRP_TC | RTMGRP_IPV4_MROUTE
909 | RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_RULE
910 | RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_MROUTE
911 | RTMGRP_IPV6_ROUTE | RTMGRP_IPV6_IFINFO
912 | RTMGRP_IPV6_PREFIX);
913
914 if (bind (nl_status_fd, (struct sockaddr *) &snl, sizeof (snl)) != 0)
915 {
916 close (nl_status_fd);
917 nl_status_fd = -1;
918 }
919 else
920 {
921 /* Start the timestamp process. */
922 dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
923 = __bump_nl_timestamp ();
924
925 # ifndef __ASSUME_SOCK_CLOEXEC
926 if (have_sock_cloexec < 0)
927 {
928 /* We don't want to block reading from the netlink socket. */
929 int fl = fcntl (nl_status_fd, F_GETFL);
930 if (fl == -1
931 || fcntl (nl_status_fd, F_SETFL, fl | O_NONBLOCK) == -1)
932 {
933 dbg_log (_("\
934 cannot change socket to nonblocking mode: %s"),
935 strerror (errno));
936 do_exit (1, 0, NULL);
937 }
938
939 /* The descriptor needs to be closed on exec. */
940 if (paranoia
941 && fcntl (nl_status_fd, F_SETFD, FD_CLOEXEC) == -1)
942 {
943 dbg_log (_("cannot set socket to close on exec: %s"),
944 strerror (errno));
945 do_exit (1, 0, NULL);
946 }
947 }
948 # endif
949 }
950 }
951 }
952 #endif
953
954 /* Change to unprivileged uid/gid/groups if specified in config file */
955 if (server_user != NULL)
956 finish_drop_privileges ();
957 }
958
959
960 /* Register the file in FINFO as a traced file for the database DBS[DBIX].
961
962 We support registering multiple files per database. Each call to
963 register_traced_file adds to the list of registered files.
964
965 When we prune the database, either through timeout or a request to
966 invalidate, we will check to see if any of the registered files has changed.
967 When we accept new connections to handle a cache request we will also
968 check to see if any of the registered files has changed.
969
970 If we have inotify support then we install an inotify fd to notify us of
971 file deletion or modification, both of which will require we invalidate
972 the cache for the database. Without inotify support we stat the file and
973 store st_mtime to determine if the file has been modified. */
974 void
975 register_traced_file (size_t dbidx, struct traced_file *finfo)
976 {
977 /* If the database is disabled or file checking is disabled
978 then ignore the registration. */
979 if (! dbs[dbidx].enabled || ! dbs[dbidx].check_file)
980 return;
981
982 if (__glibc_unlikely (debug_level > 0))
983 dbg_log (_("register trace file %s for database %s"),
984 finfo->fname, dbnames[dbidx]);
985
986 #ifdef HAVE_INOTIFY
987 if (inotify_fd < 0
988 || (finfo->inotify_descr = inotify_add_watch (inotify_fd, finfo->fname,
989 IN_DELETE_SELF
990 | IN_MODIFY)) < 0)
991 #endif
992 {
993 /* We need the modification date of the file. */
994 struct stat64 st;
995
996 if (stat64 (finfo->fname, &st) < 0)
997 {
998 /* We cannot stat() the file, disable file checking. */
999 dbg_log (_("cannot stat() file `%s': %s"),
1000 finfo->fname, strerror (errno));
1001 return;
1002 }
1003
1004 finfo->inotify_descr = -1;
1005 finfo->mtime = st.st_mtime;
1006 }
1007
1008 /* Queue up the file name. */
1009 finfo->next = dbs[dbidx].traced_files;
1010 dbs[dbidx].traced_files = finfo;
1011 }
1012
1013
1014 /* Close the connections. */
1015 void
1016 close_sockets (void)
1017 {
1018 close (sock);
1019 }
1020
1021
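/* Handle an INVALIDATE request: KEY names one of the databases.  For the
   hosts database re-run res_init if any traced file asks for it; then prune
   the entire cache and write the result code back to FD.  */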
1022 static void
1023 invalidate_cache (char *key, int fd)
1024 {
1025 dbtype number;
1026 int32_t resp;
1027
1028 for (number = pwddb; number < lastdb; ++number)
1029 if (strcmp (key, dbnames[number]) == 0)
1030 {
1031 if (number == hstdb)
1032 {
1033 struct traced_file *runp = dbs[hstdb].traced_files;
1034 while (runp != NULL)
1035 if (runp->call_res_init)
1036 {
1037 res_init ();
1038 break;
1039 }
1040 else
1041 runp = runp->next;
1042 }
1043 break;
1044 }
1045
1046 if (number == lastdb)
1047 {
1048 resp = EINVAL;
1049 writeall (fd, &resp, sizeof (resp));
1050 return;
1051 }
1052
1053 if (dbs[number].enabled)
1054 {
1055 pthread_mutex_lock (&dbs[number].prune_run_lock);
1056 prune_cache (&dbs[number], LONG_MAX, fd);
1057 pthread_mutex_unlock (&dbs[number].prune_run_lock);
1058 }
1059 else
1060 {
1061 resp = 0;
1062 writeall (fd, &resp, sizeof (resp));
1063 }
1064 }
1065
1066
1067 #ifdef SCM_RIGHTS
1068 static void
1069 send_ro_fd (struct database_dyn *db, char *key, int fd)
1070 {
1071 /* If we do not have a read-only file descriptor, do nothing. */
1072 if (db->ro_fd == -1)
1073 return;
1074
1075 /* We need to send some data along with the descriptor. */
1076 uint64_t mapsize = (db->head->data_size
1077 + roundup (db->head->module * sizeof (ref_t), ALIGN)
1078 + sizeof (struct database_pers_head));
1079 struct iovec iov[2];
1080 iov[0].iov_base = key;
1081 iov[0].iov_len = strlen (key) + 1;
1082 iov[1].iov_base = &mapsize;
1083 iov[1].iov_len = sizeof (mapsize);
1084
1085 /* Prepare the control message to transfer the descriptor. */
1086 union
1087 {
1088 struct cmsghdr hdr;
1089 char bytes[CMSG_SPACE (sizeof (int))];
1090 } buf;
1091 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
1092 .msg_control = buf.bytes,
1093 .msg_controllen = sizeof (buf) };
1094 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
1095
1096 cmsg->cmsg_level = SOL_SOCKET;
1097 cmsg->cmsg_type = SCM_RIGHTS;
1098 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
1099
1100 int *ip = (int *) CMSG_DATA (cmsg);
1101 *ip = db->ro_fd;
1102
1103 msg.msg_controllen = cmsg->cmsg_len;
1104
1105 /* Send the control message. We retry when we are interrupted, but
1106 all other errors are ignored. */
1107 #ifndef MSG_NOSIGNAL
1108 # define MSG_NOSIGNAL 0
1109 #endif
1110 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
1111
1112 if (__glibc_unlikely (debug_level > 0))
1113 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
1114 }
1115 #endif /* SCM_RIGHTS */
1116
1117
1118 /* Handle new request. */
1119 static void
1120 handle_request (int fd, request_header *req, void *key, uid_t uid, pid_t pid)
1121 {
1122 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
1123 {
1124 if (debug_level > 0)
1125 dbg_log (_("\
1126 cannot handle old request version %d; current version is %d"),
1127 req->version, NSCD_VERSION);
1128 return;
1129 }
1130
1131 /* Perform the SELinux check before we go on to the standard checks. */
1132 if (selinux_enabled && nscd_request_avc_has_perm (fd, req->type) != 0)
1133 {
1134 if (debug_level > 0)
1135 {
1136 #ifdef SO_PEERCRED
1137 # ifdef PATH_MAX
1138 char buf[PATH_MAX];
1139 # else
1140 char buf[4096];
1141 # endif
1142
1143 snprintf (buf, sizeof (buf), "/proc/%ld/exe", (long int) pid);
1144 ssize_t n = readlink (buf, buf, sizeof (buf) - 1);
1145
1146 if (n <= 0)
1147 dbg_log (_("\
1148 request from %ld not handled due to missing permission"), (long int) pid);
1149 else
1150 {
1151 buf[n] = '\0';
1152 dbg_log (_("\
1153 request from '%s' [%ld] not handled due to missing permission"),
1154 buf, (long int) pid);
1155 }
1156 #else
1157 dbg_log (_("request not handled due to missing permission"));
1158 #endif
1159 }
1160 return;
1161 }
1162
1163 struct database_dyn *db = reqinfo[req->type].db;
1164
1165 /* See whether we can service the request from the cache. */
1166 if (__builtin_expect (reqinfo[req->type].data_request, true))
1167 {
1168 if (__builtin_expect (debug_level, 0) > 0)
1169 {
1170 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
1171 {
1172 char buf[INET6_ADDRSTRLEN];
1173
1174 dbg_log ("\t%s (%s)", serv2str[req->type],
1175 inet_ntop (req->type == GETHOSTBYADDR
1176 ? AF_INET : AF_INET6,
1177 key, buf, sizeof (buf)));
1178 }
1179 else
1180 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1181 }
1182
1183 /* Is this service enabled? */
1184 if (__glibc_unlikely (!db->enabled))
1185 {
1186 /* No, send the prepared record. */
1187 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
1188 db->disabled_iov->iov_len,
1189 MSG_NOSIGNAL))
1190 != (ssize_t) db->disabled_iov->iov_len
1191 && __builtin_expect (debug_level, 0) > 0)
1192 {
1193 /* We have problems sending the result. */
1194 char buf[256];
1195 dbg_log (_("cannot write result: %s"),
1196 strerror_r (errno, buf, sizeof (buf)));
1197 }
1198
1199 return;
1200 }
1201
1202 /* Be sure we can read the data. */
1203 if (__glibc_unlikely (pthread_rwlock_tryrdlock (&db->lock) != 0))
1204 {
1205 ++db->head->rdlockdelayed;
1206 pthread_rwlock_rdlock (&db->lock);
1207 }
1208
1209 /* See whether we can handle it from the cache. */
1210 struct datahead *cached;
1211 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
1212 db, uid);
1213 if (cached != NULL)
1214 {
1215 /* Hurray it's in the cache. */
1216 ssize_t nwritten;
1217
1218 #ifdef HAVE_SENDFILE
1219 if (__glibc_likely (db->mmap_used))
1220 {
1221 assert (db->wr_fd != -1);
1222 assert ((char *) cached->data > (char *) db->data);
1223 assert ((char *) cached->data - (char *) db->head
1224 + cached->recsize
1225 <= (sizeof (struct database_pers_head)
1226 + db->head->module * sizeof (ref_t)
1227 + db->head->data_size));
1228 nwritten = sendfileall (fd, db->wr_fd,
1229 (char *) cached->data
1230 - (char *) db->head, cached->recsize);
1231 # ifndef __ASSUME_SENDFILE
1232 if (nwritten == -1 && errno == ENOSYS)
1233 goto use_write;
1234 # endif
1235 }
1236 else
1237 # ifndef __ASSUME_SENDFILE
1238 use_write:
1239 # endif
1240 #endif
1241 nwritten = writeall (fd, cached->data, cached->recsize);
1242
1243 if (nwritten != cached->recsize
1244 && __builtin_expect (debug_level, 0) > 0)
1245 {
1246 /* We have problems sending the result. */
1247 char buf[256];
1248 dbg_log (_("cannot write result: %s"),
1249 strerror_r (errno, buf, sizeof (buf)));
1250 }
1251
1252 pthread_rwlock_unlock (&db->lock);
1253
1254 return;
1255 }
1256
1257 pthread_rwlock_unlock (&db->lock);
1258 }
1259 else if (__builtin_expect (debug_level, 0) > 0)
1260 {
1261 if (req->type == INVALIDATE)
1262 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
1263 else
1264 dbg_log ("\t%s", serv2str[req->type]);
1265 }
1266
1267 /* Handle the request. */
1268 switch (req->type)
1269 {
1270 case GETPWBYNAME:
1271 addpwbyname (db, fd, req, key, uid);
1272 break;
1273
1274 case GETPWBYUID:
1275 addpwbyuid (db, fd, req, key, uid);
1276 break;
1277
1278 case GETGRBYNAME:
1279 addgrbyname (db, fd, req, key, uid);
1280 break;
1281
1282 case GETGRBYGID:
1283 addgrbygid (db, fd, req, key, uid);
1284 break;
1285
1286 case GETHOSTBYNAME:
1287 addhstbyname (db, fd, req, key, uid);
1288 break;
1289
1290 case GETHOSTBYNAMEv6:
1291 addhstbynamev6 (db, fd, req, key, uid);
1292 break;
1293
1294 case GETHOSTBYADDR:
1295 addhstbyaddr (db, fd, req, key, uid);
1296 break;
1297
1298 case GETHOSTBYADDRv6:
1299 addhstbyaddrv6 (db, fd, req, key, uid);
1300 break;
1301
1302 case GETAI:
1303 addhstai (db, fd, req, key, uid);
1304 break;
1305
1306 case INITGROUPS:
1307 addinitgroups (db, fd, req, key, uid);
1308 break;
1309
1310 case GETSERVBYNAME:
1311 addservbyname (db, fd, req, key, uid);
1312 break;
1313
1314 case GETSERVBYPORT:
1315 addservbyport (db, fd, req, key, uid);
1316 break;
1317
1318 case GETNETGRENT:
1319 addgetnetgrent (db, fd, req, key, uid);
1320 break;
1321
1322 case INNETGR:
1323 addinnetgr (db, fd, req, key, uid);
1324 break;
1325
1326 case GETSTAT:
1327 case SHUTDOWN:
1328 case INVALIDATE:
1329 {
1330 /* Get the caller's credentials. */
1331 #ifdef SO_PEERCRED
1332 struct ucred caller;
1333 socklen_t optlen = sizeof (caller);
1334
1335 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1336 {
1337 char buf[256];
1338
1339 dbg_log (_("error getting caller's id: %s"),
1340 strerror_r (errno, buf, sizeof (buf)));
1341 break;
1342 }
1343
1344 uid = caller.uid;
1345 #else
1346 /* Some systems have no SO_PEERCRED implementation. They don't
1347 care about security, so neither do we. */
1348 uid = 0;
1349 #endif
1350 }
1351
1352 /* Accept shutdown, getstat and invalidate only from root. For
1353 the stat call also allow the user specified in the config file. */
1354 if (req->type == GETSTAT)
1355 {
1356 if (uid == 0 || uid == stat_uid)
1357 send_stats (fd, dbs);
1358 }
1359 else if (uid == 0)
1360 {
1361 if (req->type == INVALIDATE)
1362 invalidate_cache (key, fd);
1363 else
1364 termination_handler (0);
1365 }
1366 break;
1367
1368 case GETFDPW:
1369 case GETFDGR:
1370 case GETFDHST:
1371 case GETFDSERV:
1372 case GETFDNETGR:
1373 #ifdef SCM_RIGHTS
1374 send_ro_fd (reqinfo[req->type].db, key, fd);
1375 #endif
1376 break;
1377
1378 default:
1379 /* Ignore the command, it's nothing we know. */
1380 break;
1381 }
1382 }
1383
1384
1385 /* Restart the process. */
1386 static void
1387 restart (void)
1388 {
1389 /* First determine the parameters. We do not use the parameters
1390 passed to main() since they will not work when nscd is started
1391 by running the dynamic linker. Yes, this is not the usual
1392 case but nscd is part of glibc and we occasionally do this. */
1393 size_t buflen = 1024;
1394 char *buf = alloca (buflen);
1395 size_t readlen = 0;
1396 int fd = open ("/proc/self/cmdline", O_RDONLY);
1397 if (fd == -1)
1398 {
1399 dbg_log (_("\
1400 cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1401 strerror (errno));
1402
1403 paranoia = 0;
1404 return;
1405 }
1406
1407 while (1)
1408 {
1409 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
1410 buflen - readlen));
1411 if (n == -1)
1412 {
1413 dbg_log (_("\
1414 cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
1415 strerror (errno));
1416
1417 close (fd);
1418 paranoia = 0;
1419 return;
1420 }
1421
1422 readlen += n;
1423
1424 if (readlen < buflen)
1425 break;
1426
1427 /* We might have to extend the buffer. */
1428 size_t old_buflen = buflen;
1429 char *newp = extend_alloca (buf, buflen, 2 * buflen);
1430 buf = memmove (newp, buf, old_buflen);
1431 }
1432
1433 close (fd);
1434
1435 /* Parse the command line. Worst case scenario: every two
1436 characters form one parameter (one character plus NUL). */
1437 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1438 int argc = 0;
1439
1440 char *cp = buf;
1441 while (cp < buf + readlen)
1442 {
1443 argv[argc++] = cp;
1444 cp = (char *) rawmemchr (cp, '\0') + 1;
1445 }
1446 argv[argc] = NULL;
1447
1448 /* Second, change back to the old user if we changed it. */
1449 if (server_user != NULL)
1450 {
1451 if (setresuid (old_uid, old_uid, old_uid) != 0)
1452 {
1453 dbg_log (_("\
1454 cannot change to old UID: %s; disabling paranoia mode"),
1455 strerror (errno));
1456
1457 paranoia = 0;
1458 return;
1459 }
1460
1461 if (setresgid (old_gid, old_gid, old_gid) != 0)
1462 {
1463 dbg_log (_("\
1464 cannot change to old GID: %s; disabling paranoia mode"),
1465 strerror (errno));
1466
1467 setuid (server_uid);
1468 paranoia = 0;
1469 return;
1470 }
1471 }
1472
1473 /* Next change back to the old working directory. */
1474 if (chdir (oldcwd) == -1)
1475 {
1476 dbg_log (_("\
1477 cannot change to old working directory: %s; disabling paranoia mode"),
1478 strerror (errno));
1479
1480 if (server_user != NULL)
1481 {
1482 setuid (server_uid);
1483 setgid (server_gid);
1484 }
1485 paranoia = 0;
1486 return;
1487 }
1488
1489 /* Synchronize memory. */
1490 int32_t certainly[lastdb];
1491 for (int cnt = 0; cnt < lastdb; ++cnt)
1492 if (dbs[cnt].enabled)
1493 {
1494 /* Make sure nobody keeps using the database. */
1495 dbs[cnt].head->timestamp = 0;
1496 certainly[cnt] = dbs[cnt].head->nscd_certainly_running;
1497 dbs[cnt].head->nscd_certainly_running = 0;
1498
1499 if (dbs[cnt].persistent)
1500 // XXX async OK?
1501 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1502 }
1503
1504 /* The preparations are done. */
1505 #ifdef PATH_MAX
1506 char pathbuf[PATH_MAX];
1507 #else
1508 char pathbuf[256];
1509 #endif
1510 /* Try to exec the real nscd program so the process name (as reported
1511 in /proc/PID/status) will be 'nscd', but fall back to /proc/self/exe
1512 if readlink or the exec with the result of the readlink call fails. */
1513 ssize_t n = readlink ("/proc/self/exe", pathbuf, sizeof (pathbuf) - 1);
1514 if (n != -1)
1515 {
1516 pathbuf[n] = '\0';
1517 execv (pathbuf, argv);
1518 }
1519 execv ("/proc/self/exe", argv);
1520
1521 /* If we come here, we will never be able to re-exec. */
1522 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1523 strerror (errno));
1524
1525 if (server_user != NULL)
1526 {
1527 setuid (server_uid);
1528 setgid (server_gid);
1529 }
1530 if (chdir ("/") != 0)
1531 dbg_log (_("cannot change current working directory to \"/\": %s"),
1532 strerror (errno));
1533 paranoia = 0;
1534
1535 /* Reenable the databases. */
1536 time_t now = time (NULL);
1537 for (int cnt = 0; cnt < lastdb; ++cnt)
1538 if (dbs[cnt].enabled)
1539 {
1540 dbs[cnt].head->timestamp = now;
1541 dbs[cnt].head->nscd_certainly_running = certainly[cnt];
1542 }
1543 }
1544
1545
1546 /* List of file descriptors. */
1547 struct fdlist
1548 {
1549 int fd;
1550 struct fdlist *next;
1551 };
1552 /* Memory allocated for the list. */
1553 static struct fdlist *fdlist;
1554 /* List of currently ready-to-read file descriptors. */
1555 static struct fdlist *readylist;
1556
1557 /* Condition variable and mutex to signal availability of entries in
1558 READYLIST. The condvar is initialized dynamically since we might
1559 use a different clock depending on availability. */
1560 static pthread_cond_t readylist_cond = PTHREAD_COND_INITIALIZER;
1561 static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;
1562
1563 /* The clock to use with the condvar. */
1564 static clockid_t timeout_clock = CLOCK_REALTIME;
1565
1566 /* Number of threads ready to handle the READYLIST. */
1567 static unsigned long int nready;
1568
1569
1570 /* Function for the clean-up threads. */
1571 static void *
1572 __attribute__ ((__noreturn__))
1573 nscd_run_prune (void *p)
1574 {
1575 const long int my_number = (long int) p;
1576 assert (dbs[my_number].enabled);
1577
1578 int dont_need_update = setup_thread (&dbs[my_number]);
1579
1580 time_t now = time (NULL);
1581
1582 /* We are running. */
1583 dbs[my_number].head->timestamp = now;
1584
1585 struct timespec prune_ts;
1586 if (__glibc_unlikely (clock_gettime (timeout_clock, &prune_ts) == -1))
1587 /* Should never happen. */
1588 abort ();
1589
1590 /* Compute the initial timeout time. Prevent all the timers from
1591 going off at the same time by adding a per-database offset. */
1592 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL + my_number;
1593 dbs[my_number].wakeup_time = now + CACHE_PRUNE_INTERVAL + my_number;
1594
1595 pthread_mutex_t *prune_lock = &dbs[my_number].prune_lock;
1596 pthread_mutex_t *prune_run_lock = &dbs[my_number].prune_run_lock;
1597 pthread_cond_t *prune_cond = &dbs[my_number].prune_cond;
1598
1599 pthread_mutex_lock (prune_lock);
1600 while (1)
1601 {
1602 /* Wait, but not forever. */
1603 int e = 0;
1604 if (! dbs[my_number].clear_cache)
1605 e = pthread_cond_timedwait (prune_cond, prune_lock, &prune_ts);
1606 assert (__builtin_expect (e == 0 || e == ETIMEDOUT, 1));
1607
1608 time_t next_wait;
1609 now = time (NULL);
1610 if (e == ETIMEDOUT || now >= dbs[my_number].wakeup_time
1611 || dbs[my_number].clear_cache)
1612 {
1613 /* We will determine the new timeout values based on the
1614 cache content. Should there be concurrent additions to
1615 the cache which are not accounted for in this pruning
1616 pass, we want to know about it. Therefore set the
1617 timeout to the maximum. It will be decreased when adding
1618 new entries to the cache, if necessary. */
1619 dbs[my_number].wakeup_time = MAX_TIMEOUT_VALUE;
1620
1621 /* Unconditionally reset the flag. */
1622 time_t prune_now = dbs[my_number].clear_cache ? LONG_MAX : now;
1623 dbs[my_number].clear_cache = 0;
1624
1625 pthread_mutex_unlock (prune_lock);
1626
1627 /* We use a separate lock for running the prune function (instead
1628 of keeping prune_lock locked) because this enables concurrent
1629 invocations of cache_add which might modify the timeout value. */
1630 pthread_mutex_lock (prune_run_lock);
1631 next_wait = prune_cache (&dbs[my_number], prune_now, -1);
1632 pthread_mutex_unlock (prune_run_lock);
1633
1634 next_wait = MAX (next_wait, CACHE_PRUNE_INTERVAL);
1635 /* If clients cannot determine for sure whether nscd is running
1636 we need to wake up occasionally to update the timestamp.
1637 Wait 90% of the update period. */
1638 #define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
1639 if (__glibc_unlikely (! dont_need_update))
1640 {
1641 next_wait = MIN (UPDATE_MAPPING_TIMEOUT, next_wait);
1642 dbs[my_number].head->timestamp = now;
1643 }
1644
1645 pthread_mutex_lock (prune_lock);
1646
1647 /* Make it known when we will wake up again. */
1648 if (now + next_wait < dbs[my_number].wakeup_time)
1649 dbs[my_number].wakeup_time = now + next_wait;
1650 else
1651 next_wait = dbs[my_number].wakeup_time - now;
1652 }
1653 else
1654 /* The cache was just pruned. Do not do it again now. Just
1655 use the new timeout value. */
1656 next_wait = dbs[my_number].wakeup_time - now;
1657
1658 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1659 /* Should never happen. */
1660 abort ();
1661
1662 /* Compute next timeout time. */
1663 prune_ts.tv_sec += next_wait;
1664 }
1665 }
1666
1667
1668 /* This is the main loop. It is replicated in different threads but
1669 the use of the ready list makes sure only one thread handles an
1670 incoming connection. */
1671 static void *
1672 __attribute__ ((__noreturn__))
1673 nscd_run_worker (void *p)
1674 {
1675 char buf[256];
1676
1677 /* Initial locking. */
1678 pthread_mutex_lock (&readylist_lock);
1679
1680 /* One more thread available. */
1681 ++nready;
1682
1683 while (1)
1684 {
1685 while (readylist == NULL)
1686 pthread_cond_wait (&readylist_cond, &readylist_lock);
1687
1688 struct fdlist *it = readylist->next;
1689 if (readylist->next == readylist)
1690 /* Just one entry on the list. */
1691 readylist = NULL;
1692 else
1693 readylist->next = it->next;
1694
1695 /* Extract the information and mark the record ready to be used
1696 again. */
1697 int fd = it->fd;
1698 it->next = NULL;
1699
1700 /* One fewer thread available. */
1701 --nready;
1702
1703 /* We are done with the list. */
1704 pthread_mutex_unlock (&readylist_lock);
1705
1706 #ifndef __ASSUME_ACCEPT4
1707 if (have_accept4 < 0)
1708 {
1709 /* We do not want to block on a short read or so. */
1710 int fl = fcntl (fd, F_GETFL);
1711 if (fl == -1 || fcntl (fd, F_SETFL, fl | O_NONBLOCK) == -1)
1712 goto close_and_out;
1713 }
1714 #endif
1715
1716 /* Now read the request. */
1717 request_header req;
1718 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1719 != sizeof (req), 0))
1720 {
1721 /* We failed to read data. Note that this also might mean we
1722 failed because we would have blocked. */
1723 if (debug_level > 0)
1724 dbg_log (_("short read while reading request: %s"),
1725 strerror_r (errno, buf, sizeof (buf)));
1726 goto close_and_out;
1727 }
1728
1729 /* Check whether this is a valid request type. */
1730 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1731 goto close_and_out;
1732
1733 /* Some systems have no SO_PEERCRED implementation. They don't
1734 care about security, so neither do we. */
1735 uid_t uid = -1;
1736 #ifdef SO_PEERCRED
1737 pid_t pid = 0;
1738
1739 if (__glibc_unlikely (debug_level > 0))
1740 {
1741 struct ucred caller;
1742 socklen_t optlen = sizeof (caller);
1743
1744 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1745 pid = caller.pid;
1746 }
1747 #else
1748 const pid_t pid = 0;
1749 #endif
1750
1751 /* It should not be possible to crash the nscd with a silly
1752 request (i.e., a terribly large key). We limit the size to 1kb. */
1753 if (__builtin_expect (req.key_len, 1) < 0
1754 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1755 {
1756 if (debug_level > 0)
1757 dbg_log (_("key length in request too long: %d"), req.key_len);
1758 }
1759 else
1760 {
1761 /* Get the key. */
1762 char keybuf[MAXKEYLEN + 1];
1763
1764 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1765 req.key_len))
1766 != req.key_len, 0))
1767 {
1768 /* Again, this can also mean we would have blocked. */
1769 if (debug_level > 0)
1770 dbg_log (_("short read while reading request key: %s"),
1771 strerror_r (errno, buf, sizeof (buf)));
1772 goto close_and_out;
1773 }
1774 keybuf[req.key_len] = '\0';
1775
1776 if (__builtin_expect (debug_level, 0) > 0)
1777 {
1778 #ifdef SO_PEERCRED
1779 if (pid != 0)
1780 dbg_log (_("\
1781 handle_request: request received (Version = %d) from PID %ld"),
1782 req.version, (long int) pid);
1783 else
1784 #endif
1785 dbg_log (_("\
1786 handle_request: request received (Version = %d)"), req.version);
1787 }
1788
1789 /* Phew, we got all the data, now process it. */
1790 handle_request (fd, &req, keybuf, uid, pid);
1791 }
1792
1793 close_and_out:
1794 /* We are done. */
1795 close (fd);
1796
1797 /* Re-locking. */
1798 pthread_mutex_lock (&readylist_lock);
1799
1800 /* One more thread available. */
1801 ++nready;
1802 }
1803 /* NOTREACHED */
1804 }
1805
1806
1807 static unsigned int nconns;
1808
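/* Queue the accepted descriptor FD on READYLIST and wake a worker thread,
   starting an additional worker if none is currently idle and the
   configured maximum has not been reached.  */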
1809 static void
1810 fd_ready (int fd)
1811 {
1812 pthread_mutex_lock (&readylist_lock);
1813
1814 /* Find an empty entry in FDLIST. */
1815 size_t inner;
1816 for (inner = 0; inner < nconns; ++inner)
1817 if (fdlist[inner].next == NULL)
1818 break;
1819 assert (inner < nconns);
1820
1821 fdlist[inner].fd = fd;
1822
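/* READYLIST is a circular singly-linked list.  It always points at the
   most recently added entry, so READYLIST->next is the oldest one and is
   what the worker threads consume first.  */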
1823 if (readylist == NULL)
1824 readylist = fdlist[inner].next = &fdlist[inner];
1825 else
1826 {
1827 fdlist[inner].next = readylist->next;
1828 readylist = readylist->next = &fdlist[inner];
1829 }
1830
1831 bool do_signal = true;
1832 if (__glibc_unlikely (nready == 0))
1833 {
1834 ++client_queued;
1835 do_signal = false;
1836
1837 /* Try to start another thread to help out. */
1838 pthread_t th;
1839 if (nthreads < max_nthreads
1840 && pthread_create (&th, &attr, nscd_run_worker,
1841 (void *) (long int) nthreads) == 0)
1842 {
1843 /* We got another thread. */
1844 ++nthreads;
1845 /* The new thread might need a kick. */
1846 do_signal = true;
1847 }
1848
1849 }
1850
1851 pthread_mutex_unlock (&readylist_lock);
1852
1853 /* Tell one of the worker threads there is work to do. */
1854 if (do_signal)
1855 pthread_cond_signal (&readylist_cond);
1856 }
1857
1858
1859 /* Check whether restarting should happen. */
1860 static bool
1861 restart_p (time_t now)
1862 {
1863 return (paranoia && readylist == NULL && nready == nthreads
1864 && now >= restart_time);
1865 }
1866
1867
1868 /* Array for times a connection was accepted. */
1869 static time_t *starttime;
1870
1871 #ifdef HAVE_INOTIFY
1872 /* Inotify event for changed file. */
1873 union __inev
1874 {
1875 struct inotify_event i;
1876 # ifndef PATH_MAX
1877 # define PATH_MAX 1024
1878 # endif
1879 char buf[sizeof (struct inotify_event) + PATH_MAX];
1880 };
1881
1882 /* Process the inotify event in INEV. If the event matches any of the files
1883 registered with a database then mark that database as requiring its cache
1884 to be cleared. We indicate the cache needs clearing by setting
1885 TO_CLEAR[DBCNT] to true for the matching database. */
1886 static inline void
1887 inotify_check_files (bool *to_clear, union __inev *inev)
1888 {
1889 /* Check which of the files changed. */
1890 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
1891 {
1892 struct traced_file *finfo = dbs[dbcnt].traced_files;
1893
1894 while (finfo != NULL)
1895 {
1896 /* Inotify event watch descriptor matches. */
1897 if (finfo->inotify_descr == inev->i.wd)
1898 {
1899 /* Mark cache as needing to be cleared and reinitialize. */
1900 to_clear[dbcnt] = true;
1901 if (finfo->call_res_init)
1902 res_init ();
1903 return;
1904 }
1905
1906 finfo = finfo->next;
1907 }
1908 }
1909 }
1910
1911 /* If an entry in the array of booleans TO_CLEAR is TRUE then clear the cache
1912 for the associated database, otherwise do nothing. The TO_CLEAR array must
1913 have LASTDB entries. */
1914 static inline void
1915 clear_db_cache (bool *to_clear)
1916 {
1917 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
1918 if (to_clear[dbcnt])
1919 {
1920 pthread_mutex_lock (&dbs[dbcnt].prune_lock);
1921 dbs[dbcnt].clear_cache = 1;
1922 pthread_mutex_unlock (&dbs[dbcnt].prune_lock);
1923 pthread_cond_signal (&dbs[dbcnt].prune_cond);
1924 }
1925 }
1926
1927 #endif
1928
1929 static void
1930 __attribute__ ((__noreturn__))
1931 main_loop_poll (void)
1932 {
1933 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
1934 * sizeof (conns[0]));
1935
1936 conns[0].fd = sock;
1937 conns[0].events = POLLRDNORM;
1938 size_t nused = 1;
1939 size_t firstfree = 1;
1940
1941 #ifdef HAVE_INOTIFY
1942 if (inotify_fd != -1)
1943 {
1944 conns[1].fd = inotify_fd;
1945 conns[1].events = POLLRDNORM;
1946 nused = 2;
1947 firstfree = 2;
1948 }
1949 #endif
1950
1951 #ifdef HAVE_NETLINK
1952 size_t idx_nl_status_fd = 0;
1953 if (nl_status_fd != -1)
1954 {
1955 idx_nl_status_fd = nused;
1956 conns[nused].fd = nl_status_fd;
1957 conns[nused].events = POLLRDNORM;
1958 ++nused;
1959 firstfree = nused;
1960 }
1961 #endif
1962
1963 while (1)
1964 {
1965 /* Wait for any event. We wait at most a couple of seconds so
1966 that we can check whether we should close any accepted
1967 connections on which we have not received a request. */
1968 #define MAX_ACCEPT_TIMEOUT 30
1969 #define MIN_ACCEPT_TIMEOUT 5
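/* The poll timeout shrinks linearly as more connection slots are in use:
   from MAX_ACCEPT_TIMEOUT seconds with an empty table down to the midpoint
   between MAX_ACCEPT_TIMEOUT and MIN_ACCEPT_TIMEOUT when all NCONNS slots
   are occupied.  */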
1970 #define MAIN_THREAD_TIMEOUT \
1971 (MAX_ACCEPT_TIMEOUT * 1000 \
1972 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
1973
1974 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
1975
1976 time_t now = time (NULL);
1977
1978 /* If there is a descriptor ready for reading or there is a new
1979 connection, process this now. */
1980 if (n > 0)
1981 {
1982 if (conns[0].revents != 0)
1983 {
1984 /* We have a new incoming connection. Accept the connection. */
1985 int fd;
1986
1987 #ifndef __ASSUME_ACCEPT4
1988 fd = -1;
1989 if (have_accept4 >= 0)
1990 #endif
1991 {
1992 fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
1993 SOCK_NONBLOCK));
1994 #ifndef __ASSUME_ACCEPT4
1995 if (have_accept4 == 0)
1996 have_accept4 = fd != -1 || errno != ENOSYS ? 1 : -1;
1997 #endif
1998 }
1999 #ifndef __ASSUME_ACCEPT4
2000 if (have_accept4 < 0)
2001 fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
2002 #endif
2003
2004 /* Use the descriptor if we have not reached the limit. */
2005 if (fd >= 0)
2006 {
2007 if (firstfree < nconns)
2008 {
2009 conns[firstfree].fd = fd;
2010 conns[firstfree].events = POLLRDNORM;
2011 starttime[firstfree] = now;
2012 if (firstfree >= nused)
2013 nused = firstfree + 1;
2014
2015 do
2016 ++firstfree;
2017 while (firstfree < nused && conns[firstfree].fd != -1);
2018 }
2019 else
2020 /* We cannot use the connection so close it. */
2021 close (fd);
2022 }
2023
2024 --n;
2025 }
2026
2027 size_t first = 1;
2028 #ifdef HAVE_INOTIFY
2029 if (inotify_fd != -1 && conns[1].fd == inotify_fd)
2030 {
2031 if (conns[1].revents != 0)
2032 {
2033 bool to_clear[lastdb] = { false, };
2034 union __inev inev;
2035
2036 /* Read all inotify events for files registered via
2037 register_traced_file(). */
2038 while (1)
2039 {
2040 ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd, &inev,
2041 sizeof (inev)));
2042 if (nb < (ssize_t) sizeof (struct inotify_event))
2043 {
2044 if (__builtin_expect (nb == -1 && errno != EAGAIN,
2045 0))
2046 {
2047 /* Something went wrong when reading the inotify
2048 data. Better disable inotify. */
2049 dbg_log (_("\
2050 disabled inotify after read error %d"),
2051 errno);
2052 conns[1].fd = -1;
2053 firstfree = 1;
2054 if (nused == 2)
2055 nused = 1;
2056 close (inotify_fd);
2057 inotify_fd = -1;
2058 }
2059 break;
2060 }
2061
2062 /* Check which of the files changed. */
2063 inotify_check_files (to_clear, &inev);
2064 }
2065
2066 /* Actually perform the cache clearing. */
2067 clear_db_cache (to_clear);
2068
2069 --n;
2070 }
2071
2072 first = 2;
2073 }
2074 #endif
2075
2076 #ifdef HAVE_NETLINK
2077 if (idx_nl_status_fd != 0 && conns[idx_nl_status_fd].revents != 0)
2078 {
2079 char buf[4096];
2080 /* Read all the data. We do not interpret it here. */
2081 while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
2082 sizeof (buf))) != -1)
2083 ;
2084
2085 dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
2086 = __bump_nl_timestamp ();
2087 }
2088 #endif
2089
2090 for (size_t cnt = first; cnt < nused && n > 0; ++cnt)
2091 if (conns[cnt].revents != 0)
2092 {
2093 fd_ready (conns[cnt].fd);
2094
2095 /* Clean up the CONNS array. */
2096 conns[cnt].fd = -1;
2097 if (cnt < firstfree)
2098 firstfree = cnt;
2099 if (cnt == nused - 1)
2100 do
2101 --nused;
2102 while (conns[nused - 1].fd == -1);
2103
2104 --n;
2105 }
2106 }
2107
2108 /* Now find entries which have timed out. */
2109 assert (nused > 0);
2110
2111 /* We make the timeout length depend on the number of file
2112 descriptors currently used. */
2113 #define ACCEPT_TIMEOUT \
2114 (MAX_ACCEPT_TIMEOUT \
2115 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
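/* Editor's note (not in the original source): ACCEPT_TIMEOUT likewise
   scales linearly with the load -- about 30 seconds when almost no slot is
   in use, down to MIN_ACCEPT_TIMEOUT (5 seconds) when nused == nconns --
   so idle client connections are reaped faster when descriptors are
   scarce.  */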
2116 time_t laststart = now - ACCEPT_TIMEOUT;
2117
2118 for (size_t cnt = nused - 1; cnt > 0; --cnt)
2119 {
2120 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
2121 {
2122 /* Remove the entry, it timed out. */
2123 (void) close (conns[cnt].fd);
2124 conns[cnt].fd = -1;
2125
2126 if (cnt < firstfree)
2127 firstfree = cnt;
2128 if (cnt == nused - 1)
2129 do
2130 --nused;
2131 while (conns[nused - 1].fd == -1);
2132 }
2133 }
2134
2135 if (restart_p (now))
2136 restart ();
2137 }
2138 }
2139
2140
2141 #ifdef HAVE_EPOLL
2142 static void
2143 main_loop_epoll (int efd)
2144 {
2145 struct epoll_event ev = { 0, };
2146 int nused = 1;
2147 size_t highest = 0;
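/* Editor's note (not in the original source): unlike main_loop_poll, which
   tracks connections by their slot in the conns[] array, this loop indexes
   starttime[] directly by file descriptor number.  That is why descriptors
   >= nconns are rejected below and why "highest" tracks the largest live
   descriptor rather than an array position.  */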
2148
2149 /* Add the socket. */
2150 ev.events = EPOLLRDNORM;
2151 ev.data.fd = sock;
2152 if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
2153 /* We cannot use epoll. */
2154 return;
2155
2156 # ifdef HAVE_INOTIFY
2157 if (inotify_fd != -1)
2158 {
2159 ev.events = EPOLLRDNORM;
2160 ev.data.fd = inotify_fd;
2161 if (epoll_ctl (efd, EPOLL_CTL_ADD, inotify_fd, &ev) == -1)
2162 /* We cannot use epoll. */
2163 return;
2164 nused = 2;
2165 }
2166 # endif
2167
2168 # ifdef HAVE_NETLINK
2169 if (nl_status_fd != -1)
2170 {
2171 ev.events = EPOLLRDNORM;
2172 ev.data.fd = nl_status_fd;
2173 if (epoll_ctl (efd, EPOLL_CTL_ADD, nl_status_fd, &ev) == -1)
2174 /* We cannot use epoll. */
2175 return;
2176 }
2177 # endif
2178
2179 while (1)
2180 {
2181 struct epoll_event revs[100];
2182 # define nrevs (sizeof (revs) / sizeof (revs[0]))
2183
2184 int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);
2185
2186 time_t now = time (NULL);
2187
2188 for (int cnt = 0; cnt < n; ++cnt)
2189 if (revs[cnt].data.fd == sock)
2190 {
2191 /* A new connection. */
2192 int fd;
2193
2194 # ifndef __ASSUME_ACCEPT4
2195 fd = -1;
2196 if (have_accept4 >= 0)
2197 # endif
2198 {
2199 fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
2200 SOCK_NONBLOCK));
2201 # ifndef __ASSUME_ACCEPT4
2202 if (have_accept4 == 0)
2203 have_accept4 = fd != -1 || errno != ENOSYS ? 1 : -1;
2204 # endif
2205 }
2206 # ifndef __ASSUME_ACCEPT4
2207 if (have_accept4 < 0)
2208 fd = TEMP_FAILURE_RETRY (accept (sock, NULL, NULL));
2209 # endif
2210
2211 /* Use the descriptor if we have not reached the limit. */
2212 if (fd >= 0)
2213 {
2214 /* Try to add the new descriptor. */
2215 ev.data.fd = fd;
2216 if (fd >= nconns
2217 || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
2218 /* The descriptor is too large or something went
2219 wrong. Close the descriptor. */
2220 close (fd);
2221 else
2222 {
2223 /* Remember when we accepted the connection. */
2224 starttime[fd] = now;
2225
2226 if (fd > highest)
2227 highest = fd;
2228
2229 ++nused;
2230 }
2231 }
2232 }
2233 # ifdef HAVE_INOTIFY
2234 else if (revs[cnt].data.fd == inotify_fd)
2235 {
2236 bool to_clear[lastdb] = { false, };
2237 union __inev inev;
2238
2239 /* Read all inotify events for files registered via
2240 register_traced_file(). */
2241 while (1)
2242 {
2243 ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd, &inev,
2244 sizeof (inev)));
2245 if (nb < (ssize_t) sizeof (struct inotify_event))
2246 {
2247 if (__glibc_unlikely (nb == -1 && errno != EAGAIN))
2248 {
2249 /* Something went wrong when reading the inotify
2250 data. Better disable inotify. */
2251 dbg_log (_("disabled inotify after read error %d"),
2252 errno);
2253 (void) epoll_ctl (efd, EPOLL_CTL_DEL, inotify_fd,
2254 NULL);
2255 close (inotify_fd);
2256 inotify_fd = -1;
2257 }
2258 break;
2259 }
2260
2261 /* Check which of the files changed. */
2262 inotify_check_files (to_clear, &inev);
2263 }
2264
2265 /* Actually perform the cache clearing. */
2266 clear_db_cache (to_clear);
2267 }
2268 # endif
2269 # ifdef HAVE_NETLINK
2270 else if (revs[cnt].data.fd == nl_status_fd)
2271 {
2272 char buf[4096];
2273 /* Read all the data. We do not interpret it here. */
2274 while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
2275 sizeof (buf))) != -1)
2276 ;
2277
2278 __bump_nl_timestamp ();
2279 }
2280 # endif
2281 else
2282 {
2283 /* Remove the descriptor from the epoll descriptor. */
2284 (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);
2285
2286 /* Get a worker to handle the request. */
2287 fd_ready (revs[cnt].data.fd);
2288
2289 /* Reset the time. */
2290 starttime[revs[cnt].data.fd] = 0;
2291 if (revs[cnt].data.fd == highest)
2292 do
2293 --highest;
2294 while (highest > 0 && starttime[highest] == 0);
2295
2296 --nused;
2297 }
2298
2299 /* Now look for descriptors for accepted connections on which no
2300 request has arrived for too long. */
2301 time_t laststart = now - ACCEPT_TIMEOUT;
2302 assert (starttime[sock] == 0);
2303 assert (inotify_fd == -1 || starttime[inotify_fd] == 0);
2304 assert (nl_status_fd == -1 || starttime[nl_status_fd] == 0);
2305 for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
2306 if (starttime[cnt] != 0 && starttime[cnt] < laststart)
2307 {
2308 /* We are waiting for this one for too long. Close it. */
2309 (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);
2310
2311 (void) close (cnt);
2312
2313 starttime[cnt] = 0;
2314 if (cnt == highest)
2315 --highest;
2316 }
2317 else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
2318 --highest;
2319
2320 if (restart_p (now))
2321 restart ();
2322 }
2323 }
2324 #endif
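/* Editor's illustrative sketch (not part of connections.c): the bare bones
   of the epoll pattern main_loop_epoll uses -- register the listening
   socket, wait for events, accept and register new connections, and remove
   a descriptor from the interest set before handing it off.  toy_epoll_loop
   is a hypothetical name; error handling and the timeout bookkeeping of the
   real loop are omitted.  */
#include <sys/epoll.h>
#include <sys/socket.h>
#include <unistd.h>

static void
toy_epoll_loop (int listen_fd)
{
  int efd = epoll_create (100);
  if (efd == -1)
    return;

  struct epoll_event ev = { 0, };
  ev.events = EPOLLRDNORM;
  ev.data.fd = listen_fd;
  if (epoll_ctl (efd, EPOLL_CTL_ADD, listen_fd, &ev) == -1)
    {
      close (efd);
      return;
    }

  while (1)
    {
      struct epoll_event revs[16];
      int n = epoll_wait (efd, revs, 16, 30 * 1000);

      for (int i = 0; i < n; ++i)
        if (revs[i].data.fd == listen_fd)
          {
            /* New incoming connection: register it for read events.  */
            int fd = accept (listen_fd, NULL, NULL);
            if (fd >= 0)
              {
                ev.events = EPOLLRDNORM;
                ev.data.fd = fd;
                if (epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
                  close (fd);
              }
          }
        else
          {
            /* Request ready: unregister the descriptor before it is
               handed off (here we simply close it).  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[i].data.fd, NULL);
            close (revs[i].data.fd);
          }
    }
}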
2325
2326
2327 /* Start all the threads we want. The initial process is thread no. 1. */
2328 void
2329 start_threads (void)
2330 {
2331 /* Initialize the condition variable we will use. The only
2332 non-standard attribute we might use is the clock selection. */
2333 pthread_condattr_t condattr;
2334 pthread_condattr_init (&condattr);
2335
2336 #if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
2337 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
2338 /* Determine whether the monotonic clock is available. */
2339 struct timespec dummy;
2340 # if _POSIX_MONOTONIC_CLOCK == 0
2341 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
2342 # endif
2343 # if _POSIX_CLOCK_SELECTION == 0
2344 if (sysconf (_SC_CLOCK_SELECTION) > 0)
2345 # endif
2346 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
2347 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
2348 timeout_clock = CLOCK_MONOTONIC;
2349 #endif
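/* Editor's note (not in the original source): binding the condition
   variable to CLOCK_MONOTONIC keeps pthread_cond_timedwait deadlines
   immune to wall-clock adjustments (NTP steps, manual date changes).  If
   clock selection or the monotonic clock is unavailable, the default
   CLOCK_REALTIME behaviour is simply kept.  */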
2350
2351 /* Create the attribute for the threads. They are all created
2352 detached. */
2353 pthread_attr_init (&attr);
2354 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
2355 /* Use 1MB stacks, twice as much for 64-bit architectures. */
2356 pthread_attr_setstacksize (&attr, NSCD_THREAD_STACKSIZE);
2357
2358 /* We allow fewer than LASTDB threads only for debugging. */
2359 if (debug_level == 0)
2360 nthreads = MAX (nthreads, lastdb);
2361
2362 /* Create the threads which prune the databases. */
2363 // XXX Ideally this work would be done by some of the worker threads.
2364 // XXX But this is problematic since we would need to be able to wake
2365 // XXX them up explicitly as well as part of the group handling the
2366 // XXX ready-list. This requires an operation where we can wait on
2367 // XXX two condition variables at the same time. This operation
2368 // XXX does not exist (yet).
2369 for (long int i = 0; i < lastdb; ++i)
2370 {
2371 /* Initialize the condition variable. */
2372 if (pthread_cond_init (&dbs[i].prune_cond, &condattr) != 0)
2373 {
2374 dbg_log (_("could not initialize conditional variable"));
2375 do_exit (1, 0, NULL);
2376 }
2377
2378 pthread_t th;
2379 if (dbs[i].enabled
2380 && pthread_create (&th, &attr, nscd_run_prune, (void *) i) != 0)
2381 {
2382 dbg_log (_("could not start clean-up thread; terminating"));
2383 do_exit (1, 0, NULL);
2384 }
2385 }
2386
2387 pthread_condattr_destroy (&condattr);
2388
2389 for (long int i = 0; i < nthreads; ++i)
2390 {
2391 pthread_t th;
2392 if (pthread_create (&th, &attr, nscd_run_worker, NULL) != 0)
2393 {
2394 if (i == 0)
2395 {
2396 dbg_log (_("could not start any worker thread; terminating"));
2397 do_exit (1, 0, NULL);
2398 }
2399
2400 break;
2401 }
2402 }
2403
2404 /* Now it is safe to let the parent know that we're doing fine and it can
2405 exit. */
2406 notify_parent (0);
2407
2408 /* Determine how much room for descriptors we should initially
2409 allocate. This might need to change later if we cap the number
2410 with MAXCONN. */
2411 const long int nfds = sysconf (_SC_OPEN_MAX);
2412 #define MINCONN 32
2413 #define MAXCONN 16384
2414 if (nfds == -1 || nfds > MAXCONN)
2415 nconns = MAXCONN;
2416 else if (nfds < MINCONN)
2417 nconns = MINCONN;
2418 else
2419 nconns = nfds;
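/* Editor's note (not in the original source): the net effect is
   nconns = clamp (sysconf (_SC_OPEN_MAX), MINCONN, MAXCONN).  With a
   typical RLIMIT_NOFILE of 1024 this yields nconns == 1024; an unlimited
   or very large limit is capped at 16384 so that the fdlist and starttime
   arrays allocated below stay reasonably small.  */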
2420
2421 /* We need memory to pass descriptors on to the worker threads. */
2422 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
2423 /* Array to keep track of when each connection was accepted. */
2424 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
2425
2426 /* In the main thread we execute the loop which handles incoming
2427 connections. */
2428 #ifdef HAVE_EPOLL
2429 int efd = epoll_create (100);
2430 if (efd != -1)
2431 {
2432 main_loop_epoll (efd);
2433 close (efd);
2434 }
2435 #endif
2436
2437 main_loop_poll ();
2438 }
2439
2440
2441 /* Look up the uid, gid, and supplementary groups to run nscd as. When
2442 this function is called, we are not listening on the nscd socket yet, so
2443 we can just use the ordinary lookup functions without causing a lockup. */
2444 static void
2445 begin_drop_privileges (void)
2446 {
2447 struct passwd *pwd = getpwnam (server_user);
2448
2449 if (pwd == NULL)
2450 {
2451 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2452 do_exit (EXIT_FAILURE, 0,
2453 _("Failed to run nscd as user '%s'"), server_user);
2454 }
2455
2456 server_uid = pwd->pw_uid;
2457 server_gid = pwd->pw_gid;
2458
2459 /* Save the old UID/GID in case we have to change back later. */
2460 if (paranoia)
2461 {
2462 old_uid = getuid ();
2463 old_gid = getgid ();
2464 }
2465
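/* Editor's note (not in the original source): this is the usual two-call
   getgrouplist pattern -- the first call, with a null array and a zero
   count, is expected to fail and store the required number of groups in
   server_ngroups; the second call then fills the freshly allocated
   array.  */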
2466 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
2467 {
2468 /* This really must never happen. */
2469 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2470 do_exit (EXIT_FAILURE, errno,
2471 _("initial getgrouplist failed"));
2472 }
2473
2474 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
2475
2476 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
2477 == -1)
2478 {
2479 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2480 do_exit (EXIT_FAILURE, errno, _("getgrouplist failed"));
2481 }
2482 }
2483
2484
2485 /* Call setgroups(), setgid(), and setuid() to drop root privileges and
2486 run nscd as the user specified in the configuration file. */
2487 static void
2488 finish_drop_privileges (void)
2489 {
2490 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2491 /* We need to preserve the capabilities to connect to the audit daemon. */
2492 cap_t new_caps = preserve_capabilities ();
2493 #endif
2494
2495 if (setgroups (server_ngroups, server_groups) == -1)
2496 {
2497 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2498 do_exit (EXIT_FAILURE, errno, _("setgroups failed"));
2499 }
2500
2501 int res;
2502 if (paranoia)
2503 res = setresgid (server_gid, server_gid, old_gid);
2504 else
2505 res = setgid (server_gid);
2506 if (res == -1)
2507 {
2508 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2509 do_exit (4, errno, "setgid");
2510 }
2511
2512 if (paranoia)
2513 res = setresuid (server_uid, server_uid, old_uid);
2514 else
2515 res = setuid (server_uid);
2516 if (res == -1)
2517 {
2518 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2519 do_exit (4, errno, "setuid");
2520 }
2521
2522 #if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2523 /* Remove the temporary capabilities. */
2524 install_real_capabilities (new_caps);
2525 #endif
2526 }
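/* Editor's illustrative sketch (not part of connections.c): the ordering
   that finish_drop_privileges relies on -- supplementary groups first,
   then the gid, then the uid.  Once setuid() has switched to an
   unprivileged uid the process can no longer change its group list, so
   reversing the order would leave root's groups in place.  drop_to_user is
   a hypothetical helper; the paranoia/setres*id paths and the capability
   handling above are omitted.  */
#include <grp.h>
#include <pwd.h>
#include <stdlib.h>
#include <unistd.h>

static void
drop_to_user (const struct passwd *pwd)
{
  if (setgroups (1, &pwd->pw_gid) == -1     /* supplementary groups first */
      || setgid (pwd->pw_gid) == -1         /* then the primary gid */
      || setuid (pwd->pw_uid) == -1)        /* uid last */
    abort ();
}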