]> git.ipfire.org Git - thirdparty/glibc.git/blame - nscd/connections.c
Update copyright dates with scripts/update-copyrights.
[thirdparty/glibc.git] / nscd / connections.c
CommitLineData
67479a70 1/* Inner loops of cache daemon.
688903eb 2 Copyright (C) 1998-2018 Free Software Foundation, Inc.
d67281a7 3 This file is part of the GNU C Library.
67479a70 4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
d67281a7 5
43bc8ac6 6 This program is free software; you can redistribute it and/or modify
2e2efe65
RM
7 it under the terms of the GNU General Public License as published
8 by the Free Software Foundation; version 2 of the License, or
9 (at your option) any later version.
d67281a7 10
43bc8ac6 11 This program is distributed in the hope that it will be useful,
d67281a7 12 but WITHOUT ANY WARRANTY; without even the implied warranty of
43bc8ac6
UD
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
d67281a7 15
43bc8ac6 16 You should have received a copy of the GNU General Public License
59ba27a6 17 along with this program; if not, see <http://www.gnu.org/licenses/>. */
d67281a7 18
4401d759 19#include <alloca.h>
67479a70 20#include <assert.h>
0fdb4f42 21#include <atomic.h>
d67281a7 22#include <error.h>
67479a70 23#include <errno.h>
d6db0975 24#include <fcntl.h>
057685e4 25#include <grp.h>
3a2c0242 26#include <ifaddrs.h>
a95a08b4 27#include <libintl.h>
d67281a7 28#include <pthread.h>
057685e4 29#include <pwd.h>
482bbeb9 30#include <resolv.h>
057685e4 31#include <stdio.h>
d67281a7
UD
32#include <stdlib.h>
33#include <unistd.h>
e054f494 34#include <stdint.h>
8d8c6efa 35#include <arpa/inet.h>
3a2c0242 36#ifdef HAVE_NETLINK
432d41ce
UD
37# include <linux/netlink.h>
38# include <linux/rtnetlink.h>
3a2c0242 39#endif
fc03df7a
UD
40#ifdef HAVE_EPOLL
41# include <sys/epoll.h>
42#endif
5228ba2f
UD
43#ifdef HAVE_INOTIFY
44# include <sys/inotify.h>
45#endif
a95a08b4 46#include <sys/mman.h>
67479a70 47#include <sys/param.h>
a53bad16 48#include <sys/poll.h>
eac10791
UD
49#ifdef HAVE_SENDFILE
50# include <sys/sendfile.h>
51#endif
d67281a7
UD
52#include <sys/socket.h>
53#include <sys/stat.h>
d67281a7
UD
54#include <sys/un.h>
55
56#include "nscd.h"
57#include "dbg_log.h"
74a30a58 58#include "selinux.h"
a0edbb48 59#include <resolv/resolv.h>
37233df9
TS
60
61#include <kernel-features.h>
9090848d 62#include <libc-diag.h>
a334319f
UD
63
64
057685e4
UD
/* Support to run nscd as an unprivileged user.  */

/* Account to switch to.  nscd_init only calls the privilege-drop helpers
   when this is non-NULL.  */
const char *server_user;

/* Credentials belonging to SERVER_USER (presumably filled in by
   begin_drop_privileges; its definition is not visible here).  */
static uid_t server_uid;
static gid_t server_gid;
static gid_t *server_groups;
#ifndef NGROUPS
# define NGROUPS 32
#endif
static int server_ngroups;

/* NOTE(review): judging by the names these describe the user allowed to
   query statistics; confirm against the code that sets them.  */
const char *stat_user;
uid_t stat_uid;

static pthread_attr_t attr;

/* Forward declarations of the two phases of the privilege drop.  */
static void begin_drop_privileges (void);
static void finish_drop_privileges (void);
81
67479a70 82/* Map request type to a string. */
b21fa963 83const char *const serv2str[LASTREQ] =
d67281a7 84{
67479a70
UD
85 [GETPWBYNAME] = "GETPWBYNAME",
86 [GETPWBYUID] = "GETPWBYUID",
87 [GETGRBYNAME] = "GETGRBYNAME",
88 [GETGRBYGID] = "GETGRBYGID",
89 [GETHOSTBYNAME] = "GETHOSTBYNAME",
90 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
91 [GETHOSTBYADDR] = "GETHOSTBYADDR",
92 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
93 [SHUTDOWN] = "SHUTDOWN",
756409c4 94 [GETSTAT] = "GETSTAT",
c207f23b
UD
95 [INVALIDATE] = "INVALIDATE",
96 [GETFDPW] = "GETFDPW",
97 [GETFDGR] = "GETFDGR",
d19687d6 98 [GETFDHST] = "GETFDHST",
f7e7a396 99 [GETAI] = "GETAI",
b21fa963
UD
100 [INITGROUPS] = "INITGROUPS",
101 [GETSERVBYNAME] = "GETSERVBYNAME",
102 [GETSERVBYPORT] = "GETSERVBYPORT",
684ae515
UD
103 [GETFDSERV] = "GETFDSERV",
104 [GETNETGRENT] = "GETNETGRENT",
105 [INNETGR] = "INNETGR",
106 [GETFDNETGR] = "GETFDNETGR"
67479a70
UD
107};
108
109/* The control data structures for the services. */
a95a08b4 110struct database_dyn dbs[lastdb] =
67479a70
UD
111{
112 [pwddb] = {
c2e13112 113 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
ffb1b882 114 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
cd72adeb 115 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
c2e13112
RM
116 .enabled = 0,
117 .check_file = 1,
a95a08b4 118 .persistent = 0,
797ed6f7 119 .propagate = 1,
c207f23b 120 .shared = 0,
2c210d1e 121 .max_db_size = DEFAULT_MAX_DB_SIZE,
27c377dd 122 .suggested_module = DEFAULT_SUGGESTED_MODULE,
a95a08b4 123 .db_filename = _PATH_NSCD_PASSWD_DB,
c2e13112
RM
124 .disabled_iov = &pwd_iov_disabled,
125 .postimeout = 3600,
a95a08b4
UD
126 .negtimeout = 20,
127 .wr_fd = -1,
128 .ro_fd = -1,
129 .mmap_used = false
67479a70
UD
130 },
131 [grpdb] = {
c2e13112 132 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
ffb1b882 133 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
cd72adeb 134 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
c2e13112
RM
135 .enabled = 0,
136 .check_file = 1,
a95a08b4 137 .persistent = 0,
797ed6f7 138 .propagate = 1,
c207f23b 139 .shared = 0,
2c210d1e 140 .max_db_size = DEFAULT_MAX_DB_SIZE,
27c377dd 141 .suggested_module = DEFAULT_SUGGESTED_MODULE,
a95a08b4 142 .db_filename = _PATH_NSCD_GROUP_DB,
c2e13112
RM
143 .disabled_iov = &grp_iov_disabled,
144 .postimeout = 3600,
a95a08b4
UD
145 .negtimeout = 60,
146 .wr_fd = -1,
147 .ro_fd = -1,
148 .mmap_used = false
67479a70
UD
149 },
150 [hstdb] = {
c2e13112 151 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
ffb1b882 152 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
cd72adeb 153 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
c2e13112
RM
154 .enabled = 0,
155 .check_file = 1,
a95a08b4 156 .persistent = 0,
797ed6f7 157 .propagate = 0, /* Not used. */
c207f23b 158 .shared = 0,
2c210d1e 159 .max_db_size = DEFAULT_MAX_DB_SIZE,
27c377dd 160 .suggested_module = DEFAULT_SUGGESTED_MODULE,
a95a08b4 161 .db_filename = _PATH_NSCD_HOSTS_DB,
c2e13112
RM
162 .disabled_iov = &hst_iov_disabled,
163 .postimeout = 3600,
a95a08b4
UD
164 .negtimeout = 20,
165 .wr_fd = -1,
166 .ro_fd = -1,
167 .mmap_used = false
b21fa963
UD
168 },
169 [servdb] = {
170 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
ffb1b882 171 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
cd72adeb 172 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
b21fa963
UD
173 .enabled = 0,
174 .check_file = 1,
175 .persistent = 0,
176 .propagate = 0, /* Not used. */
177 .shared = 0,
178 .max_db_size = DEFAULT_MAX_DB_SIZE,
27c377dd 179 .suggested_module = DEFAULT_SUGGESTED_MODULE,
b21fa963
UD
180 .db_filename = _PATH_NSCD_SERVICES_DB,
181 .disabled_iov = &serv_iov_disabled,
182 .postimeout = 28800,
183 .negtimeout = 20,
184 .wr_fd = -1,
185 .ro_fd = -1,
186 .mmap_used = false
684ae515
UD
187 },
188 [netgrdb] = {
189 .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP,
190 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
191 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
192 .enabled = 0,
193 .check_file = 1,
194 .persistent = 0,
195 .propagate = 0, /* Not used. */
196 .shared = 0,
197 .max_db_size = DEFAULT_MAX_DB_SIZE,
198 .suggested_module = DEFAULT_SUGGESTED_MODULE,
199 .db_filename = _PATH_NSCD_NETGROUP_DB,
200 .disabled_iov = &netgroup_iov_disabled,
201 .postimeout = 28800,
202 .negtimeout = 20,
203 .wr_fd = -1,
204 .ro_fd = -1,
205 .mmap_used = false
67479a70
UD
206 }
207};
d67281a7 208
a95a08b4
UD
209
210/* Mapping of request type to database. */
9691d83c 211static struct
a95a08b4 212{
9691d83c
UD
213 bool data_request;
214 struct database_dyn *db;
000b027e 215} const reqinfo[LASTREQ] =
9691d83c
UD
216{
217 [GETPWBYNAME] = { true, &dbs[pwddb] },
218 [GETPWBYUID] = { true, &dbs[pwddb] },
219 [GETGRBYNAME] = { true, &dbs[grpdb] },
220 [GETGRBYGID] = { true, &dbs[grpdb] },
221 [GETHOSTBYNAME] = { true, &dbs[hstdb] },
222 [GETHOSTBYNAMEv6] = { true, &dbs[hstdb] },
223 [GETHOSTBYADDR] = { true, &dbs[hstdb] },
224 [GETHOSTBYADDRv6] = { true, &dbs[hstdb] },
225 [SHUTDOWN] = { false, NULL },
226 [GETSTAT] = { false, NULL },
227 [SHUTDOWN] = { false, NULL },
228 [GETFDPW] = { false, &dbs[pwddb] },
229 [GETFDGR] = { false, &dbs[grpdb] },
230 [GETFDHST] = { false, &dbs[hstdb] },
231 [GETAI] = { true, &dbs[hstdb] },
232 [INITGROUPS] = { true, &dbs[grpdb] },
233 [GETSERVBYNAME] = { true, &dbs[servdb] },
234 [GETSERVBYPORT] = { true, &dbs[servdb] },
684ae515
UD
235 [GETFDSERV] = { false, &dbs[servdb] },
236 [GETNETGRENT] = { true, &dbs[netgrdb] },
237 [INNETGR] = { true, &dbs[netgrdb] },
238 [GETFDNETGR] = { false, &dbs[netgrdb] }
a95a08b4
UD
239};
240
241
/* Initial number of threads to use.  The value -1 means "not configured";
   nscd_init replaces it with a default.  */
int nthreads = -1;
/* Maximum number of threads to use.  */
int max_nthreads = 32;

/* Socket for incoming connections.  */
static int sock;

#ifdef HAVE_INOTIFY
/* Inotify descriptor.  */
int inotify_fd = -1;
#endif

#ifdef HAVE_NETLINK
/* Descriptor for netlink status updates.  */
static int nl_status_fd = -1;
#endif

/* Number of times clients had to wait.  */
unsigned long int client_queued;
262
d67281a7 263
d2dc7d84
UD
/* Send LEN bytes starting at BUF over FD, continuing after partial sends
   and restarting calls interrupted by a signal.  Returns the negative
   result of send if it fails, otherwise the number of bytes actually
   sent (less than LEN only if send reported zero bytes).  */
ssize_t
writeall (int fd, const void *buf, size_t len)
{
  const char *cursor = buf;
  size_t left = len;

  for (;;)
    {
      ssize_t sent;

      /* Restart the call for as long as it is merely interrupted.  */
      do
	sent = send (fd, cursor, left, MSG_NOSIGNAL);
      while (sent == -1 && errno == EINTR);

      if (sent < 0)
	return sent;
      if (sent == 0)
	break;

      cursor += sent;
      left -= sent;
      if (left == 0)
	break;
    }

  return len - left;
}
280
281
bd547139
UD
282#ifdef HAVE_SENDFILE
/* Transfer LEN bytes from FROMFD, starting at offset OFF, to TOFD with
   sendfile, continuing after partial transfers and restarting calls
   interrupted by a signal.  Returns the negative result of sendfile if
   it fails, otherwise the number of bytes transferred (less than LEN
   only if sendfile reported zero bytes).  */
ssize_t
sendfileall (int tofd, int fromfd, off_t off, size_t len)
{
  ssize_t left = len;

  for (;;)
    {
      ssize_t moved;

      /* Restart the call for as long as it is merely interrupted.  */
      do
	moved = sendfile (tofd, fromfd, &off, left);
      while (moved == -1 && errno == EINTR);

      if (moved < 0)
	return moved;
      if (moved == 0)
	break;

      left -= moved;
      if (left <= 0)
	break;
    }

  return len - left;
}
299#endif
300
301
dc4bb1c2
UD
/* Markers stored in the per-byte usage map built by check_use.  */
enum usekey
  {
    use_not = 0,

    /* Kind of object occupying a byte.  */
    use_he = 1,
    use_data = 3,

    /* The following three are not really used on their own, they are
       symbolic flag constants combined with the kinds above.  */
    use_first = 1 << 4,
    use_begin = 1 << 5,
    use_end = 1 << 6,

    use_he_begin = use_he | use_begin,
    use_he_end = use_he | use_end,

    use_data_begin = use_data | use_begin,
    use_data_end = use_data | use_end,
    use_data_first = use_data_begin | use_first
  };
318
319
320static int
321check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
322 enum usekey use, ref_t start, size_t len)
323{
324 assert (len >= 2);
325
326 if (start > first_free || start + len > first_free
327 || (start & BLOCK_ALIGN_M1))
328 return 0;
329
330 if (usemap[start] == use_not)
331 {
332 /* Add the start marker. */
333 usemap[start] = use | use_begin;
334 use &= ~use_first;
335
336 while (--len > 0)
337 if (usemap[++start] != use_not)
338 return 0;
339 else
340 usemap[start] = use;
341
342 /* Add the end marker. */
343 usemap[start] = use | use_end;
344 }
345 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
346 {
347 /* Hash entries can't be shared. */
348 if (use == use_he)
349 return 0;
350
351 usemap[start] |= (use & use_first);
352 use &= ~use_first;
353
354 while (--len > 1)
355 if (usemap[++start] != use)
356 return 0;
357
358 if (usemap[++start] != (use | use_end))
359 return 0;
360 }
361 else
362 /* Points to a wrong object or somewhere in the middle. */
363 return 0;
364
365 return 1;
366}
367
368
369/* Verify data in persistent database. */
370static int
371verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
372{
684ae515
UD
373 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb || dbnr == servdb
374 || dbnr == netgrdb);
dc4bb1c2
UD
375
376 time_t now = time (NULL);
377
378 struct database_pers_head *head = mem;
379 struct database_pers_head head_copy = *head;
380
381 /* Check that the header that was read matches the head in the database. */
27c377dd 382 if (memcmp (head, readhead, sizeof (*head)) != 0)
dc4bb1c2
UD
383 return 0;
384
385 /* First some easy tests: make sure the database header is sane. */
386 if (head->version != DB_VERSION
387 || head->header_size != sizeof (*head)
388 /* We allow a timestamp to be one hour ahead of the current time.
389 This should cover daylight saving time changes. */
390 || head->timestamp > now + 60 * 60 + 60
391 || (head->gc_cycle & 1)
27c377dd 392 || head->module == 0
dc4bb1c2
UD
393 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
394 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
395 || head->first_free < 0
396 || head->first_free > head->data_size
397 || (head->first_free & BLOCK_ALIGN_M1) != 0
398 || head->maxnentries < 0
399 || head->maxnsearched < 0)
400 return 0;
401
402 uint8_t *usemap = calloc (head->first_free, 1);
403 if (usemap == NULL)
404 return 0;
405
406 const char *data = (char *) &head->array[roundup (head->module,
407 ALIGN / sizeof (ref_t))];
408
409 nscd_ssize_t he_cnt = 0;
410 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
411 {
a6fa5328
UD
412 ref_t trail = head->array[cnt];
413 ref_t work = trail;
414 int tick = 0;
dc4bb1c2
UD
415
416 while (work != ENDREF)
417 {
418 if (! check_use (data, head->first_free, usemap, use_he, work,
419 sizeof (struct hashentry)))
420 goto fail;
421
422 /* Now we know we can dereference the record. */
423 struct hashentry *here = (struct hashentry *) (data + work);
424
425 ++he_cnt;
426
427 /* Make sure the record is for this type of service. */
428 if (here->type >= LASTREQ
000b027e 429 || reqinfo[here->type].db != &dbs[dbnr])
dc4bb1c2
UD
430 goto fail;
431
432 /* Validate boolean field value. */
433 if (here->first != false && here->first != true)
434 goto fail;
435
436 if (here->len < 0)
437 goto fail;
438
439 /* Now the data. */
440 if (here->packet < 0
441 || here->packet > head->first_free
442 || here->packet + sizeof (struct datahead) > head->first_free)
443 goto fail;
444
445 struct datahead *dh = (struct datahead *) (data + here->packet);
446
447 if (! check_use (data, head->first_free, usemap,
448 use_data | (here->first ? use_first : 0),
449 here->packet, dh->allocsize))
450 goto fail;
451
452 if (dh->allocsize < sizeof (struct datahead)
453 || dh->recsize > dh->allocsize
454 || (dh->notfound != false && dh->notfound != true)
455 || (dh->usable != false && dh->usable != true))
456 goto fail;
457
458 if (here->key < here->packet + sizeof (struct datahead)
459 || here->key > here->packet + dh->allocsize
460 || here->key + here->len > here->packet + dh->allocsize)
2aac0a86 461 goto fail;
dc4bb1c2
UD
462
463 work = here->next;
50607309 464
a6fa5328 465 if (work == trail)
50607309
UD
466 /* A circular list, this must not happen. */
467 goto fail;
a6fa5328
UD
468 if (tick)
469 trail = ((struct hashentry *) (data + trail))->next;
470 tick = 1 - tick;
dc4bb1c2
UD
471 }
472 }
473
474 if (he_cnt != head->nentries)
475 goto fail;
476
477 /* See if all data and keys had at least one reference from
478 he->first == true hashentry. */
479 for (ref_t idx = 0; idx < head->first_free; ++idx)
480 {
dc4bb1c2
UD
481 if (usemap[idx] == use_data_begin)
482 goto fail;
483 }
484
485 /* Finally, make sure the database hasn't changed since the first test. */
486 if (memcmp (mem, &head_copy, sizeof (*head)) != 0)
487 goto fail;
488
489 free (usemap);
490 return 1;
491
492fail:
493 free (usemap);
494 return 0;
495}
496
497
a334319f
UD
498/* Initialize database information structures. */
499void
500nscd_init (void)
0ecb606c 501{
057685e4
UD
502 /* Look up unprivileged uid/gid/groups before we start listening on the
503 socket */
504 if (server_user != NULL)
505 begin_drop_privileges ();
506
67479a70
UD
507 if (nthreads == -1)
508 /* No configuration for this value, assume a default. */
ffb1b882 509 nthreads = 4;
d67281a7 510
d2dc7d84 511 for (size_t cnt = 0; cnt < lastdb; ++cnt)
67479a70 512 if (dbs[cnt].enabled)
9db29cde 513 {
67479a70 514 pthread_rwlock_init (&dbs[cnt].lock, NULL);
a95a08b4 515 pthread_mutex_init (&dbs[cnt].memlock, NULL);
264d5b94 516
a95a08b4 517 if (dbs[cnt].persistent)
e09edf23 518 {
a95a08b4 519 /* Try to open the appropriate file on disk. */
cef9b653 520 int fd = open (dbs[cnt].db_filename, O_RDWR | O_CLOEXEC);
a95a08b4
UD
521 if (fd != -1)
522 {
27c377dd 523 char *msg = NULL;
a95a08b4
UD
524 struct stat64 st;
525 void *mem;
526 size_t total;
527 struct database_pers_head head;
528 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
529 sizeof (head)));
530 if (n != sizeof (head) || fstat64 (fd, &st) != 0)
531 {
27c377dd
UD
532 fail_db_errno:
533 /* The code is single-threaded at this point so
534 using strerror is just fine. */
535 msg = strerror (errno);
a95a08b4
UD
536 fail_db:
537 dbg_log (_("invalid persistent database file \"%s\": %s"),
27c377dd 538 dbs[cnt].db_filename, msg);
dc4bb1c2 539 unlink (dbs[cnt].db_filename);
a95a08b4
UD
540 }
541 else if (head.module == 0 && head.data_size == 0)
542 {
27c377dd
UD
543 /* The file has been created, but the head has not
544 been initialized yet. */
545 msg = _("uninitialized header");
546 goto fail_db;
a95a08b4
UD
547 }
548 else if (head.header_size != (int) sizeof (head))
549 {
27c377dd
UD
550 msg = _("header size does not match");
551 goto fail_db;
a95a08b4
UD
552 }
553 else if ((total = (sizeof (head)
c207f23b 554 + roundup (head.module * sizeof (ref_t),
a95a08b4
UD
555 ALIGN)
556 + head.data_size))
dc4bb1c2
UD
557 > st.st_size
558 || total < sizeof (head))
a95a08b4 559 {
27c377dd
UD
560 msg = _("file size does not match");
561 goto fail_db;
a95a08b4 562 }
2c210d1e
UD
563 /* Note we map with the maximum size allowed for the
564 database. This is likely much larger than the
565 actual file size. This is OK on most OSes since
566 extensions of the underlying file will
567 automatically translate more pages available for
568 memory access. */
569 else if ((mem = mmap (NULL, dbs[cnt].max_db_size,
570 PROT_READ | PROT_WRITE,
571 MAP_SHARED, fd, 0))
572 == MAP_FAILED)
27c377dd 573 goto fail_db_errno;
dc4bb1c2
UD
574 else if (!verify_persistent_db (mem, &head, cnt))
575 {
576 munmap (mem, total);
27c377dd
UD
577 msg = _("verification failed");
578 goto fail_db;
dc4bb1c2 579 }
a95a08b4
UD
580 else
581 {
582 /* Success. We have the database. */
583 dbs[cnt].head = mem;
584 dbs[cnt].memsize = total;
585 dbs[cnt].data = (char *)
586 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
587 ALIGN / sizeof (ref_t))];
588 dbs[cnt].mmap_used = true;
589
590 if (dbs[cnt].suggested_module > head.module)
591 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
592 dbnames[cnt]);
593
594 dbs[cnt].wr_fd = fd;
595 fd = -1;
596 /* We also need a read-only descriptor. */
d13a3c57
UD
597 if (dbs[cnt].shared)
598 {
d7e23b02 599 dbs[cnt].ro_fd = open (dbs[cnt].db_filename,
cef9b653 600 O_RDONLY | O_CLOEXEC);
d13a3c57
UD
601 if (dbs[cnt].ro_fd == -1)
602 dbg_log (_("\
a95a08b4 603cannot create read-only descriptor for \"%s\"; no mmap"),
d13a3c57
UD
604 dbs[cnt].db_filename);
605 }
a95a08b4
UD
606
607 // XXX Shall we test whether the descriptors actually
608 // XXX point to the same file?
609 }
610
611 /* Close the file descriptors in case something went
612 wrong in which case the variable have not been
613 assigned -1. */
614 if (fd != -1)
615 close (fd);
616 }
31d322a2 617 else if (errno == EACCES)
532a6035
SP
618 do_exit (EXIT_FAILURE, 0, _("cannot access '%s'"),
619 dbs[cnt].db_filename);
a95a08b4
UD
620 }
621
622 if (dbs[cnt].head == NULL)
623 {
624 /* No database loaded. Allocate the data structure,
625 possibly on disk. */
626 struct database_pers_head head;
627 size_t total = (sizeof (head)
628 + roundup (dbs[cnt].suggested_module
629 * sizeof (ref_t), ALIGN)
630 + (dbs[cnt].suggested_module
631 * DEFAULT_DATASIZE_PER_BUCKET));
632
633 /* Try to create the database. If we do not need a
634 persistent database create a temporary file. */
635 int fd;
636 int ro_fd = -1;
637 if (dbs[cnt].persistent)
638 {
639 fd = open (dbs[cnt].db_filename,
cef9b653 640 O_RDWR | O_CREAT | O_EXCL | O_TRUNC | O_CLOEXEC,
a95a08b4 641 S_IRUSR | S_IWUSR);
d13a3c57 642 if (fd != -1 && dbs[cnt].shared)
d7e23b02 643 ro_fd = open (dbs[cnt].db_filename,
cef9b653 644 O_RDONLY | O_CLOEXEC);
a95a08b4
UD
645 }
646 else
647 {
a8a58967 648 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
cef9b653 649 fd = mkostemp (fname, O_CLOEXEC);
a95a08b4
UD
650
651 /* We do not need the file name anymore after we
652 opened another file descriptor in read-only mode. */
5ca3d19c 653 if (fd != -1)
a95a08b4 654 {
5ca3d19c 655 if (dbs[cnt].shared)
cef9b653 656 ro_fd = open (fname, O_RDONLY | O_CLOEXEC);
a95a08b4
UD
657
658 unlink (fname);
659 }
660 }
661
662 if (fd == -1)
663 {
664 if (errno == EEXIST)
665 {
666 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
667 dbnames[cnt], dbs[cnt].db_filename);
532a6035 668 do_exit (1, 0, NULL);
a95a08b4
UD
669 }
670
671 if (dbs[cnt].persistent)
672 dbg_log (_("cannot create %s; no persistent database used"),
673 dbs[cnt].db_filename);
674 else
675 dbg_log (_("cannot create %s; no sharing possible"),
676 dbs[cnt].db_filename);
677
678 dbs[cnt].persistent = 0;
679 // XXX remember: no mmap
680 }
681 else
682 {
683 /* Tell the user if we could not create the read-only
684 descriptor. */
d13a3c57 685 if (ro_fd == -1 && dbs[cnt].shared)
a95a08b4
UD
686 dbg_log (_("\
687cannot create read-only descriptor for \"%s\"; no mmap"),
688 dbs[cnt].db_filename);
689
a31ee4b3
SP
690 /* Before we create the header, initialize the hash
691 table. That way if we get interrupted while writing
a95a08b4
UD
692 the header we can recognize a partially initialized
693 database. */
694 size_t ps = sysconf (_SC_PAGESIZE);
695 char tmpbuf[ps];
696 assert (~ENDREF == 0);
697 memset (tmpbuf, '\xff', ps);
698
699 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
700 off_t offset = sizeof (head);
701
702 size_t towrite;
703 if (offset % ps != 0)
704 {
705 towrite = MIN (remaining, ps - (offset % ps));
233399bc
UD
706 if (pwrite (fd, tmpbuf, towrite, offset) != towrite)
707 goto write_fail;
a95a08b4
UD
708 offset += towrite;
709 remaining -= towrite;
710 }
711
712 while (remaining > ps)
713 {
233399bc
UD
714 if (pwrite (fd, tmpbuf, ps, offset) == -1)
715 goto write_fail;
a95a08b4
UD
716 offset += ps;
717 remaining -= ps;
718 }
719
233399bc
UD
720 if (remaining > 0
721 && pwrite (fd, tmpbuf, remaining, offset) != remaining)
722 goto write_fail;
a95a08b4
UD
723
724 /* Create the header of the file. */
725 struct database_pers_head head =
726 {
727 .version = DB_VERSION,
728 .header_size = sizeof (head),
729 .module = dbs[cnt].suggested_module,
730 .data_size = (dbs[cnt].suggested_module
731 * DEFAULT_DATASIZE_PER_BUCKET),
732 .first_free = 0
733 };
734 void *mem;
735
736 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
737 != sizeof (head))
2c210d1e
UD
738 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
739 != 0)
740 || (mem = mmap (NULL, dbs[cnt].max_db_size,
741 PROT_READ | PROT_WRITE,
a95a08b4
UD
742 MAP_SHARED, fd, 0)) == MAP_FAILED)
743 {
233399bc 744 write_fail:
a95a08b4
UD
745 unlink (dbs[cnt].db_filename);
746 dbg_log (_("cannot write to database file %s: %s"),
747 dbs[cnt].db_filename, strerror (errno));
748 dbs[cnt].persistent = 0;
749 }
750 else
751 {
752 /* Success. */
753 dbs[cnt].head = mem;
754 dbs[cnt].data = (char *)
755 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
756 ALIGN / sizeof (ref_t))];
757 dbs[cnt].memsize = total;
758 dbs[cnt].mmap_used = true;
759
760 /* Remember the descriptors. */
761 dbs[cnt].wr_fd = fd;
762 dbs[cnt].ro_fd = ro_fd;
763 fd = -1;
764 ro_fd = -1;
765 }
766
767 if (fd != -1)
768 close (fd);
769 if (ro_fd != -1)
770 close (ro_fd);
771 }
772 }
773
774 if (dbs[cnt].head == NULL)
775 {
776 /* We do not use the persistent database. Just
777 create an in-memory data structure. */
778 assert (! dbs[cnt].persistent);
779
780 dbs[cnt].head = xmalloc (sizeof (struct database_pers_head)
781 + (dbs[cnt].suggested_module
782 * sizeof (ref_t)));
25059769 783 memset (dbs[cnt].head, '\0', sizeof (struct database_pers_head));
a95a08b4
UD
784 assert (~ENDREF == 0);
785 memset (dbs[cnt].head->array, '\xff',
786 dbs[cnt].suggested_module * sizeof (ref_t));
787 dbs[cnt].head->module = dbs[cnt].suggested_module;
788 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
789 * dbs[cnt].head->module);
790 dbs[cnt].data = xmalloc (dbs[cnt].head->data_size);
791 dbs[cnt].head->first_free = 0;
c207f23b
UD
792
793 dbs[cnt].shared = 0;
794 assert (dbs[cnt].ro_fd == -1);
e09edf23 795 }
67479a70 796 }
d67281a7
UD
797
798 /* Create the socket. */
52fb79d6 799 sock = socket (AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
67479a70 800 if (sock < 0)
d67281a7 801 {
67479a70 802 dbg_log (_("cannot open socket: %s"), strerror (errno));
532a6035 803 do_exit (errno == EACCES ? 4 : 1, 0, NULL);
d67281a7
UD
804 }
805 /* Bind a name to the socket. */
d2dc7d84 806 struct sockaddr_un sock_addr;
d67281a7
UD
807 sock_addr.sun_family = AF_UNIX;
808 strcpy (sock_addr.sun_path, _PATH_NSCDSOCKET);
67479a70 809 if (bind (sock, (struct sockaddr *) &sock_addr, sizeof (sock_addr)) < 0)
d67281a7
UD
810 {
811 dbg_log ("%s: %s", _PATH_NSCDSOCKET, strerror (errno));
532a6035 812 do_exit (errno == EACCES ? 4 : 1, 0, NULL);
d67281a7 813 }
67479a70 814
d67281a7 815 /* Set permissions for the socket. */
a95a08b4 816 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
d67281a7
UD
817
818 /* Set the socket up to accept connections. */
67479a70 819 if (listen (sock, SOMAXCONN) < 0)
d67281a7 820 {
67479a70
UD
821 dbg_log (_("cannot enable socket to accept connections: %s"),
822 strerror (errno));
532a6035 823 do_exit (1, 0, NULL);
d67281a7 824 }
057685e4 825
3a2c0242
UD
826#ifdef HAVE_NETLINK
827 if (dbs[hstdb].enabled)
828 {
829 /* Try to open netlink socket to monitor network setting changes. */
830 nl_status_fd = socket (AF_NETLINK,
831 SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK,
832 NETLINK_ROUTE);
833 if (nl_status_fd != -1)
834 {
835 struct sockaddr_nl snl;
836 memset (&snl, '\0', sizeof (snl));
837 snl.nl_family = AF_NETLINK;
838 /* XXX Is this the best set to use? */
839 snl.nl_groups = (RTMGRP_IPV4_IFADDR | RTMGRP_TC | RTMGRP_IPV4_MROUTE
840 | RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_RULE
841 | RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_MROUTE
842 | RTMGRP_IPV6_ROUTE | RTMGRP_IPV6_IFINFO
843 | RTMGRP_IPV6_PREFIX);
844
845 if (bind (nl_status_fd, (struct sockaddr *) &snl, sizeof (snl)) != 0)
846 {
847 close (nl_status_fd);
848 nl_status_fd = -1;
849 }
850 else
851 {
852 /* Start the timestamp process. */
853 dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
854 = __bump_nl_timestamp ();
3a2c0242
UD
855 }
856 }
857 }
858#endif
859
319b9ad4 860 /* Change to unprivileged uid/gid/groups if specified in config file */
057685e4
UD
861 if (server_user != NULL)
862 finish_drop_privileges ();
d67281a7
UD
863}
864
cf9313e7
CD
#ifdef HAVE_INOTIFY
#define TRACED_FILE_MASK (IN_DELETE_SELF | IN_CLOSE_WRITE | IN_MOVE_SELF)
#define TRACED_DIR_MASK (IN_DELETE_SELF | IN_CREATE | IN_MOVED_TO | IN_MOVE_SELF)
/* Install the inotify watches for the traced file FINFO: one on the file
   itself and one on its parent directory.  A watch whose descriptor in
   FINFO is already non-negative is left alone.  A failure is logged and
   ends the function; if the file watch cannot be added the directory
   watch is not attempted.  */
void
install_watches (struct traced_file *finfo)
{
  /* Use inotify support if we have it.  */
  if (finfo->inotify_descr[TRACED_FILE] < 0)
    finfo->inotify_descr[TRACED_FILE] = inotify_add_watch (inotify_fd,
							   finfo->fname,
							   TRACED_FILE_MASK);
  if (finfo->inotify_descr[TRACED_FILE] < 0)
    {
      dbg_log (_("disabled inotify-based monitoring for file `%s': %s"),
	       finfo->fname, strerror (errno));
      return;
    }
  dbg_log (_("monitoring file `%s` (%d)"),
	   finfo->fname, finfo->inotify_descr[TRACED_FILE]);
  /* Additionally listen for events in the file's parent directory.
     We do this because the file to be watched might be
     deleted and then added back again.  When it is added back again
     we must re-add the watch.  We must also cover IN_MOVED_TO to
     detect a file being moved into the directory.  */
  if (finfo->inotify_descr[TRACED_DIR] < 0)
    finfo->inotify_descr[TRACED_DIR] = inotify_add_watch (inotify_fd,
							  finfo->dname,
							  TRACED_DIR_MASK);
  if (finfo->inotify_descr[TRACED_DIR] < 0)
    {
      /* Report the directory that could not be watched, not the file.  */
      dbg_log (_("disabled inotify-based monitoring for directory `%s': %s"),
	       finfo->dname, strerror (errno));
      return;
    }
  dbg_log (_("monitoring directory `%s` (%d)"),
	   finfo->dname, finfo->inotify_descr[TRACED_DIR]);
}
#endif
67479a70 903
471514d3
CD
904/* Register the file in FINFO as a traced file for the database DBS[DBIX].
905
906 We support registering multiple files per database. Each call to
907 register_traced_file adds to the list of registered files.
908
909 When we prune the database, either through timeout or a request to
910 invalidate, we will check to see if any of the registered files has changed.
911 When we accept new connections to handle a cache request we will also
912 check to see if any of the registered files has changed.
913
914 If we have inotify support then we install an inotify fd to notify us of
915 file deletion or modification, both of which will require we invalidate
916 the cache for the database. Without inotify support we stat the file and
917 store st_mtime to determine if the file has been modified. */
319b9ad4
UD
918void
919register_traced_file (size_t dbidx, struct traced_file *finfo)
920{
471514d3
CD
921 /* If the database is disabled or file checking is disabled
922 then ignore the registration. */
21fd49a9 923 if (! dbs[dbidx].enabled || ! dbs[dbidx].check_file)
319b9ad4
UD
924 return;
925
a1ffb40e 926 if (__glibc_unlikely (debug_level > 0))
cf9313e7 927 dbg_log (_("monitoring file %s for database %s"),
319b9ad4
UD
928 finfo->fname, dbnames[dbidx]);
929
930#ifdef HAVE_INOTIFY
cf9313e7 931 install_watches (finfo);
319b9ad4 932#endif
cf9313e7
CD
933 struct stat64 st;
934 if (stat64 (finfo->fname, &st) < 0)
319b9ad4 935 {
cf9313e7
CD
936 /* We cannot stat() the file. Set mtime to zero and try again later. */
937 dbg_log (_("stat failed for file `%s'; will try again later: %s"),
938 finfo->fname, strerror (errno));
939 finfo->mtime = 0;
319b9ad4 940 }
cf9313e7
CD
941 else
942 finfo->mtime = st.st_mtime;
319b9ad4
UD
943
944 /* Queue up the file name. */
945 finfo->next = dbs[dbidx].traced_files;
946 dbs[dbidx].traced_files = finfo;
947}
948
949
67479a70 950/* Close the connections. */
d67281a7 951void
67479a70 952close_sockets (void)
d67281a7 953{
67479a70
UD
954 close (sock);
955}
d67281a7 956
a12ce44f 957
756409c4 958static void
902c4291 959invalidate_cache (char *key, int fd)
756409c4
UD
960{
961 dbtype number;
902c4291 962 int32_t resp;
756409c4 963
b21fa963
UD
964 for (number = pwddb; number < lastdb; ++number)
965 if (strcmp (key, dbnames[number]) == 0)
966 {
cf9313e7
CD
967 struct traced_file *runp = dbs[number].traced_files;
968 while (runp != NULL)
319b9ad4 969 {
cf9313e7
CD
970 /* Make sure we reload from file when checking mtime. */
971 runp->mtime = 0;
972#ifdef HAVE_INOTIFY
973 /* During an invalidation we try to reload the traced
974 file watches. This allows the user to re-sync if
975 inotify events were lost. Similar to what we do during
976 pruning. */
977 install_watches (runp);
978#endif
979 if (runp->call_res_init)
980 {
981 res_init ();
982 break;
983 }
984 runp = runp->next;
319b9ad4 985 }
b21fa963 986 break;
cf9313e7 987 }
b21fa963
UD
988
989 if (number == lastdb)
902c4291
UD
990 {
991 resp = EINVAL;
992 writeall (fd, &resp, sizeof (resp));
993 return;
994 }
756409c4 995
fd665070 996 if (dbs[number].enabled)
ffb1b882 997 {
cd72adeb 998 pthread_mutex_lock (&dbs[number].prune_run_lock);
ffb1b882 999 prune_cache (&dbs[number], LONG_MAX, fd);
cd72adeb 1000 pthread_mutex_unlock (&dbs[number].prune_run_lock);
ffb1b882 1001 }
902c4291
UD
1002 else
1003 {
1004 resp = 0;
1005 writeall (fd, &resp, sizeof (resp));
1006 }
756409c4
UD
1007}
1008
67479a70 1009
c207f23b
UD
1010#ifdef SCM_RIGHTS
1011static void
1012send_ro_fd (struct database_dyn *db, char *key, int fd)
1013{
1014 /* If we do not have an read-only file descriptor do nothing. */
1015 if (db->ro_fd == -1)
1016 return;
1017
1018 /* We need to send some data along with the descriptor. */
f3c54060
UD
1019 uint64_t mapsize = (db->head->data_size
1020 + roundup (db->head->module * sizeof (ref_t), ALIGN)
1021 + sizeof (struct database_pers_head));
1022 struct iovec iov[2];
c207f23b
UD
1023 iov[0].iov_base = key;
1024 iov[0].iov_len = strlen (key) + 1;
f3c54060
UD
1025 iov[1].iov_base = &mapsize;
1026 iov[1].iov_len = sizeof (mapsize);
c207f23b
UD
1027
1028 /* Prepare the control message to transfer the descriptor. */
a08ab897
UD
1029 union
1030 {
1031 struct cmsghdr hdr;
1032 char bytes[CMSG_SPACE (sizeof (int))];
1033 } buf;
f3c54060 1034 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
a08ab897
UD
1035 .msg_control = buf.bytes,
1036 .msg_controllen = sizeof (buf) };
c207f23b
UD
1037 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
1038
1039 cmsg->cmsg_level = SOL_SOCKET;
1040 cmsg->cmsg_type = SCM_RIGHTS;
1041 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
1042
1ac03a1e
UD
1043 int *ip = (int *) CMSG_DATA (cmsg);
1044 *ip = db->ro_fd;
c207f23b
UD
1045
1046 msg.msg_controllen = cmsg->cmsg_len;
1047
1048 /* Send the control message. We repeat when we are interrupted but
1049 everything else is ignored. */
6925ef9a
UD
1050#ifndef MSG_NOSIGNAL
1051# define MSG_NOSIGNAL 0
1052#endif
1053 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
c207f23b 1054
a1ffb40e 1055 if (__glibc_unlikely (debug_level > 0))
c207f23b
UD
1056 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
1057}
1058#endif /* SCM_RIGHTS */
1059
1060
67479a70
UD
1061/* Handle new request.

   FD is the connection the reply is written to, REQ the request
   header, KEY the key data that followed the header, and UID/PID the
   values supplied by the caller for the peer.

   Requests with a version other than NSCD_VERSION are dropped, as are
   requests rejected by the SELinux check.  For data requests (as
   flagged in REQINFO) the prepared "disabled" record is sent if the
   database is disabled; otherwise the cache is searched under the
   database's read lock and a hit is written straight to FD.  On a
   miss, and for all other request types, the request is dispatched
   on req->type below.  */
1062static void
c52137d3 1063handle_request (int fd, request_header *req, void *key, uid_t uid, pid_t pid)
67479a70 1064{
23700036 1065 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
d67281a7 1066 {
98e75a1c
UD
1067 if (debug_level > 0)
1068 dbg_log (_("\
67479a70 1069cannot handle old request version %d; current version is %d"),
98e75a1c 1070 req->version, NSCD_VERSION);
d67281a7
UD
1071 return;
1072 }
1073
ffb1b882 1074 /* Perform the SELinux check before we go on to the standard checks. */
000b027e 1075 if (selinux_enabled && nscd_request_avc_has_perm (fd, req->type) != 0)
ffb1b882
UD
1076 {
1077 if (debug_level > 0)
c52137d3
UD
1078 {
1079#ifdef SO_PEERCRED
/* Resolve /proc/PID/exe so the log message can name the requesting
   program; if that fails only the PID is logged.  */
49b036bc 1080 char pbuf[sizeof ("/proc//exe") + 3 * sizeof (long int)];
c52137d3
UD
1081# ifdef PATH_MAX
1082 char buf[PATH_MAX];
1083# else
1084 char buf[4096];
1085# endif
1086
49b036bc
JM
1087 snprintf (pbuf, sizeof (pbuf), "/proc/%ld/exe", (long int) pid);
1088 ssize_t n = readlink (pbuf, buf, sizeof (buf) - 1);
c52137d3
UD
1089
1090 if (n <= 0)
1091 dbg_log (_("\
1092request from %ld not handled due to missing permission"), (long int) pid);
1093 else
1094 {
/* readlink does not NUL-terminate its result.  */
1095 buf[n] = '\0';
1096 dbg_log (_("\
1097request from '%s' [%ld] not handled due to missing permission"),
1098 buf, (long int) pid);
1099 }
1100#else
1101 dbg_log (_("request not handled due to missing permission"));
1102#endif
1103 }
ffb1b882
UD
1104 return;
1105 }
74a30a58 1106
000b027e 1107 struct database_dyn *db = reqinfo[req->type].db;
a95a08b4 1108
9691d83c 1109 /* See whether we can service the request from the cache. */
000b027e 1110 if (__builtin_expect (reqinfo[req->type].data_request, true))
d67281a7 1111 {
23700036 1112 if (__builtin_expect (debug_level, 0) > 0)
8d8c6efa
UD
1113 {
1114 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
1115 {
/* For address lookups the key is passed to inet_ntop so it can be
   logged in text form.  */
1116 char buf[INET6_ADDRSTRLEN];
1117
1118 dbg_log ("\t%s (%s)", serv2str[req->type],
1119 inet_ntop (req->type == GETHOSTBYADDR
1120 ? AF_INET : AF_INET6,
1121 key, buf, sizeof (buf)));
1122 }
1123 else
a95a08b4 1124 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
8d8c6efa 1125 }
d67281a7 1126
67479a70 1127 /* Is this service enabled? */
a1ffb40e 1128 if (__glibc_unlikely (!db->enabled))
67479a70 1129 {
ce85d65b 1130 /* No, send the prepared record. */
2c210d1e
UD
1131 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
1132 db->disabled_iov->iov_len,
1133 MSG_NOSIGNAL))
4c5dd2a2 1134 != (ssize_t) db->disabled_iov->iov_len
23700036 1135 && __builtin_expect (debug_level, 0) > 0)
67479a70
UD
1136 {
1137 /* We have problems sending the result. */
1138 char buf[256];
1139 dbg_log (_("cannot write result: %s"),
1140 strerror_r (errno, buf, sizeof (buf)));
1141 }
d67281a7 1142
67479a70
UD
1143 return;
1144 }
d67281a7 1145
67479a70 1146 /* Be sure we can read the data. */
a1ffb40e 1147 if (__glibc_unlikely (pthread_rwlock_tryrdlock (&db->lock) != 0))
c86e6aec 1148 {
/* The lock was not immediately available: count the delay, then
   block until the read lock is granted.  */
a95a08b4 1149 ++db->head->rdlockdelayed;
c86e6aec
UD
1150 pthread_rwlock_rdlock (&db->lock);
1151 }
67479a70
UD
1152
1153 /* See whether we can handle it from the cache. */
a95a08b4
UD
1154 struct datahead *cached;
1155 cached = (struct datahead *) cache_search (req->type, key, req->key_len,
1156 db, uid);
67479a70
UD
1157 if (cached != NULL)
1158 {
1159 /* Hurray it's in the cache. */
eac10791
UD
1160 ssize_t nwritten;
1161
1162#ifdef HAVE_SENDFILE
a1ffb40e 1163 if (__glibc_likely (db->mmap_used))
eac10791
UD
1164 {
/* The record is sent from the file behind DB->wr_fd; the offset is
   the record's distance from the start of the database header.  */
1165 assert (db->wr_fd != -1);
1166 assert ((char *) cached->data > (char *) db->data);
1167 assert ((char *) cached->data - (char *) db->head
1168 + cached->recsize
1169 <= (sizeof (struct database_pers_head)
1170 + db->head->module * sizeof (ref_t)
1171 + db->head->data_size));
bd547139
UD
1172 nwritten = sendfileall (fd, db->wr_fd,
1173 (char *) cached->data
1174 - (char *) db->head, cached->recsize);
eac10791
UD
1175# ifndef __ASSUME_SENDFILE
/* If sendfile is not implemented (ENOSYS) fall back to writing the
   record from memory.  */
1176 if (nwritten == -1 && errno == ENOSYS)
1177 goto use_write;
1178# endif
1179 }
1180 else
1181# ifndef __ASSUME_SENDFILE
1182 use_write:
1183# endif
1184#endif
1185 nwritten = writeall (fd, cached->data, cached->recsize);
1186
1187 if (nwritten != cached->recsize
23700036 1188 && __builtin_expect (debug_level, 0) > 0)
67479a70
UD
1189 {
1190 /* We have problems sending the result. */
1191 char buf[256];
1192 dbg_log (_("cannot write result: %s"),
1193 strerror_r (errno, buf, sizeof (buf)));
1194 }
1195
1196 pthread_rwlock_unlock (&db->lock);
1197
1198 return;
1199 }
1200
/* Cache miss: release the read lock before dispatching below.  */
1201 pthread_rwlock_unlock (&db->lock);
d67281a7 1202 }
23700036 1203 else if (__builtin_expect (debug_level, 0) > 0)
756409c4
UD
1204 {
1205 if (req->type == INVALIDATE)
c207f23b 1206 dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key);
0532e21b 1207 else
a95a08b4 1208 dbg_log ("\t%s", serv2str[req->type]);
756409c4 1209 }
67479a70
UD
1210
1211 /* Handle the request. */
1212 switch (req->type)
d67281a7 1213 {
67479a70 1214 case GETPWBYNAME:
a95a08b4 1215 addpwbyname (db, fd, req, key, uid);
67479a70
UD
1216 break;
1217
1218 case GETPWBYUID:
a95a08b4 1219 addpwbyuid (db, fd, req, key, uid);
67479a70
UD
1220 break;
1221
1222 case GETGRBYNAME:
a95a08b4 1223 addgrbyname (db, fd, req, key, uid);
67479a70
UD
1224 break;
1225
1226 case GETGRBYGID:
a95a08b4 1227 addgrbygid (db, fd, req, key, uid);
67479a70
UD
1228 break;
1229
1230 case GETHOSTBYNAME:
a95a08b4 1231 addhstbyname (db, fd, req, key, uid);
67479a70
UD
1232 break;
1233
1234 case GETHOSTBYNAMEv6:
a95a08b4 1235 addhstbynamev6 (db, fd, req, key, uid);
67479a70
UD
1236 break;
1237
1238 case GETHOSTBYADDR:
a95a08b4 1239 addhstbyaddr (db, fd, req, key, uid);
67479a70
UD
1240 break;
1241
1242 case GETHOSTBYADDRv6:
a95a08b4 1243 addhstbyaddrv6 (db, fd, req, key, uid);
67479a70
UD
1244 break;
1245
d19687d6
UD
1246 case GETAI:
1247 addhstai (db, fd, req, key, uid);
1248 break;
1249
f7e7a396
UD
1250 case INITGROUPS:
1251 addinitgroups (db, fd, req, key, uid);
1252 break;
1253
b21fa963
UD
1254 case GETSERVBYNAME:
1255 addservbyname (db, fd, req, key, uid);
1256 break;
1257
1258 case GETSERVBYPORT:
1259 addservbyport (db, fd, req, key, uid);
1260 break;
1261
684ae515
UD
1262 case GETNETGRENT:
1263 addgetnetgrent (db, fd, req, key, uid);
1264 break;
1265
1266 case INNETGR:
1267 addinnetgr (db, fd, req, key, uid);
1268 break;
1269
67479a70 1270 case GETSTAT:
67479a70 1271 case SHUTDOWN:
756409c4 1272 case INVALIDATE:
70e2ebba
UD
1273 {
1274 /* Get the caller's credentials.  The UID argument is overwritten
   with the value obtained here.  */
cedc8559 1275#ifdef SO_PEERCRED
70e2ebba
UD
1276 struct ucred caller;
1277 socklen_t optlen = sizeof (caller);
be3c40b6 1278
70e2ebba
UD
1279 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) < 0)
1280 {
1281 char buf[256];
a1c542bf 1282
c69136ae 1283 dbg_log (_("error getting caller's id: %s"),
70e2ebba
UD
1284 strerror_r (errno, buf, sizeof (buf)));
1285 break;
1286 }
a12ce44f 1287
70e2ebba 1288 uid = caller.uid;
a12ce44f 1289#else
70e2ebba
UD
1290 /* Some systems have no SO_PEERCRED implementation. They don't
1291 care about security so we don't as well. */
1292 uid = 0;
cedc8559 1293#endif
70e2ebba 1294 }
a12ce44f
UD
1295
1296 /* Accept shutdown, getstat and invalidate only from root. For
1297 the stat call also allow the user specified in the config file. */
1298 if (req->type == GETSTAT)
1299 {
1300 if (uid == 0 || uid == stat_uid)
1301 send_stats (fd, dbs);
1302 }
1303 else if (uid == 0)
1304 {
1305 if (req->type == INVALIDATE)
902c4291 1306 invalidate_cache (key, fd);
a12ce44f
UD
1307 else
1308 termination_handler (0);
a1c542bf 1309 }
67479a70
UD
1310 break;
1311
c207f23b
UD
1312 case GETFDPW:
1313 case GETFDGR:
1314 case GETFDHST:
b21fa963 1315 case GETFDSERV:
684ae515 1316 case GETFDNETGR:
c207f23b 1317#ifdef SCM_RIGHTS
000b027e 1318 send_ro_fd (reqinfo[req->type].db, key, fd);
c207f23b
UD
1319#endif
1320 break;
1321
67479a70 1322 default:
64acf8ed
UD
1323 /* Ignore the command, it's nothing we know. */
1324 break;
d67281a7 1325 }
67479a70
UD
1326}
1327
d67281a7 1328
4401d759
UD
1329/* Restart the process. */
1330static void
1331restart (void)
1332{
1333 /* First determine the parameters. We do not use the parameters
1334 passed to main() since in case nscd is started by running the
1335 dynamic linker this will not work. Yes, this is not the usual
1336 case but nscd is part of glibc and we occasionally do this. */
1337 size_t buflen = 1024;
1338 char *buf = alloca (buflen);
1339 size_t readlen = 0;
1340 int fd = open ("/proc/self/cmdline", O_RDONLY);
1341 if (fd == -1)
1342 {
1343 dbg_log (_("\
1344cannot open /proc/self/cmdline: %s; disabling paranoia mode"),
1345 strerror (errno));
1346
1347 paranoia = 0;
1348 return;
1349 }
1350
1351 while (1)
1352 {
1353 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + readlen,
1354 buflen - readlen));
1355 if (n == -1)
1356 {
1357 dbg_log (_("\
b21fa963 1358cannot read /proc/self/cmdline: %s; disabling paranoia mode"),
4401d759
UD
1359 strerror (errno));
1360
1361 close (fd);
1362 paranoia = 0;
1363 return;
1364 }
1365
1366 readlen += n;
1367
1368 if (readlen < buflen)
1369 break;
1370
1371 /* We might have to extend the buffer. */
1372 size_t old_buflen = buflen;
1373 char *newp = extend_alloca (buf, buflen, 2 * buflen);
1374 buf = memmove (newp, buf, old_buflen);
1375 }
1376
1377 close (fd);
1378
1379 /* Parse the command line. Worst case scenario: every two
1380 characters form one parameter (one character plus NUL). */
1381 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1382 int argc = 0;
1383
1384 char *cp = buf;
1385 while (cp < buf + readlen)
1386 {
1387 argv[argc++] = cp;
1388 cp = (char *) rawmemchr (cp, '\0') + 1;
1389 }
1390 argv[argc] = NULL;
1391
1392 /* Second, change back to the old user if we changed it. */
1393 if (server_user != NULL)
1394 {
464c9fad 1395 if (setresuid (old_uid, old_uid, old_uid) != 0)
4401d759
UD
1396 {
1397 dbg_log (_("\
1398cannot change to old UID: %s; disabling paranoia mode"),
1399 strerror (errno));
1400
1401 paranoia = 0;
1402 return;
1403 }
1404
464c9fad 1405 if (setresgid (old_gid, old_gid, old_gid) != 0)
4401d759
UD
1406 {
1407 dbg_log (_("\
1408cannot change to old GID: %s; disabling paranoia mode"),
1409 strerror (errno));
1410
8915eace 1411 ignore_value (setuid (server_uid));
4401d759
UD
1412 paranoia = 0;
1413 return;
1414 }
1415 }
1416
1417 /* Next change back to the old working directory. */
1418 if (chdir (oldcwd) == -1)
1419 {
1420 dbg_log (_("\
1421cannot change to old working directory: %s; disabling paranoia mode"),
1422 strerror (errno));
1423
1424 if (server_user != NULL)
1425 {
8915eace
JM
1426 ignore_value (setuid (server_uid));
1427 ignore_value (setgid (server_gid));
4401d759
UD
1428 }
1429 paranoia = 0;
1430 return;
1431 }
1432
1433 /* Synchronize memory. */
528741cb 1434 int32_t certainly[lastdb];
4401d759 1435 for (int cnt = 0; cnt < lastdb; ++cnt)
0fbfe2f7 1436 if (dbs[cnt].enabled)
3e6ce4d7
UD
1437 {
1438 /* Make sure nobody keeps using the database. */
1439 dbs[cnt].head->timestamp = 0;
528741cb
UD
1440 certainly[cnt] = dbs[cnt].head->nscd_certainly_running;
1441 dbs[cnt].head->nscd_certainly_running = 0;
4401d759 1442
3e6ce4d7
UD
1443 if (dbs[cnt].persistent)
1444 // XXX async OK?
1445 msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
1446 }
4401d759
UD
1447
1448 /* The preparations are done. */
d9822dbe
UD
1449#ifdef PATH_MAX
1450 char pathbuf[PATH_MAX];
1451#else
1452 char pathbuf[256];
1453#endif
1454 /* Try to exec the real nscd program so the process name (as reported
1455 in /proc/PID/status) will be 'nscd', but fall back to /proc/self/exe
351fe947 1456 if readlink or the exec with the result of the readlink call fails. */
d9822dbe 1457 ssize_t n = readlink ("/proc/self/exe", pathbuf, sizeof (pathbuf) - 1);
351fe947 1458 if (n != -1)
d9822dbe
UD
1459 {
1460 pathbuf[n] = '\0';
1461 execv (pathbuf, argv);
1462 }
351fe947 1463 execv ("/proc/self/exe", argv);
4401d759
UD
1464
1465 /* If we come here, we will never be able to re-exec. */
1466 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1467 strerror (errno));
1468
1469 if (server_user != NULL)
1470 {
8915eace
JM
1471 ignore_value (setuid (server_uid));
1472 ignore_value (setgid (server_gid));
4401d759 1473 }
233399bc
UD
1474 if (chdir ("/") != 0)
1475 dbg_log (_("cannot change current working directory to \"/\": %s"),
1476 strerror (errno));
4401d759 1477 paranoia = 0;
528741cb
UD
1478
1479 /* Reenable the databases. */
1480 time_t now = time (NULL);
1481 for (int cnt = 0; cnt < lastdb; ++cnt)
1482 if (dbs[cnt].enabled)
1483 {
1484 dbs[cnt].head->timestamp = now;
1485 dbs[cnt].head->nscd_certainly_running = certainly[cnt];
1486 }
4401d759
UD
1487}
1488
1489
1945c96f
UD
1490/* List of file descriptors.  One record per connection slot; a
   record whose NEXT is NULL is unused (see fd_ready).  */
1491struct fdlist
1492{
1493 int fd;
1494 struct fdlist *next;
1495};
1496/* Memory allocated for the list. */
1497static struct fdlist *fdlist;
1498/* List of currently ready-to-read file descriptors.  The list is
   circular: READYLIST points at the most recently queued record and
   READYLIST->next at the record a worker dequeues next.  */
1499static struct fdlist *readylist;
1500
1501/* Condition variable and mutex to signal availability of entries in
   READYLIST.  Both are statically initialized here.
   NOTE(review): this comment used to say the condvar is initialized
   dynamically so that a different clock can be selected; confirm
   whether anything outside this section re-initializes it.  */
ffb1b882 1504static pthread_cond_t readylist_cond = PTHREAD_COND_INITIALIZER;
1945c96f
UD
1505static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;
1506
1507/* The clock to use with the condvar. */
1508static clockid_t timeout_clock = CLOCK_REALTIME;
1509
1510/* Number of threads ready to handle the READYLIST. */
1511static unsigned long int nready;
1512
1513
ffb1b882 1514/* Function for the clean-up threads.

   P carries the index of the database this thread looks after.  The
   thread never returns: it waits on the database's PRUNE_COND until
   the computed timeout expires or CLEAR_CACHE is set, runs
   prune_cache under PRUNE_RUN_LOCK, and computes the next wakeup
   time from the value prune_cache returns.  */
67479a70
UD
1515static void *
1516__attribute__ ((__noreturn__))
ffb1b882 1517nscd_run_prune (void *p)
67479a70 1518{
1945c96f 1519 const long int my_number = (long int) p;
ffb1b882
UD
1520 assert (dbs[my_number].enabled);
1521
1522 int dont_need_update = setup_thread (&dbs[my_number]);
1523
528741cb
UD
1524 time_t now = time (NULL);
1525
ffb1b882 1526 /* We are running. */
528741cb 1527 dbs[my_number].head->timestamp = now;
ffb1b882 1528
1945c96f 1529 struct timespec prune_ts;
a1ffb40e 1530 if (__glibc_unlikely (clock_gettime (timeout_clock, &prune_ts) == -1))
ffb1b882
UD
1531 /* Should never happen. */
1532 abort ();
1533
1534 /* Compute the initial timeout time. Prevent all the timers to go
1535 off at the same time by adding a db-based value. */
1536 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL + my_number;
528741cb
UD
1537 dbs[my_number].wakeup_time = now + CACHE_PRUNE_INTERVAL + my_number;
1538
1539 pthread_mutex_t *prune_lock = &dbs[my_number].prune_lock;
cd72adeb 1540 pthread_mutex_t *prune_run_lock = &dbs[my_number].prune_run_lock;
528741cb 1541 pthread_cond_t *prune_cond = &dbs[my_number].prune_cond;
264d5b94 1542
528741cb 1543 pthread_mutex_lock (prune_lock);
ffb1b882 1544 while (1)
1945c96f 1545 {
ffb1b882 1546 /* Wait, but not forever.  The wait is skipped entirely when a
   cache clear has already been requested.  */
5228ba2f
UD
1547 int e = 0;
1548 if (! dbs[my_number].clear_cache)
1549 e = pthread_cond_timedwait (prune_cond, prune_lock, &prune_ts);
528741cb 1550 assert (__builtin_expect (e == 0 || e == ETIMEDOUT, 1));
081fc592 1551
ffb1b882 1552 time_t next_wait;
528741cb 1553 now = time (NULL);
5228ba2f
UD
1554 if (e == ETIMEDOUT || now >= dbs[my_number].wakeup_time
1555 || dbs[my_number].clear_cache)
ffb1b882 1556 {
528741cb
UD
1557 /* We will determine the new timeout values based on the
1558 cache content. Should there be concurrent additions to
1559 the cache which are not accounted for in the cache
1560 pruning we want to know about it. Therefore set the
1561 timeout to the maximum. It will be decreased when adding
1562 new entries to the cache, if necessary. */
a4c7ea7b 1563 dbs[my_number].wakeup_time = MAX_TIMEOUT_VALUE;
528741cb 1564
5228ba2f
UD
1565 /* Unconditionally reset the flag.  A requested clear prunes
   with LONG_MAX as the time instead of the current time.  */
1566 time_t prune_now = dbs[my_number].clear_cache ? LONG_MAX : now;
1567 dbs[my_number].clear_cache = 0;
1568
528741cb
UD
1569 pthread_mutex_unlock (prune_lock);
1570
cd72adeb
UD
1571 /* We use a separate lock for running the prune function (instead
1572 of keeping prune_lock locked) because this enables concurrent
1573 invocations of cache_add which might modify the timeout value. */
1574 pthread_mutex_lock (prune_run_lock);
5228ba2f 1575 next_wait = prune_cache (&dbs[my_number], prune_now, -1);
cd72adeb 1576 pthread_mutex_unlock (prune_run_lock);
528741cb 1577
ffb1b882
UD
1578 next_wait = MAX (next_wait, CACHE_PRUNE_INTERVAL);
1579 /* If clients cannot determine for sure whether nscd is running
1580 we need to wake up occasionally to update the timestamp.
1581 Wait 90% of the update period. */
1582#define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
a1ffb40e 1583 if (__glibc_unlikely (! dont_need_update))
528741cb
UD
1584 {
1585 next_wait = MIN (UPDATE_MAPPING_TIMEOUT, next_wait);
1586 dbs[my_number].head->timestamp = now;
1587 }
1588
1589 pthread_mutex_lock (prune_lock);
ffb1b882
UD
1590
1591 /* Make it known when we will wake up again.  If WAKEUP_TIME was
   lowered while PRUNE_LOCK was released, the earlier time wins.  */
528741cb
UD
1592 if (now + next_wait < dbs[my_number].wakeup_time)
1593 dbs[my_number].wakeup_time = now + next_wait;
1594 else
1595 next_wait = dbs[my_number].wakeup_time - now;
ffb1b882
UD
1596 }
1597 else
1598 /* The cache was just pruned. Do not do it again now. Just
1599 use the new timeout value. */
1600 next_wait = dbs[my_number].wakeup_time - now;
d67281a7 1601
1945c96f
UD
1602 if (clock_gettime (timeout_clock, &prune_ts) == -1)
1603 /* Should never happen. */
1604 abort ();
0fdb4f42 1605
ffb1b882
UD
1606 /* Compute next timeout time. */
1607 prune_ts.tv_sec += next_wait;
1945c96f 1608 }
ffb1b882
UD
1609}
1610
1611
1612/* This is the main loop. It is replicated in different threads but
ded5b9b7 1613 the use of the ready list makes sure only one thread handles an
ffb1b882
UD
1614 incoming connection.

   Each iteration dequeues one descriptor from READYLIST, reads the
   request header and key from it, hands both to handle_request and
   closes the descriptor.  The argument P is not used.  */
1615static void *
1616__attribute__ ((__noreturn__))
1617nscd_run_worker (void *p)
1618{
1619 char buf[256];
1945c96f
UD
1620
1621 /* Initial locking. */
1622 pthread_mutex_lock (&readylist_lock);
1623
1624 /* One more thread available. */
1625 ++nready;
0fdb4f42 1626
1945c96f
UD
1627 while (1)
1628 {
1629 while (readylist == NULL)
ffb1b882 1630 pthread_cond_wait (&readylist_cond, &readylist_lock);
0fdb4f42 1631
/* Dequeue the record following READYLIST from the circular list.  */
1945c96f
UD
1632 struct fdlist *it = readylist->next;
1633 if (readylist->next == readylist)
1634 /* Just one entry on the list. */
1635 readylist = NULL;
1636 else
1637 readylist->next = it->next;
0fdb4f42 1638
1945c96f
UD
1639 /* Extract the information and mark the record ready to be used
1640 again. */
1641 int fd = it->fd;
1642 it->next = NULL;
0fdb4f42 1643
1945c96f
UD
1644 /* One thread less available: this one is busy now. */
1645 --nready;
67479a70 1646
1945c96f
UD
1647 /* We are done with the list. */
1648 pthread_mutex_unlock (&readylist_lock);
67479a70 1649
0fdb4f42 1650 /* Now read the request. */
1945c96f 1651 request_header req;
0fdb4f42
UD
1652 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1653 != sizeof (req), 0))
d67281a7 1654 {
1945c96f
UD
1655 /* We failed to read data. Note that this also might mean we
1656 failed because we would have blocked. */
0fdb4f42
UD
1657 if (debug_level > 0)
1658 dbg_log (_("short read while reading request: %s"),
1659 strerror_r (errno, buf, sizeof (buf)));
1945c96f 1660 goto close_and_out;
0fdb4f42
UD
1661 }
1662
3c82c131
UD
1663 /* Check whether this is a valid request type. */
1664 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1665 goto close_and_out;
1666
0fdb4f42
UD
1667 /* Some systems have no SO_PEERCRED implementation. They don't
1668 care about security so we don't as well. */
1945c96f 1669 uid_t uid = -1;
c86e6aec 1670#ifdef SO_PEERCRED
1945c96f
UD
1671 pid_t pid = 0;
1672
/* The peer's PID is only looked up when debugging is enabled.  */
a1ffb40e 1673 if (__glibc_unlikely (debug_level > 0))
0fdb4f42
UD
1674 {
1675 struct ucred caller;
1676 socklen_t optlen = sizeof (caller);
c86e6aec 1677
0fdb4f42
UD
1678 if (getsockopt (fd, SOL_SOCKET, SO_PEERCRED, &caller, &optlen) == 0)
1679 pid = caller.pid;
1680 }
c52137d3
UD
1681#else
1682 const pid_t pid = 0;
cedc8559 1683#endif
a1c542bf 1684
0fdb4f42
UD
1685 /* It should not be possible to crash the nscd with a silly
1686 request (i.e., a terribly large key).  The key length must lie
   between 0 and MAXKEYLEN.  */
1687 if (__builtin_expect (req.key_len, 1) < 0
1945c96f 1688 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
0fdb4f42
UD
1689 {
1690 if (debug_level > 0)
1691 dbg_log (_("key length in request too long: %d"), req.key_len);
0fdb4f42
UD
1692 }
1693 else
1694 {
1695 /* Get the key.  The buffer has one extra byte for the NUL stored
   after the key below.  */
50fd745b 1696 char keybuf[MAXKEYLEN + 1];
0fdb4f42
UD
1697
1698 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1699 req.key_len))
1700 != req.key_len, 0))
67479a70 1701 {
1945c96f 1702 /* Again, this can also mean we would have blocked. */
98e75a1c 1703 if (debug_level > 0)
0fdb4f42
UD
1704 dbg_log (_("short read while reading request key: %s"),
1705 strerror_r (errno, buf, sizeof (buf)));
1945c96f 1706 goto close_and_out;
67479a70 1707 }
50fd745b 1708 keybuf[req.key_len] = '\0';
0fdb4f42
UD
1709
1710 if (__builtin_expect (debug_level, 0) > 0)
67479a70 1711 {
c86e6aec 1712#ifdef SO_PEERCRED
0fdb4f42
UD
1713 if (pid != 0)
1714 dbg_log (_("\
c86e6aec 1715handle_request: request received (Version = %d) from PID %ld"),
0fdb4f42
UD
1716 req.version, (long int) pid);
1717 else
c86e6aec 1718#endif
0fdb4f42 1719 dbg_log (_("\
c86e6aec 1720handle_request: request received (Version = %d)"), req.version);
0fdb4f42 1721 }
c86e6aec 1722
0fdb4f42 1723 /* Phew, we got all the data, now process it. */
c52137d3 1724 handle_request (fd, &req, keybuf, uid, pid);
d67281a7 1725 }
264d5b94 1726
3c82c131
UD
1727 close_and_out:
1728 /* We are done. */
1729 close (fd);
1730
1945c96f
UD
1731 /* Re-locking. */
1732 pthread_mutex_lock (&readylist_lock);
1733
1734 /* One more thread available. */
1735 ++nready;
1736 }
1ac03a1e 1737 /* NOTREACHED */
1945c96f
UD
1738}
1739
1740
fc03df7a
UD
1741static unsigned int nconns;
1742
1945c96f 1743static void
fc03df7a 1744fd_ready (int fd)
1945c96f 1745{
fc03df7a
UD
1746 pthread_mutex_lock (&readylist_lock);
1747
1748 /* Find an empty entry in FDLIST. */
1749 size_t inner;
1750 for (inner = 0; inner < nconns; ++inner)
1751 if (fdlist[inner].next == NULL)
1752 break;
1753 assert (inner < nconns);
1754
1755 fdlist[inner].fd = fd;
1756
1757 if (readylist == NULL)
1758 readylist = fdlist[inner].next = &fdlist[inner];
1945c96f 1759 else
fc03df7a
UD
1760 {
1761 fdlist[inner].next = readylist->next;
1762 readylist = readylist->next = &fdlist[inner];
1763 }
1764
1765 bool do_signal = true;
a1ffb40e 1766 if (__glibc_unlikely (nready == 0))
fc03df7a
UD
1767 {
1768 ++client_queued;
1769 do_signal = false;
27e82856
UD
1770
1771 /* Try to start another thread to help out. */
1772 pthread_t th;
1773 if (nthreads < max_nthreads
ffb1b882 1774 && pthread_create (&th, &attr, nscd_run_worker,
27e82856
UD
1775 (void *) (long int) nthreads) == 0)
1776 {
1777 /* We got another thread. */
1778 ++nthreads;
908c9e87 1779 /* The new thread might need a kick. */
27e82856
UD
1780 do_signal = true;
1781 }
1782
fc03df7a
UD
1783 }
1784
1785 pthread_mutex_unlock (&readylist_lock);
1786
1787 /* Tell one of the worker threads there is work to do. */
1788 if (do_signal)
1789 pthread_cond_signal (&readylist_cond);
1790}
1945c96f 1791
fc03df7a 1792
4401d759 1793/* Check whether restarting should happen. */
f1d70dad 1794static bool
4401d759
UD
1795restart_p (time_t now)
1796{
1797 return (paranoia && readylist == NULL && nready == nthreads
1798 && now >= restart_time);
1799}
1800
1801
1802/* Array for times a connection was accepted. */
fc03df7a
UD
1803static time_t *starttime;
1804
471514d3
CD
1805#ifdef HAVE_INOTIFY
1806/* Inotify event for changed file.  The union overlays a struct
   inotify_event on a byte buffer that has PATH_MAX bytes beyond the
   fixed-size header.  */
1807union __inev
1808{
1809 struct inotify_event i;
/* Fallback for systems that do not define PATH_MAX.  */
1810# ifndef PATH_MAX
1811# define PATH_MAX 1024
1812# endif
1813 char buf[sizeof (struct inotify_event) + PATH_MAX];
1814};
1815
cf9313e7
CD
1816/* Returns 0 if the file is there otherwise -1. */
1817int
1818check_file (struct traced_file *finfo)
1819{
1820 struct stat64 st;
1821 /* We could check mtime and if different re-add
1822 the watches, and invalidate the database, but we
1823 don't because we are called from inotify_check_files
1824 which should be doing that work. If sufficient inotify
1825 events were lost then the next pruning or invalidation
1826 will do the stat and mtime check. We don't do it here to
1827 keep the logic simple. */
1828 if (stat64 (finfo->fname, &st) < 0)
1829 return -1;
1830 return 0;
1831}
1832
471514d3
CD
1833/* Process the inotify event in INEV. If the event matches any of the files
1834 registered with a database then mark that database as requiring its cache
1835 to be cleared. We indicate the cache needs clearing by setting
1836 TO_CLEAR[DBCNT] to true for the matching database. */
cf9313e7 1837static void
471514d3
CD
1838inotify_check_files (bool *to_clear, union __inev *inev)
1839{
1840 /* Check which of the files changed. */
1841 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
1842 {
1843 struct traced_file *finfo = dbs[dbcnt].traced_files;
1844
1845 while (finfo != NULL)
1846 {
cf9313e7
CD
1847 /* The configuration file was moved or deleted.
1848 We stop watching it at that point, and reinitialize. */
1849 if (finfo->inotify_descr[TRACED_FILE] == inev->i.wd
1850 && ((inev->i.mask & IN_MOVE_SELF)
1851 || (inev->i.mask & IN_DELETE_SELF)
1852 || (inev->i.mask & IN_IGNORED)))
1853 {
1854 int ret;
1855 bool moved = (inev->i.mask & IN_MOVE_SELF) != 0;
1856
1857 if (check_file (finfo) == 0)
1858 {
1859 dbg_log (_("ignored inotify event for `%s` (file exists)"),
1860 finfo->fname);
1861 return;
1862 }
1863
1864 dbg_log (_("monitored file `%s` was %s, removing watch"),
1865 finfo->fname, moved ? "moved" : "deleted");
1866 /* File was moved out, remove the watch. Watches are
1867 automatically removed when the file is deleted. */
1868 if (moved)
1869 {
1870 ret = inotify_rm_watch (inotify_fd, inev->i.wd);
1871 if (ret < 0)
1872 dbg_log (_("failed to remove file watch `%s`: %s"),
1873 finfo->fname, strerror (errno));
1874 }
1875 finfo->inotify_descr[TRACED_FILE] = -1;
1876 to_clear[dbcnt] = true;
1877 if (finfo->call_res_init)
1878 res_init ();
1879 return;
1880 }
1881 /* The configuration file was open for writing and has just closed.
1882 We reset the cache and reinitialize. */
1883 if (finfo->inotify_descr[TRACED_FILE] == inev->i.wd
1884 && inev->i.mask & IN_CLOSE_WRITE)
471514d3
CD
1885 {
1886 /* Mark cache as needing to be cleared and reinitialize. */
cf9313e7 1887 dbg_log (_("monitored file `%s` was written to"), finfo->fname);
471514d3
CD
1888 to_clear[dbcnt] = true;
1889 if (finfo->call_res_init)
1890 res_init ();
1891 return;
1892 }
cf9313e7
CD
1893 /* The parent directory was moved or deleted. We trigger one last
1894 invalidation. At the next pruning or invalidation we may add
1895 this watch back if the file is present again. */
1896 if (finfo->inotify_descr[TRACED_DIR] == inev->i.wd
1897 && ((inev->i.mask & IN_DELETE_SELF)
1898 || (inev->i.mask & IN_MOVE_SELF)
1899 || (inev->i.mask & IN_IGNORED)))
1900 {
1901 bool moved = (inev->i.mask & IN_MOVE_SELF) != 0;
1902 /* The directory watch may have already been removed
1903 but we don't know so we just remove it again and
1904 ignore the error. Then we remove the file watch.
1905 Note: watches are automatically removed for deleted
1906 files. */
1907 if (moved)
1908 inotify_rm_watch (inotify_fd, inev->i.wd);
1909 if (finfo->inotify_descr[TRACED_FILE] != -1)
1910 {
1911 dbg_log (_("monitored parent directory `%s` was %s, removing watch on `%s`"),
1912 finfo->dname, moved ? "moved" : "deleted", finfo->fname);
1913 if (inotify_rm_watch (inotify_fd, finfo->inotify_descr[TRACED_FILE]) < 0)
1914 dbg_log (_("failed to remove file watch `%s`: %s"),
1915 finfo->dname, strerror (errno));
1916 }
1917 finfo->inotify_descr[TRACED_FILE] = -1;
1918 finfo->inotify_descr[TRACED_DIR] = -1;
1919 to_clear[dbcnt] = true;
1920 if (finfo->call_res_init)
1921 res_init ();
1922 /* Continue to the next entry since this might be the
1923 parent directory for multiple registered files and
1924 we want to remove watches for all registered files. */
1925 continue;
1926 }
1927 /* The parent directory had a create or moved to event. */
1928 if (finfo->inotify_descr[TRACED_DIR] == inev->i.wd
1929 && ((inev->i.mask & IN_MOVED_TO)
1930 || (inev->i.mask & IN_CREATE))
1931 && strcmp (inev->i.name, finfo->sfname) == 0)
1932 {
1933 /* We detected a directory change. We look for the creation
1934 of the file we are tracking or the move of the same file
1935 into the directory. */
1936 int ret;
1937 dbg_log (_("monitored file `%s` was %s, adding watch"),
1938 finfo->fname,
1939 inev->i.mask & IN_CREATE ? "created" : "moved into place");
1940 /* File was moved in or created. Regenerate the watch. */
1941 if (finfo->inotify_descr[TRACED_FILE] != -1)
1942 inotify_rm_watch (inotify_fd,
1943 finfo->inotify_descr[TRACED_FILE]);
1944
1945 ret = inotify_add_watch (inotify_fd,
1946 finfo->fname,
1947 TRACED_FILE_MASK);
1948 if (ret < 0)
1949 dbg_log (_("failed to add file watch `%s`: %s"),
1950 finfo->fname, strerror (errno));
1951
1952 finfo->inotify_descr[TRACED_FILE] = ret;
1953
1954 /* The file is new or moved so mark cache as needing to
1955 be cleared and reinitialize. */
1956 to_clear[dbcnt] = true;
1957 if (finfo->call_res_init)
1958 res_init ();
471514d3 1959
cf9313e7
CD
1960 /* Done re-adding the watch. Don't return, we may still
1961 have other files in this same directory, same watch
1962 descriptor, and need to process them. */
1963 }
1964 /* Other events are ignored, and we move on to the next file. */
471514d3
CD
1965 finfo = finfo->next;
1966 }
1967 }
1968}
1969
1970/* If an entry in the array of booleans TO_CLEAR is TRUE then clear the cache
1971 for the associated database, otherwise do nothing. The TO_CLEAR array must
1972 have LASTDB entries. */
1973static inline void
1974clear_db_cache (bool *to_clear)
1975{
1976 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
1977 if (to_clear[dbcnt])
1978 {
1979 pthread_mutex_lock (&dbs[dbcnt].prune_lock);
1980 dbs[dbcnt].clear_cache = 1;
1981 pthread_mutex_unlock (&dbs[dbcnt].prune_lock);
1982 pthread_cond_signal (&dbs[dbcnt].prune_cond);
1983 }
1984}
1985
cf9313e7
CD
1986int
1987handle_inotify_events (void)
1988{
1989 bool to_clear[lastdb] = { false, };
1990 union __inev inev;
1991
1992 /* Read all inotify events for files registered via
1993 register_traced_file(). */
1994 while (1)
1995 {
1996 /* Potentially read multiple events into buf. */
1997 ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd,
1998 &inev.buf,
1999 sizeof (inev)));
2000 if (nb < (ssize_t) sizeof (struct inotify_event))
2001 {
2002 /* Not even 1 event. */
2003 if (__glibc_unlikely (nb == -1 && errno != EAGAIN))
2004 return -1;
2005 /* Done reading events that are ready. */
2006 break;
2007 }
2008 /* Process all events. The normal inotify interface delivers
2009 complete events on a read and never a partial event. */
2010 char *eptr = &inev.buf[0];
2011 ssize_t count;
2012 while (1)
2013 {
2014 /* Check which of the files changed. */
2015 inotify_check_files (to_clear, &inev);
2016 count = sizeof (struct inotify_event) + inev.i.len;
2017 eptr += count;
2018 nb -= count;
2019 if (nb >= (ssize_t) sizeof (struct inotify_event))
2020 memcpy (&inev, eptr, nb);
2021 else
2022 break;
2023 }
2024 continue;
2025 }
2026 /* Actually perform the cache clearing. */
2027 clear_db_cache (to_clear);
2028 return 0;
2029}
2030
471514d3 2031#endif
fc03df7a
UD
2032
2033static void
2034__attribute__ ((__noreturn__))
2035main_loop_poll (void)
2036{
1945c96f
UD
2037 struct pollfd *conns = (struct pollfd *) xmalloc (nconns
2038 * sizeof (conns[0]));
2039
1945c96f
UD
2040 conns[0].fd = sock;
2041 conns[0].events = POLLRDNORM;
2042 size_t nused = 1;
2043 size_t firstfree = 1;
2044
5228ba2f
UD
2045#ifdef HAVE_INOTIFY
2046 if (inotify_fd != -1)
2047 {
2048 conns[1].fd = inotify_fd;
2049 conns[1].events = POLLRDNORM;
2050 nused = 2;
2051 firstfree = 2;
2052 }
2053#endif
2054
3a2c0242
UD
2055#ifdef HAVE_NETLINK
2056 size_t idx_nl_status_fd = 0;
2057 if (nl_status_fd != -1)
2058 {
2059 idx_nl_status_fd = nused;
2060 conns[nused].fd = nl_status_fd;
2061 conns[nused].events = POLLRDNORM;
2062 ++nused;
2063 firstfree = nused;
2064 }
2065#endif
2066
1945c96f
UD
2067 while (1)
2068 {
2069 /* Wait for any event. We wait at most a couple of seconds so
2070 that we can check whether we should close any of the accepted
2071 connections since we have not received a request. */
2072#define MAX_ACCEPT_TIMEOUT 30
2073#define MIN_ACCEPT_TIMEOUT 5
2074#define MAIN_THREAD_TIMEOUT \
2075 (MAX_ACCEPT_TIMEOUT * 1000 \
2076 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
2077
2078 int n = poll (conns, nused, MAIN_THREAD_TIMEOUT);
2079
2080 time_t now = time (NULL);
2081
2082 /* If there is a descriptor ready for reading or there is a new
2083 connection, process this now. */
2084 if (n > 0)
67479a70 2085 {
1945c96f
UD
2086 if (conns[0].revents != 0)
2087 {
2088 /* We have a new incoming connection. Accept the connection. */
e9203023 2089 int fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
3ff2c948 2090 SOCK_NONBLOCK));
1945c96f 2091
9388dcbb
UD
2092 /* Use the descriptor if we have not reached the limit. */
2093 if (fd >= 0)
1945c96f 2094 {
9388dcbb
UD
2095 if (firstfree < nconns)
2096 {
2097 conns[firstfree].fd = fd;
2098 conns[firstfree].events = POLLRDNORM;
2099 starttime[firstfree] = now;
2100 if (firstfree >= nused)
2101 nused = firstfree + 1;
2102
2103 do
2104 ++firstfree;
2105 while (firstfree < nused && conns[firstfree].fd != -1);
2106 }
2107 else
2108 /* We cannot use the connection so close it. */
2109 close (fd);
1945c96f
UD
2110 }
2111
1945c96f
UD
2112 --n;
2113 }
2114
5228ba2f
UD
2115 size_t first = 1;
2116#ifdef HAVE_INOTIFY
b7432416 2117 if (inotify_fd != -1 && conns[1].fd == inotify_fd)
5228ba2f
UD
2118 {
2119 if (conns[1].revents != 0)
2120 {
cf9313e7
CD
2121 int ret;
2122 ret = handle_inotify_events ();
2123 if (ret == -1)
5228ba2f 2124 {
cf9313e7
CD
2125 /* Something went wrong when reading the inotify
2126 data. Better disable inotify. */
2127 dbg_log (_("disabled inotify-based monitoring after read error %d"), errno);
2128 conns[1].fd = -1;
2129 firstfree = 1;
2130 if (nused == 2)
2131 nused = 1;
2132 close (inotify_fd);
2133 inotify_fd = -1;
5228ba2f 2134 }
5228ba2f
UD
2135 --n;
2136 }
2137
2138 first = 2;
2139 }
2140#endif
2141
3a2c0242
UD
2142#ifdef HAVE_NETLINK
2143 if (idx_nl_status_fd != 0 && conns[idx_nl_status_fd].revents != 0)
2144 {
2145 char buf[4096];
2146 /* Read all the data. We do not interpret it here. */
2147 while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
2148 sizeof (buf))) != -1)
2149 ;
2150
2151 dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
2152 = __bump_nl_timestamp ();
2153 }
2154#endif
2155
5228ba2f 2156 for (size_t cnt = first; cnt < nused && n > 0; ++cnt)
1945c96f
UD
2157 if (conns[cnt].revents != 0)
2158 {
fc03df7a 2159 fd_ready (conns[cnt].fd);
1945c96f
UD
2160
2161 /* Clean up the CONNS array. */
2162 conns[cnt].fd = -1;
2163 if (cnt < firstfree)
2164 firstfree = cnt;
2165 if (cnt == nused - 1)
2166 do
2167 --nused;
2168 while (conns[nused - 1].fd == -1);
2169
2170 --n;
2171 }
2172 }
2173
2174 /* Now find entries which have timed out. */
2175 assert (nused > 0);
fc03df7a
UD
2176
2177 /* We make the timeout length depend on the number of file
2178 descriptors currently used. */
1945c96f
UD
2179#define ACCEPT_TIMEOUT \
2180 (MAX_ACCEPT_TIMEOUT \
2181 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
fc03df7a 2182 time_t laststart = now - ACCEPT_TIMEOUT;
1945c96f 2183
fc03df7a
UD
2184 for (size_t cnt = nused - 1; cnt > 0; --cnt)
2185 {
1945c96f
UD
2186 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
2187 {
2188 /* Remove the entry, it timed out. */
2189 (void) close (conns[cnt].fd);
2190 conns[cnt].fd = -1;
2191
2192 if (cnt < firstfree)
2193 firstfree = cnt;
2194 if (cnt == nused - 1)
2195 do
2196 --nused;
2197 while (conns[nused - 1].fd == -1);
2198 }
67479a70 2199 }
4401d759
UD
2200
2201 if (restart_p (now))
2202 restart ();
67308730 2203 }
d67281a7
UD
2204}
2205
67479a70 2206
fc03df7a
UD
#ifdef HAVE_EPOLL
/* Main loop of the daemon based on epoll, using the epoll descriptor
   EFD.

   The function returns only if one of the initial descriptors (the
   listening socket, the inotify descriptor or the netlink status
   descriptor) cannot be registered with EFD.  Once the loop is
   entered it does not return.

   Unlike in the poll-based loop, STARTTIME is indexed by the file
   descriptor itself; a zero entry means "no connection waiting".
   HIGHEST tracks the largest descriptor with a possibly non-zero
   entry.  MAIN_THREAD_TIMEOUT and ACCEPT_TIMEOUT are the macros
   defined in main_loop_poll; they expand to expressions using the
   local NUSED.  */
static void
main_loop_epoll (int efd)
{
  struct epoll_event ev = { 0, };
  int nused = 1;
  size_t highest = 0;

  /* Add the socket.  */
  ev.events = EPOLLRDNORM;
  ev.data.fd = sock;
  if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
    /* We cannot use epoll.  */
    return;

# ifdef HAVE_INOTIFY
  if (inotify_fd != -1)
    {
      ev.events = EPOLLRDNORM;
      ev.data.fd = inotify_fd;
      if (epoll_ctl (efd, EPOLL_CTL_ADD, inotify_fd, &ev) == -1)
        /* We cannot use epoll.  */
        return;
      nused = 2;
    }
# endif

# ifdef HAVE_NETLINK
  if (nl_status_fd != -1)
    {
      ev.events = EPOLLRDNORM;
      ev.data.fd = nl_status_fd;
      if (epoll_ctl (efd, EPOLL_CTL_ADD, nl_status_fd, &ev) == -1)
        /* We cannot use epoll.  */
        return;
    }
# endif

  while (1)
    {
      struct epoll_event revs[100];
# define nrevs (sizeof (revs) / sizeof (revs[0]))

      int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);

      time_t now = time (NULL);

      for (int cnt = 0; cnt < n; ++cnt)
        if (revs[cnt].data.fd == sock)
          {
            /* A new connection.  */
            int fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
                                                  SOCK_NONBLOCK));

            /* Use the descriptor if we have not reached the limit.  */
            if (fd >= 0)
              {
                /* Try to add the new descriptor.  */
                ev.data.fd = fd;
                if (fd >= nconns
                    || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
                  /* The descriptor is too large or something went
                     wrong.  Close the descriptor.  */
                  close (fd);
                else
                  {
                    /* Remember when we accepted the connection.  */
                    starttime[fd] = now;

                    if (fd > highest)
                      highest = fd;

                    ++nused;
                  }
              }
          }
# ifdef HAVE_INOTIFY
        else if (revs[cnt].data.fd == inotify_fd)
          {
            int ret;
            ret = handle_inotify_events ();
            if (ret == -1)
              {
                /* Something went wrong when reading the inotify
                   data.  Better disable inotify.  */
                dbg_log (_("disabled inotify-based monitoring after read error %d"), errno);
                (void) epoll_ctl (efd, EPOLL_CTL_DEL, inotify_fd, NULL);
                close (inotify_fd);
                inotify_fd = -1;
                /* Stop processing this batch of events.  */
                break;
              }
          }
# endif
# ifdef HAVE_NETLINK
        else if (revs[cnt].data.fd == nl_status_fd)
          {
            char buf[4096];
            /* Read all the data.  We do not interpret it here.  */
            while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
                                             sizeof (buf))) != -1)
              ;

            /* Store the new timestamp in the hosts database header,
               exactly as the poll-based loop does.  Merely bumping the
               timestamp without recording it would leave the value in
               the database header unchanged.  */
            dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
              = __bump_nl_timestamp ();
          }
# endif
        else
          {
            /* Remove the descriptor from the epoll descriptor.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);

            /* Get a worker to handle the request.  */
            fd_ready (revs[cnt].data.fd);

            /* Reset the time.  */
            starttime[revs[cnt].data.fd] = 0;
            if (revs[cnt].data.fd == highest)
              do
                --highest;
              while (highest > 0 && starttime[highest] == 0);

            --nused;
          }

      /* Now look for descriptors for accepted connections which have
         no reply in too long of a time.  */
      time_t laststart = now - ACCEPT_TIMEOUT;
      assert (starttime[sock] == 0);
# ifdef HAVE_INOTIFY
      assert (inotify_fd == -1 || starttime[inotify_fd] == 0);
# endif
      assert (nl_status_fd == -1 || starttime[nl_status_fd] == 0);
      for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
        if (starttime[cnt] != 0 && starttime[cnt] < laststart)
          {
            /* We are waiting for this one for too long.  Close it.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);

            (void) close (cnt);

            starttime[cnt] = 0;
            if (cnt == highest)
              --highest;
          }
        else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
          --highest;

      if (restart_p (now))
        restart ();
    }
}
#endif
2358
2359
67479a70 2360/* Start all the threads we want. The initial process is thread no. 1. */
d67281a7 2361void
67479a70 2362start_threads (void)
d67281a7 2363{
1945c96f
UD
2364 /* Initialize the conditional variable we will use. The only
2365 non-standard attribute we might use is the clock selection. */
2366 pthread_condattr_t condattr;
2367 pthread_condattr_init (&condattr);
2368
3078cba2
UD
2369#if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
2370 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
1945c96f
UD
2371 /* Determine whether the monotonous clock is available. */
2372 struct timespec dummy;
3078cba2 2373# if _POSIX_MONOTONIC_CLOCK == 0
94d824f9 2374 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
3078cba2
UD
2375# endif
2376# if _POSIX_CLOCK_SELECTION == 0
94d824f9 2377 if (sysconf (_SC_CLOCK_SELECTION) > 0)
3078cba2 2378# endif
94d824f9
UD
2379 if (clock_getres (CLOCK_MONOTONIC, &dummy) == 0
2380 && pthread_condattr_setclock (&condattr, CLOCK_MONOTONIC) == 0)
2381 timeout_clock = CLOCK_MONOTONIC;
1945c96f 2382#endif
d67281a7 2383
1945c96f
UD
2384 /* Create the attribute for the threads. They are all created
2385 detached. */
67479a70
UD
2386 pthread_attr_init (&attr);
2387 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
27e82856 2388 /* Use 1MB stacks, twice as much for 64-bit architectures. */
7ea8eb02 2389 pthread_attr_setstacksize (&attr, NSCD_THREAD_STACKSIZE);
d67281a7 2390
67479a70
UD
2391 /* We allow less than LASTDB threads only for debugging. */
2392 if (debug_level == 0)
2393 nthreads = MAX (nthreads, lastdb);
d67281a7 2394
ffb1b882
UD
2395 /* Create the threads which prune the databases. */
2396 // XXX Ideally this work would be done by some of the worker threads.
2397 // XXX But this is problematic since we would need to be able to wake
2398 // XXX them up explicitly as well as part of the group handling the
2399 // XXX ready-list. This requires an operation where we can wait on
2400 // XXX two conditional variables at the same time. This operation
2401 // XXX does not exist (yet).
2402 for (long int i = 0; i < lastdb; ++i)
1945c96f 2403 {
ffb1b882
UD
2404 /* Initialize the conditional variable. */
2405 if (pthread_cond_init (&dbs[i].prune_cond, &condattr) != 0)
2406 {
2407 dbg_log (_("could not initialize conditional variable"));
532a6035 2408 do_exit (1, 0, NULL);
ffb1b882
UD
2409 }
2410
1945c96f 2411 pthread_t th;
ffb1b882
UD
2412 if (dbs[i].enabled
2413 && pthread_create (&th, &attr, nscd_run_prune, (void *) i) != 0)
2414 {
2415 dbg_log (_("could not start clean-up thread; terminating"));
532a6035 2416 do_exit (1, 0, NULL);
ffb1b882 2417 }
27e82856 2418 }
ffb1b882
UD
2419
2420 pthread_condattr_destroy (&condattr);
2421
2422 for (long int i = 0; i < nthreads; ++i)
27e82856 2423 {
ffb1b882
UD
2424 pthread_t th;
2425 if (pthread_create (&th, &attr, nscd_run_worker, NULL) != 0)
2426 {
2427 if (i == 0)
2428 {
2429 dbg_log (_("could not start any worker thread; terminating"));
532a6035 2430 do_exit (1, 0, NULL);
ffb1b882
UD
2431 }
2432
2433 break;
2434 }
1945c96f 2435 }
d67281a7 2436
532a6035
SP
2437 /* Now it is safe to let the parent know that we're doing fine and it can
2438 exit. */
2439 notify_parent (0);
2440
fc03df7a
UD
2441 /* Determine how much room for descriptors we should initially
2442 allocate. This might need to change later if we cap the number
2443 with MAXCONN. */
2444 const long int nfds = sysconf (_SC_OPEN_MAX);
2445#define MINCONN 32
2446#define MAXCONN 16384
2447 if (nfds == -1 || nfds > MAXCONN)
2448 nconns = MAXCONN;
2449 else if (nfds < MINCONN)
2450 nconns = MINCONN;
2451 else
2452 nconns = nfds;
2453
2454 /* We need memory to pass descriptors on to the worker threads. */
2455 fdlist = (struct fdlist *) xcalloc (nconns, sizeof (fdlist[0]));
2456 /* Array to keep track when connection was accepted. */
2457 starttime = (time_t *) xcalloc (nconns, sizeof (starttime[0]));
2458
1945c96f
UD
2459 /* In the main thread we execute the loop which handles incoming
2460 connections. */
fc03df7a
UD
2461#ifdef HAVE_EPOLL
2462 int efd = epoll_create (100);
2463 if (efd != -1)
2464 {
2465 main_loop_epoll (efd);
2466 close (efd);
2467 }
2468#endif
2469
2470 main_loop_poll ();
d67281a7 2471}
057685e4
UD
2472
2473
2474/* Look up the uid, gid, and supplementary groups to run nscd as. When
2475 this function is called, we are not listening on the nscd socket yet so
2476 we can just use the ordinary lookup functions without causing a lockup */
2477static void
2478begin_drop_privileges (void)
2479{
a95a08b4 2480 struct passwd *pwd = getpwnam (server_user);
057685e4
UD
2481
2482 if (pwd == NULL)
2483 {
2484 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
532a6035
SP
2485 do_exit (EXIT_FAILURE, 0,
2486 _("Failed to run nscd as user '%s'"), server_user);
057685e4
UD
2487 }
2488
2489 server_uid = pwd->pw_uid;
2490 server_gid = pwd->pw_gid;
2491
4401d759
UD
2492 /* Save the old UID/GID if we have to change back. */
2493 if (paranoia)
2494 {
2495 old_uid = getuid ();
2496 old_gid = getgid ();
2497 }
2498
a95a08b4
UD
2499 if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0)
2500 {
2501 /* This really must never happen. */
2502 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
532a6035
SP
2503 do_exit (EXIT_FAILURE, errno,
2504 _("initial getgrouplist failed"));
a95a08b4 2505 }
057685e4 2506
a95a08b4 2507 server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t));
057685e4
UD
2508
2509 if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups)
2510 == -1)
2511 {
2512 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
532a6035 2513 do_exit (EXIT_FAILURE, errno, _("getgrouplist failed"));
057685e4
UD
2514 }
2515}
2516
2517
2518/* Call setgroups(), setgid(), and setuid() to drop root privileges and
2519 run nscd as the user specified in the configuration file. */
2520static void
2521finish_drop_privileges (void)
2522{
1f063dca
UD
2523#if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2524 /* We need to preserve the capabilities to connect to the audit daemon. */
2525 cap_t new_caps = preserve_capabilities ();
2526#endif
2527
057685e4
UD
2528 if (setgroups (server_ngroups, server_groups) == -1)
2529 {
2530 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
532a6035 2531 do_exit (EXIT_FAILURE, errno, _("setgroups failed"));
057685e4
UD
2532 }
2533
91287339
UD
2534 int res;
2535 if (paranoia)
2536 res = setresgid (server_gid, server_gid, old_gid);
2537 else
2538 res = setgid (server_gid);
2539 if (res == -1)
057685e4
UD
2540 {
2541 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
532a6035 2542 do_exit (4, errno, "setgid");
057685e4
UD
2543 }
2544
91287339
UD
2545 if (paranoia)
2546 res = setresuid (server_uid, server_uid, old_uid);
2547 else
2548 res = setuid (server_uid);
2549 if (res == -1)
057685e4
UD
2550 {
2551 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
532a6035 2552 do_exit (4, errno, "setuid");
057685e4 2553 }
1f063dca
UD
2554
2555#if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2556 /* Remove the temporary capabilities. */
2557 install_real_capabilities (new_caps);
2558#endif
057685e4 2559}