]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/journal/journald-server.c
tty-ask-password: Split out password sending
[thirdparty/systemd.git] / src / journal / journald-server.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2011 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #ifdef HAVE_SELINUX
23 #include <selinux/selinux.h>
24 #endif
25 #include <sys/ioctl.h>
26 #include <sys/mman.h>
27 #include <sys/signalfd.h>
28 #include <sys/statvfs.h>
29 #include <linux/sockios.h>
30
31 #include "libudev.h"
32 #include "sd-daemon.h"
33 #include "sd-journal.h"
34 #include "sd-messages.h"
35
36 #include "acl-util.h"
37 #include "alloc-util.h"
38 #include "audit-util.h"
39 #include "cgroup-util.h"
40 #include "conf-parser.h"
41 #include "dirent-util.h"
42 #include "extract-word.h"
43 #include "fd-util.h"
44 #include "fileio.h"
45 #include "formats-util.h"
46 #include "fs-util.h"
47 #include "hashmap.h"
48 #include "hostname-util.h"
49 #include "io-util.h"
50 #include "journal-authenticate.h"
51 #include "journal-file.h"
52 #include "journal-internal.h"
53 #include "journal-vacuum.h"
54 #include "journald-audit.h"
55 #include "journald-kmsg.h"
56 #include "journald-native.h"
57 #include "journald-rate-limit.h"
58 #include "journald-server.h"
59 #include "journald-stream.h"
60 #include "journald-syslog.h"
61 #include "missing.h"
62 #include "mkdir.h"
63 #include "parse-util.h"
64 #include "proc-cmdline.h"
65 #include "process-util.h"
66 #include "rm-rf.h"
67 #include "selinux-util.h"
68 #include "signal-util.h"
69 #include "socket-util.h"
70 #include "stdio-util.h"
71 #include "string-table.h"
72 #include "string-util.h"
73 #include "user-util.h"
74 #include "log.h"
75
/* Upper bound on the number of per-user journal files kept open at the same
 * time; find_journal() closes the oldest ones once this is reached. */
#define USER_JOURNALS_MAX 1024

/* Built-in defaults (presumably applied when the configuration does not
 * override them -- their users are outside this part of the file). */
#define DEFAULT_SYNC_INTERVAL_USEC (5 * USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30 * USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 1000
#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH

/* How long a cached disk space measurement stays valid before
 * determine_space_for() measures again. */
#define RECHECK_SPACE_USEC (30 * USEC_PER_SEC)

/* NOTE(review): presumably the send buffer size requested for the
 * notification socket -- confirm against the user of this constant. */
#define NOTIFY_SNDBUF_SIZE (8 * 1024 * 1024)

/* The period to insert between posting changes for coalescing */
#define POST_CHANGE_TIMER_INTERVAL_USEC (250 * USEC_PER_MSEC)
89
90 static int determine_space_for(
91 Server *s,
92 JournalMetrics *metrics,
93 const char *path,
94 const char *name,
95 bool verbose,
96 bool patch_min_use,
97 uint64_t *available,
98 uint64_t *limit) {
99
100 uint64_t sum = 0, ss_avail, avail;
101 _cleanup_closedir_ DIR *d = NULL;
102 struct dirent *de;
103 struct statvfs ss;
104 const char *p;
105 usec_t ts;
106
107 assert(s);
108 assert(metrics);
109 assert(path);
110 assert(name);
111
112 ts = now(CLOCK_MONOTONIC);
113
114 if (!verbose && s->cached_space_timestamp + RECHECK_SPACE_USEC > ts) {
115
116 if (available)
117 *available = s->cached_space_available;
118 if (limit)
119 *limit = s->cached_space_limit;
120
121 return 0;
122 }
123
124 p = strjoina(path, SERVER_MACHINE_ID(s));
125 d = opendir(p);
126 if (!d)
127 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR, errno, "Failed to open %s: %m", p);
128
129 if (fstatvfs(dirfd(d), &ss) < 0)
130 return log_error_errno(errno, "Failed to fstatvfs(%s): %m", p);
131
132 FOREACH_DIRENT_ALL(de, d, break) {
133 struct stat st;
134
135 if (!endswith(de->d_name, ".journal") &&
136 !endswith(de->d_name, ".journal~"))
137 continue;
138
139 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
140 log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", p, de->d_name);
141 continue;
142 }
143
144 if (!S_ISREG(st.st_mode))
145 continue;
146
147 sum += (uint64_t) st.st_blocks * 512UL;
148 }
149
150 /* If requested, then let's bump the min_use limit to the
151 * current usage on disk. We do this when starting up and
152 * first opening the journal files. This way sudden spikes in
153 * disk usage will not cause journald to vacuum files without
154 * bounds. Note that this means that only a restart of
155 * journald will make it reset this value. */
156
157 if (patch_min_use)
158 metrics->min_use = MAX(metrics->min_use, sum);
159
160 ss_avail = ss.f_bsize * ss.f_bavail;
161 avail = LESS_BY(ss_avail, metrics->keep_free);
162
163 s->cached_space_limit = MIN(MAX(sum + avail, metrics->min_use), metrics->max_use);
164 s->cached_space_available = LESS_BY(s->cached_space_limit, sum);
165 s->cached_space_timestamp = ts;
166
167 if (verbose) {
168 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
169 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
170 format_bytes(fb1, sizeof(fb1), sum);
171 format_bytes(fb2, sizeof(fb2), metrics->max_use);
172 format_bytes(fb3, sizeof(fb3), metrics->keep_free);
173 format_bytes(fb4, sizeof(fb4), ss_avail);
174 format_bytes(fb5, sizeof(fb5), s->cached_space_limit);
175 format_bytes(fb6, sizeof(fb6), s->cached_space_available);
176
177 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
178 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
179 name, path, fb1, fb5, fb6),
180 "JOURNAL_NAME=%s", name,
181 "JOURNAL_PATH=%s", path,
182 "CURRENT_USE=%"PRIu64, sum,
183 "CURRENT_USE_PRETTY=%s", fb1,
184 "MAX_USE=%"PRIu64, metrics->max_use,
185 "MAX_USE_PRETTY=%s", fb2,
186 "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free,
187 "DISK_KEEP_FREE_PRETTY=%s", fb3,
188 "DISK_AVAILABLE=%"PRIu64, ss_avail,
189 "DISK_AVAILABLE_PRETTY=%s", fb4,
190 "LIMIT=%"PRIu64, s->cached_space_limit,
191 "LIMIT_PRETTY=%s", fb5,
192 "AVAILABLE=%"PRIu64, s->cached_space_available,
193 "AVAILABLE_PRETTY=%s", fb6,
194 NULL);
195 }
196
197 if (available)
198 *available = s->cached_space_available;
199 if (limit)
200 *limit = s->cached_space_limit;
201
202 return 1;
203 }
204
205 static int determine_space(Server *s, bool verbose, bool patch_min_use, uint64_t *available, uint64_t *limit) {
206 JournalMetrics *metrics;
207 const char *path, *name;
208
209 assert(s);
210
211 if (s->system_journal) {
212 path = "/var/log/journal/";
213 metrics = &s->system_metrics;
214 name = "System journal";
215 } else {
216 path = "/run/log/journal/";
217 metrics = &s->runtime_metrics;
218 name = "Runtime journal";
219 }
220
221 return determine_space_for(s, metrics, path, name, verbose, patch_min_use, available, limit);
222 }
223
224 static void server_add_acls(JournalFile *f, uid_t uid) {
225 #ifdef HAVE_ACL
226 int r;
227 #endif
228 assert(f);
229
230 #ifdef HAVE_ACL
231 if (uid <= SYSTEM_UID_MAX)
232 return;
233
234 r = add_acls_for_user(f->fd, uid);
235 if (r < 0)
236 log_warning_errno(r, "Failed to set ACL on %s, ignoring: %m", f->path);
237 #endif
238 }
239
240 static int open_journal(
241 Server *s,
242 bool reliably,
243 const char *fname,
244 int flags,
245 bool seal,
246 JournalMetrics *metrics,
247 JournalFile *template,
248 JournalFile **ret) {
249 int r;
250 JournalFile *f;
251
252 assert(s);
253 assert(fname);
254 assert(ret);
255
256 if (reliably)
257 r = journal_file_open_reliably(fname, flags, 0640, s->compress, seal, metrics, s->mmap, template, &f);
258 else
259 r = journal_file_open(fname, flags, 0640, s->compress, seal, metrics, s->mmap, template, &f);
260 if (r < 0)
261 return r;
262
263 r = journal_file_enable_post_change_timer(f, s->event, POST_CHANGE_TIMER_INTERVAL_USEC);
264 if (r < 0) {
265 journal_file_close(f);
266 return r;
267 }
268
269 *ret = f;
270 return r;
271 }
272
273 static JournalFile* find_journal(Server *s, uid_t uid) {
274 _cleanup_free_ char *p = NULL;
275 int r;
276 JournalFile *f;
277 sd_id128_t machine;
278
279 assert(s);
280
281 /* We split up user logs only on /var, not on /run. If the
282 * runtime file is open, we write to it exclusively, in order
283 * to guarantee proper order as soon as we flush /run to
284 * /var and close the runtime file. */
285
286 if (s->runtime_journal)
287 return s->runtime_journal;
288
289 if (uid <= SYSTEM_UID_MAX)
290 return s->system_journal;
291
292 r = sd_id128_get_machine(&machine);
293 if (r < 0)
294 return s->system_journal;
295
296 f = ordered_hashmap_get(s->user_journals, UID_TO_PTR(uid));
297 if (f)
298 return f;
299
300 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
301 SD_ID128_FORMAT_VAL(machine), uid) < 0)
302 return s->system_journal;
303
304 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
305 /* Too many open? Then let's close one */
306 f = ordered_hashmap_steal_first(s->user_journals);
307 assert(f);
308 journal_file_close(f);
309 }
310
311 r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_metrics, NULL, &f);
312 if (r < 0)
313 return s->system_journal;
314
315 server_add_acls(f, uid);
316
317 r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f);
318 if (r < 0) {
319 journal_file_close(f);
320 return s->system_journal;
321 }
322
323 return f;
324 }
325
326 static int do_rotate(
327 Server *s,
328 JournalFile **f,
329 const char* name,
330 bool seal,
331 uint32_t uid) {
332
333 int r;
334 assert(s);
335
336 if (!*f)
337 return -EINVAL;
338
339 r = journal_file_rotate(f, s->compress, seal);
340 if (r < 0)
341 if (*f)
342 log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
343 else
344 log_error_errno(r, "Failed to create new %s journal: %m", name);
345 else
346 server_add_acls(*f, uid);
347
348 return r;
349 }
350
351 void server_rotate(Server *s) {
352 JournalFile *f;
353 void *k;
354 Iterator i;
355 int r;
356
357 log_debug("Rotating...");
358
359 (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
360 (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);
361
362 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
363 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UID(k));
364 if (r >= 0)
365 ordered_hashmap_replace(s->user_journals, k, f);
366 else if (!f)
367 /* Old file has been closed and deallocated */
368 ordered_hashmap_remove(s->user_journals, k);
369 }
370 }
371
372 void server_sync(Server *s) {
373 JournalFile *f;
374 Iterator i;
375 int r;
376
377 if (s->system_journal) {
378 r = journal_file_set_offline(s->system_journal);
379 if (r < 0)
380 log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
381 }
382
383 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i) {
384 r = journal_file_set_offline(f);
385 if (r < 0)
386 log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
387 }
388
389 if (s->sync_event_source) {
390 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
391 if (r < 0)
392 log_error_errno(r, "Failed to disable sync timer source: %m");
393 }
394
395 s->sync_scheduled = false;
396 }
397
398 static void do_vacuum(
399 Server *s,
400 JournalFile *f,
401 JournalMetrics *metrics,
402 const char *path,
403 const char *name,
404 bool verbose,
405 bool patch_min_use) {
406
407 const char *p;
408 uint64_t limit;
409 int r;
410
411 assert(s);
412 assert(metrics);
413 assert(path);
414 assert(name);
415
416 if (!f)
417 return;
418
419 p = strjoina(path, SERVER_MACHINE_ID(s));
420
421 limit = metrics->max_use;
422 (void) determine_space_for(s, metrics, path, name, verbose, patch_min_use, NULL, &limit);
423
424 r = journal_directory_vacuum(p, limit, metrics->n_max_files, s->max_retention_usec, &s->oldest_file_usec, verbose);
425 if (r < 0 && r != -ENOENT)
426 log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", p);
427 }
428
429 int server_vacuum(Server *s, bool verbose, bool patch_min_use) {
430 assert(s);
431
432 log_debug("Vacuuming...");
433
434 s->oldest_file_usec = 0;
435
436 do_vacuum(s, s->system_journal, &s->system_metrics, "/var/log/journal/", "System journal", verbose, patch_min_use);
437 do_vacuum(s, s->runtime_journal, &s->runtime_metrics, "/run/log/journal/", "Runtime journal", verbose, patch_min_use);
438
439 s->cached_space_limit = 0;
440 s->cached_space_available = 0;
441 s->cached_space_timestamp = 0;
442
443 return 0;
444 }
445
446 static void server_cache_machine_id(Server *s) {
447 sd_id128_t id;
448 int r;
449
450 assert(s);
451
452 r = sd_id128_get_machine(&id);
453 if (r < 0)
454 return;
455
456 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
457 }
458
459 static void server_cache_boot_id(Server *s) {
460 sd_id128_t id;
461 int r;
462
463 assert(s);
464
465 r = sd_id128_get_boot(&id);
466 if (r < 0)
467 return;
468
469 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
470 }
471
472 static void server_cache_hostname(Server *s) {
473 _cleanup_free_ char *t = NULL;
474 char *x;
475
476 assert(s);
477
478 t = gethostname_malloc();
479 if (!t)
480 return;
481
482 x = strappend("_HOSTNAME=", t);
483 if (!x)
484 return;
485
486 free(s->hostname_field);
487 s->hostname_field = x;
488 }
489
490 static bool shall_try_append_again(JournalFile *f, int r) {
491
492 /* -E2BIG Hit configured limit
493 -EFBIG Hit fs limit
494 -EDQUOT Quota limit hit
495 -ENOSPC Disk full
496 -EIO I/O error of some kind (mmap)
497 -EHOSTDOWN Other machine
498 -EBUSY Unclean shutdown
499 -EPROTONOSUPPORT Unsupported feature
500 -EBADMSG Corrupted
501 -ENODATA Truncated
502 -ESHUTDOWN Already archived
503 -EIDRM Journal file has been deleted */
504
505 if (r == -E2BIG || r == -EFBIG || r == -EDQUOT || r == -ENOSPC)
506 log_debug("%s: Allocation limit reached, rotating.", f->path);
507 else if (r == -EHOSTDOWN)
508 log_info("%s: Journal file from other machine, rotating.", f->path);
509 else if (r == -EBUSY)
510 log_info("%s: Unclean shutdown, rotating.", f->path);
511 else if (r == -EPROTONOSUPPORT)
512 log_info("%s: Unsupported feature, rotating.", f->path);
513 else if (r == -EBADMSG || r == -ENODATA || r == ESHUTDOWN)
514 log_warning("%s: Journal file corrupted, rotating.", f->path);
515 else if (r == -EIO)
516 log_warning("%s: IO error, rotating.", f->path);
517 else if (r == -EIDRM)
518 log_warning("%s: Journal file has been deleted, rotating.", f->path);
519 else
520 return false;
521
522 return true;
523 }
524
525 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
526 JournalFile *f;
527 bool vacuumed = false;
528 int r;
529
530 assert(s);
531 assert(iovec);
532 assert(n > 0);
533
534 f = find_journal(s, uid);
535 if (!f)
536 return;
537
538 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
539 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
540 server_rotate(s);
541 server_vacuum(s, false, false);
542 vacuumed = true;
543
544 f = find_journal(s, uid);
545 if (!f)
546 return;
547 }
548
549 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
550 if (r >= 0) {
551 server_schedule_sync(s, priority);
552 return;
553 }
554
555 if (vacuumed || !shall_try_append_again(f, r)) {
556 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
557 return;
558 }
559
560 server_rotate(s);
561 server_vacuum(s, false, false);
562
563 f = find_journal(s, uid);
564 if (!f)
565 return;
566
567 log_debug("Retrying write.");
568 r = journal_file_append_entry(f, NULL, iovec, n, &s->seqnum, NULL, NULL);
569 if (r < 0)
570 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
571 else
572 server_schedule_sync(s, priority);
573 }
574
575 static void dispatch_message_real(
576 Server *s,
577 struct iovec *iovec, unsigned n, unsigned m,
578 const struct ucred *ucred,
579 const struct timeval *tv,
580 const char *label, size_t label_len,
581 const char *unit_id,
582 int priority,
583 pid_t object_pid) {
584
585 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
586 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
587 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
588 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
589 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
590 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
591 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
592 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
593 uid_t object_uid;
594 gid_t object_gid;
595 char *x;
596 int r;
597 char *t, *c;
598 uid_t realuid = 0, owner = 0, journal_uid;
599 bool owner_valid = false;
600 #ifdef HAVE_AUDIT
601 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
602 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
603 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
604 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
605
606 uint32_t audit;
607 uid_t loginuid;
608 #endif
609
610 assert(s);
611 assert(iovec);
612 assert(n > 0);
613 assert(n + N_IOVEC_META_FIELDS + (object_pid ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
614
615 if (ucred) {
616 realuid = ucred->uid;
617
618 sprintf(pid, "_PID="PID_FMT, ucred->pid);
619 IOVEC_SET_STRING(iovec[n++], pid);
620
621 sprintf(uid, "_UID="UID_FMT, ucred->uid);
622 IOVEC_SET_STRING(iovec[n++], uid);
623
624 sprintf(gid, "_GID="GID_FMT, ucred->gid);
625 IOVEC_SET_STRING(iovec[n++], gid);
626
627 r = get_process_comm(ucred->pid, &t);
628 if (r >= 0) {
629 x = strjoina("_COMM=", t);
630 free(t);
631 IOVEC_SET_STRING(iovec[n++], x);
632 }
633
634 r = get_process_exe(ucred->pid, &t);
635 if (r >= 0) {
636 x = strjoina("_EXE=", t);
637 free(t);
638 IOVEC_SET_STRING(iovec[n++], x);
639 }
640
641 r = get_process_cmdline(ucred->pid, 0, false, &t);
642 if (r >= 0) {
643 x = strjoina("_CMDLINE=", t);
644 free(t);
645 IOVEC_SET_STRING(iovec[n++], x);
646 }
647
648 r = get_process_capeff(ucred->pid, &t);
649 if (r >= 0) {
650 x = strjoina("_CAP_EFFECTIVE=", t);
651 free(t);
652 IOVEC_SET_STRING(iovec[n++], x);
653 }
654
655 #ifdef HAVE_AUDIT
656 r = audit_session_from_pid(ucred->pid, &audit);
657 if (r >= 0) {
658 sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
659 IOVEC_SET_STRING(iovec[n++], audit_session);
660 }
661
662 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
663 if (r >= 0) {
664 sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
665 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
666 }
667 #endif
668
669 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
670 if (r >= 0) {
671 char *session = NULL;
672
673 x = strjoina("_SYSTEMD_CGROUP=", c);
674 IOVEC_SET_STRING(iovec[n++], x);
675
676 r = cg_path_get_session(c, &t);
677 if (r >= 0) {
678 session = strjoina("_SYSTEMD_SESSION=", t);
679 free(t);
680 IOVEC_SET_STRING(iovec[n++], session);
681 }
682
683 if (cg_path_get_owner_uid(c, &owner) >= 0) {
684 owner_valid = true;
685
686 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
687 IOVEC_SET_STRING(iovec[n++], owner_uid);
688 }
689
690 if (cg_path_get_unit(c, &t) >= 0) {
691 x = strjoina("_SYSTEMD_UNIT=", t);
692 free(t);
693 IOVEC_SET_STRING(iovec[n++], x);
694 } else if (unit_id && !session) {
695 x = strjoina("_SYSTEMD_UNIT=", unit_id);
696 IOVEC_SET_STRING(iovec[n++], x);
697 }
698
699 if (cg_path_get_user_unit(c, &t) >= 0) {
700 x = strjoina("_SYSTEMD_USER_UNIT=", t);
701 free(t);
702 IOVEC_SET_STRING(iovec[n++], x);
703 } else if (unit_id && session) {
704 x = strjoina("_SYSTEMD_USER_UNIT=", unit_id);
705 IOVEC_SET_STRING(iovec[n++], x);
706 }
707
708 if (cg_path_get_slice(c, &t) >= 0) {
709 x = strjoina("_SYSTEMD_SLICE=", t);
710 free(t);
711 IOVEC_SET_STRING(iovec[n++], x);
712 }
713
714 free(c);
715 } else if (unit_id) {
716 x = strjoina("_SYSTEMD_UNIT=", unit_id);
717 IOVEC_SET_STRING(iovec[n++], x);
718 }
719
720 #ifdef HAVE_SELINUX
721 if (mac_selinux_have()) {
722 if (label) {
723 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
724
725 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
726 IOVEC_SET_STRING(iovec[n++], x);
727 } else {
728 security_context_t con;
729
730 if (getpidcon(ucred->pid, &con) >= 0) {
731 x = strjoina("_SELINUX_CONTEXT=", con);
732
733 freecon(con);
734 IOVEC_SET_STRING(iovec[n++], x);
735 }
736 }
737 }
738 #endif
739 }
740 assert(n <= m);
741
742 if (object_pid) {
743 r = get_process_uid(object_pid, &object_uid);
744 if (r >= 0) {
745 sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
746 IOVEC_SET_STRING(iovec[n++], o_uid);
747 }
748
749 r = get_process_gid(object_pid, &object_gid);
750 if (r >= 0) {
751 sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
752 IOVEC_SET_STRING(iovec[n++], o_gid);
753 }
754
755 r = get_process_comm(object_pid, &t);
756 if (r >= 0) {
757 x = strjoina("OBJECT_COMM=", t);
758 free(t);
759 IOVEC_SET_STRING(iovec[n++], x);
760 }
761
762 r = get_process_exe(object_pid, &t);
763 if (r >= 0) {
764 x = strjoina("OBJECT_EXE=", t);
765 free(t);
766 IOVEC_SET_STRING(iovec[n++], x);
767 }
768
769 r = get_process_cmdline(object_pid, 0, false, &t);
770 if (r >= 0) {
771 x = strjoina("OBJECT_CMDLINE=", t);
772 free(t);
773 IOVEC_SET_STRING(iovec[n++], x);
774 }
775
776 #ifdef HAVE_AUDIT
777 r = audit_session_from_pid(object_pid, &audit);
778 if (r >= 0) {
779 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
780 IOVEC_SET_STRING(iovec[n++], o_audit_session);
781 }
782
783 r = audit_loginuid_from_pid(object_pid, &loginuid);
784 if (r >= 0) {
785 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
786 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
787 }
788 #endif
789
790 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
791 if (r >= 0) {
792 x = strjoina("OBJECT_SYSTEMD_CGROUP=", c);
793 IOVEC_SET_STRING(iovec[n++], x);
794
795 r = cg_path_get_session(c, &t);
796 if (r >= 0) {
797 x = strjoina("OBJECT_SYSTEMD_SESSION=", t);
798 free(t);
799 IOVEC_SET_STRING(iovec[n++], x);
800 }
801
802 if (cg_path_get_owner_uid(c, &owner) >= 0) {
803 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
804 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
805 }
806
807 if (cg_path_get_unit(c, &t) >= 0) {
808 x = strjoina("OBJECT_SYSTEMD_UNIT=", t);
809 free(t);
810 IOVEC_SET_STRING(iovec[n++], x);
811 }
812
813 if (cg_path_get_user_unit(c, &t) >= 0) {
814 x = strjoina("OBJECT_SYSTEMD_USER_UNIT=", t);
815 free(t);
816 IOVEC_SET_STRING(iovec[n++], x);
817 }
818
819 free(c);
820 }
821 }
822 assert(n <= m);
823
824 if (tv) {
825 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=%llu", (unsigned long long) timeval_load(tv));
826 IOVEC_SET_STRING(iovec[n++], source_time);
827 }
828
829 /* Note that strictly speaking storing the boot id here is
830 * redundant since the entry includes this in-line
831 * anyway. However, we need this indexed, too. */
832 if (!isempty(s->boot_id_field))
833 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
834
835 if (!isempty(s->machine_id_field))
836 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
837
838 if (!isempty(s->hostname_field))
839 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
840
841 assert(n <= m);
842
843 if (s->split_mode == SPLIT_UID && realuid > 0)
844 /* Split up strictly by any UID */
845 journal_uid = realuid;
846 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
847 /* Split up by login UIDs. We do this only if the
848 * realuid is not root, in order not to accidentally
849 * leak privileged information to the user that is
850 * logged by a privileged process that is part of an
851 * unprivileged session. */
852 journal_uid = owner;
853 else
854 journal_uid = 0;
855
856 write_to_journal(s, journal_uid, iovec, n, priority);
857 }
858
859 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
860 char mid[11 + 32 + 1];
861 struct iovec iovec[N_IOVEC_META_FIELDS + 5 + N_IOVEC_PAYLOAD_FIELDS];
862 unsigned n = 0, m;
863 int r;
864 va_list ap;
865 struct ucred ucred = {};
866
867 assert(s);
868 assert(format);
869
870 assert_cc(3 == LOG_FAC(LOG_DAEMON));
871 IOVEC_SET_STRING(iovec[n++], "SYSLOG_FACILITY=3");
872 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=systemd-journald");
873
874 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
875 assert_cc(6 == LOG_INFO);
876 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
877
878 if (!sd_id128_equal(message_id, SD_ID128_NULL)) {
879 snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
880 IOVEC_SET_STRING(iovec[n++], mid);
881 }
882
883 m = n;
884
885 va_start(ap, format);
886 r = log_format_iovec(iovec, ELEMENTSOF(iovec), &n, false, 0, format, ap);
887 /* Error handling below */
888 va_end(ap);
889
890 ucred.pid = getpid();
891 ucred.uid = getuid();
892 ucred.gid = getgid();
893
894 if (r >= 0)
895 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
896
897 while (m < n)
898 free(iovec[m++].iov_base);
899
900 if (r < 0) {
901 /* We failed to format the message. Emit a warning instead. */
902 char buf[LINE_MAX];
903
904 xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror(-r));
905
906 n = 3;
907 IOVEC_SET_STRING(iovec[n++], "PRIORITY=4");
908 IOVEC_SET_STRING(iovec[n++], buf);
909 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
910 }
911 }
912
913 void server_dispatch_message(
914 Server *s,
915 struct iovec *iovec, unsigned n, unsigned m,
916 const struct ucred *ucred,
917 const struct timeval *tv,
918 const char *label, size_t label_len,
919 const char *unit_id,
920 int priority,
921 pid_t object_pid) {
922
923 int rl, r;
924 _cleanup_free_ char *path = NULL;
925 uint64_t available = 0;
926 char *c;
927
928 assert(s);
929 assert(iovec || n == 0);
930
931 if (n == 0)
932 return;
933
934 if (LOG_PRI(priority) > s->max_level_store)
935 return;
936
937 /* Stop early in case the information will not be stored
938 * in a journal. */
939 if (s->storage == STORAGE_NONE)
940 return;
941
942 if (!ucred)
943 goto finish;
944
945 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
946 if (r < 0)
947 goto finish;
948
949 /* example: /user/lennart/3/foobar
950 * /system/dbus.service/foobar
951 *
952 * So let's cut of everything past the third /, since that is
953 * where user directories start */
954
955 c = strchr(path, '/');
956 if (c) {
957 c = strchr(c+1, '/');
958 if (c) {
959 c = strchr(c+1, '/');
960 if (c)
961 *c = 0;
962 }
963 }
964
965 (void) determine_space(s, false, false, &available, NULL);
966 rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available);
967 if (rl == 0)
968 return;
969
970 /* Write a suppression message if we suppressed something */
971 if (rl > 1)
972 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
973 LOG_MESSAGE("Suppressed %u messages from %s", rl - 1, path),
974 NULL);
975
976 finish:
977 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
978 }
979
980
981 static int system_journal_open(Server *s, bool flush_requested) {
982 const char *fn;
983 int r = 0;
984
985 if (!s->system_journal &&
986 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
987 (flush_requested
988 || access("/run/systemd/journal/flushed", F_OK) >= 0)) {
989
990 /* If in auto mode: first try to create the machine
991 * path, but not the prefix.
992 *
993 * If in persistent mode: create /var/log/journal and
994 * the machine path */
995
996 if (s->storage == STORAGE_PERSISTENT)
997 (void) mkdir_p("/var/log/journal/", 0755);
998
999 fn = strjoina("/var/log/journal/", SERVER_MACHINE_ID(s));
1000 (void) mkdir(fn, 0755);
1001
1002 fn = strjoina(fn, "/system.journal");
1003 r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_metrics, NULL, &s->system_journal);
1004 if (r >= 0) {
1005 server_add_acls(s->system_journal, 0);
1006 (void) determine_space_for(s, &s->system_metrics, "/var/log/journal/", "System journal", true, true, NULL, NULL);
1007 } else if (r < 0) {
1008 if (r != -ENOENT && r != -EROFS)
1009 log_warning_errno(r, "Failed to open system journal: %m");
1010
1011 r = 0;
1012 }
1013 }
1014
1015 if (!s->runtime_journal &&
1016 (s->storage != STORAGE_NONE)) {
1017
1018 fn = strjoina("/run/log/journal/", SERVER_MACHINE_ID(s), "/system.journal");
1019
1020 if (s->system_journal) {
1021
1022 /* Try to open the runtime journal, but only
1023 * if it already exists, so that we can flush
1024 * it into the system journal */
1025
1026 r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_metrics, NULL, &s->runtime_journal);
1027 if (r < 0) {
1028 if (r != -ENOENT)
1029 log_warning_errno(r, "Failed to open runtime journal: %m");
1030
1031 r = 0;
1032 }
1033
1034 } else {
1035
1036 /* OK, we really need the runtime journal, so create
1037 * it if necessary. */
1038
1039 (void) mkdir("/run/log", 0755);
1040 (void) mkdir("/run/log/journal", 0755);
1041 (void) mkdir_parents(fn, 0750);
1042
1043 r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_metrics, NULL, &s->runtime_journal);
1044 if (r < 0)
1045 return log_error_errno(r, "Failed to open runtime journal: %m");
1046 }
1047
1048 if (s->runtime_journal) {
1049 server_add_acls(s->runtime_journal, 0);
1050 (void) determine_space_for(s, &s->runtime_metrics, "/run/log/journal/", "Runtime journal", true, true, NULL, NULL);
1051 }
1052 }
1053
1054 return r;
1055 }
1056
1057 int server_flush_to_var(Server *s) {
1058 sd_id128_t machine;
1059 sd_journal *j = NULL;
1060 char ts[FORMAT_TIMESPAN_MAX];
1061 usec_t start;
1062 unsigned n = 0;
1063 int r;
1064
1065 assert(s);
1066
1067 if (s->storage != STORAGE_AUTO &&
1068 s->storage != STORAGE_PERSISTENT)
1069 return 0;
1070
1071 if (!s->runtime_journal)
1072 return 0;
1073
1074 (void) system_journal_open(s, true);
1075
1076 if (!s->system_journal)
1077 return 0;
1078
1079 log_debug("Flushing to /var...");
1080
1081 start = now(CLOCK_MONOTONIC);
1082
1083 r = sd_id128_get_machine(&machine);
1084 if (r < 0)
1085 return r;
1086
1087 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1088 if (r < 0)
1089 return log_error_errno(r, "Failed to read runtime journal: %m");
1090
1091 sd_journal_set_data_threshold(j, 0);
1092
1093 SD_JOURNAL_FOREACH(j) {
1094 Object *o = NULL;
1095 JournalFile *f;
1096
1097 f = j->current_file;
1098 assert(f && f->current_offset > 0);
1099
1100 n++;
1101
1102 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1103 if (r < 0) {
1104 log_error_errno(r, "Can't read entry: %m");
1105 goto finish;
1106 }
1107
1108 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1109 if (r >= 0)
1110 continue;
1111
1112 if (!shall_try_append_again(s->system_journal, r)) {
1113 log_error_errno(r, "Can't write entry: %m");
1114 goto finish;
1115 }
1116
1117 server_rotate(s);
1118 server_vacuum(s, false, false);
1119
1120 if (!s->system_journal) {
1121 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1122 r = -EIO;
1123 goto finish;
1124 }
1125
1126 log_debug("Retrying write.");
1127 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1128 if (r < 0) {
1129 log_error_errno(r, "Can't write entry: %m");
1130 goto finish;
1131 }
1132 }
1133
1134 r = 0;
1135
1136 finish:
1137 journal_file_post_change(s->system_journal);
1138
1139 s->runtime_journal = journal_file_close(s->runtime_journal);
1140
1141 if (r >= 0)
1142 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
1143
1144 sd_journal_close(j);
1145
1146 server_driver_message(s, SD_ID128_NULL,
1147 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1148 format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0),
1149 n),
1150 NULL);
1151
1152 return r;
1153 }
1154
1155 int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1156 Server *s = userdata;
1157 struct ucred *ucred = NULL;
1158 struct timeval *tv = NULL;
1159 struct cmsghdr *cmsg;
1160 char *label = NULL;
1161 size_t label_len = 0, m;
1162 struct iovec iovec;
1163 ssize_t n;
1164 int *fds = NULL, v = 0;
1165 unsigned n_fds = 0;
1166
1167 union {
1168 struct cmsghdr cmsghdr;
1169
1170 /* We use NAME_MAX space for the SELinux label
1171 * here. The kernel currently enforces no
1172 * limit, but according to suggestions from
1173 * the SELinux people this will change and it
1174 * will probably be identical to NAME_MAX. For
1175 * now we use that, but this should be updated
1176 * one day when the final limit is known. */
1177 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1178 CMSG_SPACE(sizeof(struct timeval)) +
1179 CMSG_SPACE(sizeof(int)) + /* fd */
1180 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1181 } control = {};
1182
1183 union sockaddr_union sa = {};
1184
1185 struct msghdr msghdr = {
1186 .msg_iov = &iovec,
1187 .msg_iovlen = 1,
1188 .msg_control = &control,
1189 .msg_controllen = sizeof(control),
1190 .msg_name = &sa,
1191 .msg_namelen = sizeof(sa),
1192 };
1193
1194 assert(s);
1195 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
1196
1197 if (revents != EPOLLIN) {
1198 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1199 return -EIO;
1200 }
1201
1202 /* Try to get the right size, if we can. (Not all
1203 * sockets support SIOCINQ, hence we just try, but
1204 * don't rely on it. */
1205 (void) ioctl(fd, SIOCINQ, &v);
1206
1207 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1208 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1209 (size_t) LINE_MAX,
1210 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
1211
1212 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1213 return log_oom();
1214
1215 iovec.iov_base = s->buffer;
1216 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
1217
1218 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1219 if (n < 0) {
1220 if (errno == EINTR || errno == EAGAIN)
1221 return 0;
1222
1223 return log_error_errno(errno, "recvmsg() failed: %m");
1224 }
1225
1226 CMSG_FOREACH(cmsg, &msghdr) {
1227
1228 if (cmsg->cmsg_level == SOL_SOCKET &&
1229 cmsg->cmsg_type == SCM_CREDENTIALS &&
1230 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1231 ucred = (struct ucred*) CMSG_DATA(cmsg);
1232 else if (cmsg->cmsg_level == SOL_SOCKET &&
1233 cmsg->cmsg_type == SCM_SECURITY) {
1234 label = (char*) CMSG_DATA(cmsg);
1235 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1236 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1237 cmsg->cmsg_type == SO_TIMESTAMP &&
1238 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1239 tv = (struct timeval*) CMSG_DATA(cmsg);
1240 else if (cmsg->cmsg_level == SOL_SOCKET &&
1241 cmsg->cmsg_type == SCM_RIGHTS) {
1242 fds = (int*) CMSG_DATA(cmsg);
1243 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1244 }
1245 }
1246
1247 /* And a trailing NUL, just in case */
1248 s->buffer[n] = 0;
1249
1250 if (fd == s->syslog_fd) {
1251 if (n > 0 && n_fds == 0)
1252 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1253 else if (n_fds > 0)
1254 log_warning("Got file descriptors via syslog socket. Ignoring.");
1255
1256 } else if (fd == s->native_fd) {
1257 if (n > 0 && n_fds == 0)
1258 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1259 else if (n == 0 && n_fds == 1)
1260 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1261 else if (n_fds > 0)
1262 log_warning("Got too many file descriptors via native socket. Ignoring.");
1263
1264 } else {
1265 assert(fd == s->audit_fd);
1266
1267 if (n > 0 && n_fds == 0)
1268 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1269 else if (n_fds > 0)
1270 log_warning("Got file descriptors via audit socket. Ignoring.");
1271 }
1272
1273 close_many(fds, n_fds);
1274 return 0;
1275 }
1276
1277 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1278 Server *s = userdata;
1279 int r;
1280
1281 assert(s);
1282
1283 log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
1284
1285 server_flush_to_var(s);
1286 server_sync(s);
1287 server_vacuum(s, false, false);
1288
1289 r = touch("/run/systemd/journal/flushed");
1290 if (r < 0)
1291 log_warning_errno(r, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
1292
1293 return 0;
1294 }
1295
1296 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1297 Server *s = userdata;
1298 int r;
1299
1300 assert(s);
1301
1302 log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
1303 server_rotate(s);
1304 server_vacuum(s, true, true);
1305
1306 /* Let clients know when the most recent rotation happened. */
1307 r = write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC));
1308 if (r < 0)
1309 log_warning_errno(r, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
1310
1311 return 0;
1312 }
1313
1314 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1315 Server *s = userdata;
1316
1317 assert(s);
1318
1319 log_received_signal(LOG_INFO, si);
1320
1321 sd_event_exit(s->event, 0);
1322 return 0;
1323 }
1324
1325 static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1326 Server *s = userdata;
1327 int r;
1328
1329 assert(s);
1330
1331 log_debug("Received request to sync from PID " PID_FMT, si->ssi_pid);
1332
1333 server_sync(s);
1334
1335 /* Let clients know when the most recent sync happened. */
1336 r = write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC));
1337 if (r < 0)
1338 log_warning_errno(r, "Failed to write /run/systemd/journal/synced, ignoring: %m");
1339
1340 return 0;
1341 }
1342
1343 static int setup_signals(Server *s) {
1344 int r;
1345
1346 assert(s);
1347
1348 assert(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
1349
1350 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1351 if (r < 0)
1352 return r;
1353
1354 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1355 if (r < 0)
1356 return r;
1357
1358 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1359 if (r < 0)
1360 return r;
1361
1362 /* Let's process SIGTERM late, so that we flush all queued
1363 * messages to disk before we exit */
1364 r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1365 if (r < 0)
1366 return r;
1367
1368 /* When journald is invoked on the terminal (when debugging),
1369 * it's useful if C-c is handled equivalent to SIGTERM. */
1370 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
1371 if (r < 0)
1372 return r;
1373
1374 r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1375 if (r < 0)
1376 return r;
1377
1378 /* SIGRTMIN+1 causes an immediate sync. We process this very
1379 * late, so that everything else queued at this point is
1380 * really written to disk. Clients can watch
1381 * /run/systemd/journal/synced with inotify until its mtime
1382 * changes to see when a sync happened. */
1383 r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
1384 if (r < 0)
1385 return r;
1386
1387 r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
1388 if (r < 0)
1389 return r;
1390
1391 return 0;
1392 }
1393
1394 static int server_parse_proc_cmdline(Server *s) {
1395 _cleanup_free_ char *line = NULL;
1396 const char *p;
1397 int r;
1398
1399 r = proc_cmdline(&line);
1400 if (r < 0) {
1401 log_warning_errno(r, "Failed to read /proc/cmdline, ignoring: %m");
1402 return 0;
1403 }
1404
1405 p = line;
1406 for(;;) {
1407 _cleanup_free_ char *word = NULL;
1408
1409 r = extract_first_word(&p, &word, NULL, 0);
1410 if (r < 0)
1411 return log_error_errno(r, "Failed to parse journald syntax \"%s\": %m", line);
1412
1413 if (r == 0)
1414 break;
1415
1416 if (startswith(word, "systemd.journald.forward_to_syslog=")) {
1417 r = parse_boolean(word + 35);
1418 if (r < 0)
1419 log_warning("Failed to parse forward to syslog switch %s. Ignoring.", word + 35);
1420 else
1421 s->forward_to_syslog = r;
1422 } else if (startswith(word, "systemd.journald.forward_to_kmsg=")) {
1423 r = parse_boolean(word + 33);
1424 if (r < 0)
1425 log_warning("Failed to parse forward to kmsg switch %s. Ignoring.", word + 33);
1426 else
1427 s->forward_to_kmsg = r;
1428 } else if (startswith(word, "systemd.journald.forward_to_console=")) {
1429 r = parse_boolean(word + 36);
1430 if (r < 0)
1431 log_warning("Failed to parse forward to console switch %s. Ignoring.", word + 36);
1432 else
1433 s->forward_to_console = r;
1434 } else if (startswith(word, "systemd.journald.forward_to_wall=")) {
1435 r = parse_boolean(word + 33);
1436 if (r < 0)
1437 log_warning("Failed to parse forward to wall switch %s. Ignoring.", word + 33);
1438 else
1439 s->forward_to_wall = r;
1440 } else if (startswith(word, "systemd.journald"))
1441 log_warning("Invalid systemd.journald parameter. Ignoring.");
1442 }
1443
1444 /* do not warn about state here, since probably systemd already did */
1445 return 0;
1446 }
1447
1448 static int server_parse_config_file(Server *s) {
1449 assert(s);
1450
1451 return config_parse_many(PKGSYSCONFDIR "/journald.conf",
1452 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1453 "Journal\0",
1454 config_item_perf_lookup, journald_gperf_lookup,
1455 false, s);
1456 }
1457
1458 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1459 Server *s = userdata;
1460
1461 assert(s);
1462
1463 server_sync(s);
1464 return 0;
1465 }
1466
1467 int server_schedule_sync(Server *s, int priority) {
1468 int r;
1469
1470 assert(s);
1471
1472 if (priority <= LOG_CRIT) {
1473 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1474 server_sync(s);
1475 return 0;
1476 }
1477
1478 if (s->sync_scheduled)
1479 return 0;
1480
1481 if (s->sync_interval_usec > 0) {
1482 usec_t when;
1483
1484 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1485 if (r < 0)
1486 return r;
1487
1488 when += s->sync_interval_usec;
1489
1490 if (!s->sync_event_source) {
1491 r = sd_event_add_time(
1492 s->event,
1493 &s->sync_event_source,
1494 CLOCK_MONOTONIC,
1495 when, 0,
1496 server_dispatch_sync, s);
1497 if (r < 0)
1498 return r;
1499
1500 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1501 } else {
1502 r = sd_event_source_set_time(s->sync_event_source, when);
1503 if (r < 0)
1504 return r;
1505
1506 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1507 }
1508 if (r < 0)
1509 return r;
1510
1511 s->sync_scheduled = true;
1512 }
1513
1514 return 0;
1515 }
1516
1517 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1518 Server *s = userdata;
1519
1520 assert(s);
1521
1522 server_cache_hostname(s);
1523 return 0;
1524 }
1525
1526 static int server_open_hostname(Server *s) {
1527 int r;
1528
1529 assert(s);
1530
1531 s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
1532 if (s->hostname_fd < 0)
1533 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
1534
1535 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1536 if (r < 0) {
1537 /* kernels prior to 3.2 don't support polling this file. Ignore
1538 * the failure. */
1539 if (r == -EPERM) {
1540 log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
1541 s->hostname_fd = safe_close(s->hostname_fd);
1542 return 0;
1543 }
1544
1545 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
1546 }
1547
1548 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1549 if (r < 0)
1550 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
1551
1552 return 0;
1553 }
1554
1555 static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1556 Server *s = userdata;
1557 int r;
1558
1559 assert(s);
1560 assert(s->notify_event_source == es);
1561 assert(s->notify_fd == fd);
1562
1563 /* The $NOTIFY_SOCKET is writable again, now send exactly one
1564 * message on it. Either it's the wtachdog event, the initial
1565 * READY=1 event or an stdout stream event. If there's nothing
1566 * to write anymore, turn our event source off. The next time
1567 * there's something to send it will be turned on again. */
1568
1569 if (!s->sent_notify_ready) {
1570 static const char p[] =
1571 "READY=1\n"
1572 "STATUS=Processing requests...";
1573 ssize_t l;
1574
1575 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1576 if (l < 0) {
1577 if (errno == EAGAIN)
1578 return 0;
1579
1580 return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1581 }
1582
1583 s->sent_notify_ready = true;
1584 log_debug("Sent READY=1 notification.");
1585
1586 } else if (s->send_watchdog) {
1587
1588 static const char p[] =
1589 "WATCHDOG=1";
1590
1591 ssize_t l;
1592
1593 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1594 if (l < 0) {
1595 if (errno == EAGAIN)
1596 return 0;
1597
1598 return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
1599 }
1600
1601 s->send_watchdog = false;
1602 log_debug("Sent WATCHDOG=1 notification.");
1603
1604 } else if (s->stdout_streams_notify_queue)
1605 /* Dispatch one stream notification event */
1606 stdout_stream_send_notify(s->stdout_streams_notify_queue);
1607
1608 /* Leave us enabled if there's still more to to do. */
1609 if (s->send_watchdog || s->stdout_streams_notify_queue)
1610 return 0;
1611
1612 /* There was nothing to do anymore, let's turn ourselves off. */
1613 r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1614 if (r < 0)
1615 return log_error_errno(r, "Failed to turn off notify event source: %m");
1616
1617 return 0;
1618 }
1619
1620 static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
1621 Server *s = userdata;
1622 int r;
1623
1624 assert(s);
1625
1626 s->send_watchdog = true;
1627
1628 r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
1629 if (r < 0)
1630 log_warning_errno(r, "Failed to turn on notify event source: %m");
1631
1632 r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
1633 if (r < 0)
1634 return log_error_errno(r, "Failed to restart watchdog event source: %m");
1635
1636 r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
1637 if (r < 0)
1638 return log_error_errno(r, "Failed to enable watchdog event source: %m");
1639
1640 return 0;
1641 }
1642
1643 static int server_connect_notify(Server *s) {
1644 union sockaddr_union sa = {
1645 .un.sun_family = AF_UNIX,
1646 };
1647 const char *e;
1648 int r;
1649
1650 assert(s);
1651 assert(s->notify_fd < 0);
1652 assert(!s->notify_event_source);
1653
1654 /*
1655 So here's the problem: we'd like to send notification
1656 messages to PID 1, but we cannot do that via sd_notify(),
1657 since that's synchronous, and we might end up blocking on
1658 it. Specifically: given that PID 1 might block on
1659 dbus-daemon during IPC, and dbus-daemon is logging to us,
1660 and might hence block on us, we might end up in a deadlock
1661 if we block on sending PID 1 notification messages -- by
1662 generating a full blocking circle. To avoid this, let's
1663 create a non-blocking socket, and connect it to the
1664 notification socket, and then wait for POLLOUT before we
1665 send anything. This should efficiently avoid any deadlocks,
1666 as we'll never block on PID 1, hence PID 1 can safely block
1667 on dbus-daemon which can safely block on us again.
1668
1669 Don't think that this issue is real? It is, see:
1670 https://github.com/systemd/systemd/issues/1505
1671 */
1672
1673 e = getenv("NOTIFY_SOCKET");
1674 if (!e)
1675 return 0;
1676
1677 if ((e[0] != '@' && e[0] != '/') || e[1] == 0) {
1678 log_error("NOTIFY_SOCKET set to an invalid value: %s", e);
1679 return -EINVAL;
1680 }
1681
1682 if (strlen(e) > sizeof(sa.un.sun_path)) {
1683 log_error("NOTIFY_SOCKET path too long: %s", e);
1684 return -EINVAL;
1685 }
1686
1687 s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1688 if (s->notify_fd < 0)
1689 return log_error_errno(errno, "Failed to create notify socket: %m");
1690
1691 (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1692
1693 strncpy(sa.un.sun_path, e, sizeof(sa.un.sun_path));
1694 if (sa.un.sun_path[0] == '@')
1695 sa.un.sun_path[0] = 0;
1696
1697 r = connect(s->notify_fd, &sa.sa, offsetof(struct sockaddr_un, sun_path) + strlen(e));
1698 if (r < 0)
1699 return log_error_errno(errno, "Failed to connect to notify socket: %m");
1700
1701 r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1702 if (r < 0)
1703 return log_error_errno(r, "Failed to watch notification socket: %m");
1704
1705 if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
1706 s->send_watchdog = true;
1707
1708 r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s);
1709 if (r < 0)
1710 return log_error_errno(r, "Failed to add watchdog time event: %m");
1711 }
1712
1713 /* This should fire pretty soon, which we'll use to send the
1714 * READY=1 event. */
1715
1716 return 0;
1717 }
1718
1719 int server_init(Server *s) {
1720 _cleanup_fdset_free_ FDSet *fds = NULL;
1721 int n, r, fd;
1722 bool no_sockets;
1723
1724 assert(s);
1725
1726 zero(*s);
1727 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
1728 s->compress = true;
1729 s->seal = true;
1730
1731 s->watchdog_usec = USEC_INFINITY;
1732
1733 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1734 s->sync_scheduled = false;
1735
1736 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1737 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1738
1739 s->forward_to_wall = true;
1740
1741 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1742
1743 s->max_level_store = LOG_DEBUG;
1744 s->max_level_syslog = LOG_DEBUG;
1745 s->max_level_kmsg = LOG_NOTICE;
1746 s->max_level_console = LOG_INFO;
1747 s->max_level_wall = LOG_EMERG;
1748
1749 journal_reset_metrics(&s->system_metrics);
1750 journal_reset_metrics(&s->runtime_metrics);
1751
1752 server_parse_config_file(s);
1753 server_parse_proc_cmdline(s);
1754
1755 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1756 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1757 s->rate_limit_interval, s->rate_limit_burst);
1758 s->rate_limit_interval = s->rate_limit_burst = 0;
1759 }
1760
1761 (void) mkdir_p("/run/systemd/journal", 0755);
1762
1763 s->user_journals = ordered_hashmap_new(NULL);
1764 if (!s->user_journals)
1765 return log_oom();
1766
1767 s->mmap = mmap_cache_new();
1768 if (!s->mmap)
1769 return log_oom();
1770
1771 r = sd_event_default(&s->event);
1772 if (r < 0)
1773 return log_error_errno(r, "Failed to create event loop: %m");
1774
1775 n = sd_listen_fds(true);
1776 if (n < 0)
1777 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
1778
1779 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1780
1781 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1782
1783 if (s->native_fd >= 0) {
1784 log_error("Too many native sockets passed.");
1785 return -EINVAL;
1786 }
1787
1788 s->native_fd = fd;
1789
1790 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1791
1792 if (s->stdout_fd >= 0) {
1793 log_error("Too many stdout sockets passed.");
1794 return -EINVAL;
1795 }
1796
1797 s->stdout_fd = fd;
1798
1799 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1800 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1801
1802 if (s->syslog_fd >= 0) {
1803 log_error("Too many /dev/log sockets passed.");
1804 return -EINVAL;
1805 }
1806
1807 s->syslog_fd = fd;
1808
1809 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1810
1811 if (s->audit_fd >= 0) {
1812 log_error("Too many audit sockets passed.");
1813 return -EINVAL;
1814 }
1815
1816 s->audit_fd = fd;
1817
1818 } else {
1819
1820 if (!fds) {
1821 fds = fdset_new();
1822 if (!fds)
1823 return log_oom();
1824 }
1825
1826 r = fdset_put(fds, fd);
1827 if (r < 0)
1828 return log_oom();
1829 }
1830 }
1831
1832 /* Try to restore streams, but don't bother if this fails */
1833 (void) server_restore_streams(s, fds);
1834
1835 if (fdset_size(fds) > 0) {
1836 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1837 fds = fdset_free(fds);
1838 }
1839
1840 no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
1841
1842 /* always open stdout, syslog, native, and kmsg sockets */
1843
1844 /* systemd-journald.socket: /run/systemd/journal/stdout */
1845 r = server_open_stdout_socket(s);
1846 if (r < 0)
1847 return r;
1848
1849 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
1850 r = server_open_syslog_socket(s);
1851 if (r < 0)
1852 return r;
1853
1854 /* systemd-journald.socket: /run/systemd/journal/socket */
1855 r = server_open_native_socket(s);
1856 if (r < 0)
1857 return r;
1858
1859 /* /dev/ksmg */
1860 r = server_open_dev_kmsg(s);
1861 if (r < 0)
1862 return r;
1863
1864 /* Unless we got *some* sockets and not audit, open audit socket */
1865 if (s->audit_fd >= 0 || no_sockets) {
1866 r = server_open_audit(s);
1867 if (r < 0)
1868 return r;
1869 }
1870
1871 r = server_open_kernel_seqnum(s);
1872 if (r < 0)
1873 return r;
1874
1875 r = server_open_hostname(s);
1876 if (r < 0)
1877 return r;
1878
1879 r = setup_signals(s);
1880 if (r < 0)
1881 return r;
1882
1883 s->udev = udev_new();
1884 if (!s->udev)
1885 return -ENOMEM;
1886
1887 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
1888 if (!s->rate_limit)
1889 return -ENOMEM;
1890
1891 r = cg_get_root_path(&s->cgroup_root);
1892 if (r < 0)
1893 return r;
1894
1895 server_cache_hostname(s);
1896 server_cache_boot_id(s);
1897 server_cache_machine_id(s);
1898
1899 (void) server_connect_notify(s);
1900
1901 return system_journal_open(s, false);
1902 }
1903
1904 void server_maybe_append_tags(Server *s) {
1905 #ifdef HAVE_GCRYPT
1906 JournalFile *f;
1907 Iterator i;
1908 usec_t n;
1909
1910 n = now(CLOCK_REALTIME);
1911
1912 if (s->system_journal)
1913 journal_file_maybe_append_tag(s->system_journal, n);
1914
1915 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
1916 journal_file_maybe_append_tag(f, n);
1917 #endif
1918 }
1919
1920 void server_done(Server *s) {
1921 JournalFile *f;
1922 assert(s);
1923
1924 while (s->stdout_streams)
1925 stdout_stream_free(s->stdout_streams);
1926
1927 if (s->system_journal)
1928 journal_file_close(s->system_journal);
1929
1930 if (s->runtime_journal)
1931 journal_file_close(s->runtime_journal);
1932
1933 while ((f = ordered_hashmap_steal_first(s->user_journals)))
1934 journal_file_close(f);
1935
1936 ordered_hashmap_free(s->user_journals);
1937
1938 sd_event_source_unref(s->syslog_event_source);
1939 sd_event_source_unref(s->native_event_source);
1940 sd_event_source_unref(s->stdout_event_source);
1941 sd_event_source_unref(s->dev_kmsg_event_source);
1942 sd_event_source_unref(s->audit_event_source);
1943 sd_event_source_unref(s->sync_event_source);
1944 sd_event_source_unref(s->sigusr1_event_source);
1945 sd_event_source_unref(s->sigusr2_event_source);
1946 sd_event_source_unref(s->sigterm_event_source);
1947 sd_event_source_unref(s->sigint_event_source);
1948 sd_event_source_unref(s->sigrtmin1_event_source);
1949 sd_event_source_unref(s->hostname_event_source);
1950 sd_event_source_unref(s->notify_event_source);
1951 sd_event_source_unref(s->watchdog_event_source);
1952 sd_event_unref(s->event);
1953
1954 safe_close(s->syslog_fd);
1955 safe_close(s->native_fd);
1956 safe_close(s->stdout_fd);
1957 safe_close(s->dev_kmsg_fd);
1958 safe_close(s->audit_fd);
1959 safe_close(s->hostname_fd);
1960 safe_close(s->notify_fd);
1961
1962 if (s->rate_limit)
1963 journal_rate_limit_free(s->rate_limit);
1964
1965 if (s->kernel_seqnum)
1966 munmap(s->kernel_seqnum, sizeof(uint64_t));
1967
1968 free(s->buffer);
1969 free(s->tty_path);
1970 free(s->cgroup_root);
1971 free(s->hostname_field);
1972
1973 if (s->mmap)
1974 mmap_cache_unref(s->mmap);
1975
1976 udev_unref(s->udev);
1977 }
1978
1979 static const char* const storage_table[_STORAGE_MAX] = {
1980 [STORAGE_AUTO] = "auto",
1981 [STORAGE_VOLATILE] = "volatile",
1982 [STORAGE_PERSISTENT] = "persistent",
1983 [STORAGE_NONE] = "none"
1984 };
1985
1986 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
1987 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
1988
1989 static const char* const split_mode_table[_SPLIT_MAX] = {
1990 [SPLIT_LOGIN] = "login",
1991 [SPLIT_UID] = "uid",
1992 [SPLIT_NONE] = "none",
1993 };
1994
1995 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
1996 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");