]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/journal/journald-server.c
Merge pull request #4736 from dobyrch/calendar-cleanup
[thirdparty/systemd.git] / src / journal / journald-server.c
1 /***
2 This file is part of systemd.
3
4 Copyright 2011 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18 ***/
19
20 #ifdef HAVE_SELINUX
21 #include <selinux/selinux.h>
22 #endif
23 #include <sys/ioctl.h>
24 #include <sys/mman.h>
25 #include <sys/signalfd.h>
26 #include <sys/statvfs.h>
27 #include <linux/sockios.h>
28
29 #include "libudev.h"
30 #include "sd-daemon.h"
31 #include "sd-journal.h"
32 #include "sd-messages.h"
33
34 #include "acl-util.h"
35 #include "alloc-util.h"
36 #include "audit-util.h"
37 #include "cgroup-util.h"
38 #include "conf-parser.h"
39 #include "dirent-util.h"
40 #include "extract-word.h"
41 #include "fd-util.h"
42 #include "fileio.h"
43 #include "format-util.h"
44 #include "fs-util.h"
45 #include "hashmap.h"
46 #include "hostname-util.h"
47 #include "id128-util.h"
48 #include "io-util.h"
49 #include "journal-authenticate.h"
50 #include "journal-file.h"
51 #include "journal-internal.h"
52 #include "journal-vacuum.h"
53 #include "journald-audit.h"
54 #include "journald-kmsg.h"
55 #include "journald-native.h"
56 #include "journald-rate-limit.h"
57 #include "journald-server.h"
58 #include "journald-stream.h"
59 #include "journald-syslog.h"
60 #include "log.h"
61 #include "missing.h"
62 #include "mkdir.h"
63 #include "parse-util.h"
64 #include "proc-cmdline.h"
65 #include "process-util.h"
66 #include "rm-rf.h"
67 #include "selinux-util.h"
68 #include "signal-util.h"
69 #include "socket-util.h"
70 #include "stdio-util.h"
71 #include "string-table.h"
72 #include "string-util.h"
73 #include "user-util.h"
74 #include "syslog-util.h"
75
/* Upper bound on the number of per-user journal files kept open at once; find_journal() closes entries taken
 * from the front of the user journal map until the count is below this value. */
#define USER_JOURNALS_MAX 1024

/* Built-in defaults. NOTE(review): their users are not visible in this part of the file; presumably they
 * seed the server configuration before the configuration files are parsed -- confirm. */
#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 1000
#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH

/* Minimum age of the cached disk space figures before cache_space_refresh() measures the directory again */
#define RECHECK_SPACE_USEC (30*USEC_PER_SEC)

/* NOTE(review): presumably the send buffer size requested for the notification socket -- the user of this
 * constant is not visible here, confirm. */
#define NOTIFY_SNDBUF_SIZE (8*1024*1024)

/* The period to insert between posting changes for coalescing; handed to
 * journal_file_enable_post_change_timer() for every journal file opened by open_journal() */
#define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
89
90 static int determine_path_usage(Server *s, const char *path, uint64_t *ret_used, uint64_t *ret_free) {
91 _cleanup_closedir_ DIR *d = NULL;
92 struct dirent *de;
93 struct statvfs ss;
94
95 assert(ret_used);
96 assert(ret_free);
97
98 d = opendir(path);
99 if (!d)
100 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR,
101 errno, "Failed to open %s: %m", path);
102
103 if (fstatvfs(dirfd(d), &ss) < 0)
104 return log_error_errno(errno, "Failed to fstatvfs(%s): %m", path);
105
106 *ret_free = ss.f_bsize * ss.f_bavail;
107 *ret_used = 0;
108 FOREACH_DIRENT_ALL(de, d, break) {
109 struct stat st;
110
111 if (!endswith(de->d_name, ".journal") &&
112 !endswith(de->d_name, ".journal~"))
113 continue;
114
115 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
116 log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", path, de->d_name);
117 continue;
118 }
119
120 if (!S_ISREG(st.st_mode))
121 continue;
122
123 *ret_used += (uint64_t) st.st_blocks * 512UL;
124 }
125
126 return 0;
127 }
128
129 static void cache_space_invalidate(JournalStorageSpace *space) {
130 memset(space, 0, sizeof(*space));
131 }
132
133 static int cache_space_refresh(Server *s, JournalStorage *storage) {
134 JournalStorageSpace *space;
135 JournalMetrics *metrics;
136 uint64_t vfs_used, vfs_avail, avail;
137 usec_t ts;
138 int r;
139
140 assert(s);
141
142 metrics = &storage->metrics;
143 space = &storage->space;
144
145 ts = now(CLOCK_MONOTONIC);
146
147 if (space->timestamp + RECHECK_SPACE_USEC > ts)
148 return 0;
149
150 r = determine_path_usage(s, storage->path, &vfs_used, &vfs_avail);
151 if (r < 0)
152 return r;
153
154 space->vfs_used = vfs_used;
155 space->vfs_available = vfs_avail;
156
157 avail = LESS_BY(vfs_avail, metrics->keep_free);
158
159 space->limit = MIN(MAX(vfs_used + avail, metrics->min_use), metrics->max_use);
160 space->available = LESS_BY(space->limit, vfs_used);
161 space->timestamp = ts;
162 return 1;
163 }
164
165 static void patch_min_use(JournalStorage *storage) {
166 assert(storage);
167
168 /* Let's bump the min_use limit to the current usage on disk. We do
169 * this when starting up and first opening the journal files. This way
170 * sudden spikes in disk usage will not cause journald to vacuum files
171 * without bounds. Note that this means that only a restart of journald
172 * will make it reset this value. */
173
174 storage->metrics.min_use = MAX(storage->metrics.min_use, storage->space.vfs_used);
175 }
176
177
178 static int determine_space(Server *s, uint64_t *available, uint64_t *limit) {
179 JournalStorage *js;
180 int r;
181
182 assert(s);
183
184 js = s->system_journal ? &s->system_storage : &s->runtime_storage;
185
186 r = cache_space_refresh(s, js);
187 if (r >= 0) {
188 if (available)
189 *available = js->space.available;
190 if (limit)
191 *limit = js->space.limit;
192 }
193 return r;
194 }
195
196 void server_space_usage_message(Server *s, JournalStorage *storage) {
197 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
198 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
199 JournalMetrics *metrics;
200
201 assert(s);
202
203 if (!storage)
204 storage = s->system_journal ? &s->system_storage : &s->runtime_storage;
205
206 if (cache_space_refresh(s, storage) < 0)
207 return;
208
209 metrics = &storage->metrics;
210 format_bytes(fb1, sizeof(fb1), storage->space.vfs_used);
211 format_bytes(fb2, sizeof(fb2), metrics->max_use);
212 format_bytes(fb3, sizeof(fb3), metrics->keep_free);
213 format_bytes(fb4, sizeof(fb4), storage->space.vfs_available);
214 format_bytes(fb5, sizeof(fb5), storage->space.limit);
215 format_bytes(fb6, sizeof(fb6), storage->space.available);
216
217 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
218 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
219 storage->name, storage->path, fb1, fb5, fb6),
220 "JOURNAL_NAME=%s", storage->name,
221 "JOURNAL_PATH=%s", storage->path,
222 "CURRENT_USE=%"PRIu64, storage->space.vfs_used,
223 "CURRENT_USE_PRETTY=%s", fb1,
224 "MAX_USE=%"PRIu64, metrics->max_use,
225 "MAX_USE_PRETTY=%s", fb2,
226 "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free,
227 "DISK_KEEP_FREE_PRETTY=%s", fb3,
228 "DISK_AVAILABLE=%"PRIu64, storage->space.vfs_available,
229 "DISK_AVAILABLE_PRETTY=%s", fb4,
230 "LIMIT=%"PRIu64, storage->space.limit,
231 "LIMIT_PRETTY=%s", fb5,
232 "AVAILABLE=%"PRIu64, storage->space.available,
233 "AVAILABLE_PRETTY=%s", fb6,
234 NULL);
235 }
236
237 static void server_add_acls(JournalFile *f, uid_t uid) {
238 #ifdef HAVE_ACL
239 int r;
240 #endif
241 assert(f);
242
243 #ifdef HAVE_ACL
244 if (uid <= SYSTEM_UID_MAX)
245 return;
246
247 r = add_acls_for_user(f->fd, uid);
248 if (r < 0)
249 log_warning_errno(r, "Failed to set ACL on %s, ignoring: %m", f->path);
250 #endif
251 }
252
253 static int open_journal(
254 Server *s,
255 bool reliably,
256 const char *fname,
257 int flags,
258 bool seal,
259 JournalMetrics *metrics,
260 JournalFile **ret) {
261 int r;
262 JournalFile *f;
263
264 assert(s);
265 assert(fname);
266 assert(ret);
267
268 if (reliably)
269 r = journal_file_open_reliably(fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
270 else
271 r = journal_file_open(-1, fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
272 if (r < 0)
273 return r;
274
275 r = journal_file_enable_post_change_timer(f, s->event, POST_CHANGE_TIMER_INTERVAL_USEC);
276 if (r < 0) {
277 (void) journal_file_close(f);
278 return r;
279 }
280
281 *ret = f;
282 return r;
283 }
284
/* Returns true if the flag file /run/systemd/journal/flushed exists (as far as access() can tell). */
static bool flushed_flag_is_set(void) {
        if (access("/run/systemd/journal/flushed", F_OK) < 0)
                return false;

        return true;
}
288
289 static int system_journal_open(Server *s, bool flush_requested) {
290 bool flushed = false;
291 const char *fn;
292 int r = 0;
293
294 if (!s->system_journal &&
295 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
296 (flush_requested || (flushed = flushed_flag_is_set()))) {
297
298 /* If in auto mode: first try to create the machine
299 * path, but not the prefix.
300 *
301 * If in persistent mode: create /var/log/journal and
302 * the machine path */
303
304 if (s->storage == STORAGE_PERSISTENT)
305 (void) mkdir_p("/var/log/journal/", 0755);
306
307 (void) mkdir(s->system_storage.path, 0755);
308
309 fn = strjoina(s->system_storage.path, "/system.journal");
310 r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &s->system_journal);
311 if (r >= 0) {
312 server_add_acls(s->system_journal, 0);
313 (void) cache_space_refresh(s, &s->system_storage);
314 patch_min_use(&s->system_storage);
315 } else if (r < 0) {
316 if (r != -ENOENT && r != -EROFS)
317 log_warning_errno(r, "Failed to open system journal: %m");
318
319 r = 0;
320 }
321
322 /* If the runtime journal is open, and we're post-flush, we're
323 * recovering from a failed system journal rotate (ENOSPC)
324 * for which the runtime journal was reopened.
325 *
326 * Perform an implicit flush to var, leaving the runtime
327 * journal closed, now that the system journal is back.
328 */
329 if (s->runtime_journal && flushed)
330 (void) server_flush_to_var(s);
331 }
332
333 if (!s->runtime_journal &&
334 (s->storage != STORAGE_NONE)) {
335
336 fn = strjoina(s->runtime_storage.path, "/system.journal");
337
338 if (s->system_journal) {
339
340 /* Try to open the runtime journal, but only
341 * if it already exists, so that we can flush
342 * it into the system journal */
343
344 r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_storage.metrics, &s->runtime_journal);
345 if (r < 0) {
346 if (r != -ENOENT)
347 log_warning_errno(r, "Failed to open runtime journal: %m");
348
349 r = 0;
350 }
351
352 } else {
353
354 /* OK, we really need the runtime journal, so create
355 * it if necessary. */
356
357 (void) mkdir("/run/log", 0755);
358 (void) mkdir("/run/log/journal", 0755);
359 (void) mkdir_parents(fn, 0750);
360
361 r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_storage.metrics, &s->runtime_journal);
362 if (r < 0)
363 return log_error_errno(r, "Failed to open runtime journal: %m");
364 }
365
366 if (s->runtime_journal) {
367 server_add_acls(s->runtime_journal, 0);
368 (void) cache_space_refresh(s, &s->runtime_storage);
369 patch_min_use(&s->runtime_storage);
370 }
371 }
372
373 return r;
374 }
375
376 static JournalFile* find_journal(Server *s, uid_t uid) {
377 _cleanup_free_ char *p = NULL;
378 int r;
379 JournalFile *f;
380 sd_id128_t machine;
381
382 assert(s);
383
384 /* A rotate that fails to create the new journal (ENOSPC) leaves the
385 * rotated journal as NULL. Unless we revisit opening, even after
386 * space is made available we'll continue to return NULL indefinitely.
387 *
388 * system_journal_open() is a noop if the journals are already open, so
389 * we can just call it here to recover from failed rotates (or anything
390 * else that's left the journals as NULL).
391 *
392 * Fixes https://github.com/systemd/systemd/issues/3968 */
393 (void) system_journal_open(s, false);
394
395 /* We split up user logs only on /var, not on /run. If the
396 * runtime file is open, we write to it exclusively, in order
397 * to guarantee proper order as soon as we flush /run to
398 * /var and close the runtime file. */
399
400 if (s->runtime_journal)
401 return s->runtime_journal;
402
403 if (uid <= SYSTEM_UID_MAX || uid_is_dynamic(uid))
404 return s->system_journal;
405
406 r = sd_id128_get_machine(&machine);
407 if (r < 0)
408 return s->system_journal;
409
410 f = ordered_hashmap_get(s->user_journals, UID_TO_PTR(uid));
411 if (f)
412 return f;
413
414 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
415 SD_ID128_FORMAT_VAL(machine), uid) < 0)
416 return s->system_journal;
417
418 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
419 /* Too many open? Then let's close one */
420 f = ordered_hashmap_steal_first(s->user_journals);
421 assert(f);
422 (void) journal_file_close(f);
423 }
424
425 r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &f);
426 if (r < 0)
427 return s->system_journal;
428
429 server_add_acls(f, uid);
430
431 r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f);
432 if (r < 0) {
433 (void) journal_file_close(f);
434 return s->system_journal;
435 }
436
437 return f;
438 }
439
440 static int do_rotate(
441 Server *s,
442 JournalFile **f,
443 const char* name,
444 bool seal,
445 uint32_t uid) {
446
447 int r;
448 assert(s);
449
450 if (!*f)
451 return -EINVAL;
452
453 r = journal_file_rotate(f, s->compress, seal, s->deferred_closes);
454 if (r < 0)
455 if (*f)
456 log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
457 else
458 log_error_errno(r, "Failed to create new %s journal: %m", name);
459 else
460 server_add_acls(*f, uid);
461
462 return r;
463 }
464
465 void server_rotate(Server *s) {
466 JournalFile *f;
467 void *k;
468 Iterator i;
469 int r;
470
471 log_debug("Rotating...");
472
473 (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
474 (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);
475
476 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
477 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UID(k));
478 if (r >= 0)
479 ordered_hashmap_replace(s->user_journals, k, f);
480 else if (!f)
481 /* Old file has been closed and deallocated */
482 ordered_hashmap_remove(s->user_journals, k);
483 }
484
485 /* Perform any deferred closes which aren't still offlining. */
486 SET_FOREACH(f, s->deferred_closes, i)
487 if (!journal_file_is_offlining(f)) {
488 (void) set_remove(s->deferred_closes, f);
489 (void) journal_file_close(f);
490 }
491 }
492
493 void server_sync(Server *s) {
494 JournalFile *f;
495 Iterator i;
496 int r;
497
498 if (s->system_journal) {
499 r = journal_file_set_offline(s->system_journal, false);
500 if (r < 0)
501 log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
502 }
503
504 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i) {
505 r = journal_file_set_offline(f, false);
506 if (r < 0)
507 log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
508 }
509
510 if (s->sync_event_source) {
511 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
512 if (r < 0)
513 log_error_errno(r, "Failed to disable sync timer source: %m");
514 }
515
516 s->sync_scheduled = false;
517 }
518
519 static void do_vacuum(Server *s, JournalStorage *storage, bool verbose) {
520
521 int r;
522
523 assert(s);
524 assert(storage);
525
526 (void) cache_space_refresh(s, storage);
527
528 if (verbose)
529 server_space_usage_message(s, storage);
530
531 r = journal_directory_vacuum(storage->path, storage->space.limit,
532 storage->metrics.n_max_files, s->max_retention_usec,
533 &s->oldest_file_usec, verbose);
534 if (r < 0 && r != -ENOENT)
535 log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", storage->path);
536
537 cache_space_invalidate(&storage->space);
538 }
539
540 int server_vacuum(Server *s, bool verbose) {
541 assert(s);
542
543 log_debug("Vacuuming...");
544
545 s->oldest_file_usec = 0;
546
547 if (s->system_journal)
548 do_vacuum(s, &s->system_storage, verbose);
549 if (s->runtime_journal)
550 do_vacuum(s, &s->runtime_storage, verbose);
551
552 return 0;
553 }
554
555 static void server_cache_machine_id(Server *s) {
556 sd_id128_t id;
557 int r;
558
559 assert(s);
560
561 r = sd_id128_get_machine(&id);
562 if (r < 0)
563 return;
564
565 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
566 }
567
568 static void server_cache_boot_id(Server *s) {
569 sd_id128_t id;
570 int r;
571
572 assert(s);
573
574 r = sd_id128_get_boot(&id);
575 if (r < 0)
576 return;
577
578 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
579 }
580
581 static void server_cache_hostname(Server *s) {
582 _cleanup_free_ char *t = NULL;
583 char *x;
584
585 assert(s);
586
587 t = gethostname_malloc();
588 if (!t)
589 return;
590
591 x = strappend("_HOSTNAME=", t);
592 if (!x)
593 return;
594
595 free(s->hostname_field);
596 s->hostname_field = x;
597 }
598
599 static bool shall_try_append_again(JournalFile *f, int r) {
600 switch(r) {
601
602 case -E2BIG: /* Hit configured limit */
603 case -EFBIG: /* Hit fs limit */
604 case -EDQUOT: /* Quota limit hit */
605 case -ENOSPC: /* Disk full */
606 log_debug("%s: Allocation limit reached, rotating.", f->path);
607 return true;
608
609 case -EIO: /* I/O error of some kind (mmap) */
610 log_warning("%s: IO error, rotating.", f->path);
611 return true;
612
613 case -EHOSTDOWN: /* Other machine */
614 log_info("%s: Journal file from other machine, rotating.", f->path);
615 return true;
616
617 case -EBUSY: /* Unclean shutdown */
618 log_info("%s: Unclean shutdown, rotating.", f->path);
619 return true;
620
621 case -EPROTONOSUPPORT: /* Unsupported feature */
622 log_info("%s: Unsupported feature, rotating.", f->path);
623 return true;
624
625 case -EBADMSG: /* Corrupted */
626 case -ENODATA: /* Truncated */
627 case -ESHUTDOWN: /* Already archived */
628 log_warning("%s: Journal file corrupted, rotating.", f->path);
629 return true;
630
631 case -EIDRM: /* Journal file has been deleted */
632 log_warning("%s: Journal file has been deleted, rotating.", f->path);
633 return true;
634
635 case -ETXTBSY: /* Journal file is from the future */
636 log_warning("%s: Journal file is from the future, rotating.", f->path);
637 return true;
638
639 default:
640 return false;
641 }
642 }
643
644 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
645 bool vacuumed = false, rotate = false;
646 struct dual_timestamp ts;
647 JournalFile *f;
648 int r;
649
650 assert(s);
651 assert(iovec);
652 assert(n > 0);
653
654 /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
655 * the source time, and not even the time the event was originally seen, but instead simply the time we started
656 * processing it, as we want strictly linear ordering in what we write out.) */
657 assert_se(sd_event_now(s->event, CLOCK_REALTIME, &ts.realtime) >= 0);
658 assert_se(sd_event_now(s->event, CLOCK_MONOTONIC, &ts.monotonic) >= 0);
659
660 if (ts.realtime < s->last_realtime_clock) {
661 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
662 * regular operation. However, when it does happen, then we should make sure that we start fresh files
663 * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
664 * bisection works correctly. */
665
666 log_debug("Time jumped backwards, rotating.");
667 rotate = true;
668 } else {
669
670 f = find_journal(s, uid);
671 if (!f)
672 return;
673
674 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
675 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
676 rotate = true;
677 }
678 }
679
680 if (rotate) {
681 server_rotate(s);
682 server_vacuum(s, false);
683 vacuumed = true;
684
685 f = find_journal(s, uid);
686 if (!f)
687 return;
688 }
689
690 s->last_realtime_clock = ts.realtime;
691
692 r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
693 if (r >= 0) {
694 server_schedule_sync(s, priority);
695 return;
696 }
697
698 if (vacuumed || !shall_try_append_again(f, r)) {
699 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
700 return;
701 }
702
703 server_rotate(s);
704 server_vacuum(s, false);
705
706 f = find_journal(s, uid);
707 if (!f)
708 return;
709
710 log_debug("Retrying write.");
711 r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
712 if (r < 0)
713 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
714 else
715 server_schedule_sync(s, priority);
716 }
717
718 static int get_invocation_id(const char *cgroup_root, const char *slice, const char *unit, char **ret) {
719 _cleanup_free_ char *escaped = NULL, *slice_path = NULL, *p = NULL;
720 char *copy, ids[SD_ID128_STRING_MAX];
721 int r;
722
723 /* Read the invocation ID of a unit off a unit. It's stored in the "trusted.invocation_id" extended attribute
724 * on the cgroup path. */
725
726 r = cg_slice_to_path(slice, &slice_path);
727 if (r < 0)
728 return r;
729
730 escaped = cg_escape(unit);
731 if (!escaped)
732 return -ENOMEM;
733
734 p = strjoin(cgroup_root, "/", slice_path, "/", escaped);
735 if (!p)
736 return -ENOMEM;
737
738 r = cg_get_xattr(SYSTEMD_CGROUP_CONTROLLER, p, "trusted.invocation_id", ids, 32);
739 if (r < 0)
740 return r;
741 if (r != 32)
742 return -EINVAL;
743 ids[32] = 0;
744
745 if (!id128_is_valid(ids))
746 return -EINVAL;
747
748 copy = strdup(ids);
749 if (!copy)
750 return -ENOMEM;
751
752 *ret = copy;
753 return 0;
754 }
755
756 static void dispatch_message_real(
757 Server *s,
758 struct iovec *iovec, unsigned n, unsigned m,
759 const struct ucred *ucred,
760 const struct timeval *tv,
761 const char *label, size_t label_len,
762 const char *unit_id,
763 int priority,
764 pid_t object_pid) {
765
766 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
767 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
768 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
769 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
770 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
771 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
772 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
773 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
774 uid_t object_uid;
775 gid_t object_gid;
776 char *x;
777 int r;
778 char *t, *c;
779 uid_t realuid = 0, owner = 0, journal_uid;
780 bool owner_valid = false;
781 #ifdef HAVE_AUDIT
782 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
783 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
784 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
785 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
786
787 uint32_t audit;
788 uid_t loginuid;
789 #endif
790
791 assert(s);
792 assert(iovec);
793 assert(n > 0);
794 assert(n + N_IOVEC_META_FIELDS + (object_pid > 0 ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
795
796 if (ucred) {
797 realuid = ucred->uid;
798
799 sprintf(pid, "_PID="PID_FMT, ucred->pid);
800 IOVEC_SET_STRING(iovec[n++], pid);
801
802 sprintf(uid, "_UID="UID_FMT, ucred->uid);
803 IOVEC_SET_STRING(iovec[n++], uid);
804
805 sprintf(gid, "_GID="GID_FMT, ucred->gid);
806 IOVEC_SET_STRING(iovec[n++], gid);
807
808 r = get_process_comm(ucred->pid, &t);
809 if (r >= 0) {
810 x = strjoina("_COMM=", t);
811 free(t);
812 IOVEC_SET_STRING(iovec[n++], x);
813 }
814
815 r = get_process_exe(ucred->pid, &t);
816 if (r >= 0) {
817 x = strjoina("_EXE=", t);
818 free(t);
819 IOVEC_SET_STRING(iovec[n++], x);
820 }
821
822 r = get_process_cmdline(ucred->pid, 0, false, &t);
823 if (r >= 0) {
824 x = strjoina("_CMDLINE=", t);
825 free(t);
826 IOVEC_SET_STRING(iovec[n++], x);
827 }
828
829 r = get_process_capeff(ucred->pid, &t);
830 if (r >= 0) {
831 x = strjoina("_CAP_EFFECTIVE=", t);
832 free(t);
833 IOVEC_SET_STRING(iovec[n++], x);
834 }
835
836 #ifdef HAVE_AUDIT
837 r = audit_session_from_pid(ucred->pid, &audit);
838 if (r >= 0) {
839 sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
840 IOVEC_SET_STRING(iovec[n++], audit_session);
841 }
842
843 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
844 if (r >= 0) {
845 sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
846 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
847 }
848 #endif
849
850 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
851 if (r >= 0) {
852 _cleanup_free_ char *raw_unit = NULL, *raw_slice = NULL;
853 char *session = NULL;
854
855 x = strjoina("_SYSTEMD_CGROUP=", c);
856 IOVEC_SET_STRING(iovec[n++], x);
857
858 r = cg_path_get_session(c, &t);
859 if (r >= 0) {
860 session = strjoina("_SYSTEMD_SESSION=", t);
861 free(t);
862 IOVEC_SET_STRING(iovec[n++], session);
863 }
864
865 if (cg_path_get_owner_uid(c, &owner) >= 0) {
866 owner_valid = true;
867
868 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
869 IOVEC_SET_STRING(iovec[n++], owner_uid);
870 }
871
872 if (cg_path_get_unit(c, &raw_unit) >= 0) {
873 x = strjoina("_SYSTEMD_UNIT=", raw_unit);
874 IOVEC_SET_STRING(iovec[n++], x);
875 } else if (unit_id && !session) {
876 x = strjoina("_SYSTEMD_UNIT=", unit_id);
877 IOVEC_SET_STRING(iovec[n++], x);
878 }
879
880 if (cg_path_get_user_unit(c, &t) >= 0) {
881 x = strjoina("_SYSTEMD_USER_UNIT=", t);
882 free(t);
883 IOVEC_SET_STRING(iovec[n++], x);
884 } else if (unit_id && session) {
885 x = strjoina("_SYSTEMD_USER_UNIT=", unit_id);
886 IOVEC_SET_STRING(iovec[n++], x);
887 }
888
889 if (cg_path_get_slice(c, &raw_slice) >= 0) {
890 x = strjoina("_SYSTEMD_SLICE=", raw_slice);
891 IOVEC_SET_STRING(iovec[n++], x);
892 }
893
894 if (cg_path_get_user_slice(c, &t) >= 0) {
895 x = strjoina("_SYSTEMD_USER_SLICE=", t);
896 free(t);
897 IOVEC_SET_STRING(iovec[n++], x);
898 }
899
900 if (raw_slice && raw_unit) {
901 if (get_invocation_id(s->cgroup_root, raw_slice, raw_unit, &t) >= 0) {
902 x = strjoina("_SYSTEMD_INVOCATION_ID=", t);
903 free(t);
904 IOVEC_SET_STRING(iovec[n++], x);
905 }
906 }
907
908 free(c);
909 } else if (unit_id) {
910 x = strjoina("_SYSTEMD_UNIT=", unit_id);
911 IOVEC_SET_STRING(iovec[n++], x);
912 }
913
914 #ifdef HAVE_SELINUX
915 if (mac_selinux_have()) {
916 if (label) {
917 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
918
919 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
920 IOVEC_SET_STRING(iovec[n++], x);
921 } else {
922 char *con;
923
924 if (getpidcon(ucred->pid, &con) >= 0) {
925 x = strjoina("_SELINUX_CONTEXT=", con);
926
927 freecon(con);
928 IOVEC_SET_STRING(iovec[n++], x);
929 }
930 }
931 }
932 #endif
933 }
934 assert(n <= m);
935
936 if (object_pid) {
937 r = get_process_uid(object_pid, &object_uid);
938 if (r >= 0) {
939 sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
940 IOVEC_SET_STRING(iovec[n++], o_uid);
941 }
942
943 r = get_process_gid(object_pid, &object_gid);
944 if (r >= 0) {
945 sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
946 IOVEC_SET_STRING(iovec[n++], o_gid);
947 }
948
949 r = get_process_comm(object_pid, &t);
950 if (r >= 0) {
951 x = strjoina("OBJECT_COMM=", t);
952 free(t);
953 IOVEC_SET_STRING(iovec[n++], x);
954 }
955
956 r = get_process_exe(object_pid, &t);
957 if (r >= 0) {
958 x = strjoina("OBJECT_EXE=", t);
959 free(t);
960 IOVEC_SET_STRING(iovec[n++], x);
961 }
962
963 r = get_process_cmdline(object_pid, 0, false, &t);
964 if (r >= 0) {
965 x = strjoina("OBJECT_CMDLINE=", t);
966 free(t);
967 IOVEC_SET_STRING(iovec[n++], x);
968 }
969
970 #ifdef HAVE_AUDIT
971 r = audit_session_from_pid(object_pid, &audit);
972 if (r >= 0) {
973 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
974 IOVEC_SET_STRING(iovec[n++], o_audit_session);
975 }
976
977 r = audit_loginuid_from_pid(object_pid, &loginuid);
978 if (r >= 0) {
979 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
980 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
981 }
982 #endif
983
984 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
985 if (r >= 0) {
986 x = strjoina("OBJECT_SYSTEMD_CGROUP=", c);
987 IOVEC_SET_STRING(iovec[n++], x);
988
989 r = cg_path_get_session(c, &t);
990 if (r >= 0) {
991 x = strjoina("OBJECT_SYSTEMD_SESSION=", t);
992 free(t);
993 IOVEC_SET_STRING(iovec[n++], x);
994 }
995
996 if (cg_path_get_owner_uid(c, &owner) >= 0) {
997 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
998 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
999 }
1000
1001 if (cg_path_get_unit(c, &t) >= 0) {
1002 x = strjoina("OBJECT_SYSTEMD_UNIT=", t);
1003 free(t);
1004 IOVEC_SET_STRING(iovec[n++], x);
1005 }
1006
1007 if (cg_path_get_user_unit(c, &t) >= 0) {
1008 x = strjoina("OBJECT_SYSTEMD_USER_UNIT=", t);
1009 free(t);
1010 IOVEC_SET_STRING(iovec[n++], x);
1011 }
1012
1013 if (cg_path_get_slice(c, &t) >= 0) {
1014 x = strjoina("OBJECT_SYSTEMD_SLICE=", t);
1015 free(t);
1016 IOVEC_SET_STRING(iovec[n++], x);
1017 }
1018
1019 if (cg_path_get_user_slice(c, &t) >= 0) {
1020 x = strjoina("OBJECT_SYSTEMD_USER_SLICE=", t);
1021 free(t);
1022 IOVEC_SET_STRING(iovec[n++], x);
1023 }
1024
1025 free(c);
1026 }
1027 }
1028 assert(n <= m);
1029
1030 if (tv) {
1031 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT, timeval_load(tv));
1032 IOVEC_SET_STRING(iovec[n++], source_time);
1033 }
1034
1035 /* Note that strictly speaking storing the boot id here is
1036 * redundant since the entry includes this in-line
1037 * anyway. However, we need this indexed, too. */
1038 if (!isempty(s->boot_id_field))
1039 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
1040
1041 if (!isempty(s->machine_id_field))
1042 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
1043
1044 if (!isempty(s->hostname_field))
1045 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
1046
1047 assert(n <= m);
1048
1049 if (s->split_mode == SPLIT_UID && realuid > 0)
1050 /* Split up strictly by any UID */
1051 journal_uid = realuid;
1052 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
1053 /* Split up by login UIDs. We do this only if the
1054 * realuid is not root, in order not to accidentally
1055 * leak privileged information to the user that is
1056 * logged by a privileged process that is part of an
1057 * unprivileged session. */
1058 journal_uid = owner;
1059 else
1060 journal_uid = 0;
1061
1062 write_to_journal(s, journal_uid, iovec, n, priority);
1063 }
1064
1065 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
1066 char mid[11 + 32 + 1];
1067 struct iovec iovec[N_IOVEC_META_FIELDS + 5 + N_IOVEC_PAYLOAD_FIELDS];
1068 unsigned n = 0, m;
1069 int r;
1070 va_list ap;
1071 struct ucred ucred = {};
1072
1073 assert(s);
1074 assert(format);
1075
1076 assert_cc(3 == LOG_FAC(LOG_DAEMON));
1077 IOVEC_SET_STRING(iovec[n++], "SYSLOG_FACILITY=3");
1078 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=systemd-journald");
1079
1080 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
1081 assert_cc(6 == LOG_INFO);
1082 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
1083
1084 if (!sd_id128_is_null(message_id)) {
1085 snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
1086 IOVEC_SET_STRING(iovec[n++], mid);
1087 }
1088
1089 m = n;
1090
1091 va_start(ap, format);
1092 r = log_format_iovec(iovec, ELEMENTSOF(iovec), &n, false, 0, format, ap);
1093 /* Error handling below */
1094 va_end(ap);
1095
1096 ucred.pid = getpid();
1097 ucred.uid = getuid();
1098 ucred.gid = getgid();
1099
1100 if (r >= 0)
1101 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
1102
1103 while (m < n)
1104 free(iovec[m++].iov_base);
1105
1106 if (r < 0) {
1107 /* We failed to format the message. Emit a warning instead. */
1108 char buf[LINE_MAX];
1109
1110 xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror(-r));
1111
1112 n = 3;
1113 IOVEC_SET_STRING(iovec[n++], "PRIORITY=4");
1114 IOVEC_SET_STRING(iovec[n++], buf);
1115 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
1116 }
1117 }
1118
1119 void server_dispatch_message(
1120 Server *s,
1121 struct iovec *iovec, unsigned n, unsigned m,
1122 const struct ucred *ucred,
1123 const struct timeval *tv,
1124 const char *label, size_t label_len,
1125 const char *unit_id,
1126 int priority,
1127 pid_t object_pid) {
1128
1129 int rl, r;
1130 _cleanup_free_ char *path = NULL;
1131 uint64_t available = 0;
1132 char *c;
1133
1134 assert(s);
1135 assert(iovec || n == 0);
1136
1137 if (n == 0)
1138 return;
1139
1140 if (LOG_PRI(priority) > s->max_level_store)
1141 return;
1142
1143 /* Stop early in case the information will not be stored
1144 * in a journal. */
1145 if (s->storage == STORAGE_NONE)
1146 return;
1147
1148 if (!ucred)
1149 goto finish;
1150
1151 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
1152 if (r < 0)
1153 goto finish;
1154
1155 /* example: /user/lennart/3/foobar
1156 * /system/dbus.service/foobar
1157 *
1158 * So let's cut of everything past the third /, since that is
1159 * where user directories start */
1160
1161 c = strchr(path, '/');
1162 if (c) {
1163 c = strchr(c+1, '/');
1164 if (c) {
1165 c = strchr(c+1, '/');
1166 if (c)
1167 *c = 0;
1168 }
1169 }
1170
1171 (void) determine_space(s, &available, NULL);
1172 rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available);
1173 if (rl == 0)
1174 return;
1175
1176 /* Write a suppression message if we suppressed something */
1177 if (rl > 1)
1178 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
1179 LOG_MESSAGE("Suppressed %u messages from %s", rl - 1, path),
1180 NULL);
1181
1182 finish:
1183 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
1184 }
1185
1186 int server_flush_to_var(Server *s) {
1187 sd_id128_t machine;
1188 sd_journal *j = NULL;
1189 char ts[FORMAT_TIMESPAN_MAX];
1190 usec_t start;
1191 unsigned n = 0;
1192 int r;
1193
1194 assert(s);
1195
1196 if (s->storage != STORAGE_AUTO &&
1197 s->storage != STORAGE_PERSISTENT)
1198 return 0;
1199
1200 if (!s->runtime_journal)
1201 return 0;
1202
1203 (void) system_journal_open(s, true);
1204
1205 if (!s->system_journal)
1206 return 0;
1207
1208 log_debug("Flushing to /var...");
1209
1210 start = now(CLOCK_MONOTONIC);
1211
1212 r = sd_id128_get_machine(&machine);
1213 if (r < 0)
1214 return r;
1215
1216 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1217 if (r < 0)
1218 return log_error_errno(r, "Failed to read runtime journal: %m");
1219
1220 sd_journal_set_data_threshold(j, 0);
1221
1222 SD_JOURNAL_FOREACH(j) {
1223 Object *o = NULL;
1224 JournalFile *f;
1225
1226 f = j->current_file;
1227 assert(f && f->current_offset > 0);
1228
1229 n++;
1230
1231 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1232 if (r < 0) {
1233 log_error_errno(r, "Can't read entry: %m");
1234 goto finish;
1235 }
1236
1237 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1238 if (r >= 0)
1239 continue;
1240
1241 if (!shall_try_append_again(s->system_journal, r)) {
1242 log_error_errno(r, "Can't write entry: %m");
1243 goto finish;
1244 }
1245
1246 server_rotate(s);
1247 server_vacuum(s, false);
1248
1249 if (!s->system_journal) {
1250 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1251 r = -EIO;
1252 goto finish;
1253 }
1254
1255 log_debug("Retrying write.");
1256 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1257 if (r < 0) {
1258 log_error_errno(r, "Can't write entry: %m");
1259 goto finish;
1260 }
1261 }
1262
1263 r = 0;
1264
1265 finish:
1266 journal_file_post_change(s->system_journal);
1267
1268 s->runtime_journal = journal_file_close(s->runtime_journal);
1269
1270 if (r >= 0)
1271 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
1272
1273 sd_journal_close(j);
1274
1275 server_driver_message(s, SD_ID128_NULL,
1276 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1277 format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0),
1278 n),
1279 NULL);
1280
1281 return r;
1282 }
1283
1284 int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1285 Server *s = userdata;
1286 struct ucred *ucred = NULL;
1287 struct timeval *tv = NULL;
1288 struct cmsghdr *cmsg;
1289 char *label = NULL;
1290 size_t label_len = 0, m;
1291 struct iovec iovec;
1292 ssize_t n;
1293 int *fds = NULL, v = 0;
1294 unsigned n_fds = 0;
1295
1296 union {
1297 struct cmsghdr cmsghdr;
1298
1299 /* We use NAME_MAX space for the SELinux label
1300 * here. The kernel currently enforces no
1301 * limit, but according to suggestions from
1302 * the SELinux people this will change and it
1303 * will probably be identical to NAME_MAX. For
1304 * now we use that, but this should be updated
1305 * one day when the final limit is known. */
1306 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1307 CMSG_SPACE(sizeof(struct timeval)) +
1308 CMSG_SPACE(sizeof(int)) + /* fd */
1309 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1310 } control = {};
1311
1312 union sockaddr_union sa = {};
1313
1314 struct msghdr msghdr = {
1315 .msg_iov = &iovec,
1316 .msg_iovlen = 1,
1317 .msg_control = &control,
1318 .msg_controllen = sizeof(control),
1319 .msg_name = &sa,
1320 .msg_namelen = sizeof(sa),
1321 };
1322
1323 assert(s);
1324 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
1325
1326 if (revents != EPOLLIN) {
1327 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1328 return -EIO;
1329 }
1330
1331 /* Try to get the right size, if we can. (Not all
1332 * sockets support SIOCINQ, hence we just try, but
1333 * don't rely on it. */
1334 (void) ioctl(fd, SIOCINQ, &v);
1335
1336 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1337 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1338 (size_t) LINE_MAX,
1339 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
1340
1341 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1342 return log_oom();
1343
1344 iovec.iov_base = s->buffer;
1345 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
1346
1347 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1348 if (n < 0) {
1349 if (errno == EINTR || errno == EAGAIN)
1350 return 0;
1351
1352 return log_error_errno(errno, "recvmsg() failed: %m");
1353 }
1354
1355 CMSG_FOREACH(cmsg, &msghdr) {
1356
1357 if (cmsg->cmsg_level == SOL_SOCKET &&
1358 cmsg->cmsg_type == SCM_CREDENTIALS &&
1359 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1360 ucred = (struct ucred*) CMSG_DATA(cmsg);
1361 else if (cmsg->cmsg_level == SOL_SOCKET &&
1362 cmsg->cmsg_type == SCM_SECURITY) {
1363 label = (char*) CMSG_DATA(cmsg);
1364 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1365 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1366 cmsg->cmsg_type == SO_TIMESTAMP &&
1367 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1368 tv = (struct timeval*) CMSG_DATA(cmsg);
1369 else if (cmsg->cmsg_level == SOL_SOCKET &&
1370 cmsg->cmsg_type == SCM_RIGHTS) {
1371 fds = (int*) CMSG_DATA(cmsg);
1372 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1373 }
1374 }
1375
1376 /* And a trailing NUL, just in case */
1377 s->buffer[n] = 0;
1378
1379 if (fd == s->syslog_fd) {
1380 if (n > 0 && n_fds == 0)
1381 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1382 else if (n_fds > 0)
1383 log_warning("Got file descriptors via syslog socket. Ignoring.");
1384
1385 } else if (fd == s->native_fd) {
1386 if (n > 0 && n_fds == 0)
1387 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1388 else if (n == 0 && n_fds == 1)
1389 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1390 else if (n_fds > 0)
1391 log_warning("Got too many file descriptors via native socket. Ignoring.");
1392
1393 } else {
1394 assert(fd == s->audit_fd);
1395
1396 if (n > 0 && n_fds == 0)
1397 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1398 else if (n_fds > 0)
1399 log_warning("Got file descriptors via audit socket. Ignoring.");
1400 }
1401
1402 close_many(fds, n_fds);
1403 return 0;
1404 }
1405
1406 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1407 Server *s = userdata;
1408 int r;
1409
1410 assert(s);
1411
1412 log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
1413
1414 (void) server_flush_to_var(s);
1415 server_sync(s);
1416 server_vacuum(s, false);
1417
1418 r = touch("/run/systemd/journal/flushed");
1419 if (r < 0)
1420 log_warning_errno(r, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
1421
1422 server_space_usage_message(s, NULL);
1423 return 0;
1424 }
1425
1426 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1427 Server *s = userdata;
1428 int r;
1429
1430 assert(s);
1431
1432 log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
1433 server_rotate(s);
1434 server_vacuum(s, true);
1435
1436 if (s->system_journal)
1437 patch_min_use(&s->system_storage);
1438 if (s->runtime_journal)
1439 patch_min_use(&s->runtime_storage);
1440
1441 /* Let clients know when the most recent rotation happened. */
1442 r = write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC));
1443 if (r < 0)
1444 log_warning_errno(r, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
1445
1446 return 0;
1447 }
1448
1449 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1450 Server *s = userdata;
1451
1452 assert(s);
1453
1454 log_received_signal(LOG_INFO, si);
1455
1456 sd_event_exit(s->event, 0);
1457 return 0;
1458 }
1459
1460 static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1461 Server *s = userdata;
1462 int r;
1463
1464 assert(s);
1465
1466 log_debug("Received request to sync from PID " PID_FMT, si->ssi_pid);
1467
1468 server_sync(s);
1469
1470 /* Let clients know when the most recent sync happened. */
1471 r = write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC));
1472 if (r < 0)
1473 log_warning_errno(r, "Failed to write /run/systemd/journal/synced, ignoring: %m");
1474
1475 return 0;
1476 }
1477
1478 static int setup_signals(Server *s) {
1479 int r;
1480
1481 assert(s);
1482
1483 assert_se(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
1484
1485 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1486 if (r < 0)
1487 return r;
1488
1489 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1490 if (r < 0)
1491 return r;
1492
1493 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1494 if (r < 0)
1495 return r;
1496
1497 /* Let's process SIGTERM late, so that we flush all queued
1498 * messages to disk before we exit */
1499 r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1500 if (r < 0)
1501 return r;
1502
1503 /* When journald is invoked on the terminal (when debugging),
1504 * it's useful if C-c is handled equivalent to SIGTERM. */
1505 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
1506 if (r < 0)
1507 return r;
1508
1509 r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1510 if (r < 0)
1511 return r;
1512
1513 /* SIGRTMIN+1 causes an immediate sync. We process this very
1514 * late, so that everything else queued at this point is
1515 * really written to disk. Clients can watch
1516 * /run/systemd/journal/synced with inotify until its mtime
1517 * changes to see when a sync happened. */
1518 r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
1519 if (r < 0)
1520 return r;
1521
1522 r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
1523 if (r < 0)
1524 return r;
1525
1526 return 0;
1527 }
1528
1529 static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
1530 Server *s = data;
1531 int r;
1532
1533 assert(s);
1534
1535 if (streq(key, "systemd.journald.forward_to_syslog")) {
1536 r = value ? parse_boolean(value) : true;
1537 if (r < 0)
1538 log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value);
1539 else
1540 s->forward_to_syslog = r;
1541 } else if (streq(key, "systemd.journald.forward_to_kmsg")) {
1542 r = value ? parse_boolean(value) : true;
1543 if (r < 0)
1544 log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value);
1545 else
1546 s->forward_to_kmsg = r;
1547 } else if (streq(key, "systemd.journald.forward_to_console")) {
1548 r = value ? parse_boolean(value) : true;
1549 if (r < 0)
1550 log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value);
1551 else
1552 s->forward_to_console = r;
1553 } else if (streq(key, "systemd.journald.forward_to_wall")) {
1554 r = value ? parse_boolean(value) : true;
1555 if (r < 0)
1556 log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value);
1557 else
1558 s->forward_to_wall = r;
1559 } else if (streq(key, "systemd.journald.max_level_console") && value) {
1560 r = log_level_from_string(value);
1561 if (r < 0)
1562 log_warning("Failed to parse max level console value \"%s\". Ignoring.", value);
1563 else
1564 s->max_level_console = r;
1565 } else if (streq(key, "systemd.journald.max_level_store") && value) {
1566 r = log_level_from_string(value);
1567 if (r < 0)
1568 log_warning("Failed to parse max level store value \"%s\". Ignoring.", value);
1569 else
1570 s->max_level_store = r;
1571 } else if (streq(key, "systemd.journald.max_level_syslog") && value) {
1572 r = log_level_from_string(value);
1573 if (r < 0)
1574 log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value);
1575 else
1576 s->max_level_syslog = r;
1577 } else if (streq(key, "systemd.journald.max_level_kmsg") && value) {
1578 r = log_level_from_string(value);
1579 if (r < 0)
1580 log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value);
1581 else
1582 s->max_level_kmsg = r;
1583 } else if (streq(key, "systemd.journald.max_level_wall") && value) {
1584 r = log_level_from_string(value);
1585 if (r < 0)
1586 log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value);
1587 else
1588 s->max_level_wall = r;
1589 } else if (startswith(key, "systemd.journald"))
1590 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key);
1591
1592 /* do not warn about state here, since probably systemd already did */
1593 return 0;
1594 }
1595
1596 static int server_parse_config_file(Server *s) {
1597 assert(s);
1598
1599 return config_parse_many_nulstr(PKGSYSCONFDIR "/journald.conf",
1600 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1601 "Journal\0",
1602 config_item_perf_lookup, journald_gperf_lookup,
1603 false, s);
1604 }
1605
1606 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1607 Server *s = userdata;
1608
1609 assert(s);
1610
1611 server_sync(s);
1612 return 0;
1613 }
1614
1615 int server_schedule_sync(Server *s, int priority) {
1616 int r;
1617
1618 assert(s);
1619
1620 if (priority <= LOG_CRIT) {
1621 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1622 server_sync(s);
1623 return 0;
1624 }
1625
1626 if (s->sync_scheduled)
1627 return 0;
1628
1629 if (s->sync_interval_usec > 0) {
1630 usec_t when;
1631
1632 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1633 if (r < 0)
1634 return r;
1635
1636 when += s->sync_interval_usec;
1637
1638 if (!s->sync_event_source) {
1639 r = sd_event_add_time(
1640 s->event,
1641 &s->sync_event_source,
1642 CLOCK_MONOTONIC,
1643 when, 0,
1644 server_dispatch_sync, s);
1645 if (r < 0)
1646 return r;
1647
1648 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1649 } else {
1650 r = sd_event_source_set_time(s->sync_event_source, when);
1651 if (r < 0)
1652 return r;
1653
1654 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1655 }
1656 if (r < 0)
1657 return r;
1658
1659 s->sync_scheduled = true;
1660 }
1661
1662 return 0;
1663 }
1664
1665 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1666 Server *s = userdata;
1667
1668 assert(s);
1669
1670 server_cache_hostname(s);
1671 return 0;
1672 }
1673
1674 static int server_open_hostname(Server *s) {
1675 int r;
1676
1677 assert(s);
1678
1679 s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
1680 if (s->hostname_fd < 0)
1681 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
1682
1683 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1684 if (r < 0) {
1685 /* kernels prior to 3.2 don't support polling this file. Ignore
1686 * the failure. */
1687 if (r == -EPERM) {
1688 log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
1689 s->hostname_fd = safe_close(s->hostname_fd);
1690 return 0;
1691 }
1692
1693 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
1694 }
1695
1696 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1697 if (r < 0)
1698 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
1699
1700 return 0;
1701 }
1702
1703 static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1704 Server *s = userdata;
1705 int r;
1706
1707 assert(s);
1708 assert(s->notify_event_source == es);
1709 assert(s->notify_fd == fd);
1710
1711 /* The $NOTIFY_SOCKET is writable again, now send exactly one
1712 * message on it. Either it's the watchdog event, the initial
1713 * READY=1 event or an stdout stream event. If there's nothing
1714 * to write anymore, turn our event source off. The next time
1715 * there's something to send it will be turned on again. */
1716
1717 if (!s->sent_notify_ready) {
1718 static const char p[] =
1719 "READY=1\n"
1720 "STATUS=Processing requests...";
1721 ssize_t l;
1722
1723 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1724 if (l < 0) {
1725 if (errno == EAGAIN)
1726 return 0;
1727
1728 return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1729 }
1730
1731 s->sent_notify_ready = true;
1732 log_debug("Sent READY=1 notification.");
1733
1734 } else if (s->send_watchdog) {
1735
1736 static const char p[] =
1737 "WATCHDOG=1";
1738
1739 ssize_t l;
1740
1741 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1742 if (l < 0) {
1743 if (errno == EAGAIN)
1744 return 0;
1745
1746 return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
1747 }
1748
1749 s->send_watchdog = false;
1750 log_debug("Sent WATCHDOG=1 notification.");
1751
1752 } else if (s->stdout_streams_notify_queue)
1753 /* Dispatch one stream notification event */
1754 stdout_stream_send_notify(s->stdout_streams_notify_queue);
1755
1756 /* Leave us enabled if there's still more to do. */
1757 if (s->send_watchdog || s->stdout_streams_notify_queue)
1758 return 0;
1759
1760 /* There was nothing to do anymore, let's turn ourselves off. */
1761 r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1762 if (r < 0)
1763 return log_error_errno(r, "Failed to turn off notify event source: %m");
1764
1765 return 0;
1766 }
1767
1768 static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
1769 Server *s = userdata;
1770 int r;
1771
1772 assert(s);
1773
1774 s->send_watchdog = true;
1775
1776 r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
1777 if (r < 0)
1778 log_warning_errno(r, "Failed to turn on notify event source: %m");
1779
1780 r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
1781 if (r < 0)
1782 return log_error_errno(r, "Failed to restart watchdog event source: %m");
1783
1784 r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
1785 if (r < 0)
1786 return log_error_errno(r, "Failed to enable watchdog event source: %m");
1787
1788 return 0;
1789 }
1790
1791 static int server_connect_notify(Server *s) {
1792 union sockaddr_union sa = {
1793 .un.sun_family = AF_UNIX,
1794 };
1795 const char *e;
1796 int r;
1797
1798 assert(s);
1799 assert(s->notify_fd < 0);
1800 assert(!s->notify_event_source);
1801
1802 /*
1803 So here's the problem: we'd like to send notification
1804 messages to PID 1, but we cannot do that via sd_notify(),
1805 since that's synchronous, and we might end up blocking on
1806 it. Specifically: given that PID 1 might block on
1807 dbus-daemon during IPC, and dbus-daemon is logging to us,
1808 and might hence block on us, we might end up in a deadlock
1809 if we block on sending PID 1 notification messages — by
1810 generating a full blocking circle. To avoid this, let's
1811 create a non-blocking socket, and connect it to the
1812 notification socket, and then wait for POLLOUT before we
1813 send anything. This should efficiently avoid any deadlocks,
1814 as we'll never block on PID 1, hence PID 1 can safely block
1815 on dbus-daemon which can safely block on us again.
1816
1817 Don't think that this issue is real? It is, see:
1818 https://github.com/systemd/systemd/issues/1505
1819 */
1820
1821 e = getenv("NOTIFY_SOCKET");
1822 if (!e)
1823 return 0;
1824
1825 if ((e[0] != '@' && e[0] != '/') || e[1] == 0) {
1826 log_error("NOTIFY_SOCKET set to an invalid value: %s", e);
1827 return -EINVAL;
1828 }
1829
1830 if (strlen(e) > sizeof(sa.un.sun_path)) {
1831 log_error("NOTIFY_SOCKET path too long: %s", e);
1832 return -EINVAL;
1833 }
1834
1835 s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1836 if (s->notify_fd < 0)
1837 return log_error_errno(errno, "Failed to create notify socket: %m");
1838
1839 (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1840
1841 strncpy(sa.un.sun_path, e, sizeof(sa.un.sun_path));
1842 if (sa.un.sun_path[0] == '@')
1843 sa.un.sun_path[0] = 0;
1844
1845 r = connect(s->notify_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
1846 if (r < 0)
1847 return log_error_errno(errno, "Failed to connect to notify socket: %m");
1848
1849 r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1850 if (r < 0)
1851 return log_error_errno(r, "Failed to watch notification socket: %m");
1852
1853 if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
1854 s->send_watchdog = true;
1855
1856 r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s);
1857 if (r < 0)
1858 return log_error_errno(r, "Failed to add watchdog time event: %m");
1859 }
1860
1861 /* This should fire pretty soon, which we'll use to send the
1862 * READY=1 event. */
1863
1864 return 0;
1865 }
1866
1867 int server_init(Server *s) {
1868 _cleanup_fdset_free_ FDSet *fds = NULL;
1869 int n, r, fd;
1870 bool no_sockets;
1871
1872 assert(s);
1873
1874 zero(*s);
1875 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
1876 s->compress = true;
1877 s->seal = true;
1878
1879 s->watchdog_usec = USEC_INFINITY;
1880
1881 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1882 s->sync_scheduled = false;
1883
1884 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1885 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1886
1887 s->forward_to_wall = true;
1888
1889 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1890
1891 s->max_level_store = LOG_DEBUG;
1892 s->max_level_syslog = LOG_DEBUG;
1893 s->max_level_kmsg = LOG_NOTICE;
1894 s->max_level_console = LOG_INFO;
1895 s->max_level_wall = LOG_EMERG;
1896
1897 journal_reset_metrics(&s->system_storage.metrics);
1898 journal_reset_metrics(&s->runtime_storage.metrics);
1899
1900 server_parse_config_file(s);
1901 parse_proc_cmdline(parse_proc_cmdline_item, s, true);
1902
1903 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1904 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1905 s->rate_limit_interval, s->rate_limit_burst);
1906 s->rate_limit_interval = s->rate_limit_burst = 0;
1907 }
1908
1909 (void) mkdir_p("/run/systemd/journal", 0755);
1910
1911 s->user_journals = ordered_hashmap_new(NULL);
1912 if (!s->user_journals)
1913 return log_oom();
1914
1915 s->mmap = mmap_cache_new();
1916 if (!s->mmap)
1917 return log_oom();
1918
1919 s->deferred_closes = set_new(NULL);
1920 if (!s->deferred_closes)
1921 return log_oom();
1922
1923 r = sd_event_default(&s->event);
1924 if (r < 0)
1925 return log_error_errno(r, "Failed to create event loop: %m");
1926
1927 n = sd_listen_fds(true);
1928 if (n < 0)
1929 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
1930
1931 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1932
1933 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1934
1935 if (s->native_fd >= 0) {
1936 log_error("Too many native sockets passed.");
1937 return -EINVAL;
1938 }
1939
1940 s->native_fd = fd;
1941
1942 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1943
1944 if (s->stdout_fd >= 0) {
1945 log_error("Too many stdout sockets passed.");
1946 return -EINVAL;
1947 }
1948
1949 s->stdout_fd = fd;
1950
1951 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1952 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1953
1954 if (s->syslog_fd >= 0) {
1955 log_error("Too many /dev/log sockets passed.");
1956 return -EINVAL;
1957 }
1958
1959 s->syslog_fd = fd;
1960
1961 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1962
1963 if (s->audit_fd >= 0) {
1964 log_error("Too many audit sockets passed.");
1965 return -EINVAL;
1966 }
1967
1968 s->audit_fd = fd;
1969
1970 } else {
1971
1972 if (!fds) {
1973 fds = fdset_new();
1974 if (!fds)
1975 return log_oom();
1976 }
1977
1978 r = fdset_put(fds, fd);
1979 if (r < 0)
1980 return log_oom();
1981 }
1982 }
1983
1984 /* Try to restore streams, but don't bother if this fails */
1985 (void) server_restore_streams(s, fds);
1986
1987 if (fdset_size(fds) > 0) {
1988 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1989 fds = fdset_free(fds);
1990 }
1991
1992 no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
1993
1994 /* always open stdout, syslog, native, and kmsg sockets */
1995
1996 /* systemd-journald.socket: /run/systemd/journal/stdout */
1997 r = server_open_stdout_socket(s);
1998 if (r < 0)
1999 return r;
2000
2001 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
2002 r = server_open_syslog_socket(s);
2003 if (r < 0)
2004 return r;
2005
2006 /* systemd-journald.socket: /run/systemd/journal/socket */
2007 r = server_open_native_socket(s);
2008 if (r < 0)
2009 return r;
2010
2011 /* /dev/ksmg */
2012 r = server_open_dev_kmsg(s);
2013 if (r < 0)
2014 return r;
2015
2016 /* Unless we got *some* sockets and not audit, open audit socket */
2017 if (s->audit_fd >= 0 || no_sockets) {
2018 r = server_open_audit(s);
2019 if (r < 0)
2020 return r;
2021 }
2022
2023 r = server_open_kernel_seqnum(s);
2024 if (r < 0)
2025 return r;
2026
2027 r = server_open_hostname(s);
2028 if (r < 0)
2029 return r;
2030
2031 r = setup_signals(s);
2032 if (r < 0)
2033 return r;
2034
2035 s->udev = udev_new();
2036 if (!s->udev)
2037 return -ENOMEM;
2038
2039 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
2040 if (!s->rate_limit)
2041 return -ENOMEM;
2042
2043 r = cg_get_root_path(&s->cgroup_root);
2044 if (r < 0)
2045 return r;
2046
2047 server_cache_hostname(s);
2048 server_cache_boot_id(s);
2049 server_cache_machine_id(s);
2050
2051 s->runtime_storage.name = "Runtime journal";
2052 s->system_storage.name = "System journal";
2053
2054 s->runtime_storage.path = strjoin("/run/log/journal/", SERVER_MACHINE_ID(s));
2055 s->system_storage.path = strjoin("/var/log/journal/", SERVER_MACHINE_ID(s));
2056 if (!s->runtime_storage.path || !s->system_storage.path)
2057 return -ENOMEM;
2058
2059 (void) server_connect_notify(s);
2060
2061 return system_journal_open(s, false);
2062 }
2063
2064 void server_maybe_append_tags(Server *s) {
2065 #ifdef HAVE_GCRYPT
2066 JournalFile *f;
2067 Iterator i;
2068 usec_t n;
2069
2070 n = now(CLOCK_REALTIME);
2071
2072 if (s->system_journal)
2073 journal_file_maybe_append_tag(s->system_journal, n);
2074
2075 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
2076 journal_file_maybe_append_tag(f, n);
2077 #endif
2078 }
2079
2080 void server_done(Server *s) {
2081 JournalFile *f;
2082 assert(s);
2083
2084 if (s->deferred_closes) {
2085 journal_file_close_set(s->deferred_closes);
2086 set_free(s->deferred_closes);
2087 }
2088
2089 while (s->stdout_streams)
2090 stdout_stream_free(s->stdout_streams);
2091
2092 if (s->system_journal)
2093 (void) journal_file_close(s->system_journal);
2094
2095 if (s->runtime_journal)
2096 (void) journal_file_close(s->runtime_journal);
2097
2098 while ((f = ordered_hashmap_steal_first(s->user_journals)))
2099 (void) journal_file_close(f);
2100
2101 ordered_hashmap_free(s->user_journals);
2102
2103 sd_event_source_unref(s->syslog_event_source);
2104 sd_event_source_unref(s->native_event_source);
2105 sd_event_source_unref(s->stdout_event_source);
2106 sd_event_source_unref(s->dev_kmsg_event_source);
2107 sd_event_source_unref(s->audit_event_source);
2108 sd_event_source_unref(s->sync_event_source);
2109 sd_event_source_unref(s->sigusr1_event_source);
2110 sd_event_source_unref(s->sigusr2_event_source);
2111 sd_event_source_unref(s->sigterm_event_source);
2112 sd_event_source_unref(s->sigint_event_source);
2113 sd_event_source_unref(s->sigrtmin1_event_source);
2114 sd_event_source_unref(s->hostname_event_source);
2115 sd_event_source_unref(s->notify_event_source);
2116 sd_event_source_unref(s->watchdog_event_source);
2117 sd_event_unref(s->event);
2118
2119 safe_close(s->syslog_fd);
2120 safe_close(s->native_fd);
2121 safe_close(s->stdout_fd);
2122 safe_close(s->dev_kmsg_fd);
2123 safe_close(s->audit_fd);
2124 safe_close(s->hostname_fd);
2125 safe_close(s->notify_fd);
2126
2127 if (s->rate_limit)
2128 journal_rate_limit_free(s->rate_limit);
2129
2130 if (s->kernel_seqnum)
2131 munmap(s->kernel_seqnum, sizeof(uint64_t));
2132
2133 free(s->buffer);
2134 free(s->tty_path);
2135 free(s->cgroup_root);
2136 free(s->hostname_field);
2137
2138 if (s->mmap)
2139 mmap_cache_unref(s->mmap);
2140
2141 udev_unref(s->udev);
2142 }
2143
2144 static const char* const storage_table[_STORAGE_MAX] = {
2145 [STORAGE_AUTO] = "auto",
2146 [STORAGE_VOLATILE] = "volatile",
2147 [STORAGE_PERSISTENT] = "persistent",
2148 [STORAGE_NONE] = "none"
2149 };
2150
2151 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
2152 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
2153
2154 static const char* const split_mode_table[_SPLIT_MAX] = {
2155 [SPLIT_LOGIN] = "login",
2156 [SPLIT_UID] = "uid",
2157 [SPLIT_NONE] = "none",
2158 };
2159
2160 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
2161 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");