]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/journal/journald-server.c
6bdb375fb7882dc66ba5f9367dfefe9b7084b70c
[thirdparty/systemd.git] / src / journal / journald-server.c
1 /***
2 This file is part of systemd.
3
4 Copyright 2011 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18 ***/
19
20 #ifdef HAVE_SELINUX
21 #include <selinux/selinux.h>
22 #endif
23 #include <sys/ioctl.h>
24 #include <sys/mman.h>
25 #include <sys/signalfd.h>
26 #include <sys/statvfs.h>
27 #include <linux/sockios.h>
28
29 #include "libudev.h"
30 #include "sd-daemon.h"
31 #include "sd-journal.h"
32 #include "sd-messages.h"
33
34 #include "acl-util.h"
35 #include "alloc-util.h"
36 #include "audit-util.h"
37 #include "cgroup-util.h"
38 #include "conf-parser.h"
39 #include "dirent-util.h"
40 #include "extract-word.h"
41 #include "fd-util.h"
42 #include "fileio.h"
43 #include "format-util.h"
44 #include "fs-util.h"
45 #include "hashmap.h"
46 #include "hostname-util.h"
47 #include "id128-util.h"
48 #include "io-util.h"
49 #include "journal-authenticate.h"
50 #include "journal-file.h"
51 #include "journal-internal.h"
52 #include "journal-vacuum.h"
53 #include "journald-audit.h"
54 #include "journald-kmsg.h"
55 #include "journald-native.h"
56 #include "journald-rate-limit.h"
57 #include "journald-server.h"
58 #include "journald-stream.h"
59 #include "journald-syslog.h"
60 #include "log.h"
61 #include "missing.h"
62 #include "mkdir.h"
63 #include "parse-util.h"
64 #include "proc-cmdline.h"
65 #include "process-util.h"
66 #include "rm-rf.h"
67 #include "selinux-util.h"
68 #include "signal-util.h"
69 #include "socket-util.h"
70 #include "stdio-util.h"
71 #include "string-table.h"
72 #include "string-util.h"
73 #include "user-util.h"
74 #include "syslog-util.h"
75
76 #define USER_JOURNALS_MAX 1024
77
78 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
79 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
80 #define DEFAULT_RATE_LIMIT_BURST 1000
81 #define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
82
83 #define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
84
85 #define NOTIFY_SNDBUF_SIZE (8*1024*1024)
86
87 /* The period to insert between posting changes for coalescing */
88 #define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
89
90 static int determine_path_usage(Server *s, const char *path, uint64_t *ret_used, uint64_t *ret_free) {
91 _cleanup_closedir_ DIR *d = NULL;
92 struct dirent *de;
93 struct statvfs ss;
94
95 assert(ret_used);
96 assert(ret_free);
97
98 d = opendir(path);
99 if (!d)
100 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR,
101 errno, "Failed to open %s: %m", path);
102
103 if (fstatvfs(dirfd(d), &ss) < 0)
104 return log_error_errno(errno, "Failed to fstatvfs(%s): %m", path);
105
106 *ret_free = ss.f_bsize * ss.f_bavail;
107 *ret_used = 0;
108 FOREACH_DIRENT_ALL(de, d, break) {
109 struct stat st;
110
111 if (!endswith(de->d_name, ".journal") &&
112 !endswith(de->d_name, ".journal~"))
113 continue;
114
115 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
116 log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", path, de->d_name);
117 continue;
118 }
119
120 if (!S_ISREG(st.st_mode))
121 continue;
122
123 *ret_used += (uint64_t) st.st_blocks * 512UL;
124 }
125
126 return 0;
127 }
128
129 static void cache_space_invalidate(JournalStorageSpace *space) {
130 memset(space, 0, sizeof(*space));
131 }
132
133 static int cache_space_refresh(Server *s, JournalStorage *storage) {
134 JournalStorageSpace *space;
135 JournalMetrics *metrics;
136 uint64_t vfs_used, vfs_avail, avail;
137 usec_t ts;
138 int r;
139
140 assert(s);
141
142 metrics = &storage->metrics;
143 space = &storage->space;
144
145 ts = now(CLOCK_MONOTONIC);
146
147 if (space->timestamp != 0 && space->timestamp + RECHECK_SPACE_USEC > ts)
148 return 0;
149
150 r = determine_path_usage(s, storage->path, &vfs_used, &vfs_avail);
151 if (r < 0)
152 return r;
153
154 space->vfs_used = vfs_used;
155 space->vfs_available = vfs_avail;
156
157 avail = LESS_BY(vfs_avail, metrics->keep_free);
158
159 space->limit = MIN(MAX(vfs_used + avail, metrics->min_use), metrics->max_use);
160 space->available = LESS_BY(space->limit, vfs_used);
161 space->timestamp = ts;
162 return 1;
163 }
164
165 static void patch_min_use(JournalStorage *storage) {
166 assert(storage);
167
168 /* Let's bump the min_use limit to the current usage on disk. We do
169 * this when starting up and first opening the journal files. This way
170 * sudden spikes in disk usage will not cause journald to vacuum files
171 * without bounds. Note that this means that only a restart of journald
172 * will make it reset this value. */
173
174 storage->metrics.min_use = MAX(storage->metrics.min_use, storage->space.vfs_used);
175 }
176
177
178 static int determine_space(Server *s, uint64_t *available, uint64_t *limit) {
179 JournalStorage *js;
180 int r;
181
182 assert(s);
183
184 js = s->system_journal ? &s->system_storage : &s->runtime_storage;
185
186 r = cache_space_refresh(s, js);
187 if (r >= 0) {
188 if (available)
189 *available = js->space.available;
190 if (limit)
191 *limit = js->space.limit;
192 }
193 return r;
194 }
195
196 void server_space_usage_message(Server *s, JournalStorage *storage) {
197 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
198 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
199 JournalMetrics *metrics;
200
201 assert(s);
202
203 if (!storage)
204 storage = s->system_journal ? &s->system_storage : &s->runtime_storage;
205
206 if (cache_space_refresh(s, storage) < 0)
207 return;
208
209 metrics = &storage->metrics;
210 format_bytes(fb1, sizeof(fb1), storage->space.vfs_used);
211 format_bytes(fb2, sizeof(fb2), metrics->max_use);
212 format_bytes(fb3, sizeof(fb3), metrics->keep_free);
213 format_bytes(fb4, sizeof(fb4), storage->space.vfs_available);
214 format_bytes(fb5, sizeof(fb5), storage->space.limit);
215 format_bytes(fb6, sizeof(fb6), storage->space.available);
216
217 server_driver_message(s, SD_MESSAGE_JOURNAL_USAGE,
218 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
219 storage->name, storage->path, fb1, fb5, fb6),
220 "JOURNAL_NAME=%s", storage->name,
221 "JOURNAL_PATH=%s", storage->path,
222 "CURRENT_USE=%"PRIu64, storage->space.vfs_used,
223 "CURRENT_USE_PRETTY=%s", fb1,
224 "MAX_USE=%"PRIu64, metrics->max_use,
225 "MAX_USE_PRETTY=%s", fb2,
226 "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free,
227 "DISK_KEEP_FREE_PRETTY=%s", fb3,
228 "DISK_AVAILABLE=%"PRIu64, storage->space.vfs_available,
229 "DISK_AVAILABLE_PRETTY=%s", fb4,
230 "LIMIT=%"PRIu64, storage->space.limit,
231 "LIMIT_PRETTY=%s", fb5,
232 "AVAILABLE=%"PRIu64, storage->space.available,
233 "AVAILABLE_PRETTY=%s", fb6,
234 NULL);
235 }
236
237 static void server_add_acls(JournalFile *f, uid_t uid) {
238 #ifdef HAVE_ACL
239 int r;
240 #endif
241 assert(f);
242
243 #ifdef HAVE_ACL
244 if (uid <= SYSTEM_UID_MAX)
245 return;
246
247 r = add_acls_for_user(f->fd, uid);
248 if (r < 0)
249 log_warning_errno(r, "Failed to set ACL on %s, ignoring: %m", f->path);
250 #endif
251 }
252
253 static int open_journal(
254 Server *s,
255 bool reliably,
256 const char *fname,
257 int flags,
258 bool seal,
259 JournalMetrics *metrics,
260 JournalFile **ret) {
261 int r;
262 JournalFile *f;
263
264 assert(s);
265 assert(fname);
266 assert(ret);
267
268 if (reliably)
269 r = journal_file_open_reliably(fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
270 else
271 r = journal_file_open(-1, fname, flags, 0640, s->compress, seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
272 if (r < 0)
273 return r;
274
275 r = journal_file_enable_post_change_timer(f, s->event, POST_CHANGE_TIMER_INTERVAL_USEC);
276 if (r < 0) {
277 (void) journal_file_close(f);
278 return r;
279 }
280
281 *ret = f;
282 return r;
283 }
284
/* Returns true if the flag file /run/systemd/journal/flushed can be accessed, false otherwise. */
static bool flushed_flag_is_set(void) {
        if (access("/run/systemd/journal/flushed", F_OK) < 0)
                return false;

        return true;
}
288
289 static int system_journal_open(Server *s, bool flush_requested) {
290 bool flushed = false;
291 const char *fn;
292 int r = 0;
293
294 if (!s->system_journal &&
295 (s->storage == STORAGE_PERSISTENT || s->storage == STORAGE_AUTO) &&
296 (flush_requested || (flushed = flushed_flag_is_set()))) {
297
298 /* If in auto mode: first try to create the machine
299 * path, but not the prefix.
300 *
301 * If in persistent mode: create /var/log/journal and
302 * the machine path */
303
304 if (s->storage == STORAGE_PERSISTENT)
305 (void) mkdir_p("/var/log/journal/", 0755);
306
307 (void) mkdir(s->system_storage.path, 0755);
308
309 fn = strjoina(s->system_storage.path, "/system.journal");
310 r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &s->system_journal);
311 if (r >= 0) {
312 server_add_acls(s->system_journal, 0);
313 (void) cache_space_refresh(s, &s->system_storage);
314 patch_min_use(&s->system_storage);
315 } else if (r < 0) {
316 if (r != -ENOENT && r != -EROFS)
317 log_warning_errno(r, "Failed to open system journal: %m");
318
319 r = 0;
320 }
321
322 /* If the runtime journal is open, and we're post-flush, we're
323 * recovering from a failed system journal rotate (ENOSPC)
324 * for which the runtime journal was reopened.
325 *
326 * Perform an implicit flush to var, leaving the runtime
327 * journal closed, now that the system journal is back.
328 */
329 if (s->runtime_journal && flushed)
330 (void) server_flush_to_var(s);
331 }
332
333 if (!s->runtime_journal &&
334 (s->storage != STORAGE_NONE)) {
335
336 fn = strjoina(s->runtime_storage.path, "/system.journal");
337
338 if (s->system_journal) {
339
340 /* Try to open the runtime journal, but only
341 * if it already exists, so that we can flush
342 * it into the system journal */
343
344 r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_storage.metrics, &s->runtime_journal);
345 if (r < 0) {
346 if (r != -ENOENT)
347 log_warning_errno(r, "Failed to open runtime journal: %m");
348
349 r = 0;
350 }
351
352 } else {
353
354 /* OK, we really need the runtime journal, so create
355 * it if necessary. */
356
357 (void) mkdir("/run/log", 0755);
358 (void) mkdir("/run/log/journal", 0755);
359 (void) mkdir_parents(fn, 0750);
360
361 r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_storage.metrics, &s->runtime_journal);
362 if (r < 0)
363 return log_error_errno(r, "Failed to open runtime journal: %m");
364 }
365
366 if (s->runtime_journal) {
367 server_add_acls(s->runtime_journal, 0);
368 (void) cache_space_refresh(s, &s->runtime_storage);
369 patch_min_use(&s->runtime_storage);
370 }
371 }
372
373 return r;
374 }
375
376 static JournalFile* find_journal(Server *s, uid_t uid) {
377 _cleanup_free_ char *p = NULL;
378 int r;
379 JournalFile *f;
380 sd_id128_t machine;
381
382 assert(s);
383
384 /* A rotate that fails to create the new journal (ENOSPC) leaves the
385 * rotated journal as NULL. Unless we revisit opening, even after
386 * space is made available we'll continue to return NULL indefinitely.
387 *
388 * system_journal_open() is a noop if the journals are already open, so
389 * we can just call it here to recover from failed rotates (or anything
390 * else that's left the journals as NULL).
391 *
392 * Fixes https://github.com/systemd/systemd/issues/3968 */
393 (void) system_journal_open(s, false);
394
395 /* We split up user logs only on /var, not on /run. If the
396 * runtime file is open, we write to it exclusively, in order
397 * to guarantee proper order as soon as we flush /run to
398 * /var and close the runtime file. */
399
400 if (s->runtime_journal)
401 return s->runtime_journal;
402
403 if (uid <= SYSTEM_UID_MAX || uid_is_dynamic(uid))
404 return s->system_journal;
405
406 r = sd_id128_get_machine(&machine);
407 if (r < 0)
408 return s->system_journal;
409
410 f = ordered_hashmap_get(s->user_journals, UID_TO_PTR(uid));
411 if (f)
412 return f;
413
414 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
415 SD_ID128_FORMAT_VAL(machine), uid) < 0)
416 return s->system_journal;
417
418 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
419 /* Too many open? Then let's close one */
420 f = ordered_hashmap_steal_first(s->user_journals);
421 assert(f);
422 (void) journal_file_close(f);
423 }
424
425 r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &f);
426 if (r < 0)
427 return s->system_journal;
428
429 server_add_acls(f, uid);
430
431 r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f);
432 if (r < 0) {
433 (void) journal_file_close(f);
434 return s->system_journal;
435 }
436
437 return f;
438 }
439
440 static int do_rotate(
441 Server *s,
442 JournalFile **f,
443 const char* name,
444 bool seal,
445 uint32_t uid) {
446
447 int r;
448 assert(s);
449
450 if (!*f)
451 return -EINVAL;
452
453 r = journal_file_rotate(f, s->compress, seal, s->deferred_closes);
454 if (r < 0)
455 if (*f)
456 log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
457 else
458 log_error_errno(r, "Failed to create new %s journal: %m", name);
459 else
460 server_add_acls(*f, uid);
461
462 return r;
463 }
464
465 void server_rotate(Server *s) {
466 JournalFile *f;
467 void *k;
468 Iterator i;
469 int r;
470
471 log_debug("Rotating...");
472
473 (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
474 (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);
475
476 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
477 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UID(k));
478 if (r >= 0)
479 ordered_hashmap_replace(s->user_journals, k, f);
480 else if (!f)
481 /* Old file has been closed and deallocated */
482 ordered_hashmap_remove(s->user_journals, k);
483 }
484
485 /* Perform any deferred closes which aren't still offlining. */
486 SET_FOREACH(f, s->deferred_closes, i)
487 if (!journal_file_is_offlining(f)) {
488 (void) set_remove(s->deferred_closes, f);
489 (void) journal_file_close(f);
490 }
491 }
492
493 void server_sync(Server *s) {
494 JournalFile *f;
495 Iterator i;
496 int r;
497
498 if (s->system_journal) {
499 r = journal_file_set_offline(s->system_journal, false);
500 if (r < 0)
501 log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
502 }
503
504 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i) {
505 r = journal_file_set_offline(f, false);
506 if (r < 0)
507 log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
508 }
509
510 if (s->sync_event_source) {
511 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
512 if (r < 0)
513 log_error_errno(r, "Failed to disable sync timer source: %m");
514 }
515
516 s->sync_scheduled = false;
517 }
518
519 static void do_vacuum(Server *s, JournalStorage *storage, bool verbose) {
520
521 int r;
522
523 assert(s);
524 assert(storage);
525
526 (void) cache_space_refresh(s, storage);
527
528 if (verbose)
529 server_space_usage_message(s, storage);
530
531 r = journal_directory_vacuum(storage->path, storage->space.limit,
532 storage->metrics.n_max_files, s->max_retention_usec,
533 &s->oldest_file_usec, verbose);
534 if (r < 0 && r != -ENOENT)
535 log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", storage->path);
536
537 cache_space_invalidate(&storage->space);
538 }
539
540 int server_vacuum(Server *s, bool verbose) {
541 assert(s);
542
543 log_debug("Vacuuming...");
544
545 s->oldest_file_usec = 0;
546
547 if (s->system_journal)
548 do_vacuum(s, &s->system_storage, verbose);
549 if (s->runtime_journal)
550 do_vacuum(s, &s->runtime_storage, verbose);
551
552 return 0;
553 }
554
555 static void server_cache_machine_id(Server *s) {
556 sd_id128_t id;
557 int r;
558
559 assert(s);
560
561 r = sd_id128_get_machine(&id);
562 if (r < 0)
563 return;
564
565 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
566 }
567
568 static void server_cache_boot_id(Server *s) {
569 sd_id128_t id;
570 int r;
571
572 assert(s);
573
574 r = sd_id128_get_boot(&id);
575 if (r < 0)
576 return;
577
578 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
579 }
580
581 static void server_cache_hostname(Server *s) {
582 _cleanup_free_ char *t = NULL;
583 char *x;
584
585 assert(s);
586
587 t = gethostname_malloc();
588 if (!t)
589 return;
590
591 x = strappend("_HOSTNAME=", t);
592 if (!x)
593 return;
594
595 free(s->hostname_field);
596 s->hostname_field = x;
597 }
598
599 static bool shall_try_append_again(JournalFile *f, int r) {
600 switch(r) {
601
602 case -E2BIG: /* Hit configured limit */
603 case -EFBIG: /* Hit fs limit */
604 case -EDQUOT: /* Quota limit hit */
605 case -ENOSPC: /* Disk full */
606 log_debug("%s: Allocation limit reached, rotating.", f->path);
607 return true;
608
609 case -EIO: /* I/O error of some kind (mmap) */
610 log_warning("%s: IO error, rotating.", f->path);
611 return true;
612
613 case -EHOSTDOWN: /* Other machine */
614 log_info("%s: Journal file from other machine, rotating.", f->path);
615 return true;
616
617 case -EBUSY: /* Unclean shutdown */
618 log_info("%s: Unclean shutdown, rotating.", f->path);
619 return true;
620
621 case -EPROTONOSUPPORT: /* Unsupported feature */
622 log_info("%s: Unsupported feature, rotating.", f->path);
623 return true;
624
625 case -EBADMSG: /* Corrupted */
626 case -ENODATA: /* Truncated */
627 case -ESHUTDOWN: /* Already archived */
628 log_warning("%s: Journal file corrupted, rotating.", f->path);
629 return true;
630
631 case -EIDRM: /* Journal file has been deleted */
632 log_warning("%s: Journal file has been deleted, rotating.", f->path);
633 return true;
634
635 case -ETXTBSY: /* Journal file is from the future */
636 log_warning("%s: Journal file is from the future, rotating.", f->path);
637 return true;
638
639 default:
640 return false;
641 }
642 }
643
644 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, unsigned n, int priority) {
645 bool vacuumed = false, rotate = false;
646 struct dual_timestamp ts;
647 JournalFile *f;
648 int r;
649
650 assert(s);
651 assert(iovec);
652 assert(n > 0);
653
654 /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
655 * the source time, and not even the time the event was originally seen, but instead simply the time we started
656 * processing it, as we want strictly linear ordering in what we write out.) */
657 assert_se(sd_event_now(s->event, CLOCK_REALTIME, &ts.realtime) >= 0);
658 assert_se(sd_event_now(s->event, CLOCK_MONOTONIC, &ts.monotonic) >= 0);
659
660 if (ts.realtime < s->last_realtime_clock) {
661 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
662 * regular operation. However, when it does happen, then we should make sure that we start fresh files
663 * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
664 * bisection works correctly. */
665
666 log_debug("Time jumped backwards, rotating.");
667 rotate = true;
668 } else {
669
670 f = find_journal(s, uid);
671 if (!f)
672 return;
673
674 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
675 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
676 rotate = true;
677 }
678 }
679
680 if (rotate) {
681 server_rotate(s);
682 server_vacuum(s, false);
683 vacuumed = true;
684
685 f = find_journal(s, uid);
686 if (!f)
687 return;
688 }
689
690 s->last_realtime_clock = ts.realtime;
691
692 r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
693 if (r >= 0) {
694 server_schedule_sync(s, priority);
695 return;
696 }
697
698 if (vacuumed || !shall_try_append_again(f, r)) {
699 log_error_errno(r, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
700 return;
701 }
702
703 server_rotate(s);
704 server_vacuum(s, false);
705
706 f = find_journal(s, uid);
707 if (!f)
708 return;
709
710 log_debug("Retrying write.");
711 r = journal_file_append_entry(f, &ts, iovec, n, &s->seqnum, NULL, NULL);
712 if (r < 0)
713 log_error_errno(r, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
714 else
715 server_schedule_sync(s, priority);
716 }
717
718 static int get_invocation_id(const char *cgroup_root, const char *slice, const char *unit, char **ret) {
719 _cleanup_free_ char *escaped = NULL, *slice_path = NULL, *p = NULL;
720 char *copy, ids[SD_ID128_STRING_MAX];
721 int r;
722
723 /* Read the invocation ID of a unit off a unit. It's stored in the "trusted.invocation_id" extended attribute
724 * on the cgroup path. */
725
726 r = cg_slice_to_path(slice, &slice_path);
727 if (r < 0)
728 return r;
729
730 escaped = cg_escape(unit);
731 if (!escaped)
732 return -ENOMEM;
733
734 p = strjoin(cgroup_root, "/", slice_path, "/", escaped);
735 if (!p)
736 return -ENOMEM;
737
738 r = cg_get_xattr(SYSTEMD_CGROUP_CONTROLLER, p, "trusted.invocation_id", ids, 32);
739 if (r < 0)
740 return r;
741 if (r != 32)
742 return -EINVAL;
743 ids[32] = 0;
744
745 if (!id128_is_valid(ids))
746 return -EINVAL;
747
748 copy = strdup(ids);
749 if (!copy)
750 return -ENOMEM;
751
752 *ret = copy;
753 return 0;
754 }
755
756 static void dispatch_message_real(
757 Server *s,
758 struct iovec *iovec, unsigned n, unsigned m,
759 const struct ucred *ucred,
760 const struct timeval *tv,
761 const char *label, size_t label_len,
762 const char *unit_id,
763 int priority,
764 pid_t object_pid) {
765
766 char pid[sizeof("_PID=") + DECIMAL_STR_MAX(pid_t)],
767 uid[sizeof("_UID=") + DECIMAL_STR_MAX(uid_t)],
768 gid[sizeof("_GID=") + DECIMAL_STR_MAX(gid_t)],
769 owner_uid[sizeof("_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)],
770 source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)],
771 o_uid[sizeof("OBJECT_UID=") + DECIMAL_STR_MAX(uid_t)],
772 o_gid[sizeof("OBJECT_GID=") + DECIMAL_STR_MAX(gid_t)],
773 o_owner_uid[sizeof("OBJECT_SYSTEMD_OWNER_UID=") + DECIMAL_STR_MAX(uid_t)];
774 uid_t object_uid;
775 gid_t object_gid;
776 char *x;
777 int r;
778 char *t, *c;
779 uid_t realuid = 0, owner = 0, journal_uid;
780 bool owner_valid = false;
781 #ifdef HAVE_AUDIT
782 char audit_session[sizeof("_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
783 audit_loginuid[sizeof("_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)],
784 o_audit_session[sizeof("OBJECT_AUDIT_SESSION=") + DECIMAL_STR_MAX(uint32_t)],
785 o_audit_loginuid[sizeof("OBJECT_AUDIT_LOGINUID=") + DECIMAL_STR_MAX(uid_t)];
786
787 uint32_t audit;
788 uid_t loginuid;
789 #endif
790
791 assert(s);
792 assert(iovec);
793 assert(n > 0);
794 assert(n + N_IOVEC_META_FIELDS + (object_pid > 0 ? N_IOVEC_OBJECT_FIELDS : 0) <= m);
795
796 if (ucred) {
797 realuid = ucred->uid;
798
799 sprintf(pid, "_PID="PID_FMT, ucred->pid);
800 IOVEC_SET_STRING(iovec[n++], pid);
801
802 sprintf(uid, "_UID="UID_FMT, ucred->uid);
803 IOVEC_SET_STRING(iovec[n++], uid);
804
805 sprintf(gid, "_GID="GID_FMT, ucred->gid);
806 IOVEC_SET_STRING(iovec[n++], gid);
807
808 r = get_process_comm(ucred->pid, &t);
809 if (r >= 0) {
810 x = strjoina("_COMM=", t);
811 free(t);
812 IOVEC_SET_STRING(iovec[n++], x);
813 }
814
815 r = get_process_exe(ucred->pid, &t);
816 if (r >= 0) {
817 x = strjoina("_EXE=", t);
818 free(t);
819 IOVEC_SET_STRING(iovec[n++], x);
820 }
821
822 r = get_process_cmdline(ucred->pid, 0, false, &t);
823 if (r >= 0) {
824 x = strjoina("_CMDLINE=", t);
825 free(t);
826 IOVEC_SET_STRING(iovec[n++], x);
827 }
828
829 r = get_process_capeff(ucred->pid, &t);
830 if (r >= 0) {
831 x = strjoina("_CAP_EFFECTIVE=", t);
832 free(t);
833 IOVEC_SET_STRING(iovec[n++], x);
834 }
835
836 #ifdef HAVE_AUDIT
837 r = audit_session_from_pid(ucred->pid, &audit);
838 if (r >= 0) {
839 sprintf(audit_session, "_AUDIT_SESSION=%"PRIu32, audit);
840 IOVEC_SET_STRING(iovec[n++], audit_session);
841 }
842
843 r = audit_loginuid_from_pid(ucred->pid, &loginuid);
844 if (r >= 0) {
845 sprintf(audit_loginuid, "_AUDIT_LOGINUID="UID_FMT, loginuid);
846 IOVEC_SET_STRING(iovec[n++], audit_loginuid);
847 }
848 #endif
849
850 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &c);
851 if (r >= 0) {
852 _cleanup_free_ char *raw_unit = NULL, *raw_slice = NULL;
853 char *session = NULL;
854
855 x = strjoina("_SYSTEMD_CGROUP=", c);
856 IOVEC_SET_STRING(iovec[n++], x);
857
858 r = cg_path_get_session(c, &t);
859 if (r >= 0) {
860 session = strjoina("_SYSTEMD_SESSION=", t);
861 free(t);
862 IOVEC_SET_STRING(iovec[n++], session);
863 }
864
865 if (cg_path_get_owner_uid(c, &owner) >= 0) {
866 owner_valid = true;
867
868 sprintf(owner_uid, "_SYSTEMD_OWNER_UID="UID_FMT, owner);
869 IOVEC_SET_STRING(iovec[n++], owner_uid);
870 }
871
872 if (cg_path_get_unit(c, &raw_unit) >= 0) {
873 x = strjoina("_SYSTEMD_UNIT=", raw_unit);
874 IOVEC_SET_STRING(iovec[n++], x);
875 } else if (unit_id && !session) {
876 x = strjoina("_SYSTEMD_UNIT=", unit_id);
877 IOVEC_SET_STRING(iovec[n++], x);
878 }
879
880 if (cg_path_get_user_unit(c, &t) >= 0) {
881 x = strjoina("_SYSTEMD_USER_UNIT=", t);
882 free(t);
883 IOVEC_SET_STRING(iovec[n++], x);
884 } else if (unit_id && session) {
885 x = strjoina("_SYSTEMD_USER_UNIT=", unit_id);
886 IOVEC_SET_STRING(iovec[n++], x);
887 }
888
889 if (cg_path_get_slice(c, &raw_slice) >= 0) {
890 x = strjoina("_SYSTEMD_SLICE=", raw_slice);
891 IOVEC_SET_STRING(iovec[n++], x);
892 }
893
894 if (cg_path_get_user_slice(c, &t) >= 0) {
895 x = strjoina("_SYSTEMD_USER_SLICE=", t);
896 free(t);
897 IOVEC_SET_STRING(iovec[n++], x);
898 }
899
900 if (raw_slice && raw_unit) {
901 if (get_invocation_id(s->cgroup_root, raw_slice, raw_unit, &t) >= 0) {
902 x = strjoina("_SYSTEMD_INVOCATION_ID=", t);
903 free(t);
904 IOVEC_SET_STRING(iovec[n++], x);
905 }
906 }
907
908 free(c);
909 } else if (unit_id) {
910 x = strjoina("_SYSTEMD_UNIT=", unit_id);
911 IOVEC_SET_STRING(iovec[n++], x);
912 }
913
914 #ifdef HAVE_SELINUX
915 if (mac_selinux_have()) {
916 if (label) {
917 x = alloca(strlen("_SELINUX_CONTEXT=") + label_len + 1);
918
919 *((char*) mempcpy(stpcpy(x, "_SELINUX_CONTEXT="), label, label_len)) = 0;
920 IOVEC_SET_STRING(iovec[n++], x);
921 } else {
922 char *con;
923
924 if (getpidcon(ucred->pid, &con) >= 0) {
925 x = strjoina("_SELINUX_CONTEXT=", con);
926
927 freecon(con);
928 IOVEC_SET_STRING(iovec[n++], x);
929 }
930 }
931 }
932 #endif
933 }
934 assert(n <= m);
935
936 if (object_pid) {
937 r = get_process_uid(object_pid, &object_uid);
938 if (r >= 0) {
939 sprintf(o_uid, "OBJECT_UID="UID_FMT, object_uid);
940 IOVEC_SET_STRING(iovec[n++], o_uid);
941 }
942
943 r = get_process_gid(object_pid, &object_gid);
944 if (r >= 0) {
945 sprintf(o_gid, "OBJECT_GID="GID_FMT, object_gid);
946 IOVEC_SET_STRING(iovec[n++], o_gid);
947 }
948
949 r = get_process_comm(object_pid, &t);
950 if (r >= 0) {
951 x = strjoina("OBJECT_COMM=", t);
952 free(t);
953 IOVEC_SET_STRING(iovec[n++], x);
954 }
955
956 r = get_process_exe(object_pid, &t);
957 if (r >= 0) {
958 x = strjoina("OBJECT_EXE=", t);
959 free(t);
960 IOVEC_SET_STRING(iovec[n++], x);
961 }
962
963 r = get_process_cmdline(object_pid, 0, false, &t);
964 if (r >= 0) {
965 x = strjoina("OBJECT_CMDLINE=", t);
966 free(t);
967 IOVEC_SET_STRING(iovec[n++], x);
968 }
969
970 #ifdef HAVE_AUDIT
971 r = audit_session_from_pid(object_pid, &audit);
972 if (r >= 0) {
973 sprintf(o_audit_session, "OBJECT_AUDIT_SESSION=%"PRIu32, audit);
974 IOVEC_SET_STRING(iovec[n++], o_audit_session);
975 }
976
977 r = audit_loginuid_from_pid(object_pid, &loginuid);
978 if (r >= 0) {
979 sprintf(o_audit_loginuid, "OBJECT_AUDIT_LOGINUID="UID_FMT, loginuid);
980 IOVEC_SET_STRING(iovec[n++], o_audit_loginuid);
981 }
982 #endif
983
984 r = cg_pid_get_path_shifted(object_pid, s->cgroup_root, &c);
985 if (r >= 0) {
986 x = strjoina("OBJECT_SYSTEMD_CGROUP=", c);
987 IOVEC_SET_STRING(iovec[n++], x);
988
989 r = cg_path_get_session(c, &t);
990 if (r >= 0) {
991 x = strjoina("OBJECT_SYSTEMD_SESSION=", t);
992 free(t);
993 IOVEC_SET_STRING(iovec[n++], x);
994 }
995
996 if (cg_path_get_owner_uid(c, &owner) >= 0) {
997 sprintf(o_owner_uid, "OBJECT_SYSTEMD_OWNER_UID="UID_FMT, owner);
998 IOVEC_SET_STRING(iovec[n++], o_owner_uid);
999 }
1000
1001 if (cg_path_get_unit(c, &t) >= 0) {
1002 x = strjoina("OBJECT_SYSTEMD_UNIT=", t);
1003 free(t);
1004 IOVEC_SET_STRING(iovec[n++], x);
1005 }
1006
1007 if (cg_path_get_user_unit(c, &t) >= 0) {
1008 x = strjoina("OBJECT_SYSTEMD_USER_UNIT=", t);
1009 free(t);
1010 IOVEC_SET_STRING(iovec[n++], x);
1011 }
1012
1013 if (cg_path_get_slice(c, &t) >= 0) {
1014 x = strjoina("OBJECT_SYSTEMD_SLICE=", t);
1015 free(t);
1016 IOVEC_SET_STRING(iovec[n++], x);
1017 }
1018
1019 if (cg_path_get_user_slice(c, &t) >= 0) {
1020 x = strjoina("OBJECT_SYSTEMD_USER_SLICE=", t);
1021 free(t);
1022 IOVEC_SET_STRING(iovec[n++], x);
1023 }
1024
1025 free(c);
1026 }
1027 }
1028 assert(n <= m);
1029
1030 if (tv) {
1031 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT, timeval_load(tv));
1032 IOVEC_SET_STRING(iovec[n++], source_time);
1033 }
1034
1035 /* Note that strictly speaking storing the boot id here is
1036 * redundant since the entry includes this in-line
1037 * anyway. However, we need this indexed, too. */
1038 if (!isempty(s->boot_id_field))
1039 IOVEC_SET_STRING(iovec[n++], s->boot_id_field);
1040
1041 if (!isempty(s->machine_id_field))
1042 IOVEC_SET_STRING(iovec[n++], s->machine_id_field);
1043
1044 if (!isempty(s->hostname_field))
1045 IOVEC_SET_STRING(iovec[n++], s->hostname_field);
1046
1047 assert(n <= m);
1048
1049 if (s->split_mode == SPLIT_UID && realuid > 0)
1050 /* Split up strictly by any UID */
1051 journal_uid = realuid;
1052 else if (s->split_mode == SPLIT_LOGIN && realuid > 0 && owner_valid && owner > 0)
1053 /* Split up by login UIDs. We do this only if the
1054 * realuid is not root, in order not to accidentally
1055 * leak privileged information to the user that is
1056 * logged by a privileged process that is part of an
1057 * unprivileged session. */
1058 journal_uid = owner;
1059 else
1060 journal_uid = 0;
1061
1062 write_to_journal(s, journal_uid, iovec, n, priority);
1063 }
1064
1065 void server_driver_message(Server *s, sd_id128_t message_id, const char *format, ...) {
1066 char mid[11 + 32 + 1];
1067 struct iovec iovec[N_IOVEC_META_FIELDS + 5 + N_IOVEC_PAYLOAD_FIELDS];
1068 unsigned n = 0, m;
1069 int r;
1070 va_list ap;
1071 struct ucred ucred = {};
1072
1073 assert(s);
1074 assert(format);
1075
1076 assert_cc(3 == LOG_FAC(LOG_DAEMON));
1077 IOVEC_SET_STRING(iovec[n++], "SYSLOG_FACILITY=3");
1078 IOVEC_SET_STRING(iovec[n++], "SYSLOG_IDENTIFIER=systemd-journald");
1079
1080 IOVEC_SET_STRING(iovec[n++], "_TRANSPORT=driver");
1081 assert_cc(6 == LOG_INFO);
1082 IOVEC_SET_STRING(iovec[n++], "PRIORITY=6");
1083
1084 if (!sd_id128_is_null(message_id)) {
1085 snprintf(mid, sizeof(mid), LOG_MESSAGE_ID(message_id));
1086 IOVEC_SET_STRING(iovec[n++], mid);
1087 }
1088
1089 m = n;
1090
1091 va_start(ap, format);
1092 r = log_format_iovec(iovec, ELEMENTSOF(iovec), &n, false, 0, format, ap);
1093 /* Error handling below */
1094 va_end(ap);
1095
1096 ucred.pid = getpid();
1097 ucred.uid = getuid();
1098 ucred.gid = getgid();
1099
1100 if (r >= 0)
1101 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
1102
1103 while (m < n)
1104 free(iovec[m++].iov_base);
1105
1106 if (r < 0) {
1107 /* We failed to format the message. Emit a warning instead. */
1108 char buf[LINE_MAX];
1109
1110 xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror(-r));
1111
1112 n = 3;
1113 IOVEC_SET_STRING(iovec[n++], "PRIORITY=4");
1114 IOVEC_SET_STRING(iovec[n++], buf);
1115 dispatch_message_real(s, iovec, n, ELEMENTSOF(iovec), &ucred, NULL, NULL, 0, NULL, LOG_INFO, 0);
1116 }
1117 }
1118
1119 void server_dispatch_message(
1120 Server *s,
1121 struct iovec *iovec, unsigned n, unsigned m,
1122 const struct ucred *ucred,
1123 const struct timeval *tv,
1124 const char *label, size_t label_len,
1125 const char *unit_id,
1126 int priority,
1127 pid_t object_pid) {
1128
1129 int rl, r;
1130 _cleanup_free_ char *path = NULL;
1131 uint64_t available = 0;
1132 char *c;
1133
1134 assert(s);
1135 assert(iovec || n == 0);
1136
1137 if (n == 0)
1138 return;
1139
1140 if (LOG_PRI(priority) > s->max_level_store)
1141 return;
1142
1143 /* Stop early in case the information will not be stored
1144 * in a journal. */
1145 if (s->storage == STORAGE_NONE)
1146 return;
1147
1148 if (!ucred)
1149 goto finish;
1150
1151 r = cg_pid_get_path_shifted(ucred->pid, s->cgroup_root, &path);
1152 if (r < 0)
1153 goto finish;
1154
1155 /* example: /user/lennart/3/foobar
1156 * /system/dbus.service/foobar
1157 *
1158 * So let's cut of everything past the third /, since that is
1159 * where user directories start */
1160
1161 c = strchr(path, '/');
1162 if (c) {
1163 c = strchr(c+1, '/');
1164 if (c) {
1165 c = strchr(c+1, '/');
1166 if (c)
1167 *c = 0;
1168 }
1169 }
1170
1171 (void) determine_space(s, &available, NULL);
1172 rl = journal_rate_limit_test(s->rate_limit, path, priority & LOG_PRIMASK, available);
1173 if (rl == 0)
1174 return;
1175
1176 /* Write a suppression message if we suppressed something */
1177 if (rl > 1)
1178 server_driver_message(s, SD_MESSAGE_JOURNAL_DROPPED,
1179 LOG_MESSAGE("Suppressed %u messages from %s", rl - 1, path),
1180 NULL);
1181
1182 finish:
1183 dispatch_message_real(s, iovec, n, m, ucred, tv, label, label_len, unit_id, priority, object_pid);
1184 }
1185
1186 int server_flush_to_var(Server *s) {
1187 sd_id128_t machine;
1188 sd_journal *j = NULL;
1189 char ts[FORMAT_TIMESPAN_MAX];
1190 usec_t start;
1191 unsigned n = 0;
1192 int r;
1193
1194 assert(s);
1195
1196 if (s->storage != STORAGE_AUTO &&
1197 s->storage != STORAGE_PERSISTENT)
1198 return 0;
1199
1200 if (!s->runtime_journal)
1201 return 0;
1202
1203 (void) system_journal_open(s, true);
1204
1205 if (!s->system_journal)
1206 return 0;
1207
1208 log_debug("Flushing to /var...");
1209
1210 start = now(CLOCK_MONOTONIC);
1211
1212 r = sd_id128_get_machine(&machine);
1213 if (r < 0)
1214 return r;
1215
1216 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1217 if (r < 0)
1218 return log_error_errno(r, "Failed to read runtime journal: %m");
1219
1220 sd_journal_set_data_threshold(j, 0);
1221
1222 SD_JOURNAL_FOREACH(j) {
1223 Object *o = NULL;
1224 JournalFile *f;
1225
1226 f = j->current_file;
1227 assert(f && f->current_offset > 0);
1228
1229 n++;
1230
1231 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1232 if (r < 0) {
1233 log_error_errno(r, "Can't read entry: %m");
1234 goto finish;
1235 }
1236
1237 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1238 if (r >= 0)
1239 continue;
1240
1241 if (!shall_try_append_again(s->system_journal, r)) {
1242 log_error_errno(r, "Can't write entry: %m");
1243 goto finish;
1244 }
1245
1246 server_rotate(s);
1247 server_vacuum(s, false);
1248
1249 if (!s->system_journal) {
1250 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1251 r = -EIO;
1252 goto finish;
1253 }
1254
1255 log_debug("Retrying write.");
1256 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset, NULL, NULL, NULL);
1257 if (r < 0) {
1258 log_error_errno(r, "Can't write entry: %m");
1259 goto finish;
1260 }
1261 }
1262
1263 r = 0;
1264
1265 finish:
1266 journal_file_post_change(s->system_journal);
1267
1268 s->runtime_journal = journal_file_close(s->runtime_journal);
1269
1270 if (r >= 0)
1271 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
1272
1273 sd_journal_close(j);
1274
1275 server_driver_message(s, SD_ID128_NULL,
1276 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1277 format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0),
1278 n),
1279 NULL);
1280
1281 return r;
1282 }
1283
1284 int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1285 Server *s = userdata;
1286 struct ucred *ucred = NULL;
1287 struct timeval *tv = NULL;
1288 struct cmsghdr *cmsg;
1289 char *label = NULL;
1290 size_t label_len = 0, m;
1291 struct iovec iovec;
1292 ssize_t n;
1293 int *fds = NULL, v = 0;
1294 unsigned n_fds = 0;
1295
1296 union {
1297 struct cmsghdr cmsghdr;
1298
1299 /* We use NAME_MAX space for the SELinux label
1300 * here. The kernel currently enforces no
1301 * limit, but according to suggestions from
1302 * the SELinux people this will change and it
1303 * will probably be identical to NAME_MAX. For
1304 * now we use that, but this should be updated
1305 * one day when the final limit is known. */
1306 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1307 CMSG_SPACE(sizeof(struct timeval)) +
1308 CMSG_SPACE(sizeof(int)) + /* fd */
1309 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1310 } control = {};
1311
1312 union sockaddr_union sa = {};
1313
1314 struct msghdr msghdr = {
1315 .msg_iov = &iovec,
1316 .msg_iovlen = 1,
1317 .msg_control = &control,
1318 .msg_controllen = sizeof(control),
1319 .msg_name = &sa,
1320 .msg_namelen = sizeof(sa),
1321 };
1322
1323 assert(s);
1324 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
1325
1326 if (revents != EPOLLIN) {
1327 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1328 return -EIO;
1329 }
1330
1331 /* Try to get the right size, if we can. (Not all
1332 * sockets support SIOCINQ, hence we just try, but
1333 * don't rely on it. */
1334 (void) ioctl(fd, SIOCINQ, &v);
1335
1336 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1337 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1338 (size_t) LINE_MAX,
1339 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
1340
1341 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1342 return log_oom();
1343
1344 iovec.iov_base = s->buffer;
1345 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
1346
1347 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1348 if (n < 0) {
1349 if (errno == EINTR || errno == EAGAIN)
1350 return 0;
1351
1352 return log_error_errno(errno, "recvmsg() failed: %m");
1353 }
1354
1355 CMSG_FOREACH(cmsg, &msghdr) {
1356
1357 if (cmsg->cmsg_level == SOL_SOCKET &&
1358 cmsg->cmsg_type == SCM_CREDENTIALS &&
1359 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1360 ucred = (struct ucred*) CMSG_DATA(cmsg);
1361 else if (cmsg->cmsg_level == SOL_SOCKET &&
1362 cmsg->cmsg_type == SCM_SECURITY) {
1363 label = (char*) CMSG_DATA(cmsg);
1364 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1365 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1366 cmsg->cmsg_type == SO_TIMESTAMP &&
1367 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1368 tv = (struct timeval*) CMSG_DATA(cmsg);
1369 else if (cmsg->cmsg_level == SOL_SOCKET &&
1370 cmsg->cmsg_type == SCM_RIGHTS) {
1371 fds = (int*) CMSG_DATA(cmsg);
1372 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1373 }
1374 }
1375
1376 /* And a trailing NUL, just in case */
1377 s->buffer[n] = 0;
1378
1379 if (fd == s->syslog_fd) {
1380 if (n > 0 && n_fds == 0)
1381 server_process_syslog_message(s, strstrip(s->buffer), ucred, tv, label, label_len);
1382 else if (n_fds > 0)
1383 log_warning("Got file descriptors via syslog socket. Ignoring.");
1384
1385 } else if (fd == s->native_fd) {
1386 if (n > 0 && n_fds == 0)
1387 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1388 else if (n == 0 && n_fds == 1)
1389 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1390 else if (n_fds > 0)
1391 log_warning("Got too many file descriptors via native socket. Ignoring.");
1392
1393 } else {
1394 assert(fd == s->audit_fd);
1395
1396 if (n > 0 && n_fds == 0)
1397 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1398 else if (n_fds > 0)
1399 log_warning("Got file descriptors via audit socket. Ignoring.");
1400 }
1401
1402 close_many(fds, n_fds);
1403 return 0;
1404 }
1405
1406 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1407 Server *s = userdata;
1408 int r;
1409
1410 assert(s);
1411
1412 log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
1413
1414 (void) server_flush_to_var(s);
1415 server_sync(s);
1416 server_vacuum(s, false);
1417
1418 r = touch("/run/systemd/journal/flushed");
1419 if (r < 0)
1420 log_warning_errno(r, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
1421
1422 server_space_usage_message(s, NULL);
1423 return 0;
1424 }
1425
1426 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1427 Server *s = userdata;
1428 int r;
1429
1430 assert(s);
1431
1432 log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
1433 server_rotate(s);
1434 server_vacuum(s, true);
1435
1436 if (s->system_journal)
1437 patch_min_use(&s->system_storage);
1438 if (s->runtime_journal)
1439 patch_min_use(&s->runtime_storage);
1440
1441 /* Let clients know when the most recent rotation happened. */
1442 r = write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC));
1443 if (r < 0)
1444 log_warning_errno(r, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
1445
1446 return 0;
1447 }
1448
1449 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1450 Server *s = userdata;
1451
1452 assert(s);
1453
1454 log_received_signal(LOG_INFO, si);
1455
1456 sd_event_exit(s->event, 0);
1457 return 0;
1458 }
1459
1460 static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1461 Server *s = userdata;
1462 int r;
1463
1464 assert(s);
1465
1466 log_debug("Received request to sync from PID " PID_FMT, si->ssi_pid);
1467
1468 server_sync(s);
1469
1470 /* Let clients know when the most recent sync happened. */
1471 r = write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC));
1472 if (r < 0)
1473 log_warning_errno(r, "Failed to write /run/systemd/journal/synced, ignoring: %m");
1474
1475 return 0;
1476 }
1477
1478 static int setup_signals(Server *s) {
1479 int r;
1480
1481 assert(s);
1482
1483 assert_se(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
1484
1485 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1486 if (r < 0)
1487 return r;
1488
1489 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1490 if (r < 0)
1491 return r;
1492
1493 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1494 if (r < 0)
1495 return r;
1496
1497 /* Let's process SIGTERM late, so that we flush all queued
1498 * messages to disk before we exit */
1499 r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1500 if (r < 0)
1501 return r;
1502
1503 /* When journald is invoked on the terminal (when debugging),
1504 * it's useful if C-c is handled equivalent to SIGTERM. */
1505 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
1506 if (r < 0)
1507 return r;
1508
1509 r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1510 if (r < 0)
1511 return r;
1512
1513 /* SIGRTMIN+1 causes an immediate sync. We process this very
1514 * late, so that everything else queued at this point is
1515 * really written to disk. Clients can watch
1516 * /run/systemd/journal/synced with inotify until its mtime
1517 * changes to see when a sync happened. */
1518 r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
1519 if (r < 0)
1520 return r;
1521
1522 r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
1523 if (r < 0)
1524 return r;
1525
1526 return 0;
1527 }
1528
1529 static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
1530 Server *s = data;
1531 int r;
1532
1533 assert(s);
1534
1535 if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_syslog")) {
1536
1537 r = value ? parse_boolean(value) : true;
1538 if (r < 0)
1539 log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value);
1540 else
1541 s->forward_to_syslog = r;
1542
1543 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_kmsg")) {
1544
1545 r = value ? parse_boolean(value) : true;
1546 if (r < 0)
1547 log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value);
1548 else
1549 s->forward_to_kmsg = r;
1550
1551 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_console")) {
1552
1553 r = value ? parse_boolean(value) : true;
1554 if (r < 0)
1555 log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value);
1556 else
1557 s->forward_to_console = r;
1558
1559 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_wall")) {
1560
1561 r = value ? parse_boolean(value) : true;
1562 if (r < 0)
1563 log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value);
1564 else
1565 s->forward_to_wall = r;
1566
1567 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_console")) {
1568
1569 if (proc_cmdline_value_missing(key, value))
1570 return 0;
1571
1572 r = log_level_from_string(value);
1573 if (r < 0)
1574 log_warning("Failed to parse max level console value \"%s\". Ignoring.", value);
1575 else
1576 s->max_level_console = r;
1577
1578 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_store")) {
1579
1580 if (proc_cmdline_value_missing(key, value))
1581 return 0;
1582
1583 r = log_level_from_string(value);
1584 if (r < 0)
1585 log_warning("Failed to parse max level store value \"%s\". Ignoring.", value);
1586 else
1587 s->max_level_store = r;
1588
1589 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_syslog")) {
1590
1591 if (proc_cmdline_value_missing(key, value))
1592 return 0;
1593
1594 r = log_level_from_string(value);
1595 if (r < 0)
1596 log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value);
1597 else
1598 s->max_level_syslog = r;
1599
1600 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_kmsg")) {
1601
1602 if (proc_cmdline_value_missing(key, value))
1603 return 0;
1604
1605 r = log_level_from_string(value);
1606 if (r < 0)
1607 log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value);
1608 else
1609 s->max_level_kmsg = r;
1610
1611 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_wall")) {
1612
1613 if (proc_cmdline_value_missing(key, value))
1614 return 0;
1615
1616 r = log_level_from_string(value);
1617 if (r < 0)
1618 log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value);
1619 else
1620 s->max_level_wall = r;
1621
1622 } else if (startswith(key, "systemd.journald"))
1623 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key);
1624
1625 /* do not warn about state here, since probably systemd already did */
1626 return 0;
1627 }
1628
1629 static int server_parse_config_file(Server *s) {
1630 assert(s);
1631
1632 return config_parse_many_nulstr(PKGSYSCONFDIR "/journald.conf",
1633 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1634 "Journal\0",
1635 config_item_perf_lookup, journald_gperf_lookup,
1636 false, s);
1637 }
1638
1639 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1640 Server *s = userdata;
1641
1642 assert(s);
1643
1644 server_sync(s);
1645 return 0;
1646 }
1647
1648 int server_schedule_sync(Server *s, int priority) {
1649 int r;
1650
1651 assert(s);
1652
1653 if (priority <= LOG_CRIT) {
1654 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1655 server_sync(s);
1656 return 0;
1657 }
1658
1659 if (s->sync_scheduled)
1660 return 0;
1661
1662 if (s->sync_interval_usec > 0) {
1663 usec_t when;
1664
1665 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1666 if (r < 0)
1667 return r;
1668
1669 when += s->sync_interval_usec;
1670
1671 if (!s->sync_event_source) {
1672 r = sd_event_add_time(
1673 s->event,
1674 &s->sync_event_source,
1675 CLOCK_MONOTONIC,
1676 when, 0,
1677 server_dispatch_sync, s);
1678 if (r < 0)
1679 return r;
1680
1681 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1682 } else {
1683 r = sd_event_source_set_time(s->sync_event_source, when);
1684 if (r < 0)
1685 return r;
1686
1687 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1688 }
1689 if (r < 0)
1690 return r;
1691
1692 s->sync_scheduled = true;
1693 }
1694
1695 return 0;
1696 }
1697
1698 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1699 Server *s = userdata;
1700
1701 assert(s);
1702
1703 server_cache_hostname(s);
1704 return 0;
1705 }
1706
1707 static int server_open_hostname(Server *s) {
1708 int r;
1709
1710 assert(s);
1711
1712 s->hostname_fd = open("/proc/sys/kernel/hostname", O_RDONLY|O_CLOEXEC|O_NDELAY|O_NOCTTY);
1713 if (s->hostname_fd < 0)
1714 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
1715
1716 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1717 if (r < 0) {
1718 /* kernels prior to 3.2 don't support polling this file. Ignore
1719 * the failure. */
1720 if (r == -EPERM) {
1721 log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
1722 s->hostname_fd = safe_close(s->hostname_fd);
1723 return 0;
1724 }
1725
1726 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
1727 }
1728
1729 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1730 if (r < 0)
1731 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
1732
1733 return 0;
1734 }
1735
1736 static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1737 Server *s = userdata;
1738 int r;
1739
1740 assert(s);
1741 assert(s->notify_event_source == es);
1742 assert(s->notify_fd == fd);
1743
1744 /* The $NOTIFY_SOCKET is writable again, now send exactly one
1745 * message on it. Either it's the watchdog event, the initial
1746 * READY=1 event or an stdout stream event. If there's nothing
1747 * to write anymore, turn our event source off. The next time
1748 * there's something to send it will be turned on again. */
1749
1750 if (!s->sent_notify_ready) {
1751 static const char p[] =
1752 "READY=1\n"
1753 "STATUS=Processing requests...";
1754 ssize_t l;
1755
1756 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1757 if (l < 0) {
1758 if (errno == EAGAIN)
1759 return 0;
1760
1761 return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1762 }
1763
1764 s->sent_notify_ready = true;
1765 log_debug("Sent READY=1 notification.");
1766
1767 } else if (s->send_watchdog) {
1768
1769 static const char p[] =
1770 "WATCHDOG=1";
1771
1772 ssize_t l;
1773
1774 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1775 if (l < 0) {
1776 if (errno == EAGAIN)
1777 return 0;
1778
1779 return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
1780 }
1781
1782 s->send_watchdog = false;
1783 log_debug("Sent WATCHDOG=1 notification.");
1784
1785 } else if (s->stdout_streams_notify_queue)
1786 /* Dispatch one stream notification event */
1787 stdout_stream_send_notify(s->stdout_streams_notify_queue);
1788
1789 /* Leave us enabled if there's still more to do. */
1790 if (s->send_watchdog || s->stdout_streams_notify_queue)
1791 return 0;
1792
1793 /* There was nothing to do anymore, let's turn ourselves off. */
1794 r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1795 if (r < 0)
1796 return log_error_errno(r, "Failed to turn off notify event source: %m");
1797
1798 return 0;
1799 }
1800
1801 static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
1802 Server *s = userdata;
1803 int r;
1804
1805 assert(s);
1806
1807 s->send_watchdog = true;
1808
1809 r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
1810 if (r < 0)
1811 log_warning_errno(r, "Failed to turn on notify event source: %m");
1812
1813 r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
1814 if (r < 0)
1815 return log_error_errno(r, "Failed to restart watchdog event source: %m");
1816
1817 r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
1818 if (r < 0)
1819 return log_error_errno(r, "Failed to enable watchdog event source: %m");
1820
1821 return 0;
1822 }
1823
1824 static int server_connect_notify(Server *s) {
1825 union sockaddr_union sa = {
1826 .un.sun_family = AF_UNIX,
1827 };
1828 const char *e;
1829 int r;
1830
1831 assert(s);
1832 assert(s->notify_fd < 0);
1833 assert(!s->notify_event_source);
1834
1835 /*
1836 So here's the problem: we'd like to send notification
1837 messages to PID 1, but we cannot do that via sd_notify(),
1838 since that's synchronous, and we might end up blocking on
1839 it. Specifically: given that PID 1 might block on
1840 dbus-daemon during IPC, and dbus-daemon is logging to us,
1841 and might hence block on us, we might end up in a deadlock
1842 if we block on sending PID 1 notification messages — by
1843 generating a full blocking circle. To avoid this, let's
1844 create a non-blocking socket, and connect it to the
1845 notification socket, and then wait for POLLOUT before we
1846 send anything. This should efficiently avoid any deadlocks,
1847 as we'll never block on PID 1, hence PID 1 can safely block
1848 on dbus-daemon which can safely block on us again.
1849
1850 Don't think that this issue is real? It is, see:
1851 https://github.com/systemd/systemd/issues/1505
1852 */
1853
1854 e = getenv("NOTIFY_SOCKET");
1855 if (!e)
1856 return 0;
1857
1858 if ((e[0] != '@' && e[0] != '/') || e[1] == 0) {
1859 log_error("NOTIFY_SOCKET set to an invalid value: %s", e);
1860 return -EINVAL;
1861 }
1862
1863 if (strlen(e) > sizeof(sa.un.sun_path)) {
1864 log_error("NOTIFY_SOCKET path too long: %s", e);
1865 return -EINVAL;
1866 }
1867
1868 s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1869 if (s->notify_fd < 0)
1870 return log_error_errno(errno, "Failed to create notify socket: %m");
1871
1872 (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1873
1874 strncpy(sa.un.sun_path, e, sizeof(sa.un.sun_path));
1875 if (sa.un.sun_path[0] == '@')
1876 sa.un.sun_path[0] = 0;
1877
1878 r = connect(s->notify_fd, &sa.sa, SOCKADDR_UN_LEN(sa.un));
1879 if (r < 0)
1880 return log_error_errno(errno, "Failed to connect to notify socket: %m");
1881
1882 r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1883 if (r < 0)
1884 return log_error_errno(r, "Failed to watch notification socket: %m");
1885
1886 if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
1887 s->send_watchdog = true;
1888
1889 r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s);
1890 if (r < 0)
1891 return log_error_errno(r, "Failed to add watchdog time event: %m");
1892 }
1893
1894 /* This should fire pretty soon, which we'll use to send the
1895 * READY=1 event. */
1896
1897 return 0;
1898 }
1899
1900 int server_init(Server *s) {
1901 _cleanup_fdset_free_ FDSet *fds = NULL;
1902 int n, r, fd;
1903 bool no_sockets;
1904
1905 assert(s);
1906
1907 zero(*s);
1908 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
1909 s->compress = true;
1910 s->seal = true;
1911
1912 s->watchdog_usec = USEC_INFINITY;
1913
1914 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1915 s->sync_scheduled = false;
1916
1917 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1918 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1919
1920 s->forward_to_wall = true;
1921
1922 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1923
1924 s->max_level_store = LOG_DEBUG;
1925 s->max_level_syslog = LOG_DEBUG;
1926 s->max_level_kmsg = LOG_NOTICE;
1927 s->max_level_console = LOG_INFO;
1928 s->max_level_wall = LOG_EMERG;
1929
1930 journal_reset_metrics(&s->system_storage.metrics);
1931 journal_reset_metrics(&s->runtime_storage.metrics);
1932
1933 server_parse_config_file(s);
1934
1935 r = proc_cmdline_parse(parse_proc_cmdline_item, s, PROC_CMDLINE_STRIP_RD_PREFIX);
1936 if (r < 0)
1937 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
1938
1939 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1940 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1941 s->rate_limit_interval, s->rate_limit_burst);
1942 s->rate_limit_interval = s->rate_limit_burst = 0;
1943 }
1944
1945 (void) mkdir_p("/run/systemd/journal", 0755);
1946
1947 s->user_journals = ordered_hashmap_new(NULL);
1948 if (!s->user_journals)
1949 return log_oom();
1950
1951 s->mmap = mmap_cache_new();
1952 if (!s->mmap)
1953 return log_oom();
1954
1955 s->deferred_closes = set_new(NULL);
1956 if (!s->deferred_closes)
1957 return log_oom();
1958
1959 r = sd_event_default(&s->event);
1960 if (r < 0)
1961 return log_error_errno(r, "Failed to create event loop: %m");
1962
1963 n = sd_listen_fds(true);
1964 if (n < 0)
1965 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
1966
1967 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1968
1969 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1970
1971 if (s->native_fd >= 0) {
1972 log_error("Too many native sockets passed.");
1973 return -EINVAL;
1974 }
1975
1976 s->native_fd = fd;
1977
1978 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1979
1980 if (s->stdout_fd >= 0) {
1981 log_error("Too many stdout sockets passed.");
1982 return -EINVAL;
1983 }
1984
1985 s->stdout_fd = fd;
1986
1987 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1988 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1989
1990 if (s->syslog_fd >= 0) {
1991 log_error("Too many /dev/log sockets passed.");
1992 return -EINVAL;
1993 }
1994
1995 s->syslog_fd = fd;
1996
1997 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1998
1999 if (s->audit_fd >= 0) {
2000 log_error("Too many audit sockets passed.");
2001 return -EINVAL;
2002 }
2003
2004 s->audit_fd = fd;
2005
2006 } else {
2007
2008 if (!fds) {
2009 fds = fdset_new();
2010 if (!fds)
2011 return log_oom();
2012 }
2013
2014 r = fdset_put(fds, fd);
2015 if (r < 0)
2016 return log_oom();
2017 }
2018 }
2019
2020 /* Try to restore streams, but don't bother if this fails */
2021 (void) server_restore_streams(s, fds);
2022
2023 if (fdset_size(fds) > 0) {
2024 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
2025 fds = fdset_free(fds);
2026 }
2027
2028 no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
2029
2030 /* always open stdout, syslog, native, and kmsg sockets */
2031
2032 /* systemd-journald.socket: /run/systemd/journal/stdout */
2033 r = server_open_stdout_socket(s);
2034 if (r < 0)
2035 return r;
2036
2037 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
2038 r = server_open_syslog_socket(s);
2039 if (r < 0)
2040 return r;
2041
2042 /* systemd-journald.socket: /run/systemd/journal/socket */
2043 r = server_open_native_socket(s);
2044 if (r < 0)
2045 return r;
2046
2047 /* /dev/ksmg */
2048 r = server_open_dev_kmsg(s);
2049 if (r < 0)
2050 return r;
2051
2052 /* Unless we got *some* sockets and not audit, open audit socket */
2053 if (s->audit_fd >= 0 || no_sockets) {
2054 r = server_open_audit(s);
2055 if (r < 0)
2056 return r;
2057 }
2058
2059 r = server_open_kernel_seqnum(s);
2060 if (r < 0)
2061 return r;
2062
2063 r = server_open_hostname(s);
2064 if (r < 0)
2065 return r;
2066
2067 r = setup_signals(s);
2068 if (r < 0)
2069 return r;
2070
2071 s->udev = udev_new();
2072 if (!s->udev)
2073 return -ENOMEM;
2074
2075 s->rate_limit = journal_rate_limit_new(s->rate_limit_interval, s->rate_limit_burst);
2076 if (!s->rate_limit)
2077 return -ENOMEM;
2078
2079 r = cg_get_root_path(&s->cgroup_root);
2080 if (r < 0)
2081 return r;
2082
2083 server_cache_hostname(s);
2084 server_cache_boot_id(s);
2085 server_cache_machine_id(s);
2086
2087 s->runtime_storage.name = "Runtime journal";
2088 s->system_storage.name = "System journal";
2089
2090 s->runtime_storage.path = strjoin("/run/log/journal/", SERVER_MACHINE_ID(s));
2091 s->system_storage.path = strjoin("/var/log/journal/", SERVER_MACHINE_ID(s));
2092 if (!s->runtime_storage.path || !s->system_storage.path)
2093 return -ENOMEM;
2094
2095 (void) server_connect_notify(s);
2096
2097 return system_journal_open(s, false);
2098 }
2099
2100 void server_maybe_append_tags(Server *s) {
2101 #ifdef HAVE_GCRYPT
2102 JournalFile *f;
2103 Iterator i;
2104 usec_t n;
2105
2106 n = now(CLOCK_REALTIME);
2107
2108 if (s->system_journal)
2109 journal_file_maybe_append_tag(s->system_journal, n);
2110
2111 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
2112 journal_file_maybe_append_tag(f, n);
2113 #endif
2114 }
2115
2116 void server_done(Server *s) {
2117 JournalFile *f;
2118 assert(s);
2119
2120 if (s->deferred_closes) {
2121 journal_file_close_set(s->deferred_closes);
2122 set_free(s->deferred_closes);
2123 }
2124
2125 while (s->stdout_streams)
2126 stdout_stream_free(s->stdout_streams);
2127
2128 if (s->system_journal)
2129 (void) journal_file_close(s->system_journal);
2130
2131 if (s->runtime_journal)
2132 (void) journal_file_close(s->runtime_journal);
2133
2134 while ((f = ordered_hashmap_steal_first(s->user_journals)))
2135 (void) journal_file_close(f);
2136
2137 ordered_hashmap_free(s->user_journals);
2138
2139 sd_event_source_unref(s->syslog_event_source);
2140 sd_event_source_unref(s->native_event_source);
2141 sd_event_source_unref(s->stdout_event_source);
2142 sd_event_source_unref(s->dev_kmsg_event_source);
2143 sd_event_source_unref(s->audit_event_source);
2144 sd_event_source_unref(s->sync_event_source);
2145 sd_event_source_unref(s->sigusr1_event_source);
2146 sd_event_source_unref(s->sigusr2_event_source);
2147 sd_event_source_unref(s->sigterm_event_source);
2148 sd_event_source_unref(s->sigint_event_source);
2149 sd_event_source_unref(s->sigrtmin1_event_source);
2150 sd_event_source_unref(s->hostname_event_source);
2151 sd_event_source_unref(s->notify_event_source);
2152 sd_event_source_unref(s->watchdog_event_source);
2153 sd_event_unref(s->event);
2154
2155 safe_close(s->syslog_fd);
2156 safe_close(s->native_fd);
2157 safe_close(s->stdout_fd);
2158 safe_close(s->dev_kmsg_fd);
2159 safe_close(s->audit_fd);
2160 safe_close(s->hostname_fd);
2161 safe_close(s->notify_fd);
2162
2163 if (s->rate_limit)
2164 journal_rate_limit_free(s->rate_limit);
2165
2166 if (s->kernel_seqnum)
2167 munmap(s->kernel_seqnum, sizeof(uint64_t));
2168
2169 free(s->buffer);
2170 free(s->tty_path);
2171 free(s->cgroup_root);
2172 free(s->hostname_field);
2173
2174 if (s->mmap)
2175 mmap_cache_unref(s->mmap);
2176
2177 udev_unref(s->udev);
2178 }
2179
2180 static const char* const storage_table[_STORAGE_MAX] = {
2181 [STORAGE_AUTO] = "auto",
2182 [STORAGE_VOLATILE] = "volatile",
2183 [STORAGE_PERSISTENT] = "persistent",
2184 [STORAGE_NONE] = "none"
2185 };
2186
2187 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
2188 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
2189
2190 static const char* const split_mode_table[_SPLIT_MAX] = {
2191 [SPLIT_LOGIN] = "login",
2192 [SPLIT_UID] = "uid",
2193 [SPLIT_NONE] = "none",
2194 };
2195
2196 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
2197 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");