]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/journal/journald-server.c
journald: remove unnecessary {}
[thirdparty/systemd.git] / src / journal / journald-server.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #if HAVE_SELINUX
4 #include <selinux/selinux.h>
5 #endif
6 #include <sys/ioctl.h>
7 #include <sys/mman.h>
8 #include <sys/signalfd.h>
9 #include <sys/statvfs.h>
10 #include <linux/sockios.h>
11
12 #include "sd-daemon.h"
13 #include "sd-journal.h"
14 #include "sd-messages.h"
15
16 #include "acl-util.h"
17 #include "alloc-util.h"
18 #include "audit-util.h"
19 #include "cgroup-util.h"
20 #include "conf-parser.h"
21 #include "dirent-util.h"
22 #include "extract-word.h"
23 #include "fd-util.h"
24 #include "fileio.h"
25 #include "format-util.h"
26 #include "fs-util.h"
27 #include "hashmap.h"
28 #include "hostname-util.h"
29 #include "id128-util.h"
30 #include "io-util.h"
31 #include "journal-authenticate.h"
32 #include "journal-file.h"
33 #include "journal-internal.h"
34 #include "journal-vacuum.h"
35 #include "journald-audit.h"
36 #include "journald-context.h"
37 #include "journald-kmsg.h"
38 #include "journald-native.h"
39 #include "journald-rate-limit.h"
40 #include "journald-server.h"
41 #include "journald-stream.h"
42 #include "journald-syslog.h"
43 #include "log.h"
44 #include "missing.h"
45 #include "mkdir.h"
46 #include "parse-util.h"
47 #include "proc-cmdline.h"
48 #include "process-util.h"
49 #include "rm-rf.h"
50 #include "selinux-util.h"
51 #include "signal-util.h"
52 #include "socket-util.h"
53 #include "stdio-util.h"
54 #include "string-table.h"
55 #include "string-util.h"
56 #include "syslog-util.h"
57 #include "user-util.h"
58
/* Upper bound on the number of per-user journal files kept open at the same time. Once this many are open,
 * find_journal() closes the first entry of the ordered hashmap before opening another one. */
#define USER_JOURNALS_MAX 1024

/* Default values for tunables. NOTE(review): presumably applied when the Server object is set up, which is not
 * visible in this part of the file — confirm. */
#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 10000
#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH

/* How long cached disk space figures are considered fresh; cache_space_refresh() only recalculates them once
 * the cached timestamp is at least this old. */
#define RECHECK_SPACE_USEC (30*USEC_PER_SEC)

/* NOTE(review): presumably the send buffer size requested for the notification socket; its use is not visible
 * in this part of the file — confirm. */
#define NOTIFY_SNDBUF_SIZE (8*1024*1024)

/* The period to insert between posting changes for coalescing */
#define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)

/* Pick a good default that is likely to fit into AF_UNIX and AF_INET SOCK_DGRAM datagrams, and even leaves some room
 * for a bit of additional metadata. */
#define DEFAULT_LINE_MAX (48*1024)

/* Size limit for the set of deferred closes: server_vacuum_deferred_closes() starts closing entries once the set
 * has reached this size, and continues until it is below it again. */
#define DEFERRED_CLOSES_MAX (4096)
78
79 static int determine_path_usage(Server *s, const char *path, uint64_t *ret_used, uint64_t *ret_free) {
80 _cleanup_closedir_ DIR *d = NULL;
81 struct dirent *de;
82 struct statvfs ss;
83
84 assert(ret_used);
85 assert(ret_free);
86
87 d = opendir(path);
88 if (!d)
89 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR,
90 errno, "Failed to open %s: %m", path);
91
92 if (fstatvfs(dirfd(d), &ss) < 0)
93 return log_error_errno(errno, "Failed to fstatvfs(%s): %m", path);
94
95 *ret_free = ss.f_bsize * ss.f_bavail;
96 *ret_used = 0;
97 FOREACH_DIRENT_ALL(de, d, break) {
98 struct stat st;
99
100 if (!endswith(de->d_name, ".journal") &&
101 !endswith(de->d_name, ".journal~"))
102 continue;
103
104 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
105 log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", path, de->d_name);
106 continue;
107 }
108
109 if (!S_ISREG(st.st_mode))
110 continue;
111
112 *ret_used += (uint64_t) st.st_blocks * 512UL;
113 }
114
115 return 0;
116 }
117
118 static void cache_space_invalidate(JournalStorageSpace *space) {
119 zero(*space);
120 }
121
122 static int cache_space_refresh(Server *s, JournalStorage *storage) {
123 JournalStorageSpace *space;
124 JournalMetrics *metrics;
125 uint64_t vfs_used, vfs_avail, avail;
126 usec_t ts;
127 int r;
128
129 assert(s);
130
131 metrics = &storage->metrics;
132 space = &storage->space;
133
134 ts = now(CLOCK_MONOTONIC);
135
136 if (space->timestamp != 0 && space->timestamp + RECHECK_SPACE_USEC > ts)
137 return 0;
138
139 r = determine_path_usage(s, storage->path, &vfs_used, &vfs_avail);
140 if (r < 0)
141 return r;
142
143 space->vfs_used = vfs_used;
144 space->vfs_available = vfs_avail;
145
146 avail = LESS_BY(vfs_avail, metrics->keep_free);
147
148 space->limit = MIN(MAX(vfs_used + avail, metrics->min_use), metrics->max_use);
149 space->available = LESS_BY(space->limit, vfs_used);
150 space->timestamp = ts;
151 return 1;
152 }
153
154 static void patch_min_use(JournalStorage *storage) {
155 assert(storage);
156
157 /* Let's bump the min_use limit to the current usage on disk. We do
158 * this when starting up and first opening the journal files. This way
159 * sudden spikes in disk usage will not cause journald to vacuum files
160 * without bounds. Note that this means that only a restart of journald
161 * will make it reset this value. */
162
163 storage->metrics.min_use = MAX(storage->metrics.min_use, storage->space.vfs_used);
164 }
165
166 static int determine_space(Server *s, uint64_t *available, uint64_t *limit) {
167 JournalStorage *js;
168 int r;
169
170 assert(s);
171
172 js = s->system_journal ? &s->system_storage : &s->runtime_storage;
173
174 r = cache_space_refresh(s, js);
175 if (r >= 0) {
176 if (available)
177 *available = js->space.available;
178 if (limit)
179 *limit = js->space.limit;
180 }
181 return r;
182 }
183
184 void server_space_usage_message(Server *s, JournalStorage *storage) {
185 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
186 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
187 JournalMetrics *metrics;
188
189 assert(s);
190
191 if (!storage)
192 storage = s->system_journal ? &s->system_storage : &s->runtime_storage;
193
194 if (cache_space_refresh(s, storage) < 0)
195 return;
196
197 metrics = &storage->metrics;
198 format_bytes(fb1, sizeof(fb1), storage->space.vfs_used);
199 format_bytes(fb2, sizeof(fb2), metrics->max_use);
200 format_bytes(fb3, sizeof(fb3), metrics->keep_free);
201 format_bytes(fb4, sizeof(fb4), storage->space.vfs_available);
202 format_bytes(fb5, sizeof(fb5), storage->space.limit);
203 format_bytes(fb6, sizeof(fb6), storage->space.available);
204
205 server_driver_message(s, 0,
206 "MESSAGE_ID=" SD_MESSAGE_JOURNAL_USAGE_STR,
207 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
208 storage->name, storage->path, fb1, fb5, fb6),
209 "JOURNAL_NAME=%s", storage->name,
210 "JOURNAL_PATH=%s", storage->path,
211 "CURRENT_USE=%"PRIu64, storage->space.vfs_used,
212 "CURRENT_USE_PRETTY=%s", fb1,
213 "MAX_USE=%"PRIu64, metrics->max_use,
214 "MAX_USE_PRETTY=%s", fb2,
215 "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free,
216 "DISK_KEEP_FREE_PRETTY=%s", fb3,
217 "DISK_AVAILABLE=%"PRIu64, storage->space.vfs_available,
218 "DISK_AVAILABLE_PRETTY=%s", fb4,
219 "LIMIT=%"PRIu64, storage->space.limit,
220 "LIMIT_PRETTY=%s", fb5,
221 "AVAILABLE=%"PRIu64, storage->space.available,
222 "AVAILABLE_PRETTY=%s", fb6,
223 NULL);
224 }
225
226 static bool uid_for_system_journal(uid_t uid) {
227
228 /* Returns true if the specified UID shall get its data stored in the system journal*/
229
230 return uid_is_system(uid) || uid_is_dynamic(uid) || uid == UID_NOBODY;
231 }
232
233 static void server_add_acls(JournalFile *f, uid_t uid) {
234 #if HAVE_ACL
235 int r;
236 #endif
237 assert(f);
238
239 #if HAVE_ACL
240 if (uid_for_system_journal(uid))
241 return;
242
243 r = add_acls_for_user(f->fd, uid);
244 if (r < 0)
245 log_warning_errno(r, "Failed to set ACL on %s, ignoring: %m", f->path);
246 #endif
247 }
248
249 static int open_journal(
250 Server *s,
251 bool reliably,
252 const char *fname,
253 int flags,
254 bool seal,
255 JournalMetrics *metrics,
256 JournalFile **ret) {
257
258 JournalFile *f;
259 int r;
260
261 assert(s);
262 assert(fname);
263 assert(ret);
264
265 if (reliably)
266 r = journal_file_open_reliably(fname, flags, 0640, s->compress.enabled, s->compress.threshold_bytes,
267 seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
268 else
269 r = journal_file_open(-1, fname, flags, 0640, s->compress.enabled, s->compress.threshold_bytes, seal,
270 metrics, s->mmap, s->deferred_closes, NULL, &f);
271
272 if (r < 0)
273 return r;
274
275 r = journal_file_enable_post_change_timer(f, s->event, POST_CHANGE_TIMER_INTERVAL_USEC);
276 if (r < 0) {
277 (void) journal_file_close(f);
278 return r;
279 }
280
281 *ret = f;
282 return r;
283 }
284
/* Returns true if the flag file /run/systemd/journal/flushed exists. */
static bool flushed_flag_is_set(void) {
        if (access("/run/systemd/journal/flushed", F_OK) < 0)
                return false;

        return true;
}
288
/* Opens the system journal and/or the runtime journal, as far as they are not open yet and the configured
 * storage mode permits it.
 *
 * The system journal is only opened in persistent or auto storage mode, and only if a flush was explicitly
 * requested or the "flushed" flag file exists. The runtime journal is opened unless storage is disabled: if
 * the system journal is open it is only opened when it already exists (so that it can be flushed), otherwise it
 * is created if necessary.
 *
 * Failure to open the system journal, or to open an existing runtime journal for flushing, is not treated as an
 * error. A negative value is only returned when the runtime journal is needed but cannot be opened or
 * created. */
static int system_journal_open(Server *s, bool flush_requested) {
        const char *fn;
        int r = 0;

        if (!s->system_journal &&
            IN_SET(s->storage, STORAGE_PERSISTENT, STORAGE_AUTO) &&
            (flush_requested || flushed_flag_is_set())) {

                /* If in auto mode: first try to create the machine
                 * path, but not the prefix.
                 *
                 * If in persistent mode: create /var/log/journal and
                 * the machine path */

                if (s->storage == STORAGE_PERSISTENT)
                        (void) mkdir_p("/var/log/journal/", 0755);

                (void) mkdir(s->system_storage.path, 0755);

                fn = strjoina(s->system_storage.path, "/system.journal");
                r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &s->system_journal);
                if (r >= 0) {
                        server_add_acls(s->system_journal, 0);
                        (void) cache_space_refresh(s, &s->system_storage);
                        patch_min_use(&s->system_storage);
                } else {
                        /* A missing directory or a read-only file system is not worth a warning */
                        if (!IN_SET(r, -ENOENT, -EROFS))
                                log_warning_errno(r, "Failed to open system journal: %m");

                        r = 0;
                }

                /* If the runtime journal is open, and we're post-flush, we're
                 * recovering from a failed system journal rotate (ENOSPC)
                 * for which the runtime journal was reopened.
                 *
                 * Perform an implicit flush to var, leaving the runtime
                 * journal closed, now that the system journal is back.
                 */
                if (!flush_requested)
                        (void) server_flush_to_var(s, true);
        }

        if (!s->runtime_journal &&
            (s->storage != STORAGE_NONE)) {

                fn = strjoina(s->runtime_storage.path, "/system.journal");

                if (s->system_journal) {

                        /* Try to open the runtime journal, but only
                         * if it already exists, so that we can flush
                         * it into the system journal */

                        r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_storage.metrics, &s->runtime_journal);
                        if (r < 0) {
                                if (r != -ENOENT)
                                        log_warning_errno(r, "Failed to open runtime journal: %m");

                                r = 0;
                        }

                } else {

                        /* OK, we really need the runtime journal, so create
                         * it if necessary. */

                        (void) mkdir("/run/log", 0755);
                        (void) mkdir("/run/log/journal", 0755);
                        (void) mkdir_parents(fn, 0750);

                        r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_storage.metrics, &s->runtime_journal);
                        if (r < 0)
                                return log_error_errno(r, "Failed to open runtime journal: %m");
                }

                if (s->runtime_journal) {
                        server_add_acls(s->runtime_journal, 0);
                        (void) cache_space_refresh(s, &s->runtime_storage);
                        patch_min_use(&s->runtime_storage);
                }
        }

        return r;
}
374
375 static JournalFile* find_journal(Server *s, uid_t uid) {
376 _cleanup_free_ char *p = NULL;
377 int r;
378 JournalFile *f;
379 sd_id128_t machine;
380
381 assert(s);
382
383 /* A rotate that fails to create the new journal (ENOSPC) leaves the
384 * rotated journal as NULL. Unless we revisit opening, even after
385 * space is made available we'll continue to return NULL indefinitely.
386 *
387 * system_journal_open() is a noop if the journals are already open, so
388 * we can just call it here to recover from failed rotates (or anything
389 * else that's left the journals as NULL).
390 *
391 * Fixes https://github.com/systemd/systemd/issues/3968 */
392 (void) system_journal_open(s, false);
393
394 /* We split up user logs only on /var, not on /run. If the
395 * runtime file is open, we write to it exclusively, in order
396 * to guarantee proper order as soon as we flush /run to
397 * /var and close the runtime file. */
398
399 if (s->runtime_journal)
400 return s->runtime_journal;
401
402 if (uid_for_system_journal(uid))
403 return s->system_journal;
404
405 f = ordered_hashmap_get(s->user_journals, UID_TO_PTR(uid));
406 if (f)
407 return f;
408
409 r = sd_id128_get_machine(&machine);
410 if (r < 0) {
411 log_debug_errno(r, "Failed to determine machine ID, using system log: %m");
412 return s->system_journal;
413 }
414
415 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
416 SD_ID128_FORMAT_VAL(machine), uid) < 0) {
417 log_oom();
418 return s->system_journal;
419 }
420
421 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
422 /* Too many open? Then let's close one */
423 f = ordered_hashmap_steal_first(s->user_journals);
424 assert(f);
425 (void) journal_file_close(f);
426 }
427
428 r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &f);
429 if (r < 0)
430 return s->system_journal;
431
432 server_add_acls(f, uid);
433
434 r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f);
435 if (r < 0) {
436 (void) journal_file_close(f);
437 return s->system_journal;
438 }
439
440 return f;
441 }
442
443 static int do_rotate(
444 Server *s,
445 JournalFile **f,
446 const char* name,
447 bool seal,
448 uint32_t uid) {
449
450 int r;
451 assert(s);
452
453 if (!*f)
454 return -EINVAL;
455
456 r = journal_file_rotate(f, s->compress.enabled, s->compress.threshold_bytes, seal, s->deferred_closes);
457 if (r < 0) {
458 if (*f)
459 return log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
460 else
461 return log_error_errno(r, "Failed to create new %s journal: %m", name);
462 }
463
464 server_add_acls(*f, uid);
465
466 return r;
467 }
468
469 static void server_process_deferred_closes(Server *s) {
470 JournalFile *f;
471 Iterator i;
472
473 /* Perform any deferred closes which aren't still offlining. */
474 SET_FOREACH(f, s->deferred_closes, i) {
475 if (journal_file_is_offlining(f))
476 continue;
477
478 (void) set_remove(s->deferred_closes, f);
479 (void) journal_file_close(f);
480 }
481 }
482
483 static void server_vacuum_deferred_closes(Server *s) {
484 assert(s);
485
486 /* Make some room in the deferred closes list, so that it doesn't grow without bounds */
487 if (set_size(s->deferred_closes) < DEFERRED_CLOSES_MAX)
488 return;
489
490 /* Let's first remove all journal files that might already have completed closing */
491 server_process_deferred_closes(s);
492
493 /* And now, let's close some more until we reach the limit again. */
494 while (set_size(s->deferred_closes) >= DEFERRED_CLOSES_MAX) {
495 JournalFile *f;
496
497 assert_se(f = set_steal_first(s->deferred_closes));
498 journal_file_close(f);
499 }
500 }
501
502 static int open_user_journal_directory(Server *s, DIR **ret_dir, char **ret_path) {
503 _cleanup_closedir_ DIR *dir = NULL;
504 _cleanup_free_ char *path = NULL;
505 sd_id128_t machine;
506 int r;
507
508 assert(s);
509
510 r = sd_id128_get_machine(&machine);
511 if (r < 0)
512 return log_error_errno(r, "Failed to determine machine ID, ignoring: %m");
513
514 if (asprintf(&path, "/var/log/journal/" SD_ID128_FORMAT_STR "/", SD_ID128_FORMAT_VAL(machine)) < 0)
515 return log_oom();
516
517 dir = opendir(path);
518 if (!dir)
519 return log_error_errno(errno, "Failed to open user journal directory '%s': %m", path);
520
521 if (ret_dir)
522 *ret_dir = TAKE_PTR(dir);
523 if (ret_path)
524 *ret_path = TAKE_PTR(path);
525
526 return 0;
527 }
528
/* Rotates all journal files: the runtime and system journals, all user journals currently open, and — if we are
 * writing to /var — also the user journals found on disk that are not open at the moment. The latter are opened
 * briefly, archived and queued for closing. Failures are logged and otherwise ignored. Finally, deferred closes
 * that have completed are processed. */
void server_rotate(Server *s) {
        _cleanup_free_ char *path = NULL;
        _cleanup_closedir_ DIR *d = NULL;
        JournalFile *f;
        Iterator i;
        void *k;
        int r;

        log_debug("Rotating...");

        /* First, rotate the system journal (either in its runtime flavour or in its system flavour) */
        (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
        (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);

        /* Then, rotate all user journals we have open (keeping them open) */
        ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
                r = do_rotate(s, &f, "user", s->seal, PTR_TO_UID(k));
                if (r >= 0)
                        /* do_rotate() updated f, make the hashmap point to the new file */
                        ordered_hashmap_replace(s->user_journals, k, f);
                else if (!f)
                        /* Old file has been closed and deallocated */
                        ordered_hashmap_remove(s->user_journals, k);
        }

        /* Finally, also rotate all user journals we currently do not have open. (But do so only if we actually have
         * access to /var, i.e. are not in the log-to-runtime-journal mode). */
        if (!s->runtime_journal &&
            open_user_journal_directory(s, &d, &path) >= 0) {

                struct dirent *de;

                FOREACH_DIRENT(de, d, log_warning_errno(errno, "Failed to enumerate %s, ignoring: %m", path)) {
                        _cleanup_free_ char *u = NULL, *full = NULL;
                        _cleanup_close_ int fd = -1;
                        const char *a, *b;
                        uid_t uid;

                        /* We are only interested in files named user-<UID>.journal */
                        a = startswith(de->d_name, "user-");
                        if (!a)
                                continue;
                        b = endswith(de->d_name, ".journal");
                        if (!b)
                                continue;

                        /* The part between prefix and suffix is the UID */
                        u = strndup(a, b-a);
                        if (!u) {
                                log_oom();
                                break;
                        }

                        r = parse_uid(u, &uid);
                        if (r < 0) {
                                log_debug_errno(r, "Failed to parse UID from file name '%s', ignoring: %m", de->d_name);
                                continue;
                        }

                        /* Already rotated in the above loop? i.e. is it an open user journal? */
                        if (ordered_hashmap_contains(s->user_journals, UID_TO_PTR(uid)))
                                continue;

                        /* path carries a trailing slash already, hence plain concatenation is sufficient */
                        full = strjoin(path, de->d_name);
                        if (!full) {
                                log_oom();
                                break;
                        }

                        fd = openat(dirfd(d), de->d_name, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW|O_NONBLOCK);
                        if (fd < 0) {
                                log_full_errno(IN_SET(errno, ELOOP, ENOENT) ? LOG_DEBUG : LOG_WARNING, errno,
                                               "Failed to open journal file '%s' for rotation: %m", full);
                                continue;
                        }

                        /* Make some room in the set of deferred close()s */
                        server_vacuum_deferred_closes(s);

                        /* Open the file briefly, so that we can archive it */
                        r = journal_file_open(fd,
                                              full,
                                              O_RDWR,
                                              0640,
                                              s->compress.enabled,
                                              s->compress.threshold_bytes,
                                              s->seal,
                                              &s->system_storage.metrics,
                                              s->mmap,
                                              s->deferred_closes,
                                              NULL,
                                              &f);
                        if (r < 0) {
                                log_warning_errno(r, "Failed to read journal file %s for rotation, trying to move it out of the way: %m", full);

                                r = journal_file_dispose(dirfd(d), de->d_name);
                                if (r < 0)
                                        log_warning_errno(r, "Failed to move %s out of the way, ignoring: %m", full);
                                else
                                        log_debug("Successfully moved %s out of the way.", full);

                                continue;
                        }

                        TAKE_FD(fd); /* Donated to journal_file_open() */

                        r = journal_file_archive(f);
                        if (r < 0)
                                log_debug_errno(r, "Failed to archive journal file '%s', ignoring: %m", full);

                        f = journal_initiate_close(f, s->deferred_closes);
                }
        }

        server_process_deferred_closes(s);
}
642
643 void server_sync(Server *s) {
644 JournalFile *f;
645 Iterator i;
646 int r;
647
648 if (s->system_journal) {
649 r = journal_file_set_offline(s->system_journal, false);
650 if (r < 0)
651 log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
652 }
653
654 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i) {
655 r = journal_file_set_offline(f, false);
656 if (r < 0)
657 log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
658 }
659
660 if (s->sync_event_source) {
661 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
662 if (r < 0)
663 log_error_errno(r, "Failed to disable sync timer source: %m");
664 }
665
666 s->sync_scheduled = false;
667 }
668
669 static void do_vacuum(Server *s, JournalStorage *storage, bool verbose) {
670
671 int r;
672
673 assert(s);
674 assert(storage);
675
676 (void) cache_space_refresh(s, storage);
677
678 if (verbose)
679 server_space_usage_message(s, storage);
680
681 r = journal_directory_vacuum(storage->path, storage->space.limit,
682 storage->metrics.n_max_files, s->max_retention_usec,
683 &s->oldest_file_usec, verbose);
684 if (r < 0 && r != -ENOENT)
685 log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", storage->path);
686
687 cache_space_invalidate(&storage->space);
688 }
689
690 int server_vacuum(Server *s, bool verbose) {
691 assert(s);
692
693 log_debug("Vacuuming...");
694
695 s->oldest_file_usec = 0;
696
697 if (s->system_journal)
698 do_vacuum(s, &s->system_storage, verbose);
699 if (s->runtime_journal)
700 do_vacuum(s, &s->runtime_storage, verbose);
701
702 return 0;
703 }
704
705 static void server_cache_machine_id(Server *s) {
706 sd_id128_t id;
707 int r;
708
709 assert(s);
710
711 r = sd_id128_get_machine(&id);
712 if (r < 0)
713 return;
714
715 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
716 }
717
718 static void server_cache_boot_id(Server *s) {
719 sd_id128_t id;
720 int r;
721
722 assert(s);
723
724 r = sd_id128_get_boot(&id);
725 if (r < 0)
726 return;
727
728 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
729 }
730
731 static void server_cache_hostname(Server *s) {
732 _cleanup_free_ char *t = NULL;
733 char *x;
734
735 assert(s);
736
737 t = gethostname_malloc();
738 if (!t)
739 return;
740
741 x = strappend("_HOSTNAME=", t);
742 if (!x)
743 return;
744
745 free(s->hostname_field);
746 s->hostname_field = x;
747 }
748
749 static bool shall_try_append_again(JournalFile *f, int r) {
750 switch(r) {
751
752 case -E2BIG: /* Hit configured limit */
753 case -EFBIG: /* Hit fs limit */
754 case -EDQUOT: /* Quota limit hit */
755 case -ENOSPC: /* Disk full */
756 log_debug("%s: Allocation limit reached, rotating.", f->path);
757 return true;
758
759 case -EIO: /* I/O error of some kind (mmap) */
760 log_warning("%s: IO error, rotating.", f->path);
761 return true;
762
763 case -EHOSTDOWN: /* Other machine */
764 log_info("%s: Journal file from other machine, rotating.", f->path);
765 return true;
766
767 case -EBUSY: /* Unclean shutdown */
768 log_info("%s: Unclean shutdown, rotating.", f->path);
769 return true;
770
771 case -EPROTONOSUPPORT: /* Unsupported feature */
772 log_info("%s: Unsupported feature, rotating.", f->path);
773 return true;
774
775 case -EBADMSG: /* Corrupted */
776 case -ENODATA: /* Truncated */
777 case -ESHUTDOWN: /* Already archived */
778 log_warning("%s: Journal file corrupted, rotating.", f->path);
779 return true;
780
781 case -EIDRM: /* Journal file has been deleted */
782 log_warning("%s: Journal file has been deleted, rotating.", f->path);
783 return true;
784
785 case -ETXTBSY: /* Journal file is from the future */
786 log_warning("%s: Journal file is from the future, rotating.", f->path);
787 return true;
788
789 default:
790 return false;
791 }
792 }
793
/* Appends one entry, made up of the n fields in iovec, to the journal responsible for 'uid'.
 *
 * The journals are rotated and vacuumed before writing if the realtime clock jumped backwards or if rotation is
 * suggested for the target file. If appending fails with an error that shall_try_append_again() considers
 * recoverable, and we did not already rotate and vacuum for this entry, the journals are rotated and vacuumed
 * and the write is retried exactly once. After a successful write a sync is scheduled according to 'priority'.
 * Failures are logged, the entry is then dropped. If no journal is available the entry is dropped without
 * logging anything. */
static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, size_t n, int priority) {
        bool vacuumed = false, rotate = false;
        struct dual_timestamp ts;
        JournalFile *f;
        int r;

        assert(s);
        assert(iovec);
        assert(n > 0);

        /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
         * the source time, and not even the time the event was originally seen, but instead simply the time we started
         * processing it, as we want strictly linear ordering in what we write out.) */
        assert_se(sd_event_now(s->event, CLOCK_REALTIME, &ts.realtime) >= 0);
        assert_se(sd_event_now(s->event, CLOCK_MONOTONIC, &ts.monotonic) >= 0);

        if (ts.realtime < s->last_realtime_clock) {
                /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
                 * regular operation. However, when it does happen, then we should make sure that we start fresh files
                 * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
                 * bisection works correctly. */

                log_debug("Time jumped backwards, rotating.");
                rotate = true;

                /* Note that f is not set on this path; it is looked up below, after rotation. */
        } else {

                f = find_journal(s, uid);
                if (!f)
                        return;

                if (journal_file_rotate_suggested(f, s->max_file_usec)) {
                        log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
                        rotate = true;
                }
        }

        if (rotate) {
                server_rotate(s);
                server_vacuum(s, false);
                vacuumed = true;

                /* Rotation replaces the journal file objects, hence look up the target file (again) */
                f = find_journal(s, uid);
                if (!f)
                        return;
        }

        s->last_realtime_clock = ts.realtime;

        r = journal_file_append_entry(f, &ts, NULL, iovec, n, &s->seqnum, NULL, NULL);
        if (r >= 0) {
                server_schedule_sync(s, priority);
                return;
        }

        /* If we rotated and vacuumed for this entry already, doing so again is not attempted. */
        if (vacuumed || !shall_try_append_again(f, r)) {
                log_error_errno(r, "Failed to write entry (%zu items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
                return;
        }

        server_rotate(s);
        server_vacuum(s, false);

        f = find_journal(s, uid);
        if (!f)
                return;

        log_debug("Retrying write.");
        r = journal_file_append_entry(f, &ts, NULL, iovec, n, &s->seqnum, NULL, NULL);
        if (r < 0)
                log_error_errno(r, "Failed to write entry (%zu items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
        else
                server_schedule_sync(s, priority);
}
867
/* Helpers that append one "FIELD=value" entry to an iovec array, if the value is set.
 *
 * All of them take the iovec array and the index variable n (which is incremented when an entry is added),
 * and the field name as string literal WITHOUT the trailing "=" (it is appended here). The string is
 * allocated on the stack of the calling function (newa()/strjoina()), hence these have to be macros, and the
 * entries remain valid only until the calling function returns.
 *
 * Each macro expands to a single statement (do { … } while (false)), so that it is safe to use in unbraced
 * if/else bodies. Invoke them like a function call, followed by a semicolon. */

/* Adds a numeric field, formatted with 'format', if isset(value) is true. 'type' is the value's type and
 * is used to size the buffer. */
#define IOVEC_ADD_NUMERIC_FIELD(iovec, n, value, type, isset, format, field)                    \
        do {                                                                                    \
                if (isset(value)) {                                                             \
                        char *k;                                                                \
                        k = newa(char, STRLEN(field "=") + DECIMAL_STR_MAX(type) + 1);          \
                        sprintf(k, field "=" format, value);                                    \
                        iovec[n++] = IOVEC_MAKE_STRING(k);                                      \
                }                                                                               \
        } while (false)

/* Adds a string field, if the string is neither NULL nor empty. */
#define IOVEC_ADD_STRING_FIELD(iovec, n, value, field)                                          \
        do {                                                                                    \
                if (!isempty(value)) {                                                          \
                        char *k;                                                                \
                        k = strjoina(field "=", value);                                         \
                        iovec[n++] = IOVEC_MAKE_STRING(k);                                      \
                }                                                                               \
        } while (false)

/* Adds a 128bit ID field, if the ID is not the null ID. */
#define IOVEC_ADD_ID128_FIELD(iovec, n, value, field)                                           \
        do {                                                                                    \
                if (!sd_id128_is_null(value)) {                                                 \
                        char *k;                                                                \
                        k = newa(char, STRLEN(field "=") + SD_ID128_STRING_MAX);                \
                        sd_id128_to_string(value, stpcpy(k, field "="));                        \
                        iovec[n++] = IOVEC_MAKE_STRING(k);                                      \
                }                                                                               \
        } while (false)

/* Adds a field from a buffer of value_size bytes that need not be NUL terminated, if value_size is
 * non-zero. */
#define IOVEC_ADD_SIZED_FIELD(iovec, n, value, value_size, field)                               \
        do {                                                                                    \
                if ((value_size) > 0) {                                                         \
                        char *k;                                                                \
                        k = newa(char, STRLEN(field "=") + (value_size) + 1);                   \
                        *((char*) mempcpy(stpcpy(k, field "="), value, (value_size))) = 0;      \
                        iovec[n++] = IOVEC_MAKE_STRING(k);                                      \
                }                                                                               \
        } while (false)
898
899 static void dispatch_message_real(
900 Server *s,
901 struct iovec *iovec, size_t n, size_t m,
902 const ClientContext *c,
903 const struct timeval *tv,
904 int priority,
905 pid_t object_pid) {
906
907 char source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)];
908 uid_t journal_uid;
909 ClientContext *o;
910
911 assert(s);
912 assert(iovec);
913 assert(n > 0);
914 assert(n +
915 N_IOVEC_META_FIELDS +
916 (pid_is_valid(object_pid) ? N_IOVEC_OBJECT_FIELDS : 0) +
917 client_context_extra_fields_n_iovec(c) <= m);
918
919 if (c) {
920 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->pid, pid_t, pid_is_valid, PID_FMT, "_PID");
921 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->uid, uid_t, uid_is_valid, UID_FMT, "_UID");
922 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->gid, gid_t, gid_is_valid, GID_FMT, "_GID");
923
924 IOVEC_ADD_STRING_FIELD(iovec, n, c->comm, "_COMM");
925 IOVEC_ADD_STRING_FIELD(iovec, n, c->exe, "_EXE");
926 IOVEC_ADD_STRING_FIELD(iovec, n, c->cmdline, "_CMDLINE");
927 IOVEC_ADD_STRING_FIELD(iovec, n, c->capeff, "_CAP_EFFECTIVE");
928
929 IOVEC_ADD_SIZED_FIELD(iovec, n, c->label, c->label_size, "_SELINUX_CONTEXT");
930
931 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "_AUDIT_SESSION");
932 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->loginuid, uid_t, uid_is_valid, UID_FMT, "_AUDIT_LOGINUID");
933
934 IOVEC_ADD_STRING_FIELD(iovec, n, c->cgroup, "_SYSTEMD_CGROUP");
935 IOVEC_ADD_STRING_FIELD(iovec, n, c->session, "_SYSTEMD_SESSION");
936 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->owner_uid, uid_t, uid_is_valid, UID_FMT, "_SYSTEMD_OWNER_UID");
937 IOVEC_ADD_STRING_FIELD(iovec, n, c->unit, "_SYSTEMD_UNIT");
938 IOVEC_ADD_STRING_FIELD(iovec, n, c->user_unit, "_SYSTEMD_USER_UNIT");
939 IOVEC_ADD_STRING_FIELD(iovec, n, c->slice, "_SYSTEMD_SLICE");
940 IOVEC_ADD_STRING_FIELD(iovec, n, c->user_slice, "_SYSTEMD_USER_SLICE");
941
942 IOVEC_ADD_ID128_FIELD(iovec, n, c->invocation_id, "_SYSTEMD_INVOCATION_ID");
943
944 if (c->extra_fields_n_iovec > 0) {
945 memcpy(iovec + n, c->extra_fields_iovec, c->extra_fields_n_iovec * sizeof(struct iovec));
946 n += c->extra_fields_n_iovec;
947 }
948 }
949
950 assert(n <= m);
951
952 if (pid_is_valid(object_pid) && client_context_get(s, object_pid, NULL, NULL, 0, NULL, &o) >= 0) {
953
954 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->pid, pid_t, pid_is_valid, PID_FMT, "OBJECT_PID");
955 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_UID");
956 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->gid, gid_t, gid_is_valid, GID_FMT, "OBJECT_GID");
957
958 IOVEC_ADD_STRING_FIELD(iovec, n, o->comm, "OBJECT_COMM");
959 IOVEC_ADD_STRING_FIELD(iovec, n, o->exe, "OBJECT_EXE");
960 IOVEC_ADD_STRING_FIELD(iovec, n, o->cmdline, "OBJECT_CMDLINE");
961 IOVEC_ADD_STRING_FIELD(iovec, n, o->capeff, "OBJECT_CAP_EFFECTIVE");
962
963 IOVEC_ADD_SIZED_FIELD(iovec, n, o->label, o->label_size, "OBJECT_SELINUX_CONTEXT");
964
965 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "OBJECT_AUDIT_SESSION");
966 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->loginuid, uid_t, uid_is_valid, UID_FMT, "OBJECT_AUDIT_LOGINUID");
967
968 IOVEC_ADD_STRING_FIELD(iovec, n, o->cgroup, "OBJECT_SYSTEMD_CGROUP");
969 IOVEC_ADD_STRING_FIELD(iovec, n, o->session, "OBJECT_SYSTEMD_SESSION");
970 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->owner_uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_SYSTEMD_OWNER_UID");
971 IOVEC_ADD_STRING_FIELD(iovec, n, o->unit, "OBJECT_SYSTEMD_UNIT");
972 IOVEC_ADD_STRING_FIELD(iovec, n, o->user_unit, "OBJECT_SYSTEMD_USER_UNIT");
973 IOVEC_ADD_STRING_FIELD(iovec, n, o->slice, "OBJECT_SYSTEMD_SLICE");
974 IOVEC_ADD_STRING_FIELD(iovec, n, o->user_slice, "OBJECT_SYSTEMD_USER_SLICE");
975
976 IOVEC_ADD_ID128_FIELD(iovec, n, o->invocation_id, "OBJECT_SYSTEMD_INVOCATION_ID=");
977 }
978
979 assert(n <= m);
980
981 if (tv) {
982 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT, timeval_load(tv));
983 iovec[n++] = IOVEC_MAKE_STRING(source_time);
984 }
985
986 /* Note that strictly speaking storing the boot id here is
987 * redundant since the entry includes this in-line
988 * anyway. However, we need this indexed, too. */
989 if (!isempty(s->boot_id_field))
990 iovec[n++] = IOVEC_MAKE_STRING(s->boot_id_field);
991
992 if (!isempty(s->machine_id_field))
993 iovec[n++] = IOVEC_MAKE_STRING(s->machine_id_field);
994
995 if (!isempty(s->hostname_field))
996 iovec[n++] = IOVEC_MAKE_STRING(s->hostname_field);
997
998 assert(n <= m);
999
1000 if (s->split_mode == SPLIT_UID && c && uid_is_valid(c->uid))
1001 /* Split up strictly by (non-root) UID */
1002 journal_uid = c->uid;
1003 else if (s->split_mode == SPLIT_LOGIN && c && c->uid > 0 && uid_is_valid(c->owner_uid))
1004 /* Split up by login UIDs. We do this only if the
1005 * realuid is not root, in order not to accidentally
1006 * leak privileged information to the user that is
1007 * logged by a privileged process that is part of an
1008 * unprivileged session. */
1009 journal_uid = c->owner_uid;
1010 else
1011 journal_uid = 0;
1012
1013 write_to_journal(s, journal_uid, iovec, n, priority);
1014 }
1015
/* Generates a log entry on behalf of journald itself (transport "driver", priority LOG_INFO) and dispatches
 * it with journald's own client context.
 *
 * object_pid: PID of the process the message is about, passed on to dispatch_message_real().
 * message_id: an optional, complete "MESSAGE_ID=…" field, or NULL.
 * format/...: the remaining fields, processed by log_format_iovec().
 *
 * If formatting the fields fails, a warning entry saying so is written instead. */
void server_driver_message(Server *s, pid_t object_pid, const char *message_id, const char *format, ...) {

        struct iovec *iovec;
        size_t n = 0, k, m;
        va_list ap;
        int r;

        assert(s);
        assert(format);

        m = N_IOVEC_META_FIELDS + 5 + N_IOVEC_PAYLOAD_FIELDS + client_context_extra_fields_n_iovec(s->my_context) + N_IOVEC_OBJECT_FIELDS;
        iovec = newa(struct iovec, m);

        assert_cc(3 == LOG_FAC(LOG_DAEMON));
        iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=3");
        iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=systemd-journald");

        iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=driver");
        assert_cc(6 == LOG_INFO);
        iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=6");

        if (message_id)
                iovec[n++] = IOVEC_MAKE_STRING(message_id);

        /* Remember where the formatted fields start: the entries from k up to n are freed below, all
         * entries before that are string literals or owned by the caller. */
        k = n;

        va_start(ap, format);
        r = log_format_iovec(iovec, m, &n, false, 0, format, ap);
        /* Error handling below */
        va_end(ap);

        if (r >= 0)
                dispatch_message_real(s, iovec, n, m, s->my_context, NULL, LOG_INFO, object_pid);

        /* Release the formatted fields, regardless of whether formatting succeeded as a whole */
        while (k < n)
                free(iovec[k++].iov_base);

        if (r < 0) {
                /* We failed to format the message. Emit a warning instead. */
                char buf[LINE_MAX];

                xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror(-r));

                /* Keep the first three fields (facility, identifier, transport), and overwrite everything
                 * from the priority field on. */
                n = 3;
                iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=4");
                iovec[n++] = IOVEC_MAKE_STRING(buf);
                dispatch_message_real(s, iovec, n, m, s->my_context, NULL, LOG_INFO, object_pid);
        }
}
1064
1065 void server_dispatch_message(
1066 Server *s,
1067 struct iovec *iovec, size_t n, size_t m,
1068 ClientContext *c,
1069 const struct timeval *tv,
1070 int priority,
1071 pid_t object_pid) {
1072
1073 uint64_t available = 0;
1074 int rl;
1075
1076 assert(s);
1077 assert(iovec || n == 0);
1078
1079 if (n == 0)
1080 return;
1081
1082 if (LOG_PRI(priority) > s->max_level_store)
1083 return;
1084
1085 /* Stop early in case the information will not be stored
1086 * in a journal. */
1087 if (s->storage == STORAGE_NONE)
1088 return;
1089
1090 if (c && c->unit) {
1091 (void) determine_space(s, &available, NULL);
1092
1093 rl = journal_rate_limit_test(s->rate_limit, c->unit, c->log_rate_limit_interval, c->log_rate_limit_burst, priority & LOG_PRIMASK, available);
1094 if (rl == 0)
1095 return;
1096
1097 /* Write a suppression message if we suppressed something */
1098 if (rl > 1)
1099 server_driver_message(s, c->pid,
1100 "MESSAGE_ID=" SD_MESSAGE_JOURNAL_DROPPED_STR,
1101 LOG_MESSAGE("Suppressed %i messages from %s", rl - 1, c->unit),
1102 "N_DROPPED=%i", rl - 1,
1103 NULL);
1104 }
1105
1106 dispatch_message_real(s, iovec, n, m, c, tv, priority, object_pid);
1107 }
1108
1109 int server_flush_to_var(Server *s, bool require_flag_file) {
1110 sd_id128_t machine;
1111 sd_journal *j = NULL;
1112 char ts[FORMAT_TIMESPAN_MAX];
1113 usec_t start;
1114 unsigned n = 0;
1115 int r;
1116
1117 assert(s);
1118
1119 if (!IN_SET(s->storage, STORAGE_AUTO, STORAGE_PERSISTENT))
1120 return 0;
1121
1122 if (!s->runtime_journal)
1123 return 0;
1124
1125 if (require_flag_file && !flushed_flag_is_set())
1126 return 0;
1127
1128 (void) system_journal_open(s, true);
1129
1130 if (!s->system_journal)
1131 return 0;
1132
1133 log_debug("Flushing to /var...");
1134
1135 start = now(CLOCK_MONOTONIC);
1136
1137 r = sd_id128_get_machine(&machine);
1138 if (r < 0)
1139 return r;
1140
1141 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1142 if (r < 0)
1143 return log_error_errno(r, "Failed to read runtime journal: %m");
1144
1145 sd_journal_set_data_threshold(j, 0);
1146
1147 SD_JOURNAL_FOREACH(j) {
1148 Object *o = NULL;
1149 JournalFile *f;
1150
1151 f = j->current_file;
1152 assert(f && f->current_offset > 0);
1153
1154 n++;
1155
1156 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1157 if (r < 0) {
1158 log_error_errno(r, "Can't read entry: %m");
1159 goto finish;
1160 }
1161
1162 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset);
1163 if (r >= 0)
1164 continue;
1165
1166 if (!shall_try_append_again(s->system_journal, r)) {
1167 log_error_errno(r, "Can't write entry: %m");
1168 goto finish;
1169 }
1170
1171 server_rotate(s);
1172 server_vacuum(s, false);
1173
1174 if (!s->system_journal) {
1175 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1176 r = -EIO;
1177 goto finish;
1178 }
1179
1180 log_debug("Retrying write.");
1181 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset);
1182 if (r < 0) {
1183 log_error_errno(r, "Can't write entry: %m");
1184 goto finish;
1185 }
1186 }
1187
1188 r = 0;
1189
1190 finish:
1191 if (s->system_journal)
1192 journal_file_post_change(s->system_journal);
1193
1194 s->runtime_journal = journal_file_close(s->runtime_journal);
1195
1196 if (r >= 0)
1197 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
1198
1199 sd_journal_close(j);
1200
1201 server_driver_message(s, 0, NULL,
1202 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1203 format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0),
1204 n),
1205 NULL);
1206
1207 return r;
1208 }
1209
1210 int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1211 Server *s = userdata;
1212 struct ucred *ucred = NULL;
1213 struct timeval *tv = NULL;
1214 struct cmsghdr *cmsg;
1215 char *label = NULL;
1216 size_t label_len = 0, m;
1217 struct iovec iovec;
1218 ssize_t n;
1219 int *fds = NULL, v = 0;
1220 size_t n_fds = 0;
1221
1222 union {
1223 struct cmsghdr cmsghdr;
1224
1225 /* We use NAME_MAX space for the SELinux label
1226 * here. The kernel currently enforces no
1227 * limit, but according to suggestions from
1228 * the SELinux people this will change and it
1229 * will probably be identical to NAME_MAX. For
1230 * now we use that, but this should be updated
1231 * one day when the final limit is known. */
1232 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1233 CMSG_SPACE(sizeof(struct timeval)) +
1234 CMSG_SPACE(sizeof(int)) + /* fd */
1235 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1236 } control = {};
1237
1238 union sockaddr_union sa = {};
1239
1240 struct msghdr msghdr = {
1241 .msg_iov = &iovec,
1242 .msg_iovlen = 1,
1243 .msg_control = &control,
1244 .msg_controllen = sizeof(control),
1245 .msg_name = &sa,
1246 .msg_namelen = sizeof(sa),
1247 };
1248
1249 assert(s);
1250 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
1251
1252 if (revents != EPOLLIN)
1253 return log_error_errno(SYNTHETIC_ERRNO(EIO),
1254 "Got invalid event from epoll for datagram fd: %" PRIx32,
1255 revents);
1256
1257 /* Try to get the right size, if we can. (Not all sockets support SIOCINQ, hence we just try, but don't rely on
1258 * it.) */
1259 (void) ioctl(fd, SIOCINQ, &v);
1260
1261 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1262 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1263 (size_t) LINE_MAX,
1264 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
1265
1266 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1267 return log_oom();
1268
1269 iovec = IOVEC_MAKE(s->buffer, s->buffer_size - 1); /* Leave room for trailing NUL we add later */
1270
1271 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1272 if (n < 0) {
1273 if (IN_SET(errno, EINTR, EAGAIN))
1274 return 0;
1275
1276 return log_error_errno(errno, "recvmsg() failed: %m");
1277 }
1278
1279 CMSG_FOREACH(cmsg, &msghdr)
1280 if (cmsg->cmsg_level == SOL_SOCKET &&
1281 cmsg->cmsg_type == SCM_CREDENTIALS &&
1282 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1283 ucred = (struct ucred*) CMSG_DATA(cmsg);
1284 else if (cmsg->cmsg_level == SOL_SOCKET &&
1285 cmsg->cmsg_type == SCM_SECURITY) {
1286 label = (char*) CMSG_DATA(cmsg);
1287 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1288 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1289 cmsg->cmsg_type == SO_TIMESTAMP &&
1290 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1291 tv = (struct timeval*) CMSG_DATA(cmsg);
1292 else if (cmsg->cmsg_level == SOL_SOCKET &&
1293 cmsg->cmsg_type == SCM_RIGHTS) {
1294 fds = (int*) CMSG_DATA(cmsg);
1295 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1296 }
1297
1298 /* And a trailing NUL, just in case */
1299 s->buffer[n] = 0;
1300
1301 if (fd == s->syslog_fd) {
1302 if (n > 0 && n_fds == 0)
1303 server_process_syslog_message(s, s->buffer, n, ucred, tv, label, label_len);
1304 else if (n_fds > 0)
1305 log_warning("Got file descriptors via syslog socket. Ignoring.");
1306
1307 } else if (fd == s->native_fd) {
1308 if (n > 0 && n_fds == 0)
1309 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1310 else if (n == 0 && n_fds == 1)
1311 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1312 else if (n_fds > 0)
1313 log_warning("Got too many file descriptors via native socket. Ignoring.");
1314
1315 } else {
1316 assert(fd == s->audit_fd);
1317
1318 if (n > 0 && n_fds == 0)
1319 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1320 else if (n_fds > 0)
1321 log_warning("Got file descriptors via audit socket. Ignoring.");
1322 }
1323
1324 close_many(fds, n_fds);
1325 return 0;
1326 }
1327
1328 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1329 Server *s = userdata;
1330 int r;
1331
1332 assert(s);
1333
1334 log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
1335
1336 (void) server_flush_to_var(s, false);
1337 server_sync(s);
1338 server_vacuum(s, false);
1339
1340 r = touch("/run/systemd/journal/flushed");
1341 if (r < 0)
1342 log_warning_errno(r, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
1343
1344 server_space_usage_message(s, NULL);
1345 return 0;
1346 }
1347
1348 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1349 Server *s = userdata;
1350 int r;
1351
1352 assert(s);
1353
1354 log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
1355 server_rotate(s);
1356 server_vacuum(s, true);
1357
1358 if (s->system_journal)
1359 patch_min_use(&s->system_storage);
1360 if (s->runtime_journal)
1361 patch_min_use(&s->runtime_storage);
1362
1363 /* Let clients know when the most recent rotation happened. */
1364 r = write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC));
1365 if (r < 0)
1366 log_warning_errno(r, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
1367
1368 return 0;
1369 }
1370
1371 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1372 Server *s = userdata;
1373
1374 assert(s);
1375
1376 log_received_signal(LOG_INFO, si);
1377
1378 sd_event_exit(s->event, 0);
1379 return 0;
1380 }
1381
1382 static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1383 Server *s = userdata;
1384 int r;
1385
1386 assert(s);
1387
1388 log_debug("Received request to sync from PID " PID_FMT, si->ssi_pid);
1389
1390 server_sync(s);
1391
1392 /* Let clients know when the most recent sync happened. */
1393 r = write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC));
1394 if (r < 0)
1395 log_warning_errno(r, "Failed to write /run/systemd/journal/synced, ignoring: %m");
1396
1397 return 0;
1398 }
1399
1400 static int setup_signals(Server *s) {
1401 int r;
1402
1403 assert(s);
1404
1405 assert_se(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
1406
1407 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1408 if (r < 0)
1409 return r;
1410
1411 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1412 if (r < 0)
1413 return r;
1414
1415 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1416 if (r < 0)
1417 return r;
1418
1419 /* Let's process SIGTERM late, so that we flush all queued
1420 * messages to disk before we exit */
1421 r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1422 if (r < 0)
1423 return r;
1424
1425 /* When journald is invoked on the terminal (when debugging),
1426 * it's useful if C-c is handled equivalent to SIGTERM. */
1427 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
1428 if (r < 0)
1429 return r;
1430
1431 r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1432 if (r < 0)
1433 return r;
1434
1435 /* SIGRTMIN+1 causes an immediate sync. We process this very
1436 * late, so that everything else queued at this point is
1437 * really written to disk. Clients can watch
1438 * /run/systemd/journal/synced with inotify until its mtime
1439 * changes to see when a sync happened. */
1440 r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
1441 if (r < 0)
1442 return r;
1443
1444 r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
1445 if (r < 0)
1446 return r;
1447
1448 return 0;
1449 }
1450
1451 static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
1452 Server *s = data;
1453 int r;
1454
1455 assert(s);
1456
1457 if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_syslog")) {
1458
1459 r = value ? parse_boolean(value) : true;
1460 if (r < 0)
1461 log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value);
1462 else
1463 s->forward_to_syslog = r;
1464
1465 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_kmsg")) {
1466
1467 r = value ? parse_boolean(value) : true;
1468 if (r < 0)
1469 log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value);
1470 else
1471 s->forward_to_kmsg = r;
1472
1473 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_console")) {
1474
1475 r = value ? parse_boolean(value) : true;
1476 if (r < 0)
1477 log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value);
1478 else
1479 s->forward_to_console = r;
1480
1481 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_wall")) {
1482
1483 r = value ? parse_boolean(value) : true;
1484 if (r < 0)
1485 log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value);
1486 else
1487 s->forward_to_wall = r;
1488
1489 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_console")) {
1490
1491 if (proc_cmdline_value_missing(key, value))
1492 return 0;
1493
1494 r = log_level_from_string(value);
1495 if (r < 0)
1496 log_warning("Failed to parse max level console value \"%s\". Ignoring.", value);
1497 else
1498 s->max_level_console = r;
1499
1500 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_store")) {
1501
1502 if (proc_cmdline_value_missing(key, value))
1503 return 0;
1504
1505 r = log_level_from_string(value);
1506 if (r < 0)
1507 log_warning("Failed to parse max level store value \"%s\". Ignoring.", value);
1508 else
1509 s->max_level_store = r;
1510
1511 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_syslog")) {
1512
1513 if (proc_cmdline_value_missing(key, value))
1514 return 0;
1515
1516 r = log_level_from_string(value);
1517 if (r < 0)
1518 log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value);
1519 else
1520 s->max_level_syslog = r;
1521
1522 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_kmsg")) {
1523
1524 if (proc_cmdline_value_missing(key, value))
1525 return 0;
1526
1527 r = log_level_from_string(value);
1528 if (r < 0)
1529 log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value);
1530 else
1531 s->max_level_kmsg = r;
1532
1533 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_wall")) {
1534
1535 if (proc_cmdline_value_missing(key, value))
1536 return 0;
1537
1538 r = log_level_from_string(value);
1539 if (r < 0)
1540 log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value);
1541 else
1542 s->max_level_wall = r;
1543
1544 } else if (startswith(key, "systemd.journald"))
1545 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key);
1546
1547 /* do not warn about state here, since probably systemd already did */
1548 return 0;
1549 }
1550
1551 static int server_parse_config_file(Server *s) {
1552 assert(s);
1553
1554 return config_parse_many_nulstr(PKGSYSCONFDIR "/journald.conf",
1555 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1556 "Journal\0",
1557 config_item_perf_lookup, journald_gperf_lookup,
1558 CONFIG_PARSE_WARN, s);
1559 }
1560
1561 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1562 Server *s = userdata;
1563
1564 assert(s);
1565
1566 server_sync(s);
1567 return 0;
1568 }
1569
1570 int server_schedule_sync(Server *s, int priority) {
1571 int r;
1572
1573 assert(s);
1574
1575 if (priority <= LOG_CRIT) {
1576 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1577 server_sync(s);
1578 return 0;
1579 }
1580
1581 if (s->sync_scheduled)
1582 return 0;
1583
1584 if (s->sync_interval_usec > 0) {
1585 usec_t when;
1586
1587 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1588 if (r < 0)
1589 return r;
1590
1591 when += s->sync_interval_usec;
1592
1593 if (!s->sync_event_source) {
1594 r = sd_event_add_time(
1595 s->event,
1596 &s->sync_event_source,
1597 CLOCK_MONOTONIC,
1598 when, 0,
1599 server_dispatch_sync, s);
1600 if (r < 0)
1601 return r;
1602
1603 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1604 } else {
1605 r = sd_event_source_set_time(s->sync_event_source, when);
1606 if (r < 0)
1607 return r;
1608
1609 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1610 }
1611 if (r < 0)
1612 return r;
1613
1614 s->sync_scheduled = true;
1615 }
1616
1617 return 0;
1618 }
1619
1620 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1621 Server *s = userdata;
1622
1623 assert(s);
1624
1625 server_cache_hostname(s);
1626 return 0;
1627 }
1628
1629 static int server_open_hostname(Server *s) {
1630 int r;
1631
1632 assert(s);
1633
1634 s->hostname_fd = open("/proc/sys/kernel/hostname",
1635 O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
1636 if (s->hostname_fd < 0)
1637 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
1638
1639 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1640 if (r < 0) {
1641 /* kernels prior to 3.2 don't support polling this file. Ignore
1642 * the failure. */
1643 if (r == -EPERM) {
1644 log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
1645 s->hostname_fd = safe_close(s->hostname_fd);
1646 return 0;
1647 }
1648
1649 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
1650 }
1651
1652 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1653 if (r < 0)
1654 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
1655
1656 return 0;
1657 }
1658
1659 static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1660 Server *s = userdata;
1661 int r;
1662
1663 assert(s);
1664 assert(s->notify_event_source == es);
1665 assert(s->notify_fd == fd);
1666
1667 /* The $NOTIFY_SOCKET is writable again, now send exactly one
1668 * message on it. Either it's the watchdog event, the initial
1669 * READY=1 event or an stdout stream event. If there's nothing
1670 * to write anymore, turn our event source off. The next time
1671 * there's something to send it will be turned on again. */
1672
1673 if (!s->sent_notify_ready) {
1674 static const char p[] =
1675 "READY=1\n"
1676 "STATUS=Processing requests...";
1677 ssize_t l;
1678
1679 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1680 if (l < 0) {
1681 if (errno == EAGAIN)
1682 return 0;
1683
1684 return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1685 }
1686
1687 s->sent_notify_ready = true;
1688 log_debug("Sent READY=1 notification.");
1689
1690 } else if (s->send_watchdog) {
1691
1692 static const char p[] =
1693 "WATCHDOG=1";
1694
1695 ssize_t l;
1696
1697 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1698 if (l < 0) {
1699 if (errno == EAGAIN)
1700 return 0;
1701
1702 return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
1703 }
1704
1705 s->send_watchdog = false;
1706 log_debug("Sent WATCHDOG=1 notification.");
1707
1708 } else if (s->stdout_streams_notify_queue)
1709 /* Dispatch one stream notification event */
1710 stdout_stream_send_notify(s->stdout_streams_notify_queue);
1711
1712 /* Leave us enabled if there's still more to do. */
1713 if (s->send_watchdog || s->stdout_streams_notify_queue)
1714 return 0;
1715
1716 /* There was nothing to do anymore, let's turn ourselves off. */
1717 r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1718 if (r < 0)
1719 return log_error_errno(r, "Failed to turn off notify event source: %m");
1720
1721 return 0;
1722 }
1723
1724 static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
1725 Server *s = userdata;
1726 int r;
1727
1728 assert(s);
1729
1730 s->send_watchdog = true;
1731
1732 r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
1733 if (r < 0)
1734 log_warning_errno(r, "Failed to turn on notify event source: %m");
1735
1736 r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
1737 if (r < 0)
1738 return log_error_errno(r, "Failed to restart watchdog event source: %m");
1739
1740 r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
1741 if (r < 0)
1742 return log_error_errno(r, "Failed to enable watchdog event source: %m");
1743
1744 return 0;
1745 }
1746
1747 static int server_connect_notify(Server *s) {
1748 union sockaddr_union sa = {};
1749 const char *e;
1750 int r, salen;
1751
1752 assert(s);
1753 assert(s->notify_fd < 0);
1754 assert(!s->notify_event_source);
1755
1756 /*
1757 So here's the problem: we'd like to send notification
1758 messages to PID 1, but we cannot do that via sd_notify(),
1759 since that's synchronous, and we might end up blocking on
1760 it. Specifically: given that PID 1 might block on
1761 dbus-daemon during IPC, and dbus-daemon is logging to us,
1762 and might hence block on us, we might end up in a deadlock
1763 if we block on sending PID 1 notification messages — by
1764 generating a full blocking circle. To avoid this, let's
1765 create a non-blocking socket, and connect it to the
1766 notification socket, and then wait for POLLOUT before we
1767 send anything. This should efficiently avoid any deadlocks,
1768 as we'll never block on PID 1, hence PID 1 can safely block
1769 on dbus-daemon which can safely block on us again.
1770
1771 Don't think that this issue is real? It is, see:
1772 https://github.com/systemd/systemd/issues/1505
1773 */
1774
1775 e = getenv("NOTIFY_SOCKET");
1776 if (!e)
1777 return 0;
1778
1779 salen = sockaddr_un_set_path(&sa.un, e);
1780 if (salen < 0)
1781 return log_error_errno(salen, "NOTIFY_SOCKET set to invalid value '%s': %m", e);
1782
1783 s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1784 if (s->notify_fd < 0)
1785 return log_error_errno(errno, "Failed to create notify socket: %m");
1786
1787 (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1788
1789 r = connect(s->notify_fd, &sa.sa, salen);
1790 if (r < 0)
1791 return log_error_errno(errno, "Failed to connect to notify socket: %m");
1792
1793 r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1794 if (r < 0)
1795 return log_error_errno(r, "Failed to watch notification socket: %m");
1796
1797 if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
1798 s->send_watchdog = true;
1799
1800 r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s);
1801 if (r < 0)
1802 return log_error_errno(r, "Failed to add watchdog time event: %m");
1803 }
1804
1805 /* This should fire pretty soon, which we'll use to send the
1806 * READY=1 event. */
1807
1808 return 0;
1809 }
1810
1811 int server_init(Server *s) {
1812 _cleanup_fdset_free_ FDSet *fds = NULL;
1813 int n, r, fd;
1814 bool no_sockets;
1815
1816 assert(s);
1817
1818 zero(*s);
1819 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
1820 s->compress.enabled = true;
1821 s->compress.threshold_bytes = (uint64_t) -1;
1822 s->seal = true;
1823 s->read_kmsg = true;
1824
1825 s->watchdog_usec = USEC_INFINITY;
1826
1827 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1828 s->sync_scheduled = false;
1829
1830 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1831 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1832
1833 s->forward_to_wall = true;
1834
1835 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1836
1837 s->max_level_store = LOG_DEBUG;
1838 s->max_level_syslog = LOG_DEBUG;
1839 s->max_level_kmsg = LOG_NOTICE;
1840 s->max_level_console = LOG_INFO;
1841 s->max_level_wall = LOG_EMERG;
1842
1843 s->line_max = DEFAULT_LINE_MAX;
1844
1845 journal_reset_metrics(&s->system_storage.metrics);
1846 journal_reset_metrics(&s->runtime_storage.metrics);
1847
1848 server_parse_config_file(s);
1849
1850 r = proc_cmdline_parse(parse_proc_cmdline_item, s, PROC_CMDLINE_STRIP_RD_PREFIX);
1851 if (r < 0)
1852 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
1853
1854 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1855 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1856 s->rate_limit_interval, s->rate_limit_burst);
1857 s->rate_limit_interval = s->rate_limit_burst = 0;
1858 }
1859
1860 (void) mkdir_p("/run/systemd/journal", 0755);
1861
1862 s->user_journals = ordered_hashmap_new(NULL);
1863 if (!s->user_journals)
1864 return log_oom();
1865
1866 s->mmap = mmap_cache_new();
1867 if (!s->mmap)
1868 return log_oom();
1869
1870 s->deferred_closes = set_new(NULL);
1871 if (!s->deferred_closes)
1872 return log_oom();
1873
1874 r = sd_event_default(&s->event);
1875 if (r < 0)
1876 return log_error_errno(r, "Failed to create event loop: %m");
1877
1878 n = sd_listen_fds(true);
1879 if (n < 0)
1880 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
1881
1882 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1883
1884 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1885
1886 if (s->native_fd >= 0)
1887 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
1888 "Too many native sockets passed.");
1889
1890 s->native_fd = fd;
1891
1892 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1893
1894 if (s->stdout_fd >= 0)
1895 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
1896 "Too many stdout sockets passed.");
1897
1898 s->stdout_fd = fd;
1899
1900 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1901 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1902
1903 if (s->syslog_fd >= 0)
1904 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
1905 "Too many /dev/log sockets passed.");
1906
1907 s->syslog_fd = fd;
1908
1909 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1910
1911 if (s->audit_fd >= 0)
1912 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
1913 "Too many audit sockets passed.");
1914
1915 s->audit_fd = fd;
1916
1917 } else {
1918
1919 if (!fds) {
1920 fds = fdset_new();
1921 if (!fds)
1922 return log_oom();
1923 }
1924
1925 r = fdset_put(fds, fd);
1926 if (r < 0)
1927 return log_oom();
1928 }
1929 }
1930
1931 /* Try to restore streams, but don't bother if this fails */
1932 (void) server_restore_streams(s, fds);
1933
1934 if (fdset_size(fds) > 0) {
1935 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1936 fds = fdset_free(fds);
1937 }
1938
1939 no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
1940
1941 /* always open stdout, syslog, native, and kmsg sockets */
1942
1943 /* systemd-journald.socket: /run/systemd/journal/stdout */
1944 r = server_open_stdout_socket(s);
1945 if (r < 0)
1946 return r;
1947
1948 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
1949 r = server_open_syslog_socket(s);
1950 if (r < 0)
1951 return r;
1952
1953 /* systemd-journald.socket: /run/systemd/journal/socket */
1954 r = server_open_native_socket(s);
1955 if (r < 0)
1956 return r;
1957
1958 /* /dev/kmsg */
1959 r = server_open_dev_kmsg(s);
1960 if (r < 0)
1961 return r;
1962
1963 /* Unless we got *some* sockets and not audit, open audit socket */
1964 if (s->audit_fd >= 0 || no_sockets) {
1965 r = server_open_audit(s);
1966 if (r < 0)
1967 return r;
1968 }
1969
1970 r = server_open_kernel_seqnum(s);
1971 if (r < 0)
1972 return r;
1973
1974 r = server_open_hostname(s);
1975 if (r < 0)
1976 return r;
1977
1978 r = setup_signals(s);
1979 if (r < 0)
1980 return r;
1981
1982 s->rate_limit = journal_rate_limit_new();
1983 if (!s->rate_limit)
1984 return -ENOMEM;
1985
1986 r = cg_get_root_path(&s->cgroup_root);
1987 if (r < 0)
1988 return r;
1989
1990 server_cache_hostname(s);
1991 server_cache_boot_id(s);
1992 server_cache_machine_id(s);
1993
1994 s->runtime_storage.name = "Runtime journal";
1995 s->system_storage.name = "System journal";
1996
1997 s->runtime_storage.path = strjoin("/run/log/journal/", SERVER_MACHINE_ID(s));
1998 s->system_storage.path = strjoin("/var/log/journal/", SERVER_MACHINE_ID(s));
1999 if (!s->runtime_storage.path || !s->system_storage.path)
2000 return -ENOMEM;
2001
2002 (void) server_connect_notify(s);
2003
2004 (void) client_context_acquire_default(s);
2005
2006 return system_journal_open(s, false);
2007 }
2008
2009 void server_maybe_append_tags(Server *s) {
2010 #if HAVE_GCRYPT
2011 JournalFile *f;
2012 Iterator i;
2013 usec_t n;
2014
2015 n = now(CLOCK_REALTIME);
2016
2017 if (s->system_journal)
2018 journal_file_maybe_append_tag(s->system_journal, n);
2019
2020 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
2021 journal_file_maybe_append_tag(f, n);
2022 #endif
2023 }
2024
2025 void server_done(Server *s) {
2026 assert(s);
2027
2028 set_free_with_destructor(s->deferred_closes, journal_file_close);
2029
2030 while (s->stdout_streams)
2031 stdout_stream_free(s->stdout_streams);
2032
2033 client_context_flush_all(s);
2034
2035 if (s->system_journal)
2036 (void) journal_file_close(s->system_journal);
2037
2038 if (s->runtime_journal)
2039 (void) journal_file_close(s->runtime_journal);
2040
2041 ordered_hashmap_free_with_destructor(s->user_journals, journal_file_close);
2042
2043 sd_event_source_unref(s->syslog_event_source);
2044 sd_event_source_unref(s->native_event_source);
2045 sd_event_source_unref(s->stdout_event_source);
2046 sd_event_source_unref(s->dev_kmsg_event_source);
2047 sd_event_source_unref(s->audit_event_source);
2048 sd_event_source_unref(s->sync_event_source);
2049 sd_event_source_unref(s->sigusr1_event_source);
2050 sd_event_source_unref(s->sigusr2_event_source);
2051 sd_event_source_unref(s->sigterm_event_source);
2052 sd_event_source_unref(s->sigint_event_source);
2053 sd_event_source_unref(s->sigrtmin1_event_source);
2054 sd_event_source_unref(s->hostname_event_source);
2055 sd_event_source_unref(s->notify_event_source);
2056 sd_event_source_unref(s->watchdog_event_source);
2057 sd_event_unref(s->event);
2058
2059 safe_close(s->syslog_fd);
2060 safe_close(s->native_fd);
2061 safe_close(s->stdout_fd);
2062 safe_close(s->dev_kmsg_fd);
2063 safe_close(s->audit_fd);
2064 safe_close(s->hostname_fd);
2065 safe_close(s->notify_fd);
2066
2067 if (s->rate_limit)
2068 journal_rate_limit_free(s->rate_limit);
2069
2070 if (s->kernel_seqnum)
2071 munmap(s->kernel_seqnum, sizeof(uint64_t));
2072
2073 free(s->buffer);
2074 free(s->tty_path);
2075 free(s->cgroup_root);
2076 free(s->hostname_field);
2077 free(s->runtime_storage.path);
2078 free(s->system_storage.path);
2079
2080 if (s->mmap)
2081 mmap_cache_unref(s->mmap);
2082 }
2083
2084 static const char* const storage_table[_STORAGE_MAX] = {
2085 [STORAGE_AUTO] = "auto",
2086 [STORAGE_VOLATILE] = "volatile",
2087 [STORAGE_PERSISTENT] = "persistent",
2088 [STORAGE_NONE] = "none"
2089 };
2090
2091 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
2092 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
2093
2094 static const char* const split_mode_table[_SPLIT_MAX] = {
2095 [SPLIT_LOGIN] = "login",
2096 [SPLIT_UID] = "uid",
2097 [SPLIT_NONE] = "none",
2098 };
2099
2100 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
2101 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
2102
2103 int config_parse_line_max(
2104 const char* unit,
2105 const char *filename,
2106 unsigned line,
2107 const char *section,
2108 unsigned section_line,
2109 const char *lvalue,
2110 int ltype,
2111 const char *rvalue,
2112 void *data,
2113 void *userdata) {
2114
2115 size_t *sz = data;
2116 int r;
2117
2118 assert(filename);
2119 assert(lvalue);
2120 assert(rvalue);
2121 assert(data);
2122
2123 if (isempty(rvalue))
2124 /* Empty assignment means default */
2125 *sz = DEFAULT_LINE_MAX;
2126 else {
2127 uint64_t v;
2128
2129 r = parse_size(rvalue, 1024, &v);
2130 if (r < 0) {
2131 log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse LineMax= value, ignoring: %s", rvalue);
2132 return 0;
2133 }
2134
2135 if (v < 79) {
2136 /* Why specify 79 here as minimum line length? Simply, because the most common traditional
2137 * terminal size is 80ch, and it might make sense to break one character before the natural
2138 * line break would occur on that. */
2139 log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too small, clamping to 79: %s", rvalue);
2140 *sz = 79;
2141 } else if (v > (uint64_t) (SSIZE_MAX-1)) {
2142 /* So, why specify SSIZE_MAX-1 here? Because that's one below the largest size value read()
2143 * can return, and we need one extra byte for the trailing NUL byte. Of course IRL such large
2144 * memory allocations will fail anyway, hence this limit is mostly theoretical anyway, as we'll
2145 * fail much earlier anyway. */
2146 log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too large, clamping to %" PRIu64 ": %s", (uint64_t) (SSIZE_MAX-1), rvalue);
2147 *sz = SSIZE_MAX-1;
2148 } else
2149 *sz = (size_t) v;
2150 }
2151
2152 return 0;
2153 }
2154
2155 int config_parse_compress(const char* unit,
2156 const char *filename,
2157 unsigned line,
2158 const char *section,
2159 unsigned section_line,
2160 const char *lvalue,
2161 int ltype,
2162 const char *rvalue,
2163 void *data,
2164 void *userdata) {
2165 JournalCompressOptions* compress = data;
2166 int r;
2167
2168 if (streq(rvalue, "1")) {
2169 log_syntax(unit, LOG_WARNING, filename, line, 0,
2170 "Compress= ambiguously specified as 1, enabling compression with default threshold");
2171 compress->enabled = true;
2172 } else if (streq(rvalue, "0")) {
2173 log_syntax(unit, LOG_WARNING, filename, line, 0,
2174 "Compress= ambiguously specified as 0, disabling compression");
2175 compress->enabled = false;
2176 } else if ((r = parse_boolean(rvalue)) >= 0)
2177 compress->enabled = r;
2178 else if (parse_size(rvalue, 1024, &compress->threshold_bytes) == 0)
2179 compress->enabled = true;
2180 else if (isempty(rvalue)) {
2181 compress->enabled = true;
2182 compress->threshold_bytes = (uint64_t) -1;
2183 } else
2184 log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse Compress= value, ignoring: %s", rvalue);
2185
2186 return 0;
2187 }