]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/journal/journald-server.c
pkgconfig: define variables relative to ${prefix}/${rootprefix}/${sysconfdir}
[thirdparty/systemd.git] / src / journal / journald-server.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #if HAVE_SELINUX
4 #include <selinux/selinux.h>
5 #endif
6 #include <sys/ioctl.h>
7 #include <sys/mman.h>
8 #include <sys/signalfd.h>
9 #include <sys/statvfs.h>
10 #include <linux/sockios.h>
11
12 #include "sd-daemon.h"
13 #include "sd-journal.h"
14 #include "sd-messages.h"
15
16 #include "acl-util.h"
17 #include "alloc-util.h"
18 #include "audit-util.h"
19 #include "cgroup-util.h"
20 #include "conf-parser.h"
21 #include "dirent-util.h"
22 #include "extract-word.h"
23 #include "fd-util.h"
24 #include "fileio.h"
25 #include "format-util.h"
26 #include "fs-util.h"
27 #include "hashmap.h"
28 #include "hostname-util.h"
29 #include "id128-util.h"
30 #include "io-util.h"
31 #include "journal-authenticate.h"
32 #include "journal-file.h"
33 #include "journal-internal.h"
34 #include "journal-vacuum.h"
35 #include "journald-audit.h"
36 #include "journald-context.h"
37 #include "journald-kmsg.h"
38 #include "journald-native.h"
39 #include "journald-rate-limit.h"
40 #include "journald-server.h"
41 #include "journald-stream.h"
42 #include "journald-syslog.h"
43 #include "log.h"
44 #include "missing.h"
45 #include "mkdir.h"
46 #include "parse-util.h"
47 #include "proc-cmdline.h"
48 #include "process-util.h"
49 #include "rm-rf.h"
50 #include "selinux-util.h"
51 #include "signal-util.h"
52 #include "socket-util.h"
53 #include "stdio-util.h"
54 #include "string-table.h"
55 #include "string-util.h"
56 #include "syslog-util.h"
57 #include "user-util.h"
58
/* Upper bound on the number of per-user journal files kept open at the same time. When the limit is
 * reached, an already open user journal is closed before another one is opened. */
#define USER_JOURNALS_MAX 1024

/* Built-in defaults. NOTE(review): judging by their names these cover the sync interval, the rate limit
 * window and burst size, and the maximum age of a journal file; their users are not visible here. */
#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 10000
#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH

/* How long cached disk space figures remain valid before they are measured again */
#define RECHECK_SPACE_USEC (30*USEC_PER_SEC)

/* NOTE(review): presumably the send buffer size requested for the notification socket; confirm at the user */
#define NOTIFY_SNDBUF_SIZE (8*1024*1024)

/* The period to insert between posting changes for coalescing */
#define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)

/* Pick a good default that is likely to fit into AF_UNIX and AF_INET SOCK_DGRAM datagrams, and even leaves
 * some room for a bit of additional metadata. */
#define DEFAULT_LINE_MAX (48*1024)

/* Once this many journal files are queued for a deferred close, some are closed right away to make room */
#define DEFERRED_CLOSES_MAX (4096)
78
79 static int determine_path_usage(Server *s, const char *path, uint64_t *ret_used, uint64_t *ret_free) {
80 _cleanup_closedir_ DIR *d = NULL;
81 struct dirent *de;
82 struct statvfs ss;
83
84 assert(ret_used);
85 assert(ret_free);
86
87 d = opendir(path);
88 if (!d)
89 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR,
90 errno, "Failed to open %s: %m", path);
91
92 if (fstatvfs(dirfd(d), &ss) < 0)
93 return log_error_errno(errno, "Failed to fstatvfs(%s): %m", path);
94
95 *ret_free = ss.f_bsize * ss.f_bavail;
96 *ret_used = 0;
97 FOREACH_DIRENT_ALL(de, d, break) {
98 struct stat st;
99
100 if (!endswith(de->d_name, ".journal") &&
101 !endswith(de->d_name, ".journal~"))
102 continue;
103
104 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
105 log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", path, de->d_name);
106 continue;
107 }
108
109 if (!S_ISREG(st.st_mode))
110 continue;
111
112 *ret_used += (uint64_t) st.st_blocks * 512UL;
113 }
114
115 return 0;
116 }
117
118 static void cache_space_invalidate(JournalStorageSpace *space) {
119 zero(*space);
120 }
121
122 static int cache_space_refresh(Server *s, JournalStorage *storage) {
123 JournalStorageSpace *space;
124 JournalMetrics *metrics;
125 uint64_t vfs_used, vfs_avail, avail;
126 usec_t ts;
127 int r;
128
129 assert(s);
130
131 metrics = &storage->metrics;
132 space = &storage->space;
133
134 ts = now(CLOCK_MONOTONIC);
135
136 if (space->timestamp != 0 && space->timestamp + RECHECK_SPACE_USEC > ts)
137 return 0;
138
139 r = determine_path_usage(s, storage->path, &vfs_used, &vfs_avail);
140 if (r < 0)
141 return r;
142
143 space->vfs_used = vfs_used;
144 space->vfs_available = vfs_avail;
145
146 avail = LESS_BY(vfs_avail, metrics->keep_free);
147
148 space->limit = MIN(MAX(vfs_used + avail, metrics->min_use), metrics->max_use);
149 space->available = LESS_BY(space->limit, vfs_used);
150 space->timestamp = ts;
151 return 1;
152 }
153
154 static void patch_min_use(JournalStorage *storage) {
155 assert(storage);
156
157 /* Let's bump the min_use limit to the current usage on disk. We do
158 * this when starting up and first opening the journal files. This way
159 * sudden spikes in disk usage will not cause journald to vacuum files
160 * without bounds. Note that this means that only a restart of journald
161 * will make it reset this value. */
162
163 storage->metrics.min_use = MAX(storage->metrics.min_use, storage->space.vfs_used);
164 }
165
166 static int determine_space(Server *s, uint64_t *available, uint64_t *limit) {
167 JournalStorage *js;
168 int r;
169
170 assert(s);
171
172 js = s->system_journal ? &s->system_storage : &s->runtime_storage;
173
174 r = cache_space_refresh(s, js);
175 if (r >= 0) {
176 if (available)
177 *available = js->space.available;
178 if (limit)
179 *limit = js->space.limit;
180 }
181 return r;
182 }
183
184 void server_space_usage_message(Server *s, JournalStorage *storage) {
185 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
186 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
187 JournalMetrics *metrics;
188
189 assert(s);
190
191 if (!storage)
192 storage = s->system_journal ? &s->system_storage : &s->runtime_storage;
193
194 if (cache_space_refresh(s, storage) < 0)
195 return;
196
197 metrics = &storage->metrics;
198 format_bytes(fb1, sizeof(fb1), storage->space.vfs_used);
199 format_bytes(fb2, sizeof(fb2), metrics->max_use);
200 format_bytes(fb3, sizeof(fb3), metrics->keep_free);
201 format_bytes(fb4, sizeof(fb4), storage->space.vfs_available);
202 format_bytes(fb5, sizeof(fb5), storage->space.limit);
203 format_bytes(fb6, sizeof(fb6), storage->space.available);
204
205 server_driver_message(s, 0,
206 "MESSAGE_ID=" SD_MESSAGE_JOURNAL_USAGE_STR,
207 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
208 storage->name, storage->path, fb1, fb5, fb6),
209 "JOURNAL_NAME=%s", storage->name,
210 "JOURNAL_PATH=%s", storage->path,
211 "CURRENT_USE=%"PRIu64, storage->space.vfs_used,
212 "CURRENT_USE_PRETTY=%s", fb1,
213 "MAX_USE=%"PRIu64, metrics->max_use,
214 "MAX_USE_PRETTY=%s", fb2,
215 "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free,
216 "DISK_KEEP_FREE_PRETTY=%s", fb3,
217 "DISK_AVAILABLE=%"PRIu64, storage->space.vfs_available,
218 "DISK_AVAILABLE_PRETTY=%s", fb4,
219 "LIMIT=%"PRIu64, storage->space.limit,
220 "LIMIT_PRETTY=%s", fb5,
221 "AVAILABLE=%"PRIu64, storage->space.available,
222 "AVAILABLE_PRETTY=%s", fb6,
223 NULL);
224 }
225
226 static bool uid_for_system_journal(uid_t uid) {
227
228 /* Returns true if the specified UID shall get its data stored in the system journal*/
229
230 return uid_is_system(uid) || uid_is_dynamic(uid) || uid == UID_NOBODY;
231 }
232
233 static void server_add_acls(JournalFile *f, uid_t uid) {
234 #if HAVE_ACL
235 int r;
236 #endif
237 assert(f);
238
239 #if HAVE_ACL
240 if (uid_for_system_journal(uid))
241 return;
242
243 r = add_acls_for_user(f->fd, uid);
244 if (r < 0)
245 log_warning_errno(r, "Failed to set ACL on %s, ignoring: %m", f->path);
246 #endif
247 }
248
249 static int open_journal(
250 Server *s,
251 bool reliably,
252 const char *fname,
253 int flags,
254 bool seal,
255 JournalMetrics *metrics,
256 JournalFile **ret) {
257
258 JournalFile *f;
259 int r;
260
261 assert(s);
262 assert(fname);
263 assert(ret);
264
265 if (reliably)
266 r = journal_file_open_reliably(fname, flags, 0640, s->compress.enabled, s->compress.threshold_bytes,
267 seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
268 else
269 r = journal_file_open(-1, fname, flags, 0640, s->compress.enabled, s->compress.threshold_bytes, seal,
270 metrics, s->mmap, s->deferred_closes, NULL, &f);
271
272 if (r < 0)
273 return r;
274
275 r = journal_file_enable_post_change_timer(f, s->event, POST_CHANGE_TIMER_INTERVAL_USEC);
276 if (r < 0) {
277 (void) journal_file_close(f);
278 return r;
279 }
280
281 *ret = f;
282 return r;
283 }
284
/* Returns true if the flag file /run/systemd/journal/flushed exists (or rather: is accessible). */
static bool flushed_flag_is_set(void) {
        if (access("/run/systemd/journal/flushed", F_OK) < 0)
                return false;

        return true;
}
288
289 static int system_journal_open(Server *s, bool flush_requested) {
290 const char *fn;
291 int r = 0;
292
293 if (!s->system_journal &&
294 IN_SET(s->storage, STORAGE_PERSISTENT, STORAGE_AUTO) &&
295 (flush_requested || flushed_flag_is_set())) {
296
297 /* If in auto mode: first try to create the machine
298 * path, but not the prefix.
299 *
300 * If in persistent mode: create /var/log/journal and
301 * the machine path */
302
303 if (s->storage == STORAGE_PERSISTENT)
304 (void) mkdir_p("/var/log/journal/", 0755);
305
306 (void) mkdir(s->system_storage.path, 0755);
307
308 fn = strjoina(s->system_storage.path, "/system.journal");
309 r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &s->system_journal);
310 if (r >= 0) {
311 server_add_acls(s->system_journal, 0);
312 (void) cache_space_refresh(s, &s->system_storage);
313 patch_min_use(&s->system_storage);
314 } else {
315 if (!IN_SET(r, -ENOENT, -EROFS))
316 log_warning_errno(r, "Failed to open system journal: %m");
317
318 r = 0;
319 }
320
321 /* If the runtime journal is open, and we're post-flush, we're
322 * recovering from a failed system journal rotate (ENOSPC)
323 * for which the runtime journal was reopened.
324 *
325 * Perform an implicit flush to var, leaving the runtime
326 * journal closed, now that the system journal is back.
327 */
328 if (!flush_requested)
329 (void) server_flush_to_var(s, true);
330 }
331
332 if (!s->runtime_journal &&
333 (s->storage != STORAGE_NONE)) {
334
335 fn = strjoina(s->runtime_storage.path, "/system.journal");
336
337 if (s->system_journal) {
338
339 /* Try to open the runtime journal, but only
340 * if it already exists, so that we can flush
341 * it into the system journal */
342
343 r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_storage.metrics, &s->runtime_journal);
344 if (r < 0) {
345 if (r != -ENOENT)
346 log_warning_errno(r, "Failed to open runtime journal: %m");
347
348 r = 0;
349 }
350
351 } else {
352
353 /* OK, we really need the runtime journal, so create
354 * it if necessary. */
355
356 (void) mkdir("/run/log", 0755);
357 (void) mkdir("/run/log/journal", 0755);
358 (void) mkdir_parents(fn, 0750);
359
360 r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_storage.metrics, &s->runtime_journal);
361 if (r < 0)
362 return log_error_errno(r, "Failed to open runtime journal: %m");
363 }
364
365 if (s->runtime_journal) {
366 server_add_acls(s->runtime_journal, 0);
367 (void) cache_space_refresh(s, &s->runtime_storage);
368 patch_min_use(&s->runtime_storage);
369 }
370 }
371
372 return r;
373 }
374
375 static JournalFile* find_journal(Server *s, uid_t uid) {
376 _cleanup_free_ char *p = NULL;
377 int r;
378 JournalFile *f;
379 sd_id128_t machine;
380
381 assert(s);
382
383 /* A rotate that fails to create the new journal (ENOSPC) leaves the
384 * rotated journal as NULL. Unless we revisit opening, even after
385 * space is made available we'll continue to return NULL indefinitely.
386 *
387 * system_journal_open() is a noop if the journals are already open, so
388 * we can just call it here to recover from failed rotates (or anything
389 * else that's left the journals as NULL).
390 *
391 * Fixes https://github.com/systemd/systemd/issues/3968 */
392 (void) system_journal_open(s, false);
393
394 /* We split up user logs only on /var, not on /run. If the
395 * runtime file is open, we write to it exclusively, in order
396 * to guarantee proper order as soon as we flush /run to
397 * /var and close the runtime file. */
398
399 if (s->runtime_journal)
400 return s->runtime_journal;
401
402 if (uid_for_system_journal(uid))
403 return s->system_journal;
404
405 f = ordered_hashmap_get(s->user_journals, UID_TO_PTR(uid));
406 if (f)
407 return f;
408
409 r = sd_id128_get_machine(&machine);
410 if (r < 0) {
411 log_debug_errno(r, "Failed to determine machine ID, using system log: %m");
412 return s->system_journal;
413 }
414
415 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
416 SD_ID128_FORMAT_VAL(machine), uid) < 0) {
417 log_oom();
418 return s->system_journal;
419 }
420
421 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
422 /* Too many open? Then let's close one */
423 f = ordered_hashmap_steal_first(s->user_journals);
424 assert(f);
425 (void) journal_file_close(f);
426 }
427
428 r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &f);
429 if (r < 0)
430 return s->system_journal;
431
432 server_add_acls(f, uid);
433
434 r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f);
435 if (r < 0) {
436 (void) journal_file_close(f);
437 return s->system_journal;
438 }
439
440 return f;
441 }
442
443 static int do_rotate(
444 Server *s,
445 JournalFile **f,
446 const char* name,
447 bool seal,
448 uint32_t uid) {
449
450 int r;
451 assert(s);
452
453 if (!*f)
454 return -EINVAL;
455
456 r = journal_file_rotate(f, s->compress.enabled, s->compress.threshold_bytes, seal, s->deferred_closes);
457 if (r < 0) {
458 if (*f)
459 return log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
460 else
461 return log_error_errno(r, "Failed to create new %s journal: %m", name);
462 }
463
464 server_add_acls(*f, uid);
465
466 return r;
467 }
468
469 static void server_process_deferred_closes(Server *s) {
470 JournalFile *f;
471 Iterator i;
472
473 /* Perform any deferred closes which aren't still offlining. */
474 SET_FOREACH(f, s->deferred_closes, i) {
475 if (journal_file_is_offlining(f))
476 continue;
477
478 (void) set_remove(s->deferred_closes, f);
479 (void) journal_file_close(f);
480 }
481 }
482
483 static void server_vacuum_deferred_closes(Server *s) {
484 assert(s);
485
486 /* Make some room in the deferred closes list, so that it doesn't grow without bounds */
487 if (set_size(s->deferred_closes) < DEFERRED_CLOSES_MAX)
488 return;
489
490 /* Let's first remove all journal files that might already have completed closing */
491 server_process_deferred_closes(s);
492
493 /* And now, let's close some more until we reach the limit again. */
494 while (set_size(s->deferred_closes) >= DEFERRED_CLOSES_MAX) {
495 JournalFile *f;
496
497 assert_se(f = set_steal_first(s->deferred_closes));
498 journal_file_close(f);
499 }
500 }
501
502 static int open_user_journal_directory(Server *s, DIR **ret_dir, char **ret_path) {
503 _cleanup_closedir_ DIR *dir = NULL;
504 _cleanup_free_ char *path = NULL;
505 sd_id128_t machine;
506 int r;
507
508 assert(s);
509
510 r = sd_id128_get_machine(&machine);
511 if (r < 0)
512 return log_error_errno(r, "Failed to determine machine ID, ignoring: %m");
513
514 if (asprintf(&path, "/var/log/journal/" SD_ID128_FORMAT_STR "/", SD_ID128_FORMAT_VAL(machine)) < 0)
515 return log_oom();
516
517 dir = opendir(path);
518 if (!dir)
519 return log_error_errno(errno, "Failed to open user journal directory '%s': %m", path);
520
521 if (ret_dir)
522 *ret_dir = TAKE_PTR(dir);
523 if (ret_path)
524 *ret_path = TAKE_PTR(path);
525
526 return 0;
527 }
528
529 void server_rotate(Server *s) {
530 _cleanup_free_ char *path = NULL;
531 _cleanup_closedir_ DIR *d = NULL;
532 JournalFile *f;
533 Iterator i;
534 void *k;
535 int r;
536
537 log_debug("Rotating...");
538
539 /* First, rotate the system journal (either in its runtime flavour or in its runtime flavour) */
540 (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
541 (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);
542
543 /* Then, rotate all user journals we have open (keeping them open) */
544 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
545 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UID(k));
546 if (r >= 0)
547 ordered_hashmap_replace(s->user_journals, k, f);
548 else if (!f)
549 /* Old file has been closed and deallocated */
550 ordered_hashmap_remove(s->user_journals, k);
551 }
552
553 /* Finally, also rotate all user journals we currently do not have open. */
554 r = open_user_journal_directory(s, &d, &path);
555 if (r >= 0) {
556 struct dirent *de;
557
558 FOREACH_DIRENT(de, d, log_warning_errno(errno, "Failed to enumerate %s, ignoring: %m", path)) {
559 _cleanup_free_ char *u = NULL, *full = NULL;
560 _cleanup_close_ int fd = -1;
561 const char *a, *b;
562 uid_t uid;
563
564 a = startswith(de->d_name, "user-");
565 if (!a)
566 continue;
567 b = endswith(de->d_name, ".journal");
568 if (!b)
569 continue;
570
571 u = strndup(a, b-a);
572 if (!u) {
573 log_oom();
574 break;
575 }
576
577 r = parse_uid(u, &uid);
578 if (r < 0) {
579 log_debug_errno(r, "Failed to parse UID from file name '%s', ignoring: %m", de->d_name);
580 continue;
581 }
582
583 /* Already rotated in the above loop? i.e. is it an open user journal? */
584 if (ordered_hashmap_contains(s->user_journals, UID_TO_PTR(uid)))
585 continue;
586
587 full = strjoin(path, de->d_name);
588 if (!full) {
589 log_oom();
590 break;
591 }
592
593 fd = openat(dirfd(d), de->d_name, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW|O_NONBLOCK);
594 if (fd < 0) {
595 log_full_errno(IN_SET(errno, ELOOP, ENOENT) ? LOG_DEBUG : LOG_WARNING, errno,
596 "Failed to open journal file '%s' for rotation: %m", full);
597 continue;
598 }
599
600 /* Make some room in the set of deferred close()s */
601 server_vacuum_deferred_closes(s);
602
603 /* Open the file briefly, so that we can archive it */
604 r = journal_file_open(fd,
605 full,
606 O_RDWR,
607 0640,
608 s->compress.enabled,
609 s->compress.threshold_bytes,
610 s->seal,
611 &s->system_storage.metrics,
612 s->mmap,
613 s->deferred_closes,
614 NULL,
615 &f);
616 if (r < 0) {
617 log_warning_errno(r, "Failed to read journal file %s for rotation, trying to move it out of the way: %m", full);
618
619 r = journal_file_dispose(dirfd(d), de->d_name);
620 if (r < 0)
621 log_warning_errno(r, "Failed to move %s out of the way, ignoring: %m", full);
622 else
623 log_debug("Successfully moved %s out of the way.", full);
624
625 continue;
626 }
627
628 TAKE_FD(fd); /* Donated to journal_file_open() */
629
630 r = journal_file_archive(f);
631 if (r < 0)
632 log_debug_errno(r, "Failed to archive journal file '%s', ignoring: %m", full);
633
634 f = journal_initiate_close(f, s->deferred_closes);
635 }
636 }
637
638 server_process_deferred_closes(s);
639 }
640
641 void server_sync(Server *s) {
642 JournalFile *f;
643 Iterator i;
644 int r;
645
646 if (s->system_journal) {
647 r = journal_file_set_offline(s->system_journal, false);
648 if (r < 0)
649 log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
650 }
651
652 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i) {
653 r = journal_file_set_offline(f, false);
654 if (r < 0)
655 log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
656 }
657
658 if (s->sync_event_source) {
659 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
660 if (r < 0)
661 log_error_errno(r, "Failed to disable sync timer source: %m");
662 }
663
664 s->sync_scheduled = false;
665 }
666
667 static void do_vacuum(Server *s, JournalStorage *storage, bool verbose) {
668
669 int r;
670
671 assert(s);
672 assert(storage);
673
674 (void) cache_space_refresh(s, storage);
675
676 if (verbose)
677 server_space_usage_message(s, storage);
678
679 r = journal_directory_vacuum(storage->path, storage->space.limit,
680 storage->metrics.n_max_files, s->max_retention_usec,
681 &s->oldest_file_usec, verbose);
682 if (r < 0 && r != -ENOENT)
683 log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", storage->path);
684
685 cache_space_invalidate(&storage->space);
686 }
687
688 int server_vacuum(Server *s, bool verbose) {
689 assert(s);
690
691 log_debug("Vacuuming...");
692
693 s->oldest_file_usec = 0;
694
695 if (s->system_journal)
696 do_vacuum(s, &s->system_storage, verbose);
697 if (s->runtime_journal)
698 do_vacuum(s, &s->runtime_storage, verbose);
699
700 return 0;
701 }
702
703 static void server_cache_machine_id(Server *s) {
704 sd_id128_t id;
705 int r;
706
707 assert(s);
708
709 r = sd_id128_get_machine(&id);
710 if (r < 0)
711 return;
712
713 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
714 }
715
716 static void server_cache_boot_id(Server *s) {
717 sd_id128_t id;
718 int r;
719
720 assert(s);
721
722 r = sd_id128_get_boot(&id);
723 if (r < 0)
724 return;
725
726 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
727 }
728
729 static void server_cache_hostname(Server *s) {
730 _cleanup_free_ char *t = NULL;
731 char *x;
732
733 assert(s);
734
735 t = gethostname_malloc();
736 if (!t)
737 return;
738
739 x = strappend("_HOSTNAME=", t);
740 if (!x)
741 return;
742
743 free(s->hostname_field);
744 s->hostname_field = x;
745 }
746
747 static bool shall_try_append_again(JournalFile *f, int r) {
748 switch(r) {
749
750 case -E2BIG: /* Hit configured limit */
751 case -EFBIG: /* Hit fs limit */
752 case -EDQUOT: /* Quota limit hit */
753 case -ENOSPC: /* Disk full */
754 log_debug("%s: Allocation limit reached, rotating.", f->path);
755 return true;
756
757 case -EIO: /* I/O error of some kind (mmap) */
758 log_warning("%s: IO error, rotating.", f->path);
759 return true;
760
761 case -EHOSTDOWN: /* Other machine */
762 log_info("%s: Journal file from other machine, rotating.", f->path);
763 return true;
764
765 case -EBUSY: /* Unclean shutdown */
766 log_info("%s: Unclean shutdown, rotating.", f->path);
767 return true;
768
769 case -EPROTONOSUPPORT: /* Unsupported feature */
770 log_info("%s: Unsupported feature, rotating.", f->path);
771 return true;
772
773 case -EBADMSG: /* Corrupted */
774 case -ENODATA: /* Truncated */
775 case -ESHUTDOWN: /* Already archived */
776 log_warning("%s: Journal file corrupted, rotating.", f->path);
777 return true;
778
779 case -EIDRM: /* Journal file has been deleted */
780 log_warning("%s: Journal file has been deleted, rotating.", f->path);
781 return true;
782
783 case -ETXTBSY: /* Journal file is from the future */
784 log_warning("%s: Journal file is from the future, rotating.", f->path);
785 return true;
786
787 default:
788 return false;
789 }
790 }
791
792 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, size_t n, int priority) {
793 bool vacuumed = false, rotate = false;
794 struct dual_timestamp ts;
795 JournalFile *f;
796 int r;
797
798 assert(s);
799 assert(iovec);
800 assert(n > 0);
801
802 /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
803 * the source time, and not even the time the event was originally seen, but instead simply the time we started
804 * processing it, as we want strictly linear ordering in what we write out.) */
805 assert_se(sd_event_now(s->event, CLOCK_REALTIME, &ts.realtime) >= 0);
806 assert_se(sd_event_now(s->event, CLOCK_MONOTONIC, &ts.monotonic) >= 0);
807
808 if (ts.realtime < s->last_realtime_clock) {
809 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
810 * regular operation. However, when it does happen, then we should make sure that we start fresh files
811 * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
812 * bisection works correctly. */
813
814 log_debug("Time jumped backwards, rotating.");
815 rotate = true;
816 } else {
817
818 f = find_journal(s, uid);
819 if (!f)
820 return;
821
822 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
823 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
824 rotate = true;
825 }
826 }
827
828 if (rotate) {
829 server_rotate(s);
830 server_vacuum(s, false);
831 vacuumed = true;
832
833 f = find_journal(s, uid);
834 if (!f)
835 return;
836 }
837
838 s->last_realtime_clock = ts.realtime;
839
840 r = journal_file_append_entry(f, &ts, NULL, iovec, n, &s->seqnum, NULL, NULL);
841 if (r >= 0) {
842 server_schedule_sync(s, priority);
843 return;
844 }
845
846 if (vacuumed || !shall_try_append_again(f, r)) {
847 log_error_errno(r, "Failed to write entry (%zu items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
848 return;
849 }
850
851 server_rotate(s);
852 server_vacuum(s, false);
853
854 f = find_journal(s, uid);
855 if (!f)
856 return;
857
858 log_debug("Retrying write.");
859 r = journal_file_append_entry(f, &ts, NULL, iovec, n, &s->seqnum, NULL, NULL);
860 if (r < 0)
861 log_error_errno(r, "Failed to write entry (%zu items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
862 else
863 server_schedule_sync(s, priority);
864 }
865
/* Helpers that append one "FIELD=value" string to an iovec array and advance the element counter 'n'.
 *
 * In all of them 'field' must be a string literal without the trailing "=" (the macros add it themselves).
 * The strings are allocated with newa(), so the resulting iovec entries must not be used beyond the
 * calling function. Each macro expands to a bare if statement, so do not use them as the unbraced body of
 * an if that has an else branch. */

/* Appends 'value' formatted with 'format' if isset(value) is true. 'type' is the integer type of 'value'
 * and determines the size of the buffer. */
#define IOVEC_ADD_NUMERIC_FIELD(iovec, n, value, type, isset, format, field)  \
        if (isset(value)) {                                             \
                char *k;                                                \
                k = newa(char, STRLEN(field "=") + DECIMAL_STR_MAX(type) + 1); \
                sprintf(k, field "=" format, value);                    \
                iovec[n++] = IOVEC_MAKE_STRING(k);                      \
        }

/* Appends the string 'value' unless it is NULL or empty. */
#define IOVEC_ADD_STRING_FIELD(iovec, n, value, field)                  \
        if (!isempty(value)) {                                          \
                char *k;                                                \
                k = strjoina(field "=", value);                         \
                iovec[n++] = IOVEC_MAKE_STRING(k);                      \
        }

/* Appends the 128bit ID 'value' in its string form unless it is the null ID. */
#define IOVEC_ADD_ID128_FIELD(iovec, n, value, field)                   \
        if (!sd_id128_is_null(value)) {                                 \
                char *k;                                                \
                k = newa(char, STRLEN(field "=") + SD_ID128_STRING_MAX); \
                sd_id128_to_string(value, stpcpy(k, field "="));        \
                iovec[n++] = IOVEC_MAKE_STRING(k);                      \
        }

/* Appends the first 'value_size' bytes of 'value' (NUL-terminated by the macro) if 'value_size' is
 * non-zero. */
#define IOVEC_ADD_SIZED_FIELD(iovec, n, value, value_size, field)       \
        if ((value_size) > 0) {                                         \
                char *k;                                                \
                k = newa(char, STRLEN(field "=") + (value_size) + 1);   \
                *((char*) mempcpy(stpcpy(k, field "="), value, (value_size))) = 0; \
                iovec[n++] = IOVEC_MAKE_STRING(k);                      \
        }
896
897 static void dispatch_message_real(
898 Server *s,
899 struct iovec *iovec, size_t n, size_t m,
900 const ClientContext *c,
901 const struct timeval *tv,
902 int priority,
903 pid_t object_pid) {
904
905 char source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)];
906 uid_t journal_uid;
907 ClientContext *o;
908
909 assert(s);
910 assert(iovec);
911 assert(n > 0);
912 assert(n +
913 N_IOVEC_META_FIELDS +
914 (pid_is_valid(object_pid) ? N_IOVEC_OBJECT_FIELDS : 0) +
915 client_context_extra_fields_n_iovec(c) <= m);
916
917 if (c) {
918 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->pid, pid_t, pid_is_valid, PID_FMT, "_PID");
919 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->uid, uid_t, uid_is_valid, UID_FMT, "_UID");
920 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->gid, gid_t, gid_is_valid, GID_FMT, "_GID");
921
922 IOVEC_ADD_STRING_FIELD(iovec, n, c->comm, "_COMM");
923 IOVEC_ADD_STRING_FIELD(iovec, n, c->exe, "_EXE");
924 IOVEC_ADD_STRING_FIELD(iovec, n, c->cmdline, "_CMDLINE");
925 IOVEC_ADD_STRING_FIELD(iovec, n, c->capeff, "_CAP_EFFECTIVE");
926
927 IOVEC_ADD_SIZED_FIELD(iovec, n, c->label, c->label_size, "_SELINUX_CONTEXT");
928
929 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "_AUDIT_SESSION");
930 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->loginuid, uid_t, uid_is_valid, UID_FMT, "_AUDIT_LOGINUID");
931
932 IOVEC_ADD_STRING_FIELD(iovec, n, c->cgroup, "_SYSTEMD_CGROUP");
933 IOVEC_ADD_STRING_FIELD(iovec, n, c->session, "_SYSTEMD_SESSION");
934 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->owner_uid, uid_t, uid_is_valid, UID_FMT, "_SYSTEMD_OWNER_UID");
935 IOVEC_ADD_STRING_FIELD(iovec, n, c->unit, "_SYSTEMD_UNIT");
936 IOVEC_ADD_STRING_FIELD(iovec, n, c->user_unit, "_SYSTEMD_USER_UNIT");
937 IOVEC_ADD_STRING_FIELD(iovec, n, c->slice, "_SYSTEMD_SLICE");
938 IOVEC_ADD_STRING_FIELD(iovec, n, c->user_slice, "_SYSTEMD_USER_SLICE");
939
940 IOVEC_ADD_ID128_FIELD(iovec, n, c->invocation_id, "_SYSTEMD_INVOCATION_ID");
941
942 if (c->extra_fields_n_iovec > 0) {
943 memcpy(iovec + n, c->extra_fields_iovec, c->extra_fields_n_iovec * sizeof(struct iovec));
944 n += c->extra_fields_n_iovec;
945 }
946 }
947
948 assert(n <= m);
949
950 if (pid_is_valid(object_pid) && client_context_get(s, object_pid, NULL, NULL, 0, NULL, &o) >= 0) {
951
952 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->pid, pid_t, pid_is_valid, PID_FMT, "OBJECT_PID");
953 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_UID");
954 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->gid, gid_t, gid_is_valid, GID_FMT, "OBJECT_GID");
955
956 IOVEC_ADD_STRING_FIELD(iovec, n, o->comm, "OBJECT_COMM");
957 IOVEC_ADD_STRING_FIELD(iovec, n, o->exe, "OBJECT_EXE");
958 IOVEC_ADD_STRING_FIELD(iovec, n, o->cmdline, "OBJECT_CMDLINE");
959 IOVEC_ADD_STRING_FIELD(iovec, n, o->capeff, "OBJECT_CAP_EFFECTIVE");
960
961 IOVEC_ADD_SIZED_FIELD(iovec, n, o->label, o->label_size, "OBJECT_SELINUX_CONTEXT");
962
963 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "OBJECT_AUDIT_SESSION");
964 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->loginuid, uid_t, uid_is_valid, UID_FMT, "OBJECT_AUDIT_LOGINUID");
965
966 IOVEC_ADD_STRING_FIELD(iovec, n, o->cgroup, "OBJECT_SYSTEMD_CGROUP");
967 IOVEC_ADD_STRING_FIELD(iovec, n, o->session, "OBJECT_SYSTEMD_SESSION");
968 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->owner_uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_SYSTEMD_OWNER_UID");
969 IOVEC_ADD_STRING_FIELD(iovec, n, o->unit, "OBJECT_SYSTEMD_UNIT");
970 IOVEC_ADD_STRING_FIELD(iovec, n, o->user_unit, "OBJECT_SYSTEMD_USER_UNIT");
971 IOVEC_ADD_STRING_FIELD(iovec, n, o->slice, "OBJECT_SYSTEMD_SLICE");
972 IOVEC_ADD_STRING_FIELD(iovec, n, o->user_slice, "OBJECT_SYSTEMD_USER_SLICE");
973
974 IOVEC_ADD_ID128_FIELD(iovec, n, o->invocation_id, "OBJECT_SYSTEMD_INVOCATION_ID=");
975 }
976
977 assert(n <= m);
978
979 if (tv) {
980 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT, timeval_load(tv));
981 iovec[n++] = IOVEC_MAKE_STRING(source_time);
982 }
983
984 /* Note that strictly speaking storing the boot id here is
985 * redundant since the entry includes this in-line
986 * anyway. However, we need this indexed, too. */
987 if (!isempty(s->boot_id_field))
988 iovec[n++] = IOVEC_MAKE_STRING(s->boot_id_field);
989
990 if (!isempty(s->machine_id_field))
991 iovec[n++] = IOVEC_MAKE_STRING(s->machine_id_field);
992
993 if (!isempty(s->hostname_field))
994 iovec[n++] = IOVEC_MAKE_STRING(s->hostname_field);
995
996 assert(n <= m);
997
998 if (s->split_mode == SPLIT_UID && c && uid_is_valid(c->uid))
999 /* Split up strictly by (non-root) UID */
1000 journal_uid = c->uid;
1001 else if (s->split_mode == SPLIT_LOGIN && c && c->uid > 0 && uid_is_valid(c->owner_uid))
1002 /* Split up by login UIDs. We do this only if the
1003 * realuid is not root, in order not to accidentally
1004 * leak privileged information to the user that is
1005 * logged by a privileged process that is part of an
1006 * unprivileged session. */
1007 journal_uid = c->owner_uid;
1008 else
1009 journal_uid = 0;
1010
1011 write_to_journal(s, journal_uid, iovec, n, priority);
1012 }
1013
1014 void server_driver_message(Server *s, pid_t object_pid, const char *message_id, const char *format, ...) {
1015
1016 struct iovec *iovec;
1017 size_t n = 0, k, m;
1018 va_list ap;
1019 int r;
1020
1021 assert(s);
1022 assert(format);
1023
1024 m = N_IOVEC_META_FIELDS + 5 + N_IOVEC_PAYLOAD_FIELDS + client_context_extra_fields_n_iovec(s->my_context) + N_IOVEC_OBJECT_FIELDS;
1025 iovec = newa(struct iovec, m);
1026
1027 assert_cc(3 == LOG_FAC(LOG_DAEMON));
1028 iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=3");
1029 iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=systemd-journald");
1030
1031 iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=driver");
1032 assert_cc(6 == LOG_INFO);
1033 iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=6");
1034
1035 if (message_id)
1036 iovec[n++] = IOVEC_MAKE_STRING(message_id);
1037 k = n;
1038
1039 va_start(ap, format);
1040 r = log_format_iovec(iovec, m, &n, false, 0, format, ap);
1041 /* Error handling below */
1042 va_end(ap);
1043
1044 if (r >= 0)
1045 dispatch_message_real(s, iovec, n, m, s->my_context, NULL, LOG_INFO, object_pid);
1046
1047 while (k < n)
1048 free(iovec[k++].iov_base);
1049
1050 if (r < 0) {
1051 /* We failed to format the message. Emit a warning instead. */
1052 char buf[LINE_MAX];
1053
1054 xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror(-r));
1055
1056 n = 3;
1057 iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=4");
1058 iovec[n++] = IOVEC_MAKE_STRING(buf);
1059 dispatch_message_real(s, iovec, n, m, s->my_context, NULL, LOG_INFO, object_pid);
1060 }
1061 }
1062
1063 void server_dispatch_message(
1064 Server *s,
1065 struct iovec *iovec, size_t n, size_t m,
1066 ClientContext *c,
1067 const struct timeval *tv,
1068 int priority,
1069 pid_t object_pid) {
1070
1071 uint64_t available = 0;
1072 int rl;
1073
1074 assert(s);
1075 assert(iovec || n == 0);
1076
1077 if (n == 0)
1078 return;
1079
1080 if (LOG_PRI(priority) > s->max_level_store)
1081 return;
1082
1083 /* Stop early in case the information will not be stored
1084 * in a journal. */
1085 if (s->storage == STORAGE_NONE)
1086 return;
1087
1088 if (c && c->unit) {
1089 (void) determine_space(s, &available, NULL);
1090
1091 rl = journal_rate_limit_test(s->rate_limit, c->unit, c->log_rate_limit_interval, c->log_rate_limit_burst, priority & LOG_PRIMASK, available);
1092 if (rl == 0)
1093 return;
1094
1095 /* Write a suppression message if we suppressed something */
1096 if (rl > 1)
1097 server_driver_message(s, c->pid,
1098 "MESSAGE_ID=" SD_MESSAGE_JOURNAL_DROPPED_STR,
1099 LOG_MESSAGE("Suppressed %i messages from %s", rl - 1, c->unit),
1100 "N_DROPPED=%i", rl - 1,
1101 NULL);
1102 }
1103
1104 dispatch_message_real(s, iovec, n, m, c, tv, priority, object_pid);
1105 }
1106
1107 int server_flush_to_var(Server *s, bool require_flag_file) {
1108 sd_id128_t machine;
1109 sd_journal *j = NULL;
1110 char ts[FORMAT_TIMESPAN_MAX];
1111 usec_t start;
1112 unsigned n = 0;
1113 int r;
1114
1115 assert(s);
1116
1117 if (!IN_SET(s->storage, STORAGE_AUTO, STORAGE_PERSISTENT))
1118 return 0;
1119
1120 if (!s->runtime_journal)
1121 return 0;
1122
1123 if (require_flag_file && !flushed_flag_is_set())
1124 return 0;
1125
1126 (void) system_journal_open(s, true);
1127
1128 if (!s->system_journal)
1129 return 0;
1130
1131 log_debug("Flushing to /var...");
1132
1133 start = now(CLOCK_MONOTONIC);
1134
1135 r = sd_id128_get_machine(&machine);
1136 if (r < 0)
1137 return r;
1138
1139 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1140 if (r < 0)
1141 return log_error_errno(r, "Failed to read runtime journal: %m");
1142
1143 sd_journal_set_data_threshold(j, 0);
1144
1145 SD_JOURNAL_FOREACH(j) {
1146 Object *o = NULL;
1147 JournalFile *f;
1148
1149 f = j->current_file;
1150 assert(f && f->current_offset > 0);
1151
1152 n++;
1153
1154 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1155 if (r < 0) {
1156 log_error_errno(r, "Can't read entry: %m");
1157 goto finish;
1158 }
1159
1160 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset);
1161 if (r >= 0)
1162 continue;
1163
1164 if (!shall_try_append_again(s->system_journal, r)) {
1165 log_error_errno(r, "Can't write entry: %m");
1166 goto finish;
1167 }
1168
1169 server_rotate(s);
1170 server_vacuum(s, false);
1171
1172 if (!s->system_journal) {
1173 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1174 r = -EIO;
1175 goto finish;
1176 }
1177
1178 log_debug("Retrying write.");
1179 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset);
1180 if (r < 0) {
1181 log_error_errno(r, "Can't write entry: %m");
1182 goto finish;
1183 }
1184 }
1185
1186 r = 0;
1187
1188 finish:
1189 if (s->system_journal)
1190 journal_file_post_change(s->system_journal);
1191
1192 s->runtime_journal = journal_file_close(s->runtime_journal);
1193
1194 if (r >= 0)
1195 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
1196
1197 sd_journal_close(j);
1198
1199 server_driver_message(s, 0, NULL,
1200 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1201 format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0),
1202 n),
1203 NULL);
1204
1205 return r;
1206 }
1207
1208 int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1209 Server *s = userdata;
1210 struct ucred *ucred = NULL;
1211 struct timeval *tv = NULL;
1212 struct cmsghdr *cmsg;
1213 char *label = NULL;
1214 size_t label_len = 0, m;
1215 struct iovec iovec;
1216 ssize_t n;
1217 int *fds = NULL, v = 0;
1218 size_t n_fds = 0;
1219
1220 union {
1221 struct cmsghdr cmsghdr;
1222
1223 /* We use NAME_MAX space for the SELinux label
1224 * here. The kernel currently enforces no
1225 * limit, but according to suggestions from
1226 * the SELinux people this will change and it
1227 * will probably be identical to NAME_MAX. For
1228 * now we use that, but this should be updated
1229 * one day when the final limit is known. */
1230 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1231 CMSG_SPACE(sizeof(struct timeval)) +
1232 CMSG_SPACE(sizeof(int)) + /* fd */
1233 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1234 } control = {};
1235
1236 union sockaddr_union sa = {};
1237
1238 struct msghdr msghdr = {
1239 .msg_iov = &iovec,
1240 .msg_iovlen = 1,
1241 .msg_control = &control,
1242 .msg_controllen = sizeof(control),
1243 .msg_name = &sa,
1244 .msg_namelen = sizeof(sa),
1245 };
1246
1247 assert(s);
1248 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
1249
1250 if (revents != EPOLLIN) {
1251 log_error("Got invalid event from epoll for datagram fd: %"PRIx32, revents);
1252 return -EIO;
1253 }
1254
1255 /* Try to get the right size, if we can. (Not all sockets support SIOCINQ, hence we just try, but don't rely on
1256 * it.) */
1257 (void) ioctl(fd, SIOCINQ, &v);
1258
1259 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1260 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1261 (size_t) LINE_MAX,
1262 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
1263
1264 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1265 return log_oom();
1266
1267 iovec.iov_base = s->buffer;
1268 iovec.iov_len = s->buffer_size - 1; /* Leave room for trailing NUL we add later */
1269
1270 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1271 if (n < 0) {
1272 if (IN_SET(errno, EINTR, EAGAIN))
1273 return 0;
1274
1275 return log_error_errno(errno, "recvmsg() failed: %m");
1276 }
1277
1278 CMSG_FOREACH(cmsg, &msghdr) {
1279
1280 if (cmsg->cmsg_level == SOL_SOCKET &&
1281 cmsg->cmsg_type == SCM_CREDENTIALS &&
1282 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1283 ucred = (struct ucred*) CMSG_DATA(cmsg);
1284 else if (cmsg->cmsg_level == SOL_SOCKET &&
1285 cmsg->cmsg_type == SCM_SECURITY) {
1286 label = (char*) CMSG_DATA(cmsg);
1287 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1288 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1289 cmsg->cmsg_type == SO_TIMESTAMP &&
1290 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1291 tv = (struct timeval*) CMSG_DATA(cmsg);
1292 else if (cmsg->cmsg_level == SOL_SOCKET &&
1293 cmsg->cmsg_type == SCM_RIGHTS) {
1294 fds = (int*) CMSG_DATA(cmsg);
1295 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1296 }
1297 }
1298
1299 /* And a trailing NUL, just in case */
1300 s->buffer[n] = 0;
1301
1302 if (fd == s->syslog_fd) {
1303 if (n > 0 && n_fds == 0)
1304 server_process_syslog_message(s, s->buffer, n, ucred, tv, label, label_len);
1305 else if (n_fds > 0)
1306 log_warning("Got file descriptors via syslog socket. Ignoring.");
1307
1308 } else if (fd == s->native_fd) {
1309 if (n > 0 && n_fds == 0)
1310 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1311 else if (n == 0 && n_fds == 1)
1312 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1313 else if (n_fds > 0)
1314 log_warning("Got too many file descriptors via native socket. Ignoring.");
1315
1316 } else {
1317 assert(fd == s->audit_fd);
1318
1319 if (n > 0 && n_fds == 0)
1320 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1321 else if (n_fds > 0)
1322 log_warning("Got file descriptors via audit socket. Ignoring.");
1323 }
1324
1325 close_many(fds, n_fds);
1326 return 0;
1327 }
1328
1329 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1330 Server *s = userdata;
1331 int r;
1332
1333 assert(s);
1334
1335 log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
1336
1337 (void) server_flush_to_var(s, false);
1338 server_sync(s);
1339 server_vacuum(s, false);
1340
1341 r = touch("/run/systemd/journal/flushed");
1342 if (r < 0)
1343 log_warning_errno(r, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
1344
1345 server_space_usage_message(s, NULL);
1346 return 0;
1347 }
1348
1349 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1350 Server *s = userdata;
1351 int r;
1352
1353 assert(s);
1354
1355 log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
1356 server_rotate(s);
1357 server_vacuum(s, true);
1358
1359 if (s->system_journal)
1360 patch_min_use(&s->system_storage);
1361 if (s->runtime_journal)
1362 patch_min_use(&s->runtime_storage);
1363
1364 /* Let clients know when the most recent rotation happened. */
1365 r = write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC));
1366 if (r < 0)
1367 log_warning_errno(r, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
1368
1369 return 0;
1370 }
1371
1372 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1373 Server *s = userdata;
1374
1375 assert(s);
1376
1377 log_received_signal(LOG_INFO, si);
1378
1379 sd_event_exit(s->event, 0);
1380 return 0;
1381 }
1382
1383 static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1384 Server *s = userdata;
1385 int r;
1386
1387 assert(s);
1388
1389 log_debug("Received request to sync from PID " PID_FMT, si->ssi_pid);
1390
1391 server_sync(s);
1392
1393 /* Let clients know when the most recent sync happened. */
1394 r = write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC));
1395 if (r < 0)
1396 log_warning_errno(r, "Failed to write /run/systemd/journal/synced, ignoring: %m");
1397
1398 return 0;
1399 }
1400
1401 static int setup_signals(Server *s) {
1402 int r;
1403
1404 assert(s);
1405
1406 assert_se(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
1407
1408 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1409 if (r < 0)
1410 return r;
1411
1412 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1413 if (r < 0)
1414 return r;
1415
1416 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1417 if (r < 0)
1418 return r;
1419
1420 /* Let's process SIGTERM late, so that we flush all queued
1421 * messages to disk before we exit */
1422 r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1423 if (r < 0)
1424 return r;
1425
1426 /* When journald is invoked on the terminal (when debugging),
1427 * it's useful if C-c is handled equivalent to SIGTERM. */
1428 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
1429 if (r < 0)
1430 return r;
1431
1432 r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1433 if (r < 0)
1434 return r;
1435
1436 /* SIGRTMIN+1 causes an immediate sync. We process this very
1437 * late, so that everything else queued at this point is
1438 * really written to disk. Clients can watch
1439 * /run/systemd/journal/synced with inotify until its mtime
1440 * changes to see when a sync happened. */
1441 r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
1442 if (r < 0)
1443 return r;
1444
1445 r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
1446 if (r < 0)
1447 return r;
1448
1449 return 0;
1450 }
1451
1452 static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
1453 Server *s = data;
1454 int r;
1455
1456 assert(s);
1457
1458 if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_syslog")) {
1459
1460 r = value ? parse_boolean(value) : true;
1461 if (r < 0)
1462 log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value);
1463 else
1464 s->forward_to_syslog = r;
1465
1466 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_kmsg")) {
1467
1468 r = value ? parse_boolean(value) : true;
1469 if (r < 0)
1470 log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value);
1471 else
1472 s->forward_to_kmsg = r;
1473
1474 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_console")) {
1475
1476 r = value ? parse_boolean(value) : true;
1477 if (r < 0)
1478 log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value);
1479 else
1480 s->forward_to_console = r;
1481
1482 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_wall")) {
1483
1484 r = value ? parse_boolean(value) : true;
1485 if (r < 0)
1486 log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value);
1487 else
1488 s->forward_to_wall = r;
1489
1490 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_console")) {
1491
1492 if (proc_cmdline_value_missing(key, value))
1493 return 0;
1494
1495 r = log_level_from_string(value);
1496 if (r < 0)
1497 log_warning("Failed to parse max level console value \"%s\". Ignoring.", value);
1498 else
1499 s->max_level_console = r;
1500
1501 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_store")) {
1502
1503 if (proc_cmdline_value_missing(key, value))
1504 return 0;
1505
1506 r = log_level_from_string(value);
1507 if (r < 0)
1508 log_warning("Failed to parse max level store value \"%s\". Ignoring.", value);
1509 else
1510 s->max_level_store = r;
1511
1512 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_syslog")) {
1513
1514 if (proc_cmdline_value_missing(key, value))
1515 return 0;
1516
1517 r = log_level_from_string(value);
1518 if (r < 0)
1519 log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value);
1520 else
1521 s->max_level_syslog = r;
1522
1523 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_kmsg")) {
1524
1525 if (proc_cmdline_value_missing(key, value))
1526 return 0;
1527
1528 r = log_level_from_string(value);
1529 if (r < 0)
1530 log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value);
1531 else
1532 s->max_level_kmsg = r;
1533
1534 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_wall")) {
1535
1536 if (proc_cmdline_value_missing(key, value))
1537 return 0;
1538
1539 r = log_level_from_string(value);
1540 if (r < 0)
1541 log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value);
1542 else
1543 s->max_level_wall = r;
1544
1545 } else if (startswith(key, "systemd.journald"))
1546 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key);
1547
1548 /* do not warn about state here, since probably systemd already did */
1549 return 0;
1550 }
1551
1552 static int server_parse_config_file(Server *s) {
1553 assert(s);
1554
1555 return config_parse_many_nulstr(PKGSYSCONFDIR "/journald.conf",
1556 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1557 "Journal\0",
1558 config_item_perf_lookup, journald_gperf_lookup,
1559 CONFIG_PARSE_WARN, s);
1560 }
1561
1562 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1563 Server *s = userdata;
1564
1565 assert(s);
1566
1567 server_sync(s);
1568 return 0;
1569 }
1570
1571 int server_schedule_sync(Server *s, int priority) {
1572 int r;
1573
1574 assert(s);
1575
1576 if (priority <= LOG_CRIT) {
1577 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1578 server_sync(s);
1579 return 0;
1580 }
1581
1582 if (s->sync_scheduled)
1583 return 0;
1584
1585 if (s->sync_interval_usec > 0) {
1586 usec_t when;
1587
1588 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1589 if (r < 0)
1590 return r;
1591
1592 when += s->sync_interval_usec;
1593
1594 if (!s->sync_event_source) {
1595 r = sd_event_add_time(
1596 s->event,
1597 &s->sync_event_source,
1598 CLOCK_MONOTONIC,
1599 when, 0,
1600 server_dispatch_sync, s);
1601 if (r < 0)
1602 return r;
1603
1604 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1605 } else {
1606 r = sd_event_source_set_time(s->sync_event_source, when);
1607 if (r < 0)
1608 return r;
1609
1610 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1611 }
1612 if (r < 0)
1613 return r;
1614
1615 s->sync_scheduled = true;
1616 }
1617
1618 return 0;
1619 }
1620
1621 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1622 Server *s = userdata;
1623
1624 assert(s);
1625
1626 server_cache_hostname(s);
1627 return 0;
1628 }
1629
1630 static int server_open_hostname(Server *s) {
1631 int r;
1632
1633 assert(s);
1634
1635 s->hostname_fd = open("/proc/sys/kernel/hostname",
1636 O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
1637 if (s->hostname_fd < 0)
1638 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
1639
1640 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1641 if (r < 0) {
1642 /* kernels prior to 3.2 don't support polling this file. Ignore
1643 * the failure. */
1644 if (r == -EPERM) {
1645 log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
1646 s->hostname_fd = safe_close(s->hostname_fd);
1647 return 0;
1648 }
1649
1650 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
1651 }
1652
1653 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1654 if (r < 0)
1655 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
1656
1657 return 0;
1658 }
1659
1660 static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1661 Server *s = userdata;
1662 int r;
1663
1664 assert(s);
1665 assert(s->notify_event_source == es);
1666 assert(s->notify_fd == fd);
1667
1668 /* The $NOTIFY_SOCKET is writable again, now send exactly one
1669 * message on it. Either it's the watchdog event, the initial
1670 * READY=1 event or an stdout stream event. If there's nothing
1671 * to write anymore, turn our event source off. The next time
1672 * there's something to send it will be turned on again. */
1673
1674 if (!s->sent_notify_ready) {
1675 static const char p[] =
1676 "READY=1\n"
1677 "STATUS=Processing requests...";
1678 ssize_t l;
1679
1680 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1681 if (l < 0) {
1682 if (errno == EAGAIN)
1683 return 0;
1684
1685 return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1686 }
1687
1688 s->sent_notify_ready = true;
1689 log_debug("Sent READY=1 notification.");
1690
1691 } else if (s->send_watchdog) {
1692
1693 static const char p[] =
1694 "WATCHDOG=1";
1695
1696 ssize_t l;
1697
1698 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1699 if (l < 0) {
1700 if (errno == EAGAIN)
1701 return 0;
1702
1703 return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
1704 }
1705
1706 s->send_watchdog = false;
1707 log_debug("Sent WATCHDOG=1 notification.");
1708
1709 } else if (s->stdout_streams_notify_queue)
1710 /* Dispatch one stream notification event */
1711 stdout_stream_send_notify(s->stdout_streams_notify_queue);
1712
1713 /* Leave us enabled if there's still more to do. */
1714 if (s->send_watchdog || s->stdout_streams_notify_queue)
1715 return 0;
1716
1717 /* There was nothing to do anymore, let's turn ourselves off. */
1718 r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1719 if (r < 0)
1720 return log_error_errno(r, "Failed to turn off notify event source: %m");
1721
1722 return 0;
1723 }
1724
1725 static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
1726 Server *s = userdata;
1727 int r;
1728
1729 assert(s);
1730
1731 s->send_watchdog = true;
1732
1733 r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
1734 if (r < 0)
1735 log_warning_errno(r, "Failed to turn on notify event source: %m");
1736
1737 r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
1738 if (r < 0)
1739 return log_error_errno(r, "Failed to restart watchdog event source: %m");
1740
1741 r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
1742 if (r < 0)
1743 return log_error_errno(r, "Failed to enable watchdog event source: %m");
1744
1745 return 0;
1746 }
1747
1748 static int server_connect_notify(Server *s) {
1749 union sockaddr_union sa = {};
1750 const char *e;
1751 int r, salen;
1752
1753 assert(s);
1754 assert(s->notify_fd < 0);
1755 assert(!s->notify_event_source);
1756
1757 /*
1758 So here's the problem: we'd like to send notification
1759 messages to PID 1, but we cannot do that via sd_notify(),
1760 since that's synchronous, and we might end up blocking on
1761 it. Specifically: given that PID 1 might block on
1762 dbus-daemon during IPC, and dbus-daemon is logging to us,
1763 and might hence block on us, we might end up in a deadlock
1764 if we block on sending PID 1 notification messages — by
1765 generating a full blocking circle. To avoid this, let's
1766 create a non-blocking socket, and connect it to the
1767 notification socket, and then wait for POLLOUT before we
1768 send anything. This should efficiently avoid any deadlocks,
1769 as we'll never block on PID 1, hence PID 1 can safely block
1770 on dbus-daemon which can safely block on us again.
1771
1772 Don't think that this issue is real? It is, see:
1773 https://github.com/systemd/systemd/issues/1505
1774 */
1775
1776 e = getenv("NOTIFY_SOCKET");
1777 if (!e)
1778 return 0;
1779
1780 salen = sockaddr_un_set_path(&sa.un, e);
1781 if (salen < 0)
1782 return log_error_errno(salen, "NOTIFY_SOCKET set to invalid value '%s': %m", e);
1783
1784 s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1785 if (s->notify_fd < 0)
1786 return log_error_errno(errno, "Failed to create notify socket: %m");
1787
1788 (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1789
1790 r = connect(s->notify_fd, &sa.sa, salen);
1791 if (r < 0)
1792 return log_error_errno(errno, "Failed to connect to notify socket: %m");
1793
1794 r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1795 if (r < 0)
1796 return log_error_errno(r, "Failed to watch notification socket: %m");
1797
1798 if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
1799 s->send_watchdog = true;
1800
1801 r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s);
1802 if (r < 0)
1803 return log_error_errno(r, "Failed to add watchdog time event: %m");
1804 }
1805
1806 /* This should fire pretty soon, which we'll use to send the
1807 * READY=1 event. */
1808
1809 return 0;
1810 }
1811
1812 int server_init(Server *s) {
1813 _cleanup_fdset_free_ FDSet *fds = NULL;
1814 int n, r, fd;
1815 bool no_sockets;
1816
1817 assert(s);
1818
1819 zero(*s);
1820 s->syslog_fd = s->native_fd = s->stdout_fd = s->dev_kmsg_fd = s->audit_fd = s->hostname_fd = s->notify_fd = -1;
1821 s->compress.enabled = true;
1822 s->compress.threshold_bytes = (uint64_t) -1;
1823 s->seal = true;
1824 s->read_kmsg = true;
1825
1826 s->watchdog_usec = USEC_INFINITY;
1827
1828 s->sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC;
1829 s->sync_scheduled = false;
1830
1831 s->rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL;
1832 s->rate_limit_burst = DEFAULT_RATE_LIMIT_BURST;
1833
1834 s->forward_to_wall = true;
1835
1836 s->max_file_usec = DEFAULT_MAX_FILE_USEC;
1837
1838 s->max_level_store = LOG_DEBUG;
1839 s->max_level_syslog = LOG_DEBUG;
1840 s->max_level_kmsg = LOG_NOTICE;
1841 s->max_level_console = LOG_INFO;
1842 s->max_level_wall = LOG_EMERG;
1843
1844 s->line_max = DEFAULT_LINE_MAX;
1845
1846 journal_reset_metrics(&s->system_storage.metrics);
1847 journal_reset_metrics(&s->runtime_storage.metrics);
1848
1849 server_parse_config_file(s);
1850
1851 r = proc_cmdline_parse(parse_proc_cmdline_item, s, PROC_CMDLINE_STRIP_RD_PREFIX);
1852 if (r < 0)
1853 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
1854
1855 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1856 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1857 s->rate_limit_interval, s->rate_limit_burst);
1858 s->rate_limit_interval = s->rate_limit_burst = 0;
1859 }
1860
1861 (void) mkdir_p("/run/systemd/journal", 0755);
1862
1863 s->user_journals = ordered_hashmap_new(NULL);
1864 if (!s->user_journals)
1865 return log_oom();
1866
1867 s->mmap = mmap_cache_new();
1868 if (!s->mmap)
1869 return log_oom();
1870
1871 s->deferred_closes = set_new(NULL);
1872 if (!s->deferred_closes)
1873 return log_oom();
1874
1875 r = sd_event_default(&s->event);
1876 if (r < 0)
1877 return log_error_errno(r, "Failed to create event loop: %m");
1878
1879 n = sd_listen_fds(true);
1880 if (n < 0)
1881 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
1882
1883 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1884
1885 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1886
1887 if (s->native_fd >= 0) {
1888 log_error("Too many native sockets passed.");
1889 return -EINVAL;
1890 }
1891
1892 s->native_fd = fd;
1893
1894 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1895
1896 if (s->stdout_fd >= 0) {
1897 log_error("Too many stdout sockets passed.");
1898 return -EINVAL;
1899 }
1900
1901 s->stdout_fd = fd;
1902
1903 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1904 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1905
1906 if (s->syslog_fd >= 0) {
1907 log_error("Too many /dev/log sockets passed.");
1908 return -EINVAL;
1909 }
1910
1911 s->syslog_fd = fd;
1912
1913 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1914
1915 if (s->audit_fd >= 0) {
1916 log_error("Too many audit sockets passed.");
1917 return -EINVAL;
1918 }
1919
1920 s->audit_fd = fd;
1921
1922 } else {
1923
1924 if (!fds) {
1925 fds = fdset_new();
1926 if (!fds)
1927 return log_oom();
1928 }
1929
1930 r = fdset_put(fds, fd);
1931 if (r < 0)
1932 return log_oom();
1933 }
1934 }
1935
1936 /* Try to restore streams, but don't bother if this fails */
1937 (void) server_restore_streams(s, fds);
1938
1939 if (fdset_size(fds) > 0) {
1940 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1941 fds = fdset_free(fds);
1942 }
1943
1944 no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
1945
1946 /* always open stdout, syslog, native, and kmsg sockets */
1947
1948 /* systemd-journald.socket: /run/systemd/journal/stdout */
1949 r = server_open_stdout_socket(s);
1950 if (r < 0)
1951 return r;
1952
1953 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
1954 r = server_open_syslog_socket(s);
1955 if (r < 0)
1956 return r;
1957
1958 /* systemd-journald.socket: /run/systemd/journal/socket */
1959 r = server_open_native_socket(s);
1960 if (r < 0)
1961 return r;
1962
1963 /* /dev/kmsg */
1964 r = server_open_dev_kmsg(s);
1965 if (r < 0)
1966 return r;
1967
1968 /* Unless we got *some* sockets and not audit, open audit socket */
1969 if (s->audit_fd >= 0 || no_sockets) {
1970 r = server_open_audit(s);
1971 if (r < 0)
1972 return r;
1973 }
1974
1975 r = server_open_kernel_seqnum(s);
1976 if (r < 0)
1977 return r;
1978
1979 r = server_open_hostname(s);
1980 if (r < 0)
1981 return r;
1982
1983 r = setup_signals(s);
1984 if (r < 0)
1985 return r;
1986
1987 s->rate_limit = journal_rate_limit_new();
1988 if (!s->rate_limit)
1989 return -ENOMEM;
1990
1991 r = cg_get_root_path(&s->cgroup_root);
1992 if (r < 0)
1993 return r;
1994
1995 server_cache_hostname(s);
1996 server_cache_boot_id(s);
1997 server_cache_machine_id(s);
1998
1999 s->runtime_storage.name = "Runtime journal";
2000 s->system_storage.name = "System journal";
2001
2002 s->runtime_storage.path = strjoin("/run/log/journal/", SERVER_MACHINE_ID(s));
2003 s->system_storage.path = strjoin("/var/log/journal/", SERVER_MACHINE_ID(s));
2004 if (!s->runtime_storage.path || !s->system_storage.path)
2005 return -ENOMEM;
2006
2007 (void) server_connect_notify(s);
2008
2009 (void) client_context_acquire_default(s);
2010
2011 return system_journal_open(s, false);
2012 }
2013
2014 void server_maybe_append_tags(Server *s) {
2015 #if HAVE_GCRYPT
2016 JournalFile *f;
2017 Iterator i;
2018 usec_t n;
2019
2020 n = now(CLOCK_REALTIME);
2021
2022 if (s->system_journal)
2023 journal_file_maybe_append_tag(s->system_journal, n);
2024
2025 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
2026 journal_file_maybe_append_tag(f, n);
2027 #endif
2028 }
2029
2030 void server_done(Server *s) {
2031 assert(s);
2032
2033 set_free_with_destructor(s->deferred_closes, journal_file_close);
2034
2035 while (s->stdout_streams)
2036 stdout_stream_free(s->stdout_streams);
2037
2038 client_context_flush_all(s);
2039
2040 if (s->system_journal)
2041 (void) journal_file_close(s->system_journal);
2042
2043 if (s->runtime_journal)
2044 (void) journal_file_close(s->runtime_journal);
2045
2046 ordered_hashmap_free_with_destructor(s->user_journals, journal_file_close);
2047
2048 sd_event_source_unref(s->syslog_event_source);
2049 sd_event_source_unref(s->native_event_source);
2050 sd_event_source_unref(s->stdout_event_source);
2051 sd_event_source_unref(s->dev_kmsg_event_source);
2052 sd_event_source_unref(s->audit_event_source);
2053 sd_event_source_unref(s->sync_event_source);
2054 sd_event_source_unref(s->sigusr1_event_source);
2055 sd_event_source_unref(s->sigusr2_event_source);
2056 sd_event_source_unref(s->sigterm_event_source);
2057 sd_event_source_unref(s->sigint_event_source);
2058 sd_event_source_unref(s->sigrtmin1_event_source);
2059 sd_event_source_unref(s->hostname_event_source);
2060 sd_event_source_unref(s->notify_event_source);
2061 sd_event_source_unref(s->watchdog_event_source);
2062 sd_event_unref(s->event);
2063
2064 safe_close(s->syslog_fd);
2065 safe_close(s->native_fd);
2066 safe_close(s->stdout_fd);
2067 safe_close(s->dev_kmsg_fd);
2068 safe_close(s->audit_fd);
2069 safe_close(s->hostname_fd);
2070 safe_close(s->notify_fd);
2071
2072 if (s->rate_limit)
2073 journal_rate_limit_free(s->rate_limit);
2074
2075 if (s->kernel_seqnum)
2076 munmap(s->kernel_seqnum, sizeof(uint64_t));
2077
2078 free(s->buffer);
2079 free(s->tty_path);
2080 free(s->cgroup_root);
2081 free(s->hostname_field);
2082 free(s->runtime_storage.path);
2083 free(s->system_storage.path);
2084
2085 if (s->mmap)
2086 mmap_cache_unref(s->mmap);
2087 }
2088
2089 static const char* const storage_table[_STORAGE_MAX] = {
2090 [STORAGE_AUTO] = "auto",
2091 [STORAGE_VOLATILE] = "volatile",
2092 [STORAGE_PERSISTENT] = "persistent",
2093 [STORAGE_NONE] = "none"
2094 };
2095
2096 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
2097 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
2098
2099 static const char* const split_mode_table[_SPLIT_MAX] = {
2100 [SPLIT_LOGIN] = "login",
2101 [SPLIT_UID] = "uid",
2102 [SPLIT_NONE] = "none",
2103 };
2104
2105 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
2106 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
2107
2108 int config_parse_line_max(
2109 const char* unit,
2110 const char *filename,
2111 unsigned line,
2112 const char *section,
2113 unsigned section_line,
2114 const char *lvalue,
2115 int ltype,
2116 const char *rvalue,
2117 void *data,
2118 void *userdata) {
2119
2120 size_t *sz = data;
2121 int r;
2122
2123 assert(filename);
2124 assert(lvalue);
2125 assert(rvalue);
2126 assert(data);
2127
2128 if (isempty(rvalue))
2129 /* Empty assignment means default */
2130 *sz = DEFAULT_LINE_MAX;
2131 else {
2132 uint64_t v;
2133
2134 r = parse_size(rvalue, 1024, &v);
2135 if (r < 0) {
2136 log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse LineMax= value, ignoring: %s", rvalue);
2137 return 0;
2138 }
2139
2140 if (v < 79) {
2141 /* Why specify 79 here as minimum line length? Simply, because the most common traditional
2142 * terminal size is 80ch, and it might make sense to break one character before the natural
2143 * line break would occur on that. */
2144 log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too small, clamping to 79: %s", rvalue);
2145 *sz = 79;
2146 } else if (v > (uint64_t) (SSIZE_MAX-1)) {
2147 /* So, why specify SSIZE_MAX-1 here? Because that's one below the largest size value read()
2148 * can return, and we need one extra byte for the trailing NUL byte. Of course IRL such large
2149 * memory allocations will fail anyway, hence this limit is mostly theoretical anyway, as we'll
2150 * fail much earlier anyway. */
2151 log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too large, clamping to %" PRIu64 ": %s", (uint64_t) (SSIZE_MAX-1), rvalue);
2152 *sz = SSIZE_MAX-1;
2153 } else
2154 *sz = (size_t) v;
2155 }
2156
2157 return 0;
2158 }
2159
2160 int config_parse_compress(const char* unit,
2161 const char *filename,
2162 unsigned line,
2163 const char *section,
2164 unsigned section_line,
2165 const char *lvalue,
2166 int ltype,
2167 const char *rvalue,
2168 void *data,
2169 void *userdata) {
2170 JournalCompressOptions* compress = data;
2171 int r;
2172
2173 if (streq(rvalue, "1")) {
2174 log_syntax(unit, LOG_WARNING, filename, line, 0,
2175 "Compress= ambiguously specified as 1, enabling compression with default threshold");
2176 compress->enabled = true;
2177 } else if (streq(rvalue, "0")) {
2178 log_syntax(unit, LOG_WARNING, filename, line, 0,
2179 "Compress= ambiguously specified as 0, disabling compression");
2180 compress->enabled = false;
2181 } else if ((r = parse_boolean(rvalue)) >= 0)
2182 compress->enabled = r;
2183 else if (parse_size(rvalue, 1024, &compress->threshold_bytes) == 0)
2184 compress->enabled = true;
2185 else if (isempty(rvalue)) {
2186 compress->enabled = true;
2187 compress->threshold_bytes = (uint64_t) -1;
2188 } else
2189 log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse Compress= value, ignoring: %s", rvalue);
2190
2191 return 0;
2192 }