]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/journal/journald-server.c
journald: no need to check ptr for non-NULL before _unref(), as function does that...
[thirdparty/systemd.git] / src / journal / journald-server.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #if HAVE_SELINUX
4 #include <selinux/selinux.h>
5 #endif
6 #include <sys/ioctl.h>
7 #include <sys/mman.h>
8 #include <sys/signalfd.h>
9 #include <sys/statvfs.h>
10 #include <linux/sockios.h>
11
12 #include "sd-daemon.h"
13 #include "sd-journal.h"
14 #include "sd-messages.h"
15
16 #include "acl-util.h"
17 #include "alloc-util.h"
18 #include "audit-util.h"
19 #include "cgroup-util.h"
20 #include "conf-parser.h"
21 #include "dirent-util.h"
22 #include "extract-word.h"
23 #include "fd-util.h"
24 #include "fileio.h"
25 #include "format-util.h"
26 #include "fs-util.h"
27 #include "hashmap.h"
28 #include "hostname-util.h"
29 #include "id128-util.h"
30 #include "io-util.h"
31 #include "journal-authenticate.h"
32 #include "journal-file.h"
33 #include "journal-internal.h"
34 #include "journal-vacuum.h"
35 #include "journald-audit.h"
36 #include "journald-context.h"
37 #include "journald-kmsg.h"
38 #include "journald-native.h"
39 #include "journald-rate-limit.h"
40 #include "journald-server.h"
41 #include "journald-stream.h"
42 #include "journald-syslog.h"
43 #include "log.h"
44 #include "missing.h"
45 #include "mkdir.h"
46 #include "parse-util.h"
47 #include "proc-cmdline.h"
48 #include "process-util.h"
49 #include "rm-rf.h"
50 #include "selinux-util.h"
51 #include "signal-util.h"
52 #include "socket-util.h"
53 #include "stdio-util.h"
54 #include "string-table.h"
55 #include "string-util.h"
56 #include "syslog-util.h"
57 #include "user-util.h"
58
59 #define USER_JOURNALS_MAX 1024
60
61 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
62 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
63 #define DEFAULT_RATE_LIMIT_BURST 10000
64 #define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
65
66 #define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
67
68 #define NOTIFY_SNDBUF_SIZE (8*1024*1024)
69
70 /* The period to insert between posting changes for coalescing */
71 #define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
72
73 /* Pick a good default that is likely to fit into AF_UNIX and AF_INET SOCK_DGRAM datagrams, and even leaves some room
74 * for a bit of additional metadata. */
75 #define DEFAULT_LINE_MAX (48*1024)
76
77 #define DEFERRED_CLOSES_MAX (4096)
78
79 static int determine_path_usage(Server *s, const char *path, uint64_t *ret_used, uint64_t *ret_free) {
80 _cleanup_closedir_ DIR *d = NULL;
81 struct dirent *de;
82 struct statvfs ss;
83
84 assert(ret_used);
85 assert(ret_free);
86
87 d = opendir(path);
88 if (!d)
89 return log_full_errno(errno == ENOENT ? LOG_DEBUG : LOG_ERR,
90 errno, "Failed to open %s: %m", path);
91
92 if (fstatvfs(dirfd(d), &ss) < 0)
93 return log_error_errno(errno, "Failed to fstatvfs(%s): %m", path);
94
95 *ret_free = ss.f_bsize * ss.f_bavail;
96 *ret_used = 0;
97 FOREACH_DIRENT_ALL(de, d, break) {
98 struct stat st;
99
100 if (!endswith(de->d_name, ".journal") &&
101 !endswith(de->d_name, ".journal~"))
102 continue;
103
104 if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) {
105 log_debug_errno(errno, "Failed to stat %s/%s, ignoring: %m", path, de->d_name);
106 continue;
107 }
108
109 if (!S_ISREG(st.st_mode))
110 continue;
111
112 *ret_used += (uint64_t) st.st_blocks * 512UL;
113 }
114
115 return 0;
116 }
117
118 static void cache_space_invalidate(JournalStorageSpace *space) {
119 zero(*space);
120 }
121
122 static int cache_space_refresh(Server *s, JournalStorage *storage) {
123 JournalStorageSpace *space;
124 JournalMetrics *metrics;
125 uint64_t vfs_used, vfs_avail, avail;
126 usec_t ts;
127 int r;
128
129 assert(s);
130
131 metrics = &storage->metrics;
132 space = &storage->space;
133
134 ts = now(CLOCK_MONOTONIC);
135
136 if (space->timestamp != 0 && space->timestamp + RECHECK_SPACE_USEC > ts)
137 return 0;
138
139 r = determine_path_usage(s, storage->path, &vfs_used, &vfs_avail);
140 if (r < 0)
141 return r;
142
143 space->vfs_used = vfs_used;
144 space->vfs_available = vfs_avail;
145
146 avail = LESS_BY(vfs_avail, metrics->keep_free);
147
148 space->limit = MIN(MAX(vfs_used + avail, metrics->min_use), metrics->max_use);
149 space->available = LESS_BY(space->limit, vfs_used);
150 space->timestamp = ts;
151 return 1;
152 }
153
154 static void patch_min_use(JournalStorage *storage) {
155 assert(storage);
156
157 /* Let's bump the min_use limit to the current usage on disk. We do
158 * this when starting up and first opening the journal files. This way
159 * sudden spikes in disk usage will not cause journald to vacuum files
160 * without bounds. Note that this means that only a restart of journald
161 * will make it reset this value. */
162
163 storage->metrics.min_use = MAX(storage->metrics.min_use, storage->space.vfs_used);
164 }
165
166 static int determine_space(Server *s, uint64_t *available, uint64_t *limit) {
167 JournalStorage *js;
168 int r;
169
170 assert(s);
171
172 js = s->system_journal ? &s->system_storage : &s->runtime_storage;
173
174 r = cache_space_refresh(s, js);
175 if (r >= 0) {
176 if (available)
177 *available = js->space.available;
178 if (limit)
179 *limit = js->space.limit;
180 }
181 return r;
182 }
183
184 void server_space_usage_message(Server *s, JournalStorage *storage) {
185 char fb1[FORMAT_BYTES_MAX], fb2[FORMAT_BYTES_MAX], fb3[FORMAT_BYTES_MAX],
186 fb4[FORMAT_BYTES_MAX], fb5[FORMAT_BYTES_MAX], fb6[FORMAT_BYTES_MAX];
187 JournalMetrics *metrics;
188
189 assert(s);
190
191 if (!storage)
192 storage = s->system_journal ? &s->system_storage : &s->runtime_storage;
193
194 if (cache_space_refresh(s, storage) < 0)
195 return;
196
197 metrics = &storage->metrics;
198 format_bytes(fb1, sizeof(fb1), storage->space.vfs_used);
199 format_bytes(fb2, sizeof(fb2), metrics->max_use);
200 format_bytes(fb3, sizeof(fb3), metrics->keep_free);
201 format_bytes(fb4, sizeof(fb4), storage->space.vfs_available);
202 format_bytes(fb5, sizeof(fb5), storage->space.limit);
203 format_bytes(fb6, sizeof(fb6), storage->space.available);
204
205 server_driver_message(s, 0,
206 "MESSAGE_ID=" SD_MESSAGE_JOURNAL_USAGE_STR,
207 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
208 storage->name, storage->path, fb1, fb5, fb6),
209 "JOURNAL_NAME=%s", storage->name,
210 "JOURNAL_PATH=%s", storage->path,
211 "CURRENT_USE=%"PRIu64, storage->space.vfs_used,
212 "CURRENT_USE_PRETTY=%s", fb1,
213 "MAX_USE=%"PRIu64, metrics->max_use,
214 "MAX_USE_PRETTY=%s", fb2,
215 "DISK_KEEP_FREE=%"PRIu64, metrics->keep_free,
216 "DISK_KEEP_FREE_PRETTY=%s", fb3,
217 "DISK_AVAILABLE=%"PRIu64, storage->space.vfs_available,
218 "DISK_AVAILABLE_PRETTY=%s", fb4,
219 "LIMIT=%"PRIu64, storage->space.limit,
220 "LIMIT_PRETTY=%s", fb5,
221 "AVAILABLE=%"PRIu64, storage->space.available,
222 "AVAILABLE_PRETTY=%s", fb6,
223 NULL);
224 }
225
226 static bool uid_for_system_journal(uid_t uid) {
227
228 /* Returns true if the specified UID shall get its data stored in the system journal*/
229
230 return uid_is_system(uid) || uid_is_dynamic(uid) || uid == UID_NOBODY;
231 }
232
233 static void server_add_acls(JournalFile *f, uid_t uid) {
234 #if HAVE_ACL
235 int r;
236 #endif
237 assert(f);
238
239 #if HAVE_ACL
240 if (uid_for_system_journal(uid))
241 return;
242
243 r = add_acls_for_user(f->fd, uid);
244 if (r < 0)
245 log_warning_errno(r, "Failed to set ACL on %s, ignoring: %m", f->path);
246 #endif
247 }
248
249 static int open_journal(
250 Server *s,
251 bool reliably,
252 const char *fname,
253 int flags,
254 bool seal,
255 JournalMetrics *metrics,
256 JournalFile **ret) {
257
258 JournalFile *f;
259 int r;
260
261 assert(s);
262 assert(fname);
263 assert(ret);
264
265 if (reliably)
266 r = journal_file_open_reliably(fname, flags, 0640, s->compress.enabled, s->compress.threshold_bytes,
267 seal, metrics, s->mmap, s->deferred_closes, NULL, &f);
268 else
269 r = journal_file_open(-1, fname, flags, 0640, s->compress.enabled, s->compress.threshold_bytes, seal,
270 metrics, s->mmap, s->deferred_closes, NULL, &f);
271
272 if (r < 0)
273 return r;
274
275 r = journal_file_enable_post_change_timer(f, s->event, POST_CHANGE_TIMER_INTERVAL_USEC);
276 if (r < 0) {
277 (void) journal_file_close(f);
278 return r;
279 }
280
281 *ret = f;
282 return r;
283 }
284
285 static bool flushed_flag_is_set(void) {
286 return access("/run/systemd/journal/flushed", F_OK) >= 0;
287 }
288
289 static int system_journal_open(Server *s, bool flush_requested) {
290 const char *fn;
291 int r = 0;
292
293 if (!s->system_journal &&
294 IN_SET(s->storage, STORAGE_PERSISTENT, STORAGE_AUTO) &&
295 (flush_requested || flushed_flag_is_set())) {
296
297 /* If in auto mode: first try to create the machine
298 * path, but not the prefix.
299 *
300 * If in persistent mode: create /var/log/journal and
301 * the machine path */
302
303 if (s->storage == STORAGE_PERSISTENT)
304 (void) mkdir_p("/var/log/journal/", 0755);
305
306 (void) mkdir(s->system_storage.path, 0755);
307
308 fn = strjoina(s->system_storage.path, "/system.journal");
309 r = open_journal(s, true, fn, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &s->system_journal);
310 if (r >= 0) {
311 server_add_acls(s->system_journal, 0);
312 (void) cache_space_refresh(s, &s->system_storage);
313 patch_min_use(&s->system_storage);
314 } else {
315 if (!IN_SET(r, -ENOENT, -EROFS))
316 log_warning_errno(r, "Failed to open system journal: %m");
317
318 r = 0;
319 }
320
321 /* If the runtime journal is open, and we're post-flush, we're
322 * recovering from a failed system journal rotate (ENOSPC)
323 * for which the runtime journal was reopened.
324 *
325 * Perform an implicit flush to var, leaving the runtime
326 * journal closed, now that the system journal is back.
327 */
328 if (!flush_requested)
329 (void) server_flush_to_var(s, true);
330 }
331
332 if (!s->runtime_journal &&
333 (s->storage != STORAGE_NONE)) {
334
335 fn = strjoina(s->runtime_storage.path, "/system.journal");
336
337 if (s->system_journal) {
338
339 /* Try to open the runtime journal, but only
340 * if it already exists, so that we can flush
341 * it into the system journal */
342
343 r = open_journal(s, false, fn, O_RDWR, false, &s->runtime_storage.metrics, &s->runtime_journal);
344 if (r < 0) {
345 if (r != -ENOENT)
346 log_warning_errno(r, "Failed to open runtime journal: %m");
347
348 r = 0;
349 }
350
351 } else {
352
353 /* OK, we really need the runtime journal, so create
354 * it if necessary. */
355
356 (void) mkdir("/run/log", 0755);
357 (void) mkdir("/run/log/journal", 0755);
358 (void) mkdir_parents(fn, 0750);
359
360 r = open_journal(s, true, fn, O_RDWR|O_CREAT, false, &s->runtime_storage.metrics, &s->runtime_journal);
361 if (r < 0)
362 return log_error_errno(r, "Failed to open runtime journal: %m");
363 }
364
365 if (s->runtime_journal) {
366 server_add_acls(s->runtime_journal, 0);
367 (void) cache_space_refresh(s, &s->runtime_storage);
368 patch_min_use(&s->runtime_storage);
369 }
370 }
371
372 return r;
373 }
374
375 static JournalFile* find_journal(Server *s, uid_t uid) {
376 _cleanup_free_ char *p = NULL;
377 int r;
378 JournalFile *f;
379 sd_id128_t machine;
380
381 assert(s);
382
383 /* A rotate that fails to create the new journal (ENOSPC) leaves the
384 * rotated journal as NULL. Unless we revisit opening, even after
385 * space is made available we'll continue to return NULL indefinitely.
386 *
387 * system_journal_open() is a noop if the journals are already open, so
388 * we can just call it here to recover from failed rotates (or anything
389 * else that's left the journals as NULL).
390 *
391 * Fixes https://github.com/systemd/systemd/issues/3968 */
392 (void) system_journal_open(s, false);
393
394 /* We split up user logs only on /var, not on /run. If the
395 * runtime file is open, we write to it exclusively, in order
396 * to guarantee proper order as soon as we flush /run to
397 * /var and close the runtime file. */
398
399 if (s->runtime_journal)
400 return s->runtime_journal;
401
402 if (uid_for_system_journal(uid))
403 return s->system_journal;
404
405 f = ordered_hashmap_get(s->user_journals, UID_TO_PTR(uid));
406 if (f)
407 return f;
408
409 r = sd_id128_get_machine(&machine);
410 if (r < 0) {
411 log_debug_errno(r, "Failed to determine machine ID, using system log: %m");
412 return s->system_journal;
413 }
414
415 if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/user-"UID_FMT".journal",
416 SD_ID128_FORMAT_VAL(machine), uid) < 0) {
417 log_oom();
418 return s->system_journal;
419 }
420
421 while (ordered_hashmap_size(s->user_journals) >= USER_JOURNALS_MAX) {
422 /* Too many open? Then let's close one */
423 f = ordered_hashmap_steal_first(s->user_journals);
424 assert(f);
425 (void) journal_file_close(f);
426 }
427
428 r = open_journal(s, true, p, O_RDWR|O_CREAT, s->seal, &s->system_storage.metrics, &f);
429 if (r < 0)
430 return s->system_journal;
431
432 server_add_acls(f, uid);
433
434 r = ordered_hashmap_put(s->user_journals, UID_TO_PTR(uid), f);
435 if (r < 0) {
436 (void) journal_file_close(f);
437 return s->system_journal;
438 }
439
440 return f;
441 }
442
443 static int do_rotate(
444 Server *s,
445 JournalFile **f,
446 const char* name,
447 bool seal,
448 uint32_t uid) {
449
450 int r;
451 assert(s);
452
453 if (!*f)
454 return -EINVAL;
455
456 r = journal_file_rotate(f, s->compress.enabled, s->compress.threshold_bytes, seal, s->deferred_closes);
457 if (r < 0) {
458 if (*f)
459 return log_error_errno(r, "Failed to rotate %s: %m", (*f)->path);
460 else
461 return log_error_errno(r, "Failed to create new %s journal: %m", name);
462 }
463
464 server_add_acls(*f, uid);
465
466 return r;
467 }
468
469 static void server_process_deferred_closes(Server *s) {
470 JournalFile *f;
471 Iterator i;
472
473 /* Perform any deferred closes which aren't still offlining. */
474 SET_FOREACH(f, s->deferred_closes, i) {
475 if (journal_file_is_offlining(f))
476 continue;
477
478 (void) set_remove(s->deferred_closes, f);
479 (void) journal_file_close(f);
480 }
481 }
482
483 static void server_vacuum_deferred_closes(Server *s) {
484 assert(s);
485
486 /* Make some room in the deferred closes list, so that it doesn't grow without bounds */
487 if (set_size(s->deferred_closes) < DEFERRED_CLOSES_MAX)
488 return;
489
490 /* Let's first remove all journal files that might already have completed closing */
491 server_process_deferred_closes(s);
492
493 /* And now, let's close some more until we reach the limit again. */
494 while (set_size(s->deferred_closes) >= DEFERRED_CLOSES_MAX) {
495 JournalFile *f;
496
497 assert_se(f = set_steal_first(s->deferred_closes));
498 journal_file_close(f);
499 }
500 }
501
502 static int open_user_journal_directory(Server *s, DIR **ret_dir, char **ret_path) {
503 _cleanup_closedir_ DIR *dir = NULL;
504 _cleanup_free_ char *path = NULL;
505 sd_id128_t machine;
506 int r;
507
508 assert(s);
509
510 r = sd_id128_get_machine(&machine);
511 if (r < 0)
512 return log_error_errno(r, "Failed to determine machine ID, ignoring: %m");
513
514 if (asprintf(&path, "/var/log/journal/" SD_ID128_FORMAT_STR "/", SD_ID128_FORMAT_VAL(machine)) < 0)
515 return log_oom();
516
517 dir = opendir(path);
518 if (!dir)
519 return log_error_errno(errno, "Failed to open user journal directory '%s': %m", path);
520
521 if (ret_dir)
522 *ret_dir = TAKE_PTR(dir);
523 if (ret_path)
524 *ret_path = TAKE_PTR(path);
525
526 return 0;
527 }
528
529 void server_rotate(Server *s) {
530 _cleanup_free_ char *path = NULL;
531 _cleanup_closedir_ DIR *d = NULL;
532 JournalFile *f;
533 Iterator i;
534 void *k;
535 int r;
536
537 log_debug("Rotating...");
538
539 /* First, rotate the system journal (either in its runtime flavour or in its runtime flavour) */
540 (void) do_rotate(s, &s->runtime_journal, "runtime", false, 0);
541 (void) do_rotate(s, &s->system_journal, "system", s->seal, 0);
542
543 /* Then, rotate all user journals we have open (keeping them open) */
544 ORDERED_HASHMAP_FOREACH_KEY(f, k, s->user_journals, i) {
545 r = do_rotate(s, &f, "user", s->seal, PTR_TO_UID(k));
546 if (r >= 0)
547 ordered_hashmap_replace(s->user_journals, k, f);
548 else if (!f)
549 /* Old file has been closed and deallocated */
550 ordered_hashmap_remove(s->user_journals, k);
551 }
552
553 /* Finally, also rotate all user journals we currently do not have open. (But do so only if we actually have
554 * access to /var, i.e. are not in the log-to-runtime-journal mode). */
555 if (!s->runtime_journal &&
556 open_user_journal_directory(s, &d, &path) >= 0) {
557
558 struct dirent *de;
559
560 FOREACH_DIRENT(de, d, log_warning_errno(errno, "Failed to enumerate %s, ignoring: %m", path)) {
561 _cleanup_free_ char *u = NULL, *full = NULL;
562 _cleanup_close_ int fd = -1;
563 const char *a, *b;
564 uid_t uid;
565
566 a = startswith(de->d_name, "user-");
567 if (!a)
568 continue;
569 b = endswith(de->d_name, ".journal");
570 if (!b)
571 continue;
572
573 u = strndup(a, b-a);
574 if (!u) {
575 log_oom();
576 break;
577 }
578
579 r = parse_uid(u, &uid);
580 if (r < 0) {
581 log_debug_errno(r, "Failed to parse UID from file name '%s', ignoring: %m", de->d_name);
582 continue;
583 }
584
585 /* Already rotated in the above loop? i.e. is it an open user journal? */
586 if (ordered_hashmap_contains(s->user_journals, UID_TO_PTR(uid)))
587 continue;
588
589 full = strjoin(path, de->d_name);
590 if (!full) {
591 log_oom();
592 break;
593 }
594
595 fd = openat(dirfd(d), de->d_name, O_RDWR|O_CLOEXEC|O_NOCTTY|O_NOFOLLOW|O_NONBLOCK);
596 if (fd < 0) {
597 log_full_errno(IN_SET(errno, ELOOP, ENOENT) ? LOG_DEBUG : LOG_WARNING, errno,
598 "Failed to open journal file '%s' for rotation: %m", full);
599 continue;
600 }
601
602 /* Make some room in the set of deferred close()s */
603 server_vacuum_deferred_closes(s);
604
605 /* Open the file briefly, so that we can archive it */
606 r = journal_file_open(fd,
607 full,
608 O_RDWR,
609 0640,
610 s->compress.enabled,
611 s->compress.threshold_bytes,
612 s->seal,
613 &s->system_storage.metrics,
614 s->mmap,
615 s->deferred_closes,
616 NULL,
617 &f);
618 if (r < 0) {
619 log_warning_errno(r, "Failed to read journal file %s for rotation, trying to move it out of the way: %m", full);
620
621 r = journal_file_dispose(dirfd(d), de->d_name);
622 if (r < 0)
623 log_warning_errno(r, "Failed to move %s out of the way, ignoring: %m", full);
624 else
625 log_debug("Successfully moved %s out of the way.", full);
626
627 continue;
628 }
629
630 TAKE_FD(fd); /* Donated to journal_file_open() */
631
632 r = journal_file_archive(f);
633 if (r < 0)
634 log_debug_errno(r, "Failed to archive journal file '%s', ignoring: %m", full);
635
636 f = journal_initiate_close(f, s->deferred_closes);
637 }
638 }
639
640 server_process_deferred_closes(s);
641 }
642
643 void server_sync(Server *s) {
644 JournalFile *f;
645 Iterator i;
646 int r;
647
648 if (s->system_journal) {
649 r = journal_file_set_offline(s->system_journal, false);
650 if (r < 0)
651 log_warning_errno(r, "Failed to sync system journal, ignoring: %m");
652 }
653
654 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i) {
655 r = journal_file_set_offline(f, false);
656 if (r < 0)
657 log_warning_errno(r, "Failed to sync user journal, ignoring: %m");
658 }
659
660 if (s->sync_event_source) {
661 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_OFF);
662 if (r < 0)
663 log_error_errno(r, "Failed to disable sync timer source: %m");
664 }
665
666 s->sync_scheduled = false;
667 }
668
669 static void do_vacuum(Server *s, JournalStorage *storage, bool verbose) {
670
671 int r;
672
673 assert(s);
674 assert(storage);
675
676 (void) cache_space_refresh(s, storage);
677
678 if (verbose)
679 server_space_usage_message(s, storage);
680
681 r = journal_directory_vacuum(storage->path, storage->space.limit,
682 storage->metrics.n_max_files, s->max_retention_usec,
683 &s->oldest_file_usec, verbose);
684 if (r < 0 && r != -ENOENT)
685 log_warning_errno(r, "Failed to vacuum %s, ignoring: %m", storage->path);
686
687 cache_space_invalidate(&storage->space);
688 }
689
690 int server_vacuum(Server *s, bool verbose) {
691 assert(s);
692
693 log_debug("Vacuuming...");
694
695 s->oldest_file_usec = 0;
696
697 if (s->system_journal)
698 do_vacuum(s, &s->system_storage, verbose);
699 if (s->runtime_journal)
700 do_vacuum(s, &s->runtime_storage, verbose);
701
702 return 0;
703 }
704
705 static void server_cache_machine_id(Server *s) {
706 sd_id128_t id;
707 int r;
708
709 assert(s);
710
711 r = sd_id128_get_machine(&id);
712 if (r < 0)
713 return;
714
715 sd_id128_to_string(id, stpcpy(s->machine_id_field, "_MACHINE_ID="));
716 }
717
718 static void server_cache_boot_id(Server *s) {
719 sd_id128_t id;
720 int r;
721
722 assert(s);
723
724 r = sd_id128_get_boot(&id);
725 if (r < 0)
726 return;
727
728 sd_id128_to_string(id, stpcpy(s->boot_id_field, "_BOOT_ID="));
729 }
730
731 static void server_cache_hostname(Server *s) {
732 _cleanup_free_ char *t = NULL;
733 char *x;
734
735 assert(s);
736
737 t = gethostname_malloc();
738 if (!t)
739 return;
740
741 x = strappend("_HOSTNAME=", t);
742 if (!x)
743 return;
744
745 free(s->hostname_field);
746 s->hostname_field = x;
747 }
748
749 static bool shall_try_append_again(JournalFile *f, int r) {
750 switch(r) {
751
752 case -E2BIG: /* Hit configured limit */
753 case -EFBIG: /* Hit fs limit */
754 case -EDQUOT: /* Quota limit hit */
755 case -ENOSPC: /* Disk full */
756 log_debug("%s: Allocation limit reached, rotating.", f->path);
757 return true;
758
759 case -EIO: /* I/O error of some kind (mmap) */
760 log_warning("%s: IO error, rotating.", f->path);
761 return true;
762
763 case -EHOSTDOWN: /* Other machine */
764 log_info("%s: Journal file from other machine, rotating.", f->path);
765 return true;
766
767 case -EBUSY: /* Unclean shutdown */
768 log_info("%s: Unclean shutdown, rotating.", f->path);
769 return true;
770
771 case -EPROTONOSUPPORT: /* Unsupported feature */
772 log_info("%s: Unsupported feature, rotating.", f->path);
773 return true;
774
775 case -EBADMSG: /* Corrupted */
776 case -ENODATA: /* Truncated */
777 case -ESHUTDOWN: /* Already archived */
778 log_warning("%s: Journal file corrupted, rotating.", f->path);
779 return true;
780
781 case -EIDRM: /* Journal file has been deleted */
782 log_warning("%s: Journal file has been deleted, rotating.", f->path);
783 return true;
784
785 case -ETXTBSY: /* Journal file is from the future */
786 log_warning("%s: Journal file is from the future, rotating.", f->path);
787 return true;
788
789 default:
790 return false;
791 }
792 }
793
794 static void write_to_journal(Server *s, uid_t uid, struct iovec *iovec, size_t n, int priority) {
795 bool vacuumed = false, rotate = false;
796 struct dual_timestamp ts;
797 JournalFile *f;
798 int r;
799
800 assert(s);
801 assert(iovec);
802 assert(n > 0);
803
804 /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
805 * the source time, and not even the time the event was originally seen, but instead simply the time we started
806 * processing it, as we want strictly linear ordering in what we write out.) */
807 assert_se(sd_event_now(s->event, CLOCK_REALTIME, &ts.realtime) >= 0);
808 assert_se(sd_event_now(s->event, CLOCK_MONOTONIC, &ts.monotonic) >= 0);
809
810 if (ts.realtime < s->last_realtime_clock) {
811 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
812 * regular operation. However, when it does happen, then we should make sure that we start fresh files
813 * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
814 * bisection works correctly. */
815
816 log_debug("Time jumped backwards, rotating.");
817 rotate = true;
818 } else {
819
820 f = find_journal(s, uid);
821 if (!f)
822 return;
823
824 if (journal_file_rotate_suggested(f, s->max_file_usec)) {
825 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f->path);
826 rotate = true;
827 }
828 }
829
830 if (rotate) {
831 server_rotate(s);
832 server_vacuum(s, false);
833 vacuumed = true;
834
835 f = find_journal(s, uid);
836 if (!f)
837 return;
838 }
839
840 s->last_realtime_clock = ts.realtime;
841
842 r = journal_file_append_entry(f, &ts, NULL, iovec, n, &s->seqnum, NULL, NULL);
843 if (r >= 0) {
844 server_schedule_sync(s, priority);
845 return;
846 }
847
848 if (vacuumed || !shall_try_append_again(f, r)) {
849 log_error_errno(r, "Failed to write entry (%zu items, %zu bytes), ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
850 return;
851 }
852
853 server_rotate(s);
854 server_vacuum(s, false);
855
856 f = find_journal(s, uid);
857 if (!f)
858 return;
859
860 log_debug("Retrying write.");
861 r = journal_file_append_entry(f, &ts, NULL, iovec, n, &s->seqnum, NULL, NULL);
862 if (r < 0)
863 log_error_errno(r, "Failed to write entry (%zu items, %zu bytes) despite vacuuming, ignoring: %m", n, IOVEC_TOTAL_SIZE(iovec, n));
864 else
865 server_schedule_sync(s, priority);
866 }
867
868 #define IOVEC_ADD_NUMERIC_FIELD(iovec, n, value, type, isset, format, field) \
869 if (isset(value)) { \
870 char *k; \
871 k = newa(char, STRLEN(field "=") + DECIMAL_STR_MAX(type) + 1); \
872 sprintf(k, field "=" format, value); \
873 iovec[n++] = IOVEC_MAKE_STRING(k); \
874 }
875
876 #define IOVEC_ADD_STRING_FIELD(iovec, n, value, field) \
877 if (!isempty(value)) { \
878 char *k; \
879 k = strjoina(field "=", value); \
880 iovec[n++] = IOVEC_MAKE_STRING(k); \
881 }
882
883 #define IOVEC_ADD_ID128_FIELD(iovec, n, value, field) \
884 if (!sd_id128_is_null(value)) { \
885 char *k; \
886 k = newa(char, STRLEN(field "=") + SD_ID128_STRING_MAX); \
887 sd_id128_to_string(value, stpcpy(k, field "=")); \
888 iovec[n++] = IOVEC_MAKE_STRING(k); \
889 }
890
891 #define IOVEC_ADD_SIZED_FIELD(iovec, n, value, value_size, field) \
892 if (value_size > 0) { \
893 char *k; \
894 k = newa(char, STRLEN(field "=") + value_size + 1); \
895 *((char*) mempcpy(stpcpy(k, field "="), value, value_size)) = 0; \
896 iovec[n++] = IOVEC_MAKE_STRING(k); \
897 } \
898
899 static void dispatch_message_real(
900 Server *s,
901 struct iovec *iovec, size_t n, size_t m,
902 const ClientContext *c,
903 const struct timeval *tv,
904 int priority,
905 pid_t object_pid) {
906
907 char source_time[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t)];
908 _cleanup_free_ char *cmdline1 = NULL, *cmdline2 = NULL;
909 uid_t journal_uid;
910 ClientContext *o;
911
912 assert(s);
913 assert(iovec);
914 assert(n > 0);
915 assert(n +
916 N_IOVEC_META_FIELDS +
917 (pid_is_valid(object_pid) ? N_IOVEC_OBJECT_FIELDS : 0) +
918 client_context_extra_fields_n_iovec(c) <= m);
919
920 if (c) {
921 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->pid, pid_t, pid_is_valid, PID_FMT, "_PID");
922 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->uid, uid_t, uid_is_valid, UID_FMT, "_UID");
923 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->gid, gid_t, gid_is_valid, GID_FMT, "_GID");
924
925 IOVEC_ADD_STRING_FIELD(iovec, n, c->comm, "_COMM"); /* At most TASK_COMM_LENGTH (16 bytes) */
926 IOVEC_ADD_STRING_FIELD(iovec, n, c->exe, "_EXE"); /* A path, so at most PATH_MAX (4096 bytes) */
927
928 if (c->cmdline)
929 /* At most _SC_ARG_MAX (2MB usually), which is too much to put on stack.
930 * Let's use a heap allocation for this one. */
931 cmdline1 = set_iovec_string_field(iovec, &n, "_CMDLINE=", c->cmdline);
932
933 IOVEC_ADD_STRING_FIELD(iovec, n, c->capeff, "_CAP_EFFECTIVE"); /* Read from /proc/.../status */
934 IOVEC_ADD_SIZED_FIELD(iovec, n, c->label, c->label_size, "_SELINUX_CONTEXT");
935 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "_AUDIT_SESSION");
936 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->loginuid, uid_t, uid_is_valid, UID_FMT, "_AUDIT_LOGINUID");
937
938 IOVEC_ADD_STRING_FIELD(iovec, n, c->cgroup, "_SYSTEMD_CGROUP"); /* A path */
939 IOVEC_ADD_STRING_FIELD(iovec, n, c->session, "_SYSTEMD_SESSION");
940 IOVEC_ADD_NUMERIC_FIELD(iovec, n, c->owner_uid, uid_t, uid_is_valid, UID_FMT, "_SYSTEMD_OWNER_UID");
941 IOVEC_ADD_STRING_FIELD(iovec, n, c->unit, "_SYSTEMD_UNIT"); /* Unit names are bounded by UNIT_NAME_MAX */
942 IOVEC_ADD_STRING_FIELD(iovec, n, c->user_unit, "_SYSTEMD_USER_UNIT");
943 IOVEC_ADD_STRING_FIELD(iovec, n, c->slice, "_SYSTEMD_SLICE");
944 IOVEC_ADD_STRING_FIELD(iovec, n, c->user_slice, "_SYSTEMD_USER_SLICE");
945
946 IOVEC_ADD_ID128_FIELD(iovec, n, c->invocation_id, "_SYSTEMD_INVOCATION_ID");
947
948 if (c->extra_fields_n_iovec > 0) {
949 memcpy(iovec + n, c->extra_fields_iovec, c->extra_fields_n_iovec * sizeof(struct iovec));
950 n += c->extra_fields_n_iovec;
951 }
952 }
953
954 assert(n <= m);
955
956 if (pid_is_valid(object_pid) && client_context_get(s, object_pid, NULL, NULL, 0, NULL, &o) >= 0) {
957
958 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->pid, pid_t, pid_is_valid, PID_FMT, "OBJECT_PID");
959 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_UID");
960 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->gid, gid_t, gid_is_valid, GID_FMT, "OBJECT_GID");
961
962 /* See above for size limits, only ->cmdline may be large, so use a heap allocation for it. */
963 IOVEC_ADD_STRING_FIELD(iovec, n, o->comm, "OBJECT_COMM");
964 IOVEC_ADD_STRING_FIELD(iovec, n, o->exe, "OBJECT_EXE");
965 if (o->cmdline)
966 cmdline2 = set_iovec_string_field(iovec, &n, "OBJECT_CMDLINE=", o->cmdline);
967
968 IOVEC_ADD_STRING_FIELD(iovec, n, o->capeff, "OBJECT_CAP_EFFECTIVE");
969 IOVEC_ADD_SIZED_FIELD(iovec, n, o->label, o->label_size, "OBJECT_SELINUX_CONTEXT");
970 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->auditid, uint32_t, audit_session_is_valid, "%" PRIu32, "OBJECT_AUDIT_SESSION");
971 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->loginuid, uid_t, uid_is_valid, UID_FMT, "OBJECT_AUDIT_LOGINUID");
972
973 IOVEC_ADD_STRING_FIELD(iovec, n, o->cgroup, "OBJECT_SYSTEMD_CGROUP");
974 IOVEC_ADD_STRING_FIELD(iovec, n, o->session, "OBJECT_SYSTEMD_SESSION");
975 IOVEC_ADD_NUMERIC_FIELD(iovec, n, o->owner_uid, uid_t, uid_is_valid, UID_FMT, "OBJECT_SYSTEMD_OWNER_UID");
976 IOVEC_ADD_STRING_FIELD(iovec, n, o->unit, "OBJECT_SYSTEMD_UNIT");
977 IOVEC_ADD_STRING_FIELD(iovec, n, o->user_unit, "OBJECT_SYSTEMD_USER_UNIT");
978 IOVEC_ADD_STRING_FIELD(iovec, n, o->slice, "OBJECT_SYSTEMD_SLICE");
979 IOVEC_ADD_STRING_FIELD(iovec, n, o->user_slice, "OBJECT_SYSTEMD_USER_SLICE");
980
981 IOVEC_ADD_ID128_FIELD(iovec, n, o->invocation_id, "OBJECT_SYSTEMD_INVOCATION_ID=");
982 }
983
984 assert(n <= m);
985
986 if (tv) {
987 sprintf(source_time, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT, timeval_load(tv));
988 iovec[n++] = IOVEC_MAKE_STRING(source_time);
989 }
990
991 /* Note that strictly speaking storing the boot id here is
992 * redundant since the entry includes this in-line
993 * anyway. However, we need this indexed, too. */
994 if (!isempty(s->boot_id_field))
995 iovec[n++] = IOVEC_MAKE_STRING(s->boot_id_field);
996
997 if (!isempty(s->machine_id_field))
998 iovec[n++] = IOVEC_MAKE_STRING(s->machine_id_field);
999
1000 if (!isempty(s->hostname_field))
1001 iovec[n++] = IOVEC_MAKE_STRING(s->hostname_field);
1002
1003 assert(n <= m);
1004
1005 if (s->split_mode == SPLIT_UID && c && uid_is_valid(c->uid))
1006 /* Split up strictly by (non-root) UID */
1007 journal_uid = c->uid;
1008 else if (s->split_mode == SPLIT_LOGIN && c && c->uid > 0 && uid_is_valid(c->owner_uid))
1009 /* Split up by login UIDs. We do this only if the
1010 * realuid is not root, in order not to accidentally
1011 * leak privileged information to the user that is
1012 * logged by a privileged process that is part of an
1013 * unprivileged session. */
1014 journal_uid = c->owner_uid;
1015 else
1016 journal_uid = 0;
1017
1018 write_to_journal(s, journal_uid, iovec, n, priority);
1019 }
1020
1021 void server_driver_message(Server *s, pid_t object_pid, const char *message_id, const char *format, ...) {
1022
1023 struct iovec *iovec;
1024 size_t n = 0, k, m;
1025 va_list ap;
1026 int r;
1027
1028 assert(s);
1029 assert(format);
1030
1031 m = N_IOVEC_META_FIELDS + 5 + N_IOVEC_PAYLOAD_FIELDS + client_context_extra_fields_n_iovec(s->my_context) + N_IOVEC_OBJECT_FIELDS;
1032 iovec = newa(struct iovec, m);
1033
1034 assert_cc(3 == LOG_FAC(LOG_DAEMON));
1035 iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=3");
1036 iovec[n++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=systemd-journald");
1037
1038 iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=driver");
1039 assert_cc(6 == LOG_INFO);
1040 iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=6");
1041
1042 if (message_id)
1043 iovec[n++] = IOVEC_MAKE_STRING(message_id);
1044 k = n;
1045
1046 va_start(ap, format);
1047 r = log_format_iovec(iovec, m, &n, false, 0, format, ap);
1048 /* Error handling below */
1049 va_end(ap);
1050
1051 if (r >= 0)
1052 dispatch_message_real(s, iovec, n, m, s->my_context, NULL, LOG_INFO, object_pid);
1053
1054 while (k < n)
1055 free(iovec[k++].iov_base);
1056
1057 if (r < 0) {
1058 /* We failed to format the message. Emit a warning instead. */
1059 char buf[LINE_MAX];
1060
1061 xsprintf(buf, "MESSAGE=Entry printing failed: %s", strerror(-r));
1062
1063 n = 3;
1064 iovec[n++] = IOVEC_MAKE_STRING("PRIORITY=4");
1065 iovec[n++] = IOVEC_MAKE_STRING(buf);
1066 dispatch_message_real(s, iovec, n, m, s->my_context, NULL, LOG_INFO, object_pid);
1067 }
1068 }
1069
1070 void server_dispatch_message(
1071 Server *s,
1072 struct iovec *iovec, size_t n, size_t m,
1073 ClientContext *c,
1074 const struct timeval *tv,
1075 int priority,
1076 pid_t object_pid) {
1077
1078 uint64_t available = 0;
1079 int rl;
1080
1081 assert(s);
1082 assert(iovec || n == 0);
1083
1084 if (n == 0)
1085 return;
1086
1087 if (LOG_PRI(priority) > s->max_level_store)
1088 return;
1089
1090 /* Stop early in case the information will not be stored
1091 * in a journal. */
1092 if (s->storage == STORAGE_NONE)
1093 return;
1094
1095 if (c && c->unit) {
1096 (void) determine_space(s, &available, NULL);
1097
1098 rl = journal_rate_limit_test(s->rate_limit, c->unit, c->log_rate_limit_interval, c->log_rate_limit_burst, priority & LOG_PRIMASK, available);
1099 if (rl == 0)
1100 return;
1101
1102 /* Write a suppression message if we suppressed something */
1103 if (rl > 1)
1104 server_driver_message(s, c->pid,
1105 "MESSAGE_ID=" SD_MESSAGE_JOURNAL_DROPPED_STR,
1106 LOG_MESSAGE("Suppressed %i messages from %s", rl - 1, c->unit),
1107 "N_DROPPED=%i", rl - 1,
1108 NULL);
1109 }
1110
1111 dispatch_message_real(s, iovec, n, m, c, tv, priority, object_pid);
1112 }
1113
1114 int server_flush_to_var(Server *s, bool require_flag_file) {
1115 sd_id128_t machine;
1116 sd_journal *j = NULL;
1117 char ts[FORMAT_TIMESPAN_MAX];
1118 usec_t start;
1119 unsigned n = 0;
1120 int r;
1121
1122 assert(s);
1123
1124 if (!IN_SET(s->storage, STORAGE_AUTO, STORAGE_PERSISTENT))
1125 return 0;
1126
1127 if (!s->runtime_journal)
1128 return 0;
1129
1130 if (require_flag_file && !flushed_flag_is_set())
1131 return 0;
1132
1133 (void) system_journal_open(s, true);
1134
1135 if (!s->system_journal)
1136 return 0;
1137
1138 log_debug("Flushing to /var...");
1139
1140 start = now(CLOCK_MONOTONIC);
1141
1142 r = sd_id128_get_machine(&machine);
1143 if (r < 0)
1144 return r;
1145
1146 r = sd_journal_open(&j, SD_JOURNAL_RUNTIME_ONLY);
1147 if (r < 0)
1148 return log_error_errno(r, "Failed to read runtime journal: %m");
1149
1150 sd_journal_set_data_threshold(j, 0);
1151
1152 SD_JOURNAL_FOREACH(j) {
1153 Object *o = NULL;
1154 JournalFile *f;
1155
1156 f = j->current_file;
1157 assert(f && f->current_offset > 0);
1158
1159 n++;
1160
1161 r = journal_file_move_to_object(f, OBJECT_ENTRY, f->current_offset, &o);
1162 if (r < 0) {
1163 log_error_errno(r, "Can't read entry: %m");
1164 goto finish;
1165 }
1166
1167 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset);
1168 if (r >= 0)
1169 continue;
1170
1171 if (!shall_try_append_again(s->system_journal, r)) {
1172 log_error_errno(r, "Can't write entry: %m");
1173 goto finish;
1174 }
1175
1176 server_rotate(s);
1177 server_vacuum(s, false);
1178
1179 if (!s->system_journal) {
1180 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1181 r = -EIO;
1182 goto finish;
1183 }
1184
1185 log_debug("Retrying write.");
1186 r = journal_file_copy_entry(f, s->system_journal, o, f->current_offset);
1187 if (r < 0) {
1188 log_error_errno(r, "Can't write entry: %m");
1189 goto finish;
1190 }
1191 }
1192
1193 r = 0;
1194
1195 finish:
1196 if (s->system_journal)
1197 journal_file_post_change(s->system_journal);
1198
1199 s->runtime_journal = journal_file_close(s->runtime_journal);
1200
1201 if (r >= 0)
1202 (void) rm_rf("/run/log/journal", REMOVE_ROOT);
1203
1204 sd_journal_close(j);
1205
1206 server_driver_message(s, 0, NULL,
1207 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1208 format_timespan(ts, sizeof(ts), now(CLOCK_MONOTONIC) - start, 0),
1209 n),
1210 NULL);
1211
1212 return r;
1213 }
1214
1215 int server_process_datagram(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1216 Server *s = userdata;
1217 struct ucred *ucred = NULL;
1218 struct timeval *tv = NULL;
1219 struct cmsghdr *cmsg;
1220 char *label = NULL;
1221 size_t label_len = 0, m;
1222 struct iovec iovec;
1223 ssize_t n;
1224 int *fds = NULL, v = 0;
1225 size_t n_fds = 0;
1226
1227 union {
1228 struct cmsghdr cmsghdr;
1229
1230 /* We use NAME_MAX space for the SELinux label
1231 * here. The kernel currently enforces no
1232 * limit, but according to suggestions from
1233 * the SELinux people this will change and it
1234 * will probably be identical to NAME_MAX. For
1235 * now we use that, but this should be updated
1236 * one day when the final limit is known. */
1237 uint8_t buf[CMSG_SPACE(sizeof(struct ucred)) +
1238 CMSG_SPACE(sizeof(struct timeval)) +
1239 CMSG_SPACE(sizeof(int)) + /* fd */
1240 CMSG_SPACE(NAME_MAX)]; /* selinux label */
1241 } control = {};
1242
1243 union sockaddr_union sa = {};
1244
1245 struct msghdr msghdr = {
1246 .msg_iov = &iovec,
1247 .msg_iovlen = 1,
1248 .msg_control = &control,
1249 .msg_controllen = sizeof(control),
1250 .msg_name = &sa,
1251 .msg_namelen = sizeof(sa),
1252 };
1253
1254 assert(s);
1255 assert(fd == s->native_fd || fd == s->syslog_fd || fd == s->audit_fd);
1256
1257 if (revents != EPOLLIN)
1258 return log_error_errno(SYNTHETIC_ERRNO(EIO),
1259 "Got invalid event from epoll for datagram fd: %" PRIx32,
1260 revents);
1261
1262 /* Try to get the right size, if we can. (Not all sockets support SIOCINQ, hence we just try, but don't rely on
1263 * it.) */
1264 (void) ioctl(fd, SIOCINQ, &v);
1265
1266 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1267 m = PAGE_ALIGN(MAX3((size_t) v + 1,
1268 (size_t) LINE_MAX,
1269 ALIGN(sizeof(struct nlmsghdr)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH)) + 1);
1270
1271 if (!GREEDY_REALLOC(s->buffer, s->buffer_size, m))
1272 return log_oom();
1273
1274 iovec = IOVEC_MAKE(s->buffer, s->buffer_size - 1); /* Leave room for trailing NUL we add later */
1275
1276 n = recvmsg(fd, &msghdr, MSG_DONTWAIT|MSG_CMSG_CLOEXEC);
1277 if (n < 0) {
1278 if (IN_SET(errno, EINTR, EAGAIN))
1279 return 0;
1280
1281 return log_error_errno(errno, "recvmsg() failed: %m");
1282 }
1283
1284 CMSG_FOREACH(cmsg, &msghdr)
1285 if (cmsg->cmsg_level == SOL_SOCKET &&
1286 cmsg->cmsg_type == SCM_CREDENTIALS &&
1287 cmsg->cmsg_len == CMSG_LEN(sizeof(struct ucred)))
1288 ucred = (struct ucred*) CMSG_DATA(cmsg);
1289 else if (cmsg->cmsg_level == SOL_SOCKET &&
1290 cmsg->cmsg_type == SCM_SECURITY) {
1291 label = (char*) CMSG_DATA(cmsg);
1292 label_len = cmsg->cmsg_len - CMSG_LEN(0);
1293 } else if (cmsg->cmsg_level == SOL_SOCKET &&
1294 cmsg->cmsg_type == SO_TIMESTAMP &&
1295 cmsg->cmsg_len == CMSG_LEN(sizeof(struct timeval)))
1296 tv = (struct timeval*) CMSG_DATA(cmsg);
1297 else if (cmsg->cmsg_level == SOL_SOCKET &&
1298 cmsg->cmsg_type == SCM_RIGHTS) {
1299 fds = (int*) CMSG_DATA(cmsg);
1300 n_fds = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
1301 }
1302
1303 /* And a trailing NUL, just in case */
1304 s->buffer[n] = 0;
1305
1306 if (fd == s->syslog_fd) {
1307 if (n > 0 && n_fds == 0)
1308 server_process_syslog_message(s, s->buffer, n, ucred, tv, label, label_len);
1309 else if (n_fds > 0)
1310 log_warning("Got file descriptors via syslog socket. Ignoring.");
1311
1312 } else if (fd == s->native_fd) {
1313 if (n > 0 && n_fds == 0)
1314 server_process_native_message(s, s->buffer, n, ucred, tv, label, label_len);
1315 else if (n == 0 && n_fds == 1)
1316 server_process_native_file(s, fds[0], ucred, tv, label, label_len);
1317 else if (n_fds > 0)
1318 log_warning("Got too many file descriptors via native socket. Ignoring.");
1319
1320 } else {
1321 assert(fd == s->audit_fd);
1322
1323 if (n > 0 && n_fds == 0)
1324 server_process_audit_message(s, s->buffer, n, ucred, &sa, msghdr.msg_namelen);
1325 else if (n_fds > 0)
1326 log_warning("Got file descriptors via audit socket. Ignoring.");
1327 }
1328
1329 close_many(fds, n_fds);
1330 return 0;
1331 }
1332
1333 static int dispatch_sigusr1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1334 Server *s = userdata;
1335 int r;
1336
1337 assert(s);
1338
1339 log_info("Received request to flush runtime journal from PID " PID_FMT, si->ssi_pid);
1340
1341 (void) server_flush_to_var(s, false);
1342 server_sync(s);
1343 server_vacuum(s, false);
1344
1345 r = touch("/run/systemd/journal/flushed");
1346 if (r < 0)
1347 log_warning_errno(r, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
1348
1349 server_space_usage_message(s, NULL);
1350 return 0;
1351 }
1352
1353 static int dispatch_sigusr2(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1354 Server *s = userdata;
1355 int r;
1356
1357 assert(s);
1358
1359 log_info("Received request to rotate journal from PID " PID_FMT, si->ssi_pid);
1360 server_rotate(s);
1361 server_vacuum(s, true);
1362
1363 if (s->system_journal)
1364 patch_min_use(&s->system_storage);
1365 if (s->runtime_journal)
1366 patch_min_use(&s->runtime_storage);
1367
1368 /* Let clients know when the most recent rotation happened. */
1369 r = write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC));
1370 if (r < 0)
1371 log_warning_errno(r, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
1372
1373 return 0;
1374 }
1375
1376 static int dispatch_sigterm(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1377 Server *s = userdata;
1378
1379 assert(s);
1380
1381 log_received_signal(LOG_INFO, si);
1382
1383 sd_event_exit(s->event, 0);
1384 return 0;
1385 }
1386
1387 static int dispatch_sigrtmin1(sd_event_source *es, const struct signalfd_siginfo *si, void *userdata) {
1388 Server *s = userdata;
1389 int r;
1390
1391 assert(s);
1392
1393 log_debug("Received request to sync from PID " PID_FMT, si->ssi_pid);
1394
1395 server_sync(s);
1396
1397 /* Let clients know when the most recent sync happened. */
1398 r = write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC));
1399 if (r < 0)
1400 log_warning_errno(r, "Failed to write /run/systemd/journal/synced, ignoring: %m");
1401
1402 return 0;
1403 }
1404
1405 static int setup_signals(Server *s) {
1406 int r;
1407
1408 assert(s);
1409
1410 assert_se(sigprocmask_many(SIG_SETMASK, NULL, SIGINT, SIGTERM, SIGUSR1, SIGUSR2, SIGRTMIN+1, -1) >= 0);
1411
1412 r = sd_event_add_signal(s->event, &s->sigusr1_event_source, SIGUSR1, dispatch_sigusr1, s);
1413 if (r < 0)
1414 return r;
1415
1416 r = sd_event_add_signal(s->event, &s->sigusr2_event_source, SIGUSR2, dispatch_sigusr2, s);
1417 if (r < 0)
1418 return r;
1419
1420 r = sd_event_add_signal(s->event, &s->sigterm_event_source, SIGTERM, dispatch_sigterm, s);
1421 if (r < 0)
1422 return r;
1423
1424 /* Let's process SIGTERM late, so that we flush all queued
1425 * messages to disk before we exit */
1426 r = sd_event_source_set_priority(s->sigterm_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1427 if (r < 0)
1428 return r;
1429
1430 /* When journald is invoked on the terminal (when debugging),
1431 * it's useful if C-c is handled equivalent to SIGTERM. */
1432 r = sd_event_add_signal(s->event, &s->sigint_event_source, SIGINT, dispatch_sigterm, s);
1433 if (r < 0)
1434 return r;
1435
1436 r = sd_event_source_set_priority(s->sigint_event_source, SD_EVENT_PRIORITY_NORMAL+20);
1437 if (r < 0)
1438 return r;
1439
1440 /* SIGRTMIN+1 causes an immediate sync. We process this very
1441 * late, so that everything else queued at this point is
1442 * really written to disk. Clients can watch
1443 * /run/systemd/journal/synced with inotify until its mtime
1444 * changes to see when a sync happened. */
1445 r = sd_event_add_signal(s->event, &s->sigrtmin1_event_source, SIGRTMIN+1, dispatch_sigrtmin1, s);
1446 if (r < 0)
1447 return r;
1448
1449 r = sd_event_source_set_priority(s->sigrtmin1_event_source, SD_EVENT_PRIORITY_NORMAL+15);
1450 if (r < 0)
1451 return r;
1452
1453 return 0;
1454 }
1455
1456 static int parse_proc_cmdline_item(const char *key, const char *value, void *data) {
1457 Server *s = data;
1458 int r;
1459
1460 assert(s);
1461
1462 if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_syslog")) {
1463
1464 r = value ? parse_boolean(value) : true;
1465 if (r < 0)
1466 log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value);
1467 else
1468 s->forward_to_syslog = r;
1469
1470 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_kmsg")) {
1471
1472 r = value ? parse_boolean(value) : true;
1473 if (r < 0)
1474 log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value);
1475 else
1476 s->forward_to_kmsg = r;
1477
1478 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_console")) {
1479
1480 r = value ? parse_boolean(value) : true;
1481 if (r < 0)
1482 log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value);
1483 else
1484 s->forward_to_console = r;
1485
1486 } else if (proc_cmdline_key_streq(key, "systemd.journald.forward_to_wall")) {
1487
1488 r = value ? parse_boolean(value) : true;
1489 if (r < 0)
1490 log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value);
1491 else
1492 s->forward_to_wall = r;
1493
1494 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_console")) {
1495
1496 if (proc_cmdline_value_missing(key, value))
1497 return 0;
1498
1499 r = log_level_from_string(value);
1500 if (r < 0)
1501 log_warning("Failed to parse max level console value \"%s\". Ignoring.", value);
1502 else
1503 s->max_level_console = r;
1504
1505 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_store")) {
1506
1507 if (proc_cmdline_value_missing(key, value))
1508 return 0;
1509
1510 r = log_level_from_string(value);
1511 if (r < 0)
1512 log_warning("Failed to parse max level store value \"%s\". Ignoring.", value);
1513 else
1514 s->max_level_store = r;
1515
1516 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_syslog")) {
1517
1518 if (proc_cmdline_value_missing(key, value))
1519 return 0;
1520
1521 r = log_level_from_string(value);
1522 if (r < 0)
1523 log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value);
1524 else
1525 s->max_level_syslog = r;
1526
1527 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_kmsg")) {
1528
1529 if (proc_cmdline_value_missing(key, value))
1530 return 0;
1531
1532 r = log_level_from_string(value);
1533 if (r < 0)
1534 log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value);
1535 else
1536 s->max_level_kmsg = r;
1537
1538 } else if (proc_cmdline_key_streq(key, "systemd.journald.max_level_wall")) {
1539
1540 if (proc_cmdline_value_missing(key, value))
1541 return 0;
1542
1543 r = log_level_from_string(value);
1544 if (r < 0)
1545 log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value);
1546 else
1547 s->max_level_wall = r;
1548
1549 } else if (startswith(key, "systemd.journald"))
1550 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key);
1551
1552 /* do not warn about state here, since probably systemd already did */
1553 return 0;
1554 }
1555
1556 static int server_parse_config_file(Server *s) {
1557 assert(s);
1558
1559 return config_parse_many_nulstr(PKGSYSCONFDIR "/journald.conf",
1560 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1561 "Journal\0",
1562 config_item_perf_lookup, journald_gperf_lookup,
1563 CONFIG_PARSE_WARN, s);
1564 }
1565
1566 static int server_dispatch_sync(sd_event_source *es, usec_t t, void *userdata) {
1567 Server *s = userdata;
1568
1569 assert(s);
1570
1571 server_sync(s);
1572 return 0;
1573 }
1574
1575 int server_schedule_sync(Server *s, int priority) {
1576 int r;
1577
1578 assert(s);
1579
1580 if (priority <= LOG_CRIT) {
1581 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1582 server_sync(s);
1583 return 0;
1584 }
1585
1586 if (s->sync_scheduled)
1587 return 0;
1588
1589 if (s->sync_interval_usec > 0) {
1590 usec_t when;
1591
1592 r = sd_event_now(s->event, CLOCK_MONOTONIC, &when);
1593 if (r < 0)
1594 return r;
1595
1596 when += s->sync_interval_usec;
1597
1598 if (!s->sync_event_source) {
1599 r = sd_event_add_time(
1600 s->event,
1601 &s->sync_event_source,
1602 CLOCK_MONOTONIC,
1603 when, 0,
1604 server_dispatch_sync, s);
1605 if (r < 0)
1606 return r;
1607
1608 r = sd_event_source_set_priority(s->sync_event_source, SD_EVENT_PRIORITY_IMPORTANT);
1609 } else {
1610 r = sd_event_source_set_time(s->sync_event_source, when);
1611 if (r < 0)
1612 return r;
1613
1614 r = sd_event_source_set_enabled(s->sync_event_source, SD_EVENT_ONESHOT);
1615 }
1616 if (r < 0)
1617 return r;
1618
1619 s->sync_scheduled = true;
1620 }
1621
1622 return 0;
1623 }
1624
1625 static int dispatch_hostname_change(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1626 Server *s = userdata;
1627
1628 assert(s);
1629
1630 server_cache_hostname(s);
1631 return 0;
1632 }
1633
1634 static int server_open_hostname(Server *s) {
1635 int r;
1636
1637 assert(s);
1638
1639 s->hostname_fd = open("/proc/sys/kernel/hostname",
1640 O_RDONLY|O_CLOEXEC|O_NONBLOCK|O_NOCTTY);
1641 if (s->hostname_fd < 0)
1642 return log_error_errno(errno, "Failed to open /proc/sys/kernel/hostname: %m");
1643
1644 r = sd_event_add_io(s->event, &s->hostname_event_source, s->hostname_fd, 0, dispatch_hostname_change, s);
1645 if (r < 0) {
1646 /* kernels prior to 3.2 don't support polling this file. Ignore
1647 * the failure. */
1648 if (r == -EPERM) {
1649 log_warning_errno(r, "Failed to register hostname fd in event loop, ignoring: %m");
1650 s->hostname_fd = safe_close(s->hostname_fd);
1651 return 0;
1652 }
1653
1654 return log_error_errno(r, "Failed to register hostname fd in event loop: %m");
1655 }
1656
1657 r = sd_event_source_set_priority(s->hostname_event_source, SD_EVENT_PRIORITY_IMPORTANT-10);
1658 if (r < 0)
1659 return log_error_errno(r, "Failed to adjust priority of host name event source: %m");
1660
1661 return 0;
1662 }
1663
1664 static int dispatch_notify_event(sd_event_source *es, int fd, uint32_t revents, void *userdata) {
1665 Server *s = userdata;
1666 int r;
1667
1668 assert(s);
1669 assert(s->notify_event_source == es);
1670 assert(s->notify_fd == fd);
1671
1672 /* The $NOTIFY_SOCKET is writable again, now send exactly one
1673 * message on it. Either it's the watchdog event, the initial
1674 * READY=1 event or an stdout stream event. If there's nothing
1675 * to write anymore, turn our event source off. The next time
1676 * there's something to send it will be turned on again. */
1677
1678 if (!s->sent_notify_ready) {
1679 static const char p[] =
1680 "READY=1\n"
1681 "STATUS=Processing requests...";
1682 ssize_t l;
1683
1684 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1685 if (l < 0) {
1686 if (errno == EAGAIN)
1687 return 0;
1688
1689 return log_error_errno(errno, "Failed to send READY=1 notification message: %m");
1690 }
1691
1692 s->sent_notify_ready = true;
1693 log_debug("Sent READY=1 notification.");
1694
1695 } else if (s->send_watchdog) {
1696
1697 static const char p[] =
1698 "WATCHDOG=1";
1699
1700 ssize_t l;
1701
1702 l = send(s->notify_fd, p, strlen(p), MSG_DONTWAIT);
1703 if (l < 0) {
1704 if (errno == EAGAIN)
1705 return 0;
1706
1707 return log_error_errno(errno, "Failed to send WATCHDOG=1 notification message: %m");
1708 }
1709
1710 s->send_watchdog = false;
1711 log_debug("Sent WATCHDOG=1 notification.");
1712
1713 } else if (s->stdout_streams_notify_queue)
1714 /* Dispatch one stream notification event */
1715 stdout_stream_send_notify(s->stdout_streams_notify_queue);
1716
1717 /* Leave us enabled if there's still more to do. */
1718 if (s->send_watchdog || s->stdout_streams_notify_queue)
1719 return 0;
1720
1721 /* There was nothing to do anymore, let's turn ourselves off. */
1722 r = sd_event_source_set_enabled(es, SD_EVENT_OFF);
1723 if (r < 0)
1724 return log_error_errno(r, "Failed to turn off notify event source: %m");
1725
1726 return 0;
1727 }
1728
1729 static int dispatch_watchdog(sd_event_source *es, uint64_t usec, void *userdata) {
1730 Server *s = userdata;
1731 int r;
1732
1733 assert(s);
1734
1735 s->send_watchdog = true;
1736
1737 r = sd_event_source_set_enabled(s->notify_event_source, SD_EVENT_ON);
1738 if (r < 0)
1739 log_warning_errno(r, "Failed to turn on notify event source: %m");
1740
1741 r = sd_event_source_set_time(s->watchdog_event_source, usec + s->watchdog_usec / 2);
1742 if (r < 0)
1743 return log_error_errno(r, "Failed to restart watchdog event source: %m");
1744
1745 r = sd_event_source_set_enabled(s->watchdog_event_source, SD_EVENT_ON);
1746 if (r < 0)
1747 return log_error_errno(r, "Failed to enable watchdog event source: %m");
1748
1749 return 0;
1750 }
1751
1752 static int server_connect_notify(Server *s) {
1753 union sockaddr_union sa = {};
1754 const char *e;
1755 int r, salen;
1756
1757 assert(s);
1758 assert(s->notify_fd < 0);
1759 assert(!s->notify_event_source);
1760
1761 /*
1762 So here's the problem: we'd like to send notification
1763 messages to PID 1, but we cannot do that via sd_notify(),
1764 since that's synchronous, and we might end up blocking on
1765 it. Specifically: given that PID 1 might block on
1766 dbus-daemon during IPC, and dbus-daemon is logging to us,
1767 and might hence block on us, we might end up in a deadlock
1768 if we block on sending PID 1 notification messages — by
1769 generating a full blocking circle. To avoid this, let's
1770 create a non-blocking socket, and connect it to the
1771 notification socket, and then wait for POLLOUT before we
1772 send anything. This should efficiently avoid any deadlocks,
1773 as we'll never block on PID 1, hence PID 1 can safely block
1774 on dbus-daemon which can safely block on us again.
1775
1776 Don't think that this issue is real? It is, see:
1777 https://github.com/systemd/systemd/issues/1505
1778 */
1779
1780 e = getenv("NOTIFY_SOCKET");
1781 if (!e)
1782 return 0;
1783
1784 salen = sockaddr_un_set_path(&sa.un, e);
1785 if (salen < 0)
1786 return log_error_errno(salen, "NOTIFY_SOCKET set to invalid value '%s': %m", e);
1787
1788 s->notify_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
1789 if (s->notify_fd < 0)
1790 return log_error_errno(errno, "Failed to create notify socket: %m");
1791
1792 (void) fd_inc_sndbuf(s->notify_fd, NOTIFY_SNDBUF_SIZE);
1793
1794 r = connect(s->notify_fd, &sa.sa, salen);
1795 if (r < 0)
1796 return log_error_errno(errno, "Failed to connect to notify socket: %m");
1797
1798 r = sd_event_add_io(s->event, &s->notify_event_source, s->notify_fd, EPOLLOUT, dispatch_notify_event, s);
1799 if (r < 0)
1800 return log_error_errno(r, "Failed to watch notification socket: %m");
1801
1802 if (sd_watchdog_enabled(false, &s->watchdog_usec) > 0) {
1803 s->send_watchdog = true;
1804
1805 r = sd_event_add_time(s->event, &s->watchdog_event_source, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + s->watchdog_usec/2, s->watchdog_usec/4, dispatch_watchdog, s);
1806 if (r < 0)
1807 return log_error_errno(r, "Failed to add watchdog time event: %m");
1808 }
1809
1810 /* This should fire pretty soon, which we'll use to send the
1811 * READY=1 event. */
1812
1813 return 0;
1814 }
1815
1816 int server_init(Server *s) {
1817 _cleanup_fdset_free_ FDSet *fds = NULL;
1818 int n, r, fd;
1819 bool no_sockets;
1820
1821 assert(s);
1822
1823 *s = (Server) {
1824 .syslog_fd = -1,
1825 .native_fd = -1,
1826 .stdout_fd = -1,
1827 .dev_kmsg_fd = -1,
1828 .audit_fd = -1,
1829 .hostname_fd = -1,
1830 .notify_fd = -1,
1831
1832 .compress.enabled = true,
1833 .compress.threshold_bytes = (uint64_t) -1,
1834 .seal = true,
1835 .read_kmsg = true,
1836
1837 .watchdog_usec = USEC_INFINITY,
1838
1839 .sync_interval_usec = DEFAULT_SYNC_INTERVAL_USEC,
1840 .sync_scheduled = false,
1841
1842 .rate_limit_interval = DEFAULT_RATE_LIMIT_INTERVAL,
1843 .rate_limit_burst = DEFAULT_RATE_LIMIT_BURST,
1844
1845 .forward_to_wall = true,
1846
1847 .max_file_usec = DEFAULT_MAX_FILE_USEC,
1848
1849 .max_level_store = LOG_DEBUG,
1850 .max_level_syslog = LOG_DEBUG,
1851 .max_level_kmsg = LOG_NOTICE,
1852 .max_level_console = LOG_INFO,
1853 .max_level_wall = LOG_EMERG,
1854
1855 .line_max = DEFAULT_LINE_MAX,
1856
1857 .runtime_storage.name = "Runtime Journal",
1858 .system_storage.name = "System Journal",
1859 };
1860
1861 journal_reset_metrics(&s->system_storage.metrics);
1862 journal_reset_metrics(&s->runtime_storage.metrics);
1863
1864 server_parse_config_file(s);
1865
1866 r = proc_cmdline_parse(parse_proc_cmdline_item, s, PROC_CMDLINE_STRIP_RD_PREFIX);
1867 if (r < 0)
1868 log_warning_errno(r, "Failed to parse kernel command line, ignoring: %m");
1869
1870 if (!!s->rate_limit_interval ^ !!s->rate_limit_burst) {
1871 log_debug("Setting both rate limit interval and burst from "USEC_FMT",%u to 0,0",
1872 s->rate_limit_interval, s->rate_limit_burst);
1873 s->rate_limit_interval = s->rate_limit_burst = 0;
1874 }
1875
1876 (void) mkdir_p("/run/systemd/journal", 0755);
1877
1878 s->user_journals = ordered_hashmap_new(NULL);
1879 if (!s->user_journals)
1880 return log_oom();
1881
1882 s->mmap = mmap_cache_new();
1883 if (!s->mmap)
1884 return log_oom();
1885
1886 s->deferred_closes = set_new(NULL);
1887 if (!s->deferred_closes)
1888 return log_oom();
1889
1890 r = sd_event_default(&s->event);
1891 if (r < 0)
1892 return log_error_errno(r, "Failed to create event loop: %m");
1893
1894 n = sd_listen_fds(true);
1895 if (n < 0)
1896 return log_error_errno(n, "Failed to read listening file descriptors from environment: %m");
1897
1898 for (fd = SD_LISTEN_FDS_START; fd < SD_LISTEN_FDS_START + n; fd++) {
1899
1900 if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/socket", 0) > 0) {
1901
1902 if (s->native_fd >= 0)
1903 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
1904 "Too many native sockets passed.");
1905
1906 s->native_fd = fd;
1907
1908 } else if (sd_is_socket_unix(fd, SOCK_STREAM, 1, "/run/systemd/journal/stdout", 0) > 0) {
1909
1910 if (s->stdout_fd >= 0)
1911 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
1912 "Too many stdout sockets passed.");
1913
1914 s->stdout_fd = fd;
1915
1916 } else if (sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/dev/log", 0) > 0 ||
1917 sd_is_socket_unix(fd, SOCK_DGRAM, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1918
1919 if (s->syslog_fd >= 0)
1920 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
1921 "Too many /dev/log sockets passed.");
1922
1923 s->syslog_fd = fd;
1924
1925 } else if (sd_is_socket(fd, AF_NETLINK, SOCK_RAW, -1) > 0) {
1926
1927 if (s->audit_fd >= 0)
1928 return log_error_errno(SYNTHETIC_ERRNO(EINVAL),
1929 "Too many audit sockets passed.");
1930
1931 s->audit_fd = fd;
1932
1933 } else {
1934
1935 if (!fds) {
1936 fds = fdset_new();
1937 if (!fds)
1938 return log_oom();
1939 }
1940
1941 r = fdset_put(fds, fd);
1942 if (r < 0)
1943 return log_oom();
1944 }
1945 }
1946
1947 /* Try to restore streams, but don't bother if this fails */
1948 (void) server_restore_streams(s, fds);
1949
1950 if (fdset_size(fds) > 0) {
1951 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds));
1952 fds = fdset_free(fds);
1953 }
1954
1955 no_sockets = s->native_fd < 0 && s->stdout_fd < 0 && s->syslog_fd < 0 && s->audit_fd < 0;
1956
1957 /* always open stdout, syslog, native, and kmsg sockets */
1958
1959 /* systemd-journald.socket: /run/systemd/journal/stdout */
1960 r = server_open_stdout_socket(s);
1961 if (r < 0)
1962 return r;
1963
1964 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
1965 r = server_open_syslog_socket(s);
1966 if (r < 0)
1967 return r;
1968
1969 /* systemd-journald.socket: /run/systemd/journal/socket */
1970 r = server_open_native_socket(s);
1971 if (r < 0)
1972 return r;
1973
1974 /* /dev/kmsg */
1975 r = server_open_dev_kmsg(s);
1976 if (r < 0)
1977 return r;
1978
1979 /* Unless we got *some* sockets and not audit, open audit socket */
1980 if (s->audit_fd >= 0 || no_sockets) {
1981 r = server_open_audit(s);
1982 if (r < 0)
1983 return r;
1984 }
1985
1986 r = server_open_kernel_seqnum(s);
1987 if (r < 0)
1988 return r;
1989
1990 r = server_open_hostname(s);
1991 if (r < 0)
1992 return r;
1993
1994 r = setup_signals(s);
1995 if (r < 0)
1996 return r;
1997
1998 s->rate_limit = journal_rate_limit_new();
1999 if (!s->rate_limit)
2000 return -ENOMEM;
2001
2002 r = cg_get_root_path(&s->cgroup_root);
2003 if (r < 0)
2004 return r;
2005
2006 server_cache_hostname(s);
2007 server_cache_boot_id(s);
2008 server_cache_machine_id(s);
2009
2010 s->runtime_storage.path = strjoin("/run/log/journal/", SERVER_MACHINE_ID(s));
2011 s->system_storage.path = strjoin("/var/log/journal/", SERVER_MACHINE_ID(s));
2012 if (!s->runtime_storage.path || !s->system_storage.path)
2013 return -ENOMEM;
2014
2015 (void) server_connect_notify(s);
2016
2017 (void) client_context_acquire_default(s);
2018
2019 return system_journal_open(s, false);
2020 }
2021
2022 void server_maybe_append_tags(Server *s) {
2023 #if HAVE_GCRYPT
2024 JournalFile *f;
2025 Iterator i;
2026 usec_t n;
2027
2028 n = now(CLOCK_REALTIME);
2029
2030 if (s->system_journal)
2031 journal_file_maybe_append_tag(s->system_journal, n);
2032
2033 ORDERED_HASHMAP_FOREACH(f, s->user_journals, i)
2034 journal_file_maybe_append_tag(f, n);
2035 #endif
2036 }
2037
2038 void server_done(Server *s) {
2039 assert(s);
2040
2041 set_free_with_destructor(s->deferred_closes, journal_file_close);
2042
2043 while (s->stdout_streams)
2044 stdout_stream_free(s->stdout_streams);
2045
2046 client_context_flush_all(s);
2047
2048 if (s->system_journal)
2049 (void) journal_file_close(s->system_journal);
2050
2051 if (s->runtime_journal)
2052 (void) journal_file_close(s->runtime_journal);
2053
2054 ordered_hashmap_free_with_destructor(s->user_journals, journal_file_close);
2055
2056 sd_event_source_unref(s->syslog_event_source);
2057 sd_event_source_unref(s->native_event_source);
2058 sd_event_source_unref(s->stdout_event_source);
2059 sd_event_source_unref(s->dev_kmsg_event_source);
2060 sd_event_source_unref(s->audit_event_source);
2061 sd_event_source_unref(s->sync_event_source);
2062 sd_event_source_unref(s->sigusr1_event_source);
2063 sd_event_source_unref(s->sigusr2_event_source);
2064 sd_event_source_unref(s->sigterm_event_source);
2065 sd_event_source_unref(s->sigint_event_source);
2066 sd_event_source_unref(s->sigrtmin1_event_source);
2067 sd_event_source_unref(s->hostname_event_source);
2068 sd_event_source_unref(s->notify_event_source);
2069 sd_event_source_unref(s->watchdog_event_source);
2070 sd_event_unref(s->event);
2071
2072 safe_close(s->syslog_fd);
2073 safe_close(s->native_fd);
2074 safe_close(s->stdout_fd);
2075 safe_close(s->dev_kmsg_fd);
2076 safe_close(s->audit_fd);
2077 safe_close(s->hostname_fd);
2078 safe_close(s->notify_fd);
2079
2080 if (s->rate_limit)
2081 journal_rate_limit_free(s->rate_limit);
2082
2083 if (s->kernel_seqnum)
2084 munmap(s->kernel_seqnum, sizeof(uint64_t));
2085
2086 free(s->buffer);
2087 free(s->tty_path);
2088 free(s->cgroup_root);
2089 free(s->hostname_field);
2090 free(s->runtime_storage.path);
2091 free(s->system_storage.path);
2092
2093 mmap_cache_unref(s->mmap);
2094 }
2095
2096 static const char* const storage_table[_STORAGE_MAX] = {
2097 [STORAGE_AUTO] = "auto",
2098 [STORAGE_VOLATILE] = "volatile",
2099 [STORAGE_PERSISTENT] = "persistent",
2100 [STORAGE_NONE] = "none"
2101 };
2102
2103 DEFINE_STRING_TABLE_LOOKUP(storage, Storage);
2104 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage, storage, Storage, "Failed to parse storage setting");
2105
2106 static const char* const split_mode_table[_SPLIT_MAX] = {
2107 [SPLIT_LOGIN] = "login",
2108 [SPLIT_UID] = "uid",
2109 [SPLIT_NONE] = "none",
2110 };
2111
2112 DEFINE_STRING_TABLE_LOOKUP(split_mode, SplitMode);
2113 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode, split_mode, SplitMode, "Failed to parse split mode setting");
2114
2115 int config_parse_line_max(
2116 const char* unit,
2117 const char *filename,
2118 unsigned line,
2119 const char *section,
2120 unsigned section_line,
2121 const char *lvalue,
2122 int ltype,
2123 const char *rvalue,
2124 void *data,
2125 void *userdata) {
2126
2127 size_t *sz = data;
2128 int r;
2129
2130 assert(filename);
2131 assert(lvalue);
2132 assert(rvalue);
2133 assert(data);
2134
2135 if (isempty(rvalue))
2136 /* Empty assignment means default */
2137 *sz = DEFAULT_LINE_MAX;
2138 else {
2139 uint64_t v;
2140
2141 r = parse_size(rvalue, 1024, &v);
2142 if (r < 0) {
2143 log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse LineMax= value, ignoring: %s", rvalue);
2144 return 0;
2145 }
2146
2147 if (v < 79) {
2148 /* Why specify 79 here as minimum line length? Simply, because the most common traditional
2149 * terminal size is 80ch, and it might make sense to break one character before the natural
2150 * line break would occur on that. */
2151 log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too small, clamping to 79: %s", rvalue);
2152 *sz = 79;
2153 } else if (v > (uint64_t) (SSIZE_MAX-1)) {
2154 /* So, why specify SSIZE_MAX-1 here? Because that's one below the largest size value read()
2155 * can return, and we need one extra byte for the trailing NUL byte. Of course IRL such large
2156 * memory allocations will fail anyway, hence this limit is mostly theoretical anyway, as we'll
2157 * fail much earlier anyway. */
2158 log_syntax(unit, LOG_WARNING, filename, line, 0, "LineMax= too large, clamping to %" PRIu64 ": %s", (uint64_t) (SSIZE_MAX-1), rvalue);
2159 *sz = SSIZE_MAX-1;
2160 } else
2161 *sz = (size_t) v;
2162 }
2163
2164 return 0;
2165 }
2166
2167 int config_parse_compress(const char* unit,
2168 const char *filename,
2169 unsigned line,
2170 const char *section,
2171 unsigned section_line,
2172 const char *lvalue,
2173 int ltype,
2174 const char *rvalue,
2175 void *data,
2176 void *userdata) {
2177 JournalCompressOptions* compress = data;
2178 int r;
2179
2180 if (streq(rvalue, "1")) {
2181 log_syntax(unit, LOG_WARNING, filename, line, 0,
2182 "Compress= ambiguously specified as 1, enabling compression with default threshold");
2183 compress->enabled = true;
2184 } else if (streq(rvalue, "0")) {
2185 log_syntax(unit, LOG_WARNING, filename, line, 0,
2186 "Compress= ambiguously specified as 0, disabling compression");
2187 compress->enabled = false;
2188 } else if ((r = parse_boolean(rvalue)) >= 0)
2189 compress->enabled = r;
2190 else if (parse_size(rvalue, 1024, &compress->threshold_bytes) == 0)
2191 compress->enabled = true;
2192 else if (isempty(rvalue)) {
2193 compress->enabled = true;
2194 compress->threshold_bytes = (uint64_t) -1;
2195 } else
2196 log_syntax(unit, LOG_ERR, filename, line, r, "Failed to parse Compress= value, ignoring: %s", rvalue);
2197
2198 return 0;
2199 }