1 /* SPDX-License-Identifier: LGPL-2.1+ */
4 #include <selinux/selinux.h>
8 #include <sys/signalfd.h>
9 #include <sys/statvfs.h>
10 #include <linux/sockios.h>
12 #include "sd-daemon.h"
13 #include "sd-journal.h"
14 #include "sd-messages.h"
17 #include "alloc-util.h"
18 #include "audit-util.h"
19 #include "cgroup-util.h"
20 #include "conf-parser.h"
21 #include "dirent-util.h"
22 #include "extract-word.h"
25 #include "format-util.h"
28 #include "hostname-util.h"
29 #include "id128-util.h"
31 #include "journal-authenticate.h"
32 #include "journal-file.h"
33 #include "journal-internal.h"
34 #include "journal-vacuum.h"
35 #include "journald-audit.h"
36 #include "journald-context.h"
37 #include "journald-kmsg.h"
38 #include "journald-native.h"
39 #include "journald-rate-limit.h"
40 #include "journald-server.h"
41 #include "journald-stream.h"
42 #include "journald-syslog.h"
46 #include "parse-util.h"
47 #include "proc-cmdline.h"
48 #include "process-util.h"
50 #include "selinux-util.h"
51 #include "signal-util.h"
52 #include "socket-util.h"
53 #include "stdio-util.h"
54 #include "string-table.h"
55 #include "string-util.h"
56 #include "syslog-util.h"
57 #include "user-util.h"
/* Number of open per-user journal files at which find_journal() starts closing entries taken from the
 * front of the user_journals map before opening another one. */
59 #define USER_JOURNALS_MAX 1024
61 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
62 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
63 #define DEFAULT_RATE_LIMIT_BURST 10000
64 #define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
/* Added to the timestamp of the cached space measurement and compared against the current monotonic time
 * in cache_space_refresh(). */
66 #define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
68 #define NOTIFY_SNDBUF_SIZE (8*1024*1024)
70 /* The period to insert between posting changes for coalescing */
71 #define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
73 /* Pick a good default that is likely to fit into AF_UNIX and AF_INET SOCK_DGRAM datagrams, and even leaves some room
74 * for a bit of additional metadata. */
75 #define DEFAULT_LINE_MAX (48*1024)
/* Size of the deferred_closes set at which server_vacuum_deferred_closes() starts closing files. */
77 #define DEFERRED_CLOSES_MAX (4096)
/* Measures the journal directory 'path'.
 *
 * *ret_free is set to f_bsize * f_bavail as reported by fstatvfs() on the directory. *ret_used is increased by
 * st_blocks * 512 for directory entries; the entry names are tested for the suffixes ".journal" and
 * ".journal~", each entry is stat'ed without following symlinks, and its mode is tested with S_ISREG().
 *
 * A directory that cannot be opened is logged at debug level if errno is ENOENT and at error level otherwise;
 * an fstatvfs() failure is logged as an error. In both cases the result of the log call is returned. A failing
 * fstatat() on an entry is only logged at debug level ("ignoring").
 *
 * NOTE(review): the statements that skip non-matching entries and the initialisation of *ret_used are not
 * visible in this excerpt - confirm against the full file. */
79 static int determine_path_usage(Server
*s
, const char *path
, uint64_t *ret_used
, uint64_t *ret_free
) {
80 _cleanup_closedir_
DIR *d
= NULL
;
89 return log_full_errno(errno
== ENOENT
? LOG_DEBUG
: LOG_ERR
,
90 errno
, "Failed to open %s: %m", path
);
92 if (fstatvfs(dirfd(d
), &ss
) < 0)
93 return log_error_errno(errno
, "Failed to fstatvfs(%s): %m", path
);
95 *ret_free
= ss
.f_bsize
* ss
.f_bavail
;
97 FOREACH_DIRENT_ALL(de
, d
, break) {
100 if (!endswith(de
->d_name
, ".journal") &&
101 !endswith(de
->d_name
, ".journal~"))
104 if (fstatat(dirfd(d
), de
->d_name
, &st
, AT_SYMLINK_NOFOLLOW
) < 0) {
105 log_debug_errno(errno
, "Failed to stat %s/%s, ignoring: %m", path
, de
->d_name
);
109 if (!S_ISREG(st
.st_mode
))
112 *ret_used
+= (uint64_t) st
.st_blocks
* 512UL;
118 static void cache_space_invalidate(JournalStorageSpace
*space
) {
/* Updates the cached space figures in storage->space from a fresh determine_path_usage() measurement of
 * storage->path.
 *
 * The cache timestamp is compared against the current monotonic time: a non-zero timestamp that is less than
 * RECHECK_SPACE_USEC old is tested for before measuring again.
 *
 * Values stored after a measurement:
 *   vfs_used / vfs_available - as returned by determine_path_usage()
 *   limit     - MIN(MAX(vfs_used + avail, min_use), max_use), where avail is vfs_avail reduced by
 *               keep_free through LESS_BY()
 *   available - limit reduced by vfs_used through LESS_BY()
 *   timestamp - the monotonic time taken at the start of the call */
122 static int cache_space_refresh(Server
*s
, JournalStorage
*storage
) {
123 JournalStorageSpace
*space
;
124 JournalMetrics
*metrics
;
125 uint64_t vfs_used
, vfs_avail
, avail
;
131 metrics
= &storage
->metrics
;
132 space
= &storage
->space
;
134 ts
= now(CLOCK_MONOTONIC
);
136 if (space
->timestamp
!= 0 && space
->timestamp
+ RECHECK_SPACE_USEC
> ts
)
139 r
= determine_path_usage(s
, storage
->path
, &vfs_used
, &vfs_avail
);
143 space
->vfs_used
= vfs_used
;
144 space
->vfs_available
= vfs_avail
;
146 avail
= LESS_BY(vfs_avail
, metrics
->keep_free
);
148 space
->limit
= MIN(MAX(vfs_used
+ avail
, metrics
->min_use
), metrics
->max_use
);
149 space
->available
= LESS_BY(space
->limit
, vfs_used
);
150 space
->timestamp
= ts
;
/* Raises storage->metrics.min_use to storage->space.vfs_used if the latter is larger; min_use is never
 * lowered here. system_journal_open() calls this right after cache_space_refresh() for a journal it has
 * opened. */
154 static void patch_min_use(JournalStorage
*storage
) {
157 /* Let's bump the min_use limit to the current usage on disk. We do
158 * this when starting up and first opening the journal files. This way
159 * sudden spikes in disk usage will not cause journald to vacuum files
160 * without bounds. Note that this means that only a restart of journald
161 * will make it reset this value. */
163 storage
->metrics
.min_use
= MAX(storage
->metrics
.min_use
, storage
->space
.vfs_used
);
/* Reports the cached 'available' and 'limit' figures of the storage currently in use: the system storage
 * if s->system_journal is set, the runtime storage otherwise. The cache is refreshed through
 * cache_space_refresh() before the values are read. */
166 static int determine_space(Server
*s
, uint64_t *available
, uint64_t *limit
) {
172 js
= s
->system_journal
? &s
->system_storage
: &s
->runtime_storage
;
174 r
= cache_space_refresh(s
, js
);
177 *available
= js
->space
.available
;
179 *limit
= js
->space
.limit
;
184 void server_space_usage_message(Server
*s
, JournalStorage
*storage
) {
185 char fb1
[FORMAT_BYTES_MAX
], fb2
[FORMAT_BYTES_MAX
], fb3
[FORMAT_BYTES_MAX
],
186 fb4
[FORMAT_BYTES_MAX
], fb5
[FORMAT_BYTES_MAX
], fb6
[FORMAT_BYTES_MAX
];
187 JournalMetrics
*metrics
;
192 storage
= s
->system_journal
? &s
->system_storage
: &s
->runtime_storage
;
194 if (cache_space_refresh(s
, storage
) < 0)
197 metrics
= &storage
->metrics
;
198 format_bytes(fb1
, sizeof(fb1
), storage
->space
.vfs_used
);
199 format_bytes(fb2
, sizeof(fb2
), metrics
->max_use
);
200 format_bytes(fb3
, sizeof(fb3
), metrics
->keep_free
);
201 format_bytes(fb4
, sizeof(fb4
), storage
->space
.vfs_available
);
202 format_bytes(fb5
, sizeof(fb5
), storage
->space
.limit
);
203 format_bytes(fb6
, sizeof(fb6
), storage
->space
.available
);
205 server_driver_message(s
, 0,
206 "MESSAGE_ID=" SD_MESSAGE_JOURNAL_USAGE_STR
,
207 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
208 storage
->name
, storage
->path
, fb1
, fb5
, fb6
),
209 "JOURNAL_NAME=%s", storage
->name
,
210 "JOURNAL_PATH=%s", storage
->path
,
211 "CURRENT_USE=%"PRIu64
, storage
->space
.vfs_used
,
212 "CURRENT_USE_PRETTY=%s", fb1
,
213 "MAX_USE=%"PRIu64
, metrics
->max_use
,
214 "MAX_USE_PRETTY=%s", fb2
,
215 "DISK_KEEP_FREE=%"PRIu64
, metrics
->keep_free
,
216 "DISK_KEEP_FREE_PRETTY=%s", fb3
,
217 "DISK_AVAILABLE=%"PRIu64
, storage
->space
.vfs_available
,
218 "DISK_AVAILABLE_PRETTY=%s", fb4
,
219 "LIMIT=%"PRIu64
, storage
->space
.limit
,
220 "LIMIT_PRETTY=%s", fb5
,
221 "AVAILABLE=%"PRIu64
, storage
->space
.available
,
222 "AVAILABLE_PRETTY=%s", fb6
,
226 static bool uid_for_system_journal(uid_t uid
) {
228 /* Returns true if the specified UID shall get its data stored in the system journal*/
230 return uid_is_system(uid
) || uid_is_dynamic(uid
) || uid
== UID_NOBODY
;
/* Sets ACLs for 'uid' on the journal file 'f' by calling add_acls_for_user() on f->fd. A failure is only
 * logged as a warning naming f->path. The UID is first tested with uid_for_system_journal().
 *
 * NOTE(review): presumably UIDs that map to the system journal are skipped; the statement following that
 * test is not visible in this excerpt - confirm. */
233 static void server_add_acls(JournalFile
*f
, uid_t uid
) {
240 if (uid_for_system_journal(uid
))
243 r
= add_acls_for_user(f
->fd
, uid
);
245 log_warning_errno(r
, "Failed to set ACL on %s, ignoring: %m", f
->path
);
249 static int open_journal(
255 JournalMetrics
*metrics
,
266 r
= journal_file_open_reliably(fname
, flags
, 0640, s
->compress
.enabled
, s
->compress
.threshold_bytes
,
267 seal
, metrics
, s
->mmap
, s
->deferred_closes
, NULL
, &f
);
269 r
= journal_file_open(-1, fname
, flags
, 0640, s
->compress
.enabled
, s
->compress
.threshold_bytes
, seal
,
270 metrics
, s
->mmap
, s
->deferred_closes
, NULL
, &f
);
275 r
= journal_file_enable_post_change_timer(f
, s
->event
, POST_CHANGE_TIMER_INTERVAL_USEC
);
277 (void) journal_file_close(f
);
static bool flushed_flag_is_set(void) {
        /* The flag is set if the flag file exists, i.e. if access() with F_OK succeeds on it. */
        if (access("/run/systemd/journal/flushed", F_OK) < 0)
                return false;

        return true;
}
289 static int system_journal_open(Server
*s
, bool flush_requested
) {
293 if (!s
->system_journal
&&
294 IN_SET(s
->storage
, STORAGE_PERSISTENT
, STORAGE_AUTO
) &&
295 (flush_requested
|| flushed_flag_is_set())) {
297 /* If in auto mode: first try to create the machine
298 * path, but not the prefix.
300 * If in persistent mode: create /var/log/journal and
301 * the machine path */
303 if (s
->storage
== STORAGE_PERSISTENT
)
304 (void) mkdir_p("/var/log/journal/", 0755);
306 (void) mkdir(s
->system_storage
.path
, 0755);
308 fn
= strjoina(s
->system_storage
.path
, "/system.journal");
309 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_storage
.metrics
, &s
->system_journal
);
311 server_add_acls(s
->system_journal
, 0);
312 (void) cache_space_refresh(s
, &s
->system_storage
);
313 patch_min_use(&s
->system_storage
);
315 if (!IN_SET(r
, -ENOENT
, -EROFS
))
316 log_warning_errno(r
, "Failed to open system journal: %m");
321 /* If the runtime journal is open, and we're post-flush, we're
322 * recovering from a failed system journal rotate (ENOSPC)
323 * for which the runtime journal was reopened.
325 * Perform an implicit flush to var, leaving the runtime
326 * journal closed, now that the system journal is back.
328 if (!flush_requested
)
329 (void) server_flush_to_var(s
, true);
332 if (!s
->runtime_journal
&&
333 (s
->storage
!= STORAGE_NONE
)) {
335 fn
= strjoina(s
->runtime_storage
.path
, "/system.journal");
337 if (s
->system_journal
) {
339 /* Try to open the runtime journal, but only
340 * if it already exists, so that we can flush
341 * it into the system journal */
343 r
= open_journal(s
, false, fn
, O_RDWR
, false, &s
->runtime_storage
.metrics
, &s
->runtime_journal
);
346 log_warning_errno(r
, "Failed to open runtime journal: %m");
353 /* OK, we really need the runtime journal, so create
354 * it if necessary. */
356 (void) mkdir("/run/log", 0755);
357 (void) mkdir("/run/log/journal", 0755);
358 (void) mkdir_parents(fn
, 0750);
360 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, false, &s
->runtime_storage
.metrics
, &s
->runtime_journal
);
362 return log_error_errno(r
, "Failed to open runtime journal: %m");
365 if (s
->runtime_journal
) {
366 server_add_acls(s
->runtime_journal
, 0);
367 (void) cache_space_refresh(s
, &s
->runtime_storage
);
368 patch_min_use(&s
->runtime_storage
);
375 static JournalFile
* find_journal(Server
*s
, uid_t uid
) {
376 _cleanup_free_
char *p
= NULL
;
383 /* A rotate that fails to create the new journal (ENOSPC) leaves the
384 * rotated journal as NULL. Unless we revisit opening, even after
385 * space is made available we'll continue to return NULL indefinitely.
387 * system_journal_open() is a noop if the journals are already open, so
388 * we can just call it here to recover from failed rotates (or anything
389 * else that's left the journals as NULL).
391 * Fixes https://github.com/systemd/systemd/issues/3968 */
392 (void) system_journal_open(s
, false);
394 /* We split up user logs only on /var, not on /run. If the
395 * runtime file is open, we write to it exclusively, in order
396 * to guarantee proper order as soon as we flush /run to
397 * /var and close the runtime file. */
399 if (s
->runtime_journal
)
400 return s
->runtime_journal
;
402 if (uid_for_system_journal(uid
))
403 return s
->system_journal
;
405 f
= ordered_hashmap_get(s
->user_journals
, UID_TO_PTR(uid
));
409 r
= sd_id128_get_machine(&machine
);
411 log_debug_errno(r
, "Failed to determine machine ID, using system log: %m");
412 return s
->system_journal
;
415 if (asprintf(&p
, "/var/log/journal/" SD_ID128_FORMAT_STR
"/user-"UID_FMT
".journal",
416 SD_ID128_FORMAT_VAL(machine
), uid
) < 0) {
418 return s
->system_journal
;
421 while (ordered_hashmap_size(s
->user_journals
) >= USER_JOURNALS_MAX
) {
422 /* Too many open? Then let's close one */
423 f
= ordered_hashmap_steal_first(s
->user_journals
);
425 (void) journal_file_close(f
);
428 r
= open_journal(s
, true, p
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_storage
.metrics
, &f
);
430 return s
->system_journal
;
432 server_add_acls(f
, uid
);
434 r
= ordered_hashmap_put(s
->user_journals
, UID_TO_PTR(uid
), f
);
436 (void) journal_file_close(f
);
437 return s
->system_journal
;
/* Rotates the journal file referenced by 'f' through journal_file_rotate(), passing the server's
 * compression settings, the 'seal' flag and the deferred_closes set, and afterwards applies ACLs for 'uid'
 * to *f with server_add_acls().
 *
 * Two failures are logged as errors and returned: "Failed to rotate <path>" (which uses (*f)->path) and
 * "Failed to create new <name> journal". server_rotate() relies on *f possibly being NULL after a failed
 * call ("Old file has been closed and deallocated"). */
443 static int do_rotate(
456 r
= journal_file_rotate(f
, s
->compress
.enabled
, s
->compress
.threshold_bytes
, seal
, s
->deferred_closes
);
459 return log_error_errno(r
, "Failed to rotate %s: %m", (*f
)->path
);
461 return log_error_errno(r
, "Failed to create new %s journal: %m", name
);
464 server_add_acls(*f
, uid
);
/* Iterates over s->deferred_closes, tests each file with journal_file_is_offlining(), and removes files
 * from the set and closes them with journal_file_close(). The results of set_remove() and
 * journal_file_close() are deliberately discarded. */
469 static void server_process_deferred_closes(Server
*s
) {
473 /* Perform any deferred closes which aren't still offlining. */
474 SET_FOREACH(f
, s
->deferred_closes
, i
) {
475 if (journal_file_is_offlining(f
))
478 (void) set_remove(s
->deferred_closes
, f
);
479 (void) journal_file_close(f
);
/* Keeps s->deferred_closes below DEFERRED_CLOSES_MAX entries. The set size is first tested against the
 * limit; server_process_deferred_closes() is then run, and after that files are stolen from the front of
 * the set and closed for as long as the set still holds DEFERRED_CLOSES_MAX or more entries.
 * set_steal_first() is wrapped in assert_se(), i.e. it is expected to yield a file while the loop runs. */
483 static void server_vacuum_deferred_closes(Server
*s
) {
486 /* Make some room in the deferred closes list, so that it doesn't grow without bounds */
487 if (set_size(s
->deferred_closes
) < DEFERRED_CLOSES_MAX
)
490 /* Let's first remove all journal files that might already have completed closing */
491 server_process_deferred_closes(s
);
493 /* And now, let's close some more until we reach the limit again. */
494 while (set_size(s
->deferred_closes
) >= DEFERRED_CLOSES_MAX
) {
497 assert_se(f
= set_steal_first(s
->deferred_closes
));
498 journal_file_close(f
);
/* Builds the path "/var/log/journal/<machine id>/" from sd_id128_get_machine() and opens that directory.
 *
 * On success the directory handle and the path string are handed to the caller through *ret_dir and
 * *ret_path using TAKE_PTR(), so the local cleanup handlers no longer release them. Failing to determine
 * the machine ID or to open the directory is logged as an error and the result of the log call is
 * returned. */
502 static int open_user_journal_directory(Server
*s
, DIR **ret_dir
, char **ret_path
) {
503 _cleanup_closedir_
DIR *dir
= NULL
;
504 _cleanup_free_
char *path
= NULL
;
510 r
= sd_id128_get_machine(&machine
);
512 return log_error_errno(r
, "Failed to determine machine ID, ignoring: %m");
514 if (asprintf(&path
, "/var/log/journal/" SD_ID128_FORMAT_STR
"/", SD_ID128_FORMAT_VAL(machine
)) < 0)
519 return log_error_errno(errno
, "Failed to open user journal directory '%s': %m", path
);
522 *ret_dir
= TAKE_PTR(dir
);
524 *ret_path
= TAKE_PTR(path
);
529 void server_rotate(Server
*s
) {
530 _cleanup_free_
char *path
= NULL
;
531 _cleanup_closedir_
DIR *d
= NULL
;
537 log_debug("Rotating...");
539 /* First, rotate the system journal (either in its runtime flavour or in its system flavour) */
540 (void) do_rotate(s
, &s
->runtime_journal
, "runtime", false, 0);
541 (void) do_rotate(s
, &s
->system_journal
, "system", s
->seal
, 0);
543 /* Then, rotate all user journals we have open (keeping them open) */
544 ORDERED_HASHMAP_FOREACH_KEY(f
, k
, s
->user_journals
, i
) {
545 r
= do_rotate(s
, &f
, "user", s
->seal
, PTR_TO_UID(k
));
547 ordered_hashmap_replace(s
->user_journals
, k
, f
);
549 /* Old file has been closed and deallocated */
550 ordered_hashmap_remove(s
->user_journals
, k
);
553 /* Finally, also rotate all user journals we currently do not have open. (But do so only if we actually have
554 * access to /var, i.e. are not in the log-to-runtime-journal mode). */
555 if (!s
->runtime_journal
&&
556 open_user_journal_directory(s
, &d
, &path
) >= 0) {
560 FOREACH_DIRENT(de
, d
, log_warning_errno(errno
, "Failed to enumerate %s, ignoring: %m", path
)) {
561 _cleanup_free_
char *u
= NULL
, *full
= NULL
;
562 _cleanup_close_
int fd
= -1;
566 a
= startswith(de
->d_name
, "user-");
569 b
= endswith(de
->d_name
, ".journal");
579 r
= parse_uid(u
, &uid
);
581 log_debug_errno(r
, "Failed to parse UID from file name '%s', ignoring: %m", de
->d_name
);
585 /* Already rotated in the above loop? i.e. is it an open user journal? */
586 if (ordered_hashmap_contains(s
->user_journals
, UID_TO_PTR(uid
)))
589 full
= strjoin(path
, de
->d_name
);
595 fd
= openat(dirfd(d
), de
->d_name
, O_RDWR
|O_CLOEXEC
|O_NOCTTY
|O_NOFOLLOW
|O_NONBLOCK
);
597 log_full_errno(IN_SET(errno
, ELOOP
, ENOENT
) ? LOG_DEBUG
: LOG_WARNING
, errno
,
598 "Failed to open journal file '%s' for rotation: %m", full
);
602 /* Make some room in the set of deferred close()s */
603 server_vacuum_deferred_closes(s
);
605 /* Open the file briefly, so that we can archive it */
606 r
= journal_file_open(fd
,
611 s
->compress
.threshold_bytes
,
613 &s
->system_storage
.metrics
,
619 log_warning_errno(r
, "Failed to read journal file %s for rotation, trying to move it out of the way: %m", full
);
621 r
= journal_file_dispose(dirfd(d
), de
->d_name
);
623 log_warning_errno(r
, "Failed to move %s out of the way, ignoring: %m", full
);
625 log_debug("Successfully moved %s out of the way.", full
);
630 TAKE_FD(fd
); /* Donated to journal_file_open() */
632 r
= journal_file_archive(f
);
634 log_debug_errno(r
, "Failed to archive journal file '%s', ignoring: %m", full
);
636 f
= journal_initiate_close(f
, s
->deferred_closes
);
640 server_process_deferred_closes(s
);
/* Calls journal_file_set_offline(..., false) on the system journal (if one is open) and on every journal in
 * s->user_journals; failures are logged as warnings ("ignoring"). Afterwards the sync event source, if
 * there is one, is switched to SD_EVENT_OFF (a failure is logged as an error) and s->sync_scheduled is
 * cleared. */
643 void server_sync(Server
*s
) {
648 if (s
->system_journal
) {
649 r
= journal_file_set_offline(s
->system_journal
, false);
651 log_warning_errno(r
, "Failed to sync system journal, ignoring: %m");
654 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
) {
655 r
= journal_file_set_offline(f
, false);
657 log_warning_errno(r
, "Failed to sync user journal, ignoring: %m");
660 if (s
->sync_event_source
) {
661 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_OFF
);
663 log_error_errno(r
, "Failed to disable sync timer source: %m");
666 s
->sync_scheduled
= false;
/* Vacuums one storage location. The space cache is refreshed (result ignored), then
 * journal_directory_vacuum() is run on storage->path with the cached limit, the configured n_max_files and
 * the server's max_retention_usec; it also receives &s->oldest_file_usec and the 'verbose' flag. Any
 * failure other than -ENOENT is logged as a warning. Finally the space cache is invalidated with
 * cache_space_invalidate(). server_space_usage_message() is called before vacuuming.
 *
 * NOTE(review): the condition guarding the usage message is not visible in this excerpt - confirm. */
669 static void do_vacuum(Server
*s
, JournalStorage
*storage
, bool verbose
) {
676 (void) cache_space_refresh(s
, storage
);
679 server_space_usage_message(s
, storage
);
681 r
= journal_directory_vacuum(storage
->path
, storage
->space
.limit
,
682 storage
->metrics
.n_max_files
, s
->max_retention_usec
,
683 &s
->oldest_file_usec
, verbose
);
684 if (r
< 0 && r
!= -ENOENT
)
685 log_warning_errno(r
, "Failed to vacuum %s, ignoring: %m", storage
->path
);
687 cache_space_invalidate(&storage
->space
);
/* Resets s->oldest_file_usec to 0 and runs do_vacuum() on the system storage if the system journal is open
 * and on the runtime storage if the runtime journal is open, forwarding 'verbose' to both. */
690 int server_vacuum(Server
*s
, bool verbose
) {
693 log_debug("Vacuuming...");
695 s
->oldest_file_usec
= 0;
697 if (s
->system_journal
)
698 do_vacuum(s
, &s
->system_storage
, verbose
);
699 if (s
->runtime_journal
)
700 do_vacuum(s
, &s
->runtime_storage
, verbose
);
/* Queries the machine ID with sd_id128_get_machine() and writes the string "_MACHINE_ID=" followed by the
 * formatted ID into s->machine_id_field; dispatch_message_real() appends that field to entries when it is
 * non-empty. */
705 static void server_cache_machine_id(Server
*s
) {
711 r
= sd_id128_get_machine(&id
);
715 sd_id128_to_string(id
, stpcpy(s
->machine_id_field
, "_MACHINE_ID="));
/* Queries the boot ID with sd_id128_get_boot() and writes the string "_BOOT_ID=" followed by the formatted
 * ID into s->boot_id_field; dispatch_message_real() appends that field to entries when it is non-empty. */
718 static void server_cache_boot_id(Server
*s
) {
724 r
= sd_id128_get_boot(&id
);
728 sd_id128_to_string(id
, stpcpy(s
->boot_id_field
, "_BOOT_ID="));
/* Obtains the host name with gethostname_malloc(), builds the string "_HOSTNAME=<name>" with strappend()
 * and installs it as s->hostname_field, freeing the previously cached string first. The temporary host
 * name string is released by its _cleanup_free_ handler. */
731 static void server_cache_hostname(Server
*s
) {
732 _cleanup_free_
char *t
= NULL
;
737 t
= gethostname_malloc();
741 x
= strappend("_HOSTNAME=", t
);
745 free(s
->hostname_field
);
746 s
->hostname_field
= x
;
/* Classifies the negative errno-style result 'r' of a failed write to journal file 'f' and logs, per class
 * of error, why the file is going to be rotated (allocation limit, I/O error, foreign machine, unclean
 * shutdown, unsupported feature, corruption, deleted file, file from the future).
 *
 * Callers (write_to_journal(), server_flush_to_var()) give up on the entry when this returns false and
 * otherwise vacuum and retry the write.
 *
 * NOTE(review): the switch head, the return statements and the default case are not visible in this
 * excerpt - confirm which value each case returns. */
749 static bool shall_try_append_again(JournalFile
*f
, int r
) {
752 case -E2BIG
: /* Hit configured limit */
753 case -EFBIG
: /* Hit fs limit */
754 case -EDQUOT
: /* Quota limit hit */
755 case -ENOSPC
: /* Disk full */
756 log_debug("%s: Allocation limit reached, rotating.", f
->path
);
759 case -EIO
: /* I/O error of some kind (mmap) */
760 log_warning("%s: IO error, rotating.", f
->path
);
763 case -EHOSTDOWN
: /* Other machine */
764 log_info("%s: Journal file from other machine, rotating.", f
->path
);
767 case -EBUSY
: /* Unclean shutdown */
768 log_info("%s: Unclean shutdown, rotating.", f
->path
);
771 case -EPROTONOSUPPORT
: /* Unsupported feature */
772 log_info("%s: Unsupported feature, rotating.", f
->path
);
775 case -EBADMSG
: /* Corrupted */
776 case -ENODATA
: /* Truncated */
777 case -ESHUTDOWN
: /* Already archived */
778 log_warning("%s: Journal file corrupted, rotating.", f
->path
);
781 case -EIDRM
: /* Journal file has been deleted */
782 log_warning("%s: Journal file has been deleted, rotating.", f
->path
);
785 case -ETXTBSY
: /* Journal file is from the future */
786 log_warning("%s: Journal file is from the future, rotating.", f
->path
);
794 static void write_to_journal(Server
*s
, uid_t uid
, struct iovec
*iovec
, size_t n
, int priority
) {
795 bool vacuumed
= false, rotate
= false;
796 struct dual_timestamp ts
;
804 /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
805 * the source time, and not even the time the event was originally seen, but instead simply the time we started
806 * processing it, as we want strictly linear ordering in what we write out.) */
807 assert_se(sd_event_now(s
->event
, CLOCK_REALTIME
, &ts
.realtime
) >= 0);
808 assert_se(sd_event_now(s
->event
, CLOCK_MONOTONIC
, &ts
.monotonic
) >= 0);
810 if (ts
.realtime
< s
->last_realtime_clock
) {
811 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
812 * regular operation. However, when it does happen, then we should make sure that we start fresh files
813 * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
814 * bisection works correctly. */
816 log_debug("Time jumped backwards, rotating.");
820 f
= find_journal(s
, uid
);
824 if (journal_file_rotate_suggested(f
, s
->max_file_usec
)) {
825 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f
->path
);
832 server_vacuum(s
, false);
835 f
= find_journal(s
, uid
);
840 s
->last_realtime_clock
= ts
.realtime
;
842 r
= journal_file_append_entry(f
, &ts
, NULL
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
844 server_schedule_sync(s
, priority
);
848 if (vacuumed
|| !shall_try_append_again(f
, r
)) {
849 log_error_errno(r
, "Failed to write entry (%zu items, %zu bytes), ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
854 server_vacuum(s
, false);
856 f
= find_journal(s
, uid
);
860 log_debug("Retrying write.");
861 r
= journal_file_append_entry(f
, &ts
, NULL
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
863 log_error_errno(r
, "Failed to write entry (%zu items, %zu bytes) despite vacuuming, ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
865 server_schedule_sync(s
, priority
);
/* Helpers that append one "FIELD=value" entry to an iovec array.
 *
 * Each helper formats the entry into a buffer obtained from newa()/strjoina(), stores it at iovec[n] and
 * post-increments n. Nothing is appended when the value is considered unset (isset(value) is false, the
 * string is empty, the ID is null, or value_size is 0). 'field' must be a string literal WITHOUT the
 * trailing "=": the helpers add it themselves. The caller has to make sure the array has room for the
 * additional entry. Note that 'value' may be evaluated more than once.
 *
 * The helpers are wrapped in do { } while (0) so that each expands to exactly one statement and can be used
 * as the body of an unbraced if/else; they must be followed by a semicolon. */
#define IOVEC_ADD_NUMERIC_FIELD(iovec, n, value, type, isset, format, field)  \
        do {                                                            \
                if (isset(value)) {                                     \
                        char *_k;                                       \
                        _k = newa(char, STRLEN(field "=") + DECIMAL_STR_MAX(type) + 1); \
                        sprintf(_k, field "=" format, (value));         \
                        (iovec)[(n)++] = IOVEC_MAKE_STRING(_k);         \
                }                                                       \
        } while (0)

#define IOVEC_ADD_STRING_FIELD(iovec, n, value, field)                  \
        do {                                                            \
                if (!isempty(value)) {                                  \
                        char *_k;                                       \
                        _k = strjoina(field "=", value);                \
                        (iovec)[(n)++] = IOVEC_MAKE_STRING(_k);         \
                }                                                       \
        } while (0)

#define IOVEC_ADD_ID128_FIELD(iovec, n, value, field)                   \
        do {                                                            \
                if (!sd_id128_is_null(value)) {                         \
                        char *_k;                                       \
                        _k = newa(char, STRLEN(field "=") + SD_ID128_STRING_MAX); \
                        sd_id128_to_string((value), stpcpy(_k, field "=")); \
                        (iovec)[(n)++] = IOVEC_MAKE_STRING(_k);         \
                }                                                       \
        } while (0)

#define IOVEC_ADD_SIZED_FIELD(iovec, n, value, value_size, field)       \
        do {                                                            \
                if ((value_size) > 0) {                                 \
                        char *_k;                                       \
                        _k = newa(char, STRLEN(field "=") + (value_size) + 1); \
                        *((char*) mempcpy(stpcpy(_k, field "="), (value), (value_size))) = 0; \
                        (iovec)[(n)++] = IOVEC_MAKE_STRING(_k);         \
                }                                                       \
        } while (0)
899 static void dispatch_message_real(
901 struct iovec
*iovec
, size_t n
, size_t m
,
902 const ClientContext
*c
,
903 const struct timeval
*tv
,
907 char source_time
[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t
)];
915 N_IOVEC_META_FIELDS
+
916 (pid_is_valid(object_pid
) ? N_IOVEC_OBJECT_FIELDS
: 0) +
917 client_context_extra_fields_n_iovec(c
) <= m
);
920 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->pid
, pid_t
, pid_is_valid
, PID_FMT
, "_PID");
921 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->uid
, uid_t
, uid_is_valid
, UID_FMT
, "_UID");
922 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->gid
, gid_t
, gid_is_valid
, GID_FMT
, "_GID");
924 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->comm
, "_COMM");
925 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->exe
, "_EXE");
926 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->cmdline
, "_CMDLINE");
927 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->capeff
, "_CAP_EFFECTIVE");
929 IOVEC_ADD_SIZED_FIELD(iovec
, n
, c
->label
, c
->label_size
, "_SELINUX_CONTEXT");
931 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->auditid
, uint32_t, audit_session_is_valid
, "%" PRIu32
, "_AUDIT_SESSION");
932 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->loginuid
, uid_t
, uid_is_valid
, UID_FMT
, "_AUDIT_LOGINUID");
934 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->cgroup
, "_SYSTEMD_CGROUP");
935 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->session
, "_SYSTEMD_SESSION");
936 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->owner_uid
, uid_t
, uid_is_valid
, UID_FMT
, "_SYSTEMD_OWNER_UID");
937 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->unit
, "_SYSTEMD_UNIT");
938 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->user_unit
, "_SYSTEMD_USER_UNIT");
939 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->slice
, "_SYSTEMD_SLICE");
940 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->user_slice
, "_SYSTEMD_USER_SLICE");
942 IOVEC_ADD_ID128_FIELD(iovec
, n
, c
->invocation_id
, "_SYSTEMD_INVOCATION_ID");
944 if (c
->extra_fields_n_iovec
> 0) {
945 memcpy(iovec
+ n
, c
->extra_fields_iovec
, c
->extra_fields_n_iovec
* sizeof(struct iovec
));
946 n
+= c
->extra_fields_n_iovec
;
952 if (pid_is_valid(object_pid
) && client_context_get(s
, object_pid
, NULL
, NULL
, 0, NULL
, &o
) >= 0) {
954 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->pid
, pid_t
, pid_is_valid
, PID_FMT
, "OBJECT_PID");
955 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->uid
, uid_t
, uid_is_valid
, UID_FMT
, "OBJECT_UID");
956 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->gid
, gid_t
, gid_is_valid
, GID_FMT
, "OBJECT_GID");
958 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->comm
, "OBJECT_COMM");
959 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->exe
, "OBJECT_EXE");
960 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->cmdline
, "OBJECT_CMDLINE");
961 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->capeff
, "OBJECT_CAP_EFFECTIVE");
963 IOVEC_ADD_SIZED_FIELD(iovec
, n
, o
->label
, o
->label_size
, "OBJECT_SELINUX_CONTEXT");
965 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->auditid
, uint32_t, audit_session_is_valid
, "%" PRIu32
, "OBJECT_AUDIT_SESSION");
966 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->loginuid
, uid_t
, uid_is_valid
, UID_FMT
, "OBJECT_AUDIT_LOGINUID");
968 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->cgroup
, "OBJECT_SYSTEMD_CGROUP");
969 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->session
, "OBJECT_SYSTEMD_SESSION");
970 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->owner_uid
, uid_t
, uid_is_valid
, UID_FMT
, "OBJECT_SYSTEMD_OWNER_UID");
971 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->unit
, "OBJECT_SYSTEMD_UNIT");
972 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->user_unit
, "OBJECT_SYSTEMD_USER_UNIT");
973 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->slice
, "OBJECT_SYSTEMD_SLICE");
974 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->user_slice
, "OBJECT_SYSTEMD_USER_SLICE");
976 IOVEC_ADD_ID128_FIELD(iovec
, n
, o
->invocation_id
, "OBJECT_SYSTEMD_INVOCATION_ID=");
982 sprintf(source_time
, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT
, timeval_load(tv
));
983 iovec
[n
++] = IOVEC_MAKE_STRING(source_time
);
986 /* Note that strictly speaking storing the boot id here is
987 * redundant since the entry includes this in-line
988 * anyway. However, we need this indexed, too. */
989 if (!isempty(s
->boot_id_field
))
990 iovec
[n
++] = IOVEC_MAKE_STRING(s
->boot_id_field
);
992 if (!isempty(s
->machine_id_field
))
993 iovec
[n
++] = IOVEC_MAKE_STRING(s
->machine_id_field
);
995 if (!isempty(s
->hostname_field
))
996 iovec
[n
++] = IOVEC_MAKE_STRING(s
->hostname_field
);
1000 if (s
->split_mode
== SPLIT_UID
&& c
&& uid_is_valid(c
->uid
))
1001 /* Split up strictly by (non-root) UID */
1002 journal_uid
= c
->uid
;
1003 else if (s
->split_mode
== SPLIT_LOGIN
&& c
&& c
->uid
> 0 && uid_is_valid(c
->owner_uid
))
1004 /* Split up by login UIDs. We do this only if the
1005 * realuid is not root, in order not to accidentally
1006 * leak privileged information to the user that is
1007 * logged by a privileged process that is part of an
1008 * unprivileged session. */
1009 journal_uid
= c
->owner_uid
;
1013 write_to_journal(s
, journal_uid
, iovec
, n
, priority
);
1016 void server_driver_message(Server
*s
, pid_t object_pid
, const char *message_id
, const char *format
, ...) {
1018 struct iovec
*iovec
;
1026 m
= N_IOVEC_META_FIELDS
+ 5 + N_IOVEC_PAYLOAD_FIELDS
+ client_context_extra_fields_n_iovec(s
->my_context
) + N_IOVEC_OBJECT_FIELDS
;
1027 iovec
= newa(struct iovec
, m
);
1029 assert_cc(3 == LOG_FAC(LOG_DAEMON
));
1030 iovec
[n
++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=3");
1031 iovec
[n
++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=systemd-journald");
1033 iovec
[n
++] = IOVEC_MAKE_STRING("_TRANSPORT=driver");
1034 assert_cc(6 == LOG_INFO
);
1035 iovec
[n
++] = IOVEC_MAKE_STRING("PRIORITY=6");
1038 iovec
[n
++] = IOVEC_MAKE_STRING(message_id
);
1041 va_start(ap
, format
);
1042 r
= log_format_iovec(iovec
, m
, &n
, false, 0, format
, ap
);
1043 /* Error handling below */
1047 dispatch_message_real(s
, iovec
, n
, m
, s
->my_context
, NULL
, LOG_INFO
, object_pid
);
1050 free(iovec
[k
++].iov_base
);
1053 /* We failed to format the message. Emit a warning instead. */
1056 xsprintf(buf
, "MESSAGE=Entry printing failed: %s", strerror(-r
));
1059 iovec
[n
++] = IOVEC_MAKE_STRING("PRIORITY=4");
1060 iovec
[n
++] = IOVEC_MAKE_STRING(buf
);
1061 dispatch_message_real(s
, iovec
, n
, m
, s
->my_context
, NULL
, LOG_INFO
, object_pid
);
1065 void server_dispatch_message(
1067 struct iovec
*iovec
, size_t n
, size_t m
,
1069 const struct timeval
*tv
,
1073 uint64_t available
= 0;
1077 assert(iovec
|| n
== 0);
1082 if (LOG_PRI(priority
) > s
->max_level_store
)
1085 /* Stop early in case the information will not be stored
1087 if (s
->storage
== STORAGE_NONE
)
1091 (void) determine_space(s
, &available
, NULL
);
1093 rl
= journal_rate_limit_test(s
->rate_limit
, c
->unit
, c
->log_rate_limit_interval
, c
->log_rate_limit_burst
, priority
& LOG_PRIMASK
, available
);
1097 /* Write a suppression message if we suppressed something */
1099 server_driver_message(s
, c
->pid
,
1100 "MESSAGE_ID=" SD_MESSAGE_JOURNAL_DROPPED_STR
,
1101 LOG_MESSAGE("Suppressed %i messages from %s", rl
- 1, c
->unit
),
1102 "N_DROPPED=%i", rl
- 1,
1106 dispatch_message_real(s
, iovec
, n
, m
, c
, tv
, priority
, object_pid
);
1109 int server_flush_to_var(Server
*s
, bool require_flag_file
) {
1111 sd_journal
*j
= NULL
;
1112 char ts
[FORMAT_TIMESPAN_MAX
];
1119 if (!IN_SET(s
->storage
, STORAGE_AUTO
, STORAGE_PERSISTENT
))
1122 if (!s
->runtime_journal
)
1125 if (require_flag_file
&& !flushed_flag_is_set())
1128 (void) system_journal_open(s
, true);
1130 if (!s
->system_journal
)
1133 log_debug("Flushing to /var...");
1135 start
= now(CLOCK_MONOTONIC
);
1137 r
= sd_id128_get_machine(&machine
);
1141 r
= sd_journal_open(&j
, SD_JOURNAL_RUNTIME_ONLY
);
1143 return log_error_errno(r
, "Failed to read runtime journal: %m");
1145 sd_journal_set_data_threshold(j
, 0);
1147 SD_JOURNAL_FOREACH(j
) {
1151 f
= j
->current_file
;
1152 assert(f
&& f
->current_offset
> 0);
1156 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
1158 log_error_errno(r
, "Can't read entry: %m");
1162 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
);
1166 if (!shall_try_append_again(s
->system_journal
, r
)) {
1167 log_error_errno(r
, "Can't write entry: %m");
1172 server_vacuum(s
, false);
1174 if (!s
->system_journal
) {
1175 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1180 log_debug("Retrying write.");
1181 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
);
1183 log_error_errno(r
, "Can't write entry: %m");
1191 if (s
->system_journal
)
1192 journal_file_post_change(s
->system_journal
);
1194 s
->runtime_journal
= journal_file_close(s
->runtime_journal
);
1197 (void) rm_rf("/run/log/journal", REMOVE_ROOT
);
1199 sd_journal_close(j
);
1201 server_driver_message(s
, 0, NULL
,
1202 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1203 format_timespan(ts
, sizeof(ts
), now(CLOCK_MONOTONIC
) - start
, 0),
/* Event loop I/O callback for the datagram sockets of the server (native, syslog and audit).
 *
 * Receives one message with recvmsg() into s->buffer, picks the interesting pieces out of the
 * ancillary data (SCM_CREDENTIALS, SCM_SECURITY, SO_TIMESTAMP, SCM_RIGHTS) and hands the payload to
 * the syslog, native or audit processing routine that matches fd. Any file descriptors that were
 * received are handed to close_many() at the end.
 *
 * es:       the event source that fired
 * fd:       the socket that became readable; must be one of s->native_fd, s->syslog_fd, s->audit_fd
 * revents:  epoll event mask; anything other than EPOLLIN is logged as an EIO error
 * userdata: the Server object */
1210 int server_process_datagram(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1211 Server
*s
= userdata
;
1212 struct ucred
*ucred
= NULL
;
1213 struct timeval
*tv
= NULL
;
1214 struct cmsghdr
*cmsg
;
1216 size_t label_len
= 0, m
;
1219 int *fds
= NULL
, v
= 0;
1223 struct cmsghdr cmsghdr
;
1225 /* We use NAME_MAX space for the SELinux label
1226 * here. The kernel currently enforces no
1227 * limit, but according to suggestions from
1228 * the SELinux people this will change and it
1229 * will probably be identical to NAME_MAX. For
1230 * now we use that, but this should be updated
1231 * one day when the final limit is known. */
1232 uint8_t buf
[CMSG_SPACE(sizeof(struct ucred
)) +
1233 CMSG_SPACE(sizeof(struct timeval
)) +
1234 CMSG_SPACE(sizeof(int)) + /* fd */
1235 CMSG_SPACE(NAME_MAX
)]; /* selinux label */
1238 union sockaddr_union sa
= {};
1240 struct msghdr msghdr
= {
1243 .msg_control
= &control
,
1244 .msg_controllen
= sizeof(control
),
1246 .msg_namelen
= sizeof(sa
),
/* Only the three datagram sockets owned by the server may be dispatched here. */
1250 assert(fd
== s
->native_fd
|| fd
== s
->syslog_fd
|| fd
== s
->audit_fd
);
1252 if (revents
!= EPOLLIN
)
1253 return log_error_errno(SYNTHETIC_ERRNO(EIO
),
1254 "Got invalid event from epoll for datagram fd: %" PRIx32
,
1257 /* Try to get the right size, if we can. (Not all sockets support SIOCINQ, hence we just try, but don't rely on
1259 (void) ioctl(fd
, SIOCINQ
, &v
);
1261 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1262 m
= PAGE_ALIGN(MAX3((size_t) v
+ 1,
1264 ALIGN(sizeof(struct nlmsghdr
)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH
)) + 1);
1266 if (!GREEDY_REALLOC(s
->buffer
, s
->buffer_size
, m
))
1269 iovec
= IOVEC_MAKE(s
->buffer
, s
->buffer_size
- 1); /* Leave room for trailing NUL we add later */
1271 n
= recvmsg(fd
, &msghdr
, MSG_DONTWAIT
|MSG_CMSG_CLOEXEC
);
1273 if (IN_SET(errno
, EINTR
, EAGAIN
))
1276 return log_error_errno(errno
, "recvmsg() failed: %m");
/* Walk the control messages and remember pointers to the pieces we care about. Credentials and
 * timestamp are only accepted if the payload has exactly the expected size. */
1279 CMSG_FOREACH(cmsg
, &msghdr
)
1280 if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1281 cmsg
->cmsg_type
== SCM_CREDENTIALS
&&
1282 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct ucred
)))
1283 ucred
= (struct ucred
*) CMSG_DATA(cmsg
);
1284 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1285 cmsg
->cmsg_type
== SCM_SECURITY
) {
1286 label
= (char*) CMSG_DATA(cmsg
);
1287 label_len
= cmsg
->cmsg_len
- CMSG_LEN(0);
1288 } else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1289 cmsg
->cmsg_type
== SO_TIMESTAMP
&&
1290 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct timeval
)))
1291 tv
= (struct timeval
*) CMSG_DATA(cmsg
);
1292 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1293 cmsg
->cmsg_type
== SCM_RIGHTS
) {
1294 fds
= (int*) CMSG_DATA(cmsg
);
1295 n_fds
= (cmsg
->cmsg_len
- CMSG_LEN(0)) / sizeof(int);
1298 /* And a trailing NUL, just in case */
/* Dispatch on the socket the datagram arrived on. Only the native socket accepts a passed file
 * descriptor, and then only a single one together with an empty payload. */
1301 if (fd
== s
->syslog_fd
) {
1302 if (n
> 0 && n_fds
== 0)
1303 server_process_syslog_message(s
, s
->buffer
, n
, ucred
, tv
, label
, label_len
);
1305 log_warning("Got file descriptors via syslog socket. Ignoring.");
1307 } else if (fd
== s
->native_fd
) {
1308 if (n
> 0 && n_fds
== 0)
1309 server_process_native_message(s
, s
->buffer
, n
, ucred
, tv
, label
, label_len
);
1310 else if (n
== 0 && n_fds
== 1)
1311 server_process_native_file(s
, fds
[0], ucred
, tv
, label
, label_len
);
1313 log_warning("Got too many file descriptors via native socket. Ignoring.");
1316 assert(fd
== s
->audit_fd
);
1318 if (n
> 0 && n_fds
== 0)
1319 server_process_audit_message(s
, s
->buffer
, n
, ucred
, &sa
, msghdr
.msg_namelen
);
1321 log_warning("Got file descriptors via audit socket. Ignoring.");
1324 close_many(fds
, n_fds
);
/* Signal callback registered for SIGUSR1 by setup_signals().
 *
 * Logs the PID that asked for the flush, calls server_flush_to_var() (result deliberately ignored),
 * vacuums, touches /run/systemd/journal/flushed (a failure is only logged as a warning) and finally
 * emits the space usage message. userdata is the Server object. */
1328 static int dispatch_sigusr1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1329 Server
*s
= userdata
;
1334 log_info("Received request to flush runtime journal from PID " PID_FMT
, si
->ssi_pid
);
1336 (void) server_flush_to_var(s
, false);
1338 server_vacuum(s
, false);
1340 r
= touch("/run/systemd/journal/flushed");
1342 log_warning_errno(r
, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
1344 server_space_usage_message(s
, NULL
);
/* Signal callback registered for SIGUSR2 by setup_signals().
 *
 * Logs the PID that asked for the rotation, calls server_vacuum(s, true), runs patch_min_use() on
 * the storage of each journal that is currently open, and records the time of the rotation in
 * /run/systemd/journal/rotated (a failure to write that file is only logged as a warning).
 * userdata is the Server object. */
1348 static int dispatch_sigusr2(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1349 Server
*s
= userdata
;
1354 log_info("Received request to rotate journal from PID " PID_FMT
, si
->ssi_pid
);
1356 server_vacuum(s
, true);
1358 if (s
->system_journal
)
1359 patch_min_use(&s
->system_storage
);
1360 if (s
->runtime_journal
)
1361 patch_min_use(&s
->runtime_storage
);
1363 /* Let clients know when the most recent rotation happened. */
1364 r
= write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC
));
1366 log_warning_errno(r
, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
/* Signal callback shared by SIGTERM and SIGINT (see setup_signals()).
 *
 * Logs the received signal at LOG_INFO and asks the event loop to exit with code 0.
 * userdata is the Server object. */
1371 static int dispatch_sigterm(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1372 Server
*s
= userdata
;
1376 log_received_signal(LOG_INFO
, si
);
1378 sd_event_exit(s
->event
, 0);
/* Signal callback registered for SIGRTMIN+1 by setup_signals(), used to request a sync.
 *
 * Logs the requesting PID at debug level and records the time in /run/systemd/journal/synced so that
 * clients can notice the sync; failing to write that file is only logged as a warning.
 * NOTE(review): the call that performs the actual sync is not visible in this excerpt - confirm.
 * userdata is the Server object. */
1382 static int dispatch_sigrtmin1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1383 Server
*s
= userdata
;
1388 log_debug("Received request to sync from PID " PID_FMT
, si
->ssi_pid
);
1392 /* Let clients know when the most recent sync happened. */
1393 r
= write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC
));
1395 log_warning_errno(r
, "Failed to write /run/systemd/journal/synced, ignoring: %m");
/* Hooks the signals the daemon reacts to into the event loop of the server.
 *
 * The signal mask is first set up for SIGINT, SIGTERM, SIGUSR1, SIGUSR2 and SIGRTMIN+1 via
 * sigprocmask_many(). Afterwards one signal event source per signal is added:
 *   SIGUSR1      -> dispatch_sigusr1
 *   SIGUSR2      -> dispatch_sigusr2
 *   SIGTERM      -> dispatch_sigterm, priority SD_EVENT_PRIORITY_NORMAL+20
 *   SIGINT       -> dispatch_sigterm, priority SD_EVENT_PRIORITY_NORMAL+20
 *   SIGRTMIN+1   -> dispatch_sigrtmin1, priority SD_EVENT_PRIORITY_NORMAL+15
 * The event sources are stored in the matching s->*_event_source fields. */
1400 static int setup_signals(Server
*s
) {
1405 assert_se(sigprocmask_many(SIG_SETMASK
, NULL
, SIGINT
, SIGTERM
, SIGUSR1
, SIGUSR2
, SIGRTMIN
+1, -1) >= 0);
1407 r
= sd_event_add_signal(s
->event
, &s
->sigusr1_event_source
, SIGUSR1
, dispatch_sigusr1
, s
);
1411 r
= sd_event_add_signal(s
->event
, &s
->sigusr2_event_source
, SIGUSR2
, dispatch_sigusr2
, s
);
1415 r
= sd_event_add_signal(s
->event
, &s
->sigterm_event_source
, SIGTERM
, dispatch_sigterm
, s
);
1419 /* Let's process SIGTERM late, so that we flush all queued
1420 * messages to disk before we exit */
1421 r
= sd_event_source_set_priority(s
->sigterm_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1425 /* When journald is invoked on the terminal (when debugging),
1426 * it's useful if C-c is handled equivalent to SIGTERM. */
1427 r
= sd_event_add_signal(s
->event
, &s
->sigint_event_source
, SIGINT
, dispatch_sigterm
, s
);
1431 r
= sd_event_source_set_priority(s
->sigint_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1435 /* SIGRTMIN+1 causes an immediate sync. We process this very
1436 * late, so that everything else queued at this point is
1437 * really written to disk. Clients can watch
1438 * /run/systemd/journal/synced with inotify until its mtime
1439 * changes to see when a sync happened. */
1440 r
= sd_event_add_signal(s
->event
, &s
->sigrtmin1_event_source
, SIGRTMIN
+1, dispatch_sigrtmin1
, s
);
1444 r
= sd_event_source_set_priority(s
->sigrtmin1_event_source
, SD_EVENT_PRIORITY_NORMAL
+15);
/* Kernel command line callback, passed to proc_cmdline_parse() by server_init().
 *
 * Recognized keys and the Server field each one sets:
 *   systemd.journald.forward_to_syslog   -> s->forward_to_syslog   (boolean, missing value means true)
 *   systemd.journald.forward_to_kmsg     -> s->forward_to_kmsg     (boolean, missing value means true)
 *   systemd.journald.forward_to_console  -> s->forward_to_console  (boolean, missing value means true)
 *   systemd.journald.forward_to_wall     -> s->forward_to_wall     (boolean, missing value means true)
 *   systemd.journald.max_level_console   -> s->max_level_console   (log level name, value required)
 *   systemd.journald.max_level_store     -> s->max_level_store     (log level name, value required)
 *   systemd.journald.max_level_syslog    -> s->max_level_syslog    (log level name, value required)
 *   systemd.journald.max_level_kmsg      -> s->max_level_kmsg      (log level name, value required)
 *   systemd.journald.max_level_wall      -> s->max_level_wall      (log level name, value required)
 * Values that fail to parse are reported with a warning. Any other key that starts with
 * systemd.journald is reported as unknown. */
1451 static int parse_proc_cmdline_item(const char *key
, const char *value
, void *data
) {
1457 if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_syslog")) {
1459 r
= value
? parse_boolean(value
) : true;
1461 log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value
);
1463 s
->forward_to_syslog
= r
;
1465 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_kmsg")) {
1467 r
= value
? parse_boolean(value
) : true;
1469 log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value
);
1471 s
->forward_to_kmsg
= r
;
1473 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_console")) {
1475 r
= value
? parse_boolean(value
) : true;
1477 log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value
);
1479 s
->forward_to_console
= r
;
1481 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_wall")) {
1483 r
= value
? parse_boolean(value
) : true;
1485 log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value
);
1487 s
->forward_to_wall
= r
;
1489 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_console")) {
1491 if (proc_cmdline_value_missing(key
, value
))
1494 r
= log_level_from_string(value
);
1496 log_warning("Failed to parse max level console value \"%s\". Ignoring.", value
);
1498 s
->max_level_console
= r
;
1500 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_store")) {
1502 if (proc_cmdline_value_missing(key
, value
))
1505 r
= log_level_from_string(value
);
1507 log_warning("Failed to parse max level store value \"%s\". Ignoring.", value
);
1509 s
->max_level_store
= r
;
1511 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_syslog")) {
1513 if (proc_cmdline_value_missing(key
, value
))
1516 r
= log_level_from_string(value
);
1518 log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value
);
1520 s
->max_level_syslog
= r
;
1522 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_kmsg")) {
1524 if (proc_cmdline_value_missing(key
, value
))
1527 r
= log_level_from_string(value
);
1529 log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value
);
1531 s
->max_level_kmsg
= r
;
1533 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_wall")) {
1535 if (proc_cmdline_value_missing(key
, value
))
1538 r
= log_level_from_string(value
);
1540 log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value
);
1542 s
->max_level_wall
= r
;
1544 } else if (startswith(key
, "systemd.journald"))
1545 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key
);
1547 /* do not warn about state here, since probably systemd already did */
/* Loads the journald configuration into s.
 *
 * Parses PKGSYSCONFDIR/journald.conf plus the journald.conf.d drop-in directories via
 * config_parse_many_nulstr(), resolving settings through journald_gperf_lookup with
 * CONFIG_PARSE_WARN set. The result of config_parse_many_nulstr() is returned unchanged. */
1551 static int server_parse_config_file(Server
*s
) {
1554 return config_parse_many_nulstr(PKGSYSCONFDIR
"/journald.conf",
1555 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1557 config_item_perf_lookup
, journald_gperf_lookup
,
1558 CONFIG_PARSE_WARN
, s
);
/* Timer callback that server_schedule_sync() registers with sd_event_add_time().
 * userdata is the Server object.
 * NOTE(review): the body is not visible in this excerpt; presumably it performs the deferred
 * sync - confirm against the full file. */
1561 static int server_dispatch_sync(sd_event_source
*es
, usec_t t
, void *userdata
) {
1562 Server
*s
= userdata
;
/* Arranges for the journal files to be synced after a message of the given priority was written.
 *
 * Messages of priority LOG_CRIT or more severe take the immediate path. Otherwise nothing is done
 * when a sync is already scheduled. When s->sync_interval_usec is non-zero, a one-shot timer
 * (s->sync_event_source, callback server_dispatch_sync, priority SD_EVENT_PRIORITY_IMPORTANT) is
 * created on first use and armed for now + s->sync_interval_usec on CLOCK_MONOTONIC, after which
 * s->sync_scheduled is set. */
1570 int server_schedule_sync(Server
*s
, int priority
) {
1575 if (priority
<= LOG_CRIT
) {
1576 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1581 if (s
->sync_scheduled
)
1584 if (s
->sync_interval_usec
> 0) {
1587 r
= sd_event_now(s
->event
, CLOCK_MONOTONIC
, &when
);
1591 when
+= s
->sync_interval_usec
;
/* The timer event source is created lazily and then reused for every later sync. */
1593 if (!s
->sync_event_source
) {
1594 r
= sd_event_add_time(
1596 &s
->sync_event_source
,
1599 server_dispatch_sync
, s
);
1603 r
= sd_event_source_set_priority(s
->sync_event_source
, SD_EVENT_PRIORITY_IMPORTANT
);
1605 r
= sd_event_source_set_time(s
->sync_event_source
, when
);
1609 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_ONESHOT
);
1614 s
->sync_scheduled
= true;
/* I/O callback registered by server_open_hostname() on the hostname file descriptor.
 * Refreshes the cached hostname of the server through server_cache_hostname().
 * userdata is the Server object. */
1620 static int dispatch_hostname_change(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1621 Server
*s
= userdata
;
1625 server_cache_hostname(s
);
/* Starts watching /proc/sys/kernel/hostname for changes.
 *
 * Opens the file read-only into s->hostname_fd and registers it with the event loop using an event
 * mask of 0 and dispatch_hostname_change as callback. One registration failure is tolerated: it is
 * logged as a warning and the fd is closed again (the existing comment attributes this to kernels
 * older than 3.2). Any other failure is returned as an error. On success the event source priority
 * is set to SD_EVENT_PRIORITY_IMPORTANT-10. */
1629 static int server_open_hostname(Server
*s
) {
1634 s
->hostname_fd
= open("/proc/sys/kernel/hostname",
1635 O_RDONLY
|O_CLOEXEC
|O_NONBLOCK
|O_NOCTTY
);
1636 if (s
->hostname_fd
< 0)
1637 return log_error_errno(errno
, "Failed to open /proc/sys/kernel/hostname: %m");
1639 r
= sd_event_add_io(s
->event
, &s
->hostname_event_source
, s
->hostname_fd
, 0, dispatch_hostname_change
, s
);
1641 /* kernels prior to 3.2 don't support polling this file. Ignore
1644 log_warning_errno(r
, "Failed to register hostname fd in event loop, ignoring: %m");
1645 s
->hostname_fd
= safe_close(s
->hostname_fd
);
1649 return log_error_errno(r
, "Failed to register hostname fd in event loop: %m");
1652 r
= sd_event_source_set_priority(s
->hostname_event_source
, SD_EVENT_PRIORITY_IMPORTANT
-10);
1654 return log_error_errno(r
, "Failed to adjust priority of host name event source: %m");
/* I/O callback for the notification socket (s->notify_fd), registered by server_connect_notify()
 * for EPOLLOUT.
 *
 * Each invocation sends at most one message, picked in this order: the initial READY=1 message
 * (until s->sent_notify_ready is set), a watchdog message (when s->send_watchdog is set), or one
 * queued stdout stream notification. All sends use MSG_DONTWAIT and EAGAIN gets separate handling
 * from other errors. The event source stays enabled while a watchdog message or stream
 * notifications are still pending and is switched to SD_EVENT_OFF otherwise.
 * userdata is the Server object. */
1659 static int dispatch_notify_event(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1660 Server
*s
= userdata
;
1664 assert(s
->notify_event_source
== es
);
1665 assert(s
->notify_fd
== fd
);
1667 /* The $NOTIFY_SOCKET is writable again, now send exactly one
1668 * message on it. Either it's the watchdog event, the initial
1669 * READY=1 event or an stdout stream event. If there's nothing
1670 * to write anymore, turn our event source off. The next time
1671 * there's something to send it will be turned on again. */
1673 if (!s
->sent_notify_ready
) {
1674 static const char p
[] =
1676 "STATUS=Processing requests...";
1679 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1681 if (errno
== EAGAIN
)
1684 return log_error_errno(errno
, "Failed to send READY=1 notification message: %m");
1687 s
->sent_notify_ready
= true;
1688 log_debug("Sent READY=1 notification.");
1690 } else if (s
->send_watchdog
) {
1692 static const char p
[] =
1697 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1699 if (errno
== EAGAIN
)
1702 return log_error_errno(errno
, "Failed to send WATCHDOG=1 notification message: %m");
1705 s
->send_watchdog
= false;
1706 log_debug("Sent WATCHDOG=1 notification.");
1708 } else if (s
->stdout_streams_notify_queue
)
1709 /* Dispatch one stream notification event */
1710 stdout_stream_send_notify(s
->stdout_streams_notify_queue
);
1712 /* Leave us enabled if there's still more to do. */
1713 if (s
->send_watchdog
|| s
->stdout_streams_notify_queue
)
1716 /* There was nothing to do anymore, let's turn ourselves off. */
1717 r
= sd_event_source_set_enabled(es
, SD_EVENT_OFF
);
1719 return log_error_errno(r
, "Failed to turn off notify event source: %m");
/* Timer callback of the watchdog event source, registered by server_connect_notify().
 *
 * Flags that a watchdog message is due (s->send_watchdog) and enables the notify event source so
 * that dispatch_notify_event() sends it once the socket is writable; failing to enable it is only
 * logged as a warning. The timer is then re-armed for usec + s->watchdog_usec / 2 and enabled
 * again. userdata is the Server object. */
1724 static int dispatch_watchdog(sd_event_source
*es
, uint64_t usec
, void *userdata
) {
1725 Server
*s
= userdata
;
1730 s
->send_watchdog
= true;
1732 r
= sd_event_source_set_enabled(s
->notify_event_source
, SD_EVENT_ON
);
1734 log_warning_errno(r
, "Failed to turn on notify event source: %m");
1736 r
= sd_event_source_set_time(s
->watchdog_event_source
, usec
+ s
->watchdog_usec
/ 2);
1738 return log_error_errno(r
, "Failed to restart watchdog event source: %m");
1740 r
= sd_event_source_set_enabled(s
->watchdog_event_source
, SD_EVENT_ON
);
1742 return log_error_errno(r
, "Failed to enable watchdog event source: %m");
/* Sets up the non-blocking connection to the notification socket named by $NOTIFY_SOCKET.
 *
 * Must be called while s->notify_fd is unset and no notify event source exists. The socket address
 * is built from the environment variable with sockaddr_un_set_path(). Then an AF_UNIX datagram
 * socket (close-on-exec, non-blocking) is created, its send buffer is raised to NOTIFY_SNDBUF_SIZE
 * on a best-effort basis, it is connected, and it is watched for EPOLLOUT with
 * dispatch_notify_event as callback. When sd_watchdog_enabled() reports a watchdog interval, a
 * CLOCK_MONOTONIC timer (callback dispatch_watchdog) is added that first fires after half of
 * s->watchdog_usec, with an accuracy of a quarter of it, and s->send_watchdog is set. */
1747 static int server_connect_notify(Server
*s
) {
1748 union sockaddr_union sa
= {};
1753 assert(s
->notify_fd
< 0);
1754 assert(!s
->notify_event_source
);
1757 So here's the problem: we'd like to send notification
1758 messages to PID 1, but we cannot do that via sd_notify(),
1759 since that's synchronous, and we might end up blocking on
1760 it. Specifically: given that PID 1 might block on
1761 dbus-daemon during IPC, and dbus-daemon is logging to us,
1762 and might hence block on us, we might end up in a deadlock
1763 if we block on sending PID 1 notification messages — by
1764 generating a full blocking circle. To avoid this, let's
1765 create a non-blocking socket, and connect it to the
1766 notification socket, and then wait for POLLOUT before we
1767 send anything. This should efficiently avoid any deadlocks,
1768 as we'll never block on PID 1, hence PID 1 can safely block
1769 on dbus-daemon which can safely block on us again.
1771 Don't think that this issue is real? It is, see:
1772 https://github.com/systemd/systemd/issues/1505
1775 e
= getenv("NOTIFY_SOCKET");
1779 salen
= sockaddr_un_set_path(&sa
.un
, e
);
1781 return log_error_errno(salen
, "NOTIFY_SOCKET set to invalid value '%s': %m", e
);
1783 s
->notify_fd
= socket(AF_UNIX
, SOCK_DGRAM
|SOCK_CLOEXEC
|SOCK_NONBLOCK
, 0);
1784 if (s
->notify_fd
< 0)
1785 return log_error_errno(errno
, "Failed to create notify socket: %m");
1787 (void) fd_inc_sndbuf(s
->notify_fd
, NOTIFY_SNDBUF_SIZE
);
1789 r
= connect(s
->notify_fd
, &sa
.sa
, salen
);
1791 return log_error_errno(errno
, "Failed to connect to notify socket: %m");
1793 r
= sd_event_add_io(s
->event
, &s
->notify_event_source
, s
->notify_fd
, EPOLLOUT
, dispatch_notify_event
, s
);
1795 return log_error_errno(r
, "Failed to watch notification socket: %m");
1797 if (sd_watchdog_enabled(false, &s
->watchdog_usec
) > 0) {
1798 s
->send_watchdog
= true;
1800 r
= sd_event_add_time(s
->event
, &s
->watchdog_event_source
, CLOCK_MONOTONIC
, now(CLOCK_MONOTONIC
) + s
->watchdog_usec
/2, s
->watchdog_usec
/4, dispatch_watchdog
, s
);
1802 return log_error_errno(r
, "Failed to add watchdog time event: %m");
1805 /* This should fire pretty soon, which we'll use to send the
/* Initializes the Server object and brings up everything the daemon needs to run.
 *
 * Visible steps, in order:
 *  - set every file descriptor field to -1 and install the built-in defaults (compression, sync
 *    interval, rate limit, maximum file age, maximum log levels, line length limit, metrics)
 *  - read the configuration files, then the kernel command line (a failure there is only a warning)
 *  - create /run/systemd/journal, the user journal map, the mmap cache, the deferred-close set and
 *    the event loop
 *  - sort the file descriptors passed in by the service manager into native, stdout, syslog and
 *    audit sockets; the remaining ones go into an FDSet, are offered to server_restore_streams()
 *    and are otherwise closed with a warning
 *  - open the stdout, syslog, native, kmsg and (conditionally) audit inputs, the kernel sequence
 *    number, the hostname watch and the signal handlers
 *  - create the rate limiter, look up the cgroup root, cache hostname, boot ID and machine ID, and
 *    compute the runtime and system storage paths
 *  - connect to the notification socket and acquire the default client context (both best effort)
 *
 * Returns the result of system_journal_open(s, false) on the success path. */
1811 int server_init(Server
*s
) {
1812 _cleanup_fdset_free_ FDSet
*fds
= NULL
;
/* Mark all file descriptors as unset and install the built-in defaults before any configuration
 * is read. */
1819 s
->syslog_fd
= s
->native_fd
= s
->stdout_fd
= s
->dev_kmsg_fd
= s
->audit_fd
= s
->hostname_fd
= s
->notify_fd
= -1;
1820 s
->compress
.enabled
= true;
1821 s
->compress
.threshold_bytes
= (uint64_t) -1;
1823 s
->read_kmsg
= true;
1825 s
->watchdog_usec
= USEC_INFINITY
;
1827 s
->sync_interval_usec
= DEFAULT_SYNC_INTERVAL_USEC
;
1828 s
->sync_scheduled
= false;
1830 s
->rate_limit_interval
= DEFAULT_RATE_LIMIT_INTERVAL
;
1831 s
->rate_limit_burst
= DEFAULT_RATE_LIMIT_BURST
;
1833 s
->forward_to_wall
= true;
1835 s
->max_file_usec
= DEFAULT_MAX_FILE_USEC
;
1837 s
->max_level_store
= LOG_DEBUG
;
1838 s
->max_level_syslog
= LOG_DEBUG
;
1839 s
->max_level_kmsg
= LOG_NOTICE
;
1840 s
->max_level_console
= LOG_INFO
;
1841 s
->max_level_wall
= LOG_EMERG
;
1843 s
->line_max
= DEFAULT_LINE_MAX
;
1845 journal_reset_metrics(&s
->system_storage
.metrics
);
1846 journal_reset_metrics(&s
->runtime_storage
.metrics
);
/* Configuration files are read first, the kernel command line afterwards. */
1848 server_parse_config_file(s
);
1850 r
= proc_cmdline_parse(parse_proc_cmdline_item
, s
, PROC_CMDLINE_STRIP_RD_PREFIX
);
1852 log_warning_errno(r
, "Failed to parse kernel command line, ignoring: %m");
/* Rate limiting needs both an interval and a burst: if exactly one of them is zero, both are
 * reset to zero. */
1854 if (!!s
->rate_limit_interval
^ !!s
->rate_limit_burst
) {
1855 log_debug("Setting both rate limit interval and burst from "USEC_FMT
",%u to 0,0",
1856 s
->rate_limit_interval
, s
->rate_limit_burst
);
1857 s
->rate_limit_interval
= s
->rate_limit_burst
= 0;
1860 (void) mkdir_p("/run/systemd/journal", 0755);
1862 s
->user_journals
= ordered_hashmap_new(NULL
);
1863 if (!s
->user_journals
)
1866 s
->mmap
= mmap_cache_new();
1870 s
->deferred_closes
= set_new(NULL
);
1871 if (!s
->deferred_closes
)
1874 r
= sd_event_default(&s
->event
);
1876 return log_error_errno(r
, "Failed to create event loop: %m");
/* Sort the file descriptors passed in by the service manager into their roles. Receiving a second
 * socket of the same kind is an error. */
1878 n
= sd_listen_fds(true);
1880 return log_error_errno(n
, "Failed to read listening file descriptors from environment: %m");
1882 for (fd
= SD_LISTEN_FDS_START
; fd
< SD_LISTEN_FDS_START
+ n
; fd
++) {
1884 if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/socket", 0) > 0) {
1886 if (s
->native_fd
>= 0)
1887 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
),
1888 "Too many native sockets passed.");
1892 } else if (sd_is_socket_unix(fd
, SOCK_STREAM
, 1, "/run/systemd/journal/stdout", 0) > 0) {
1894 if (s
->stdout_fd
>= 0)
1895 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
),
1896 "Too many stdout sockets passed.");
1900 } else if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/dev/log", 0) > 0 ||
1901 sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1903 if (s
->syslog_fd
>= 0)
1904 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
),
1905 "Too many /dev/log sockets passed.");
1909 } else if (sd_is_socket(fd
, AF_NETLINK
, SOCK_RAW
, -1) > 0) {
1911 if (s
->audit_fd
>= 0)
1912 return log_error_errno(SYNTHETIC_ERRNO(EINVAL
),
1913 "Too many audit sockets passed.");
1925 r
= fdset_put(fds
, fd
);
1931 /* Try to restore streams, but don't bother if this fails */
1932 (void) server_restore_streams(s
, fds
);
1934 if (fdset_size(fds
) > 0) {
1935 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds
));
1936 fds
= fdset_free(fds
);
1939 no_sockets
= s
->native_fd
< 0 && s
->stdout_fd
< 0 && s
->syslog_fd
< 0 && s
->audit_fd
< 0;
1941 /* always open stdout, syslog, native, and kmsg sockets */
1943 /* systemd-journald.socket: /run/systemd/journal/stdout */
1944 r
= server_open_stdout_socket(s
);
1948 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
1949 r
= server_open_syslog_socket(s
);
1953 /* systemd-journald.socket: /run/systemd/journal/socket */
1954 r
= server_open_native_socket(s
);
1959 r
= server_open_dev_kmsg(s
);
1963 /* Unless we got *some* sockets and not audit, open audit socket */
1964 if (s
->audit_fd
>= 0 || no_sockets
) {
1965 r
= server_open_audit(s
);
1970 r
= server_open_kernel_seqnum(s
);
1974 r
= server_open_hostname(s
);
1978 r
= setup_signals(s
);
1982 s
->rate_limit
= journal_rate_limit_new();
1986 r
= cg_get_root_path(&s
->cgroup_root
);
1990 server_cache_hostname(s
);
1991 server_cache_boot_id(s
);
1992 server_cache_machine_id(s
);
1994 s
->runtime_storage
.name
= "Runtime journal";
1995 s
->system_storage
.name
= "System journal";
1997 s
->runtime_storage
.path
= strjoin("/run/log/journal/", SERVER_MACHINE_ID(s
));
1998 s
->system_storage
.path
= strjoin("/var/log/journal/", SERVER_MACHINE_ID(s
));
1999 if (!s
->runtime_storage
.path
|| !s
->system_storage
.path
)
2002 (void) server_connect_notify(s
);
2004 (void) client_context_acquire_default(s
);
2006 return system_journal_open(s
, false);
/* Calls journal_file_maybe_append_tag() with the current CLOCK_REALTIME time on the system journal
 * (when it is open) and on every journal file in s->user_journals. */
2009 void server_maybe_append_tags(Server
*s
) {
2015 n
= now(CLOCK_REALTIME
);
2017 if (s
->system_journal
)
2018 journal_file_maybe_append_tag(s
->system_journal
, n
);
2020 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
)
2021 journal_file_maybe_append_tag(f
, n
);
/* Releases the resources held by the Server object.
 *
 * Visible teardown order: deferred-close journal files, stdout streams, client contexts, system
 * and runtime journals, per-user journals, all event sources and the event loop, all file
 * descriptors, the rate limiter, the kernel sequence number mapping, the heap-allocated strings
 * (cgroup root, hostname field, storage paths) and finally the mmap cache reference. */
2025 void server_done(Server
*s
) {
2028 set_free_with_destructor(s
->deferred_closes
, journal_file_close
);
2030 while (s
->stdout_streams
)
2031 stdout_stream_free(s
->stdout_streams
);
2033 client_context_flush_all(s
);
2035 if (s
->system_journal
)
2036 (void) journal_file_close(s
->system_journal
);
2038 if (s
->runtime_journal
)
2039 (void) journal_file_close(s
->runtime_journal
);
2041 ordered_hashmap_free_with_destructor(s
->user_journals
, journal_file_close
);
/* Drop every event source first, then the event loop itself. */
2043 sd_event_source_unref(s
->syslog_event_source
);
2044 sd_event_source_unref(s
->native_event_source
);
2045 sd_event_source_unref(s
->stdout_event_source
);
2046 sd_event_source_unref(s
->dev_kmsg_event_source
);
2047 sd_event_source_unref(s
->audit_event_source
);
2048 sd_event_source_unref(s
->sync_event_source
);
2049 sd_event_source_unref(s
->sigusr1_event_source
);
2050 sd_event_source_unref(s
->sigusr2_event_source
);
2051 sd_event_source_unref(s
->sigterm_event_source
);
2052 sd_event_source_unref(s
->sigint_event_source
);
2053 sd_event_source_unref(s
->sigrtmin1_event_source
);
2054 sd_event_source_unref(s
->hostname_event_source
);
2055 sd_event_source_unref(s
->notify_event_source
);
2056 sd_event_source_unref(s
->watchdog_event_source
);
2057 sd_event_unref(s
->event
);
/* Close all sockets and other file descriptors owned by the server. */
2059 safe_close(s
->syslog_fd
);
2060 safe_close(s
->native_fd
);
2061 safe_close(s
->stdout_fd
);
2062 safe_close(s
->dev_kmsg_fd
);
2063 safe_close(s
->audit_fd
);
2064 safe_close(s
->hostname_fd
);
2065 safe_close(s
->notify_fd
);
2068 journal_rate_limit_free(s
->rate_limit
);
/* The kernel sequence number is a mapping of sizeof(uint64_t) bytes and is unmapped here when
 * present. */
2070 if (s
->kernel_seqnum
)
2071 munmap(s
->kernel_seqnum
, sizeof(uint64_t));
2075 free(s
->cgroup_root
);
2076 free(s
->hostname_field
);
2077 free(s
->runtime_storage
.path
);
2078 free(s
->system_storage
.path
);
2081 mmap_cache_unref(s
->mmap
);
/* String names of the Storage enum values (auto, volatile, persistent, none), followed by the
 * macro invocations that generate the string lookup helpers and the config_parse_storage() parser
 * from this table. */
2084 static const char* const storage_table
[_STORAGE_MAX
] = {
2085 [STORAGE_AUTO
] = "auto",
2086 [STORAGE_VOLATILE
] = "volatile",
2087 [STORAGE_PERSISTENT
] = "persistent",
2088 [STORAGE_NONE
] = "none"
2091 DEFINE_STRING_TABLE_LOOKUP(storage
, Storage
);
2092 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage
, storage
, Storage
, "Failed to parse storage setting");
/* String names of the SplitMode enum values (login, uid, none), followed by the macro invocations
 * that generate the string lookup helpers and the config_parse_split_mode() parser from this
 * table. */
2094 static const char* const split_mode_table
[_SPLIT_MAX
] = {
2095 [SPLIT_LOGIN
] = "login",
2096 [SPLIT_UID
] = "uid",
2097 [SPLIT_NONE
] = "none",
2100 DEFINE_STRING_TABLE_LOOKUP(split_mode
, SplitMode
);
2101 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode
, split_mode
, SplitMode
, "Failed to parse split mode setting");
/* Configuration parser callback for the LineMax= setting.
 *
 * An empty value selects DEFAULT_LINE_MAX. Otherwise the value is parsed as a size with base 1024;
 * a value that fails to parse is logged and ignored. Values that are too small are clamped to 79
 * and values above SSIZE_MAX-1 are clamped to SSIZE_MAX-1, each with a warning. */
2103 int config_parse_line_max(
2105 const char *filename
,
2107 const char *section
,
2108 unsigned section_line
,
2123 if (isempty(rvalue
))
2124 /* Empty assignment means default */
2125 *sz
= DEFAULT_LINE_MAX
;
2129 r
= parse_size(rvalue
, 1024, &v
);
2131 log_syntax(unit
, LOG_ERR
, filename
, line
, r
, "Failed to parse LineMax= value, ignoring: %s", rvalue
);
2136 /* Why specify 79 here as minimum line length? Simply, because the most common traditional
2137 * terminal size is 80ch, and it might make sense to break one character before the natural
2138 * line break would occur on that. */
2139 log_syntax(unit
, LOG_WARNING
, filename
, line
, 0, "LineMax= too small, clamping to 79: %s", rvalue
);
2141 } else if (v
> (uint64_t) (SSIZE_MAX
-1)) {
2142 /* So, why specify SSIZE_MAX-1 here? Because that's one below the largest size value read()
2143 * can return, and we need one extra byte for the trailing NUL byte. Of course IRL such large
2144 * memory allocations will fail anyway, hence this limit is mostly theoretical anyway, as we'll
2145 * fail much earlier anyway. */
2146 log_syntax(unit
, LOG_WARNING
, filename
, line
, 0, "LineMax= too large, clamping to %" PRIu64
": %s", (uint64_t) (SSIZE_MAX
-1), rvalue
);
2155 int config_parse_compress(const char* unit
,
2156 const char *filename
,
2158 const char *section
,
2159 unsigned section_line
,
2165 JournalCompressOptions
* compress
= data
;
2168 if (streq(rvalue
, "1")) {
2169 log_syntax(unit
, LOG_WARNING
, filename
, line
, 0,
2170 "Compress= ambiguously specified as 1, enabling compression with default threshold");
2171 compress
->enabled
= true;
2172 } else if (streq(rvalue
, "0")) {
2173 log_syntax(unit
, LOG_WARNING
, filename
, line
, 0,
2174 "Compress= ambiguously specified as 0, disabling compression");
2175 compress
->enabled
= false;
2176 } else if ((r
= parse_boolean(rvalue
)) >= 0)
2177 compress
->enabled
= r
;
2178 else if (parse_size(rvalue
, 1024, &compress
->threshold_bytes
) == 0)
2179 compress
->enabled
= true;
2180 else if (isempty(rvalue
)) {
2181 compress
->enabled
= true;
2182 compress
->threshold_bytes
= (uint64_t) -1;
2184 log_syntax(unit
, LOG_ERR
, filename
, line
, r
, "Failed to parse Compress= value, ignoring: %s", rvalue
);