1 /* SPDX-License-Identifier: LGPL-2.1+ */
4 #include <selinux/selinux.h>
8 #include <sys/signalfd.h>
9 #include <sys/statvfs.h>
10 #include <linux/sockios.h>
12 #include "sd-daemon.h"
13 #include "sd-journal.h"
14 #include "sd-messages.h"
17 #include "alloc-util.h"
18 #include "audit-util.h"
19 #include "cgroup-util.h"
20 #include "conf-parser.h"
21 #include "dirent-util.h"
22 #include "extract-word.h"
25 #include "format-util.h"
28 #include "hostname-util.h"
29 #include "id128-util.h"
31 #include "journal-authenticate.h"
32 #include "journal-file.h"
33 #include "journal-internal.h"
34 #include "journal-vacuum.h"
35 #include "journald-audit.h"
36 #include "journald-context.h"
37 #include "journald-kmsg.h"
38 #include "journald-native.h"
39 #include "journald-rate-limit.h"
40 #include "journald-server.h"
41 #include "journald-stream.h"
42 #include "journald-syslog.h"
46 #include "parse-util.h"
47 #include "proc-cmdline.h"
48 #include "process-util.h"
50 #include "selinux-util.h"
51 #include "signal-util.h"
52 #include "socket-util.h"
53 #include "stdio-util.h"
54 #include "string-table.h"
55 #include "string-util.h"
56 #include "syslog-util.h"
57 #include "user-util.h"
59 #define USER_JOURNALS_MAX 1024
61 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
62 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
63 #define DEFAULT_RATE_LIMIT_BURST 10000
64 #define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
66 #define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
68 #define NOTIFY_SNDBUF_SIZE (8*1024*1024)
70 /* The period to insert between posting changes for coalescing */
71 #define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
73 /* Pick a good default that is likely to fit into AF_UNIX and AF_INET SOCK_DGRAM datagrams, and even leaves some room
74 * for a bit of additional metadata. */
75 #define DEFAULT_LINE_MAX (48*1024)
77 #define DEFERRED_CLOSES_MAX (4096)
79 static int determine_path_usage(Server
*s
, const char *path
, uint64_t *ret_used
, uint64_t *ret_free
) {
80 _cleanup_closedir_
DIR *d
= NULL
;
89 return log_full_errno(errno
== ENOENT
? LOG_DEBUG
: LOG_ERR
,
90 errno
, "Failed to open %s: %m", path
);
92 if (fstatvfs(dirfd(d
), &ss
) < 0)
93 return log_error_errno(errno
, "Failed to fstatvfs(%s): %m", path
);
95 *ret_free
= ss
.f_bsize
* ss
.f_bavail
;
97 FOREACH_DIRENT_ALL(de
, d
, break) {
100 if (!endswith(de
->d_name
, ".journal") &&
101 !endswith(de
->d_name
, ".journal~"))
104 if (fstatat(dirfd(d
), de
->d_name
, &st
, AT_SYMLINK_NOFOLLOW
) < 0) {
105 log_debug_errno(errno
, "Failed to stat %s/%s, ignoring: %m", path
, de
->d_name
);
109 if (!S_ISREG(st
.st_mode
))
112 *ret_used
+= (uint64_t) st
.st_blocks
* 512UL;
118 static void cache_space_invalidate(JournalStorageSpace
*space
) {
122 static int cache_space_refresh(Server
*s
, JournalStorage
*storage
) {
123 JournalStorageSpace
*space
;
124 JournalMetrics
*metrics
;
125 uint64_t vfs_used
, vfs_avail
, avail
;
131 metrics
= &storage
->metrics
;
132 space
= &storage
->space
;
134 ts
= now(CLOCK_MONOTONIC
);
136 if (space
->timestamp
!= 0 && space
->timestamp
+ RECHECK_SPACE_USEC
> ts
)
139 r
= determine_path_usage(s
, storage
->path
, &vfs_used
, &vfs_avail
);
143 space
->vfs_used
= vfs_used
;
144 space
->vfs_available
= vfs_avail
;
146 avail
= LESS_BY(vfs_avail
, metrics
->keep_free
);
148 space
->limit
= MIN(MAX(vfs_used
+ avail
, metrics
->min_use
), metrics
->max_use
);
149 space
->available
= LESS_BY(space
->limit
, vfs_used
);
150 space
->timestamp
= ts
;
154 static void patch_min_use(JournalStorage
*storage
) {
157 /* Let's bump the min_use limit to the current usage on disk. We do
158 * this when starting up and first opening the journal files. This way
159 * sudden spikes in disk usage will not cause journald to vacuum files
160 * without bounds. Note that this means that only a restart of journald
161 * will make it reset this value. */
163 storage
->metrics
.min_use
= MAX(storage
->metrics
.min_use
, storage
->space
.vfs_used
);
166 static int determine_space(Server
*s
, uint64_t *available
, uint64_t *limit
) {
172 js
= s
->system_journal
? &s
->system_storage
: &s
->runtime_storage
;
174 r
= cache_space_refresh(s
, js
);
177 *available
= js
->space
.available
;
179 *limit
= js
->space
.limit
;
184 void server_space_usage_message(Server
*s
, JournalStorage
*storage
) {
185 char fb1
[FORMAT_BYTES_MAX
], fb2
[FORMAT_BYTES_MAX
], fb3
[FORMAT_BYTES_MAX
],
186 fb4
[FORMAT_BYTES_MAX
], fb5
[FORMAT_BYTES_MAX
], fb6
[FORMAT_BYTES_MAX
];
187 JournalMetrics
*metrics
;
192 storage
= s
->system_journal
? &s
->system_storage
: &s
->runtime_storage
;
194 if (cache_space_refresh(s
, storage
) < 0)
197 metrics
= &storage
->metrics
;
198 format_bytes(fb1
, sizeof(fb1
), storage
->space
.vfs_used
);
199 format_bytes(fb2
, sizeof(fb2
), metrics
->max_use
);
200 format_bytes(fb3
, sizeof(fb3
), metrics
->keep_free
);
201 format_bytes(fb4
, sizeof(fb4
), storage
->space
.vfs_available
);
202 format_bytes(fb5
, sizeof(fb5
), storage
->space
.limit
);
203 format_bytes(fb6
, sizeof(fb6
), storage
->space
.available
);
205 server_driver_message(s
, 0,
206 "MESSAGE_ID=" SD_MESSAGE_JOURNAL_USAGE_STR
,
207 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
208 storage
->name
, storage
->path
, fb1
, fb5
, fb6
),
209 "JOURNAL_NAME=%s", storage
->name
,
210 "JOURNAL_PATH=%s", storage
->path
,
211 "CURRENT_USE=%"PRIu64
, storage
->space
.vfs_used
,
212 "CURRENT_USE_PRETTY=%s", fb1
,
213 "MAX_USE=%"PRIu64
, metrics
->max_use
,
214 "MAX_USE_PRETTY=%s", fb2
,
215 "DISK_KEEP_FREE=%"PRIu64
, metrics
->keep_free
,
216 "DISK_KEEP_FREE_PRETTY=%s", fb3
,
217 "DISK_AVAILABLE=%"PRIu64
, storage
->space
.vfs_available
,
218 "DISK_AVAILABLE_PRETTY=%s", fb4
,
219 "LIMIT=%"PRIu64
, storage
->space
.limit
,
220 "LIMIT_PRETTY=%s", fb5
,
221 "AVAILABLE=%"PRIu64
, storage
->space
.available
,
222 "AVAILABLE_PRETTY=%s", fb6
,
226 static bool uid_for_system_journal(uid_t uid
) {
228 /* Returns true if the specified UID shall get its data stored in the system journal*/
230 return uid_is_system(uid
) || uid_is_dynamic(uid
) || uid
== UID_NOBODY
;
233 static void server_add_acls(JournalFile
*f
, uid_t uid
) {
240 if (uid_for_system_journal(uid
))
243 r
= add_acls_for_user(f
->fd
, uid
);
245 log_warning_errno(r
, "Failed to set ACL on %s, ignoring: %m", f
->path
);
249 static int open_journal(
255 JournalMetrics
*metrics
,
266 r
= journal_file_open_reliably(fname
, flags
, 0640, s
->compress
.enabled
, s
->compress
.threshold_bytes
,
267 seal
, metrics
, s
->mmap
, s
->deferred_closes
, NULL
, &f
);
269 r
= journal_file_open(-1, fname
, flags
, 0640, s
->compress
.enabled
, s
->compress
.threshold_bytes
, seal
,
270 metrics
, s
->mmap
, s
->deferred_closes
, NULL
, &f
);
275 r
= journal_file_enable_post_change_timer(f
, s
->event
, POST_CHANGE_TIMER_INTERVAL_USEC
);
277 (void) journal_file_close(f
);
285 static bool flushed_flag_is_set(void) {
286 return access("/run/systemd/journal/flushed", F_OK
) >= 0;
289 static int system_journal_open(Server
*s
, bool flush_requested
) {
293 if (!s
->system_journal
&&
294 IN_SET(s
->storage
, STORAGE_PERSISTENT
, STORAGE_AUTO
) &&
295 (flush_requested
|| flushed_flag_is_set())) {
297 /* If in auto mode: first try to create the machine
298 * path, but not the prefix.
300 * If in persistent mode: create /var/log/journal and
301 * the machine path */
303 if (s
->storage
== STORAGE_PERSISTENT
)
304 (void) mkdir_p("/var/log/journal/", 0755);
306 (void) mkdir(s
->system_storage
.path
, 0755);
308 fn
= strjoina(s
->system_storage
.path
, "/system.journal");
309 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_storage
.metrics
, &s
->system_journal
);
311 server_add_acls(s
->system_journal
, 0);
312 (void) cache_space_refresh(s
, &s
->system_storage
);
313 patch_min_use(&s
->system_storage
);
315 if (!IN_SET(r
, -ENOENT
, -EROFS
))
316 log_warning_errno(r
, "Failed to open system journal: %m");
321 /* If the runtime journal is open, and we're post-flush, we're
322 * recovering from a failed system journal rotate (ENOSPC)
323 * for which the runtime journal was reopened.
325 * Perform an implicit flush to var, leaving the runtime
326 * journal closed, now that the system journal is back.
328 if (!flush_requested
)
329 (void) server_flush_to_var(s
, true);
332 if (!s
->runtime_journal
&&
333 (s
->storage
!= STORAGE_NONE
)) {
335 fn
= strjoina(s
->runtime_storage
.path
, "/system.journal");
337 if (s
->system_journal
) {
339 /* Try to open the runtime journal, but only
340 * if it already exists, so that we can flush
341 * it into the system journal */
343 r
= open_journal(s
, false, fn
, O_RDWR
, false, &s
->runtime_storage
.metrics
, &s
->runtime_journal
);
346 log_warning_errno(r
, "Failed to open runtime journal: %m");
353 /* OK, we really need the runtime journal, so create
354 * it if necessary. */
356 (void) mkdir("/run/log", 0755);
357 (void) mkdir("/run/log/journal", 0755);
358 (void) mkdir_parents(fn
, 0750);
360 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, false, &s
->runtime_storage
.metrics
, &s
->runtime_journal
);
362 return log_error_errno(r
, "Failed to open runtime journal: %m");
365 if (s
->runtime_journal
) {
366 server_add_acls(s
->runtime_journal
, 0);
367 (void) cache_space_refresh(s
, &s
->runtime_storage
);
368 patch_min_use(&s
->runtime_storage
);
375 static JournalFile
* find_journal(Server
*s
, uid_t uid
) {
376 _cleanup_free_
char *p
= NULL
;
383 /* A rotate that fails to create the new journal (ENOSPC) leaves the
384 * rotated journal as NULL. Unless we revisit opening, even after
385 * space is made available we'll continue to return NULL indefinitely.
387 * system_journal_open() is a noop if the journals are already open, so
388 * we can just call it here to recover from failed rotates (or anything
389 * else that's left the journals as NULL).
391 * Fixes https://github.com/systemd/systemd/issues/3968 */
392 (void) system_journal_open(s
, false);
394 /* We split up user logs only on /var, not on /run. If the
395 * runtime file is open, we write to it exclusively, in order
396 * to guarantee proper order as soon as we flush /run to
397 * /var and close the runtime file. */
399 if (s
->runtime_journal
)
400 return s
->runtime_journal
;
402 if (uid_for_system_journal(uid
))
403 return s
->system_journal
;
405 f
= ordered_hashmap_get(s
->user_journals
, UID_TO_PTR(uid
));
409 r
= sd_id128_get_machine(&machine
);
411 log_debug_errno(r
, "Failed to determine machine ID, using system log: %m");
412 return s
->system_journal
;
415 if (asprintf(&p
, "/var/log/journal/" SD_ID128_FORMAT_STR
"/user-"UID_FMT
".journal",
416 SD_ID128_FORMAT_VAL(machine
), uid
) < 0) {
418 return s
->system_journal
;
421 while (ordered_hashmap_size(s
->user_journals
) >= USER_JOURNALS_MAX
) {
422 /* Too many open? Then let's close one */
423 f
= ordered_hashmap_steal_first(s
->user_journals
);
425 (void) journal_file_close(f
);
428 r
= open_journal(s
, true, p
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_storage
.metrics
, &f
);
430 return s
->system_journal
;
432 server_add_acls(f
, uid
);
434 r
= ordered_hashmap_put(s
->user_journals
, UID_TO_PTR(uid
), f
);
436 (void) journal_file_close(f
);
437 return s
->system_journal
;
443 static int do_rotate(
456 r
= journal_file_rotate(f
, s
->compress
.enabled
, s
->compress
.threshold_bytes
, seal
, s
->deferred_closes
);
459 return log_error_errno(r
, "Failed to rotate %s: %m", (*f
)->path
);
461 return log_error_errno(r
, "Failed to create new %s journal: %m", name
);
464 server_add_acls(*f
, uid
);
469 static void server_process_deferred_closes(Server
*s
) {
473 /* Perform any deferred closes which aren't still offlining. */
474 SET_FOREACH(f
, s
->deferred_closes
, i
) {
475 if (journal_file_is_offlining(f
))
478 (void) set_remove(s
->deferred_closes
, f
);
479 (void) journal_file_close(f
);
483 static void server_vacuum_deferred_closes(Server
*s
) {
486 /* Make some room in the deferred closes list, so that it doesn't grow without bounds */
487 if (set_size(s
->deferred_closes
) < DEFERRED_CLOSES_MAX
)
490 /* Let's first remove all journal files that might already have completed closing */
491 server_process_deferred_closes(s
);
493 /* And now, let's close some more until we reach the limit again. */
494 while (set_size(s
->deferred_closes
) >= DEFERRED_CLOSES_MAX
) {
497 assert_se(f
= set_steal_first(s
->deferred_closes
));
498 journal_file_close(f
);
502 static int open_user_journal_directory(Server
*s
, DIR **ret_dir
, char **ret_path
) {
503 _cleanup_closedir_
DIR *dir
= NULL
;
504 _cleanup_free_
char *path
= NULL
;
510 r
= sd_id128_get_machine(&machine
);
512 return log_error_errno(r
, "Failed to determine machine ID, ignoring: %m");
514 if (asprintf(&path
, "/var/log/journal/" SD_ID128_FORMAT_STR
"/", SD_ID128_FORMAT_VAL(machine
)) < 0)
519 return log_error_errno(errno
, "Failed to open user journal directory '%s': %m", path
);
522 *ret_dir
= TAKE_PTR(dir
);
524 *ret_path
= TAKE_PTR(path
);
529 void server_rotate(Server
*s
) {
530 _cleanup_free_
char *path
= NULL
;
531 _cleanup_closedir_
DIR *d
= NULL
;
537 log_debug("Rotating...");
539 /* First, rotate the system journal (either in its runtime flavour or in its runtime flavour) */
540 (void) do_rotate(s
, &s
->runtime_journal
, "runtime", false, 0);
541 (void) do_rotate(s
, &s
->system_journal
, "system", s
->seal
, 0);
543 /* Then, rotate all user journals we have open (keeping them open) */
544 ORDERED_HASHMAP_FOREACH_KEY(f
, k
, s
->user_journals
, i
) {
545 r
= do_rotate(s
, &f
, "user", s
->seal
, PTR_TO_UID(k
));
547 ordered_hashmap_replace(s
->user_journals
, k
, f
);
549 /* Old file has been closed and deallocated */
550 ordered_hashmap_remove(s
->user_journals
, k
);
553 /* Finally, also rotate all user journals we currently do not have open. */
554 r
= open_user_journal_directory(s
, &d
, &path
);
558 FOREACH_DIRENT(de
, d
, log_warning_errno(errno
, "Failed to enumerate %s, ignoring: %m", path
)) {
559 _cleanup_free_
char *u
= NULL
, *full
= NULL
;
560 _cleanup_close_
int fd
= -1;
564 a
= startswith(de
->d_name
, "user-");
567 b
= endswith(de
->d_name
, ".journal");
577 r
= parse_uid(u
, &uid
);
579 log_debug_errno(r
, "Failed to parse UID from file name '%s', ignoring: %m", de
->d_name
);
583 /* Already rotated in the above loop? i.e. is it an open user journal? */
584 if (ordered_hashmap_contains(s
->user_journals
, UID_TO_PTR(uid
)))
587 full
= strjoin(path
, de
->d_name
);
593 fd
= openat(dirfd(d
), de
->d_name
, O_RDWR
|O_CLOEXEC
|O_NOCTTY
|O_NOFOLLOW
|O_NONBLOCK
);
595 log_full_errno(IN_SET(errno
, ELOOP
, ENOENT
) ? LOG_DEBUG
: LOG_WARNING
, errno
,
596 "Failed to open journal file '%s' for rotation: %m", full
);
600 /* Make some room in the set of deferred close()s */
601 server_vacuum_deferred_closes(s
);
603 /* Open the file briefly, so that we can archive it */
604 r
= journal_file_open(fd
,
609 s
->compress
.threshold_bytes
,
611 &s
->system_storage
.metrics
,
617 log_warning_errno(r
, "Failed to read journal file %s for rotation, trying to move it out of the way: %m", full
);
619 r
= journal_file_dispose(dirfd(d
), de
->d_name
);
621 log_warning_errno(r
, "Failed to move %s out of the way, ignoring: %m", full
);
623 log_debug("Successfully moved %s out of the way.", full
);
628 TAKE_FD(fd
); /* Donated to journal_file_open() */
630 r
= journal_file_archive(f
);
632 log_debug_errno(r
, "Failed to archive journal file '%s', ignoring: %m", full
);
634 f
= journal_initiate_close(f
, s
->deferred_closes
);
638 server_process_deferred_closes(s
);
641 void server_sync(Server
*s
) {
646 if (s
->system_journal
) {
647 r
= journal_file_set_offline(s
->system_journal
, false);
649 log_warning_errno(r
, "Failed to sync system journal, ignoring: %m");
652 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
) {
653 r
= journal_file_set_offline(f
, false);
655 log_warning_errno(r
, "Failed to sync user journal, ignoring: %m");
658 if (s
->sync_event_source
) {
659 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_OFF
);
661 log_error_errno(r
, "Failed to disable sync timer source: %m");
664 s
->sync_scheduled
= false;
667 static void do_vacuum(Server
*s
, JournalStorage
*storage
, bool verbose
) {
674 (void) cache_space_refresh(s
, storage
);
677 server_space_usage_message(s
, storage
);
679 r
= journal_directory_vacuum(storage
->path
, storage
->space
.limit
,
680 storage
->metrics
.n_max_files
, s
->max_retention_usec
,
681 &s
->oldest_file_usec
, verbose
);
682 if (r
< 0 && r
!= -ENOENT
)
683 log_warning_errno(r
, "Failed to vacuum %s, ignoring: %m", storage
->path
);
685 cache_space_invalidate(&storage
->space
);
688 int server_vacuum(Server
*s
, bool verbose
) {
691 log_debug("Vacuuming...");
693 s
->oldest_file_usec
= 0;
695 if (s
->system_journal
)
696 do_vacuum(s
, &s
->system_storage
, verbose
);
697 if (s
->runtime_journal
)
698 do_vacuum(s
, &s
->runtime_storage
, verbose
);
703 static void server_cache_machine_id(Server
*s
) {
709 r
= sd_id128_get_machine(&id
);
713 sd_id128_to_string(id
, stpcpy(s
->machine_id_field
, "_MACHINE_ID="));
716 static void server_cache_boot_id(Server
*s
) {
722 r
= sd_id128_get_boot(&id
);
726 sd_id128_to_string(id
, stpcpy(s
->boot_id_field
, "_BOOT_ID="));
729 static void server_cache_hostname(Server
*s
) {
730 _cleanup_free_
char *t
= NULL
;
735 t
= gethostname_malloc();
739 x
= strappend("_HOSTNAME=", t
);
743 free(s
->hostname_field
);
744 s
->hostname_field
= x
;
747 static bool shall_try_append_again(JournalFile
*f
, int r
) {
750 case -E2BIG
: /* Hit configured limit */
751 case -EFBIG
: /* Hit fs limit */
752 case -EDQUOT
: /* Quota limit hit */
753 case -ENOSPC
: /* Disk full */
754 log_debug("%s: Allocation limit reached, rotating.", f
->path
);
757 case -EIO
: /* I/O error of some kind (mmap) */
758 log_warning("%s: IO error, rotating.", f
->path
);
761 case -EHOSTDOWN
: /* Other machine */
762 log_info("%s: Journal file from other machine, rotating.", f
->path
);
765 case -EBUSY
: /* Unclean shutdown */
766 log_info("%s: Unclean shutdown, rotating.", f
->path
);
769 case -EPROTONOSUPPORT
: /* Unsupported feature */
770 log_info("%s: Unsupported feature, rotating.", f
->path
);
773 case -EBADMSG
: /* Corrupted */
774 case -ENODATA
: /* Truncated */
775 case -ESHUTDOWN
: /* Already archived */
776 log_warning("%s: Journal file corrupted, rotating.", f
->path
);
779 case -EIDRM
: /* Journal file has been deleted */
780 log_warning("%s: Journal file has been deleted, rotating.", f
->path
);
783 case -ETXTBSY
: /* Journal file is from the future */
784 log_warning("%s: Journal file is from the future, rotating.", f
->path
);
792 static void write_to_journal(Server
*s
, uid_t uid
, struct iovec
*iovec
, size_t n
, int priority
) {
793 bool vacuumed
= false, rotate
= false;
794 struct dual_timestamp ts
;
802 /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
803 * the source time, and not even the time the event was originally seen, but instead simply the time we started
804 * processing it, as we want strictly linear ordering in what we write out.) */
805 assert_se(sd_event_now(s
->event
, CLOCK_REALTIME
, &ts
.realtime
) >= 0);
806 assert_se(sd_event_now(s
->event
, CLOCK_MONOTONIC
, &ts
.monotonic
) >= 0);
808 if (ts
.realtime
< s
->last_realtime_clock
) {
809 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
810 * regular operation. However, when it does happen, then we should make sure that we start fresh files
811 * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
812 * bisection works correctly. */
814 log_debug("Time jumped backwards, rotating.");
818 f
= find_journal(s
, uid
);
822 if (journal_file_rotate_suggested(f
, s
->max_file_usec
)) {
823 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f
->path
);
830 server_vacuum(s
, false);
833 f
= find_journal(s
, uid
);
838 s
->last_realtime_clock
= ts
.realtime
;
840 r
= journal_file_append_entry(f
, &ts
, NULL
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
842 server_schedule_sync(s
, priority
);
846 if (vacuumed
|| !shall_try_append_again(f
, r
)) {
847 log_error_errno(r
, "Failed to write entry (%zu items, %zu bytes), ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
852 server_vacuum(s
, false);
854 f
= find_journal(s
, uid
);
858 log_debug("Retrying write.");
859 r
= journal_file_append_entry(f
, &ts
, NULL
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
861 log_error_errno(r
, "Failed to write entry (%zu items, %zu bytes) despite vacuuming, ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
863 server_schedule_sync(s
, priority
);
866 #define IOVEC_ADD_NUMERIC_FIELD(iovec, n, value, type, isset, format, field) \
867 if (isset(value)) { \
869 k = newa(char, STRLEN(field "=") + DECIMAL_STR_MAX(type) + 1); \
870 sprintf(k, field "=" format, value); \
871 iovec[n++] = IOVEC_MAKE_STRING(k); \
874 #define IOVEC_ADD_STRING_FIELD(iovec, n, value, field) \
875 if (!isempty(value)) { \
877 k = strjoina(field "=", value); \
878 iovec[n++] = IOVEC_MAKE_STRING(k); \
881 #define IOVEC_ADD_ID128_FIELD(iovec, n, value, field) \
882 if (!sd_id128_is_null(value)) { \
884 k = newa(char, STRLEN(field "=") + SD_ID128_STRING_MAX); \
885 sd_id128_to_string(value, stpcpy(k, field "=")); \
886 iovec[n++] = IOVEC_MAKE_STRING(k); \
889 #define IOVEC_ADD_SIZED_FIELD(iovec, n, value, value_size, field) \
890 if (value_size > 0) { \
892 k = newa(char, STRLEN(field "=") + value_size + 1); \
893 *((char*) mempcpy(stpcpy(k, field "="), value, value_size)) = 0; \
894 iovec[n++] = IOVEC_MAKE_STRING(k); \
897 static void dispatch_message_real(
899 struct iovec
*iovec
, size_t n
, size_t m
,
900 const ClientContext
*c
,
901 const struct timeval
*tv
,
905 char source_time
[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t
)];
913 N_IOVEC_META_FIELDS
+
914 (pid_is_valid(object_pid
) ? N_IOVEC_OBJECT_FIELDS
: 0) +
915 client_context_extra_fields_n_iovec(c
) <= m
);
918 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->pid
, pid_t
, pid_is_valid
, PID_FMT
, "_PID");
919 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->uid
, uid_t
, uid_is_valid
, UID_FMT
, "_UID");
920 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->gid
, gid_t
, gid_is_valid
, GID_FMT
, "_GID");
922 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->comm
, "_COMM");
923 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->exe
, "_EXE");
924 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->cmdline
, "_CMDLINE");
925 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->capeff
, "_CAP_EFFECTIVE");
927 IOVEC_ADD_SIZED_FIELD(iovec
, n
, c
->label
, c
->label_size
, "_SELINUX_CONTEXT");
929 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->auditid
, uint32_t, audit_session_is_valid
, "%" PRIu32
, "_AUDIT_SESSION");
930 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->loginuid
, uid_t
, uid_is_valid
, UID_FMT
, "_AUDIT_LOGINUID");
932 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->cgroup
, "_SYSTEMD_CGROUP");
933 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->session
, "_SYSTEMD_SESSION");
934 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->owner_uid
, uid_t
, uid_is_valid
, UID_FMT
, "_SYSTEMD_OWNER_UID");
935 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->unit
, "_SYSTEMD_UNIT");
936 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->user_unit
, "_SYSTEMD_USER_UNIT");
937 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->slice
, "_SYSTEMD_SLICE");
938 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->user_slice
, "_SYSTEMD_USER_SLICE");
940 IOVEC_ADD_ID128_FIELD(iovec
, n
, c
->invocation_id
, "_SYSTEMD_INVOCATION_ID");
942 if (c
->extra_fields_n_iovec
> 0) {
943 memcpy(iovec
+ n
, c
->extra_fields_iovec
, c
->extra_fields_n_iovec
* sizeof(struct iovec
));
944 n
+= c
->extra_fields_n_iovec
;
950 if (pid_is_valid(object_pid
) && client_context_get(s
, object_pid
, NULL
, NULL
, 0, NULL
, &o
) >= 0) {
952 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->pid
, pid_t
, pid_is_valid
, PID_FMT
, "OBJECT_PID");
953 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->uid
, uid_t
, uid_is_valid
, UID_FMT
, "OBJECT_UID");
954 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->gid
, gid_t
, gid_is_valid
, GID_FMT
, "OBJECT_GID");
956 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->comm
, "OBJECT_COMM");
957 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->exe
, "OBJECT_EXE");
958 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->cmdline
, "OBJECT_CMDLINE");
959 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->capeff
, "OBJECT_CAP_EFFECTIVE");
961 IOVEC_ADD_SIZED_FIELD(iovec
, n
, o
->label
, o
->label_size
, "OBJECT_SELINUX_CONTEXT");
963 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->auditid
, uint32_t, audit_session_is_valid
, "%" PRIu32
, "OBJECT_AUDIT_SESSION");
964 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->loginuid
, uid_t
, uid_is_valid
, UID_FMT
, "OBJECT_AUDIT_LOGINUID");
966 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->cgroup
, "OBJECT_SYSTEMD_CGROUP");
967 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->session
, "OBJECT_SYSTEMD_SESSION");
968 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->owner_uid
, uid_t
, uid_is_valid
, UID_FMT
, "OBJECT_SYSTEMD_OWNER_UID");
969 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->unit
, "OBJECT_SYSTEMD_UNIT");
970 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->user_unit
, "OBJECT_SYSTEMD_USER_UNIT");
971 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->slice
, "OBJECT_SYSTEMD_SLICE");
972 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->user_slice
, "OBJECT_SYSTEMD_USER_SLICE");
974 IOVEC_ADD_ID128_FIELD(iovec
, n
, o
->invocation_id
, "OBJECT_SYSTEMD_INVOCATION_ID=");
980 sprintf(source_time
, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT
, timeval_load(tv
));
981 iovec
[n
++] = IOVEC_MAKE_STRING(source_time
);
984 /* Note that strictly speaking storing the boot id here is
985 * redundant since the entry includes this in-line
986 * anyway. However, we need this indexed, too. */
987 if (!isempty(s
->boot_id_field
))
988 iovec
[n
++] = IOVEC_MAKE_STRING(s
->boot_id_field
);
990 if (!isempty(s
->machine_id_field
))
991 iovec
[n
++] = IOVEC_MAKE_STRING(s
->machine_id_field
);
993 if (!isempty(s
->hostname_field
))
994 iovec
[n
++] = IOVEC_MAKE_STRING(s
->hostname_field
);
998 if (s
->split_mode
== SPLIT_UID
&& c
&& uid_is_valid(c
->uid
))
999 /* Split up strictly by (non-root) UID */
1000 journal_uid
= c
->uid
;
1001 else if (s
->split_mode
== SPLIT_LOGIN
&& c
&& c
->uid
> 0 && uid_is_valid(c
->owner_uid
))
1002 /* Split up by login UIDs. We do this only if the
1003 * realuid is not root, in order not to accidentally
1004 * leak privileged information to the user that is
1005 * logged by a privileged process that is part of an
1006 * unprivileged session. */
1007 journal_uid
= c
->owner_uid
;
1011 write_to_journal(s
, journal_uid
, iovec
, n
, priority
);
1014 void server_driver_message(Server
*s
, pid_t object_pid
, const char *message_id
, const char *format
, ...) {
1016 struct iovec
*iovec
;
1024 m
= N_IOVEC_META_FIELDS
+ 5 + N_IOVEC_PAYLOAD_FIELDS
+ client_context_extra_fields_n_iovec(s
->my_context
) + N_IOVEC_OBJECT_FIELDS
;
1025 iovec
= newa(struct iovec
, m
);
1027 assert_cc(3 == LOG_FAC(LOG_DAEMON
));
1028 iovec
[n
++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=3");
1029 iovec
[n
++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=systemd-journald");
1031 iovec
[n
++] = IOVEC_MAKE_STRING("_TRANSPORT=driver");
1032 assert_cc(6 == LOG_INFO
);
1033 iovec
[n
++] = IOVEC_MAKE_STRING("PRIORITY=6");
1036 iovec
[n
++] = IOVEC_MAKE_STRING(message_id
);
1039 va_start(ap
, format
);
1040 r
= log_format_iovec(iovec
, m
, &n
, false, 0, format
, ap
);
1041 /* Error handling below */
1045 dispatch_message_real(s
, iovec
, n
, m
, s
->my_context
, NULL
, LOG_INFO
, object_pid
);
1048 free(iovec
[k
++].iov_base
);
1051 /* We failed to format the message. Emit a warning instead. */
1054 xsprintf(buf
, "MESSAGE=Entry printing failed: %s", strerror(-r
));
1057 iovec
[n
++] = IOVEC_MAKE_STRING("PRIORITY=4");
1058 iovec
[n
++] = IOVEC_MAKE_STRING(buf
);
1059 dispatch_message_real(s
, iovec
, n
, m
, s
->my_context
, NULL
, LOG_INFO
, object_pid
);
1063 void server_dispatch_message(
1065 struct iovec
*iovec
, size_t n
, size_t m
,
1067 const struct timeval
*tv
,
1071 uint64_t available
= 0;
1075 assert(iovec
|| n
== 0);
1080 if (LOG_PRI(priority
) > s
->max_level_store
)
1083 /* Stop early in case the information will not be stored
1085 if (s
->storage
== STORAGE_NONE
)
1089 (void) determine_space(s
, &available
, NULL
);
1091 rl
= journal_rate_limit_test(s
->rate_limit
, c
->unit
, c
->log_rate_limit_interval
, c
->log_rate_limit_burst
, priority
& LOG_PRIMASK
, available
);
1095 /* Write a suppression message if we suppressed something */
1097 server_driver_message(s
, c
->pid
,
1098 "MESSAGE_ID=" SD_MESSAGE_JOURNAL_DROPPED_STR
,
1099 LOG_MESSAGE("Suppressed %i messages from %s", rl
- 1, c
->unit
),
1100 "N_DROPPED=%i", rl
- 1,
1104 dispatch_message_real(s
, iovec
, n
, m
, c
, tv
, priority
, object_pid
);
1107 int server_flush_to_var(Server
*s
, bool require_flag_file
) {
1109 sd_journal
*j
= NULL
;
1110 char ts
[FORMAT_TIMESPAN_MAX
];
1117 if (!IN_SET(s
->storage
, STORAGE_AUTO
, STORAGE_PERSISTENT
))
1120 if (!s
->runtime_journal
)
1123 if (require_flag_file
&& !flushed_flag_is_set())
1126 (void) system_journal_open(s
, true);
1128 if (!s
->system_journal
)
1131 log_debug("Flushing to /var...");
1133 start
= now(CLOCK_MONOTONIC
);
1135 r
= sd_id128_get_machine(&machine
);
1139 r
= sd_journal_open(&j
, SD_JOURNAL_RUNTIME_ONLY
);
1141 return log_error_errno(r
, "Failed to read runtime journal: %m");
1143 sd_journal_set_data_threshold(j
, 0);
1145 SD_JOURNAL_FOREACH(j
) {
1149 f
= j
->current_file
;
1150 assert(f
&& f
->current_offset
> 0);
1154 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
1156 log_error_errno(r
, "Can't read entry: %m");
1160 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
);
1164 if (!shall_try_append_again(s
->system_journal
, r
)) {
1165 log_error_errno(r
, "Can't write entry: %m");
1170 server_vacuum(s
, false);
1172 if (!s
->system_journal
) {
1173 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1178 log_debug("Retrying write.");
1179 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
);
1181 log_error_errno(r
, "Can't write entry: %m");
1189 if (s
->system_journal
)
1190 journal_file_post_change(s
->system_journal
);
1192 s
->runtime_journal
= journal_file_close(s
->runtime_journal
);
1195 (void) rm_rf("/run/log/journal", REMOVE_ROOT
);
1197 sd_journal_close(j
);
1199 server_driver_message(s
, 0, NULL
,
1200 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1201 format_timespan(ts
, sizeof(ts
), now(CLOCK_MONOTONIC
) - start
, 0),
1208 int server_process_datagram(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1209 Server
*s
= userdata
;
1210 struct ucred
*ucred
= NULL
;
1211 struct timeval
*tv
= NULL
;
1212 struct cmsghdr
*cmsg
;
1214 size_t label_len
= 0, m
;
1217 int *fds
= NULL
, v
= 0;
1221 struct cmsghdr cmsghdr
;
1223 /* We use NAME_MAX space for the SELinux label
1224 * here. The kernel currently enforces no
1225 * limit, but according to suggestions from
1226 * the SELinux people this will change and it
1227 * will probably be identical to NAME_MAX. For
1228 * now we use that, but this should be updated
1229 * one day when the final limit is known. */
1230 uint8_t buf
[CMSG_SPACE(sizeof(struct ucred
)) +
1231 CMSG_SPACE(sizeof(struct timeval
)) +
1232 CMSG_SPACE(sizeof(int)) + /* fd */
1233 CMSG_SPACE(NAME_MAX
)]; /* selinux label */
1236 union sockaddr_union sa
= {};
1238 struct msghdr msghdr
= {
1241 .msg_control
= &control
,
1242 .msg_controllen
= sizeof(control
),
1244 .msg_namelen
= sizeof(sa
),
1248 assert(fd
== s
->native_fd
|| fd
== s
->syslog_fd
|| fd
== s
->audit_fd
);
1250 if (revents
!= EPOLLIN
) {
1251 log_error("Got invalid event from epoll for datagram fd: %"PRIx32
, revents
);
1255 /* Try to get the right size, if we can. (Not all sockets support SIOCINQ, hence we just try, but don't rely on
1257 (void) ioctl(fd
, SIOCINQ
, &v
);
1259 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1260 m
= PAGE_ALIGN(MAX3((size_t) v
+ 1,
1262 ALIGN(sizeof(struct nlmsghdr
)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH
)) + 1);
1264 if (!GREEDY_REALLOC(s
->buffer
, s
->buffer_size
, m
))
1267 iovec
.iov_base
= s
->buffer
;
1268 iovec
.iov_len
= s
->buffer_size
- 1; /* Leave room for trailing NUL we add later */
1270 n
= recvmsg(fd
, &msghdr
, MSG_DONTWAIT
|MSG_CMSG_CLOEXEC
);
1272 if (IN_SET(errno
, EINTR
, EAGAIN
))
1275 return log_error_errno(errno
, "recvmsg() failed: %m");
1278 CMSG_FOREACH(cmsg
, &msghdr
) {
1280 if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1281 cmsg
->cmsg_type
== SCM_CREDENTIALS
&&
1282 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct ucred
)))
1283 ucred
= (struct ucred
*) CMSG_DATA(cmsg
);
1284 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1285 cmsg
->cmsg_type
== SCM_SECURITY
) {
1286 label
= (char*) CMSG_DATA(cmsg
);
1287 label_len
= cmsg
->cmsg_len
- CMSG_LEN(0);
1288 } else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1289 cmsg
->cmsg_type
== SO_TIMESTAMP
&&
1290 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct timeval
)))
1291 tv
= (struct timeval
*) CMSG_DATA(cmsg
);
1292 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1293 cmsg
->cmsg_type
== SCM_RIGHTS
) {
1294 fds
= (int*) CMSG_DATA(cmsg
);
1295 n_fds
= (cmsg
->cmsg_len
- CMSG_LEN(0)) / sizeof(int);
1299 /* And a trailing NUL, just in case */
1302 if (fd
== s
->syslog_fd
) {
1303 if (n
> 0 && n_fds
== 0)
1304 server_process_syslog_message(s
, s
->buffer
, n
, ucred
, tv
, label
, label_len
);
1306 log_warning("Got file descriptors via syslog socket. Ignoring.");
1308 } else if (fd
== s
->native_fd
) {
1309 if (n
> 0 && n_fds
== 0)
1310 server_process_native_message(s
, s
->buffer
, n
, ucred
, tv
, label
, label_len
);
1311 else if (n
== 0 && n_fds
== 1)
1312 server_process_native_file(s
, fds
[0], ucred
, tv
, label
, label_len
);
1314 log_warning("Got too many file descriptors via native socket. Ignoring.");
1317 assert(fd
== s
->audit_fd
);
1319 if (n
> 0 && n_fds
== 0)
1320 server_process_audit_message(s
, s
->buffer
, n
, ucred
, &sa
, msghdr
.msg_namelen
);
1322 log_warning("Got file descriptors via audit socket. Ignoring.");
1325 close_many(fds
, n_fds
);
1329 static int dispatch_sigusr1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1330 Server
*s
= userdata
;
1335 log_info("Received request to flush runtime journal from PID " PID_FMT
, si
->ssi_pid
);
1337 (void) server_flush_to_var(s
, false);
1339 server_vacuum(s
, false);
1341 r
= touch("/run/systemd/journal/flushed");
1343 log_warning_errno(r
, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
1345 server_space_usage_message(s
, NULL
);
1349 static int dispatch_sigusr2(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1350 Server
*s
= userdata
;
1355 log_info("Received request to rotate journal from PID " PID_FMT
, si
->ssi_pid
);
1357 server_vacuum(s
, true);
1359 if (s
->system_journal
)
1360 patch_min_use(&s
->system_storage
);
1361 if (s
->runtime_journal
)
1362 patch_min_use(&s
->runtime_storage
);
1364 /* Let clients know when the most recent rotation happened. */
1365 r
= write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC
));
1367 log_warning_errno(r
, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
1372 static int dispatch_sigterm(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1373 Server
*s
= userdata
;
1377 log_received_signal(LOG_INFO
, si
);
1379 sd_event_exit(s
->event
, 0);
1383 static int dispatch_sigrtmin1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1384 Server
*s
= userdata
;
1389 log_debug("Received request to sync from PID " PID_FMT
, si
->ssi_pid
);
1393 /* Let clients know when the most recent sync happened. */
1394 r
= write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC
));
1396 log_warning_errno(r
, "Failed to write /run/systemd/journal/synced, ignoring: %m");
1401 static int setup_signals(Server
*s
) {
1406 assert_se(sigprocmask_many(SIG_SETMASK
, NULL
, SIGINT
, SIGTERM
, SIGUSR1
, SIGUSR2
, SIGRTMIN
+1, -1) >= 0);
1408 r
= sd_event_add_signal(s
->event
, &s
->sigusr1_event_source
, SIGUSR1
, dispatch_sigusr1
, s
);
1412 r
= sd_event_add_signal(s
->event
, &s
->sigusr2_event_source
, SIGUSR2
, dispatch_sigusr2
, s
);
1416 r
= sd_event_add_signal(s
->event
, &s
->sigterm_event_source
, SIGTERM
, dispatch_sigterm
, s
);
1420 /* Let's process SIGTERM late, so that we flush all queued
1421 * messages to disk before we exit */
1422 r
= sd_event_source_set_priority(s
->sigterm_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1426 /* When journald is invoked on the terminal (when debugging),
1427 * it's useful if C-c is handled equivalent to SIGTERM. */
1428 r
= sd_event_add_signal(s
->event
, &s
->sigint_event_source
, SIGINT
, dispatch_sigterm
, s
);
1432 r
= sd_event_source_set_priority(s
->sigint_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1436 /* SIGRTMIN+1 causes an immediate sync. We process this very
1437 * late, so that everything else queued at this point is
1438 * really written to disk. Clients can watch
1439 * /run/systemd/journal/synced with inotify until its mtime
1440 * changes to see when a sync happened. */
1441 r
= sd_event_add_signal(s
->event
, &s
->sigrtmin1_event_source
, SIGRTMIN
+1, dispatch_sigrtmin1
, s
);
1445 r
= sd_event_source_set_priority(s
->sigrtmin1_event_source
, SD_EVENT_PRIORITY_NORMAL
+15);
1452 static int parse_proc_cmdline_item(const char *key
, const char *value
, void *data
) {
1458 if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_syslog")) {
1460 r
= value
? parse_boolean(value
) : true;
1462 log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value
);
1464 s
->forward_to_syslog
= r
;
1466 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_kmsg")) {
1468 r
= value
? parse_boolean(value
) : true;
1470 log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value
);
1472 s
->forward_to_kmsg
= r
;
1474 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_console")) {
1476 r
= value
? parse_boolean(value
) : true;
1478 log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value
);
1480 s
->forward_to_console
= r
;
1482 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_wall")) {
1484 r
= value
? parse_boolean(value
) : true;
1486 log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value
);
1488 s
->forward_to_wall
= r
;
1490 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_console")) {
1492 if (proc_cmdline_value_missing(key
, value
))
1495 r
= log_level_from_string(value
);
1497 log_warning("Failed to parse max level console value \"%s\". Ignoring.", value
);
1499 s
->max_level_console
= r
;
1501 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_store")) {
1503 if (proc_cmdline_value_missing(key
, value
))
1506 r
= log_level_from_string(value
);
1508 log_warning("Failed to parse max level store value \"%s\". Ignoring.", value
);
1510 s
->max_level_store
= r
;
1512 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_syslog")) {
1514 if (proc_cmdline_value_missing(key
, value
))
1517 r
= log_level_from_string(value
);
1519 log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value
);
1521 s
->max_level_syslog
= r
;
1523 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_kmsg")) {
1525 if (proc_cmdline_value_missing(key
, value
))
1528 r
= log_level_from_string(value
);
1530 log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value
);
1532 s
->max_level_kmsg
= r
;
1534 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_wall")) {
1536 if (proc_cmdline_value_missing(key
, value
))
1539 r
= log_level_from_string(value
);
1541 log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value
);
1543 s
->max_level_wall
= r
;
1545 } else if (startswith(key
, "systemd.journald"))
1546 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key
);
1548 /* do not warn about state here, since probably systemd already did */
1552 static int server_parse_config_file(Server
*s
) {
1555 return config_parse_many_nulstr(PKGSYSCONFDIR
"/journald.conf",
1556 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1558 config_item_perf_lookup
, journald_gperf_lookup
,
1559 CONFIG_PARSE_WARN
, s
);
1562 static int server_dispatch_sync(sd_event_source
*es
, usec_t t
, void *userdata
) {
1563 Server
*s
= userdata
;
1571 int server_schedule_sync(Server
*s
, int priority
) {
1576 if (priority
<= LOG_CRIT
) {
1577 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1582 if (s
->sync_scheduled
)
1585 if (s
->sync_interval_usec
> 0) {
1588 r
= sd_event_now(s
->event
, CLOCK_MONOTONIC
, &when
);
1592 when
+= s
->sync_interval_usec
;
1594 if (!s
->sync_event_source
) {
1595 r
= sd_event_add_time(
1597 &s
->sync_event_source
,
1600 server_dispatch_sync
, s
);
1604 r
= sd_event_source_set_priority(s
->sync_event_source
, SD_EVENT_PRIORITY_IMPORTANT
);
1606 r
= sd_event_source_set_time(s
->sync_event_source
, when
);
1610 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_ONESHOT
);
1615 s
->sync_scheduled
= true;
1621 static int dispatch_hostname_change(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1622 Server
*s
= userdata
;
1626 server_cache_hostname(s
);
1630 static int server_open_hostname(Server
*s
) {
1635 s
->hostname_fd
= open("/proc/sys/kernel/hostname",
1636 O_RDONLY
|O_CLOEXEC
|O_NONBLOCK
|O_NOCTTY
);
1637 if (s
->hostname_fd
< 0)
1638 return log_error_errno(errno
, "Failed to open /proc/sys/kernel/hostname: %m");
1640 r
= sd_event_add_io(s
->event
, &s
->hostname_event_source
, s
->hostname_fd
, 0, dispatch_hostname_change
, s
);
1642 /* kernels prior to 3.2 don't support polling this file. Ignore
1645 log_warning_errno(r
, "Failed to register hostname fd in event loop, ignoring: %m");
1646 s
->hostname_fd
= safe_close(s
->hostname_fd
);
1650 return log_error_errno(r
, "Failed to register hostname fd in event loop: %m");
1653 r
= sd_event_source_set_priority(s
->hostname_event_source
, SD_EVENT_PRIORITY_IMPORTANT
-10);
1655 return log_error_errno(r
, "Failed to adjust priority of host name event source: %m");
1660 static int dispatch_notify_event(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1661 Server
*s
= userdata
;
1665 assert(s
->notify_event_source
== es
);
1666 assert(s
->notify_fd
== fd
);
1668 /* The $NOTIFY_SOCKET is writable again, now send exactly one
1669 * message on it. Either it's the watchdog event, the initial
1670 * READY=1 event or an stdout stream event. If there's nothing
1671 * to write anymore, turn our event source off. The next time
1672 * there's something to send it will be turned on again. */
1674 if (!s
->sent_notify_ready
) {
1675 static const char p
[] =
1677 "STATUS=Processing requests...";
1680 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1682 if (errno
== EAGAIN
)
1685 return log_error_errno(errno
, "Failed to send READY=1 notification message: %m");
1688 s
->sent_notify_ready
= true;
1689 log_debug("Sent READY=1 notification.");
1691 } else if (s
->send_watchdog
) {
1693 static const char p
[] =
1698 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1700 if (errno
== EAGAIN
)
1703 return log_error_errno(errno
, "Failed to send WATCHDOG=1 notification message: %m");
1706 s
->send_watchdog
= false;
1707 log_debug("Sent WATCHDOG=1 notification.");
1709 } else if (s
->stdout_streams_notify_queue
)
1710 /* Dispatch one stream notification event */
1711 stdout_stream_send_notify(s
->stdout_streams_notify_queue
);
1713 /* Leave us enabled if there's still more to do. */
1714 if (s
->send_watchdog
|| s
->stdout_streams_notify_queue
)
1717 /* There was nothing to do anymore, let's turn ourselves off. */
1718 r
= sd_event_source_set_enabled(es
, SD_EVENT_OFF
);
1720 return log_error_errno(r
, "Failed to turn off notify event source: %m");
1725 static int dispatch_watchdog(sd_event_source
*es
, uint64_t usec
, void *userdata
) {
1726 Server
*s
= userdata
;
1731 s
->send_watchdog
= true;
1733 r
= sd_event_source_set_enabled(s
->notify_event_source
, SD_EVENT_ON
);
1735 log_warning_errno(r
, "Failed to turn on notify event source: %m");
1737 r
= sd_event_source_set_time(s
->watchdog_event_source
, usec
+ s
->watchdog_usec
/ 2);
1739 return log_error_errno(r
, "Failed to restart watchdog event source: %m");
1741 r
= sd_event_source_set_enabled(s
->watchdog_event_source
, SD_EVENT_ON
);
1743 return log_error_errno(r
, "Failed to enable watchdog event source: %m");
1748 static int server_connect_notify(Server
*s
) {
1749 union sockaddr_union sa
= {};
1754 assert(s
->notify_fd
< 0);
1755 assert(!s
->notify_event_source
);
1758 So here's the problem: we'd like to send notification
1759 messages to PID 1, but we cannot do that via sd_notify(),
1760 since that's synchronous, and we might end up blocking on
1761 it. Specifically: given that PID 1 might block on
1762 dbus-daemon during IPC, and dbus-daemon is logging to us,
1763 and might hence block on us, we might end up in a deadlock
1764 if we block on sending PID 1 notification messages — by
1765 generating a full blocking circle. To avoid this, let's
1766 create a non-blocking socket, and connect it to the
1767 notification socket, and then wait for POLLOUT before we
1768 send anything. This should efficiently avoid any deadlocks,
1769 as we'll never block on PID 1, hence PID 1 can safely block
1770 on dbus-daemon which can safely block on us again.
1772 Don't think that this issue is real? It is, see:
1773 https://github.com/systemd/systemd/issues/1505
1776 e
= getenv("NOTIFY_SOCKET");
1780 salen
= sockaddr_un_set_path(&sa
.un
, e
);
1782 return log_error_errno(salen
, "NOTIFY_SOCKET set to invalid value '%s': %m", e
);
1784 s
->notify_fd
= socket(AF_UNIX
, SOCK_DGRAM
|SOCK_CLOEXEC
|SOCK_NONBLOCK
, 0);
1785 if (s
->notify_fd
< 0)
1786 return log_error_errno(errno
, "Failed to create notify socket: %m");
1788 (void) fd_inc_sndbuf(s
->notify_fd
, NOTIFY_SNDBUF_SIZE
);
1790 r
= connect(s
->notify_fd
, &sa
.sa
, salen
);
1792 return log_error_errno(errno
, "Failed to connect to notify socket: %m");
1794 r
= sd_event_add_io(s
->event
, &s
->notify_event_source
, s
->notify_fd
, EPOLLOUT
, dispatch_notify_event
, s
);
1796 return log_error_errno(r
, "Failed to watch notification socket: %m");
1798 if (sd_watchdog_enabled(false, &s
->watchdog_usec
) > 0) {
1799 s
->send_watchdog
= true;
1801 r
= sd_event_add_time(s
->event
, &s
->watchdog_event_source
, CLOCK_MONOTONIC
, now(CLOCK_MONOTONIC
) + s
->watchdog_usec
/2, s
->watchdog_usec
/4, dispatch_watchdog
, s
);
1803 return log_error_errno(r
, "Failed to add watchdog time event: %m");
1806 /* This should fire pretty soon, which we'll use to send the
1812 int server_init(Server
*s
) {
1813 _cleanup_fdset_free_ FDSet
*fds
= NULL
;
1820 s
->syslog_fd
= s
->native_fd
= s
->stdout_fd
= s
->dev_kmsg_fd
= s
->audit_fd
= s
->hostname_fd
= s
->notify_fd
= -1;
1821 s
->compress
.enabled
= true;
1822 s
->compress
.threshold_bytes
= (uint64_t) -1;
1824 s
->read_kmsg
= true;
1826 s
->watchdog_usec
= USEC_INFINITY
;
1828 s
->sync_interval_usec
= DEFAULT_SYNC_INTERVAL_USEC
;
1829 s
->sync_scheduled
= false;
1831 s
->rate_limit_interval
= DEFAULT_RATE_LIMIT_INTERVAL
;
1832 s
->rate_limit_burst
= DEFAULT_RATE_LIMIT_BURST
;
1834 s
->forward_to_wall
= true;
1836 s
->max_file_usec
= DEFAULT_MAX_FILE_USEC
;
1838 s
->max_level_store
= LOG_DEBUG
;
1839 s
->max_level_syslog
= LOG_DEBUG
;
1840 s
->max_level_kmsg
= LOG_NOTICE
;
1841 s
->max_level_console
= LOG_INFO
;
1842 s
->max_level_wall
= LOG_EMERG
;
1844 s
->line_max
= DEFAULT_LINE_MAX
;
1846 journal_reset_metrics(&s
->system_storage
.metrics
);
1847 journal_reset_metrics(&s
->runtime_storage
.metrics
);
1849 server_parse_config_file(s
);
1851 r
= proc_cmdline_parse(parse_proc_cmdline_item
, s
, PROC_CMDLINE_STRIP_RD_PREFIX
);
1853 log_warning_errno(r
, "Failed to parse kernel command line, ignoring: %m");
1855 if (!!s
->rate_limit_interval
^ !!s
->rate_limit_burst
) {
1856 log_debug("Setting both rate limit interval and burst from "USEC_FMT
",%u to 0,0",
1857 s
->rate_limit_interval
, s
->rate_limit_burst
);
1858 s
->rate_limit_interval
= s
->rate_limit_burst
= 0;
1861 (void) mkdir_p("/run/systemd/journal", 0755);
1863 s
->user_journals
= ordered_hashmap_new(NULL
);
1864 if (!s
->user_journals
)
1867 s
->mmap
= mmap_cache_new();
1871 s
->deferred_closes
= set_new(NULL
);
1872 if (!s
->deferred_closes
)
1875 r
= sd_event_default(&s
->event
);
1877 return log_error_errno(r
, "Failed to create event loop: %m");
1879 n
= sd_listen_fds(true);
1881 return log_error_errno(n
, "Failed to read listening file descriptors from environment: %m");
1883 for (fd
= SD_LISTEN_FDS_START
; fd
< SD_LISTEN_FDS_START
+ n
; fd
++) {
1885 if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/socket", 0) > 0) {
1887 if (s
->native_fd
>= 0) {
1888 log_error("Too many native sockets passed.");
1894 } else if (sd_is_socket_unix(fd
, SOCK_STREAM
, 1, "/run/systemd/journal/stdout", 0) > 0) {
1896 if (s
->stdout_fd
>= 0) {
1897 log_error("Too many stdout sockets passed.");
1903 } else if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/dev/log", 0) > 0 ||
1904 sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1906 if (s
->syslog_fd
>= 0) {
1907 log_error("Too many /dev/log sockets passed.");
1913 } else if (sd_is_socket(fd
, AF_NETLINK
, SOCK_RAW
, -1) > 0) {
1915 if (s
->audit_fd
>= 0) {
1916 log_error("Too many audit sockets passed.");
1930 r
= fdset_put(fds
, fd
);
1936 /* Try to restore streams, but don't bother if this fails */
1937 (void) server_restore_streams(s
, fds
);
1939 if (fdset_size(fds
) > 0) {
1940 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds
));
1941 fds
= fdset_free(fds
);
1944 no_sockets
= s
->native_fd
< 0 && s
->stdout_fd
< 0 && s
->syslog_fd
< 0 && s
->audit_fd
< 0;
1946 /* always open stdout, syslog, native, and kmsg sockets */
1948 /* systemd-journald.socket: /run/systemd/journal/stdout */
1949 r
= server_open_stdout_socket(s
);
1953 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
1954 r
= server_open_syslog_socket(s
);
1958 /* systemd-journald.socket: /run/systemd/journal/socket */
1959 r
= server_open_native_socket(s
);
1964 r
= server_open_dev_kmsg(s
);
1968 /* Unless we got *some* sockets and not audit, open audit socket */
1969 if (s
->audit_fd
>= 0 || no_sockets
) {
1970 r
= server_open_audit(s
);
1975 r
= server_open_kernel_seqnum(s
);
1979 r
= server_open_hostname(s
);
1983 r
= setup_signals(s
);
1987 s
->rate_limit
= journal_rate_limit_new();
1991 r
= cg_get_root_path(&s
->cgroup_root
);
1995 server_cache_hostname(s
);
1996 server_cache_boot_id(s
);
1997 server_cache_machine_id(s
);
1999 s
->runtime_storage
.name
= "Runtime journal";
2000 s
->system_storage
.name
= "System journal";
2002 s
->runtime_storage
.path
= strjoin("/run/log/journal/", SERVER_MACHINE_ID(s
));
2003 s
->system_storage
.path
= strjoin("/var/log/journal/", SERVER_MACHINE_ID(s
));
2004 if (!s
->runtime_storage
.path
|| !s
->system_storage
.path
)
2007 (void) server_connect_notify(s
);
2009 (void) client_context_acquire_default(s
);
2011 return system_journal_open(s
, false);
2014 void server_maybe_append_tags(Server
*s
) {
2020 n
= now(CLOCK_REALTIME
);
2022 if (s
->system_journal
)
2023 journal_file_maybe_append_tag(s
->system_journal
, n
);
2025 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
)
2026 journal_file_maybe_append_tag(f
, n
);
2030 void server_done(Server
*s
) {
2033 set_free_with_destructor(s
->deferred_closes
, journal_file_close
);
2035 while (s
->stdout_streams
)
2036 stdout_stream_free(s
->stdout_streams
);
2038 client_context_flush_all(s
);
2040 if (s
->system_journal
)
2041 (void) journal_file_close(s
->system_journal
);
2043 if (s
->runtime_journal
)
2044 (void) journal_file_close(s
->runtime_journal
);
2046 ordered_hashmap_free_with_destructor(s
->user_journals
, journal_file_close
);
2048 sd_event_source_unref(s
->syslog_event_source
);
2049 sd_event_source_unref(s
->native_event_source
);
2050 sd_event_source_unref(s
->stdout_event_source
);
2051 sd_event_source_unref(s
->dev_kmsg_event_source
);
2052 sd_event_source_unref(s
->audit_event_source
);
2053 sd_event_source_unref(s
->sync_event_source
);
2054 sd_event_source_unref(s
->sigusr1_event_source
);
2055 sd_event_source_unref(s
->sigusr2_event_source
);
2056 sd_event_source_unref(s
->sigterm_event_source
);
2057 sd_event_source_unref(s
->sigint_event_source
);
2058 sd_event_source_unref(s
->sigrtmin1_event_source
);
2059 sd_event_source_unref(s
->hostname_event_source
);
2060 sd_event_source_unref(s
->notify_event_source
);
2061 sd_event_source_unref(s
->watchdog_event_source
);
2062 sd_event_unref(s
->event
);
2064 safe_close(s
->syslog_fd
);
2065 safe_close(s
->native_fd
);
2066 safe_close(s
->stdout_fd
);
2067 safe_close(s
->dev_kmsg_fd
);
2068 safe_close(s
->audit_fd
);
2069 safe_close(s
->hostname_fd
);
2070 safe_close(s
->notify_fd
);
2073 journal_rate_limit_free(s
->rate_limit
);
2075 if (s
->kernel_seqnum
)
2076 munmap(s
->kernel_seqnum
, sizeof(uint64_t));
2080 free(s
->cgroup_root
);
2081 free(s
->hostname_field
);
2082 free(s
->runtime_storage
.path
);
2083 free(s
->system_storage
.path
);
2086 mmap_cache_unref(s
->mmap
);
2089 static const char* const storage_table
[_STORAGE_MAX
] = {
2090 [STORAGE_AUTO
] = "auto",
2091 [STORAGE_VOLATILE
] = "volatile",
2092 [STORAGE_PERSISTENT
] = "persistent",
2093 [STORAGE_NONE
] = "none"
2096 DEFINE_STRING_TABLE_LOOKUP(storage
, Storage
);
2097 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage
, storage
, Storage
, "Failed to parse storage setting");
2099 static const char* const split_mode_table
[_SPLIT_MAX
] = {
2100 [SPLIT_LOGIN
] = "login",
2101 [SPLIT_UID
] = "uid",
2102 [SPLIT_NONE
] = "none",
2105 DEFINE_STRING_TABLE_LOOKUP(split_mode
, SplitMode
);
2106 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode
, split_mode
, SplitMode
, "Failed to parse split mode setting");
2108 int config_parse_line_max(
2110 const char *filename
,
2112 const char *section
,
2113 unsigned section_line
,
2128 if (isempty(rvalue
))
2129 /* Empty assignment means default */
2130 *sz
= DEFAULT_LINE_MAX
;
2134 r
= parse_size(rvalue
, 1024, &v
);
2136 log_syntax(unit
, LOG_ERR
, filename
, line
, r
, "Failed to parse LineMax= value, ignoring: %s", rvalue
);
2141 /* Why specify 79 here as minimum line length? Simply, because the most common traditional
2142 * terminal size is 80ch, and it might make sense to break one character before the natural
2143 * line break would occur on that. */
2144 log_syntax(unit
, LOG_WARNING
, filename
, line
, 0, "LineMax= too small, clamping to 79: %s", rvalue
);
2146 } else if (v
> (uint64_t) (SSIZE_MAX
-1)) {
2147 /* So, why specify SSIZE_MAX-1 here? Because that's one below the largest size value read()
2148 * can return, and we need one extra byte for the trailing NUL byte. Of course IRL such large
2149 * memory allocations will fail anyway, hence this limit is mostly theoretical anyway, as we'll
2150 * fail much earlier anyway. */
2151 log_syntax(unit
, LOG_WARNING
, filename
, line
, 0, "LineMax= too large, clamping to %" PRIu64
": %s", (uint64_t) (SSIZE_MAX
-1), rvalue
);
2160 int config_parse_compress(const char* unit
,
2161 const char *filename
,
2163 const char *section
,
2164 unsigned section_line
,
2170 JournalCompressOptions
* compress
= data
;
2173 if (streq(rvalue
, "1")) {
2174 log_syntax(unit
, LOG_WARNING
, filename
, line
, 0,
2175 "Compress= ambiguously specified as 1, enabling compression with default threshold");
2176 compress
->enabled
= true;
2177 } else if (streq(rvalue
, "0")) {
2178 log_syntax(unit
, LOG_WARNING
, filename
, line
, 0,
2179 "Compress= ambiguously specified as 0, disabling compression");
2180 compress
->enabled
= false;
2181 } else if ((r
= parse_boolean(rvalue
)) >= 0)
2182 compress
->enabled
= r
;
2183 else if (parse_size(rvalue
, 1024, &compress
->threshold_bytes
) == 0)
2184 compress
->enabled
= true;
2185 else if (isempty(rvalue
)) {
2186 compress
->enabled
= true;
2187 compress
->threshold_bytes
= (uint64_t) -1;
2189 log_syntax(unit
, LOG_ERR
, filename
, line
, r
, "Failed to parse Compress= value, ignoring: %s", rvalue
);