1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 This file is part of systemd.
5 Copyright 2011 Lennart Poettering
9 #include <selinux/selinux.h>
11 #include <sys/ioctl.h>
13 #include <sys/signalfd.h>
14 #include <sys/statvfs.h>
15 #include <linux/sockios.h>
18 #include "sd-daemon.h"
19 #include "sd-journal.h"
20 #include "sd-messages.h"
23 #include "alloc-util.h"
24 #include "audit-util.h"
25 #include "cgroup-util.h"
26 #include "conf-parser.h"
27 #include "dirent-util.h"
28 #include "extract-word.h"
31 #include "format-util.h"
34 #include "hostname-util.h"
35 #include "id128-util.h"
37 #include "journal-authenticate.h"
38 #include "journal-file.h"
39 #include "journal-internal.h"
40 #include "journal-vacuum.h"
41 #include "journald-audit.h"
42 #include "journald-context.h"
43 #include "journald-kmsg.h"
44 #include "journald-native.h"
45 #include "journald-rate-limit.h"
46 #include "journald-server.h"
47 #include "journald-stream.h"
48 #include "journald-syslog.h"
52 #include "parse-util.h"
53 #include "proc-cmdline.h"
54 #include "process-util.h"
56 #include "selinux-util.h"
57 #include "signal-util.h"
58 #include "socket-util.h"
59 #include "stdio-util.h"
60 #include "string-table.h"
61 #include "string-util.h"
62 #include "syslog-util.h"
63 #include "user-util.h"
65 #define USER_JOURNALS_MAX 1024
67 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
68 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
69 #define DEFAULT_RATE_LIMIT_BURST 10000
70 #define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
72 #define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
74 #define NOTIFY_SNDBUF_SIZE (8*1024*1024)
76 /* The period to insert between posting changes for coalescing */
77 #define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
79 /* Pick a good default that is likely to fit into AF_UNIX and AF_INET SOCK_DGRAM datagrams, and even leaves some room
80 * for a bit of additional metadata. */
81 #define DEFAULT_LINE_MAX (48*1024)
83 static int determine_path_usage(Server
*s
, const char *path
, uint64_t *ret_used
, uint64_t *ret_free
) {
84 _cleanup_closedir_
DIR *d
= NULL
;
93 return log_full_errno(errno
== ENOENT
? LOG_DEBUG
: LOG_ERR
,
94 errno
, "Failed to open %s: %m", path
);
96 if (fstatvfs(dirfd(d
), &ss
) < 0)
97 return log_error_errno(errno
, "Failed to fstatvfs(%s): %m", path
);
99 *ret_free
= ss
.f_bsize
* ss
.f_bavail
;
101 FOREACH_DIRENT_ALL(de
, d
, break) {
104 if (!endswith(de
->d_name
, ".journal") &&
105 !endswith(de
->d_name
, ".journal~"))
108 if (fstatat(dirfd(d
), de
->d_name
, &st
, AT_SYMLINK_NOFOLLOW
) < 0) {
109 log_debug_errno(errno
, "Failed to stat %s/%s, ignoring: %m", path
, de
->d_name
);
113 if (!S_ISREG(st
.st_mode
))
116 *ret_used
+= (uint64_t) st
.st_blocks
* 512UL;
122 static void cache_space_invalidate(JournalStorageSpace
*space
) {
126 static int cache_space_refresh(Server
*s
, JournalStorage
*storage
) {
127 JournalStorageSpace
*space
;
128 JournalMetrics
*metrics
;
129 uint64_t vfs_used
, vfs_avail
, avail
;
135 metrics
= &storage
->metrics
;
136 space
= &storage
->space
;
138 ts
= now(CLOCK_MONOTONIC
);
140 if (space
->timestamp
!= 0 && space
->timestamp
+ RECHECK_SPACE_USEC
> ts
)
143 r
= determine_path_usage(s
, storage
->path
, &vfs_used
, &vfs_avail
);
147 space
->vfs_used
= vfs_used
;
148 space
->vfs_available
= vfs_avail
;
150 avail
= LESS_BY(vfs_avail
, metrics
->keep_free
);
152 space
->limit
= MIN(MAX(vfs_used
+ avail
, metrics
->min_use
), metrics
->max_use
);
153 space
->available
= LESS_BY(space
->limit
, vfs_used
);
154 space
->timestamp
= ts
;
158 static void patch_min_use(JournalStorage
*storage
) {
161 /* Let's bump the min_use limit to the current usage on disk. We do
162 * this when starting up and first opening the journal files. This way
163 * sudden spikes in disk usage will not cause journald to vacuum files
164 * without bounds. Note that this means that only a restart of journald
165 * will make it reset this value. */
167 storage
->metrics
.min_use
= MAX(storage
->metrics
.min_use
, storage
->space
.vfs_used
);
170 static int determine_space(Server
*s
, uint64_t *available
, uint64_t *limit
) {
176 js
= s
->system_journal
? &s
->system_storage
: &s
->runtime_storage
;
178 r
= cache_space_refresh(s
, js
);
181 *available
= js
->space
.available
;
183 *limit
= js
->space
.limit
;
188 void server_space_usage_message(Server
*s
, JournalStorage
*storage
) {
189 char fb1
[FORMAT_BYTES_MAX
], fb2
[FORMAT_BYTES_MAX
], fb3
[FORMAT_BYTES_MAX
],
190 fb4
[FORMAT_BYTES_MAX
], fb5
[FORMAT_BYTES_MAX
], fb6
[FORMAT_BYTES_MAX
];
191 JournalMetrics
*metrics
;
196 storage
= s
->system_journal
? &s
->system_storage
: &s
->runtime_storage
;
198 if (cache_space_refresh(s
, storage
) < 0)
201 metrics
= &storage
->metrics
;
202 format_bytes(fb1
, sizeof(fb1
), storage
->space
.vfs_used
);
203 format_bytes(fb2
, sizeof(fb2
), metrics
->max_use
);
204 format_bytes(fb3
, sizeof(fb3
), metrics
->keep_free
);
205 format_bytes(fb4
, sizeof(fb4
), storage
->space
.vfs_available
);
206 format_bytes(fb5
, sizeof(fb5
), storage
->space
.limit
);
207 format_bytes(fb6
, sizeof(fb6
), storage
->space
.available
);
209 server_driver_message(s
, 0,
210 "MESSAGE_ID=" SD_MESSAGE_JOURNAL_USAGE_STR
,
211 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
212 storage
->name
, storage
->path
, fb1
, fb5
, fb6
),
213 "JOURNAL_NAME=%s", storage
->name
,
214 "JOURNAL_PATH=%s", storage
->path
,
215 "CURRENT_USE=%"PRIu64
, storage
->space
.vfs_used
,
216 "CURRENT_USE_PRETTY=%s", fb1
,
217 "MAX_USE=%"PRIu64
, metrics
->max_use
,
218 "MAX_USE_PRETTY=%s", fb2
,
219 "DISK_KEEP_FREE=%"PRIu64
, metrics
->keep_free
,
220 "DISK_KEEP_FREE_PRETTY=%s", fb3
,
221 "DISK_AVAILABLE=%"PRIu64
, storage
->space
.vfs_available
,
222 "DISK_AVAILABLE_PRETTY=%s", fb4
,
223 "LIMIT=%"PRIu64
, storage
->space
.limit
,
224 "LIMIT_PRETTY=%s", fb5
,
225 "AVAILABLE=%"PRIu64
, storage
->space
.available
,
226 "AVAILABLE_PRETTY=%s", fb6
,
230 static bool uid_for_system_journal(uid_t uid
) {
232 /* Returns true if the specified UID shall get its data stored in the system journal*/
234 return uid_is_system(uid
) || uid_is_dynamic(uid
) || uid
== UID_NOBODY
;
237 static void server_add_acls(JournalFile
*f
, uid_t uid
) {
244 if (uid_for_system_journal(uid
))
247 r
= add_acls_for_user(f
->fd
, uid
);
249 log_warning_errno(r
, "Failed to set ACL on %s, ignoring: %m", f
->path
);
253 static int open_journal(
259 JournalMetrics
*metrics
,
269 r
= journal_file_open_reliably(fname
, flags
, 0640, s
->compress
.enabled
, s
->compress
.threshold_bytes
,
270 seal
, metrics
, s
->mmap
, s
->deferred_closes
, NULL
, &f
);
272 r
= journal_file_open(-1, fname
, flags
, 0640, s
->compress
.enabled
, s
->compress
.threshold_bytes
, seal
,
273 metrics
, s
->mmap
, s
->deferred_closes
, NULL
, &f
);
278 r
= journal_file_enable_post_change_timer(f
, s
->event
, POST_CHANGE_TIMER_INTERVAL_USEC
);
280 (void) journal_file_close(f
);
/* Reports whether the flag file /run/systemd/journal/flushed exists. Only
 * existence is checked (F_OK); the file's contents are never read. */
static bool flushed_flag_is_set(void) {
        static const char flag_path[] = "/run/systemd/journal/flushed";

        if (access(flag_path, F_OK) < 0)
                return false;

        return true;
}
292 static int system_journal_open(Server
*s
, bool flush_requested
) {
296 if (!s
->system_journal
&&
297 IN_SET(s
->storage
, STORAGE_PERSISTENT
, STORAGE_AUTO
) &&
298 (flush_requested
|| flushed_flag_is_set())) {
300 /* If in auto mode: first try to create the machine
301 * path, but not the prefix.
303 * If in persistent mode: create /var/log/journal and
304 * the machine path */
306 if (s
->storage
== STORAGE_PERSISTENT
)
307 (void) mkdir_p("/var/log/journal/", 0755);
309 (void) mkdir(s
->system_storage
.path
, 0755);
311 fn
= strjoina(s
->system_storage
.path
, "/system.journal");
312 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_storage
.metrics
, &s
->system_journal
);
314 server_add_acls(s
->system_journal
, 0);
315 (void) cache_space_refresh(s
, &s
->system_storage
);
316 patch_min_use(&s
->system_storage
);
318 if (!IN_SET(r
, -ENOENT
, -EROFS
))
319 log_warning_errno(r
, "Failed to open system journal: %m");
324 /* If the runtime journal is open, and we're post-flush, we're
325 * recovering from a failed system journal rotate (ENOSPC)
326 * for which the runtime journal was reopened.
328 * Perform an implicit flush to var, leaving the runtime
329 * journal closed, now that the system journal is back.
331 if (!flush_requested
)
332 (void) server_flush_to_var(s
, true);
335 if (!s
->runtime_journal
&&
336 (s
->storage
!= STORAGE_NONE
)) {
338 fn
= strjoina(s
->runtime_storage
.path
, "/system.journal");
340 if (s
->system_journal
) {
342 /* Try to open the runtime journal, but only
343 * if it already exists, so that we can flush
344 * it into the system journal */
346 r
= open_journal(s
, false, fn
, O_RDWR
, false, &s
->runtime_storage
.metrics
, &s
->runtime_journal
);
349 log_warning_errno(r
, "Failed to open runtime journal: %m");
356 /* OK, we really need the runtime journal, so create
357 * it if necessary. */
359 (void) mkdir("/run/log", 0755);
360 (void) mkdir("/run/log/journal", 0755);
361 (void) mkdir_parents(fn
, 0750);
363 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, false, &s
->runtime_storage
.metrics
, &s
->runtime_journal
);
365 return log_error_errno(r
, "Failed to open runtime journal: %m");
368 if (s
->runtime_journal
) {
369 server_add_acls(s
->runtime_journal
, 0);
370 (void) cache_space_refresh(s
, &s
->runtime_storage
);
371 patch_min_use(&s
->runtime_storage
);
378 static JournalFile
* find_journal(Server
*s
, uid_t uid
) {
379 _cleanup_free_
char *p
= NULL
;
386 /* A rotate that fails to create the new journal (ENOSPC) leaves the
387 * rotated journal as NULL. Unless we revisit opening, even after
388 * space is made available we'll continue to return NULL indefinitely.
390 * system_journal_open() is a noop if the journals are already open, so
391 * we can just call it here to recover from failed rotates (or anything
392 * else that's left the journals as NULL).
394 * Fixes https://github.com/systemd/systemd/issues/3968 */
395 (void) system_journal_open(s
, false);
397 /* We split up user logs only on /var, not on /run. If the
398 * runtime file is open, we write to it exclusively, in order
399 * to guarantee proper order as soon as we flush /run to
400 * /var and close the runtime file. */
402 if (s
->runtime_journal
)
403 return s
->runtime_journal
;
405 if (uid_for_system_journal(uid
))
406 return s
->system_journal
;
408 r
= sd_id128_get_machine(&machine
);
410 return s
->system_journal
;
412 f
= ordered_hashmap_get(s
->user_journals
, UID_TO_PTR(uid
));
416 if (asprintf(&p
, "/var/log/journal/" SD_ID128_FORMAT_STR
"/user-"UID_FMT
".journal",
417 SD_ID128_FORMAT_VAL(machine
), uid
) < 0)
418 return s
->system_journal
;
420 while (ordered_hashmap_size(s
->user_journals
) >= USER_JOURNALS_MAX
) {
421 /* Too many open? Then let's close one */
422 f
= ordered_hashmap_steal_first(s
->user_journals
);
424 (void) journal_file_close(f
);
427 r
= open_journal(s
, true, p
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_storage
.metrics
, &f
);
429 return s
->system_journal
;
431 server_add_acls(f
, uid
);
433 r
= ordered_hashmap_put(s
->user_journals
, UID_TO_PTR(uid
), f
);
435 (void) journal_file_close(f
);
436 return s
->system_journal
;
442 static int do_rotate(
455 r
= journal_file_rotate(f
, s
->compress
.enabled
, s
->compress
.threshold_bytes
, seal
, s
->deferred_closes
);
458 return log_error_errno(r
, "Failed to rotate %s: %m", (*f
)->path
);
460 return log_error_errno(r
, "Failed to create new %s journal: %m", name
);
463 server_add_acls(*f
, uid
);
468 void server_rotate(Server
*s
) {
474 log_debug("Rotating...");
476 (void) do_rotate(s
, &s
->runtime_journal
, "runtime", false, 0);
477 (void) do_rotate(s
, &s
->system_journal
, "system", s
->seal
, 0);
479 ORDERED_HASHMAP_FOREACH_KEY(f
, k
, s
->user_journals
, i
) {
480 r
= do_rotate(s
, &f
, "user", s
->seal
, PTR_TO_UID(k
));
482 ordered_hashmap_replace(s
->user_journals
, k
, f
);
484 /* Old file has been closed and deallocated */
485 ordered_hashmap_remove(s
->user_journals
, k
);
488 /* Perform any deferred closes which aren't still offlining. */
489 SET_FOREACH(f
, s
->deferred_closes
, i
)
490 if (!journal_file_is_offlining(f
)) {
491 (void) set_remove(s
->deferred_closes
, f
);
492 (void) journal_file_close(f
);
496 void server_sync(Server
*s
) {
501 if (s
->system_journal
) {
502 r
= journal_file_set_offline(s
->system_journal
, false);
504 log_warning_errno(r
, "Failed to sync system journal, ignoring: %m");
507 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
) {
508 r
= journal_file_set_offline(f
, false);
510 log_warning_errno(r
, "Failed to sync user journal, ignoring: %m");
513 if (s
->sync_event_source
) {
514 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_OFF
);
516 log_error_errno(r
, "Failed to disable sync timer source: %m");
519 s
->sync_scheduled
= false;
522 static void do_vacuum(Server
*s
, JournalStorage
*storage
, bool verbose
) {
529 (void) cache_space_refresh(s
, storage
);
532 server_space_usage_message(s
, storage
);
534 r
= journal_directory_vacuum(storage
->path
, storage
->space
.limit
,
535 storage
->metrics
.n_max_files
, s
->max_retention_usec
,
536 &s
->oldest_file_usec
, verbose
);
537 if (r
< 0 && r
!= -ENOENT
)
538 log_warning_errno(r
, "Failed to vacuum %s, ignoring: %m", storage
->path
);
540 cache_space_invalidate(&storage
->space
);
543 int server_vacuum(Server
*s
, bool verbose
) {
546 log_debug("Vacuuming...");
548 s
->oldest_file_usec
= 0;
550 if (s
->system_journal
)
551 do_vacuum(s
, &s
->system_storage
, verbose
);
552 if (s
->runtime_journal
)
553 do_vacuum(s
, &s
->runtime_storage
, verbose
);
558 static void server_cache_machine_id(Server
*s
) {
564 r
= sd_id128_get_machine(&id
);
568 sd_id128_to_string(id
, stpcpy(s
->machine_id_field
, "_MACHINE_ID="));
571 static void server_cache_boot_id(Server
*s
) {
577 r
= sd_id128_get_boot(&id
);
581 sd_id128_to_string(id
, stpcpy(s
->boot_id_field
, "_BOOT_ID="));
584 static void server_cache_hostname(Server
*s
) {
585 _cleanup_free_
char *t
= NULL
;
590 t
= gethostname_malloc();
594 x
= strappend("_HOSTNAME=", t
);
598 free(s
->hostname_field
);
599 s
->hostname_field
= x
;
602 static bool shall_try_append_again(JournalFile
*f
, int r
) {
605 case -E2BIG
: /* Hit configured limit */
606 case -EFBIG
: /* Hit fs limit */
607 case -EDQUOT
: /* Quota limit hit */
608 case -ENOSPC
: /* Disk full */
609 log_debug("%s: Allocation limit reached, rotating.", f
->path
);
612 case -EIO
: /* I/O error of some kind (mmap) */
613 log_warning("%s: IO error, rotating.", f
->path
);
616 case -EHOSTDOWN
: /* Other machine */
617 log_info("%s: Journal file from other machine, rotating.", f
->path
);
620 case -EBUSY
: /* Unclean shutdown */
621 log_info("%s: Unclean shutdown, rotating.", f
->path
);
624 case -EPROTONOSUPPORT
: /* Unsupported feature */
625 log_info("%s: Unsupported feature, rotating.", f
->path
);
628 case -EBADMSG
: /* Corrupted */
629 case -ENODATA
: /* Truncated */
630 case -ESHUTDOWN
: /* Already archived */
631 log_warning("%s: Journal file corrupted, rotating.", f
->path
);
634 case -EIDRM
: /* Journal file has been deleted */
635 log_warning("%s: Journal file has been deleted, rotating.", f
->path
);
638 case -ETXTBSY
: /* Journal file is from the future */
639 log_warning("%s: Journal file is from the future, rotating.", f
->path
);
647 static void write_to_journal(Server
*s
, uid_t uid
, struct iovec
*iovec
, size_t n
, int priority
) {
648 bool vacuumed
= false, rotate
= false;
649 struct dual_timestamp ts
;
657 /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
658 * the source time, and not even the time the event was originally seen, but instead simply the time we started
659 * processing it, as we want strictly linear ordering in what we write out.) */
660 assert_se(sd_event_now(s
->event
, CLOCK_REALTIME
, &ts
.realtime
) >= 0);
661 assert_se(sd_event_now(s
->event
, CLOCK_MONOTONIC
, &ts
.monotonic
) >= 0);
663 if (ts
.realtime
< s
->last_realtime_clock
) {
664 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
665 * regular operation. However, when it does happen, then we should make sure that we start fresh files
666 * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
667 * bisection works correctly. */
669 log_debug("Time jumped backwards, rotating.");
673 f
= find_journal(s
, uid
);
677 if (journal_file_rotate_suggested(f
, s
->max_file_usec
)) {
678 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f
->path
);
685 server_vacuum(s
, false);
688 f
= find_journal(s
, uid
);
693 s
->last_realtime_clock
= ts
.realtime
;
695 r
= journal_file_append_entry(f
, &ts
, NULL
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
697 server_schedule_sync(s
, priority
);
701 if (vacuumed
|| !shall_try_append_again(f
, r
)) {
702 log_error_errno(r
, "Failed to write entry (%zu items, %zu bytes), ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
707 server_vacuum(s
, false);
709 f
= find_journal(s
, uid
);
713 log_debug("Retrying write.");
714 r
= journal_file_append_entry(f
, &ts
, NULL
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
716 log_error_errno(r
, "Failed to write entry (%zu items, %zu bytes) despite vacuuming, ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
718 server_schedule_sync(s
, priority
);
/* Helpers that append at most one "FIELD=value" entry to iovec[] and advance n. Nothing is appended when the
 * value is considered unset (isset(value) is false, the string is empty, the ID is null, or the size is 0).
 * "field" must be a string literal WITHOUT the trailing "=": the macros add it themselves.
 *
 * Each macro is wrapped in do { } while (0) so that it behaves like a single statement and can be used safely
 * in an unbraced if/else. The formatted string lives in a buffer obtained from newa()/strjoina(). */

#define IOVEC_ADD_NUMERIC_FIELD(iovec, n, value, type, isset, format, field)  \
        do {                                                            \
                if (isset(value)) {                                     \
                        char *k;                                        \
                        k = newa(char, STRLEN(field "=") + DECIMAL_STR_MAX(type) + 1); \
                        sprintf(k, field "=" format, value);            \
                        iovec[n++] = IOVEC_MAKE_STRING(k);              \
                }                                                       \
        } while (0)

#define IOVEC_ADD_STRING_FIELD(iovec, n, value, field)                  \
        do {                                                            \
                if (!isempty(value)) {                                  \
                        char *k;                                        \
                        k = strjoina(field "=", value);                 \
                        iovec[n++] = IOVEC_MAKE_STRING(k);              \
                }                                                       \
        } while (0)

#define IOVEC_ADD_ID128_FIELD(iovec, n, value, field)                   \
        do {                                                            \
                if (!sd_id128_is_null(value)) {                         \
                        char *k;                                        \
                        k = newa(char, STRLEN(field "=") + SD_ID128_STRING_MAX); \
                        sd_id128_to_string(value, stpcpy(k, field "=")); \
                        iovec[n++] = IOVEC_MAKE_STRING(k);              \
                }                                                       \
        } while (0)

/* value need not be NUL-terminated: exactly value_size bytes are copied and a NUL is appended. */
#define IOVEC_ADD_SIZED_FIELD(iovec, n, value, value_size, field)       \
        do {                                                            \
                if ((value_size) > 0) {                                 \
                        char *k;                                        \
                        k = newa(char, STRLEN(field "=") + (value_size) + 1); \
                        *((char*) mempcpy(stpcpy(k, field "="), value, (value_size))) = 0; \
                        iovec[n++] = IOVEC_MAKE_STRING(k);              \
                }                                                       \
        } while (0)
752 static void dispatch_message_real(
754 struct iovec
*iovec
, size_t n
, size_t m
,
755 const ClientContext
*c
,
756 const struct timeval
*tv
,
760 char source_time
[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t
)];
768 N_IOVEC_META_FIELDS
+
769 (pid_is_valid(object_pid
) ? N_IOVEC_OBJECT_FIELDS
: 0) +
770 client_context_extra_fields_n_iovec(c
) <= m
);
773 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->pid
, pid_t
, pid_is_valid
, PID_FMT
, "_PID");
774 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->uid
, uid_t
, uid_is_valid
, UID_FMT
, "_UID");
775 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->gid
, gid_t
, gid_is_valid
, GID_FMT
, "_GID");
777 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->comm
, "_COMM");
778 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->exe
, "_EXE");
779 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->cmdline
, "_CMDLINE");
780 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->capeff
, "_CAP_EFFECTIVE");
782 IOVEC_ADD_SIZED_FIELD(iovec
, n
, c
->label
, c
->label_size
, "_SELINUX_CONTEXT");
784 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->auditid
, uint32_t, audit_session_is_valid
, "%" PRIu32
, "_AUDIT_SESSION");
785 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->loginuid
, uid_t
, uid_is_valid
, UID_FMT
, "_AUDIT_LOGINUID");
787 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->cgroup
, "_SYSTEMD_CGROUP");
788 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->session
, "_SYSTEMD_SESSION");
789 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->owner_uid
, uid_t
, uid_is_valid
, UID_FMT
, "_SYSTEMD_OWNER_UID");
790 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->unit
, "_SYSTEMD_UNIT");
791 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->user_unit
, "_SYSTEMD_USER_UNIT");
792 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->slice
, "_SYSTEMD_SLICE");
793 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->user_slice
, "_SYSTEMD_USER_SLICE");
795 IOVEC_ADD_ID128_FIELD(iovec
, n
, c
->invocation_id
, "_SYSTEMD_INVOCATION_ID");
797 if (c
->extra_fields_n_iovec
> 0) {
798 memcpy(iovec
+ n
, c
->extra_fields_iovec
, c
->extra_fields_n_iovec
* sizeof(struct iovec
));
799 n
+= c
->extra_fields_n_iovec
;
805 if (pid_is_valid(object_pid
) && client_context_get(s
, object_pid
, NULL
, NULL
, 0, NULL
, &o
) >= 0) {
807 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->pid
, pid_t
, pid_is_valid
, PID_FMT
, "OBJECT_PID");
808 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->uid
, uid_t
, uid_is_valid
, UID_FMT
, "OBJECT_UID");
809 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->gid
, gid_t
, gid_is_valid
, GID_FMT
, "OBJECT_GID");
811 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->comm
, "OBJECT_COMM");
812 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->exe
, "OBJECT_EXE");
813 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->cmdline
, "OBJECT_CMDLINE");
814 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->capeff
, "OBJECT_CAP_EFFECTIVE");
816 IOVEC_ADD_SIZED_FIELD(iovec
, n
, o
->label
, o
->label_size
, "OBJECT_SELINUX_CONTEXT");
818 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->auditid
, uint32_t, audit_session_is_valid
, "%" PRIu32
, "OBJECT_AUDIT_SESSION");
819 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->loginuid
, uid_t
, uid_is_valid
, UID_FMT
, "OBJECT_AUDIT_LOGINUID");
821 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->cgroup
, "OBJECT_SYSTEMD_CGROUP");
822 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->session
, "OBJECT_SYSTEMD_SESSION");
823 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->owner_uid
, uid_t
, uid_is_valid
, UID_FMT
, "OBJECT_SYSTEMD_OWNER_UID");
824 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->unit
, "OBJECT_SYSTEMD_UNIT");
825 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->user_unit
, "OBJECT_SYSTEMD_USER_UNIT");
826 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->slice
, "OBJECT_SYSTEMD_SLICE");
827 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->user_slice
, "OBJECT_SYSTEMD_USER_SLICE");
829 IOVEC_ADD_ID128_FIELD(iovec
, n
, o
->invocation_id
, "OBJECT_SYSTEMD_INVOCATION_ID=");
835 sprintf(source_time
, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT
, timeval_load(tv
));
836 iovec
[n
++] = IOVEC_MAKE_STRING(source_time
);
839 /* Note that strictly speaking storing the boot id here is
840 * redundant since the entry includes this in-line
841 * anyway. However, we need this indexed, too. */
842 if (!isempty(s
->boot_id_field
))
843 iovec
[n
++] = IOVEC_MAKE_STRING(s
->boot_id_field
);
845 if (!isempty(s
->machine_id_field
))
846 iovec
[n
++] = IOVEC_MAKE_STRING(s
->machine_id_field
);
848 if (!isempty(s
->hostname_field
))
849 iovec
[n
++] = IOVEC_MAKE_STRING(s
->hostname_field
);
853 if (s
->split_mode
== SPLIT_UID
&& c
&& uid_is_valid(c
->uid
))
854 /* Split up strictly by (non-root) UID */
855 journal_uid
= c
->uid
;
856 else if (s
->split_mode
== SPLIT_LOGIN
&& c
&& c
->uid
> 0 && uid_is_valid(c
->owner_uid
))
857 /* Split up by login UIDs. We do this only if the
858 * realuid is not root, in order not to accidentally
859 * leak privileged information to the user that is
860 * logged by a privileged process that is part of an
861 * unprivileged session. */
862 journal_uid
= c
->owner_uid
;
866 write_to_journal(s
, journal_uid
, iovec
, n
, priority
);
869 void server_driver_message(Server
*s
, pid_t object_pid
, const char *message_id
, const char *format
, ...) {
879 m
= N_IOVEC_META_FIELDS
+ 5 + N_IOVEC_PAYLOAD_FIELDS
+ client_context_extra_fields_n_iovec(s
->my_context
) + N_IOVEC_OBJECT_FIELDS
;
880 iovec
= newa(struct iovec
, m
);
882 assert_cc(3 == LOG_FAC(LOG_DAEMON
));
883 iovec
[n
++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=3");
884 iovec
[n
++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=systemd-journald");
886 iovec
[n
++] = IOVEC_MAKE_STRING("_TRANSPORT=driver");
887 assert_cc(6 == LOG_INFO
);
888 iovec
[n
++] = IOVEC_MAKE_STRING("PRIORITY=6");
891 iovec
[n
++] = IOVEC_MAKE_STRING(message_id
);
894 va_start(ap
, format
);
895 r
= log_format_iovec(iovec
, m
, &n
, false, 0, format
, ap
);
896 /* Error handling below */
900 dispatch_message_real(s
, iovec
, n
, m
, s
->my_context
, NULL
, LOG_INFO
, object_pid
);
903 free(iovec
[k
++].iov_base
);
906 /* We failed to format the message. Emit a warning instead. */
909 xsprintf(buf
, "MESSAGE=Entry printing failed: %s", strerror(-r
));
912 iovec
[n
++] = IOVEC_MAKE_STRING("PRIORITY=4");
913 iovec
[n
++] = IOVEC_MAKE_STRING(buf
);
914 dispatch_message_real(s
, iovec
, n
, m
, s
->my_context
, NULL
, LOG_INFO
, object_pid
);
918 void server_dispatch_message(
920 struct iovec
*iovec
, size_t n
, size_t m
,
922 const struct timeval
*tv
,
926 uint64_t available
= 0;
930 assert(iovec
|| n
== 0);
935 if (LOG_PRI(priority
) > s
->max_level_store
)
938 /* Stop early in case the information will not be stored
940 if (s
->storage
== STORAGE_NONE
)
944 (void) determine_space(s
, &available
, NULL
);
946 rl
= journal_rate_limit_test(s
->rate_limit
, c
->unit
, priority
& LOG_PRIMASK
, available
);
950 /* Write a suppression message if we suppressed something */
952 server_driver_message(s
, c
->pid
,
953 "MESSAGE_ID=" SD_MESSAGE_JOURNAL_DROPPED_STR
,
954 LOG_MESSAGE("Suppressed %i messages from %s", rl
- 1, c
->unit
),
955 "N_DROPPED=%i", rl
- 1,
959 dispatch_message_real(s
, iovec
, n
, m
, c
, tv
, priority
, object_pid
);
962 int server_flush_to_var(Server
*s
, bool require_flag_file
) {
964 sd_journal
*j
= NULL
;
965 char ts
[FORMAT_TIMESPAN_MAX
];
972 if (!IN_SET(s
->storage
, STORAGE_AUTO
, STORAGE_PERSISTENT
))
975 if (!s
->runtime_journal
)
978 if (require_flag_file
&& !flushed_flag_is_set())
981 (void) system_journal_open(s
, true);
983 if (!s
->system_journal
)
986 log_debug("Flushing to /var...");
988 start
= now(CLOCK_MONOTONIC
);
990 r
= sd_id128_get_machine(&machine
);
994 r
= sd_journal_open(&j
, SD_JOURNAL_RUNTIME_ONLY
);
996 return log_error_errno(r
, "Failed to read runtime journal: %m");
998 sd_journal_set_data_threshold(j
, 0);
1000 SD_JOURNAL_FOREACH(j
) {
1004 f
= j
->current_file
;
1005 assert(f
&& f
->current_offset
> 0);
1009 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
1011 log_error_errno(r
, "Can't read entry: %m");
1015 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
);
1019 if (!shall_try_append_again(s
->system_journal
, r
)) {
1020 log_error_errno(r
, "Can't write entry: %m");
1025 server_vacuum(s
, false);
1027 if (!s
->system_journal
) {
1028 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1033 log_debug("Retrying write.");
1034 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
);
1036 log_error_errno(r
, "Can't write entry: %m");
1044 journal_file_post_change(s
->system_journal
);
1046 s
->runtime_journal
= journal_file_close(s
->runtime_journal
);
1049 (void) rm_rf("/run/log/journal", REMOVE_ROOT
);
1051 sd_journal_close(j
);
1053 server_driver_message(s
, 0, NULL
,
1054 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1055 format_timespan(ts
, sizeof(ts
), now(CLOCK_MONOTONIC
) - start
, 0),
1062 int server_process_datagram(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1063 Server
*s
= userdata
;
1064 struct ucred
*ucred
= NULL
;
1065 struct timeval
*tv
= NULL
;
1066 struct cmsghdr
*cmsg
;
1068 size_t label_len
= 0, m
;
1071 int *fds
= NULL
, v
= 0;
1075 struct cmsghdr cmsghdr
;
1077 /* We use NAME_MAX space for the SELinux label
1078 * here. The kernel currently enforces no
1079 * limit, but according to suggestions from
1080 * the SELinux people this will change and it
1081 * will probably be identical to NAME_MAX. For
1082 * now we use that, but this should be updated
1083 * one day when the final limit is known. */
1084 uint8_t buf
[CMSG_SPACE(sizeof(struct ucred
)) +
1085 CMSG_SPACE(sizeof(struct timeval
)) +
1086 CMSG_SPACE(sizeof(int)) + /* fd */
1087 CMSG_SPACE(NAME_MAX
)]; /* selinux label */
1090 union sockaddr_union sa
= {};
1092 struct msghdr msghdr
= {
1095 .msg_control
= &control
,
1096 .msg_controllen
= sizeof(control
),
1098 .msg_namelen
= sizeof(sa
),
1102 assert(fd
== s
->native_fd
|| fd
== s
->syslog_fd
|| fd
== s
->audit_fd
);
1104 if (revents
!= EPOLLIN
) {
1105 log_error("Got invalid event from epoll for datagram fd: %"PRIx32
, revents
);
1109 /* Try to get the right size, if we can. (Not all sockets support SIOCINQ, hence we just try, but don't rely on
1111 (void) ioctl(fd
, SIOCINQ
, &v
);
1113 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1114 m
= PAGE_ALIGN(MAX3((size_t) v
+ 1,
1116 ALIGN(sizeof(struct nlmsghdr
)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH
)) + 1);
1118 if (!GREEDY_REALLOC(s
->buffer
, s
->buffer_size
, m
))
1121 iovec
.iov_base
= s
->buffer
;
1122 iovec
.iov_len
= s
->buffer_size
- 1; /* Leave room for trailing NUL we add later */
1124 n
= recvmsg(fd
, &msghdr
, MSG_DONTWAIT
|MSG_CMSG_CLOEXEC
);
1126 if (IN_SET(errno
, EINTR
, EAGAIN
))
1129 return log_error_errno(errno
, "recvmsg() failed: %m");
1132 CMSG_FOREACH(cmsg
, &msghdr
) {
1134 if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1135 cmsg
->cmsg_type
== SCM_CREDENTIALS
&&
1136 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct ucred
)))
1137 ucred
= (struct ucred
*) CMSG_DATA(cmsg
);
1138 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1139 cmsg
->cmsg_type
== SCM_SECURITY
) {
1140 label
= (char*) CMSG_DATA(cmsg
);
1141 label_len
= cmsg
->cmsg_len
- CMSG_LEN(0);
1142 } else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1143 cmsg
->cmsg_type
== SO_TIMESTAMP
&&
1144 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct timeval
)))
1145 tv
= (struct timeval
*) CMSG_DATA(cmsg
);
1146 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1147 cmsg
->cmsg_type
== SCM_RIGHTS
) {
1148 fds
= (int*) CMSG_DATA(cmsg
);
1149 n_fds
= (cmsg
->cmsg_len
- CMSG_LEN(0)) / sizeof(int);
1153 /* And a trailing NUL, just in case */
1156 if (fd
== s
->syslog_fd
) {
1157 if (n
> 0 && n_fds
== 0)
1158 server_process_syslog_message(s
, s
->buffer
, n
, ucred
, tv
, label
, label_len
);
1160 log_warning("Got file descriptors via syslog socket. Ignoring.");
1162 } else if (fd
== s
->native_fd
) {
1163 if (n
> 0 && n_fds
== 0)
1164 server_process_native_message(s
, s
->buffer
, n
, ucred
, tv
, label
, label_len
);
1165 else if (n
== 0 && n_fds
== 1)
1166 server_process_native_file(s
, fds
[0], ucred
, tv
, label
, label_len
);
1168 log_warning("Got too many file descriptors via native socket. Ignoring.");
1171 assert(fd
== s
->audit_fd
);
1173 if (n
> 0 && n_fds
== 0)
1174 server_process_audit_message(s
, s
->buffer
, n
, ucred
, &sa
, msghdr
.msg_namelen
);
1176 log_warning("Got file descriptors via audit socket. Ignoring.");
1179 close_many(fds
, n_fds
);
1183 static int dispatch_sigusr1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1184 Server
*s
= userdata
;
1189 log_info("Received request to flush runtime journal from PID " PID_FMT
, si
->ssi_pid
);
1191 (void) server_flush_to_var(s
, false);
1193 server_vacuum(s
, false);
1195 r
= touch("/run/systemd/journal/flushed");
1197 log_warning_errno(r
, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
1199 server_space_usage_message(s
, NULL
);
1203 static int dispatch_sigusr2(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1204 Server
*s
= userdata
;
1209 log_info("Received request to rotate journal from PID " PID_FMT
, si
->ssi_pid
);
1211 server_vacuum(s
, true);
1213 if (s
->system_journal
)
1214 patch_min_use(&s
->system_storage
);
1215 if (s
->runtime_journal
)
1216 patch_min_use(&s
->runtime_storage
);
1218 /* Let clients know when the most recent rotation happened. */
1219 r
= write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC
));
1221 log_warning_errno(r
, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
1226 static int dispatch_sigterm(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1227 Server
*s
= userdata
;
1231 log_received_signal(LOG_INFO
, si
);
1233 sd_event_exit(s
->event
, 0);
1237 static int dispatch_sigrtmin1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1238 Server
*s
= userdata
;
1243 log_debug("Received request to sync from PID " PID_FMT
, si
->ssi_pid
);
1247 /* Let clients know when the most recent sync happened. */
1248 r
= write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC
));
1250 log_warning_errno(r
, "Failed to write /run/systemd/journal/synced, ignoring: %m");
1255 static int setup_signals(Server
*s
) {
1260 assert_se(sigprocmask_many(SIG_SETMASK
, NULL
, SIGINT
, SIGTERM
, SIGUSR1
, SIGUSR2
, SIGRTMIN
+1, -1) >= 0);
1262 r
= sd_event_add_signal(s
->event
, &s
->sigusr1_event_source
, SIGUSR1
, dispatch_sigusr1
, s
);
1266 r
= sd_event_add_signal(s
->event
, &s
->sigusr2_event_source
, SIGUSR2
, dispatch_sigusr2
, s
);
1270 r
= sd_event_add_signal(s
->event
, &s
->sigterm_event_source
, SIGTERM
, dispatch_sigterm
, s
);
1274 /* Let's process SIGTERM late, so that we flush all queued
1275 * messages to disk before we exit */
1276 r
= sd_event_source_set_priority(s
->sigterm_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1280 /* When journald is invoked on the terminal (when debugging),
1281 * it's useful if C-c is handled equivalent to SIGTERM. */
1282 r
= sd_event_add_signal(s
->event
, &s
->sigint_event_source
, SIGINT
, dispatch_sigterm
, s
);
1286 r
= sd_event_source_set_priority(s
->sigint_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1290 /* SIGRTMIN+1 causes an immediate sync. We process this very
1291 * late, so that everything else queued at this point is
1292 * really written to disk. Clients can watch
1293 * /run/systemd/journal/synced with inotify until its mtime
1294 * changes to see when a sync happened. */
1295 r
= sd_event_add_signal(s
->event
, &s
->sigrtmin1_event_source
, SIGRTMIN
+1, dispatch_sigrtmin1
, s
);
1299 r
= sd_event_source_set_priority(s
->sigrtmin1_event_source
, SD_EVENT_PRIORITY_NORMAL
+15);
1306 static int parse_proc_cmdline_item(const char *key
, const char *value
, void *data
) {
1312 if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_syslog")) {
1314 r
= value
? parse_boolean(value
) : true;
1316 log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value
);
1318 s
->forward_to_syslog
= r
;
1320 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_kmsg")) {
1322 r
= value
? parse_boolean(value
) : true;
1324 log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value
);
1326 s
->forward_to_kmsg
= r
;
1328 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_console")) {
1330 r
= value
? parse_boolean(value
) : true;
1332 log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value
);
1334 s
->forward_to_console
= r
;
1336 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_wall")) {
1338 r
= value
? parse_boolean(value
) : true;
1340 log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value
);
1342 s
->forward_to_wall
= r
;
1344 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_console")) {
1346 if (proc_cmdline_value_missing(key
, value
))
1349 r
= log_level_from_string(value
);
1351 log_warning("Failed to parse max level console value \"%s\". Ignoring.", value
);
1353 s
->max_level_console
= r
;
1355 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_store")) {
1357 if (proc_cmdline_value_missing(key
, value
))
1360 r
= log_level_from_string(value
);
1362 log_warning("Failed to parse max level store value \"%s\". Ignoring.", value
);
1364 s
->max_level_store
= r
;
1366 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_syslog")) {
1368 if (proc_cmdline_value_missing(key
, value
))
1371 r
= log_level_from_string(value
);
1373 log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value
);
1375 s
->max_level_syslog
= r
;
1377 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_kmsg")) {
1379 if (proc_cmdline_value_missing(key
, value
))
1382 r
= log_level_from_string(value
);
1384 log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value
);
1386 s
->max_level_kmsg
= r
;
1388 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_wall")) {
1390 if (proc_cmdline_value_missing(key
, value
))
1393 r
= log_level_from_string(value
);
1395 log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value
);
1397 s
->max_level_wall
= r
;
1399 } else if (startswith(key
, "systemd.journald"))
1400 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key
);
1402 /* do not warn about state here, since probably systemd already did */
1406 static int server_parse_config_file(Server
*s
) {
1409 return config_parse_many_nulstr(PKGSYSCONFDIR
"/journald.conf",
1410 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1412 config_item_perf_lookup
, journald_gperf_lookup
,
1413 CONFIG_PARSE_WARN
, s
);
1416 static int server_dispatch_sync(sd_event_source
*es
, usec_t t
, void *userdata
) {
1417 Server
*s
= userdata
;
1425 int server_schedule_sync(Server
*s
, int priority
) {
1430 if (priority
<= LOG_CRIT
) {
1431 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1436 if (s
->sync_scheduled
)
1439 if (s
->sync_interval_usec
> 0) {
1442 r
= sd_event_now(s
->event
, CLOCK_MONOTONIC
, &when
);
1446 when
+= s
->sync_interval_usec
;
1448 if (!s
->sync_event_source
) {
1449 r
= sd_event_add_time(
1451 &s
->sync_event_source
,
1454 server_dispatch_sync
, s
);
1458 r
= sd_event_source_set_priority(s
->sync_event_source
, SD_EVENT_PRIORITY_IMPORTANT
);
1460 r
= sd_event_source_set_time(s
->sync_event_source
, when
);
1464 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_ONESHOT
);
1469 s
->sync_scheduled
= true;
1475 static int dispatch_hostname_change(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1476 Server
*s
= userdata
;
1480 server_cache_hostname(s
);
1484 static int server_open_hostname(Server
*s
) {
1489 s
->hostname_fd
= open("/proc/sys/kernel/hostname",
1490 O_RDONLY
|O_CLOEXEC
|O_NONBLOCK
|O_NOCTTY
);
1491 if (s
->hostname_fd
< 0)
1492 return log_error_errno(errno
, "Failed to open /proc/sys/kernel/hostname: %m");
1494 r
= sd_event_add_io(s
->event
, &s
->hostname_event_source
, s
->hostname_fd
, 0, dispatch_hostname_change
, s
);
1496 /* kernels prior to 3.2 don't support polling this file. Ignore
1499 log_warning_errno(r
, "Failed to register hostname fd in event loop, ignoring: %m");
1500 s
->hostname_fd
= safe_close(s
->hostname_fd
);
1504 return log_error_errno(r
, "Failed to register hostname fd in event loop: %m");
1507 r
= sd_event_source_set_priority(s
->hostname_event_source
, SD_EVENT_PRIORITY_IMPORTANT
-10);
1509 return log_error_errno(r
, "Failed to adjust priority of host name event source: %m");
1514 static int dispatch_notify_event(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1515 Server
*s
= userdata
;
1519 assert(s
->notify_event_source
== es
);
1520 assert(s
->notify_fd
== fd
);
1522 /* The $NOTIFY_SOCKET is writable again, now send exactly one
1523 * message on it. Either it's the watchdog event, the initial
1524 * READY=1 event or an stdout stream event. If there's nothing
1525 * to write anymore, turn our event source off. The next time
1526 * there's something to send it will be turned on again. */
1528 if (!s
->sent_notify_ready
) {
1529 static const char p
[] =
1531 "STATUS=Processing requests...";
1534 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1536 if (errno
== EAGAIN
)
1539 return log_error_errno(errno
, "Failed to send READY=1 notification message: %m");
1542 s
->sent_notify_ready
= true;
1543 log_debug("Sent READY=1 notification.");
1545 } else if (s
->send_watchdog
) {
1547 static const char p
[] =
1552 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1554 if (errno
== EAGAIN
)
1557 return log_error_errno(errno
, "Failed to send WATCHDOG=1 notification message: %m");
1560 s
->send_watchdog
= false;
1561 log_debug("Sent WATCHDOG=1 notification.");
1563 } else if (s
->stdout_streams_notify_queue
)
1564 /* Dispatch one stream notification event */
1565 stdout_stream_send_notify(s
->stdout_streams_notify_queue
);
1567 /* Leave us enabled if there's still more to do. */
1568 if (s
->send_watchdog
|| s
->stdout_streams_notify_queue
)
1571 /* There was nothing to do anymore, let's turn ourselves off. */
1572 r
= sd_event_source_set_enabled(es
, SD_EVENT_OFF
);
1574 return log_error_errno(r
, "Failed to turn off notify event source: %m");
1579 static int dispatch_watchdog(sd_event_source
*es
, uint64_t usec
, void *userdata
) {
1580 Server
*s
= userdata
;
1585 s
->send_watchdog
= true;
1587 r
= sd_event_source_set_enabled(s
->notify_event_source
, SD_EVENT_ON
);
1589 log_warning_errno(r
, "Failed to turn on notify event source: %m");
1591 r
= sd_event_source_set_time(s
->watchdog_event_source
, usec
+ s
->watchdog_usec
/ 2);
1593 return log_error_errno(r
, "Failed to restart watchdog event source: %m");
1595 r
= sd_event_source_set_enabled(s
->watchdog_event_source
, SD_EVENT_ON
);
1597 return log_error_errno(r
, "Failed to enable watchdog event source: %m");
1602 static int server_connect_notify(Server
*s
) {
1603 union sockaddr_union sa
= {
1604 .un
.sun_family
= AF_UNIX
,
1610 assert(s
->notify_fd
< 0);
1611 assert(!s
->notify_event_source
);
1614 So here's the problem: we'd like to send notification
1615 messages to PID 1, but we cannot do that via sd_notify(),
1616 since that's synchronous, and we might end up blocking on
1617 it. Specifically: given that PID 1 might block on
1618 dbus-daemon during IPC, and dbus-daemon is logging to us,
1619 and might hence block on us, we might end up in a deadlock
1620 if we block on sending PID 1 notification messages — by
1621 generating a full blocking circle. To avoid this, let's
1622 create a non-blocking socket, and connect it to the
1623 notification socket, and then wait for POLLOUT before we
1624 send anything. This should efficiently avoid any deadlocks,
1625 as we'll never block on PID 1, hence PID 1 can safely block
1626 on dbus-daemon which can safely block on us again.
1628 Don't think that this issue is real? It is, see:
1629 https://github.com/systemd/systemd/issues/1505
1632 e
= getenv("NOTIFY_SOCKET");
1636 if (!IN_SET(e
[0], '@', '/') || e
[1] == 0) {
1637 log_error("NOTIFY_SOCKET set to an invalid value: %s", e
);
1641 if (strlen(e
) > sizeof(sa
.un
.sun_path
)) {
1642 log_error("NOTIFY_SOCKET path too long: %s", e
);
1646 s
->notify_fd
= socket(AF_UNIX
, SOCK_DGRAM
|SOCK_CLOEXEC
|SOCK_NONBLOCK
, 0);
1647 if (s
->notify_fd
< 0)
1648 return log_error_errno(errno
, "Failed to create notify socket: %m");
1650 (void) fd_inc_sndbuf(s
->notify_fd
, NOTIFY_SNDBUF_SIZE
);
1652 strncpy(sa
.un
.sun_path
, e
, sizeof(sa
.un
.sun_path
));
1653 if (sa
.un
.sun_path
[0] == '@')
1654 sa
.un
.sun_path
[0] = 0;
1656 r
= connect(s
->notify_fd
, &sa
.sa
, SOCKADDR_UN_LEN(sa
.un
));
1658 return log_error_errno(errno
, "Failed to connect to notify socket: %m");
1660 r
= sd_event_add_io(s
->event
, &s
->notify_event_source
, s
->notify_fd
, EPOLLOUT
, dispatch_notify_event
, s
);
1662 return log_error_errno(r
, "Failed to watch notification socket: %m");
1664 if (sd_watchdog_enabled(false, &s
->watchdog_usec
) > 0) {
1665 s
->send_watchdog
= true;
1667 r
= sd_event_add_time(s
->event
, &s
->watchdog_event_source
, CLOCK_MONOTONIC
, now(CLOCK_MONOTONIC
) + s
->watchdog_usec
/2, s
->watchdog_usec
/4, dispatch_watchdog
, s
);
1669 return log_error_errno(r
, "Failed to add watchdog time event: %m");
1672 /* This should fire pretty soon, which we'll use to send the
1678 int server_init(Server
*s
) {
1679 _cleanup_fdset_free_ FDSet
*fds
= NULL
;
1686 s
->syslog_fd
= s
->native_fd
= s
->stdout_fd
= s
->dev_kmsg_fd
= s
->audit_fd
= s
->hostname_fd
= s
->notify_fd
= -1;
1687 s
->compress
.enabled
= true;
1688 s
->compress
.threshold_bytes
= (uint64_t) -1;
1690 s
->read_kmsg
= true;
1692 s
->watchdog_usec
= USEC_INFINITY
;
1694 s
->sync_interval_usec
= DEFAULT_SYNC_INTERVAL_USEC
;
1695 s
->sync_scheduled
= false;
1697 s
->rate_limit_interval
= DEFAULT_RATE_LIMIT_INTERVAL
;
1698 s
->rate_limit_burst
= DEFAULT_RATE_LIMIT_BURST
;
1700 s
->forward_to_wall
= true;
1702 s
->max_file_usec
= DEFAULT_MAX_FILE_USEC
;
1704 s
->max_level_store
= LOG_DEBUG
;
1705 s
->max_level_syslog
= LOG_DEBUG
;
1706 s
->max_level_kmsg
= LOG_NOTICE
;
1707 s
->max_level_console
= LOG_INFO
;
1708 s
->max_level_wall
= LOG_EMERG
;
1710 s
->line_max
= DEFAULT_LINE_MAX
;
1712 journal_reset_metrics(&s
->system_storage
.metrics
);
1713 journal_reset_metrics(&s
->runtime_storage
.metrics
);
1715 server_parse_config_file(s
);
1717 r
= proc_cmdline_parse(parse_proc_cmdline_item
, s
, PROC_CMDLINE_STRIP_RD_PREFIX
);
1719 log_warning_errno(r
, "Failed to parse kernel command line, ignoring: %m");
1721 if (!!s
->rate_limit_interval
^ !!s
->rate_limit_burst
) {
1722 log_debug("Setting both rate limit interval and burst from "USEC_FMT
",%u to 0,0",
1723 s
->rate_limit_interval
, s
->rate_limit_burst
);
1724 s
->rate_limit_interval
= s
->rate_limit_burst
= 0;
1727 (void) mkdir_p("/run/systemd/journal", 0755);
1729 s
->user_journals
= ordered_hashmap_new(NULL
);
1730 if (!s
->user_journals
)
1733 s
->mmap
= mmap_cache_new();
1737 s
->deferred_closes
= set_new(NULL
);
1738 if (!s
->deferred_closes
)
1741 r
= sd_event_default(&s
->event
);
1743 return log_error_errno(r
, "Failed to create event loop: %m");
1745 n
= sd_listen_fds(true);
1747 return log_error_errno(n
, "Failed to read listening file descriptors from environment: %m");
1749 for (fd
= SD_LISTEN_FDS_START
; fd
< SD_LISTEN_FDS_START
+ n
; fd
++) {
1751 if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/socket", 0) > 0) {
1753 if (s
->native_fd
>= 0) {
1754 log_error("Too many native sockets passed.");
1760 } else if (sd_is_socket_unix(fd
, SOCK_STREAM
, 1, "/run/systemd/journal/stdout", 0) > 0) {
1762 if (s
->stdout_fd
>= 0) {
1763 log_error("Too many stdout sockets passed.");
1769 } else if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/dev/log", 0) > 0 ||
1770 sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1772 if (s
->syslog_fd
>= 0) {
1773 log_error("Too many /dev/log sockets passed.");
1779 } else if (sd_is_socket(fd
, AF_NETLINK
, SOCK_RAW
, -1) > 0) {
1781 if (s
->audit_fd
>= 0) {
1782 log_error("Too many audit sockets passed.");
1796 r
= fdset_put(fds
, fd
);
1802 /* Try to restore streams, but don't bother if this fails */
1803 (void) server_restore_streams(s
, fds
);
1805 if (fdset_size(fds
) > 0) {
1806 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds
));
1807 fds
= fdset_free(fds
);
1810 no_sockets
= s
->native_fd
< 0 && s
->stdout_fd
< 0 && s
->syslog_fd
< 0 && s
->audit_fd
< 0;
1812 /* always open stdout, syslog, native, and kmsg sockets */
1814 /* systemd-journald.socket: /run/systemd/journal/stdout */
1815 r
= server_open_stdout_socket(s
);
1819 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
1820 r
= server_open_syslog_socket(s
);
1824 /* systemd-journald.socket: /run/systemd/journal/socket */
1825 r
= server_open_native_socket(s
);
1830 r
= server_open_dev_kmsg(s
);
1834 /* Unless we got *some* sockets and not audit, open audit socket */
1835 if (s
->audit_fd
>= 0 || no_sockets
) {
1836 r
= server_open_audit(s
);
1841 r
= server_open_kernel_seqnum(s
);
1845 r
= server_open_hostname(s
);
1849 r
= setup_signals(s
);
1853 s
->udev
= udev_new();
1857 s
->rate_limit
= journal_rate_limit_new(s
->rate_limit_interval
, s
->rate_limit_burst
);
1861 r
= cg_get_root_path(&s
->cgroup_root
);
1865 server_cache_hostname(s
);
1866 server_cache_boot_id(s
);
1867 server_cache_machine_id(s
);
1869 s
->runtime_storage
.name
= "Runtime journal";
1870 s
->system_storage
.name
= "System journal";
1872 s
->runtime_storage
.path
= strjoin("/run/log/journal/", SERVER_MACHINE_ID(s
));
1873 s
->system_storage
.path
= strjoin("/var/log/journal/", SERVER_MACHINE_ID(s
));
1874 if (!s
->runtime_storage
.path
|| !s
->system_storage
.path
)
1877 (void) server_connect_notify(s
);
1879 (void) client_context_acquire_default(s
);
1881 return system_journal_open(s
, false);
1884 void server_maybe_append_tags(Server
*s
) {
1890 n
= now(CLOCK_REALTIME
);
1892 if (s
->system_journal
)
1893 journal_file_maybe_append_tag(s
->system_journal
, n
);
1895 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
)
1896 journal_file_maybe_append_tag(f
, n
);
1900 void server_done(Server
*s
) {
1903 set_free_with_destructor(s
->deferred_closes
, journal_file_close
);
1905 while (s
->stdout_streams
)
1906 stdout_stream_free(s
->stdout_streams
);
1908 client_context_flush_all(s
);
1910 if (s
->system_journal
)
1911 (void) journal_file_close(s
->system_journal
);
1913 if (s
->runtime_journal
)
1914 (void) journal_file_close(s
->runtime_journal
);
1916 ordered_hashmap_free_with_destructor(s
->user_journals
, journal_file_close
);
1918 sd_event_source_unref(s
->syslog_event_source
);
1919 sd_event_source_unref(s
->native_event_source
);
1920 sd_event_source_unref(s
->stdout_event_source
);
1921 sd_event_source_unref(s
->dev_kmsg_event_source
);
1922 sd_event_source_unref(s
->audit_event_source
);
1923 sd_event_source_unref(s
->sync_event_source
);
1924 sd_event_source_unref(s
->sigusr1_event_source
);
1925 sd_event_source_unref(s
->sigusr2_event_source
);
1926 sd_event_source_unref(s
->sigterm_event_source
);
1927 sd_event_source_unref(s
->sigint_event_source
);
1928 sd_event_source_unref(s
->sigrtmin1_event_source
);
1929 sd_event_source_unref(s
->hostname_event_source
);
1930 sd_event_source_unref(s
->notify_event_source
);
1931 sd_event_source_unref(s
->watchdog_event_source
);
1932 sd_event_unref(s
->event
);
1934 safe_close(s
->syslog_fd
);
1935 safe_close(s
->native_fd
);
1936 safe_close(s
->stdout_fd
);
1937 safe_close(s
->dev_kmsg_fd
);
1938 safe_close(s
->audit_fd
);
1939 safe_close(s
->hostname_fd
);
1940 safe_close(s
->notify_fd
);
1943 journal_rate_limit_free(s
->rate_limit
);
1945 if (s
->kernel_seqnum
)
1946 munmap(s
->kernel_seqnum
, sizeof(uint64_t));
1950 free(s
->cgroup_root
);
1951 free(s
->hostname_field
);
1952 free(s
->runtime_storage
.path
);
1953 free(s
->system_storage
.path
);
1956 mmap_cache_unref(s
->mmap
);
1958 udev_unref(s
->udev
);
1961 static const char* const storage_table
[_STORAGE_MAX
] = {
1962 [STORAGE_AUTO
] = "auto",
1963 [STORAGE_VOLATILE
] = "volatile",
1964 [STORAGE_PERSISTENT
] = "persistent",
1965 [STORAGE_NONE
] = "none"
1968 DEFINE_STRING_TABLE_LOOKUP(storage
, Storage
);
1969 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage
, storage
, Storage
, "Failed to parse storage setting");
1971 static const char* const split_mode_table
[_SPLIT_MAX
] = {
1972 [SPLIT_LOGIN
] = "login",
1973 [SPLIT_UID
] = "uid",
1974 [SPLIT_NONE
] = "none",
1977 DEFINE_STRING_TABLE_LOOKUP(split_mode
, SplitMode
);
1978 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode
, split_mode
, SplitMode
, "Failed to parse split mode setting");
1980 int config_parse_line_max(
1982 const char *filename
,
1984 const char *section
,
1985 unsigned section_line
,
2000 if (isempty(rvalue
))
2001 /* Empty assignment means default */
2002 *sz
= DEFAULT_LINE_MAX
;
2006 r
= parse_size(rvalue
, 1024, &v
);
2008 log_syntax(unit
, LOG_ERR
, filename
, line
, r
, "Failed to parse LineMax= value, ignoring: %s", rvalue
);
2013 /* Why specify 79 here as minimum line length? Simply, because the most common traditional
2014 * terminal size is 80ch, and it might make sense to break one character before the natural
2015 * line break would occur on that. */
2016 log_syntax(unit
, LOG_WARNING
, filename
, line
, 0, "LineMax= too small, clamping to 79: %s", rvalue
);
2018 } else if (v
> (uint64_t) (SSIZE_MAX
-1)) {
2019 /* So, why specify SSIZE_MAX-1 here? Because that's one below the largest size value read()
2020 * can return, and we need one extra byte for the trailing NUL byte. Of course IRL such large
2021 * memory allocations will fail anyway, hence this limit is mostly theoretical anyway, as we'll
2022 * fail much earlier anyway. */
2023 log_syntax(unit
, LOG_WARNING
, filename
, line
, 0, "LineMax= too large, clamping to %" PRIu64
": %s", (uint64_t) (SSIZE_MAX
-1), rvalue
);
2032 int config_parse_compress(const char* unit
,
2033 const char *filename
,
2035 const char *section
,
2036 unsigned section_line
,
2042 JournalCompressOptions
* compress
= data
;
2045 if (streq(rvalue
, "1")) {
2046 log_syntax(unit
, LOG_WARNING
, filename
, line
, 0,
2047 "Compress= ambiguously specified as 1, enabling compression with default threshold");
2048 compress
->enabled
= true;
2049 } else if (streq(rvalue
, "0")) {
2050 log_syntax(unit
, LOG_WARNING
, filename
, line
, 0,
2051 "Compress= ambiguously specified as 0, disabling compression");
2052 compress
->enabled
= false;
2053 } else if ((r
= parse_boolean(rvalue
)) >= 0)
2054 compress
->enabled
= r
;
2055 else if (parse_size(rvalue
, 1024, &compress
->threshold_bytes
) == 0)
2056 compress
->enabled
= true;
2057 else if (isempty(rvalue
)) {
2058 compress
->enabled
= true;
2059 compress
->threshold_bytes
= (uint64_t) -1;
2061 log_syntax(unit
, LOG_ERR
, filename
, line
, r
, "Failed to parse Compress= value, ignoring: %s", rvalue
);