1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 Copyright 2011 Lennart Poettering
7 #include <selinux/selinux.h>
11 #include <sys/signalfd.h>
12 #include <sys/statvfs.h>
13 #include <linux/sockios.h>
16 #include "sd-daemon.h"
17 #include "sd-journal.h"
18 #include "sd-messages.h"
21 #include "alloc-util.h"
22 #include "audit-util.h"
23 #include "cgroup-util.h"
24 #include "conf-parser.h"
25 #include "dirent-util.h"
26 #include "extract-word.h"
29 #include "format-util.h"
32 #include "hostname-util.h"
33 #include "id128-util.h"
35 #include "journal-authenticate.h"
36 #include "journal-file.h"
37 #include "journal-internal.h"
38 #include "journal-vacuum.h"
39 #include "journald-audit.h"
40 #include "journald-context.h"
41 #include "journald-kmsg.h"
42 #include "journald-native.h"
43 #include "journald-rate-limit.h"
44 #include "journald-server.h"
45 #include "journald-stream.h"
46 #include "journald-syslog.h"
50 #include "parse-util.h"
51 #include "proc-cmdline.h"
52 #include "process-util.h"
54 #include "selinux-util.h"
55 #include "signal-util.h"
56 #include "socket-util.h"
57 #include "stdio-util.h"
58 #include "string-table.h"
59 #include "string-util.h"
60 #include "syslog-util.h"
61 #include "user-util.h"
/* Upper bound on the number of per-user journal files kept open at once; when the limit is reached an
 * already open one is closed before another is opened. */
#define USER_JOURNALS_MAX               1024

/* Built-in default values (presumably overridable by configuration handled elsewhere). */
#define DEFAULT_SYNC_INTERVAL_USEC      (5 * USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL     (30 * USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST        10000
#define DEFAULT_MAX_FILE_USEC           USEC_PER_MONTH

/* Interval against which the age of the cached disk space figures is compared when refreshing them. */
#define RECHECK_SPACE_USEC              (30 * USEC_PER_SEC)

#define NOTIFY_SNDBUF_SIZE              (8 * 1024 * 1024)

/* The period to insert between posting changes, so that they can be coalesced. */
#define POST_CHANGE_TIMER_INTERVAL_USEC (250 * USEC_PER_MSEC)

/* A default that is likely to fit into AF_UNIX and AF_INET SOCK_DGRAM datagrams while still leaving some
 * room for a bit of additional metadata. */
#define DEFAULT_LINE_MAX                (48 * 1024)
81 static int determine_path_usage(Server
*s
, const char *path
, uint64_t *ret_used
, uint64_t *ret_free
) {
82 _cleanup_closedir_
DIR *d
= NULL
;
91 return log_full_errno(errno
== ENOENT
? LOG_DEBUG
: LOG_ERR
,
92 errno
, "Failed to open %s: %m", path
);
94 if (fstatvfs(dirfd(d
), &ss
) < 0)
95 return log_error_errno(errno
, "Failed to fstatvfs(%s): %m", path
);
97 *ret_free
= ss
.f_bsize
* ss
.f_bavail
;
99 FOREACH_DIRENT_ALL(de
, d
, break) {
102 if (!endswith(de
->d_name
, ".journal") &&
103 !endswith(de
->d_name
, ".journal~"))
106 if (fstatat(dirfd(d
), de
->d_name
, &st
, AT_SYMLINK_NOFOLLOW
) < 0) {
107 log_debug_errno(errno
, "Failed to stat %s/%s, ignoring: %m", path
, de
->d_name
);
111 if (!S_ISREG(st
.st_mode
))
114 *ret_used
+= (uint64_t) st
.st_blocks
* 512UL;
120 static void cache_space_invalidate(JournalStorageSpace
*space
) {
124 static int cache_space_refresh(Server
*s
, JournalStorage
*storage
) {
125 JournalStorageSpace
*space
;
126 JournalMetrics
*metrics
;
127 uint64_t vfs_used
, vfs_avail
, avail
;
133 metrics
= &storage
->metrics
;
134 space
= &storage
->space
;
136 ts
= now(CLOCK_MONOTONIC
);
138 if (space
->timestamp
!= 0 && space
->timestamp
+ RECHECK_SPACE_USEC
> ts
)
141 r
= determine_path_usage(s
, storage
->path
, &vfs_used
, &vfs_avail
);
145 space
->vfs_used
= vfs_used
;
146 space
->vfs_available
= vfs_avail
;
148 avail
= LESS_BY(vfs_avail
, metrics
->keep_free
);
150 space
->limit
= MIN(MAX(vfs_used
+ avail
, metrics
->min_use
), metrics
->max_use
);
151 space
->available
= LESS_BY(space
->limit
, vfs_used
);
152 space
->timestamp
= ts
;
156 static void patch_min_use(JournalStorage
*storage
) {
159 /* Let's bump the min_use limit to the current usage on disk. We do
160 * this when starting up and first opening the journal files. This way
161 * sudden spikes in disk usage will not cause journald to vacuum files
162 * without bounds. Note that this means that only a restart of journald
163 * will make it reset this value. */
165 storage
->metrics
.min_use
= MAX(storage
->metrics
.min_use
, storage
->space
.vfs_used
);
168 static int determine_space(Server
*s
, uint64_t *available
, uint64_t *limit
) {
174 js
= s
->system_journal
? &s
->system_storage
: &s
->runtime_storage
;
176 r
= cache_space_refresh(s
, js
);
179 *available
= js
->space
.available
;
181 *limit
= js
->space
.limit
;
186 void server_space_usage_message(Server
*s
, JournalStorage
*storage
) {
187 char fb1
[FORMAT_BYTES_MAX
], fb2
[FORMAT_BYTES_MAX
], fb3
[FORMAT_BYTES_MAX
],
188 fb4
[FORMAT_BYTES_MAX
], fb5
[FORMAT_BYTES_MAX
], fb6
[FORMAT_BYTES_MAX
];
189 JournalMetrics
*metrics
;
194 storage
= s
->system_journal
? &s
->system_storage
: &s
->runtime_storage
;
196 if (cache_space_refresh(s
, storage
) < 0)
199 metrics
= &storage
->metrics
;
200 format_bytes(fb1
, sizeof(fb1
), storage
->space
.vfs_used
);
201 format_bytes(fb2
, sizeof(fb2
), metrics
->max_use
);
202 format_bytes(fb3
, sizeof(fb3
), metrics
->keep_free
);
203 format_bytes(fb4
, sizeof(fb4
), storage
->space
.vfs_available
);
204 format_bytes(fb5
, sizeof(fb5
), storage
->space
.limit
);
205 format_bytes(fb6
, sizeof(fb6
), storage
->space
.available
);
207 server_driver_message(s
, 0,
208 "MESSAGE_ID=" SD_MESSAGE_JOURNAL_USAGE_STR
,
209 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
210 storage
->name
, storage
->path
, fb1
, fb5
, fb6
),
211 "JOURNAL_NAME=%s", storage
->name
,
212 "JOURNAL_PATH=%s", storage
->path
,
213 "CURRENT_USE=%"PRIu64
, storage
->space
.vfs_used
,
214 "CURRENT_USE_PRETTY=%s", fb1
,
215 "MAX_USE=%"PRIu64
, metrics
->max_use
,
216 "MAX_USE_PRETTY=%s", fb2
,
217 "DISK_KEEP_FREE=%"PRIu64
, metrics
->keep_free
,
218 "DISK_KEEP_FREE_PRETTY=%s", fb3
,
219 "DISK_AVAILABLE=%"PRIu64
, storage
->space
.vfs_available
,
220 "DISK_AVAILABLE_PRETTY=%s", fb4
,
221 "LIMIT=%"PRIu64
, storage
->space
.limit
,
222 "LIMIT_PRETTY=%s", fb5
,
223 "AVAILABLE=%"PRIu64
, storage
->space
.available
,
224 "AVAILABLE_PRETTY=%s", fb6
,
228 static bool uid_for_system_journal(uid_t uid
) {
230 /* Returns true if the specified UID shall get its data stored in the system journal*/
232 return uid_is_system(uid
) || uid_is_dynamic(uid
) || uid
== UID_NOBODY
;
235 static void server_add_acls(JournalFile
*f
, uid_t uid
) {
242 if (uid_for_system_journal(uid
))
245 r
= add_acls_for_user(f
->fd
, uid
);
247 log_warning_errno(r
, "Failed to set ACL on %s, ignoring: %m", f
->path
);
251 static int open_journal(
257 JournalMetrics
*metrics
,
267 r
= journal_file_open_reliably(fname
, flags
, 0640, s
->compress
.enabled
, s
->compress
.threshold_bytes
,
268 seal
, metrics
, s
->mmap
, s
->deferred_closes
, NULL
, &f
);
270 r
= journal_file_open(-1, fname
, flags
, 0640, s
->compress
.enabled
, s
->compress
.threshold_bytes
, seal
,
271 metrics
, s
->mmap
, s
->deferred_closes
, NULL
, &f
);
276 r
= journal_file_enable_post_change_timer(f
, s
->event
, POST_CHANGE_TIMER_INTERVAL_USEC
);
278 (void) journal_file_close(f
);
/* Returns true if the flag file /run/systemd/journal/flushed can be accessed, i.e. exists. This is the
 * same file the SIGUSR1 handler touches after flushing the runtime journal. */
static bool flushed_flag_is_set(void) {
        int r;

        r = access("/run/systemd/journal/flushed", F_OK);

        return r >= 0;
}
290 static int system_journal_open(Server
*s
, bool flush_requested
) {
294 if (!s
->system_journal
&&
295 IN_SET(s
->storage
, STORAGE_PERSISTENT
, STORAGE_AUTO
) &&
296 (flush_requested
|| flushed_flag_is_set())) {
298 /* If in auto mode: first try to create the machine
299 * path, but not the prefix.
301 * If in persistent mode: create /var/log/journal and
302 * the machine path */
304 if (s
->storage
== STORAGE_PERSISTENT
)
305 (void) mkdir_p("/var/log/journal/", 0755);
307 (void) mkdir(s
->system_storage
.path
, 0755);
309 fn
= strjoina(s
->system_storage
.path
, "/system.journal");
310 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_storage
.metrics
, &s
->system_journal
);
312 server_add_acls(s
->system_journal
, 0);
313 (void) cache_space_refresh(s
, &s
->system_storage
);
314 patch_min_use(&s
->system_storage
);
316 if (!IN_SET(r
, -ENOENT
, -EROFS
))
317 log_warning_errno(r
, "Failed to open system journal: %m");
322 /* If the runtime journal is open, and we're post-flush, we're
323 * recovering from a failed system journal rotate (ENOSPC)
324 * for which the runtime journal was reopened.
326 * Perform an implicit flush to var, leaving the runtime
327 * journal closed, now that the system journal is back.
329 if (!flush_requested
)
330 (void) server_flush_to_var(s
, true);
333 if (!s
->runtime_journal
&&
334 (s
->storage
!= STORAGE_NONE
)) {
336 fn
= strjoina(s
->runtime_storage
.path
, "/system.journal");
338 if (s
->system_journal
) {
340 /* Try to open the runtime journal, but only
341 * if it already exists, so that we can flush
342 * it into the system journal */
344 r
= open_journal(s
, false, fn
, O_RDWR
, false, &s
->runtime_storage
.metrics
, &s
->runtime_journal
);
347 log_warning_errno(r
, "Failed to open runtime journal: %m");
354 /* OK, we really need the runtime journal, so create
355 * it if necessary. */
357 (void) mkdir("/run/log", 0755);
358 (void) mkdir("/run/log/journal", 0755);
359 (void) mkdir_parents(fn
, 0750);
361 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, false, &s
->runtime_storage
.metrics
, &s
->runtime_journal
);
363 return log_error_errno(r
, "Failed to open runtime journal: %m");
366 if (s
->runtime_journal
) {
367 server_add_acls(s
->runtime_journal
, 0);
368 (void) cache_space_refresh(s
, &s
->runtime_storage
);
369 patch_min_use(&s
->runtime_storage
);
376 static JournalFile
* find_journal(Server
*s
, uid_t uid
) {
377 _cleanup_free_
char *p
= NULL
;
384 /* A rotate that fails to create the new journal (ENOSPC) leaves the
385 * rotated journal as NULL. Unless we revisit opening, even after
386 * space is made available we'll continue to return NULL indefinitely.
388 * system_journal_open() is a noop if the journals are already open, so
389 * we can just call it here to recover from failed rotates (or anything
390 * else that's left the journals as NULL).
392 * Fixes https://github.com/systemd/systemd/issues/3968 */
393 (void) system_journal_open(s
, false);
395 /* We split up user logs only on /var, not on /run. If the
396 * runtime file is open, we write to it exclusively, in order
397 * to guarantee proper order as soon as we flush /run to
398 * /var and close the runtime file. */
400 if (s
->runtime_journal
)
401 return s
->runtime_journal
;
403 if (uid_for_system_journal(uid
))
404 return s
->system_journal
;
406 r
= sd_id128_get_machine(&machine
);
408 return s
->system_journal
;
410 f
= ordered_hashmap_get(s
->user_journals
, UID_TO_PTR(uid
));
414 if (asprintf(&p
, "/var/log/journal/" SD_ID128_FORMAT_STR
"/user-"UID_FMT
".journal",
415 SD_ID128_FORMAT_VAL(machine
), uid
) < 0)
416 return s
->system_journal
;
418 while (ordered_hashmap_size(s
->user_journals
) >= USER_JOURNALS_MAX
) {
419 /* Too many open? Then let's close one */
420 f
= ordered_hashmap_steal_first(s
->user_journals
);
422 (void) journal_file_close(f
);
425 r
= open_journal(s
, true, p
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_storage
.metrics
, &f
);
427 return s
->system_journal
;
429 server_add_acls(f
, uid
);
431 r
= ordered_hashmap_put(s
->user_journals
, UID_TO_PTR(uid
), f
);
433 (void) journal_file_close(f
);
434 return s
->system_journal
;
440 static int do_rotate(
453 r
= journal_file_rotate(f
, s
->compress
.enabled
, s
->compress
.threshold_bytes
, seal
, s
->deferred_closes
);
456 return log_error_errno(r
, "Failed to rotate %s: %m", (*f
)->path
);
458 return log_error_errno(r
, "Failed to create new %s journal: %m", name
);
461 server_add_acls(*f
, uid
);
466 void server_rotate(Server
*s
) {
472 log_debug("Rotating...");
474 (void) do_rotate(s
, &s
->runtime_journal
, "runtime", false, 0);
475 (void) do_rotate(s
, &s
->system_journal
, "system", s
->seal
, 0);
477 ORDERED_HASHMAP_FOREACH_KEY(f
, k
, s
->user_journals
, i
) {
478 r
= do_rotate(s
, &f
, "user", s
->seal
, PTR_TO_UID(k
));
480 ordered_hashmap_replace(s
->user_journals
, k
, f
);
482 /* Old file has been closed and deallocated */
483 ordered_hashmap_remove(s
->user_journals
, k
);
486 /* Perform any deferred closes which aren't still offlining. */
487 SET_FOREACH(f
, s
->deferred_closes
, i
)
488 if (!journal_file_is_offlining(f
)) {
489 (void) set_remove(s
->deferred_closes
, f
);
490 (void) journal_file_close(f
);
494 void server_sync(Server
*s
) {
499 if (s
->system_journal
) {
500 r
= journal_file_set_offline(s
->system_journal
, false);
502 log_warning_errno(r
, "Failed to sync system journal, ignoring: %m");
505 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
) {
506 r
= journal_file_set_offline(f
, false);
508 log_warning_errno(r
, "Failed to sync user journal, ignoring: %m");
511 if (s
->sync_event_source
) {
512 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_OFF
);
514 log_error_errno(r
, "Failed to disable sync timer source: %m");
517 s
->sync_scheduled
= false;
520 static void do_vacuum(Server
*s
, JournalStorage
*storage
, bool verbose
) {
527 (void) cache_space_refresh(s
, storage
);
530 server_space_usage_message(s
, storage
);
532 r
= journal_directory_vacuum(storage
->path
, storage
->space
.limit
,
533 storage
->metrics
.n_max_files
, s
->max_retention_usec
,
534 &s
->oldest_file_usec
, verbose
);
535 if (r
< 0 && r
!= -ENOENT
)
536 log_warning_errno(r
, "Failed to vacuum %s, ignoring: %m", storage
->path
);
538 cache_space_invalidate(&storage
->space
);
541 int server_vacuum(Server
*s
, bool verbose
) {
544 log_debug("Vacuuming...");
546 s
->oldest_file_usec
= 0;
548 if (s
->system_journal
)
549 do_vacuum(s
, &s
->system_storage
, verbose
);
550 if (s
->runtime_journal
)
551 do_vacuum(s
, &s
->runtime_storage
, verbose
);
556 static void server_cache_machine_id(Server
*s
) {
562 r
= sd_id128_get_machine(&id
);
566 sd_id128_to_string(id
, stpcpy(s
->machine_id_field
, "_MACHINE_ID="));
569 static void server_cache_boot_id(Server
*s
) {
575 r
= sd_id128_get_boot(&id
);
579 sd_id128_to_string(id
, stpcpy(s
->boot_id_field
, "_BOOT_ID="));
582 static void server_cache_hostname(Server
*s
) {
583 _cleanup_free_
char *t
= NULL
;
588 t
= gethostname_malloc();
592 x
= strappend("_HOSTNAME=", t
);
596 free(s
->hostname_field
);
597 s
->hostname_field
= x
;
600 static bool shall_try_append_again(JournalFile
*f
, int r
) {
603 case -E2BIG
: /* Hit configured limit */
604 case -EFBIG
: /* Hit fs limit */
605 case -EDQUOT
: /* Quota limit hit */
606 case -ENOSPC
: /* Disk full */
607 log_debug("%s: Allocation limit reached, rotating.", f
->path
);
610 case -EIO
: /* I/O error of some kind (mmap) */
611 log_warning("%s: IO error, rotating.", f
->path
);
614 case -EHOSTDOWN
: /* Other machine */
615 log_info("%s: Journal file from other machine, rotating.", f
->path
);
618 case -EBUSY
: /* Unclean shutdown */
619 log_info("%s: Unclean shutdown, rotating.", f
->path
);
622 case -EPROTONOSUPPORT
: /* Unsupported feature */
623 log_info("%s: Unsupported feature, rotating.", f
->path
);
626 case -EBADMSG
: /* Corrupted */
627 case -ENODATA
: /* Truncated */
628 case -ESHUTDOWN
: /* Already archived */
629 log_warning("%s: Journal file corrupted, rotating.", f
->path
);
632 case -EIDRM
: /* Journal file has been deleted */
633 log_warning("%s: Journal file has been deleted, rotating.", f
->path
);
636 case -ETXTBSY
: /* Journal file is from the future */
637 log_warning("%s: Journal file is from the future, rotating.", f
->path
);
645 static void write_to_journal(Server
*s
, uid_t uid
, struct iovec
*iovec
, size_t n
, int priority
) {
646 bool vacuumed
= false, rotate
= false;
647 struct dual_timestamp ts
;
655 /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
656 * the source time, and not even the time the event was originally seen, but instead simply the time we started
657 * processing it, as we want strictly linear ordering in what we write out.) */
658 assert_se(sd_event_now(s
->event
, CLOCK_REALTIME
, &ts
.realtime
) >= 0);
659 assert_se(sd_event_now(s
->event
, CLOCK_MONOTONIC
, &ts
.monotonic
) >= 0);
661 if (ts
.realtime
< s
->last_realtime_clock
) {
662 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
663 * regular operation. However, when it does happen, then we should make sure that we start fresh files
664 * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
665 * bisection works correctly. */
667 log_debug("Time jumped backwards, rotating.");
671 f
= find_journal(s
, uid
);
675 if (journal_file_rotate_suggested(f
, s
->max_file_usec
)) {
676 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f
->path
);
683 server_vacuum(s
, false);
686 f
= find_journal(s
, uid
);
691 s
->last_realtime_clock
= ts
.realtime
;
693 r
= journal_file_append_entry(f
, &ts
, NULL
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
695 server_schedule_sync(s
, priority
);
699 if (vacuumed
|| !shall_try_append_again(f
, r
)) {
700 log_error_errno(r
, "Failed to write entry (%zu items, %zu bytes), ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
705 server_vacuum(s
, false);
707 f
= find_journal(s
, uid
);
711 log_debug("Retrying write.");
712 r
= journal_file_append_entry(f
, &ts
, NULL
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
714 log_error_errno(r
, "Failed to write entry (%zu items, %zu bytes) despite vacuuming, ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
716 server_schedule_sync(s
, priority
);
719 #define IOVEC_ADD_NUMERIC_FIELD(iovec, n, value, type, isset, format, field) \
720 if (isset(value)) { \
722 k = newa(char, STRLEN(field "=") + DECIMAL_STR_MAX(type) + 1); \
723 sprintf(k, field "=" format, value); \
724 iovec[n++] = IOVEC_MAKE_STRING(k); \
727 #define IOVEC_ADD_STRING_FIELD(iovec, n, value, field) \
728 if (!isempty(value)) { \
730 k = strjoina(field "=", value); \
731 iovec[n++] = IOVEC_MAKE_STRING(k); \
734 #define IOVEC_ADD_ID128_FIELD(iovec, n, value, field) \
735 if (!sd_id128_is_null(value)) { \
737 k = newa(char, STRLEN(field "=") + SD_ID128_STRING_MAX); \
738 sd_id128_to_string(value, stpcpy(k, field "=")); \
739 iovec[n++] = IOVEC_MAKE_STRING(k); \
742 #define IOVEC_ADD_SIZED_FIELD(iovec, n, value, value_size, field) \
743 if (value_size > 0) { \
745 k = newa(char, STRLEN(field "=") + value_size + 1); \
746 *((char*) mempcpy(stpcpy(k, field "="), value, value_size)) = 0; \
747 iovec[n++] = IOVEC_MAKE_STRING(k); \
750 static void dispatch_message_real(
752 struct iovec
*iovec
, size_t n
, size_t m
,
753 const ClientContext
*c
,
754 const struct timeval
*tv
,
758 char source_time
[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t
)];
766 N_IOVEC_META_FIELDS
+
767 (pid_is_valid(object_pid
) ? N_IOVEC_OBJECT_FIELDS
: 0) +
768 client_context_extra_fields_n_iovec(c
) <= m
);
771 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->pid
, pid_t
, pid_is_valid
, PID_FMT
, "_PID");
772 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->uid
, uid_t
, uid_is_valid
, UID_FMT
, "_UID");
773 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->gid
, gid_t
, gid_is_valid
, GID_FMT
, "_GID");
775 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->comm
, "_COMM");
776 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->exe
, "_EXE");
777 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->cmdline
, "_CMDLINE");
778 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->capeff
, "_CAP_EFFECTIVE");
780 IOVEC_ADD_SIZED_FIELD(iovec
, n
, c
->label
, c
->label_size
, "_SELINUX_CONTEXT");
782 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->auditid
, uint32_t, audit_session_is_valid
, "%" PRIu32
, "_AUDIT_SESSION");
783 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->loginuid
, uid_t
, uid_is_valid
, UID_FMT
, "_AUDIT_LOGINUID");
785 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->cgroup
, "_SYSTEMD_CGROUP");
786 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->session
, "_SYSTEMD_SESSION");
787 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->owner_uid
, uid_t
, uid_is_valid
, UID_FMT
, "_SYSTEMD_OWNER_UID");
788 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->unit
, "_SYSTEMD_UNIT");
789 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->user_unit
, "_SYSTEMD_USER_UNIT");
790 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->slice
, "_SYSTEMD_SLICE");
791 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->user_slice
, "_SYSTEMD_USER_SLICE");
793 IOVEC_ADD_ID128_FIELD(iovec
, n
, c
->invocation_id
, "_SYSTEMD_INVOCATION_ID");
795 if (c
->extra_fields_n_iovec
> 0) {
796 memcpy(iovec
+ n
, c
->extra_fields_iovec
, c
->extra_fields_n_iovec
* sizeof(struct iovec
));
797 n
+= c
->extra_fields_n_iovec
;
803 if (pid_is_valid(object_pid
) && client_context_get(s
, object_pid
, NULL
, NULL
, 0, NULL
, &o
) >= 0) {
805 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->pid
, pid_t
, pid_is_valid
, PID_FMT
, "OBJECT_PID");
806 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->uid
, uid_t
, uid_is_valid
, UID_FMT
, "OBJECT_UID");
807 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->gid
, gid_t
, gid_is_valid
, GID_FMT
, "OBJECT_GID");
809 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->comm
, "OBJECT_COMM");
810 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->exe
, "OBJECT_EXE");
811 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->cmdline
, "OBJECT_CMDLINE");
812 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->capeff
, "OBJECT_CAP_EFFECTIVE");
814 IOVEC_ADD_SIZED_FIELD(iovec
, n
, o
->label
, o
->label_size
, "OBJECT_SELINUX_CONTEXT");
816 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->auditid
, uint32_t, audit_session_is_valid
, "%" PRIu32
, "OBJECT_AUDIT_SESSION");
817 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->loginuid
, uid_t
, uid_is_valid
, UID_FMT
, "OBJECT_AUDIT_LOGINUID");
819 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->cgroup
, "OBJECT_SYSTEMD_CGROUP");
820 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->session
, "OBJECT_SYSTEMD_SESSION");
821 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->owner_uid
, uid_t
, uid_is_valid
, UID_FMT
, "OBJECT_SYSTEMD_OWNER_UID");
822 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->unit
, "OBJECT_SYSTEMD_UNIT");
823 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->user_unit
, "OBJECT_SYSTEMD_USER_UNIT");
824 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->slice
, "OBJECT_SYSTEMD_SLICE");
825 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->user_slice
, "OBJECT_SYSTEMD_USER_SLICE");
827 IOVEC_ADD_ID128_FIELD(iovec
, n
, o
->invocation_id
, "OBJECT_SYSTEMD_INVOCATION_ID=");
833 sprintf(source_time
, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT
, timeval_load(tv
));
834 iovec
[n
++] = IOVEC_MAKE_STRING(source_time
);
837 /* Note that strictly speaking storing the boot id here is
838 * redundant since the entry includes this in-line
839 * anyway. However, we need this indexed, too. */
840 if (!isempty(s
->boot_id_field
))
841 iovec
[n
++] = IOVEC_MAKE_STRING(s
->boot_id_field
);
843 if (!isempty(s
->machine_id_field
))
844 iovec
[n
++] = IOVEC_MAKE_STRING(s
->machine_id_field
);
846 if (!isempty(s
->hostname_field
))
847 iovec
[n
++] = IOVEC_MAKE_STRING(s
->hostname_field
);
851 if (s
->split_mode
== SPLIT_UID
&& c
&& uid_is_valid(c
->uid
))
852 /* Split up strictly by (non-root) UID */
853 journal_uid
= c
->uid
;
854 else if (s
->split_mode
== SPLIT_LOGIN
&& c
&& c
->uid
> 0 && uid_is_valid(c
->owner_uid
))
855 /* Split up by login UIDs. We do this only if the
856 * realuid is not root, in order not to accidentally
857 * leak privileged information to the user that is
858 * logged by a privileged process that is part of an
859 * unprivileged session. */
860 journal_uid
= c
->owner_uid
;
864 write_to_journal(s
, journal_uid
, iovec
, n
, priority
);
867 void server_driver_message(Server
*s
, pid_t object_pid
, const char *message_id
, const char *format
, ...) {
877 m
= N_IOVEC_META_FIELDS
+ 5 + N_IOVEC_PAYLOAD_FIELDS
+ client_context_extra_fields_n_iovec(s
->my_context
) + N_IOVEC_OBJECT_FIELDS
;
878 iovec
= newa(struct iovec
, m
);
880 assert_cc(3 == LOG_FAC(LOG_DAEMON
));
881 iovec
[n
++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=3");
882 iovec
[n
++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=systemd-journald");
884 iovec
[n
++] = IOVEC_MAKE_STRING("_TRANSPORT=driver");
885 assert_cc(6 == LOG_INFO
);
886 iovec
[n
++] = IOVEC_MAKE_STRING("PRIORITY=6");
889 iovec
[n
++] = IOVEC_MAKE_STRING(message_id
);
892 va_start(ap
, format
);
893 r
= log_format_iovec(iovec
, m
, &n
, false, 0, format
, ap
);
894 /* Error handling below */
898 dispatch_message_real(s
, iovec
, n
, m
, s
->my_context
, NULL
, LOG_INFO
, object_pid
);
901 free(iovec
[k
++].iov_base
);
904 /* We failed to format the message. Emit a warning instead. */
907 xsprintf(buf
, "MESSAGE=Entry printing failed: %s", strerror(-r
));
910 iovec
[n
++] = IOVEC_MAKE_STRING("PRIORITY=4");
911 iovec
[n
++] = IOVEC_MAKE_STRING(buf
);
912 dispatch_message_real(s
, iovec
, n
, m
, s
->my_context
, NULL
, LOG_INFO
, object_pid
);
916 void server_dispatch_message(
918 struct iovec
*iovec
, size_t n
, size_t m
,
920 const struct timeval
*tv
,
924 uint64_t available
= 0;
928 assert(iovec
|| n
== 0);
933 if (LOG_PRI(priority
) > s
->max_level_store
)
936 /* Stop early in case the information will not be stored
938 if (s
->storage
== STORAGE_NONE
)
942 (void) determine_space(s
, &available
, NULL
);
944 rl
= journal_rate_limit_test(s
->rate_limit
, c
->unit
, priority
& LOG_PRIMASK
, available
);
948 /* Write a suppression message if we suppressed something */
950 server_driver_message(s
, c
->pid
,
951 "MESSAGE_ID=" SD_MESSAGE_JOURNAL_DROPPED_STR
,
952 LOG_MESSAGE("Suppressed %i messages from %s", rl
- 1, c
->unit
),
953 "N_DROPPED=%i", rl
- 1,
957 dispatch_message_real(s
, iovec
, n
, m
, c
, tv
, priority
, object_pid
);
960 int server_flush_to_var(Server
*s
, bool require_flag_file
) {
962 sd_journal
*j
= NULL
;
963 char ts
[FORMAT_TIMESPAN_MAX
];
970 if (!IN_SET(s
->storage
, STORAGE_AUTO
, STORAGE_PERSISTENT
))
973 if (!s
->runtime_journal
)
976 if (require_flag_file
&& !flushed_flag_is_set())
979 (void) system_journal_open(s
, true);
981 if (!s
->system_journal
)
984 log_debug("Flushing to /var...");
986 start
= now(CLOCK_MONOTONIC
);
988 r
= sd_id128_get_machine(&machine
);
992 r
= sd_journal_open(&j
, SD_JOURNAL_RUNTIME_ONLY
);
994 return log_error_errno(r
, "Failed to read runtime journal: %m");
996 sd_journal_set_data_threshold(j
, 0);
998 SD_JOURNAL_FOREACH(j
) {
1002 f
= j
->current_file
;
1003 assert(f
&& f
->current_offset
> 0);
1007 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
1009 log_error_errno(r
, "Can't read entry: %m");
1013 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
);
1017 if (!shall_try_append_again(s
->system_journal
, r
)) {
1018 log_error_errno(r
, "Can't write entry: %m");
1023 server_vacuum(s
, false);
1025 if (!s
->system_journal
) {
1026 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1031 log_debug("Retrying write.");
1032 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
);
1034 log_error_errno(r
, "Can't write entry: %m");
1042 journal_file_post_change(s
->system_journal
);
1044 s
->runtime_journal
= journal_file_close(s
->runtime_journal
);
1047 (void) rm_rf("/run/log/journal", REMOVE_ROOT
);
1049 sd_journal_close(j
);
1051 server_driver_message(s
, 0, NULL
,
1052 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1053 format_timespan(ts
, sizeof(ts
), now(CLOCK_MONOTONIC
) - start
, 0),
1060 int server_process_datagram(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1061 Server
*s
= userdata
;
1062 struct ucred
*ucred
= NULL
;
1063 struct timeval
*tv
= NULL
;
1064 struct cmsghdr
*cmsg
;
1066 size_t label_len
= 0, m
;
1069 int *fds
= NULL
, v
= 0;
1073 struct cmsghdr cmsghdr
;
1075 /* We use NAME_MAX space for the SELinux label
1076 * here. The kernel currently enforces no
1077 * limit, but according to suggestions from
1078 * the SELinux people this will change and it
1079 * will probably be identical to NAME_MAX. For
1080 * now we use that, but this should be updated
1081 * one day when the final limit is known. */
1082 uint8_t buf
[CMSG_SPACE(sizeof(struct ucred
)) +
1083 CMSG_SPACE(sizeof(struct timeval
)) +
1084 CMSG_SPACE(sizeof(int)) + /* fd */
1085 CMSG_SPACE(NAME_MAX
)]; /* selinux label */
1088 union sockaddr_union sa
= {};
1090 struct msghdr msghdr
= {
1093 .msg_control
= &control
,
1094 .msg_controllen
= sizeof(control
),
1096 .msg_namelen
= sizeof(sa
),
1100 assert(fd
== s
->native_fd
|| fd
== s
->syslog_fd
|| fd
== s
->audit_fd
);
1102 if (revents
!= EPOLLIN
) {
1103 log_error("Got invalid event from epoll for datagram fd: %"PRIx32
, revents
);
1107 /* Try to get the right size, if we can. (Not all sockets support SIOCINQ, hence we just try, but don't rely on
1109 (void) ioctl(fd
, SIOCINQ
, &v
);
1111 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1112 m
= PAGE_ALIGN(MAX3((size_t) v
+ 1,
1114 ALIGN(sizeof(struct nlmsghdr
)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH
)) + 1);
1116 if (!GREEDY_REALLOC(s
->buffer
, s
->buffer_size
, m
))
1119 iovec
.iov_base
= s
->buffer
;
1120 iovec
.iov_len
= s
->buffer_size
- 1; /* Leave room for trailing NUL we add later */
1122 n
= recvmsg(fd
, &msghdr
, MSG_DONTWAIT
|MSG_CMSG_CLOEXEC
);
1124 if (IN_SET(errno
, EINTR
, EAGAIN
))
1127 return log_error_errno(errno
, "recvmsg() failed: %m");
1130 CMSG_FOREACH(cmsg
, &msghdr
) {
1132 if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1133 cmsg
->cmsg_type
== SCM_CREDENTIALS
&&
1134 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct ucred
)))
1135 ucred
= (struct ucred
*) CMSG_DATA(cmsg
);
1136 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1137 cmsg
->cmsg_type
== SCM_SECURITY
) {
1138 label
= (char*) CMSG_DATA(cmsg
);
1139 label_len
= cmsg
->cmsg_len
- CMSG_LEN(0);
1140 } else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1141 cmsg
->cmsg_type
== SO_TIMESTAMP
&&
1142 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct timeval
)))
1143 tv
= (struct timeval
*) CMSG_DATA(cmsg
);
1144 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1145 cmsg
->cmsg_type
== SCM_RIGHTS
) {
1146 fds
= (int*) CMSG_DATA(cmsg
);
1147 n_fds
= (cmsg
->cmsg_len
- CMSG_LEN(0)) / sizeof(int);
1151 /* And a trailing NUL, just in case */
1154 if (fd
== s
->syslog_fd
) {
1155 if (n
> 0 && n_fds
== 0)
1156 server_process_syslog_message(s
, s
->buffer
, n
, ucred
, tv
, label
, label_len
);
1158 log_warning("Got file descriptors via syslog socket. Ignoring.");
1160 } else if (fd
== s
->native_fd
) {
1161 if (n
> 0 && n_fds
== 0)
1162 server_process_native_message(s
, s
->buffer
, n
, ucred
, tv
, label
, label_len
);
1163 else if (n
== 0 && n_fds
== 1)
1164 server_process_native_file(s
, fds
[0], ucred
, tv
, label
, label_len
);
1166 log_warning("Got too many file descriptors via native socket. Ignoring.");
1169 assert(fd
== s
->audit_fd
);
1171 if (n
> 0 && n_fds
== 0)
1172 server_process_audit_message(s
, s
->buffer
, n
, ucred
, &sa
, msghdr
.msg_namelen
);
1174 log_warning("Got file descriptors via audit socket. Ignoring.");
1177 close_many(fds
, n_fds
);
1181 static int dispatch_sigusr1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1182 Server
*s
= userdata
;
1187 log_info("Received request to flush runtime journal from PID " PID_FMT
, si
->ssi_pid
);
1189 (void) server_flush_to_var(s
, false);
1191 server_vacuum(s
, false);
1193 r
= touch("/run/systemd/journal/flushed");
1195 log_warning_errno(r
, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
1197 server_space_usage_message(s
, NULL
);
1201 static int dispatch_sigusr2(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1202 Server
*s
= userdata
;
1207 log_info("Received request to rotate journal from PID " PID_FMT
, si
->ssi_pid
);
1209 server_vacuum(s
, true);
1211 if (s
->system_journal
)
1212 patch_min_use(&s
->system_storage
);
1213 if (s
->runtime_journal
)
1214 patch_min_use(&s
->runtime_storage
);
1216 /* Let clients know when the most recent rotation happened. */
1217 r
= write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC
));
1219 log_warning_errno(r
, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
/* Event-loop callback shared by SIGTERM and SIGINT (both are registered with this function in
 * setup_signals()). It logs the received signal at LOG_INFO and asks the event loop stored in
 * s->event to exit with code 0.
 *
 * NOTE(review): the return statement and closing brace are not present in this listing. */
1224 static int dispatch_sigterm(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1225 Server
*s
= userdata
;
1229 log_received_signal(LOG_INFO
, si
);
1231 sd_event_exit(s
->event
, 0);
/* Event-loop callback for SIGRTMIN+1 (installed by setup_signals()).
 *
 * Visible steps: log at debug level which PID asked for a sync, then write a CLOCK_MONOTONIC
 * timestamp to /run/systemd/journal/synced so that clients can observe when the sync happened.
 *
 * NOTE(review): the call that performs the sync itself is not visible here; the listing appears
 * to have dropped lines (also the declaration of r, the guard before the warning, the return and
 * the closing brace) - confirm against the complete file. */
1235 static int dispatch_sigrtmin1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1236 Server
*s
= userdata
;
1241 log_debug("Received request to sync from PID " PID_FMT
, si
->ssi_pid
);
1245 /* Let clients know when the most recent sync happened. */
1246 r
= write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC
));
1248 log_warning_errno(r
, "Failed to write /run/systemd/journal/synced, ignoring: %m");
/* Installs the signal handling of the server on the event loop in s->event.
 *
 * The signal mask is set (SIG_SETMASK) to SIGINT, SIGTERM, SIGUSR1, SIGUSR2 and SIGRTMIN+1, and
 * one event source per signal is stored in the Server object:
 *   SIGUSR1    -> dispatch_sigusr1
 *   SIGUSR2    -> dispatch_sigusr2
 *   SIGTERM    -> dispatch_sigterm, priority SD_EVENT_PRIORITY_NORMAL+20
 *   SIGINT     -> dispatch_sigterm, priority SD_EVENT_PRIORITY_NORMAL+20
 *   SIGRTMIN+1 -> dispatch_sigrtmin1, priority SD_EVENT_PRIORITY_NORMAL+15
 *
 * NOTE(review): every call stores its result in r, but the checks of r, the declaration of r and
 * the final return are not present in this listing - presumably each failure is propagated to
 * the caller; confirm against the complete file. */
1253 static int setup_signals(Server
*s
) {
/* assert_se() keeps the call even when assertions are compiled out - TODO confirm macro. */
1258 assert_se(sigprocmask_many(SIG_SETMASK
, NULL
, SIGINT
, SIGTERM
, SIGUSR1
, SIGUSR2
, SIGRTMIN
+1, -1) >= 0);
1260 r
= sd_event_add_signal(s
->event
, &s
->sigusr1_event_source
, SIGUSR1
, dispatch_sigusr1
, s
);
1264 r
= sd_event_add_signal(s
->event
, &s
->sigusr2_event_source
, SIGUSR2
, dispatch_sigusr2
, s
);
1268 r
= sd_event_add_signal(s
->event
, &s
->sigterm_event_source
, SIGTERM
, dispatch_sigterm
, s
);
1272 /* Let's process SIGTERM late, so that we flush all queued
1273 * messages to disk before we exit */
1274 r
= sd_event_source_set_priority(s
->sigterm_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1278 /* When journald is invoked on the terminal (when debugging),
1279 * it's useful if C-c is handled equivalent to SIGTERM. */
1280 r
= sd_event_add_signal(s
->event
, &s
->sigint_event_source
, SIGINT
, dispatch_sigterm
, s
);
1284 r
= sd_event_source_set_priority(s
->sigint_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1288 /* SIGRTMIN+1 causes an immediate sync. We process this very
1289 * late, so that everything else queued at this point is
1290 * really written to disk. Clients can watch
1291 * /run/systemd/journal/synced with inotify until its mtime
1292 * changes to see when a sync happened. */
1293 r
= sd_event_add_signal(s
->event
, &s
->sigrtmin1_event_source
, SIGRTMIN
+1, dispatch_sigrtmin1
, s
);
1297 r
= sd_event_source_set_priority(s
->sigrtmin1_event_source
, SD_EVENT_PRIORITY_NORMAL
+15);
/* Per-item callback handed to proc_cmdline_parse() by server_init(); applies the
 * systemd.journald.* kernel command line switches to the Server object.
 *
 * Two families of keys are visible:
 *   - forward_to_syslog, forward_to_kmsg, forward_to_console, forward_to_wall: parsed with
 *     parse_boolean(); a key without a value counts as true.
 *   - max_level_console, max_level_store, max_level_syslog, max_level_kmsg, max_level_wall:
 *     checked with proc_cmdline_value_missing() and then parsed with log_level_from_string().
 * Any other key starting with systemd.journald only produces a warning.
 *
 * NOTE(review): the local s (presumably taken from the data argument), the declaration of r,
 * the r < 0 guards and else branches around the warnings and assignments, the early returns
 * after proc_cmdline_value_missing() and the final return are not present in this listing -
 * confirm against the complete file. */
1304 static int parse_proc_cmdline_item(const char *key
, const char *value
, void *data
) {
1310 if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_syslog")) {
/* A bare key without a value is treated as enabled; the same form is used for all forward_to_* keys. */
1312 r
= value
? parse_boolean(value
) : true;
1314 log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value
);
1316 s
->forward_to_syslog
= r
;
1318 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_kmsg")) {
1320 r
= value
? parse_boolean(value
) : true;
1322 log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value
);
1324 s
->forward_to_kmsg
= r
;
1326 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_console")) {
1328 r
= value
? parse_boolean(value
) : true;
1330 log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value
);
1332 s
->forward_to_console
= r
;
1334 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_wall")) {
1336 r
= value
? parse_boolean(value
) : true;
1338 log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value
);
1340 s
->forward_to_wall
= r
;
1342 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_console")) {
1344 if (proc_cmdline_value_missing(key
, value
))
1347 r
= log_level_from_string(value
);
1349 log_warning("Failed to parse max level console value \"%s\". Ignoring.", value
);
1351 s
->max_level_console
= r
;
1353 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_store")) {
1355 if (proc_cmdline_value_missing(key
, value
))
1358 r
= log_level_from_string(value
);
1360 log_warning("Failed to parse max level store value \"%s\". Ignoring.", value
);
1362 s
->max_level_store
= r
;
1364 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_syslog")) {
1366 if (proc_cmdline_value_missing(key
, value
))
1369 r
= log_level_from_string(value
);
1371 log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value
);
1373 s
->max_level_syslog
= r
;
1375 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_kmsg")) {
1377 if (proc_cmdline_value_missing(key
, value
))
1380 r
= log_level_from_string(value
);
1382 log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value
);
1384 s
->max_level_kmsg
= r
;
1386 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_wall")) {
1388 if (proc_cmdline_value_missing(key
, value
))
1391 r
= log_level_from_string(value
);
1393 log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value
);
1395 s
->max_level_wall
= r
;
1397 } else if (startswith(key
, "systemd.journald"))
1398 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key
);
1400 /* do not warn about state here, since probably systemd already did */
/* Loads the journald configuration into the Server object and returns whatever
 * config_parse_many_nulstr() returns. The main file is PKGSYSCONFDIR/journald.conf, drop-ins come
 * from the systemd/journald.conf.d directories produced by CONF_PATHS_NULSTR(), items are
 * resolved through journald_gperf_lookup and parsing runs with CONFIG_PARSE_WARN.
 *
 * NOTE(review): at least one argument line of the call (between the drop-in paths and the
 * lookup function) and the closing brace are not present in this listing. */
1404 static int server_parse_config_file(Server
*s
) {
1407 return config_parse_many_nulstr(PKGSYSCONFDIR
"/journald.conf",
1408 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1410 config_item_perf_lookup
, journald_gperf_lookup
,
1411 CONFIG_PARSE_WARN
, s
);
/* Timer callback that server_schedule_sync() registers through sd_event_add_time(); userdata is
 * the Server object.
 *
 * NOTE(review): only the signature and the userdata assignment are present in this listing; the
 * statements that perform the sync and the return are missing - confirm against the complete
 * file. */
1414 static int server_dispatch_sync(sd_event_source
*es
, usec_t t
, void *userdata
) {
1415 Server
*s
= userdata
;
/* Arranges for the journal to be synced to disk after a message of the given syslog priority.
 *
 * Visible logic:
 *   - priority <= LOG_CRIT (CRIT, ALERT, EMERG) is the immediate-sync case.
 *   - s->sync_scheduled is tested so that an already pending sync is not scheduled twice.
 *   - with s->sync_interval_usec > 0 the deadline is the current CLOCK_MONOTONIC event time plus
 *     that interval; the timer source s->sync_event_source is created on first use with
 *     server_dispatch_sync as callback and SD_EVENT_PRIORITY_IMPORTANT, and it is armed with
 *     sd_event_source_set_time() and SD_EVENT_ONESHOT.
 *   - s->sync_scheduled is set to true at the end.
 *
 * NOTE(review): the bodies of the first two if statements, the else between creating and
 * re-arming the timer, several sd_event_add_time() arguments, all checks of r and the returns
 * are not present in this listing - confirm against the complete file. */
1423 int server_schedule_sync(Server
*s
, int priority
) {
1428 if (priority
<= LOG_CRIT
) {
1429 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1434 if (s
->sync_scheduled
)
1437 if (s
->sync_interval_usec
> 0) {
1440 r
= sd_event_now(s
->event
, CLOCK_MONOTONIC
, &when
);
1444 when
+= s
->sync_interval_usec
;
/* The timer source is allocated lazily, the first time a delayed sync is needed. */
1446 if (!s
->sync_event_source
) {
1447 r
= sd_event_add_time(
1449 &s
->sync_event_source
,
1452 server_dispatch_sync
, s
);
1456 r
= sd_event_source_set_priority(s
->sync_event_source
, SD_EVENT_PRIORITY_IMPORTANT
);
1458 r
= sd_event_source_set_time(s
->sync_event_source
, when
);
1462 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_ONESHOT
);
1467 s
->sync_scheduled
= true;
/* I/O callback registered by server_open_hostname() on the descriptor of
 * /proc/sys/kernel/hostname; it refreshes the cached hostname through server_cache_hostname().
 *
 * NOTE(review): the return statement and closing brace are not present in this listing. */
1473 static int dispatch_hostname_change(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1474 Server
*s
= userdata
;
1478 server_cache_hostname(s
);
/* Opens /proc/sys/kernel/hostname (read-only, close-on-exec, non-blocking, no controlling tty),
 * stores the descriptor in s->hostname_fd and registers it on the event loop with an event mask
 * of 0 and dispatch_hostname_change as callback. The event source priority is then set to
 * SD_EVENT_PRIORITY_IMPORTANT-10.
 *
 * Error handling visible here: a failed open() is returned through log_error_errno(errno, ...);
 * a failed registration has two outcomes - a warning followed by closing the descriptor (the
 * message says the failure is ignored), or an error return. The existing comment ties the
 * ignored case to kernels older than 3.2 that cannot poll this file.
 *
 * NOTE(review): the conditions that select between those two outcomes, the remainder of the
 * inline comment, the declaration of r and the final return are not present in this listing -
 * confirm against the complete file. */
1482 static int server_open_hostname(Server
*s
) {
1487 s
->hostname_fd
= open("/proc/sys/kernel/hostname",
1488 O_RDONLY
|O_CLOEXEC
|O_NONBLOCK
|O_NOCTTY
);
1489 if (s
->hostname_fd
< 0)
1490 return log_error_errno(errno
, "Failed to open /proc/sys/kernel/hostname: %m");
1492 r
= sd_event_add_io(s
->event
, &s
->hostname_event_source
, s
->hostname_fd
, 0, dispatch_hostname_change
, s
);
1494 /* kernels prior to 3.2 don't support polling this file. Ignore
1497 log_warning_errno(r
, "Failed to register hostname fd in event loop, ignoring: %m");
1498 s
->hostname_fd
= safe_close(s
->hostname_fd
);
1502 return log_error_errno(r
, "Failed to register hostname fd in event loop: %m");
1505 r
= sd_event_source_set_priority(s
->hostname_event_source
, SD_EVENT_PRIORITY_IMPORTANT
-10);
1507 return log_error_errno(r
, "Failed to adjust priority of host name event source: %m");
// I/O callback for the notification socket; server_connect_notify() registers it for EPOLLOUT
// on s->notify_fd.
//
// Each invocation handles at most one pending item, checked in this order:
//   1. the initial READY=1 message, while s->sent_notify_ready is still false;
//   2. a WATCHDOG=1 message, while s->send_watchdog is set;
//   3. one queued stdout stream notification via stdout_stream_send_notify().
// Both send() calls use MSG_DONTWAIT. EAGAIN is tested separately from the other errors, which
// are returned through log_error_errno().
// When neither s->send_watchdog nor the stream queue has work left, the event source is
// switched to SD_EVENT_OFF.
//
// NOTE(review): the declarations of r and l, parts of both message literals, the l < 0 guards,
// the statements under the EAGAIN tests and under the still-more-to-do test, and the final
// return are not present in this listing - confirm against the complete file.
1512 static int dispatch_notify_event(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1513 Server
*s
= userdata
;
1517 assert(s
->notify_event_source
== es
);
1518 assert(s
->notify_fd
== fd
);
1520 /* The $NOTIFY_SOCKET is writable again, now send exactly one
1521 * message on it. Either it's the watchdog event, the initial
1522 * READY=1 event or an stdout stream event. If there's nothing
1523 * to write anymore, turn our event source off. The next time
1524 * there's something to send it will be turned on again. */
1526 if (!s
->sent_notify_ready
) {
1527 static const char p
[] =
1529 "STATUS=Processing requests...";
1532 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1534 if (errno
== EAGAIN
)
1537 return log_error_errno(errno
, "Failed to send READY=1 notification message: %m");
1540 s
->sent_notify_ready
= true;
1541 log_debug("Sent READY=1 notification.");
1543 } else if (s
->send_watchdog
) {
1545 static const char p
[] =
1550 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1552 if (errno
== EAGAIN
)
1555 return log_error_errno(errno
, "Failed to send WATCHDOG=1 notification message: %m");
1558 s
->send_watchdog
= false;
1559 log_debug("Sent WATCHDOG=1 notification.");
1561 } else if (s
->stdout_streams_notify_queue
)
1562 /* Dispatch one stream notification event */
1563 stdout_stream_send_notify(s
->stdout_streams_notify_queue
);
1565 /* Leave us enabled if there's still more to do. */
1566 if (s
->send_watchdog
|| s
->stdout_streams_notify_queue
)
1569 /* There was nothing to do anymore, let's turn ourselves off. */
1570 r
= sd_event_source_set_enabled(es
, SD_EVENT_OFF
);
1572 return log_error_errno(r
, "Failed to turn off notify event source: %m");
/* Timer callback of s->watchdog_event_source (created in server_connect_notify()).
 *
 * It does not send anything itself: it sets s->send_watchdog and switches the notify event
 * source on, so that dispatch_notify_event() emits the WATCHDOG=1 message. Afterwards the timer
 * is re-armed at usec + s->watchdog_usec / 2 and enabled again with SD_EVENT_ON.
 *
 * Failing to enable the notify source only produces a warning; failures while re-arming or
 * re-enabling the timer are returned through log_error_errno().
 *
 * NOTE(review): the declaration of r, the r < 0 guards and the final return are not present in
 * this listing - confirm against the complete file. */
1577 static int dispatch_watchdog(sd_event_source
*es
, uint64_t usec
, void *userdata
) {
1578 Server
*s
= userdata
;
1583 s
->send_watchdog
= true;
1585 r
= sd_event_source_set_enabled(s
->notify_event_source
, SD_EVENT_ON
);
1587 log_warning_errno(r
, "Failed to turn on notify event source: %m");
1589 r
= sd_event_source_set_time(s
->watchdog_event_source
, usec
+ s
->watchdog_usec
/ 2);
1591 return log_error_errno(r
, "Failed to restart watchdog event source: %m");
1593 r
= sd_event_source_set_enabled(s
->watchdog_event_source
, SD_EVENT_ON
);
1595 return log_error_errno(r
, "Failed to enable watchdog event source: %m");
/* Connects a dedicated non-blocking datagram socket to the address in $NOTIFY_SOCKET and hooks
 * it into the event loop, so that notification messages are only sent when the socket is
 * writable (the long comment in the body explains the deadlock this avoids).
 *
 * Visible steps:
 *   - precondition: s->notify_fd < 0 and no notify event source yet;
 *   - read NOTIFY_SOCKET; the value must start with @ or / and be longer than one character,
 *     and must not be longer than sizeof(sun_path);
 *   - create an AF_UNIX SOCK_DGRAM socket (close-on-exec, non-blocking) and try to enlarge its
 *     send buffer to NOTIFY_SNDBUF_SIZE, ignoring the result;
 *   - copy the path into sun_path; a leading @ is replaced by a zero byte;
 *   - connect(), then register dispatch_notify_event for EPOLLOUT;
 *   - if sd_watchdog_enabled() reports an active watchdog, set s->send_watchdog and add a
 *     CLOCK_MONOTONIC timer firing at now + watchdog_usec/2 with accuracy watchdog_usec/4 and
 *     dispatch_watchdog as callback.
 *
 * NOTE(review): the declarations of e and r, the handling of an unset NOTIFY_SOCKET, the returns
 * after the two log_error() calls, the r < 0 guards, the delimiters of the long explanatory
 * comment, the end of the last comment and the final return are not present in this listing -
 * confirm against the complete file. */
1600 static int server_connect_notify(Server
*s
) {
1601 union sockaddr_union sa
= {
1602 .un
.sun_family
= AF_UNIX
,
1608 assert(s
->notify_fd
< 0);
1609 assert(!s
->notify_event_source
);
1612 So here's the problem: we'd like to send notification
1613 messages to PID 1, but we cannot do that via sd_notify(),
1614 since that's synchronous, and we might end up blocking on
1615 it. Specifically: given that PID 1 might block on
1616 dbus-daemon during IPC, and dbus-daemon is logging to us,
1617 and might hence block on us, we might end up in a deadlock
1618 if we block on sending PID 1 notification messages — by
1619 generating a full blocking circle. To avoid this, let's
1620 create a non-blocking socket, and connect it to the
1621 notification socket, and then wait for POLLOUT before we
1622 send anything. This should efficiently avoid any deadlocks,
1623 as we'll never block on PID 1, hence PID 1 can safely block
1624 on dbus-daemon which can safely block on us again.
1626 Don't think that this issue is real? It is, see:
1627 https://github.com/systemd/systemd/issues/1505
1630 e
= getenv("NOTIFY_SOCKET");
1634 if (!IN_SET(e
[0], '@', '/') || e
[1] == 0) {
1635 log_error("NOTIFY_SOCKET set to an invalid value: %s", e
);
1639 if (strlen(e
) > sizeof(sa
.un
.sun_path
)) {
1640 log_error("NOTIFY_SOCKET path too long: %s", e
);
1644 s
->notify_fd
= socket(AF_UNIX
, SOCK_DGRAM
|SOCK_CLOEXEC
|SOCK_NONBLOCK
, 0);
1645 if (s
->notify_fd
< 0)
1646 return log_error_errno(errno
, "Failed to create notify socket: %m");
1648 (void) fd_inc_sndbuf(s
->notify_fd
, NOTIFY_SNDBUF_SIZE
);
1650 strncpy(sa
.un
.sun_path
, e
, sizeof(sa
.un
.sun_path
));
1651 if (sa
.un
.sun_path
[0] == '@')
1652 sa
.un
.sun_path
[0] = 0;
1654 r
= connect(s
->notify_fd
, &sa
.sa
, SOCKADDR_UN_LEN(sa
.un
));
1656 return log_error_errno(errno
, "Failed to connect to notify socket: %m");
1658 r
= sd_event_add_io(s
->event
, &s
->notify_event_source
, s
->notify_fd
, EPOLLOUT
, dispatch_notify_event
, s
);
1660 return log_error_errno(r
, "Failed to watch notification socket: %m");
1662 if (sd_watchdog_enabled(false, &s
->watchdog_usec
) > 0) {
1663 s
->send_watchdog
= true;
1665 r
= sd_event_add_time(s
->event
, &s
->watchdog_event_source
, CLOCK_MONOTONIC
, now(CLOCK_MONOTONIC
) + s
->watchdog_usec
/2, s
->watchdog_usec
/4, dispatch_watchdog
, s
);
1667 return log_error_errno(r
, "Failed to add watchdog time event: %m");
1670 /* This should fire pretty soon, which we'll use to send the
// Initialises a Server object and returns the result of system_journal_open(s, false).
//
// Phases visible in this listing, in order:
//   1. defaults: all descriptors set to -1, compression enabled with threshold (uint64_t) -1,
//      kmsg reading on, watchdog interval USEC_INFINITY, sync interval and rate limit taken
//      from the DEFAULT_* macros, forwarding to wall on, per-target maximum log levels,
//      line_max = DEFAULT_LINE_MAX, storage metrics reset;
//   2. configuration: server_parse_config_file() first, then the kernel command line through
//      parse_proc_cmdline_item(); if exactly one of rate limit interval and burst is zero,
//      both are forced to zero;
//   3. runtime objects: /run/systemd/journal directory, user journal map, mmap cache,
//      deferred-close set, default event loop;
//   4. descriptors passed in by the service manager: each one is classified as native, stdout,
//      syslog or audit socket by type and path, duplicates are reported, stream state is
//      restored on a best-effort basis and leftover descriptors are closed;
//   5. sockets and sources: stdout, syslog, native, kmsg, optionally audit, kernel seqnum,
//      hostname, signals, udev, rate limiter, cgroup root;
//   6. caches (hostname, boot id, machine id), storage names and paths, notify socket and
//      default client context (both best effort), and finally the system journal.
//
// NOTE(review): the declarations of the locals, every r < 0 or allocation-failure check with
// its return, the assignments inside the descriptor classification branches and the closing
// braces are not present in this listing - confirm against the complete file.
1676 int server_init(Server
*s
) {
1677 _cleanup_fdset_free_ FDSet
*fds
= NULL
;
// -1 marks every descriptor as not opened yet.
1684 s
->syslog_fd
= s
->native_fd
= s
->stdout_fd
= s
->dev_kmsg_fd
= s
->audit_fd
= s
->hostname_fd
= s
->notify_fd
= -1;
1685 s
->compress
.enabled
= true;
1686 s
->compress
.threshold_bytes
= (uint64_t) -1;
1688 s
->read_kmsg
= true;
1690 s
->watchdog_usec
= USEC_INFINITY
;
1692 s
->sync_interval_usec
= DEFAULT_SYNC_INTERVAL_USEC
;
1693 s
->sync_scheduled
= false;
1695 s
->rate_limit_interval
= DEFAULT_RATE_LIMIT_INTERVAL
;
1696 s
->rate_limit_burst
= DEFAULT_RATE_LIMIT_BURST
;
1698 s
->forward_to_wall
= true;
1700 s
->max_file_usec
= DEFAULT_MAX_FILE_USEC
;
1702 s
->max_level_store
= LOG_DEBUG
;
1703 s
->max_level_syslog
= LOG_DEBUG
;
1704 s
->max_level_kmsg
= LOG_NOTICE
;
1705 s
->max_level_console
= LOG_INFO
;
1706 s
->max_level_wall
= LOG_EMERG
;
1708 s
->line_max
= DEFAULT_LINE_MAX
;
1710 journal_reset_metrics(&s
->system_storage
.metrics
);
1711 journal_reset_metrics(&s
->runtime_storage
.metrics
);
// The result of the configuration file parse is not stored here; the kernel command line is
// applied afterwards, on top of it.
1713 server_parse_config_file(s
);
1715 r
= proc_cmdline_parse(parse_proc_cmdline_item
, s
, PROC_CMDLINE_STRIP_RD_PREFIX
);
1717 log_warning_errno(r
, "Failed to parse kernel command line, ignoring: %m");
// True when exactly one of the two rate limit settings is zero; both are then zeroed.
1719 if (!!s
->rate_limit_interval
^ !!s
->rate_limit_burst
) {
1720 log_debug("Setting both rate limit interval and burst from "USEC_FMT
",%u to 0,0",
1721 s
->rate_limit_interval
, s
->rate_limit_burst
);
1722 s
->rate_limit_interval
= s
->rate_limit_burst
= 0;
1725 (void) mkdir_p("/run/systemd/journal", 0755);
1727 s
->user_journals
= ordered_hashmap_new(NULL
);
1728 if (!s
->user_journals
)
1731 s
->mmap
= mmap_cache_new();
1735 s
->deferred_closes
= set_new(NULL
);
1736 if (!s
->deferred_closes
)
1739 r
= sd_event_default(&s
->event
);
1741 return log_error_errno(r
, "Failed to create event loop: %m");
1743 n
= sd_listen_fds(true);
1745 return log_error_errno(n
, "Failed to read listening file descriptors from environment: %m");
// Walk the passed-in descriptors and sort them by socket type and bound path.
1747 for (fd
= SD_LISTEN_FDS_START
; fd
< SD_LISTEN_FDS_START
+ n
; fd
++) {
1749 if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/socket", 0) > 0) {
1751 if (s
->native_fd
>= 0) {
1752 log_error("Too many native sockets passed.");
1758 } else if (sd_is_socket_unix(fd
, SOCK_STREAM
, 1, "/run/systemd/journal/stdout", 0) > 0) {
1760 if (s
->stdout_fd
>= 0) {
1761 log_error("Too many stdout sockets passed.");
1767 } else if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/dev/log", 0) > 0 ||
1768 sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1770 if (s
->syslog_fd
>= 0) {
1771 log_error("Too many /dev/log sockets passed.");
1777 } else if (sd_is_socket(fd
, AF_NETLINK
, SOCK_RAW
, -1) > 0) {
1779 if (s
->audit_fd
>= 0) {
1780 log_error("Too many audit sockets passed.");
1794 r
= fdset_put(fds
, fd
);
1800 /* Try to restore streams, but don't bother if this fails */
1801 (void) server_restore_streams(s
, fds
);
1803 if (fdset_size(fds
) > 0) {
1804 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds
));
1805 fds
= fdset_free(fds
);
// Remember whether no socket at all was handed in; used for the audit decision below.
1808 no_sockets
= s
->native_fd
< 0 && s
->stdout_fd
< 0 && s
->syslog_fd
< 0 && s
->audit_fd
< 0;
1810 /* always open stdout, syslog, native, and kmsg sockets */
1812 /* systemd-journald.socket: /run/systemd/journal/stdout */
1813 r
= server_open_stdout_socket(s
);
1817 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
1818 r
= server_open_syslog_socket(s
);
1822 /* systemd-journald.socket: /run/systemd/journal/socket */
1823 r
= server_open_native_socket(s
);
1828 r
= server_open_dev_kmsg(s
);
1832 /* Unless we got *some* sockets and not audit, open audit socket */
1833 if (s
->audit_fd
>= 0 || no_sockets
) {
1834 r
= server_open_audit(s
);
1839 r
= server_open_kernel_seqnum(s
);
1843 r
= server_open_hostname(s
);
1847 r
= setup_signals(s
);
1851 s
->udev
= udev_new();
1855 s
->rate_limit
= journal_rate_limit_new(s
->rate_limit_interval
, s
->rate_limit_burst
);
1859 r
= cg_get_root_path(&s
->cgroup_root
);
1863 server_cache_hostname(s
);
1864 server_cache_boot_id(s
);
1865 server_cache_machine_id(s
);
1867 s
->runtime_storage
.name
= "Runtime journal";
1868 s
->system_storage
.name
= "System journal";
// Both storage paths are heap strings built from the machine id; server_done() frees them.
1870 s
->runtime_storage
.path
= strjoin("/run/log/journal/", SERVER_MACHINE_ID(s
));
1871 s
->system_storage
.path
= strjoin("/var/log/journal/", SERVER_MACHINE_ID(s
));
1872 if (!s
->runtime_storage
.path
|| !s
->system_storage
.path
)
1875 (void) server_connect_notify(s
);
1877 (void) client_context_acquire_default(s
);
1879 return system_journal_open(s
, false);
/* Takes one CLOCK_REALTIME timestamp and passes it to journal_file_maybe_append_tag() for the
 * system journal (when one is set) and for every file in s->user_journals.
 *
 * NOTE(review): the declarations of n, f and i and the closing brace are not present in this
 * listing; the listing may also have dropped a surrounding preprocessor conditional - confirm
 * against the complete file. */
1882 void server_maybe_append_tags(Server
*s
) {
1888 n
= now(CLOCK_REALTIME
);
1890 if (s
->system_journal
)
1891 journal_file_maybe_append_tag(s
->system_journal
, n
);
1893 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
)
1894 journal_file_maybe_append_tag(f
, n
);
/* Releases everything owned by the Server object.
 *
 * Order visible in this listing:
 *   1. journal files queued in s->deferred_closes are closed and the set is freed;
 *   2. all stdout streams are freed (the loop runs until s->stdout_streams is NULL, so
 *      stdout_stream_free() presumably unlinks the stream from that list - TODO confirm);
 *   3. client contexts are flushed;
 *   4. the system, runtime and per-user journal files are closed;
 *   5. all event sources and then the event loop are unreferenced;
 *   6. all descriptors are closed with safe_close();
 *   7. the rate limiter, the kernel_seqnum mapping (sizeof(uint64_t) bytes), the heap strings,
 *      the mmap cache and the udev context are released.
 *
 * NOTE(review): some statements and the closing brace are not present in this listing - confirm
 * against the complete file. */
1898 void server_done(Server
*s
) {
1901 set_free_with_destructor(s
->deferred_closes
, journal_file_close
);
1903 while (s
->stdout_streams
)
1904 stdout_stream_free(s
->stdout_streams
);
1906 client_context_flush_all(s
);
1908 if (s
->system_journal
)
1909 (void) journal_file_close(s
->system_journal
);
1911 if (s
->runtime_journal
)
1912 (void) journal_file_close(s
->runtime_journal
);
1914 ordered_hashmap_free_with_destructor(s
->user_journals
, journal_file_close
);
1916 sd_event_source_unref(s
->syslog_event_source
);
1917 sd_event_source_unref(s
->native_event_source
);
1918 sd_event_source_unref(s
->stdout_event_source
);
1919 sd_event_source_unref(s
->dev_kmsg_event_source
);
1920 sd_event_source_unref(s
->audit_event_source
);
1921 sd_event_source_unref(s
->sync_event_source
);
1922 sd_event_source_unref(s
->sigusr1_event_source
);
1923 sd_event_source_unref(s
->sigusr2_event_source
);
1924 sd_event_source_unref(s
->sigterm_event_source
);
1925 sd_event_source_unref(s
->sigint_event_source
);
1926 sd_event_source_unref(s
->sigrtmin1_event_source
);
1927 sd_event_source_unref(s
->hostname_event_source
);
1928 sd_event_source_unref(s
->notify_event_source
);
1929 sd_event_source_unref(s
->watchdog_event_source
);
1930 sd_event_unref(s
->event
);
1932 safe_close(s
->syslog_fd
);
1933 safe_close(s
->native_fd
);
1934 safe_close(s
->stdout_fd
);
1935 safe_close(s
->dev_kmsg_fd
);
1936 safe_close(s
->audit_fd
);
1937 safe_close(s
->hostname_fd
);
1938 safe_close(s
->notify_fd
);
1941 journal_rate_limit_free(s
->rate_limit
);
1943 if (s
->kernel_seqnum
)
1944 munmap(s
->kernel_seqnum
, sizeof(uint64_t));
1948 free(s
->cgroup_root
);
1949 free(s
->hostname_field
);
1950 free(s
->runtime_storage
.path
);
1951 free(s
->system_storage
.path
);
1954 mmap_cache_unref(s
->mmap
);
1956 udev_unref(s
->udev
);
/* String names of the Storage enum values: auto, volatile, persistent and none. The two macro
 * invocations below use this table to generate the string lookup helpers for Storage and the
 * config_parse_storage() parser; the last macro argument is the message used when parsing
 * fails.
 *
 * NOTE(review): the closing brace of the initializer is not present in this listing. */
1959 static const char* const storage_table
[_STORAGE_MAX
] = {
1960 [STORAGE_AUTO
] = "auto",
1961 [STORAGE_VOLATILE
] = "volatile",
1962 [STORAGE_PERSISTENT
] = "persistent",
1963 [STORAGE_NONE
] = "none"
1966 DEFINE_STRING_TABLE_LOOKUP(storage
, Storage
);
1967 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage
, storage
, Storage
, "Failed to parse storage setting");
/* String names of the SplitMode enum values: login, uid and none. The two macro invocations
 * below use this table to generate the string lookup helpers for SplitMode and the
 * config_parse_split_mode() parser; the last macro argument is the message used when parsing
 * fails.
 *
 * NOTE(review): the closing brace of the initializer is not present in this listing. */
1969 static const char* const split_mode_table
[_SPLIT_MAX
] = {
1970 [SPLIT_LOGIN
] = "login",
1971 [SPLIT_UID
] = "uid",
1972 [SPLIT_NONE
] = "none",
1975 DEFINE_STRING_TABLE_LOOKUP(split_mode
, SplitMode
);
1976 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode
, split_mode
, SplitMode
, "Failed to parse split mode setting");
/* Configuration parser for the LineMax= setting.
 *
 * Visible behaviour:
 *   - an empty value selects DEFAULT_LINE_MAX;
 *   - otherwise the value is parsed with parse_size() using base 1024; a parse failure is
 *     logged at LOG_ERR and, per the message, ignored;
 *   - a value that is too small is clamped to 79, with a warning;
 *   - a value above SSIZE_MAX-1 is clamped to SSIZE_MAX-1, with a warning.
 *
 * NOTE(review): most of the parameter list, the locals sz, v and r, the comparison that detects
 * a too-small value, the assignments that store the result and the returns are not present in
 * this listing - confirm against the complete file. */
1978 int config_parse_line_max(
1980 const char *filename
,
1982 const char *section
,
1983 unsigned section_line
,
1998 if (isempty(rvalue
))
1999 /* Empty assignment means default */
2000 *sz
= DEFAULT_LINE_MAX
;
2004 r
= parse_size(rvalue
, 1024, &v
);
2006 log_syntax(unit
, LOG_ERR
, filename
, line
, r
, "Failed to parse LineMax= value, ignoring: %s", rvalue
);
2011 /* Why specify 79 here as minimum line length? Simply, because the most common traditional
2012 * terminal size is 80ch, and it might make sense to break one character before the natural
2013 * line break would occur on that. */
2014 log_syntax(unit
, LOG_WARNING
, filename
, line
, 0, "LineMax= too small, clamping to 79: %s", rvalue
);
2016 } else if (v
> (uint64_t) (SSIZE_MAX
-1)) {
2017 /* So, why specify SSIZE_MAX-1 here? Because that's one below the largest size value read()
2018 * can return, and we need one extra byte for the trailing NUL byte. Of course IRL such large
2019 * memory allocations will fail anyway, hence this limit is mostly theoretical anyway, as we'll
2020 * fail much earlier anyway. */
2021 log_syntax(unit
, LOG_WARNING
, filename
, line
, 0, "LineMax= too large, clamping to %" PRIu64
": %s", (uint64_t) (SSIZE_MAX
-1), rvalue
);
/* Configuration parser for the Compress= setting; data points at a JournalCompressOptions.
 *
 * The value is tried against the following forms, in this order:
 *   1. the literal 1: compression on, with a warning that the value is ambiguous;
 *   2. the literal 0: compression off, with the same kind of warning;
 *   3. anything parse_boolean() accepts: enabled is set to the parsed boolean;
 *   4. anything parse_size() (base 1024) accepts: the size is written straight into
 *      threshold_bytes and compression is switched on;
 *   5. an empty value: compression on and threshold_bytes reset to (uint64_t) -1;
 *   6. otherwise an error is logged and, per the message, the value is ignored.
 *
 * NOTE(review): in the visible lines r is only assigned from parse_boolean(), so the error code
 * handed to the final log_syntax() would be that of the boolean parse rather than of
 * parse_size() - confirm whether this is intended. Part of the parameter list, the else in
 * front of the final log_syntax() and the return are not present in this listing. */
2030 int config_parse_compress(const char* unit
,
2031 const char *filename
,
2033 const char *section
,
2034 unsigned section_line
,
2040 JournalCompressOptions
* compress
= data
;
2043 if (streq(rvalue
, "1")) {
2044 log_syntax(unit
, LOG_WARNING
, filename
, line
, 0,
2045 "Compress= ambiguously specified as 1, enabling compression with default threshold");
2046 compress
->enabled
= true;
2047 } else if (streq(rvalue
, "0")) {
2048 log_syntax(unit
, LOG_WARNING
, filename
, line
, 0,
2049 "Compress= ambiguously specified as 0, disabling compression");
2050 compress
->enabled
= false;
2051 } else if ((r
= parse_boolean(rvalue
)) >= 0)
2052 compress
->enabled
= r
;
2053 else if (parse_size(rvalue
, 1024, &compress
->threshold_bytes
) == 0)
2054 compress
->enabled
= true;
2055 else if (isempty(rvalue
)) {
2056 compress
->enabled
= true;
2057 compress
->threshold_bytes
= (uint64_t) -1;
2059 log_syntax(unit
, LOG_ERR
, filename
, line
, r
, "Failed to parse Compress= value, ignoring: %s", rvalue
);