1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 This file is part of systemd.
5 Copyright 2011 Lennart Poettering
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <selinux/selinux.h>
24 #include <sys/ioctl.h>
26 #include <sys/signalfd.h>
27 #include <sys/statvfs.h>
28 #include <linux/sockios.h>
31 #include "sd-daemon.h"
32 #include "sd-journal.h"
33 #include "sd-messages.h"
36 #include "alloc-util.h"
37 #include "audit-util.h"
38 #include "cgroup-util.h"
39 #include "conf-parser.h"
40 #include "dirent-util.h"
41 #include "extract-word.h"
44 #include "format-util.h"
47 #include "hostname-util.h"
48 #include "id128-util.h"
50 #include "journal-authenticate.h"
51 #include "journal-file.h"
52 #include "journal-internal.h"
53 #include "journal-vacuum.h"
54 #include "journald-audit.h"
55 #include "journald-context.h"
56 #include "journald-kmsg.h"
57 #include "journald-native.h"
58 #include "journald-rate-limit.h"
59 #include "journald-server.h"
60 #include "journald-stream.h"
61 #include "journald-syslog.h"
65 #include "parse-util.h"
66 #include "proc-cmdline.h"
67 #include "process-util.h"
69 #include "selinux-util.h"
70 #include "signal-util.h"
71 #include "socket-util.h"
72 #include "stdio-util.h"
73 #include "string-table.h"
74 #include "string-util.h"
75 #include "syslog-util.h"
76 #include "user-util.h"
78 #define USER_JOURNALS_MAX 1024
80 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
81 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
82 #define DEFAULT_RATE_LIMIT_BURST 1000
83 #define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
85 #define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
87 #define NOTIFY_SNDBUF_SIZE (8*1024*1024)
89 /* The period to insert between posting changes for coalescing */
90 #define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
92 /* Pick a good default that is likely to fit into AF_UNIX and AF_INET SOCK_DGRAM datagrams, and even leaves some room
93 * for a bit of additional metadata. */
94 #define DEFAULT_LINE_MAX (48*1024)
96 static int determine_path_usage(Server
*s
, const char *path
, uint64_t *ret_used
, uint64_t *ret_free
) {
97 _cleanup_closedir_
DIR *d
= NULL
;
106 return log_full_errno(errno
== ENOENT
? LOG_DEBUG
: LOG_ERR
,
107 errno
, "Failed to open %s: %m", path
);
109 if (fstatvfs(dirfd(d
), &ss
) < 0)
110 return log_error_errno(errno
, "Failed to fstatvfs(%s): %m", path
);
112 *ret_free
= ss
.f_bsize
* ss
.f_bavail
;
114 FOREACH_DIRENT_ALL(de
, d
, break) {
117 if (!endswith(de
->d_name
, ".journal") &&
118 !endswith(de
->d_name
, ".journal~"))
121 if (fstatat(dirfd(d
), de
->d_name
, &st
, AT_SYMLINK_NOFOLLOW
) < 0) {
122 log_debug_errno(errno
, "Failed to stat %s/%s, ignoring: %m", path
, de
->d_name
);
126 if (!S_ISREG(st
.st_mode
))
129 *ret_used
+= (uint64_t) st
.st_blocks
* 512UL;
135 static void cache_space_invalidate(JournalStorageSpace
*space
) {
139 static int cache_space_refresh(Server
*s
, JournalStorage
*storage
) {
140 JournalStorageSpace
*space
;
141 JournalMetrics
*metrics
;
142 uint64_t vfs_used
, vfs_avail
, avail
;
148 metrics
= &storage
->metrics
;
149 space
= &storage
->space
;
151 ts
= now(CLOCK_MONOTONIC
);
153 if (space
->timestamp
!= 0 && space
->timestamp
+ RECHECK_SPACE_USEC
> ts
)
156 r
= determine_path_usage(s
, storage
->path
, &vfs_used
, &vfs_avail
);
160 space
->vfs_used
= vfs_used
;
161 space
->vfs_available
= vfs_avail
;
163 avail
= LESS_BY(vfs_avail
, metrics
->keep_free
);
165 space
->limit
= MIN(MAX(vfs_used
+ avail
, metrics
->min_use
), metrics
->max_use
);
166 space
->available
= LESS_BY(space
->limit
, vfs_used
);
167 space
->timestamp
= ts
;
171 static void patch_min_use(JournalStorage
*storage
) {
174 /* Let's bump the min_use limit to the current usage on disk. We do
175 * this when starting up and first opening the journal files. This way
176 * sudden spikes in disk usage will not cause journald to vacuum files
177 * without bounds. Note that this means that only a restart of journald
178 * will make it reset this value. */
180 storage
->metrics
.min_use
= MAX(storage
->metrics
.min_use
, storage
->space
.vfs_used
);
184 static int determine_space(Server
*s
, uint64_t *available
, uint64_t *limit
) {
190 js
= s
->system_journal
? &s
->system_storage
: &s
->runtime_storage
;
192 r
= cache_space_refresh(s
, js
);
195 *available
= js
->space
.available
;
197 *limit
= js
->space
.limit
;
202 void server_space_usage_message(Server
*s
, JournalStorage
*storage
) {
203 char fb1
[FORMAT_BYTES_MAX
], fb2
[FORMAT_BYTES_MAX
], fb3
[FORMAT_BYTES_MAX
],
204 fb4
[FORMAT_BYTES_MAX
], fb5
[FORMAT_BYTES_MAX
], fb6
[FORMAT_BYTES_MAX
];
205 JournalMetrics
*metrics
;
210 storage
= s
->system_journal
? &s
->system_storage
: &s
->runtime_storage
;
212 if (cache_space_refresh(s
, storage
) < 0)
215 metrics
= &storage
->metrics
;
216 format_bytes(fb1
, sizeof(fb1
), storage
->space
.vfs_used
);
217 format_bytes(fb2
, sizeof(fb2
), metrics
->max_use
);
218 format_bytes(fb3
, sizeof(fb3
), metrics
->keep_free
);
219 format_bytes(fb4
, sizeof(fb4
), storage
->space
.vfs_available
);
220 format_bytes(fb5
, sizeof(fb5
), storage
->space
.limit
);
221 format_bytes(fb6
, sizeof(fb6
), storage
->space
.available
);
223 server_driver_message(s
, 0,
224 "MESSAGE_ID=" SD_MESSAGE_JOURNAL_USAGE_STR
,
225 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
226 storage
->name
, storage
->path
, fb1
, fb5
, fb6
),
227 "JOURNAL_NAME=%s", storage
->name
,
228 "JOURNAL_PATH=%s", storage
->path
,
229 "CURRENT_USE=%"PRIu64
, storage
->space
.vfs_used
,
230 "CURRENT_USE_PRETTY=%s", fb1
,
231 "MAX_USE=%"PRIu64
, metrics
->max_use
,
232 "MAX_USE_PRETTY=%s", fb2
,
233 "DISK_KEEP_FREE=%"PRIu64
, metrics
->keep_free
,
234 "DISK_KEEP_FREE_PRETTY=%s", fb3
,
235 "DISK_AVAILABLE=%"PRIu64
, storage
->space
.vfs_available
,
236 "DISK_AVAILABLE_PRETTY=%s", fb4
,
237 "LIMIT=%"PRIu64
, storage
->space
.limit
,
238 "LIMIT_PRETTY=%s", fb5
,
239 "AVAILABLE=%"PRIu64
, storage
->space
.available
,
240 "AVAILABLE_PRETTY=%s", fb6
,
244 static bool uid_for_system_journal(uid_t uid
) {
246 /* Returns true if the specified UID shall get its data stored in the system journal*/
248 return uid_is_system(uid
) || uid_is_dynamic(uid
) || uid
== UID_NOBODY
;
251 static void server_add_acls(JournalFile
*f
, uid_t uid
) {
258 if (uid_for_system_journal(uid
))
261 r
= add_acls_for_user(f
->fd
, uid
);
263 log_warning_errno(r
, "Failed to set ACL on %s, ignoring: %m", f
->path
);
267 static int open_journal(
273 JournalMetrics
*metrics
,
283 r
= journal_file_open_reliably(fname
, flags
, 0640, s
->compress
, seal
, metrics
, s
->mmap
, s
->deferred_closes
, NULL
, &f
);
285 r
= journal_file_open(-1, fname
, flags
, 0640, s
->compress
, seal
, metrics
, s
->mmap
, s
->deferred_closes
, NULL
, &f
);
289 r
= journal_file_enable_post_change_timer(f
, s
->event
, POST_CHANGE_TIMER_INTERVAL_USEC
);
291 (void) journal_file_close(f
);
/* Returns true if the flag file /run/systemd/journal/flushed exists (as far as access() can tell). */
static bool flushed_flag_is_set(void) {
        const int rc = access("/run/systemd/journal/flushed", F_OK);

        return rc >= 0;
}
303 static int system_journal_open(Server
*s
, bool flush_requested
) {
307 if (!s
->system_journal
&&
308 IN_SET(s
->storage
, STORAGE_PERSISTENT
, STORAGE_AUTO
) &&
309 (flush_requested
|| flushed_flag_is_set())) {
311 /* If in auto mode: first try to create the machine
312 * path, but not the prefix.
314 * If in persistent mode: create /var/log/journal and
315 * the machine path */
317 if (s
->storage
== STORAGE_PERSISTENT
)
318 (void) mkdir_p("/var/log/journal/", 0755);
320 (void) mkdir(s
->system_storage
.path
, 0755);
322 fn
= strjoina(s
->system_storage
.path
, "/system.journal");
323 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_storage
.metrics
, &s
->system_journal
);
325 server_add_acls(s
->system_journal
, 0);
326 (void) cache_space_refresh(s
, &s
->system_storage
);
327 patch_min_use(&s
->system_storage
);
329 if (!IN_SET(r
, -ENOENT
, -EROFS
))
330 log_warning_errno(r
, "Failed to open system journal: %m");
335 /* If the runtime journal is open, and we're post-flush, we're
336 * recovering from a failed system journal rotate (ENOSPC)
337 * for which the runtime journal was reopened.
339 * Perform an implicit flush to var, leaving the runtime
340 * journal closed, now that the system journal is back.
342 if (!flush_requested
)
343 (void) server_flush_to_var(s
, true);
346 if (!s
->runtime_journal
&&
347 (s
->storage
!= STORAGE_NONE
)) {
349 fn
= strjoina(s
->runtime_storage
.path
, "/system.journal");
351 if (s
->system_journal
) {
353 /* Try to open the runtime journal, but only
354 * if it already exists, so that we can flush
355 * it into the system journal */
357 r
= open_journal(s
, false, fn
, O_RDWR
, false, &s
->runtime_storage
.metrics
, &s
->runtime_journal
);
360 log_warning_errno(r
, "Failed to open runtime journal: %m");
367 /* OK, we really need the runtime journal, so create
368 * it if necessary. */
370 (void) mkdir("/run/log", 0755);
371 (void) mkdir("/run/log/journal", 0755);
372 (void) mkdir_parents(fn
, 0750);
374 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, false, &s
->runtime_storage
.metrics
, &s
->runtime_journal
);
376 return log_error_errno(r
, "Failed to open runtime journal: %m");
379 if (s
->runtime_journal
) {
380 server_add_acls(s
->runtime_journal
, 0);
381 (void) cache_space_refresh(s
, &s
->runtime_storage
);
382 patch_min_use(&s
->runtime_storage
);
389 static JournalFile
* find_journal(Server
*s
, uid_t uid
) {
390 _cleanup_free_
char *p
= NULL
;
397 /* A rotate that fails to create the new journal (ENOSPC) leaves the
398 * rotated journal as NULL. Unless we revisit opening, even after
399 * space is made available we'll continue to return NULL indefinitely.
401 * system_journal_open() is a noop if the journals are already open, so
402 * we can just call it here to recover from failed rotates (or anything
403 * else that's left the journals as NULL).
405 * Fixes https://github.com/systemd/systemd/issues/3968 */
406 (void) system_journal_open(s
, false);
408 /* We split up user logs only on /var, not on /run. If the
409 * runtime file is open, we write to it exclusively, in order
410 * to guarantee proper order as soon as we flush /run to
411 * /var and close the runtime file. */
413 if (s
->runtime_journal
)
414 return s
->runtime_journal
;
416 if (uid_for_system_journal(uid
))
417 return s
->system_journal
;
419 r
= sd_id128_get_machine(&machine
);
421 return s
->system_journal
;
423 f
= ordered_hashmap_get(s
->user_journals
, UID_TO_PTR(uid
));
427 if (asprintf(&p
, "/var/log/journal/" SD_ID128_FORMAT_STR
"/user-"UID_FMT
".journal",
428 SD_ID128_FORMAT_VAL(machine
), uid
) < 0)
429 return s
->system_journal
;
431 while (ordered_hashmap_size(s
->user_journals
) >= USER_JOURNALS_MAX
) {
432 /* Too many open? Then let's close one */
433 f
= ordered_hashmap_steal_first(s
->user_journals
);
435 (void) journal_file_close(f
);
438 r
= open_journal(s
, true, p
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_storage
.metrics
, &f
);
440 return s
->system_journal
;
442 server_add_acls(f
, uid
);
444 r
= ordered_hashmap_put(s
->user_journals
, UID_TO_PTR(uid
), f
);
446 (void) journal_file_close(f
);
447 return s
->system_journal
;
453 static int do_rotate(
466 r
= journal_file_rotate(f
, s
->compress
, seal
, s
->deferred_closes
);
469 log_error_errno(r
, "Failed to rotate %s: %m", (*f
)->path
);
471 log_error_errno(r
, "Failed to create new %s journal: %m", name
);
473 server_add_acls(*f
, uid
);
478 void server_rotate(Server
*s
) {
484 log_debug("Rotating...");
486 (void) do_rotate(s
, &s
->runtime_journal
, "runtime", false, 0);
487 (void) do_rotate(s
, &s
->system_journal
, "system", s
->seal
, 0);
489 ORDERED_HASHMAP_FOREACH_KEY(f
, k
, s
->user_journals
, i
) {
490 r
= do_rotate(s
, &f
, "user", s
->seal
, PTR_TO_UID(k
));
492 ordered_hashmap_replace(s
->user_journals
, k
, f
);
494 /* Old file has been closed and deallocated */
495 ordered_hashmap_remove(s
->user_journals
, k
);
498 /* Perform any deferred closes which aren't still offlining. */
499 SET_FOREACH(f
, s
->deferred_closes
, i
)
500 if (!journal_file_is_offlining(f
)) {
501 (void) set_remove(s
->deferred_closes
, f
);
502 (void) journal_file_close(f
);
506 void server_sync(Server
*s
) {
511 if (s
->system_journal
) {
512 r
= journal_file_set_offline(s
->system_journal
, false);
514 log_warning_errno(r
, "Failed to sync system journal, ignoring: %m");
517 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
) {
518 r
= journal_file_set_offline(f
, false);
520 log_warning_errno(r
, "Failed to sync user journal, ignoring: %m");
523 if (s
->sync_event_source
) {
524 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_OFF
);
526 log_error_errno(r
, "Failed to disable sync timer source: %m");
529 s
->sync_scheduled
= false;
532 static void do_vacuum(Server
*s
, JournalStorage
*storage
, bool verbose
) {
539 (void) cache_space_refresh(s
, storage
);
542 server_space_usage_message(s
, storage
);
544 r
= journal_directory_vacuum(storage
->path
, storage
->space
.limit
,
545 storage
->metrics
.n_max_files
, s
->max_retention_usec
,
546 &s
->oldest_file_usec
, verbose
);
547 if (r
< 0 && r
!= -ENOENT
)
548 log_warning_errno(r
, "Failed to vacuum %s, ignoring: %m", storage
->path
);
550 cache_space_invalidate(&storage
->space
);
553 int server_vacuum(Server
*s
, bool verbose
) {
556 log_debug("Vacuuming...");
558 s
->oldest_file_usec
= 0;
560 if (s
->system_journal
)
561 do_vacuum(s
, &s
->system_storage
, verbose
);
562 if (s
->runtime_journal
)
563 do_vacuum(s
, &s
->runtime_storage
, verbose
);
568 static void server_cache_machine_id(Server
*s
) {
574 r
= sd_id128_get_machine(&id
);
578 sd_id128_to_string(id
, stpcpy(s
->machine_id_field
, "_MACHINE_ID="));
581 static void server_cache_boot_id(Server
*s
) {
587 r
= sd_id128_get_boot(&id
);
591 sd_id128_to_string(id
, stpcpy(s
->boot_id_field
, "_BOOT_ID="));
594 static void server_cache_hostname(Server
*s
) {
595 _cleanup_free_
char *t
= NULL
;
600 t
= gethostname_malloc();
604 x
= strappend("_HOSTNAME=", t
);
608 free(s
->hostname_field
);
609 s
->hostname_field
= x
;
612 static bool shall_try_append_again(JournalFile
*f
, int r
) {
615 case -E2BIG
: /* Hit configured limit */
616 case -EFBIG
: /* Hit fs limit */
617 case -EDQUOT
: /* Quota limit hit */
618 case -ENOSPC
: /* Disk full */
619 log_debug("%s: Allocation limit reached, rotating.", f
->path
);
622 case -EIO
: /* I/O error of some kind (mmap) */
623 log_warning("%s: IO error, rotating.", f
->path
);
626 case -EHOSTDOWN
: /* Other machine */
627 log_info("%s: Journal file from other machine, rotating.", f
->path
);
630 case -EBUSY
: /* Unclean shutdown */
631 log_info("%s: Unclean shutdown, rotating.", f
->path
);
634 case -EPROTONOSUPPORT
: /* Unsupported feature */
635 log_info("%s: Unsupported feature, rotating.", f
->path
);
638 case -EBADMSG
: /* Corrupted */
639 case -ENODATA
: /* Truncated */
640 case -ESHUTDOWN
: /* Already archived */
641 log_warning("%s: Journal file corrupted, rotating.", f
->path
);
644 case -EIDRM
: /* Journal file has been deleted */
645 log_warning("%s: Journal file has been deleted, rotating.", f
->path
);
648 case -ETXTBSY
: /* Journal file is from the future */
649 log_warning("%s: Journal file is from the future, rotating.", f
->path
);
657 static void write_to_journal(Server
*s
, uid_t uid
, struct iovec
*iovec
, unsigned n
, int priority
) {
658 bool vacuumed
= false, rotate
= false;
659 struct dual_timestamp ts
;
667 /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
668 * the source time, and not even the time the event was originally seen, but instead simply the time we started
669 * processing it, as we want strictly linear ordering in what we write out.) */
670 assert_se(sd_event_now(s
->event
, CLOCK_REALTIME
, &ts
.realtime
) >= 0);
671 assert_se(sd_event_now(s
->event
, CLOCK_MONOTONIC
, &ts
.monotonic
) >= 0);
673 if (ts
.realtime
< s
->last_realtime_clock
) {
674 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
675 * regular operation. However, when it does happen, then we should make sure that we start fresh files
676 * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
677 * bisection works correctly. */
679 log_debug("Time jumped backwards, rotating.");
683 f
= find_journal(s
, uid
);
687 if (journal_file_rotate_suggested(f
, s
->max_file_usec
)) {
688 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f
->path
);
695 server_vacuum(s
, false);
698 f
= find_journal(s
, uid
);
703 s
->last_realtime_clock
= ts
.realtime
;
705 r
= journal_file_append_entry(f
, &ts
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
707 server_schedule_sync(s
, priority
);
711 if (vacuumed
|| !shall_try_append_again(f
, r
)) {
712 log_error_errno(r
, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
717 server_vacuum(s
, false);
719 f
= find_journal(s
, uid
);
723 log_debug("Retrying write.");
724 r
= journal_file_append_entry(f
, &ts
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
726 log_error_errno(r
, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
728 server_schedule_sync(s
, priority
);
/* Appends a "<field>=<number>" entry to iovec[] and advances n, but only if isset(value) is true.
 *
 *   iovec, n  array to append to and its fill counter (n is post-incremented)
 *   value     the number to format; evaluated more than once, so pass something side-effect free
 *   type      C type of value, used to size the buffer via DECIMAL_STR_MAX()
 *   isset     predicate (function or macro) deciding whether the value is valid
 *   format    printf conversion for value, as string literal
 *   field     field name, as string literal without the trailing "="
 *
 * The string is allocated with newa() (presumably stack memory that lives until the calling function
 * returns -- confirm against alloc-util.h), hence the entry must not outlive the caller.
 *
 * Wrapped in do { } while (0) so that the macro behaves like a single statement, also when used as the
 * unbraced body of an if/else. */
#define IOVEC_ADD_NUMERIC_FIELD(iovec, n, value, type, isset, format, field)  \
        do {                                                            \
                if (isset(value)) {                                     \
                        char *k;                                        \
                        k = newa(char, STRLEN(field "=") + DECIMAL_STR_MAX(type) + 1); \
                        sprintf(k, field "=" format, value);            \
                        (iovec)[(n)++] = IOVEC_MAKE_STRING(k);          \
                }                                                       \
        } while (0)
/* Appends a "<field>=<value>" entry to iovec[] and advances n, unless value is empty (as per isempty()).
 *
 *   iovec, n  array to append to and its fill counter (n is post-incremented)
 *   value     the string to store; evaluated more than once
 *   field     field name, as string literal without the trailing "="
 *
 * The string is built with strjoina() (presumably stack memory that lives until the calling function
 * returns -- confirm against string-util.h), hence the entry must not outlive the caller.
 *
 * Wrapped in do { } while (0) so that the macro behaves like a single statement, also when used as the
 * unbraced body of an if/else. */
#define IOVEC_ADD_STRING_FIELD(iovec, n, value, field)                  \
        do {                                                            \
                if (!isempty(value)) {                                  \
                        char *k;                                        \
                        k = strjoina(field "=", value);                 \
                        (iovec)[(n)++] = IOVEC_MAKE_STRING(k);          \
                }                                                       \
        } while (0)
/* Appends a "<field>=<128bit ID as string>" entry to iovec[] and advances n, unless the ID is null.
 *
 *   iovec, n  array to append to and its fill counter (n is post-incremented)
 *   value     the sd_id128_t to format; evaluated more than once
 *   field     field name, as string literal WITHOUT the trailing "=" -- the macro adds it itself
 *
 * The string is allocated with newa() (presumably stack memory that lives until the calling function
 * returns -- confirm against alloc-util.h), hence the entry must not outlive the caller.
 *
 * Wrapped in do { } while (0) so that the macro behaves like a single statement, also when used as the
 * unbraced body of an if/else. */
#define IOVEC_ADD_ID128_FIELD(iovec, n, value, field)                   \
        do {                                                            \
                if (!sd_id128_is_null(value)) {                         \
                        char *k;                                        \
                        k = newa(char, STRLEN(field "=") + SD_ID128_STRING_MAX); \
                        sd_id128_to_string(value, stpcpy(k, field "=")); \
                        (iovec)[(n)++] = IOVEC_MAKE_STRING(k);          \
                }                                                       \
        } while (0)
/* Appends a "<field>=<value>" entry to iovec[] and advances n, where value is a buffer of value_size
 * bytes that need not be NUL terminated. Nothing is added if value_size is 0.
 *
 *   iovec, n    array to append to and its fill counter (n is post-incremented)
 *   value       start of the data to copy
 *   value_size  number of bytes to copy; evaluated more than once
 *   field       field name, as string literal without the trailing "="
 *
 * The copy is NUL terminated explicitly. It is allocated with newa() (presumably stack memory that lives
 * until the calling function returns -- confirm against alloc-util.h), hence the entry must not outlive
 * the caller.
 *
 * Wrapped in do { } while (0) so that the macro behaves like a single statement, also when used as the
 * unbraced body of an if/else; value_size is parenthesized so that expressions can be passed safely. */
#define IOVEC_ADD_SIZED_FIELD(iovec, n, value, value_size, field)       \
        do {                                                            \
                if ((value_size) > 0) {                                 \
                        char *k;                                        \
                        k = newa(char, STRLEN(field "=") + (value_size) + 1); \
                        *((char*) mempcpy(stpcpy(k, field "="), value, (value_size))) = 0; \
                        (iovec)[(n)++] = IOVEC_MAKE_STRING(k);          \
                }                                                       \
        } while (0)
762 static void dispatch_message_real(
764 struct iovec
*iovec
, size_t n
, size_t m
,
765 const ClientContext
*c
,
766 const struct timeval
*tv
,
770 char source_time
[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t
)];
778 N_IOVEC_META_FIELDS
+
779 (pid_is_valid(object_pid
) ? N_IOVEC_OBJECT_FIELDS
: 0) +
780 client_context_extra_fields_n_iovec(c
) <= m
);
783 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->pid
, pid_t
, pid_is_valid
, PID_FMT
, "_PID");
784 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->uid
, uid_t
, uid_is_valid
, UID_FMT
, "_UID");
785 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->gid
, gid_t
, gid_is_valid
, GID_FMT
, "_GID");
787 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->comm
, "_COMM");
788 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->exe
, "_EXE");
789 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->cmdline
, "_CMDLINE");
790 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->capeff
, "_CAP_EFFECTIVE");
792 IOVEC_ADD_SIZED_FIELD(iovec
, n
, c
->label
, c
->label_size
, "_SELINUX_CONTEXT");
794 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->auditid
, uint32_t, audit_session_is_valid
, "%" PRIu32
, "_AUDIT_SESSION");
795 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->loginuid
, uid_t
, uid_is_valid
, UID_FMT
, "_AUDIT_LOGINUID");
797 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->cgroup
, "_SYSTEMD_CGROUP");
798 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->session
, "_SYSTEMD_SESSION");
799 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->owner_uid
, uid_t
, uid_is_valid
, UID_FMT
, "_SYSTEMD_OWNER_UID");
800 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->unit
, "_SYSTEMD_UNIT");
801 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->user_unit
, "_SYSTEMD_USER_UNIT");
802 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->slice
, "_SYSTEMD_SLICE");
803 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->user_slice
, "_SYSTEMD_USER_SLICE");
805 IOVEC_ADD_ID128_FIELD(iovec
, n
, c
->invocation_id
, "_SYSTEMD_INVOCATION_ID");
807 if (c
->extra_fields_n_iovec
> 0) {
808 memcpy(iovec
+ n
, c
->extra_fields_iovec
, c
->extra_fields_n_iovec
* sizeof(struct iovec
));
809 n
+= c
->extra_fields_n_iovec
;
815 if (pid_is_valid(object_pid
) && client_context_get(s
, object_pid
, NULL
, NULL
, 0, NULL
, &o
) >= 0) {
817 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->pid
, pid_t
, pid_is_valid
, PID_FMT
, "OBJECT_PID");
818 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->uid
, uid_t
, uid_is_valid
, UID_FMT
, "OBJECT_UID");
819 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->gid
, gid_t
, gid_is_valid
, GID_FMT
, "OBJECT_GID");
821 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->comm
, "OBJECT_COMM");
822 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->exe
, "OBJECT_EXE");
823 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->cmdline
, "OBJECT_CMDLINE");
824 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->capeff
, "OBJECT_CAP_EFFECTIVE");
826 IOVEC_ADD_SIZED_FIELD(iovec
, n
, o
->label
, o
->label_size
, "OBJECT_SELINUX_CONTEXT");
828 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->auditid
, uint32_t, audit_session_is_valid
, "%" PRIu32
, "OBJECT_AUDIT_SESSION");
829 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->loginuid
, uid_t
, uid_is_valid
, UID_FMT
, "OBJECT_AUDIT_LOGINUID");
831 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->cgroup
, "OBJECT_SYSTEMD_CGROUP");
832 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->session
, "OBJECT_SYSTEMD_SESSION");
833 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->owner_uid
, uid_t
, uid_is_valid
, UID_FMT
, "OBJECT_SYSTEMD_OWNER_UID");
834 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->unit
, "OBJECT_SYSTEMD_UNIT");
835 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->user_unit
, "OBJECT_SYSTEMD_USER_UNIT");
836 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->slice
, "OBJECT_SYSTEMD_SLICE");
837 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->user_slice
, "OBJECT_SYSTEMD_USER_SLICE");
839 IOVEC_ADD_ID128_FIELD(iovec
, n
, o
->invocation_id
, "OBJECT_SYSTEMD_INVOCATION_ID=");
845 sprintf(source_time
, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT
, timeval_load(tv
));
846 iovec
[n
++] = IOVEC_MAKE_STRING(source_time
);
849 /* Note that strictly speaking storing the boot id here is
850 * redundant since the entry includes this in-line
851 * anyway. However, we need this indexed, too. */
852 if (!isempty(s
->boot_id_field
))
853 iovec
[n
++] = IOVEC_MAKE_STRING(s
->boot_id_field
);
855 if (!isempty(s
->machine_id_field
))
856 iovec
[n
++] = IOVEC_MAKE_STRING(s
->machine_id_field
);
858 if (!isempty(s
->hostname_field
))
859 iovec
[n
++] = IOVEC_MAKE_STRING(s
->hostname_field
);
863 if (s
->split_mode
== SPLIT_UID
&& c
&& uid_is_valid(c
->uid
))
864 /* Split up strictly by (non-root) UID */
865 journal_uid
= c
->uid
;
866 else if (s
->split_mode
== SPLIT_LOGIN
&& c
&& c
->uid
> 0 && uid_is_valid(c
->owner_uid
))
867 /* Split up by login UIDs. We do this only if the
868 * realuid is not root, in order not to accidentally
869 * leak privileged information to the user that is
870 * logged by a privileged process that is part of an
871 * unprivileged session. */
872 journal_uid
= c
->owner_uid
;
876 write_to_journal(s
, journal_uid
, iovec
, n
, priority
);
879 void server_driver_message(Server
*s
, pid_t object_pid
, const char *message_id
, const char *format
, ...) {
889 m
= N_IOVEC_META_FIELDS
+ 5 + N_IOVEC_PAYLOAD_FIELDS
+ client_context_extra_fields_n_iovec(s
->my_context
) + N_IOVEC_OBJECT_FIELDS
;
890 iovec
= newa(struct iovec
, m
);
892 assert_cc(3 == LOG_FAC(LOG_DAEMON
));
893 iovec
[n
++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=3");
894 iovec
[n
++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=systemd-journald");
896 iovec
[n
++] = IOVEC_MAKE_STRING("_TRANSPORT=driver");
897 assert_cc(6 == LOG_INFO
);
898 iovec
[n
++] = IOVEC_MAKE_STRING("PRIORITY=6");
901 iovec
[n
++] = IOVEC_MAKE_STRING(message_id
);
904 va_start(ap
, format
);
905 r
= log_format_iovec(iovec
, m
, &n
, false, 0, format
, ap
);
906 /* Error handling below */
910 dispatch_message_real(s
, iovec
, n
, m
, s
->my_context
, NULL
, LOG_INFO
, object_pid
);
913 free(iovec
[k
++].iov_base
);
916 /* We failed to format the message. Emit a warning instead. */
919 xsprintf(buf
, "MESSAGE=Entry printing failed: %s", strerror(-r
));
922 iovec
[n
++] = IOVEC_MAKE_STRING("PRIORITY=4");
923 iovec
[n
++] = IOVEC_MAKE_STRING(buf
);
924 dispatch_message_real(s
, iovec
, n
, m
, s
->my_context
, NULL
, LOG_INFO
, object_pid
);
928 void server_dispatch_message(
930 struct iovec
*iovec
, size_t n
, size_t m
,
932 const struct timeval
*tv
,
936 uint64_t available
= 0;
940 assert(iovec
|| n
== 0);
945 if (LOG_PRI(priority
) > s
->max_level_store
)
948 /* Stop early in case the information will not be stored
950 if (s
->storage
== STORAGE_NONE
)
954 (void) determine_space(s
, &available
, NULL
);
956 rl
= journal_rate_limit_test(s
->rate_limit
, c
->unit
, priority
& LOG_PRIMASK
, available
);
960 /* Write a suppression message if we suppressed something */
962 server_driver_message(s
, c
->pid
,
963 "MESSAGE_ID=" SD_MESSAGE_JOURNAL_DROPPED_STR
,
964 LOG_MESSAGE("Suppressed %i messages from %s", rl
- 1, c
->unit
),
965 "N_DROPPED=%i", rl
- 1,
969 dispatch_message_real(s
, iovec
, n
, m
, c
, tv
, priority
, object_pid
);
972 int server_flush_to_var(Server
*s
, bool require_flag_file
) {
974 sd_journal
*j
= NULL
;
975 char ts
[FORMAT_TIMESPAN_MAX
];
982 if (!IN_SET(s
->storage
, STORAGE_AUTO
, STORAGE_PERSISTENT
))
985 if (!s
->runtime_journal
)
988 if (require_flag_file
&& !flushed_flag_is_set())
991 (void) system_journal_open(s
, true);
993 if (!s
->system_journal
)
996 log_debug("Flushing to /var...");
998 start
= now(CLOCK_MONOTONIC
);
1000 r
= sd_id128_get_machine(&machine
);
1004 r
= sd_journal_open(&j
, SD_JOURNAL_RUNTIME_ONLY
);
1006 return log_error_errno(r
, "Failed to read runtime journal: %m");
1008 sd_journal_set_data_threshold(j
, 0);
1010 SD_JOURNAL_FOREACH(j
) {
1014 f
= j
->current_file
;
1015 assert(f
&& f
->current_offset
> 0);
1019 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
1021 log_error_errno(r
, "Can't read entry: %m");
1025 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
1029 if (!shall_try_append_again(s
->system_journal
, r
)) {
1030 log_error_errno(r
, "Can't write entry: %m");
1035 server_vacuum(s
, false);
1037 if (!s
->system_journal
) {
1038 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1043 log_debug("Retrying write.");
1044 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
1046 log_error_errno(r
, "Can't write entry: %m");
1054 journal_file_post_change(s
->system_journal
);
1056 s
->runtime_journal
= journal_file_close(s
->runtime_journal
);
1059 (void) rm_rf("/run/log/journal", REMOVE_ROOT
);
1061 sd_journal_close(j
);
1063 server_driver_message(s
, 0, NULL
,
1064 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1065 format_timespan(ts
, sizeof(ts
), now(CLOCK_MONOTONIC
) - start
, 0),
1072 int server_process_datagram(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1073 Server
*s
= userdata
;
1074 struct ucred
*ucred
= NULL
;
1075 struct timeval
*tv
= NULL
;
1076 struct cmsghdr
*cmsg
;
1078 size_t label_len
= 0, m
;
1081 int *fds
= NULL
, v
= 0;
1085 struct cmsghdr cmsghdr
;
1087 /* We use NAME_MAX space for the SELinux label
1088 * here. The kernel currently enforces no
1089 * limit, but according to suggestions from
1090 * the SELinux people this will change and it
1091 * will probably be identical to NAME_MAX. For
1092 * now we use that, but this should be updated
1093 * one day when the final limit is known. */
1094 uint8_t buf
[CMSG_SPACE(sizeof(struct ucred
)) +
1095 CMSG_SPACE(sizeof(struct timeval
)) +
1096 CMSG_SPACE(sizeof(int)) + /* fd */
1097 CMSG_SPACE(NAME_MAX
)]; /* selinux label */
1100 union sockaddr_union sa
= {};
1102 struct msghdr msghdr
= {
1105 .msg_control
= &control
,
1106 .msg_controllen
= sizeof(control
),
1108 .msg_namelen
= sizeof(sa
),
1112 assert(fd
== s
->native_fd
|| fd
== s
->syslog_fd
|| fd
== s
->audit_fd
);
1114 if (revents
!= EPOLLIN
) {
1115 log_error("Got invalid event from epoll for datagram fd: %"PRIx32
, revents
);
1119 /* Try to get the right size, if we can. (Not all sockets support SIOCINQ, hence we just try, but don't rely on
1121 (void) ioctl(fd
, SIOCINQ
, &v
);
1123 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1124 m
= PAGE_ALIGN(MAX3((size_t) v
+ 1,
1126 ALIGN(sizeof(struct nlmsghdr
)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH
)) + 1);
1128 if (!GREEDY_REALLOC(s
->buffer
, s
->buffer_size
, m
))
1131 iovec
.iov_base
= s
->buffer
;
1132 iovec
.iov_len
= s
->buffer_size
- 1; /* Leave room for trailing NUL we add later */
1134 n
= recvmsg(fd
, &msghdr
, MSG_DONTWAIT
|MSG_CMSG_CLOEXEC
);
1136 if (IN_SET(errno
, EINTR
, EAGAIN
))
1139 return log_error_errno(errno
, "recvmsg() failed: %m");
1142 CMSG_FOREACH(cmsg
, &msghdr
) {
1144 if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1145 cmsg
->cmsg_type
== SCM_CREDENTIALS
&&
1146 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct ucred
)))
1147 ucred
= (struct ucred
*) CMSG_DATA(cmsg
);
1148 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1149 cmsg
->cmsg_type
== SCM_SECURITY
) {
1150 label
= (char*) CMSG_DATA(cmsg
);
1151 label_len
= cmsg
->cmsg_len
- CMSG_LEN(0);
1152 } else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1153 cmsg
->cmsg_type
== SO_TIMESTAMP
&&
1154 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct timeval
)))
1155 tv
= (struct timeval
*) CMSG_DATA(cmsg
);
1156 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1157 cmsg
->cmsg_type
== SCM_RIGHTS
) {
1158 fds
= (int*) CMSG_DATA(cmsg
);
1159 n_fds
= (cmsg
->cmsg_len
- CMSG_LEN(0)) / sizeof(int);
1163 /* And a trailing NUL, just in case */
1166 if (fd
== s
->syslog_fd
) {
1167 if (n
> 0 && n_fds
== 0)
1168 server_process_syslog_message(s
, strstrip(s
->buffer
), ucred
, tv
, label
, label_len
);
1170 log_warning("Got file descriptors via syslog socket. Ignoring.");
1172 } else if (fd
== s
->native_fd
) {
1173 if (n
> 0 && n_fds
== 0)
1174 server_process_native_message(s
, s
->buffer
, n
, ucred
, tv
, label
, label_len
);
1175 else if (n
== 0 && n_fds
== 1)
1176 server_process_native_file(s
, fds
[0], ucred
, tv
, label
, label_len
);
1178 log_warning("Got too many file descriptors via native socket. Ignoring.");
1181 assert(fd
== s
->audit_fd
);
1183 if (n
> 0 && n_fds
== 0)
1184 server_process_audit_message(s
, s
->buffer
, n
, ucred
, &sa
, msghdr
.msg_namelen
);
1186 log_warning("Got file descriptors via audit socket. Ignoring.");
1189 close_many(fds
, n_fds
);
1193 static int dispatch_sigusr1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1194 Server
*s
= userdata
;
1199 log_info("Received request to flush runtime journal from PID " PID_FMT
, si
->ssi_pid
);
1201 (void) server_flush_to_var(s
, false);
1203 server_vacuum(s
, false);
1205 r
= touch("/run/systemd/journal/flushed");
1207 log_warning_errno(r
, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
1209 server_space_usage_message(s
, NULL
);
1213 static int dispatch_sigusr2(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1214 Server
*s
= userdata
;
1219 log_info("Received request to rotate journal from PID " PID_FMT
, si
->ssi_pid
);
1221 server_vacuum(s
, true);
1223 if (s
->system_journal
)
1224 patch_min_use(&s
->system_storage
);
1225 if (s
->runtime_journal
)
1226 patch_min_use(&s
->runtime_storage
);
1228 /* Let clients know when the most recent rotation happened. */
1229 r
= write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC
));
1231 log_warning_errno(r
, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
1236 static int dispatch_sigterm(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1237 Server
*s
= userdata
;
1241 log_received_signal(LOG_INFO
, si
);
1243 sd_event_exit(s
->event
, 0);
1247 static int dispatch_sigrtmin1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1248 Server
*s
= userdata
;
1253 log_debug("Received request to sync from PID " PID_FMT
, si
->ssi_pid
);
1257 /* Let clients know when the most recent sync happened. */
1258 r
= write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC
));
1260 log_warning_errno(r
, "Failed to write /run/systemd/journal/synced, ignoring: %m");
1265 static int setup_signals(Server
*s
) {
1270 assert_se(sigprocmask_many(SIG_SETMASK
, NULL
, SIGINT
, SIGTERM
, SIGUSR1
, SIGUSR2
, SIGRTMIN
+1, -1) >= 0);
1272 r
= sd_event_add_signal(s
->event
, &s
->sigusr1_event_source
, SIGUSR1
, dispatch_sigusr1
, s
);
1276 r
= sd_event_add_signal(s
->event
, &s
->sigusr2_event_source
, SIGUSR2
, dispatch_sigusr2
, s
);
1280 r
= sd_event_add_signal(s
->event
, &s
->sigterm_event_source
, SIGTERM
, dispatch_sigterm
, s
);
1284 /* Let's process SIGTERM late, so that we flush all queued
1285 * messages to disk before we exit */
1286 r
= sd_event_source_set_priority(s
->sigterm_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1290 /* When journald is invoked on the terminal (when debugging),
1291 * it's useful if C-c is handled equivalent to SIGTERM. */
1292 r
= sd_event_add_signal(s
->event
, &s
->sigint_event_source
, SIGINT
, dispatch_sigterm
, s
);
1296 r
= sd_event_source_set_priority(s
->sigint_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1300 /* SIGRTMIN+1 causes an immediate sync. We process this very
1301 * late, so that everything else queued at this point is
1302 * really written to disk. Clients can watch
1303 * /run/systemd/journal/synced with inotify until its mtime
1304 * changes to see when a sync happened. */
1305 r
= sd_event_add_signal(s
->event
, &s
->sigrtmin1_event_source
, SIGRTMIN
+1, dispatch_sigrtmin1
, s
);
1309 r
= sd_event_source_set_priority(s
->sigrtmin1_event_source
, SD_EVENT_PRIORITY_NORMAL
+15);
1316 static int parse_proc_cmdline_item(const char *key
, const char *value
, void *data
) {
1322 if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_syslog")) {
1324 r
= value
? parse_boolean(value
) : true;
1326 log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value
);
1328 s
->forward_to_syslog
= r
;
1330 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_kmsg")) {
1332 r
= value
? parse_boolean(value
) : true;
1334 log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value
);
1336 s
->forward_to_kmsg
= r
;
1338 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_console")) {
1340 r
= value
? parse_boolean(value
) : true;
1342 log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value
);
1344 s
->forward_to_console
= r
;
1346 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_wall")) {
1348 r
= value
? parse_boolean(value
) : true;
1350 log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value
);
1352 s
->forward_to_wall
= r
;
1354 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_console")) {
1356 if (proc_cmdline_value_missing(key
, value
))
1359 r
= log_level_from_string(value
);
1361 log_warning("Failed to parse max level console value \"%s\". Ignoring.", value
);
1363 s
->max_level_console
= r
;
1365 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_store")) {
1367 if (proc_cmdline_value_missing(key
, value
))
1370 r
= log_level_from_string(value
);
1372 log_warning("Failed to parse max level store value \"%s\". Ignoring.", value
);
1374 s
->max_level_store
= r
;
1376 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_syslog")) {
1378 if (proc_cmdline_value_missing(key
, value
))
1381 r
= log_level_from_string(value
);
1383 log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value
);
1385 s
->max_level_syslog
= r
;
1387 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_kmsg")) {
1389 if (proc_cmdline_value_missing(key
, value
))
1392 r
= log_level_from_string(value
);
1394 log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value
);
1396 s
->max_level_kmsg
= r
;
1398 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_wall")) {
1400 if (proc_cmdline_value_missing(key
, value
))
1403 r
= log_level_from_string(value
);
1405 log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value
);
1407 s
->max_level_wall
= r
;
1409 } else if (startswith(key
, "systemd.journald"))
1410 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key
);
1412 /* do not warn about state here, since probably systemd already did */
1416 static int server_parse_config_file(Server
*s
) {
1419 return config_parse_many_nulstr(PKGSYSCONFDIR
"/journald.conf",
1420 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1422 config_item_perf_lookup
, journald_gperf_lookup
,
1423 CONFIG_PARSE_WARN
, s
);
1426 static int server_dispatch_sync(sd_event_source
*es
, usec_t t
, void *userdata
) {
1427 Server
*s
= userdata
;
1435 int server_schedule_sync(Server
*s
, int priority
) {
1440 if (priority
<= LOG_CRIT
) {
1441 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1446 if (s
->sync_scheduled
)
1449 if (s
->sync_interval_usec
> 0) {
1452 r
= sd_event_now(s
->event
, CLOCK_MONOTONIC
, &when
);
1456 when
+= s
->sync_interval_usec
;
1458 if (!s
->sync_event_source
) {
1459 r
= sd_event_add_time(
1461 &s
->sync_event_source
,
1464 server_dispatch_sync
, s
);
1468 r
= sd_event_source_set_priority(s
->sync_event_source
, SD_EVENT_PRIORITY_IMPORTANT
);
1470 r
= sd_event_source_set_time(s
->sync_event_source
, when
);
1474 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_ONESHOT
);
1479 s
->sync_scheduled
= true;
1485 static int dispatch_hostname_change(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1486 Server
*s
= userdata
;
1490 server_cache_hostname(s
);
1494 static int server_open_hostname(Server
*s
) {
1499 s
->hostname_fd
= open("/proc/sys/kernel/hostname", O_RDONLY
|O_CLOEXEC
|O_NDELAY
|O_NOCTTY
);
1500 if (s
->hostname_fd
< 0)
1501 return log_error_errno(errno
, "Failed to open /proc/sys/kernel/hostname: %m");
1503 r
= sd_event_add_io(s
->event
, &s
->hostname_event_source
, s
->hostname_fd
, 0, dispatch_hostname_change
, s
);
1505 /* kernels prior to 3.2 don't support polling this file. Ignore
1508 log_warning_errno(r
, "Failed to register hostname fd in event loop, ignoring: %m");
1509 s
->hostname_fd
= safe_close(s
->hostname_fd
);
1513 return log_error_errno(r
, "Failed to register hostname fd in event loop: %m");
1516 r
= sd_event_source_set_priority(s
->hostname_event_source
, SD_EVENT_PRIORITY_IMPORTANT
-10);
1518 return log_error_errno(r
, "Failed to adjust priority of host name event source: %m");
1523 static int dispatch_notify_event(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1524 Server
*s
= userdata
;
1528 assert(s
->notify_event_source
== es
);
1529 assert(s
->notify_fd
== fd
);
1531 /* The $NOTIFY_SOCKET is writable again, now send exactly one
1532 * message on it. Either it's the watchdog event, the initial
1533 * READY=1 event or an stdout stream event. If there's nothing
1534 * to write anymore, turn our event source off. The next time
1535 * there's something to send it will be turned on again. */
1537 if (!s
->sent_notify_ready
) {
1538 static const char p
[] =
1540 "STATUS=Processing requests...";
1543 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1545 if (errno
== EAGAIN
)
1548 return log_error_errno(errno
, "Failed to send READY=1 notification message: %m");
1551 s
->sent_notify_ready
= true;
1552 log_debug("Sent READY=1 notification.");
1554 } else if (s
->send_watchdog
) {
1556 static const char p
[] =
1561 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1563 if (errno
== EAGAIN
)
1566 return log_error_errno(errno
, "Failed to send WATCHDOG=1 notification message: %m");
1569 s
->send_watchdog
= false;
1570 log_debug("Sent WATCHDOG=1 notification.");
1572 } else if (s
->stdout_streams_notify_queue
)
1573 /* Dispatch one stream notification event */
1574 stdout_stream_send_notify(s
->stdout_streams_notify_queue
);
1576 /* Leave us enabled if there's still more to do. */
1577 if (s
->send_watchdog
|| s
->stdout_streams_notify_queue
)
1580 /* There was nothing to do anymore, let's turn ourselves off. */
1581 r
= sd_event_source_set_enabled(es
, SD_EVENT_OFF
);
1583 return log_error_errno(r
, "Failed to turn off notify event source: %m");
1588 static int dispatch_watchdog(sd_event_source
*es
, uint64_t usec
, void *userdata
) {
1589 Server
*s
= userdata
;
1594 s
->send_watchdog
= true;
1596 r
= sd_event_source_set_enabled(s
->notify_event_source
, SD_EVENT_ON
);
1598 log_warning_errno(r
, "Failed to turn on notify event source: %m");
1600 r
= sd_event_source_set_time(s
->watchdog_event_source
, usec
+ s
->watchdog_usec
/ 2);
1602 return log_error_errno(r
, "Failed to restart watchdog event source: %m");
1604 r
= sd_event_source_set_enabled(s
->watchdog_event_source
, SD_EVENT_ON
);
1606 return log_error_errno(r
, "Failed to enable watchdog event source: %m");
1611 static int server_connect_notify(Server
*s
) {
1612 union sockaddr_union sa
= {
1613 .un
.sun_family
= AF_UNIX
,
1619 assert(s
->notify_fd
< 0);
1620 assert(!s
->notify_event_source
);
1623 So here's the problem: we'd like to send notification
1624 messages to PID 1, but we cannot do that via sd_notify(),
1625 since that's synchronous, and we might end up blocking on
1626 it. Specifically: given that PID 1 might block on
1627 dbus-daemon during IPC, and dbus-daemon is logging to us,
1628 and might hence block on us, we might end up in a deadlock
1629 if we block on sending PID 1 notification messages — by
1630 generating a full blocking circle. To avoid this, let's
1631 create a non-blocking socket, and connect it to the
1632 notification socket, and then wait for POLLOUT before we
1633 send anything. This should efficiently avoid any deadlocks,
1634 as we'll never block on PID 1, hence PID 1 can safely block
1635 on dbus-daemon which can safely block on us again.
1637 Don't think that this issue is real? It is, see:
1638 https://github.com/systemd/systemd/issues/1505
1641 e
= getenv("NOTIFY_SOCKET");
1645 if (!IN_SET(e
[0], '@', '/') || e
[1] == 0) {
1646 log_error("NOTIFY_SOCKET set to an invalid value: %s", e
);
1650 if (strlen(e
) > sizeof(sa
.un
.sun_path
)) {
1651 log_error("NOTIFY_SOCKET path too long: %s", e
);
1655 s
->notify_fd
= socket(AF_UNIX
, SOCK_DGRAM
|SOCK_CLOEXEC
|SOCK_NONBLOCK
, 0);
1656 if (s
->notify_fd
< 0)
1657 return log_error_errno(errno
, "Failed to create notify socket: %m");
1659 (void) fd_inc_sndbuf(s
->notify_fd
, NOTIFY_SNDBUF_SIZE
);
1661 strncpy(sa
.un
.sun_path
, e
, sizeof(sa
.un
.sun_path
));
1662 if (sa
.un
.sun_path
[0] == '@')
1663 sa
.un
.sun_path
[0] = 0;
1665 r
= connect(s
->notify_fd
, &sa
.sa
, SOCKADDR_UN_LEN(sa
.un
));
1667 return log_error_errno(errno
, "Failed to connect to notify socket: %m");
1669 r
= sd_event_add_io(s
->event
, &s
->notify_event_source
, s
->notify_fd
, EPOLLOUT
, dispatch_notify_event
, s
);
1671 return log_error_errno(r
, "Failed to watch notification socket: %m");
1673 if (sd_watchdog_enabled(false, &s
->watchdog_usec
) > 0) {
1674 s
->send_watchdog
= true;
1676 r
= sd_event_add_time(s
->event
, &s
->watchdog_event_source
, CLOCK_MONOTONIC
, now(CLOCK_MONOTONIC
) + s
->watchdog_usec
/2, s
->watchdog_usec
/4, dispatch_watchdog
, s
);
1678 return log_error_errno(r
, "Failed to add watchdog time event: %m");
1681 /* This should fire pretty soon, which we'll use to send the
1687 int server_init(Server
*s
) {
1688 _cleanup_fdset_free_ FDSet
*fds
= NULL
;
1695 s
->syslog_fd
= s
->native_fd
= s
->stdout_fd
= s
->dev_kmsg_fd
= s
->audit_fd
= s
->hostname_fd
= s
->notify_fd
= -1;
1698 s
->read_kmsg
= true;
1700 s
->watchdog_usec
= USEC_INFINITY
;
1702 s
->sync_interval_usec
= DEFAULT_SYNC_INTERVAL_USEC
;
1703 s
->sync_scheduled
= false;
1705 s
->rate_limit_interval
= DEFAULT_RATE_LIMIT_INTERVAL
;
1706 s
->rate_limit_burst
= DEFAULT_RATE_LIMIT_BURST
;
1708 s
->forward_to_wall
= true;
1710 s
->max_file_usec
= DEFAULT_MAX_FILE_USEC
;
1712 s
->max_level_store
= LOG_DEBUG
;
1713 s
->max_level_syslog
= LOG_DEBUG
;
1714 s
->max_level_kmsg
= LOG_NOTICE
;
1715 s
->max_level_console
= LOG_INFO
;
1716 s
->max_level_wall
= LOG_EMERG
;
1718 s
->line_max
= DEFAULT_LINE_MAX
;
1720 journal_reset_metrics(&s
->system_storage
.metrics
);
1721 journal_reset_metrics(&s
->runtime_storage
.metrics
);
1723 server_parse_config_file(s
);
1725 r
= proc_cmdline_parse(parse_proc_cmdline_item
, s
, PROC_CMDLINE_STRIP_RD_PREFIX
);
1727 log_warning_errno(r
, "Failed to parse kernel command line, ignoring: %m");
1729 if (!!s
->rate_limit_interval
^ !!s
->rate_limit_burst
) {
1730 log_debug("Setting both rate limit interval and burst from "USEC_FMT
",%u to 0,0",
1731 s
->rate_limit_interval
, s
->rate_limit_burst
);
1732 s
->rate_limit_interval
= s
->rate_limit_burst
= 0;
1735 (void) mkdir_p("/run/systemd/journal", 0755);
1737 s
->user_journals
= ordered_hashmap_new(NULL
);
1738 if (!s
->user_journals
)
1741 s
->mmap
= mmap_cache_new();
1745 s
->deferred_closes
= set_new(NULL
);
1746 if (!s
->deferred_closes
)
1749 r
= sd_event_default(&s
->event
);
1751 return log_error_errno(r
, "Failed to create event loop: %m");
1753 n
= sd_listen_fds(true);
1755 return log_error_errno(n
, "Failed to read listening file descriptors from environment: %m");
1757 for (fd
= SD_LISTEN_FDS_START
; fd
< SD_LISTEN_FDS_START
+ n
; fd
++) {
1759 if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/socket", 0) > 0) {
1761 if (s
->native_fd
>= 0) {
1762 log_error("Too many native sockets passed.");
1768 } else if (sd_is_socket_unix(fd
, SOCK_STREAM
, 1, "/run/systemd/journal/stdout", 0) > 0) {
1770 if (s
->stdout_fd
>= 0) {
1771 log_error("Too many stdout sockets passed.");
1777 } else if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/dev/log", 0) > 0 ||
1778 sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1780 if (s
->syslog_fd
>= 0) {
1781 log_error("Too many /dev/log sockets passed.");
1787 } else if (sd_is_socket(fd
, AF_NETLINK
, SOCK_RAW
, -1) > 0) {
1789 if (s
->audit_fd
>= 0) {
1790 log_error("Too many audit sockets passed.");
1804 r
= fdset_put(fds
, fd
);
1810 /* Try to restore streams, but don't bother if this fails */
1811 (void) server_restore_streams(s
, fds
);
1813 if (fdset_size(fds
) > 0) {
1814 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds
));
1815 fds
= fdset_free(fds
);
1818 no_sockets
= s
->native_fd
< 0 && s
->stdout_fd
< 0 && s
->syslog_fd
< 0 && s
->audit_fd
< 0;
1820 /* always open stdout, syslog, native, and kmsg sockets */
1822 /* systemd-journald.socket: /run/systemd/journal/stdout */
1823 r
= server_open_stdout_socket(s
);
1827 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
1828 r
= server_open_syslog_socket(s
);
1832 /* systemd-journald.socket: /run/systemd/journal/socket */
1833 r
= server_open_native_socket(s
);
1838 r
= server_open_dev_kmsg(s
);
1842 /* Unless we got *some* sockets and not audit, open audit socket */
1843 if (s
->audit_fd
>= 0 || no_sockets
) {
1844 r
= server_open_audit(s
);
1849 r
= server_open_kernel_seqnum(s
);
1853 r
= server_open_hostname(s
);
1857 r
= setup_signals(s
);
1861 s
->udev
= udev_new();
1865 s
->rate_limit
= journal_rate_limit_new(s
->rate_limit_interval
, s
->rate_limit_burst
);
1869 r
= cg_get_root_path(&s
->cgroup_root
);
1873 server_cache_hostname(s
);
1874 server_cache_boot_id(s
);
1875 server_cache_machine_id(s
);
1877 s
->runtime_storage
.name
= "Runtime journal";
1878 s
->system_storage
.name
= "System journal";
1880 s
->runtime_storage
.path
= strjoin("/run/log/journal/", SERVER_MACHINE_ID(s
));
1881 s
->system_storage
.path
= strjoin("/var/log/journal/", SERVER_MACHINE_ID(s
));
1882 if (!s
->runtime_storage
.path
|| !s
->system_storage
.path
)
1885 (void) server_connect_notify(s
);
1887 (void) client_context_acquire_default(s
);
1889 return system_journal_open(s
, false);
1892 void server_maybe_append_tags(Server
*s
) {
1898 n
= now(CLOCK_REALTIME
);
1900 if (s
->system_journal
)
1901 journal_file_maybe_append_tag(s
->system_journal
, n
);
1903 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
)
1904 journal_file_maybe_append_tag(f
, n
);
1908 void server_done(Server
*s
) {
1911 set_free_with_destructor(s
->deferred_closes
, journal_file_close
);
1913 while (s
->stdout_streams
)
1914 stdout_stream_free(s
->stdout_streams
);
1916 client_context_flush_all(s
);
1918 if (s
->system_journal
)
1919 (void) journal_file_close(s
->system_journal
);
1921 if (s
->runtime_journal
)
1922 (void) journal_file_close(s
->runtime_journal
);
1924 ordered_hashmap_free_with_destructor(s
->user_journals
, journal_file_close
);
1926 sd_event_source_unref(s
->syslog_event_source
);
1927 sd_event_source_unref(s
->native_event_source
);
1928 sd_event_source_unref(s
->stdout_event_source
);
1929 sd_event_source_unref(s
->dev_kmsg_event_source
);
1930 sd_event_source_unref(s
->audit_event_source
);
1931 sd_event_source_unref(s
->sync_event_source
);
1932 sd_event_source_unref(s
->sigusr1_event_source
);
1933 sd_event_source_unref(s
->sigusr2_event_source
);
1934 sd_event_source_unref(s
->sigterm_event_source
);
1935 sd_event_source_unref(s
->sigint_event_source
);
1936 sd_event_source_unref(s
->sigrtmin1_event_source
);
1937 sd_event_source_unref(s
->hostname_event_source
);
1938 sd_event_source_unref(s
->notify_event_source
);
1939 sd_event_source_unref(s
->watchdog_event_source
);
1940 sd_event_unref(s
->event
);
1942 safe_close(s
->syslog_fd
);
1943 safe_close(s
->native_fd
);
1944 safe_close(s
->stdout_fd
);
1945 safe_close(s
->dev_kmsg_fd
);
1946 safe_close(s
->audit_fd
);
1947 safe_close(s
->hostname_fd
);
1948 safe_close(s
->notify_fd
);
1951 journal_rate_limit_free(s
->rate_limit
);
1953 if (s
->kernel_seqnum
)
1954 munmap(s
->kernel_seqnum
, sizeof(uint64_t));
1958 free(s
->cgroup_root
);
1959 free(s
->hostname_field
);
1960 free(s
->runtime_storage
.path
);
1961 free(s
->system_storage
.path
);
1964 mmap_cache_unref(s
->mmap
);
1966 udev_unref(s
->udev
);
1969 static const char* const storage_table
[_STORAGE_MAX
] = {
1970 [STORAGE_AUTO
] = "auto",
1971 [STORAGE_VOLATILE
] = "volatile",
1972 [STORAGE_PERSISTENT
] = "persistent",
1973 [STORAGE_NONE
] = "none"
1976 DEFINE_STRING_TABLE_LOOKUP(storage
, Storage
);
1977 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage
, storage
, Storage
, "Failed to parse storage setting");
1979 static const char* const split_mode_table
[_SPLIT_MAX
] = {
1980 [SPLIT_LOGIN
] = "login",
1981 [SPLIT_UID
] = "uid",
1982 [SPLIT_NONE
] = "none",
1985 DEFINE_STRING_TABLE_LOOKUP(split_mode
, SplitMode
);
1986 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode
, split_mode
, SplitMode
, "Failed to parse split mode setting");
1988 int config_parse_line_max(
1990 const char *filename
,
1992 const char *section
,
1993 unsigned section_line
,
2008 if (isempty(rvalue
))
2009 /* Empty assignment means default */
2010 *sz
= DEFAULT_LINE_MAX
;
2014 r
= parse_size(rvalue
, 1024, &v
);
2016 log_syntax(unit
, LOG_ERR
, filename
, line
, r
, "Failed to parse LineMax= value, ignoring: %s", rvalue
);
2021 /* Why specify 79 here as minimum line length? Simply, because the most common traditional
2022 * terminal size is 80ch, and it might make sense to break one character before the natural
2023 * line break would occur on that. */
2024 log_syntax(unit
, LOG_WARNING
, filename
, line
, 0, "LineMax= too small, clamping to 79: %s", rvalue
);
2026 } else if (v
> (uint64_t) (SSIZE_MAX
-1)) {
2027 /* So, why specify SSIZE_MAX-1 here? Because that's one below the largest size value read()
2028 * can return, and we need one extra byte for the trailing NUL byte. Of course IRL such large
2029 * memory allocations will fail anyway, hence this limit is mostly theoretical anyway, as we'll
2030 * fail much earlier anyway. */
2031 log_syntax(unit
, LOG_WARNING
, filename
, line
, 0, "LineMax= too large, clamping to %" PRIu64
": %s", (uint64_t) (SSIZE_MAX
-1), rvalue
);