1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 This file is part of systemd.
5 Copyright 2011 Lennart Poettering
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <selinux/selinux.h>
24 #include <sys/ioctl.h>
26 #include <sys/signalfd.h>
27 #include <sys/statvfs.h>
28 #include <linux/sockios.h>
31 #include "sd-daemon.h"
32 #include "sd-journal.h"
33 #include "sd-messages.h"
36 #include "alloc-util.h"
37 #include "audit-util.h"
38 #include "cgroup-util.h"
39 #include "conf-parser.h"
40 #include "dirent-util.h"
41 #include "extract-word.h"
44 #include "format-util.h"
47 #include "hostname-util.h"
48 #include "id128-util.h"
50 #include "journal-authenticate.h"
51 #include "journal-file.h"
52 #include "journal-internal.h"
53 #include "journal-vacuum.h"
54 #include "journald-audit.h"
55 #include "journald-context.h"
56 #include "journald-kmsg.h"
57 #include "journald-native.h"
58 #include "journald-rate-limit.h"
59 #include "journald-server.h"
60 #include "journald-stream.h"
61 #include "journald-syslog.h"
65 #include "parse-util.h"
66 #include "proc-cmdline.h"
67 #include "process-util.h"
69 #include "selinux-util.h"
70 #include "signal-util.h"
71 #include "socket-util.h"
72 #include "stdio-util.h"
73 #include "string-table.h"
74 #include "string-util.h"
75 #include "syslog-util.h"
76 #include "user-util.h"
/* Cap on the number of per-user journal files held open at the same time: find_journal() steals and
 * closes the first entry of s->user_journals for as long as this many (or more) are open. */
#define USER_JOURNALS_MAX 1024

/* Built-in defaults. NOTE(review): the code consuming these four is not part of this excerpt —
 * presumably they seed the corresponding Server settings; confirm against server_init(). */
#define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
#define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
#define DEFAULT_RATE_LIMIT_BURST 1000
#define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH

/* cache_space_refresh() keeps using its cached disk space figures until they are older than this. */
#define RECHECK_SPACE_USEC (30*USEC_PER_SEC)

/* NOTE(review): presumably the send buffer size requested for the notification socket — the user of
 * this constant is not visible here; confirm. */
#define NOTIFY_SNDBUF_SIZE (8*1024*1024)

/* The period to insert between posting changes for coalescing */
#define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)

/* Pick a good default that is likely to fit into AF_UNIX and AF_INET SOCK_DGRAM datagrams, and even
 * leaves some room for a bit of additional metadata. */
#define DEFAULT_LINE_MAX (48*1024)
96 static int determine_path_usage(Server
*s
, const char *path
, uint64_t *ret_used
, uint64_t *ret_free
) {
97 _cleanup_closedir_
DIR *d
= NULL
;
106 return log_full_errno(errno
== ENOENT
? LOG_DEBUG
: LOG_ERR
,
107 errno
, "Failed to open %s: %m", path
);
109 if (fstatvfs(dirfd(d
), &ss
) < 0)
110 return log_error_errno(errno
, "Failed to fstatvfs(%s): %m", path
);
112 *ret_free
= ss
.f_bsize
* ss
.f_bavail
;
114 FOREACH_DIRENT_ALL(de
, d
, break) {
117 if (!endswith(de
->d_name
, ".journal") &&
118 !endswith(de
->d_name
, ".journal~"))
121 if (fstatat(dirfd(d
), de
->d_name
, &st
, AT_SYMLINK_NOFOLLOW
) < 0) {
122 log_debug_errno(errno
, "Failed to stat %s/%s, ignoring: %m", path
, de
->d_name
);
126 if (!S_ISREG(st
.st_mode
))
129 *ret_used
+= (uint64_t) st
.st_blocks
* 512UL;
135 static void cache_space_invalidate(JournalStorageSpace
*space
) {
139 static int cache_space_refresh(Server
*s
, JournalStorage
*storage
) {
140 JournalStorageSpace
*space
;
141 JournalMetrics
*metrics
;
142 uint64_t vfs_used
, vfs_avail
, avail
;
148 metrics
= &storage
->metrics
;
149 space
= &storage
->space
;
151 ts
= now(CLOCK_MONOTONIC
);
153 if (space
->timestamp
!= 0 && space
->timestamp
+ RECHECK_SPACE_USEC
> ts
)
156 r
= determine_path_usage(s
, storage
->path
, &vfs_used
, &vfs_avail
);
160 space
->vfs_used
= vfs_used
;
161 space
->vfs_available
= vfs_avail
;
163 avail
= LESS_BY(vfs_avail
, metrics
->keep_free
);
165 space
->limit
= MIN(MAX(vfs_used
+ avail
, metrics
->min_use
), metrics
->max_use
);
166 space
->available
= LESS_BY(space
->limit
, vfs_used
);
167 space
->timestamp
= ts
;
171 static void patch_min_use(JournalStorage
*storage
) {
174 /* Let's bump the min_use limit to the current usage on disk. We do
175 * this when starting up and first opening the journal files. This way
176 * sudden spikes in disk usage will not cause journald to vacuum files
177 * without bounds. Note that this means that only a restart of journald
178 * will make it reset this value. */
180 storage
->metrics
.min_use
= MAX(storage
->metrics
.min_use
, storage
->space
.vfs_used
);
184 static int determine_space(Server
*s
, uint64_t *available
, uint64_t *limit
) {
190 js
= s
->system_journal
? &s
->system_storage
: &s
->runtime_storage
;
192 r
= cache_space_refresh(s
, js
);
195 *available
= js
->space
.available
;
197 *limit
= js
->space
.limit
;
202 void server_space_usage_message(Server
*s
, JournalStorage
*storage
) {
203 char fb1
[FORMAT_BYTES_MAX
], fb2
[FORMAT_BYTES_MAX
], fb3
[FORMAT_BYTES_MAX
],
204 fb4
[FORMAT_BYTES_MAX
], fb5
[FORMAT_BYTES_MAX
], fb6
[FORMAT_BYTES_MAX
];
205 JournalMetrics
*metrics
;
210 storage
= s
->system_journal
? &s
->system_storage
: &s
->runtime_storage
;
212 if (cache_space_refresh(s
, storage
) < 0)
215 metrics
= &storage
->metrics
;
216 format_bytes(fb1
, sizeof(fb1
), storage
->space
.vfs_used
);
217 format_bytes(fb2
, sizeof(fb2
), metrics
->max_use
);
218 format_bytes(fb3
, sizeof(fb3
), metrics
->keep_free
);
219 format_bytes(fb4
, sizeof(fb4
), storage
->space
.vfs_available
);
220 format_bytes(fb5
, sizeof(fb5
), storage
->space
.limit
);
221 format_bytes(fb6
, sizeof(fb6
), storage
->space
.available
);
223 server_driver_message(s
, 0,
224 "MESSAGE_ID=" SD_MESSAGE_JOURNAL_USAGE_STR
,
225 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
226 storage
->name
, storage
->path
, fb1
, fb5
, fb6
),
227 "JOURNAL_NAME=%s", storage
->name
,
228 "JOURNAL_PATH=%s", storage
->path
,
229 "CURRENT_USE=%"PRIu64
, storage
->space
.vfs_used
,
230 "CURRENT_USE_PRETTY=%s", fb1
,
231 "MAX_USE=%"PRIu64
, metrics
->max_use
,
232 "MAX_USE_PRETTY=%s", fb2
,
233 "DISK_KEEP_FREE=%"PRIu64
, metrics
->keep_free
,
234 "DISK_KEEP_FREE_PRETTY=%s", fb3
,
235 "DISK_AVAILABLE=%"PRIu64
, storage
->space
.vfs_available
,
236 "DISK_AVAILABLE_PRETTY=%s", fb4
,
237 "LIMIT=%"PRIu64
, storage
->space
.limit
,
238 "LIMIT_PRETTY=%s", fb5
,
239 "AVAILABLE=%"PRIu64
, storage
->space
.available
,
240 "AVAILABLE_PRETTY=%s", fb6
,
244 static bool uid_for_system_journal(uid_t uid
) {
246 /* Returns true if the specified UID shall get its data stored in the system journal*/
248 return uid_is_system(uid
) || uid_is_dynamic(uid
) || uid
== UID_NOBODY
;
251 static void server_add_acls(JournalFile
*f
, uid_t uid
) {
258 if (uid_for_system_journal(uid
))
261 r
= add_acls_for_user(f
->fd
, uid
);
263 log_warning_errno(r
, "Failed to set ACL on %s, ignoring: %m", f
->path
);
267 static int open_journal(
273 JournalMetrics
*metrics
,
283 r
= journal_file_open_reliably(fname
, flags
, 0640, s
->compress
, seal
, metrics
, s
->mmap
, s
->deferred_closes
, NULL
, &f
);
285 r
= journal_file_open(-1, fname
, flags
, 0640, s
->compress
, seal
, metrics
, s
->mmap
, s
->deferred_closes
, NULL
, &f
);
289 r
= journal_file_enable_post_change_timer(f
, s
->event
, POST_CHANGE_TIMER_INTERVAL_USEC
);
291 (void) journal_file_close(f
);
/* Reports whether the flag file /run/systemd/journal/flushed exists, as determined by an
 * access(F_OK) check. Any access() failure is reported as "not set". */
static bool flushed_flag_is_set(void) {

        if (access("/run/systemd/journal/flushed", F_OK) < 0)
                return false;

        return true;
}
303 static int system_journal_open(Server
*s
, bool flush_requested
) {
307 if (!s
->system_journal
&&
308 IN_SET(s
->storage
, STORAGE_PERSISTENT
, STORAGE_AUTO
) &&
309 (flush_requested
|| flushed_flag_is_set())) {
311 /* If in auto mode: first try to create the machine
312 * path, but not the prefix.
314 * If in persistent mode: create /var/log/journal and
315 * the machine path */
317 if (s
->storage
== STORAGE_PERSISTENT
)
318 (void) mkdir_p("/var/log/journal/", 0755);
320 (void) mkdir(s
->system_storage
.path
, 0755);
322 fn
= strjoina(s
->system_storage
.path
, "/system.journal");
323 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_storage
.metrics
, &s
->system_journal
);
325 server_add_acls(s
->system_journal
, 0);
326 (void) cache_space_refresh(s
, &s
->system_storage
);
327 patch_min_use(&s
->system_storage
);
329 if (!IN_SET(r
, -ENOENT
, -EROFS
))
330 log_warning_errno(r
, "Failed to open system journal: %m");
335 /* If the runtime journal is open, and we're post-flush, we're
336 * recovering from a failed system journal rotate (ENOSPC)
337 * for which the runtime journal was reopened.
339 * Perform an implicit flush to var, leaving the runtime
340 * journal closed, now that the system journal is back.
342 if (!flush_requested
)
343 (void) server_flush_to_var(s
, true);
346 if (!s
->runtime_journal
&&
347 (s
->storage
!= STORAGE_NONE
)) {
349 fn
= strjoina(s
->runtime_storage
.path
, "/system.journal");
351 if (s
->system_journal
) {
353 /* Try to open the runtime journal, but only
354 * if it already exists, so that we can flush
355 * it into the system journal */
357 r
= open_journal(s
, false, fn
, O_RDWR
, false, &s
->runtime_storage
.metrics
, &s
->runtime_journal
);
360 log_warning_errno(r
, "Failed to open runtime journal: %m");
367 /* OK, we really need the runtime journal, so create
368 * it if necessary. */
370 (void) mkdir("/run/log", 0755);
371 (void) mkdir("/run/log/journal", 0755);
372 (void) mkdir_parents(fn
, 0750);
374 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, false, &s
->runtime_storage
.metrics
, &s
->runtime_journal
);
376 return log_error_errno(r
, "Failed to open runtime journal: %m");
379 if (s
->runtime_journal
) {
380 server_add_acls(s
->runtime_journal
, 0);
381 (void) cache_space_refresh(s
, &s
->runtime_storage
);
382 patch_min_use(&s
->runtime_storage
);
389 static JournalFile
* find_journal(Server
*s
, uid_t uid
) {
390 _cleanup_free_
char *p
= NULL
;
397 /* A rotate that fails to create the new journal (ENOSPC) leaves the
398 * rotated journal as NULL. Unless we revisit opening, even after
399 * space is made available we'll continue to return NULL indefinitely.
401 * system_journal_open() is a noop if the journals are already open, so
402 * we can just call it here to recover from failed rotates (or anything
403 * else that's left the journals as NULL).
405 * Fixes https://github.com/systemd/systemd/issues/3968 */
406 (void) system_journal_open(s
, false);
408 /* We split up user logs only on /var, not on /run. If the
409 * runtime file is open, we write to it exclusively, in order
410 * to guarantee proper order as soon as we flush /run to
411 * /var and close the runtime file. */
413 if (s
->runtime_journal
)
414 return s
->runtime_journal
;
416 if (uid_for_system_journal(uid
))
417 return s
->system_journal
;
419 r
= sd_id128_get_machine(&machine
);
421 return s
->system_journal
;
423 f
= ordered_hashmap_get(s
->user_journals
, UID_TO_PTR(uid
));
427 if (asprintf(&p
, "/var/log/journal/" SD_ID128_FORMAT_STR
"/user-"UID_FMT
".journal",
428 SD_ID128_FORMAT_VAL(machine
), uid
) < 0)
429 return s
->system_journal
;
431 while (ordered_hashmap_size(s
->user_journals
) >= USER_JOURNALS_MAX
) {
432 /* Too many open? Then let's close one */
433 f
= ordered_hashmap_steal_first(s
->user_journals
);
435 (void) journal_file_close(f
);
438 r
= open_journal(s
, true, p
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_storage
.metrics
, &f
);
440 return s
->system_journal
;
442 server_add_acls(f
, uid
);
444 r
= ordered_hashmap_put(s
->user_journals
, UID_TO_PTR(uid
), f
);
446 (void) journal_file_close(f
);
447 return s
->system_journal
;
453 static int do_rotate(
466 r
= journal_file_rotate(f
, s
->compress
, seal
, s
->deferred_closes
);
469 return log_error_errno(r
, "Failed to rotate %s: %m", (*f
)->path
);
471 return log_error_errno(r
, "Failed to create new %s journal: %m", name
);
474 server_add_acls(*f
, uid
);
479 void server_rotate(Server
*s
) {
485 log_debug("Rotating...");
487 (void) do_rotate(s
, &s
->runtime_journal
, "runtime", false, 0);
488 (void) do_rotate(s
, &s
->system_journal
, "system", s
->seal
, 0);
490 ORDERED_HASHMAP_FOREACH_KEY(f
, k
, s
->user_journals
, i
) {
491 r
= do_rotate(s
, &f
, "user", s
->seal
, PTR_TO_UID(k
));
493 ordered_hashmap_replace(s
->user_journals
, k
, f
);
495 /* Old file has been closed and deallocated */
496 ordered_hashmap_remove(s
->user_journals
, k
);
499 /* Perform any deferred closes which aren't still offlining. */
500 SET_FOREACH(f
, s
->deferred_closes
, i
)
501 if (!journal_file_is_offlining(f
)) {
502 (void) set_remove(s
->deferred_closes
, f
);
503 (void) journal_file_close(f
);
507 void server_sync(Server
*s
) {
512 if (s
->system_journal
) {
513 r
= journal_file_set_offline(s
->system_journal
, false);
515 log_warning_errno(r
, "Failed to sync system journal, ignoring: %m");
518 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
) {
519 r
= journal_file_set_offline(f
, false);
521 log_warning_errno(r
, "Failed to sync user journal, ignoring: %m");
524 if (s
->sync_event_source
) {
525 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_OFF
);
527 log_error_errno(r
, "Failed to disable sync timer source: %m");
530 s
->sync_scheduled
= false;
533 static void do_vacuum(Server
*s
, JournalStorage
*storage
, bool verbose
) {
540 (void) cache_space_refresh(s
, storage
);
543 server_space_usage_message(s
, storage
);
545 r
= journal_directory_vacuum(storage
->path
, storage
->space
.limit
,
546 storage
->metrics
.n_max_files
, s
->max_retention_usec
,
547 &s
->oldest_file_usec
, verbose
);
548 if (r
< 0 && r
!= -ENOENT
)
549 log_warning_errno(r
, "Failed to vacuum %s, ignoring: %m", storage
->path
);
551 cache_space_invalidate(&storage
->space
);
554 int server_vacuum(Server
*s
, bool verbose
) {
557 log_debug("Vacuuming...");
559 s
->oldest_file_usec
= 0;
561 if (s
->system_journal
)
562 do_vacuum(s
, &s
->system_storage
, verbose
);
563 if (s
->runtime_journal
)
564 do_vacuum(s
, &s
->runtime_storage
, verbose
);
569 static void server_cache_machine_id(Server
*s
) {
575 r
= sd_id128_get_machine(&id
);
579 sd_id128_to_string(id
, stpcpy(s
->machine_id_field
, "_MACHINE_ID="));
582 static void server_cache_boot_id(Server
*s
) {
588 r
= sd_id128_get_boot(&id
);
592 sd_id128_to_string(id
, stpcpy(s
->boot_id_field
, "_BOOT_ID="));
595 static void server_cache_hostname(Server
*s
) {
596 _cleanup_free_
char *t
= NULL
;
601 t
= gethostname_malloc();
605 x
= strappend("_HOSTNAME=", t
);
609 free(s
->hostname_field
);
610 s
->hostname_field
= x
;
613 static bool shall_try_append_again(JournalFile
*f
, int r
) {
616 case -E2BIG
: /* Hit configured limit */
617 case -EFBIG
: /* Hit fs limit */
618 case -EDQUOT
: /* Quota limit hit */
619 case -ENOSPC
: /* Disk full */
620 log_debug("%s: Allocation limit reached, rotating.", f
->path
);
623 case -EIO
: /* I/O error of some kind (mmap) */
624 log_warning("%s: IO error, rotating.", f
->path
);
627 case -EHOSTDOWN
: /* Other machine */
628 log_info("%s: Journal file from other machine, rotating.", f
->path
);
631 case -EBUSY
: /* Unclean shutdown */
632 log_info("%s: Unclean shutdown, rotating.", f
->path
);
635 case -EPROTONOSUPPORT
: /* Unsupported feature */
636 log_info("%s: Unsupported feature, rotating.", f
->path
);
639 case -EBADMSG
: /* Corrupted */
640 case -ENODATA
: /* Truncated */
641 case -ESHUTDOWN
: /* Already archived */
642 log_warning("%s: Journal file corrupted, rotating.", f
->path
);
645 case -EIDRM
: /* Journal file has been deleted */
646 log_warning("%s: Journal file has been deleted, rotating.", f
->path
);
649 case -ETXTBSY
: /* Journal file is from the future */
650 log_warning("%s: Journal file is from the future, rotating.", f
->path
);
658 static void write_to_journal(Server
*s
, uid_t uid
, struct iovec
*iovec
, unsigned n
, int priority
) {
659 bool vacuumed
= false, rotate
= false;
660 struct dual_timestamp ts
;
668 /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
669 * the source time, and not even the time the event was originally seen, but instead simply the time we started
670 * processing it, as we want strictly linear ordering in what we write out.) */
671 assert_se(sd_event_now(s
->event
, CLOCK_REALTIME
, &ts
.realtime
) >= 0);
672 assert_se(sd_event_now(s
->event
, CLOCK_MONOTONIC
, &ts
.monotonic
) >= 0);
674 if (ts
.realtime
< s
->last_realtime_clock
) {
675 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
676 * regular operation. However, when it does happen, then we should make sure that we start fresh files
677 * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
678 * bisection works correctly. */
680 log_debug("Time jumped backwards, rotating.");
684 f
= find_journal(s
, uid
);
688 if (journal_file_rotate_suggested(f
, s
->max_file_usec
)) {
689 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f
->path
);
696 server_vacuum(s
, false);
699 f
= find_journal(s
, uid
);
704 s
->last_realtime_clock
= ts
.realtime
;
706 r
= journal_file_append_entry(f
, &ts
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
708 server_schedule_sync(s
, priority
);
712 if (vacuumed
|| !shall_try_append_again(f
, r
)) {
713 log_error_errno(r
, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
718 server_vacuum(s
, false);
720 f
= find_journal(s
, uid
);
724 log_debug("Retrying write.");
725 r
= journal_file_append_entry(f
, &ts
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
727 log_error_errno(r
, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
729 server_schedule_sync(s
, priority
);
732 #define IOVEC_ADD_NUMERIC_FIELD(iovec, n, value, type, isset, format, field) \
733 if (isset(value)) { \
735 k = newa(char, STRLEN(field "=") + DECIMAL_STR_MAX(type) + 1); \
736 sprintf(k, field "=" format, value); \
737 iovec[n++] = IOVEC_MAKE_STRING(k); \
740 #define IOVEC_ADD_STRING_FIELD(iovec, n, value, field) \
741 if (!isempty(value)) { \
743 k = strjoina(field "=", value); \
744 iovec[n++] = IOVEC_MAKE_STRING(k); \
747 #define IOVEC_ADD_ID128_FIELD(iovec, n, value, field) \
748 if (!sd_id128_is_null(value)) { \
750 k = newa(char, STRLEN(field "=") + SD_ID128_STRING_MAX); \
751 sd_id128_to_string(value, stpcpy(k, field "=")); \
752 iovec[n++] = IOVEC_MAKE_STRING(k); \
755 #define IOVEC_ADD_SIZED_FIELD(iovec, n, value, value_size, field) \
756 if (value_size > 0) { \
758 k = newa(char, STRLEN(field "=") + value_size + 1); \
759 *((char*) mempcpy(stpcpy(k, field "="), value, value_size)) = 0; \
760 iovec[n++] = IOVEC_MAKE_STRING(k); \
763 static void dispatch_message_real(
765 struct iovec
*iovec
, size_t n
, size_t m
,
766 const ClientContext
*c
,
767 const struct timeval
*tv
,
771 char source_time
[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t
)];
779 N_IOVEC_META_FIELDS
+
780 (pid_is_valid(object_pid
) ? N_IOVEC_OBJECT_FIELDS
: 0) +
781 client_context_extra_fields_n_iovec(c
) <= m
);
784 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->pid
, pid_t
, pid_is_valid
, PID_FMT
, "_PID");
785 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->uid
, uid_t
, uid_is_valid
, UID_FMT
, "_UID");
786 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->gid
, gid_t
, gid_is_valid
, GID_FMT
, "_GID");
788 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->comm
, "_COMM");
789 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->exe
, "_EXE");
790 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->cmdline
, "_CMDLINE");
791 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->capeff
, "_CAP_EFFECTIVE");
793 IOVEC_ADD_SIZED_FIELD(iovec
, n
, c
->label
, c
->label_size
, "_SELINUX_CONTEXT");
795 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->auditid
, uint32_t, audit_session_is_valid
, "%" PRIu32
, "_AUDIT_SESSION");
796 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->loginuid
, uid_t
, uid_is_valid
, UID_FMT
, "_AUDIT_LOGINUID");
798 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->cgroup
, "_SYSTEMD_CGROUP");
799 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->session
, "_SYSTEMD_SESSION");
800 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->owner_uid
, uid_t
, uid_is_valid
, UID_FMT
, "_SYSTEMD_OWNER_UID");
801 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->unit
, "_SYSTEMD_UNIT");
802 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->user_unit
, "_SYSTEMD_USER_UNIT");
803 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->slice
, "_SYSTEMD_SLICE");
804 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->user_slice
, "_SYSTEMD_USER_SLICE");
806 IOVEC_ADD_ID128_FIELD(iovec
, n
, c
->invocation_id
, "_SYSTEMD_INVOCATION_ID");
808 if (c
->extra_fields_n_iovec
> 0) {
809 memcpy(iovec
+ n
, c
->extra_fields_iovec
, c
->extra_fields_n_iovec
* sizeof(struct iovec
));
810 n
+= c
->extra_fields_n_iovec
;
816 if (pid_is_valid(object_pid
) && client_context_get(s
, object_pid
, NULL
, NULL
, 0, NULL
, &o
) >= 0) {
818 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->pid
, pid_t
, pid_is_valid
, PID_FMT
, "OBJECT_PID");
819 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->uid
, uid_t
, uid_is_valid
, UID_FMT
, "OBJECT_UID");
820 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->gid
, gid_t
, gid_is_valid
, GID_FMT
, "OBJECT_GID");
822 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->comm
, "OBJECT_COMM");
823 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->exe
, "OBJECT_EXE");
824 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->cmdline
, "OBJECT_CMDLINE");
825 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->capeff
, "OBJECT_CAP_EFFECTIVE");
827 IOVEC_ADD_SIZED_FIELD(iovec
, n
, o
->label
, o
->label_size
, "OBJECT_SELINUX_CONTEXT");
829 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->auditid
, uint32_t, audit_session_is_valid
, "%" PRIu32
, "OBJECT_AUDIT_SESSION");
830 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->loginuid
, uid_t
, uid_is_valid
, UID_FMT
, "OBJECT_AUDIT_LOGINUID");
832 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->cgroup
, "OBJECT_SYSTEMD_CGROUP");
833 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->session
, "OBJECT_SYSTEMD_SESSION");
834 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->owner_uid
, uid_t
, uid_is_valid
, UID_FMT
, "OBJECT_SYSTEMD_OWNER_UID");
835 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->unit
, "OBJECT_SYSTEMD_UNIT");
836 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->user_unit
, "OBJECT_SYSTEMD_USER_UNIT");
837 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->slice
, "OBJECT_SYSTEMD_SLICE");
838 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->user_slice
, "OBJECT_SYSTEMD_USER_SLICE");
840 IOVEC_ADD_ID128_FIELD(iovec
, n
, o
->invocation_id
, "OBJECT_SYSTEMD_INVOCATION_ID=");
846 sprintf(source_time
, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT
, timeval_load(tv
));
847 iovec
[n
++] = IOVEC_MAKE_STRING(source_time
);
850 /* Note that strictly speaking storing the boot id here is
851 * redundant since the entry includes this in-line
852 * anyway. However, we need this indexed, too. */
853 if (!isempty(s
->boot_id_field
))
854 iovec
[n
++] = IOVEC_MAKE_STRING(s
->boot_id_field
);
856 if (!isempty(s
->machine_id_field
))
857 iovec
[n
++] = IOVEC_MAKE_STRING(s
->machine_id_field
);
859 if (!isempty(s
->hostname_field
))
860 iovec
[n
++] = IOVEC_MAKE_STRING(s
->hostname_field
);
864 if (s
->split_mode
== SPLIT_UID
&& c
&& uid_is_valid(c
->uid
))
865 /* Split up strictly by (non-root) UID */
866 journal_uid
= c
->uid
;
867 else if (s
->split_mode
== SPLIT_LOGIN
&& c
&& c
->uid
> 0 && uid_is_valid(c
->owner_uid
))
868 /* Split up by login UIDs. We do this only if the
869 * realuid is not root, in order not to accidentally
870 * leak privileged information to the user that is
871 * logged by a privileged process that is part of an
872 * unprivileged session. */
873 journal_uid
= c
->owner_uid
;
877 write_to_journal(s
, journal_uid
, iovec
, n
, priority
);
880 void server_driver_message(Server
*s
, pid_t object_pid
, const char *message_id
, const char *format
, ...) {
890 m
= N_IOVEC_META_FIELDS
+ 5 + N_IOVEC_PAYLOAD_FIELDS
+ client_context_extra_fields_n_iovec(s
->my_context
) + N_IOVEC_OBJECT_FIELDS
;
891 iovec
= newa(struct iovec
, m
);
893 assert_cc(3 == LOG_FAC(LOG_DAEMON
));
894 iovec
[n
++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=3");
895 iovec
[n
++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=systemd-journald");
897 iovec
[n
++] = IOVEC_MAKE_STRING("_TRANSPORT=driver");
898 assert_cc(6 == LOG_INFO
);
899 iovec
[n
++] = IOVEC_MAKE_STRING("PRIORITY=6");
902 iovec
[n
++] = IOVEC_MAKE_STRING(message_id
);
905 va_start(ap
, format
);
906 r
= log_format_iovec(iovec
, m
, &n
, false, 0, format
, ap
);
907 /* Error handling below */
911 dispatch_message_real(s
, iovec
, n
, m
, s
->my_context
, NULL
, LOG_INFO
, object_pid
);
914 free(iovec
[k
++].iov_base
);
917 /* We failed to format the message. Emit a warning instead. */
920 xsprintf(buf
, "MESSAGE=Entry printing failed: %s", strerror(-r
));
923 iovec
[n
++] = IOVEC_MAKE_STRING("PRIORITY=4");
924 iovec
[n
++] = IOVEC_MAKE_STRING(buf
);
925 dispatch_message_real(s
, iovec
, n
, m
, s
->my_context
, NULL
, LOG_INFO
, object_pid
);
929 void server_dispatch_message(
931 struct iovec
*iovec
, size_t n
, size_t m
,
933 const struct timeval
*tv
,
937 uint64_t available
= 0;
941 assert(iovec
|| n
== 0);
946 if (LOG_PRI(priority
) > s
->max_level_store
)
949 /* Stop early in case the information will not be stored
951 if (s
->storage
== STORAGE_NONE
)
955 (void) determine_space(s
, &available
, NULL
);
957 rl
= journal_rate_limit_test(s
->rate_limit
, c
->unit
, priority
& LOG_PRIMASK
, available
);
961 /* Write a suppression message if we suppressed something */
963 server_driver_message(s
, c
->pid
,
964 "MESSAGE_ID=" SD_MESSAGE_JOURNAL_DROPPED_STR
,
965 LOG_MESSAGE("Suppressed %i messages from %s", rl
- 1, c
->unit
),
966 "N_DROPPED=%i", rl
- 1,
970 dispatch_message_real(s
, iovec
, n
, m
, c
, tv
, priority
, object_pid
);
973 int server_flush_to_var(Server
*s
, bool require_flag_file
) {
975 sd_journal
*j
= NULL
;
976 char ts
[FORMAT_TIMESPAN_MAX
];
983 if (!IN_SET(s
->storage
, STORAGE_AUTO
, STORAGE_PERSISTENT
))
986 if (!s
->runtime_journal
)
989 if (require_flag_file
&& !flushed_flag_is_set())
992 (void) system_journal_open(s
, true);
994 if (!s
->system_journal
)
997 log_debug("Flushing to /var...");
999 start
= now(CLOCK_MONOTONIC
);
1001 r
= sd_id128_get_machine(&machine
);
1005 r
= sd_journal_open(&j
, SD_JOURNAL_RUNTIME_ONLY
);
1007 return log_error_errno(r
, "Failed to read runtime journal: %m");
1009 sd_journal_set_data_threshold(j
, 0);
1011 SD_JOURNAL_FOREACH(j
) {
1015 f
= j
->current_file
;
1016 assert(f
&& f
->current_offset
> 0);
1020 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
1022 log_error_errno(r
, "Can't read entry: %m");
1026 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
1030 if (!shall_try_append_again(s
->system_journal
, r
)) {
1031 log_error_errno(r
, "Can't write entry: %m");
1036 server_vacuum(s
, false);
1038 if (!s
->system_journal
) {
1039 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1044 log_debug("Retrying write.");
1045 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
1047 log_error_errno(r
, "Can't write entry: %m");
1055 journal_file_post_change(s
->system_journal
);
1057 s
->runtime_journal
= journal_file_close(s
->runtime_journal
);
1060 (void) rm_rf("/run/log/journal", REMOVE_ROOT
);
1062 sd_journal_close(j
);
1064 server_driver_message(s
, 0, NULL
,
1065 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1066 format_timespan(ts
, sizeof(ts
), now(CLOCK_MONOTONIC
) - start
, 0),
1073 int server_process_datagram(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1074 Server
*s
= userdata
;
1075 struct ucred
*ucred
= NULL
;
1076 struct timeval
*tv
= NULL
;
1077 struct cmsghdr
*cmsg
;
1079 size_t label_len
= 0, m
;
1082 int *fds
= NULL
, v
= 0;
1086 struct cmsghdr cmsghdr
;
1088 /* We use NAME_MAX space for the SELinux label
1089 * here. The kernel currently enforces no
1090 * limit, but according to suggestions from
1091 * the SELinux people this will change and it
1092 * will probably be identical to NAME_MAX. For
1093 * now we use that, but this should be updated
1094 * one day when the final limit is known. */
1095 uint8_t buf
[CMSG_SPACE(sizeof(struct ucred
)) +
1096 CMSG_SPACE(sizeof(struct timeval
)) +
1097 CMSG_SPACE(sizeof(int)) + /* fd */
1098 CMSG_SPACE(NAME_MAX
)]; /* selinux label */
1101 union sockaddr_union sa
= {};
1103 struct msghdr msghdr
= {
1106 .msg_control
= &control
,
1107 .msg_controllen
= sizeof(control
),
1109 .msg_namelen
= sizeof(sa
),
1113 assert(fd
== s
->native_fd
|| fd
== s
->syslog_fd
|| fd
== s
->audit_fd
);
1115 if (revents
!= EPOLLIN
) {
1116 log_error("Got invalid event from epoll for datagram fd: %"PRIx32
, revents
);
1120 /* Try to get the right size, if we can. (Not all sockets support SIOCINQ, hence we just try, but don't rely on
1122 (void) ioctl(fd
, SIOCINQ
, &v
);
1124 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1125 m
= PAGE_ALIGN(MAX3((size_t) v
+ 1,
1127 ALIGN(sizeof(struct nlmsghdr
)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH
)) + 1);
1129 if (!GREEDY_REALLOC(s
->buffer
, s
->buffer_size
, m
))
1132 iovec
.iov_base
= s
->buffer
;
1133 iovec
.iov_len
= s
->buffer_size
- 1; /* Leave room for trailing NUL we add later */
1135 n
= recvmsg(fd
, &msghdr
, MSG_DONTWAIT
|MSG_CMSG_CLOEXEC
);
1137 if (IN_SET(errno
, EINTR
, EAGAIN
))
1140 return log_error_errno(errno
, "recvmsg() failed: %m");
1143 CMSG_FOREACH(cmsg
, &msghdr
) {
1145 if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1146 cmsg
->cmsg_type
== SCM_CREDENTIALS
&&
1147 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct ucred
)))
1148 ucred
= (struct ucred
*) CMSG_DATA(cmsg
);
1149 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1150 cmsg
->cmsg_type
== SCM_SECURITY
) {
1151 label
= (char*) CMSG_DATA(cmsg
);
1152 label_len
= cmsg
->cmsg_len
- CMSG_LEN(0);
1153 } else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1154 cmsg
->cmsg_type
== SO_TIMESTAMP
&&
1155 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct timeval
)))
1156 tv
= (struct timeval
*) CMSG_DATA(cmsg
);
1157 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1158 cmsg
->cmsg_type
== SCM_RIGHTS
) {
1159 fds
= (int*) CMSG_DATA(cmsg
);
1160 n_fds
= (cmsg
->cmsg_len
- CMSG_LEN(0)) / sizeof(int);
1164 /* And a trailing NUL, just in case */
1167 if (fd
== s
->syslog_fd
) {
1168 if (n
> 0 && n_fds
== 0)
1169 server_process_syslog_message(s
, strstrip(s
->buffer
), ucred
, tv
, label
, label_len
);
1171 log_warning("Got file descriptors via syslog socket. Ignoring.");
1173 } else if (fd
== s
->native_fd
) {
1174 if (n
> 0 && n_fds
== 0)
1175 server_process_native_message(s
, s
->buffer
, n
, ucred
, tv
, label
, label_len
);
1176 else if (n
== 0 && n_fds
== 1)
1177 server_process_native_file(s
, fds
[0], ucred
, tv
, label
, label_len
);
1179 log_warning("Got too many file descriptors via native socket. Ignoring.");
1182 assert(fd
== s
->audit_fd
);
1184 if (n
> 0 && n_fds
== 0)
1185 server_process_audit_message(s
, s
->buffer
, n
, ucred
, &sa
, msghdr
.msg_namelen
);
1187 log_warning("Got file descriptors via audit socket. Ignoring.");
1190 close_many(fds
, n_fds
);
1194 static int dispatch_sigusr1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1195 Server
*s
= userdata
;
1200 log_info("Received request to flush runtime journal from PID " PID_FMT
, si
->ssi_pid
);
1202 (void) server_flush_to_var(s
, false);
1204 server_vacuum(s
, false);
1206 r
= touch("/run/systemd/journal/flushed");
1208 log_warning_errno(r
, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
1210 server_space_usage_message(s
, NULL
);
/* Event loop handler attached to SIGUSR2 by setup_signals().
 *
 * es:       the signal event source (not referenced in the visible lines)
 * si:       signalfd info; si->ssi_pid is logged as the requesting PID
 * userdata: the Server instance given at registration time
 *
 * Logs the rotation request, vacuums, re-runs patch_min_use() on the storage
 * of each journal that is currently open, and records the time in
 * /run/systemd/journal/rotated.
 * NOTE(review): no explicit rotate call is visible in this listing between the
 * log line and server_vacuum(); confirm against the full file. */
1214 static int dispatch_sigusr2(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1215 Server
*s
= userdata
;
1220 log_info("Received request to rotate journal from PID " PID_FMT
, si
->ssi_pid
);
1222 server_vacuum(s
, true);
/* Only touch the storage of journals that are actually open. */
1224 if (s
->system_journal
)
1225 patch_min_use(&s
->system_storage
);
1226 if (s
->runtime_journal
)
1227 patch_min_use(&s
->runtime_storage
);
1229 /* Let clients know when the most recent rotation happened. */
1230 r
= write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC
));
/* Failure to write the timestamp is only warned about ("ignoring").
 * NOTE(review): the guarding condition is not visible here - presumably r < 0. */
1232 log_warning_errno(r
, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
/* Event loop handler used for both SIGTERM and SIGINT (see setup_signals()).
 *
 * es:       the signal event source (not referenced in the visible lines)
 * si:       signalfd info, handed to log_received_signal()
 * userdata: the Server instance given at registration time
 *
 * Logs the received signal at LOG_INFO and asks the event loop to exit with
 * code 0 via sd_event_exit(). */
1237 static int dispatch_sigterm(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1238 Server
*s
= userdata
;
1242 log_received_signal(LOG_INFO
, si
);
1244 sd_event_exit(s
->event
, 0);
/* Event loop handler attached to SIGRTMIN+1 by setup_signals().
 *
 * es:       the signal event source (not referenced in the visible lines)
 * si:       signalfd info; si->ssi_pid is logged as the requesting PID
 * userdata: the Server instance given at registration time
 *
 * Logs the sync request at debug level and records the time in
 * /run/systemd/journal/synced.
 * NOTE(review): the call that performs the sync itself is not visible in this
 * listing; confirm against the full file. */
1248 static int dispatch_sigrtmin1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1249 Server
*s
= userdata
;
1254 log_debug("Received request to sync from PID " PID_FMT
, si
->ssi_pid
);
1258 /* Let clients know when the most recent sync happened. */
1259 r
= write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC
));
/* Failure to write the timestamp is only warned about ("ignoring").
 * NOTE(review): the guarding condition is not visible here - presumably r < 0. */
1261 log_warning_errno(r
, "Failed to write /run/systemd/journal/synced, ignoring: %m");
/* Registers the signal handlers of the journal server with its event loop.
 *
 * s: the Server; s->event must already exist, the created sources are stored
 *    in the s->sig*_event_source fields.
 *
 * Handlers installed: SIGUSR1 -> dispatch_sigusr1, SIGUSR2 -> dispatch_sigusr2,
 * SIGTERM and SIGINT -> dispatch_sigterm, SIGRTMIN+1 -> dispatch_sigrtmin1.
 * NOTE(review): the error checks after each call and the return statement are
 * not visible in this listing. */
1266 static int setup_signals(Server
*s
) {
/* SIGINT, SIGTERM, SIGUSR1, SIGUSR2 and SIGRTMIN+1 are passed to
 * sigprocmask_many() with SIG_SETMASK; the list is terminated by -1 and the
 * call is required (assert_se) to return >= 0. */
1271 assert_se(sigprocmask_many(SIG_SETMASK
, NULL
, SIGINT
, SIGTERM
, SIGUSR1
, SIGUSR2
, SIGRTMIN
+1, -1) >= 0);
1273 r
= sd_event_add_signal(s
->event
, &s
->sigusr1_event_source
, SIGUSR1
, dispatch_sigusr1
, s
);
1277 r
= sd_event_add_signal(s
->event
, &s
->sigusr2_event_source
, SIGUSR2
, dispatch_sigusr2
, s
);
1281 r
= sd_event_add_signal(s
->event
, &s
->sigterm_event_source
, SIGTERM
, dispatch_sigterm
, s
);
1285 /* Let's process SIGTERM late, so that we flush all queued
1286 * messages to disk before we exit */
1287 r
= sd_event_source_set_priority(s
->sigterm_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1291 /* When journald is invoked on the terminal (when debugging),
1292 * it's useful if C-c is handled equivalent to SIGTERM. */
1293 r
= sd_event_add_signal(s
->event
, &s
->sigint_event_source
, SIGINT
, dispatch_sigterm
, s
);
/* Same priority offset (+20) as the SIGTERM source above. */
1297 r
= sd_event_source_set_priority(s
->sigint_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1301 /* SIGRTMIN+1 causes an immediate sync. We process this very
1302 * late, so that everything else queued at this point is
1303 * really written to disk. Clients can watch
1304 * /run/systemd/journal/synced with inotify until its mtime
1305 * changes to see when a sync happened. */
1306 r
= sd_event_add_signal(s
->event
, &s
->sigrtmin1_event_source
, SIGRTMIN
+1, dispatch_sigrtmin1
, s
);
/* Priority offset +15, i.e. a smaller offset than the +20 used for SIGTERM/SIGINT. */
1310 r
= sd_event_source_set_priority(s
->sigrtmin1_event_source
, SD_EVENT_PRIORITY_NORMAL
+15);
/* Per-item callback handed to proc_cmdline_parse() by server_init().
 *
 * key:   name of the kernel command line option
 * value: its value; may be missing (the boolean switches test it for NULL)
 * data:  opaque pointer; server_init() passes the Server. The assignment of
 *        the local s used below is not visible in this listing - presumably
 *        s = data; confirm.
 *
 * Recognized keys:
 *   systemd.journald.forward_to_{syslog,kmsg,console,wall}
 *       parsed with parse_boolean(); a missing value counts as true
 *   systemd.journald.max_level_{console,store,syslog,kmsg,wall}
 *       parsed with log_level_from_string(); proc_cmdline_value_missing() is
 *       consulted first
 * Any other key starting with "systemd.journald" only produces a warning.
 * NOTE(review): the conditions separating each "Failed to parse" warning from
 * the following assignment are not visible here - presumably r < 0 / else. */
1317 static int parse_proc_cmdline_item(const char *key
, const char *value
, void *data
) {
/* Boolean forwarding switches. */
1323 if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_syslog")) {
1325 r
= value
? parse_boolean(value
) : true;
1327 log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value
);
1329 s
->forward_to_syslog
= r
;
1331 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_kmsg")) {
1333 r
= value
? parse_boolean(value
) : true;
1335 log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value
);
1337 s
->forward_to_kmsg
= r
;
1339 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_console")) {
1341 r
= value
? parse_boolean(value
) : true;
1343 log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value
);
1345 s
->forward_to_console
= r
;
1347 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_wall")) {
1349 r
= value
? parse_boolean(value
) : true;
1351 log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value
);
1353 s
->forward_to_wall
= r
;
/* Maximum log level settings; these need an explicit value. */
1355 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_console")) {
1357 if (proc_cmdline_value_missing(key
, value
))
1360 r
= log_level_from_string(value
);
1362 log_warning("Failed to parse max level console value \"%s\". Ignoring.", value
);
1364 s
->max_level_console
= r
;
1366 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_store")) {
1368 if (proc_cmdline_value_missing(key
, value
))
1371 r
= log_level_from_string(value
);
1373 log_warning("Failed to parse max level store value \"%s\". Ignoring.", value
);
1375 s
->max_level_store
= r
;
1377 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_syslog")) {
1379 if (proc_cmdline_value_missing(key
, value
))
1382 r
= log_level_from_string(value
);
1384 log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value
);
1386 s
->max_level_syslog
= r
;
1388 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_kmsg")) {
1390 if (proc_cmdline_value_missing(key
, value
))
1393 r
= log_level_from_string(value
);
1395 log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value
);
1397 s
->max_level_kmsg
= r
;
1399 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_wall")) {
1401 if (proc_cmdline_value_missing(key
, value
))
1404 r
= log_level_from_string(value
);
1406 log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value
);
1408 s
->max_level_wall
= r
;
/* Anything else in the systemd.journald namespace is unknown: warn only. */
1410 } else if (startswith(key
, "systemd.journald"))
1411 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key
);
1413 /* do not warn about state here, since probably systemd already did */
/* Loads the journald configuration into the Server.
 *
 * s: the Server, passed through to the parser as its userdata argument.
 *
 * Parses PKGSYSCONFDIR "/journald.conf" together with the drop-in directories
 * named by CONF_PATHS_NULSTR("systemd/journald.conf.d"), resolving settings
 * through config_item_perf_lookup with the journald_gperf_lookup table and
 * CONFIG_PARSE_WARN. Returns whatever config_parse_many_nulstr() returns.
 * NOTE(review): one argument line of the call is not visible in this listing. */
1417 static int server_parse_config_file(Server
*s
) {
1420 return config_parse_many_nulstr(PKGSYSCONFDIR
"/journald.conf",
1421 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1423 config_item_perf_lookup
, journald_gperf_lookup
,
1424 CONFIG_PARSE_WARN
, s
);
/* Timer callback that server_schedule_sync() registers through
 * sd_event_add_time() for s->sync_event_source.
 *
 * es:       the timer event source
 * t:        the time value supplied by the event loop
 * userdata: the Server instance given at registration time
 *
 * NOTE(review): apart from the userdata cast, the body is not visible in this
 * listing; presumably it performs the deferred sync - confirm. */
1427 static int server_dispatch_sync(sd_event_source
*es
, usec_t t
, void *userdata
) {
1428 Server
*s
= userdata
;
/* Arranges for the journal to be synced, either right away or after
 * s->sync_interval_usec.
 *
 * s:        the Server
 * priority: syslog priority of the message that triggered the request;
 *           values <= LOG_CRIT take the immediate path
 *
 * For other priorities, when a sync is already scheduled nothing more is
 * needed. Otherwise, if the interval is non-zero, a timer for now + interval
 * is created on first use or re-armed, and s->sync_scheduled is set.
 * NOTE(review): the statements inside the immediate and already-scheduled
 * branches, the error checks and the else that presumably separates
 * "create timer" from "re-arm timer" are not visible in this listing. */
1436 int server_schedule_sync(Server
*s
, int priority
) {
1441 if (priority
<= LOG_CRIT
) {
1442 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1447 if (s
->sync_scheduled
)
1450 if (s
->sync_interval_usec
> 0) {
/* Deadline = current event loop time on CLOCK_MONOTONIC + configured interval. */
1453 r
= sd_event_now(s
->event
, CLOCK_MONOTONIC
, &when
);
1457 when
+= s
->sync_interval_usec
;
/* First use: create the timer source with server_dispatch_sync as callback. */
1459 if (!s
->sync_event_source
) {
1460 r
= sd_event_add_time(
1462 &s
->sync_event_source
,
1465 server_dispatch_sync
, s
);
1469 r
= sd_event_source_set_priority(s
->sync_event_source
, SD_EVENT_PRIORITY_IMPORTANT
);
/* Existing source: move it to the new deadline and enable it for one shot. */
1471 r
= sd_event_source_set_time(s
->sync_event_source
, when
);
1475 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_ONESHOT
);
1480 s
->sync_scheduled
= true;
/* I/O callback registered by server_open_hostname() on the
 * /proc/sys/kernel/hostname file descriptor.
 *
 * es, fd, revents: event source, descriptor and event mask from the event loop
 *                  (not referenced in the visible lines)
 * userdata:        the Server instance given at registration time
 *
 * Refreshes the cached host name through server_cache_hostname(). */
1486 static int dispatch_hostname_change(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1487 Server
*s
= userdata
;
1491 server_cache_hostname(s
);
/* Opens /proc/sys/kernel/hostname and watches it for changes.
 *
 * s: the Server; s->hostname_fd and s->hostname_event_source are filled in.
 *
 * Returns a negative errno-style value (through log_error_errno) when the
 * file cannot be opened, when registration fails in the non-ignored case, or
 * when the priority cannot be adjusted. One registration failure is tolerated:
 * a warning is logged and the descriptor is closed again via safe_close().
 * The source priority is set to SD_EVENT_PRIORITY_IMPORTANT-10.
 * NOTE(review): the conditions selecting between the tolerated and the fatal
 * registration failure, and the success return, are not visible in this
 * listing; the original comment hints at kernels older than 3.2. */
1495 static int server_open_hostname(Server
*s
) {
/* Read-only, close-on-exec, non-blocking, never a controlling terminal. */
1500 s
->hostname_fd
= open("/proc/sys/kernel/hostname",
1501 O_RDONLY
|O_CLOEXEC
|O_NONBLOCK
|O_NOCTTY
);
1502 if (s
->hostname_fd
< 0)
1503 return log_error_errno(errno
, "Failed to open /proc/sys/kernel/hostname: %m");
/* Registered with an event mask of 0 and dispatch_hostname_change as callback. */
1505 r
= sd_event_add_io(s
->event
, &s
->hostname_event_source
, s
->hostname_fd
, 0, dispatch_hostname_change
, s
);
1507 /* kernels prior to 3.2 don't support polling this file. Ignore
1510 log_warning_errno(r
, "Failed to register hostname fd in event loop, ignoring: %m");
1511 s
->hostname_fd
= safe_close(s
->hostname_fd
);
1515 return log_error_errno(r
, "Failed to register hostname fd in event loop: %m");
1518 r
= sd_event_source_set_priority(s
->hostname_event_source
, SD_EVENT_PRIORITY_IMPORTANT
-10);
1520 return log_error_errno(r
, "Failed to adjust priority of host name event source: %m");
/* I/O callback for the notification socket, registered by
 * server_connect_notify() for EPOLLOUT.
 *
 * es:       must be s->notify_event_source (asserted)
 * fd:       must be s->notify_fd (asserted)
 * revents:  event mask from the event loop (not referenced in the visible lines)
 * userdata: the Server instance given at registration time
 *
 * Sends at most one message per invocation, in this order of preference:
 * the initial ready/status message, a pending watchdog message, or one queued
 * stdout stream notification. When neither a watchdog message nor a stream
 * notification remains pending, the event source is switched off.
 * Send errors other than EAGAIN are returned via log_error_errno().
 * NOTE(review): the checks on the send() result, the EAGAIN branch bodies and
 * the return statements are not visible in this listing. */
1525 static int dispatch_notify_event(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1526 Server
*s
= userdata
;
1530 assert(s
->notify_event_source
== es
);
1531 assert(s
->notify_fd
== fd
);
1533 /* The $NOTIFY_SOCKET is writable again, now send exactly one
1534 * message on it. Either it's the watchdog event, the initial
1535 * READY=1 event or an stdout stream event. If there's nothing
1536 * to write anymore, turn our event source off. The next time
1537 * there's something to send it will be turned on again. */
/* Case 1: the initial ready message has not gone out yet. */
1539 if (!s
->sent_notify_ready
) {
1540 static const char p
[] =
1542 "STATUS=Processing requests...";
/* MSG_DONTWAIT: never block on the notification socket. */
1545 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1547 if (errno
== EAGAIN
)
1550 return log_error_errno(errno
, "Failed to send READY=1 notification message: %m");
1553 s
->sent_notify_ready
= true;
1554 log_debug("Sent READY=1 notification.");
/* Case 2: a watchdog keep-alive was requested by dispatch_watchdog(). */
1556 } else if (s
->send_watchdog
) {
1558 static const char p
[] =
1563 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1565 if (errno
== EAGAIN
)
1568 return log_error_errno(errno
, "Failed to send WATCHDOG=1 notification message: %m");
1571 s
->send_watchdog
= false;
1572 log_debug("Sent WATCHDOG=1 notification.");
/* Case 3: stream notifications are queued. */
1574 } else if (s
->stdout_streams_notify_queue
)
1575 /* Dispatch one stream notification event */
1576 stdout_stream_send_notify(s
->stdout_streams_notify_queue
);
1578 /* Leave us enabled if there's still more to do. */
1579 if (s
->send_watchdog
|| s
->stdout_streams_notify_queue
)
1582 /* There was nothing to do anymore, let's turn ourselves off. */
1583 r
= sd_event_source_set_enabled(es
, SD_EVENT_OFF
);
1585 return log_error_errno(r
, "Failed to turn off notify event source: %m");
/* Timer callback registered by server_connect_notify() for the watchdog.
 *
 * es:       the timer event source (not referenced in the visible lines)
 * usec:     the time value supplied by the event loop; the next deadline is
 *           computed relative to it
 * userdata: the Server instance given at registration time
 *
 * Flags that a watchdog message should be sent, switches the notify event
 * source on so dispatch_notify_event() can send it, then re-arms this timer
 * for usec + watchdog_usec / 2 and enables it again.
 * Failing to enable the notify source is only warned about; failures to
 * re-arm or enable the watchdog timer are returned via log_error_errno().
 * NOTE(review): the r < 0 style guards and the success return are not visible
 * in this listing. */
1590 static int dispatch_watchdog(sd_event_source
*es
, uint64_t usec
, void *userdata
) {
1591 Server
*s
= userdata
;
1596 s
->send_watchdog
= true;
1598 r
= sd_event_source_set_enabled(s
->notify_event_source
, SD_EVENT_ON
);
1600 log_warning_errno(r
, "Failed to turn on notify event source: %m");
/* Next wake-up after half the watchdog interval. */
1602 r
= sd_event_source_set_time(s
->watchdog_event_source
, usec
+ s
->watchdog_usec
/ 2);
1604 return log_error_errno(r
, "Failed to restart watchdog event source: %m");
1606 r
= sd_event_source_set_enabled(s
->watchdog_event_source
, SD_EVENT_ON
);
1608 return log_error_errno(r
, "Failed to enable watchdog event source: %m");
/* Connects a non-blocking datagram socket to the address in $NOTIFY_SOCKET
 * and registers it with the event loop.
 *
 * s: the Server; on entry s->notify_fd must be < 0 and
 *    s->notify_event_source must be unset (both asserted). On success
 *    s->notify_fd, s->notify_event_source and, when the watchdog is enabled,
 *    s->watchdog_event_source are filled in.
 *
 * The address must start with '@' or '/' and be longer than one character;
 * a leading '@' is replaced by a NUL byte before connecting. Errors from
 * socket(), connect() and the event loop registrations are returned via
 * log_error_errno().
 * NOTE(review): the handling of an unset $NOTIFY_SOCKET, the return values of
 * the two validation branches, the r < 0 guards and the final return are not
 * visible in this listing. The long explanatory text below has also lost its
 * comment delimiters here. */
1613 static int server_connect_notify(Server
*s
) {
1614 union sockaddr_union sa
= {
1615 .un
.sun_family
= AF_UNIX
,
1621 assert(s
->notify_fd
< 0);
1622 assert(!s
->notify_event_source
);
1625 So here's the problem: we'd like to send notification
1626 messages to PID 1, but we cannot do that via sd_notify(),
1627 since that's synchronous, and we might end up blocking on
1628 it. Specifically: given that PID 1 might block on
1629 dbus-daemon during IPC, and dbus-daemon is logging to us,
1630 and might hence block on us, we might end up in a deadlock
1631 if we block on sending PID 1 notification messages — by
1632 generating a full blocking circle. To avoid this, let's
1633 create a non-blocking socket, and connect it to the
1634 notification socket, and then wait for POLLOUT before we
1635 send anything. This should efficiently avoid any deadlocks,
1636 as we'll never block on PID 1, hence PID 1 can safely block
1637 on dbus-daemon which can safely block on us again.
1639 Don't think that this issue is real? It is, see:
1640 https://github.com/systemd/systemd/issues/1505
1643 e
= getenv("NOTIFY_SOCKET");
/* Reject addresses that do not start with '@' or '/', or that consist of
 * that single character only. */
1647 if (!IN_SET(e
[0], '@', '/') || e
[1] == 0) {
1648 log_error("NOTIFY_SOCKET set to an invalid value: %s", e
);
/* A string exactly as long as sun_path is still accepted by this check. */
1652 if (strlen(e
) > sizeof(sa
.un
.sun_path
)) {
1653 log_error("NOTIFY_SOCKET path too long: %s", e
);
1657 s
->notify_fd
= socket(AF_UNIX
, SOCK_DGRAM
|SOCK_CLOEXEC
|SOCK_NONBLOCK
, 0);
1658 if (s
->notify_fd
< 0)
1659 return log_error_errno(errno
, "Failed to create notify socket: %m");
/* Best effort: the result of enlarging the send buffer is discarded. */
1661 (void) fd_inc_sndbuf(s
->notify_fd
, NOTIFY_SNDBUF_SIZE
);
/* strncpy() does not NUL-terminate when e fills sun_path completely; the
 * address length handed to connect() comes from SOCKADDR_UN_LEN(). */
1663 strncpy(sa
.un
.sun_path
, e
, sizeof(sa
.un
.sun_path
));
/* A leading '@' becomes a leading NUL byte in sun_path. */
1664 if (sa
.un
.sun_path
[0] == '@')
1665 sa
.un
.sun_path
[0] = 0;
1667 r
= connect(s
->notify_fd
, &sa
.sa
, SOCKADDR_UN_LEN(sa
.un
));
1669 return log_error_errno(errno
, "Failed to connect to notify socket: %m");
/* Wake up in dispatch_notify_event() whenever the socket is writable. */
1671 r
= sd_event_add_io(s
->event
, &s
->notify_event_source
, s
->notify_fd
, EPOLLOUT
, dispatch_notify_event
, s
);
1673 return log_error_errno(r
, "Failed to watch notification socket: %m");
/* With the watchdog enabled: request a first keep-alive and arm a
 * CLOCK_MONOTONIC timer at now + watchdog_usec/2 with accuracy watchdog_usec/4. */
1675 if (sd_watchdog_enabled(false, &s
->watchdog_usec
) > 0) {
1676 s
->send_watchdog
= true;
1678 r
= sd_event_add_time(s
->event
, &s
->watchdog_event_source
, CLOCK_MONOTONIC
, now(CLOCK_MONOTONIC
) + s
->watchdog_usec
/2, s
->watchdog_usec
/4, dispatch_watchdog
, s
);
1680 return log_error_errno(r
, "Failed to add watchdog time event: %m");
1683 /* This should fire pretty soon, which we'll use to send the
/* Initializes a Server: defaults, configuration, sockets, event sources and
 * journal storage paths.
 *
 * s: the Server structure to set up.
 *
 * Visible sequence:
 *   1. set all file descriptors to -1 and fill in default settings
 *   2. parse the configuration file, then the kernel command line
 *   3. create /run/systemd/journal, the user journal map, the mmap cache,
 *      the deferred-close set and the default event loop
 *   4. classify descriptors received via sd_listen_fds() and collect others
 *      in an FDSet that is offered to server_restore_streams()
 *   5. open the stdout, syslog and native sockets, /dev/kmsg, optionally the
 *      audit socket, the kernel sequence number, the hostname watch and the
 *      signal handlers
 *   6. create udev and rate limit objects, cache cgroup root, hostname,
 *      boot ID and machine ID, build the storage paths
 *   7. connect the notification socket, acquire the default client context
 *      and return the result of system_journal_open()
 *
 * NOTE(review): most error checks (r < 0, allocation failures), the
 * assignments of recognized descriptors to s->*_fd and several early returns
 * are not visible in this listing. */
1689 int server_init(Server
*s
) {
1690 _cleanup_fdset_free_ FDSet
*fds
= NULL
;
/* -1 marks every descriptor as not open. */
1697 s
->syslog_fd
= s
->native_fd
= s
->stdout_fd
= s
->dev_kmsg_fd
= s
->audit_fd
= s
->hostname_fd
= s
->notify_fd
= -1;
/* Defaults; the config file and kernel command line parsed below may change them. */
1700 s
->read_kmsg
= true;
1702 s
->watchdog_usec
= USEC_INFINITY
;
1704 s
->sync_interval_usec
= DEFAULT_SYNC_INTERVAL_USEC
;
1705 s
->sync_scheduled
= false;
1707 s
->rate_limit_interval
= DEFAULT_RATE_LIMIT_INTERVAL
;
1708 s
->rate_limit_burst
= DEFAULT_RATE_LIMIT_BURST
;
1710 s
->forward_to_wall
= true;
1712 s
->max_file_usec
= DEFAULT_MAX_FILE_USEC
;
1714 s
->max_level_store
= LOG_DEBUG
;
1715 s
->max_level_syslog
= LOG_DEBUG
;
1716 s
->max_level_kmsg
= LOG_NOTICE
;
1717 s
->max_level_console
= LOG_INFO
;
1718 s
->max_level_wall
= LOG_EMERG
;
1720 s
->line_max
= DEFAULT_LINE_MAX
;
1722 journal_reset_metrics(&s
->system_storage
.metrics
);
1723 journal_reset_metrics(&s
->runtime_storage
.metrics
);
/* The return value of the config file parser is not used here. */
1725 server_parse_config_file(s
);
/* The kernel command line is parsed after the config file; a failure is
 * only warned about ("ignoring"). */
1727 r
= proc_cmdline_parse(parse_proc_cmdline_item
, s
, PROC_CMDLINE_STRIP_RD_PREFIX
);
1729 log_warning_errno(r
, "Failed to parse kernel command line, ignoring: %m");
/* True when exactly one of interval and burst is zero: in that case both
 * are forced to zero. */
1731 if (!!s
->rate_limit_interval
^ !!s
->rate_limit_burst
) {
1732 log_debug("Setting both rate limit interval and burst from "USEC_FMT
",%u to 0,0",
1733 s
->rate_limit_interval
, s
->rate_limit_burst
);
1734 s
->rate_limit_interval
= s
->rate_limit_burst
= 0;
/* Best effort: the result of creating the runtime directory is discarded. */
1737 (void) mkdir_p("/run/systemd/journal", 0755);
1739 s
->user_journals
= ordered_hashmap_new(NULL
);
1740 if (!s
->user_journals
)
1743 s
->mmap
= mmap_cache_new();
1747 s
->deferred_closes
= set_new(NULL
);
1748 if (!s
->deferred_closes
)
1751 r
= sd_event_default(&s
->event
);
1753 return log_error_errno(r
, "Failed to create event loop: %m");
/* Descriptors handed over by the service manager. */
1755 n
= sd_listen_fds(true);
1757 return log_error_errno(n
, "Failed to read listening file descriptors from environment: %m");
/* Classify each passed descriptor; a second one of the same kind is an error. */
1759 for (fd
= SD_LISTEN_FDS_START
; fd
< SD_LISTEN_FDS_START
+ n
; fd
++) {
/* Native protocol: datagram socket at /run/systemd/journal/socket. */
1761 if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/socket", 0) > 0) {
1763 if (s
->native_fd
>= 0) {
1764 log_error("Too many native sockets passed.");
/* Stdout streams: listening stream socket at /run/systemd/journal/stdout. */
1770 } else if (sd_is_socket_unix(fd
, SOCK_STREAM
, 1, "/run/systemd/journal/stdout", 0) > 0) {
1772 if (s
->stdout_fd
>= 0) {
1773 log_error("Too many stdout sockets passed.");
/* Syslog: datagram socket at /dev/log or /run/systemd/journal/dev-log. */
1779 } else if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/dev/log", 0) > 0 ||
1780 sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1782 if (s
->syslog_fd
>= 0) {
1783 log_error("Too many /dev/log sockets passed.");
/* Audit: raw netlink socket. */
1789 } else if (sd_is_socket(fd
, AF_NETLINK
, SOCK_RAW
, -1) > 0) {
1791 if (s
->audit_fd
>= 0) {
1792 log_error("Too many audit sockets passed.");
/* Presumably the fallback for descriptors not recognized above - the
 * enclosing else branch is not visible in this listing; confirm. */
1806 r
= fdset_put(fds
, fd
);
1812 /* Try to restore streams, but don't bother if this fails */
1813 (void) server_restore_streams(s
, fds
);
/* Whatever is still in the set after the restore attempt is unknown: warn and free. */
1815 if (fdset_size(fds
) > 0) {
1816 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds
));
1817 fds
= fdset_free(fds
);
/* True when none of the native, stdout, syslog and audit sockets was passed in. */
1820 no_sockets
= s
->native_fd
< 0 && s
->stdout_fd
< 0 && s
->syslog_fd
< 0 && s
->audit_fd
< 0;
1822 /* always open stdout, syslog, native, and kmsg sockets */
1824 /* systemd-journald.socket: /run/systemd/journal/stdout */
1825 r
= server_open_stdout_socket(s
);
1829 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
1830 r
= server_open_syslog_socket(s
);
1834 /* systemd-journald.socket: /run/systemd/journal/socket */
1835 r
= server_open_native_socket(s
);
1840 r
= server_open_dev_kmsg(s
);
1844 /* Unless we got *some* sockets and not audit, open audit socket */
1845 if (s
->audit_fd
>= 0 || no_sockets
) {
1846 r
= server_open_audit(s
);
1851 r
= server_open_kernel_seqnum(s
);
1855 r
= server_open_hostname(s
);
1859 r
= setup_signals(s
);
1863 s
->udev
= udev_new();
/* The rate limiter is built from the interval and burst settled above. */
1867 s
->rate_limit
= journal_rate_limit_new(s
->rate_limit_interval
, s
->rate_limit_burst
);
1871 r
= cg_get_root_path(&s
->cgroup_root
);
1875 server_cache_hostname(s
);
1876 server_cache_boot_id(s
);
1877 server_cache_machine_id(s
);
/* The names are string literals; the paths are built with strjoin() and are
 * released with free() in server_done(). */
1879 s
->runtime_storage
.name
= "Runtime journal";
1880 s
->system_storage
.name
= "System journal";
1882 s
->runtime_storage
.path
= strjoin("/run/log/journal/", SERVER_MACHINE_ID(s
));
1883 s
->system_storage
.path
= strjoin("/var/log/journal/", SERVER_MACHINE_ID(s
));
1884 if (!s
->runtime_storage
.path
|| !s
->system_storage
.path
)
/* Both results are deliberately discarded (explicit void casts). */
1887 (void) server_connect_notify(s
);
1889 (void) client_context_acquire_default(s
);
1891 return system_journal_open(s
, false);
/* Calls journal_file_maybe_append_tag() on the system journal (when open) and
 * on every journal file in s->user_journals, all with the same
 * CLOCK_REALTIME timestamp taken once up front.
 *
 * s: the Server.
 *
 * NOTE(review): the declarations of n, f and i, and any conditional
 * compilation around this body, are not visible in this listing. */
1894 void server_maybe_append_tags(Server
*s
) {
1900 n
= now(CLOCK_REALTIME
);
1902 if (s
->system_journal
)
1903 journal_file_maybe_append_tag(s
->system_journal
, n
);
1905 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
)
1906 journal_file_maybe_append_tag(f
, n
);
/* Releases everything a Server owns.
 *
 * s: the Server to tear down.
 *
 * Visible order: deferred-close journal files, stdout streams, client
 * contexts, system/runtime/user journals, all event sources and the event
 * loop, all file descriptors, the rate limiter, the kernel sequence number
 * mapping, heap strings, the mmap cache and the udev context.
 * NOTE(review): some release calls may be missing from this listing. */
1910 void server_done(Server
*s
) {
1913 set_free_with_destructor(s
->deferred_closes
, journal_file_close
);
/* Loops until the list head is empty. Presumably stdout_stream_free()
 * unlinks the stream from s->stdout_streams - confirm. */
1915 while (s
->stdout_streams
)
1916 stdout_stream_free(s
->stdout_streams
);
1918 client_context_flush_all(s
);
/* Journal files: results of closing are discarded. */
1920 if (s
->system_journal
)
1921 (void) journal_file_close(s
->system_journal
);
1923 if (s
->runtime_journal
)
1924 (void) journal_file_close(s
->runtime_journal
);
1926 ordered_hashmap_free_with_destructor(s
->user_journals
, journal_file_close
);
/* Event sources first, then the event loop they belong to. */
1928 sd_event_source_unref(s
->syslog_event_source
);
1929 sd_event_source_unref(s
->native_event_source
);
1930 sd_event_source_unref(s
->stdout_event_source
);
1931 sd_event_source_unref(s
->dev_kmsg_event_source
);
1932 sd_event_source_unref(s
->audit_event_source
);
1933 sd_event_source_unref(s
->sync_event_source
);
1934 sd_event_source_unref(s
->sigusr1_event_source
);
1935 sd_event_source_unref(s
->sigusr2_event_source
);
1936 sd_event_source_unref(s
->sigterm_event_source
);
1937 sd_event_source_unref(s
->sigint_event_source
);
1938 sd_event_source_unref(s
->sigrtmin1_event_source
);
1939 sd_event_source_unref(s
->hostname_event_source
);
1940 sd_event_source_unref(s
->notify_event_source
);
1941 sd_event_source_unref(s
->watchdog_event_source
);
1942 sd_event_unref(s
->event
);
/* The same seven descriptors that server_init() initializes to -1. */
1944 safe_close(s
->syslog_fd
);
1945 safe_close(s
->native_fd
);
1946 safe_close(s
->stdout_fd
);
1947 safe_close(s
->dev_kmsg_fd
);
1948 safe_close(s
->audit_fd
);
1949 safe_close(s
->hostname_fd
);
1950 safe_close(s
->notify_fd
);
1953 journal_rate_limit_free(s
->rate_limit
);
/* The mapping is released with a length of sizeof(uint64_t). */
1955 if (s
->kernel_seqnum
)
1956 munmap(s
->kernel_seqnum
, sizeof(uint64_t));
1960 free(s
->cgroup_root
);
1961 free(s
->hostname_field
);
1962 free(s
->runtime_storage
.path
);
1963 free(s
->system_storage
.path
);
1966 mmap_cache_unref(s
->mmap
);
1968 udev_unref(s
->udev
);
/* String names of the Storage enum values, indexed by enum value and sized by
 * _STORAGE_MAX. DEFINE_STRING_TABLE_LOOKUP and DEFINE_CONFIG_PARSE_ENUM are
 * expanded with this table to generate the storage lookup helpers and the
 * config_parse_storage parser (message on failure: "Failed to parse storage
 * setting").
 * NOTE(review): the closing line of the initializer is not visible in this
 * listing. */
1971 static const char* const storage_table
[_STORAGE_MAX
] = {
1972 [STORAGE_AUTO
] = "auto",
1973 [STORAGE_VOLATILE
] = "volatile",
1974 [STORAGE_PERSISTENT
] = "persistent",
1975 [STORAGE_NONE
] = "none"
1978 DEFINE_STRING_TABLE_LOOKUP(storage
, Storage
);
1979 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage
, storage
, Storage
, "Failed to parse storage setting");
/* String names of the SplitMode enum values, indexed by enum value and sized
 * by _SPLIT_MAX. DEFINE_STRING_TABLE_LOOKUP and DEFINE_CONFIG_PARSE_ENUM are
 * expanded with this table to generate the split_mode lookup helpers and the
 * config_parse_split_mode parser (message on failure: "Failed to parse split
 * mode setting").
 * NOTE(review): the closing line of the initializer is not visible in this
 * listing. */
1981 static const char* const split_mode_table
[_SPLIT_MAX
] = {
1982 [SPLIT_LOGIN
] = "login",
1983 [SPLIT_UID
] = "uid",
1984 [SPLIT_NONE
] = "none",
1987 DEFINE_STRING_TABLE_LOOKUP(split_mode
, SplitMode
);
1988 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode
, split_mode
, SplitMode
, "Failed to parse split mode setting");
1990 int config_parse_line_max(
1992 const char *filename
,
1994 const char *section
,
1995 unsigned section_line
,
2010 if (isempty(rvalue
))
2011 /* Empty assignment means default */
2012 *sz
= DEFAULT_LINE_MAX
;
2016 r
= parse_size(rvalue
, 1024, &v
);
2018 log_syntax(unit
, LOG_ERR
, filename
, line
, r
, "Failed to parse LineMax= value, ignoring: %s", rvalue
);
2023 /* Why specify 79 here as minimum line length? Simply, because the most common traditional
2024 * terminal size is 80ch, and it might make sense to break one character before the natural
2025 * line break would occur on that. */
2026 log_syntax(unit
, LOG_WARNING
, filename
, line
, 0, "LineMax= too small, clamping to 79: %s", rvalue
);
2028 } else if (v
> (uint64_t) (SSIZE_MAX
-1)) {
2029 /* So, why specify SSIZE_MAX-1 here? Because that's one below the largest size value read()
2030 * can return, and we need one extra byte for the trailing NUL byte. Of course IRL such large
2031 * memory allocations will fail anyway, hence this limit is mostly theoretical anyway, as we'll
2032 * fail much earlier anyway. */
2033 log_syntax(unit
, LOG_WARNING
, filename
, line
, 0, "LineMax= too large, clamping to %" PRIu64
": %s", (uint64_t) (SSIZE_MAX
-1), rvalue
);