1 /* SPDX-License-Identifier: LGPL-2.1+ */
3 This file is part of systemd.
5 Copyright 2011 Lennart Poettering
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
22 #include <selinux/selinux.h>
24 #include <sys/ioctl.h>
26 #include <sys/signalfd.h>
27 #include <sys/statvfs.h>
28 #include <linux/sockios.h>
31 #include "sd-daemon.h"
32 #include "sd-journal.h"
33 #include "sd-messages.h"
36 #include "alloc-util.h"
37 #include "audit-util.h"
38 #include "cgroup-util.h"
39 #include "conf-parser.h"
40 #include "dirent-util.h"
41 #include "extract-word.h"
44 #include "format-util.h"
47 #include "hostname-util.h"
48 #include "id128-util.h"
50 #include "journal-authenticate.h"
51 #include "journal-file.h"
52 #include "journal-internal.h"
53 #include "journal-vacuum.h"
54 #include "journald-audit.h"
55 #include "journald-context.h"
56 #include "journald-kmsg.h"
57 #include "journald-native.h"
58 #include "journald-rate-limit.h"
59 #include "journald-server.h"
60 #include "journald-stream.h"
61 #include "journald-syslog.h"
65 #include "parse-util.h"
66 #include "proc-cmdline.h"
67 #include "process-util.h"
69 #include "selinux-util.h"
70 #include "signal-util.h"
71 #include "socket-util.h"
72 #include "stdio-util.h"
73 #include "string-table.h"
74 #include "string-util.h"
75 #include "syslog-util.h"
76 #include "user-util.h"
78 #define USER_JOURNALS_MAX 1024
80 #define DEFAULT_SYNC_INTERVAL_USEC (5*USEC_PER_MINUTE)
81 #define DEFAULT_RATE_LIMIT_INTERVAL (30*USEC_PER_SEC)
82 #define DEFAULT_RATE_LIMIT_BURST 10000
83 #define DEFAULT_MAX_FILE_USEC USEC_PER_MONTH
85 #define RECHECK_SPACE_USEC (30*USEC_PER_SEC)
87 #define NOTIFY_SNDBUF_SIZE (8*1024*1024)
89 /* The period to insert between posting changes for coalescing */
90 #define POST_CHANGE_TIMER_INTERVAL_USEC (250*USEC_PER_MSEC)
92 /* Pick a good default that is likely to fit into AF_UNIX and AF_INET SOCK_DGRAM datagrams, and even leaves some room
93 * for a bit of additional metadata. */
94 #define DEFAULT_LINE_MAX (48*1024)
96 static int determine_path_usage(Server
*s
, const char *path
, uint64_t *ret_used
, uint64_t *ret_free
) {
97 _cleanup_closedir_
DIR *d
= NULL
;
106 return log_full_errno(errno
== ENOENT
? LOG_DEBUG
: LOG_ERR
,
107 errno
, "Failed to open %s: %m", path
);
109 if (fstatvfs(dirfd(d
), &ss
) < 0)
110 return log_error_errno(errno
, "Failed to fstatvfs(%s): %m", path
);
112 *ret_free
= ss
.f_bsize
* ss
.f_bavail
;
114 FOREACH_DIRENT_ALL(de
, d
, break) {
117 if (!endswith(de
->d_name
, ".journal") &&
118 !endswith(de
->d_name
, ".journal~"))
121 if (fstatat(dirfd(d
), de
->d_name
, &st
, AT_SYMLINK_NOFOLLOW
) < 0) {
122 log_debug_errno(errno
, "Failed to stat %s/%s, ignoring: %m", path
, de
->d_name
);
126 if (!S_ISREG(st
.st_mode
))
129 *ret_used
+= (uint64_t) st
.st_blocks
* 512UL;
135 static void cache_space_invalidate(JournalStorageSpace
*space
) {
139 static int cache_space_refresh(Server
*s
, JournalStorage
*storage
) {
140 JournalStorageSpace
*space
;
141 JournalMetrics
*metrics
;
142 uint64_t vfs_used
, vfs_avail
, avail
;
148 metrics
= &storage
->metrics
;
149 space
= &storage
->space
;
151 ts
= now(CLOCK_MONOTONIC
);
153 if (space
->timestamp
!= 0 && space
->timestamp
+ RECHECK_SPACE_USEC
> ts
)
156 r
= determine_path_usage(s
, storage
->path
, &vfs_used
, &vfs_avail
);
160 space
->vfs_used
= vfs_used
;
161 space
->vfs_available
= vfs_avail
;
163 avail
= LESS_BY(vfs_avail
, metrics
->keep_free
);
165 space
->limit
= MIN(MAX(vfs_used
+ avail
, metrics
->min_use
), metrics
->max_use
);
166 space
->available
= LESS_BY(space
->limit
, vfs_used
);
167 space
->timestamp
= ts
;
/* Raises storage->metrics.min_use to storage->space.vfs_used when the latter is larger; the limit is never
 * lowered here. The statement visible here only reads the cached space.vfs_used value, it does not
 * recompute it. */
171 static void patch_min_use(JournalStorage
*storage
) {
174 /* Let's bump the min_use limit to the current usage on disk. We do
175 * this when starting up and first opening the journal files. This way
176 * sudden spikes in disk usage will not cause journald to vacuum files
177 * without bounds. Note that this means that only a restart of journald
178 * will make it reset this value. */
180 storage
->metrics
.min_use
= MAX(storage
->metrics
.min_use
, storage
->space
.vfs_used
);
184 static int determine_space(Server
*s
, uint64_t *available
, uint64_t *limit
) {
190 js
= s
->system_journal
? &s
->system_storage
: &s
->runtime_storage
;
192 r
= cache_space_refresh(s
, js
);
195 *available
= js
->space
.available
;
197 *limit
= js
->space
.limit
;
202 void server_space_usage_message(Server
*s
, JournalStorage
*storage
) {
203 char fb1
[FORMAT_BYTES_MAX
], fb2
[FORMAT_BYTES_MAX
], fb3
[FORMAT_BYTES_MAX
],
204 fb4
[FORMAT_BYTES_MAX
], fb5
[FORMAT_BYTES_MAX
], fb6
[FORMAT_BYTES_MAX
];
205 JournalMetrics
*metrics
;
210 storage
= s
->system_journal
? &s
->system_storage
: &s
->runtime_storage
;
212 if (cache_space_refresh(s
, storage
) < 0)
215 metrics
= &storage
->metrics
;
216 format_bytes(fb1
, sizeof(fb1
), storage
->space
.vfs_used
);
217 format_bytes(fb2
, sizeof(fb2
), metrics
->max_use
);
218 format_bytes(fb3
, sizeof(fb3
), metrics
->keep_free
);
219 format_bytes(fb4
, sizeof(fb4
), storage
->space
.vfs_available
);
220 format_bytes(fb5
, sizeof(fb5
), storage
->space
.limit
);
221 format_bytes(fb6
, sizeof(fb6
), storage
->space
.available
);
223 server_driver_message(s
, 0,
224 "MESSAGE_ID=" SD_MESSAGE_JOURNAL_USAGE_STR
,
225 LOG_MESSAGE("%s (%s) is %s, max %s, %s free.",
226 storage
->name
, storage
->path
, fb1
, fb5
, fb6
),
227 "JOURNAL_NAME=%s", storage
->name
,
228 "JOURNAL_PATH=%s", storage
->path
,
229 "CURRENT_USE=%"PRIu64
, storage
->space
.vfs_used
,
230 "CURRENT_USE_PRETTY=%s", fb1
,
231 "MAX_USE=%"PRIu64
, metrics
->max_use
,
232 "MAX_USE_PRETTY=%s", fb2
,
233 "DISK_KEEP_FREE=%"PRIu64
, metrics
->keep_free
,
234 "DISK_KEEP_FREE_PRETTY=%s", fb3
,
235 "DISK_AVAILABLE=%"PRIu64
, storage
->space
.vfs_available
,
236 "DISK_AVAILABLE_PRETTY=%s", fb4
,
237 "LIMIT=%"PRIu64
, storage
->space
.limit
,
238 "LIMIT_PRETTY=%s", fb5
,
239 "AVAILABLE=%"PRIu64
, storage
->space
.available
,
240 "AVAILABLE_PRETTY=%s", fb6
,
/* Predicate used to decide whether entries attributed to a UID are written to the system journal. */
244 static bool uid_for_system_journal(uid_t uid
) {
246 /* Returns true if the specified UID shall get its data stored in the system journal, i.e. when
 * uid_is_system() or uid_is_dynamic() accepts it, or when it equals UID_NOBODY. */
248 return uid_is_system(uid
) || uid_is_dynamic(uid
) || uid
== UID_NOBODY
;
251 static void server_add_acls(JournalFile
*f
, uid_t uid
) {
258 if (uid_for_system_journal(uid
))
261 r
= add_acls_for_user(f
->fd
, uid
);
263 log_warning_errno(r
, "Failed to set ACL on %s, ignoring: %m", f
->path
);
267 static int open_journal(
273 JournalMetrics
*metrics
,
283 r
= journal_file_open_reliably(fname
, flags
, 0640, s
->compress
.enabled
, s
->compress
.threshold_bytes
,
284 seal
, metrics
, s
->mmap
, s
->deferred_closes
, NULL
, &f
);
286 r
= journal_file_open(-1, fname
, flags
, 0640, s
->compress
.enabled
, s
->compress
.threshold_bytes
, seal
,
287 metrics
, s
->mmap
, s
->deferred_closes
, NULL
, &f
);
292 r
= journal_file_enable_post_change_timer(f
, s
->event
, POST_CHANGE_TIMER_INTERVAL_USEC
);
294 (void) journal_file_close(f
);
/* Returns true if the flag file /run/systemd/journal/flushed exists, i.e. access() with F_OK succeeds on
 * it. Any access() failure (not only a missing file) yields false. */
302 static bool flushed_flag_is_set(void) {
303 return access("/run/systemd/journal/flushed", F_OK
) >= 0;
306 static int system_journal_open(Server
*s
, bool flush_requested
) {
310 if (!s
->system_journal
&&
311 IN_SET(s
->storage
, STORAGE_PERSISTENT
, STORAGE_AUTO
) &&
312 (flush_requested
|| flushed_flag_is_set())) {
314 /* If in auto mode: first try to create the machine
315 * path, but not the prefix.
317 * If in persistent mode: create /var/log/journal and
318 * the machine path */
320 if (s
->storage
== STORAGE_PERSISTENT
)
321 (void) mkdir_p("/var/log/journal/", 0755);
323 (void) mkdir(s
->system_storage
.path
, 0755);
325 fn
= strjoina(s
->system_storage
.path
, "/system.journal");
326 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_storage
.metrics
, &s
->system_journal
);
328 server_add_acls(s
->system_journal
, 0);
329 (void) cache_space_refresh(s
, &s
->system_storage
);
330 patch_min_use(&s
->system_storage
);
332 if (!IN_SET(r
, -ENOENT
, -EROFS
))
333 log_warning_errno(r
, "Failed to open system journal: %m");
338 /* If the runtime journal is open, and we're post-flush, we're
339 * recovering from a failed system journal rotate (ENOSPC)
340 * for which the runtime journal was reopened.
342 * Perform an implicit flush to var, leaving the runtime
343 * journal closed, now that the system journal is back.
345 if (!flush_requested
)
346 (void) server_flush_to_var(s
, true);
349 if (!s
->runtime_journal
&&
350 (s
->storage
!= STORAGE_NONE
)) {
352 fn
= strjoina(s
->runtime_storage
.path
, "/system.journal");
354 if (s
->system_journal
) {
356 /* Try to open the runtime journal, but only
357 * if it already exists, so that we can flush
358 * it into the system journal */
360 r
= open_journal(s
, false, fn
, O_RDWR
, false, &s
->runtime_storage
.metrics
, &s
->runtime_journal
);
363 log_warning_errno(r
, "Failed to open runtime journal: %m");
370 /* OK, we really need the runtime journal, so create
371 * it if necessary. */
373 (void) mkdir("/run/log", 0755);
374 (void) mkdir("/run/log/journal", 0755);
375 (void) mkdir_parents(fn
, 0750);
377 r
= open_journal(s
, true, fn
, O_RDWR
|O_CREAT
, false, &s
->runtime_storage
.metrics
, &s
->runtime_journal
);
379 return log_error_errno(r
, "Failed to open runtime journal: %m");
382 if (s
->runtime_journal
) {
383 server_add_acls(s
->runtime_journal
, 0);
384 (void) cache_space_refresh(s
, &s
->runtime_storage
);
385 patch_min_use(&s
->runtime_storage
);
392 static JournalFile
* find_journal(Server
*s
, uid_t uid
) {
393 _cleanup_free_
char *p
= NULL
;
400 /* A rotate that fails to create the new journal (ENOSPC) leaves the
401 * rotated journal as NULL. Unless we revisit opening, even after
402 * space is made available we'll continue to return NULL indefinitely.
404 * system_journal_open() is a noop if the journals are already open, so
405 * we can just call it here to recover from failed rotates (or anything
406 * else that's left the journals as NULL).
408 * Fixes https://github.com/systemd/systemd/issues/3968 */
409 (void) system_journal_open(s
, false);
411 /* We split up user logs only on /var, not on /run. If the
412 * runtime file is open, we write to it exclusively, in order
413 * to guarantee proper order as soon as we flush /run to
414 * /var and close the runtime file. */
416 if (s
->runtime_journal
)
417 return s
->runtime_journal
;
419 if (uid_for_system_journal(uid
))
420 return s
->system_journal
;
422 r
= sd_id128_get_machine(&machine
);
424 return s
->system_journal
;
426 f
= ordered_hashmap_get(s
->user_journals
, UID_TO_PTR(uid
));
430 if (asprintf(&p
, "/var/log/journal/" SD_ID128_FORMAT_STR
"/user-"UID_FMT
".journal",
431 SD_ID128_FORMAT_VAL(machine
), uid
) < 0)
432 return s
->system_journal
;
434 while (ordered_hashmap_size(s
->user_journals
) >= USER_JOURNALS_MAX
) {
435 /* Too many open? Then let's close one */
436 f
= ordered_hashmap_steal_first(s
->user_journals
);
438 (void) journal_file_close(f
);
441 r
= open_journal(s
, true, p
, O_RDWR
|O_CREAT
, s
->seal
, &s
->system_storage
.metrics
, &f
);
443 return s
->system_journal
;
445 server_add_acls(f
, uid
);
447 r
= ordered_hashmap_put(s
->user_journals
, UID_TO_PTR(uid
), f
);
449 (void) journal_file_close(f
);
450 return s
->system_journal
;
456 static int do_rotate(
469 r
= journal_file_rotate(f
, s
->compress
.enabled
, s
->compress
.threshold_bytes
, seal
, s
->deferred_closes
);
472 return log_error_errno(r
, "Failed to rotate %s: %m", (*f
)->path
);
474 return log_error_errno(r
, "Failed to create new %s journal: %m", name
);
477 server_add_acls(*f
, uid
);
482 void server_rotate(Server
*s
) {
488 log_debug("Rotating...");
490 (void) do_rotate(s
, &s
->runtime_journal
, "runtime", false, 0);
491 (void) do_rotate(s
, &s
->system_journal
, "system", s
->seal
, 0);
493 ORDERED_HASHMAP_FOREACH_KEY(f
, k
, s
->user_journals
, i
) {
494 r
= do_rotate(s
, &f
, "user", s
->seal
, PTR_TO_UID(k
));
496 ordered_hashmap_replace(s
->user_journals
, k
, f
);
498 /* Old file has been closed and deallocated */
499 ordered_hashmap_remove(s
->user_journals
, k
);
502 /* Perform any deferred closes which aren't still offlining. */
503 SET_FOREACH(f
, s
->deferred_closes
, i
)
504 if (!journal_file_is_offlining(f
)) {
505 (void) set_remove(s
->deferred_closes
, f
);
506 (void) journal_file_close(f
);
510 void server_sync(Server
*s
) {
515 if (s
->system_journal
) {
516 r
= journal_file_set_offline(s
->system_journal
, false);
518 log_warning_errno(r
, "Failed to sync system journal, ignoring: %m");
521 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
) {
522 r
= journal_file_set_offline(f
, false);
524 log_warning_errno(r
, "Failed to sync user journal, ignoring: %m");
527 if (s
->sync_event_source
) {
528 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_OFF
);
530 log_error_errno(r
, "Failed to disable sync timer source: %m");
533 s
->sync_scheduled
= false;
536 static void do_vacuum(Server
*s
, JournalStorage
*storage
, bool verbose
) {
543 (void) cache_space_refresh(s
, storage
);
546 server_space_usage_message(s
, storage
);
548 r
= journal_directory_vacuum(storage
->path
, storage
->space
.limit
,
549 storage
->metrics
.n_max_files
, s
->max_retention_usec
,
550 &s
->oldest_file_usec
, verbose
);
551 if (r
< 0 && r
!= -ENOENT
)
552 log_warning_errno(r
, "Failed to vacuum %s, ignoring: %m", storage
->path
);
554 cache_space_invalidate(&storage
->space
);
557 int server_vacuum(Server
*s
, bool verbose
) {
560 log_debug("Vacuuming...");
562 s
->oldest_file_usec
= 0;
564 if (s
->system_journal
)
565 do_vacuum(s
, &s
->system_storage
, verbose
);
566 if (s
->runtime_journal
)
567 do_vacuum(s
, &s
->runtime_storage
, verbose
);
572 static void server_cache_machine_id(Server
*s
) {
578 r
= sd_id128_get_machine(&id
);
582 sd_id128_to_string(id
, stpcpy(s
->machine_id_field
, "_MACHINE_ID="));
585 static void server_cache_boot_id(Server
*s
) {
591 r
= sd_id128_get_boot(&id
);
595 sd_id128_to_string(id
, stpcpy(s
->boot_id_field
, "_BOOT_ID="));
598 static void server_cache_hostname(Server
*s
) {
599 _cleanup_free_
char *t
= NULL
;
604 t
= gethostname_malloc();
608 x
= strappend("_HOSTNAME=", t
);
612 free(s
->hostname_field
);
613 s
->hostname_field
= x
;
616 static bool shall_try_append_again(JournalFile
*f
, int r
) {
619 case -E2BIG
: /* Hit configured limit */
620 case -EFBIG
: /* Hit fs limit */
621 case -EDQUOT
: /* Quota limit hit */
622 case -ENOSPC
: /* Disk full */
623 log_debug("%s: Allocation limit reached, rotating.", f
->path
);
626 case -EIO
: /* I/O error of some kind (mmap) */
627 log_warning("%s: IO error, rotating.", f
->path
);
630 case -EHOSTDOWN
: /* Other machine */
631 log_info("%s: Journal file from other machine, rotating.", f
->path
);
634 case -EBUSY
: /* Unclean shutdown */
635 log_info("%s: Unclean shutdown, rotating.", f
->path
);
638 case -EPROTONOSUPPORT
: /* Unsupported feature */
639 log_info("%s: Unsupported feature, rotating.", f
->path
);
642 case -EBADMSG
: /* Corrupted */
643 case -ENODATA
: /* Truncated */
644 case -ESHUTDOWN
: /* Already archived */
645 log_warning("%s: Journal file corrupted, rotating.", f
->path
);
648 case -EIDRM
: /* Journal file has been deleted */
649 log_warning("%s: Journal file has been deleted, rotating.", f
->path
);
652 case -ETXTBSY
: /* Journal file is from the future */
653 log_warning("%s: Journal file is from the future, rotating.", f
->path
);
661 static void write_to_journal(Server
*s
, uid_t uid
, struct iovec
*iovec
, unsigned n
, int priority
) {
662 bool vacuumed
= false, rotate
= false;
663 struct dual_timestamp ts
;
671 /* Get the closest, linearized time we have for this log event from the event loop. (Note that we do not use
672 * the source time, and not even the time the event was originally seen, but instead simply the time we started
673 * processing it, as we want strictly linear ordering in what we write out.) */
674 assert_se(sd_event_now(s
->event
, CLOCK_REALTIME
, &ts
.realtime
) >= 0);
675 assert_se(sd_event_now(s
->event
, CLOCK_MONOTONIC
, &ts
.monotonic
) >= 0);
677 if (ts
.realtime
< s
->last_realtime_clock
) {
678 /* When the time jumps backwards, let's immediately rotate. Of course, this should not happen during
679 * regular operation. However, when it does happen, then we should make sure that we start fresh files
680 * to ensure that the entries in the journal files are strictly ordered by time, in order to ensure
681 * bisection works correctly. */
683 log_debug("Time jumped backwards, rotating.");
687 f
= find_journal(s
, uid
);
691 if (journal_file_rotate_suggested(f
, s
->max_file_usec
)) {
692 log_debug("%s: Journal header limits reached or header out-of-date, rotating.", f
->path
);
699 server_vacuum(s
, false);
702 f
= find_journal(s
, uid
);
707 s
->last_realtime_clock
= ts
.realtime
;
709 r
= journal_file_append_entry(f
, &ts
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
711 server_schedule_sync(s
, priority
);
715 if (vacuumed
|| !shall_try_append_again(f
, r
)) {
716 log_error_errno(r
, "Failed to write entry (%d items, %zu bytes), ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
721 server_vacuum(s
, false);
723 f
= find_journal(s
, uid
);
727 log_debug("Retrying write.");
728 r
= journal_file_append_entry(f
, &ts
, iovec
, n
, &s
->seqnum
, NULL
, NULL
);
730 log_error_errno(r
, "Failed to write entry (%d items, %zu bytes) despite vacuuming, ignoring: %m", n
, IOVEC_TOTAL_SIZE(iovec
, n
));
732 server_schedule_sync(s
, priority
);
735 #define IOVEC_ADD_NUMERIC_FIELD(iovec, n, value, type, isset, format, field) \
736 if (isset(value)) { \
738 k = newa(char, STRLEN(field "=") + DECIMAL_STR_MAX(type) + 1); \
739 sprintf(k, field "=" format, value); \
740 iovec[n++] = IOVEC_MAKE_STRING(k); \
743 #define IOVEC_ADD_STRING_FIELD(iovec, n, value, field) \
744 if (!isempty(value)) { \
746 k = strjoina(field "=", value); \
747 iovec[n++] = IOVEC_MAKE_STRING(k); \
750 #define IOVEC_ADD_ID128_FIELD(iovec, n, value, field) \
751 if (!sd_id128_is_null(value)) { \
753 k = newa(char, STRLEN(field "=") + SD_ID128_STRING_MAX); \
754 sd_id128_to_string(value, stpcpy(k, field "=")); \
755 iovec[n++] = IOVEC_MAKE_STRING(k); \
758 #define IOVEC_ADD_SIZED_FIELD(iovec, n, value, value_size, field) \
759 if (value_size > 0) { \
761 k = newa(char, STRLEN(field "=") + value_size + 1); \
762 *((char*) mempcpy(stpcpy(k, field "="), value, value_size)) = 0; \
763 iovec[n++] = IOVEC_MAKE_STRING(k); \
766 static void dispatch_message_real(
768 struct iovec
*iovec
, size_t n
, size_t m
,
769 const ClientContext
*c
,
770 const struct timeval
*tv
,
774 char source_time
[sizeof("_SOURCE_REALTIME_TIMESTAMP=") + DECIMAL_STR_MAX(usec_t
)];
782 N_IOVEC_META_FIELDS
+
783 (pid_is_valid(object_pid
) ? N_IOVEC_OBJECT_FIELDS
: 0) +
784 client_context_extra_fields_n_iovec(c
) <= m
);
787 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->pid
, pid_t
, pid_is_valid
, PID_FMT
, "_PID");
788 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->uid
, uid_t
, uid_is_valid
, UID_FMT
, "_UID");
789 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->gid
, gid_t
, gid_is_valid
, GID_FMT
, "_GID");
791 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->comm
, "_COMM");
792 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->exe
, "_EXE");
793 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->cmdline
, "_CMDLINE");
794 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->capeff
, "_CAP_EFFECTIVE");
796 IOVEC_ADD_SIZED_FIELD(iovec
, n
, c
->label
, c
->label_size
, "_SELINUX_CONTEXT");
798 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->auditid
, uint32_t, audit_session_is_valid
, "%" PRIu32
, "_AUDIT_SESSION");
799 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->loginuid
, uid_t
, uid_is_valid
, UID_FMT
, "_AUDIT_LOGINUID");
801 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->cgroup
, "_SYSTEMD_CGROUP");
802 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->session
, "_SYSTEMD_SESSION");
803 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, c
->owner_uid
, uid_t
, uid_is_valid
, UID_FMT
, "_SYSTEMD_OWNER_UID");
804 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->unit
, "_SYSTEMD_UNIT");
805 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->user_unit
, "_SYSTEMD_USER_UNIT");
806 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->slice
, "_SYSTEMD_SLICE");
807 IOVEC_ADD_STRING_FIELD(iovec
, n
, c
->user_slice
, "_SYSTEMD_USER_SLICE");
809 IOVEC_ADD_ID128_FIELD(iovec
, n
, c
->invocation_id
, "_SYSTEMD_INVOCATION_ID");
811 if (c
->extra_fields_n_iovec
> 0) {
812 memcpy(iovec
+ n
, c
->extra_fields_iovec
, c
->extra_fields_n_iovec
* sizeof(struct iovec
));
813 n
+= c
->extra_fields_n_iovec
;
819 if (pid_is_valid(object_pid
) && client_context_get(s
, object_pid
, NULL
, NULL
, 0, NULL
, &o
) >= 0) {
821 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->pid
, pid_t
, pid_is_valid
, PID_FMT
, "OBJECT_PID");
822 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->uid
, uid_t
, uid_is_valid
, UID_FMT
, "OBJECT_UID");
823 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->gid
, gid_t
, gid_is_valid
, GID_FMT
, "OBJECT_GID");
825 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->comm
, "OBJECT_COMM");
826 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->exe
, "OBJECT_EXE");
827 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->cmdline
, "OBJECT_CMDLINE");
828 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->capeff
, "OBJECT_CAP_EFFECTIVE");
830 IOVEC_ADD_SIZED_FIELD(iovec
, n
, o
->label
, o
->label_size
, "OBJECT_SELINUX_CONTEXT");
832 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->auditid
, uint32_t, audit_session_is_valid
, "%" PRIu32
, "OBJECT_AUDIT_SESSION");
833 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->loginuid
, uid_t
, uid_is_valid
, UID_FMT
, "OBJECT_AUDIT_LOGINUID");
835 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->cgroup
, "OBJECT_SYSTEMD_CGROUP");
836 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->session
, "OBJECT_SYSTEMD_SESSION");
837 IOVEC_ADD_NUMERIC_FIELD(iovec
, n
, o
->owner_uid
, uid_t
, uid_is_valid
, UID_FMT
, "OBJECT_SYSTEMD_OWNER_UID");
838 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->unit
, "OBJECT_SYSTEMD_UNIT");
839 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->user_unit
, "OBJECT_SYSTEMD_USER_UNIT");
840 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->slice
, "OBJECT_SYSTEMD_SLICE");
841 IOVEC_ADD_STRING_FIELD(iovec
, n
, o
->user_slice
, "OBJECT_SYSTEMD_USER_SLICE");
843 IOVEC_ADD_ID128_FIELD(iovec
, n
, o
->invocation_id
, "OBJECT_SYSTEMD_INVOCATION_ID=");
849 sprintf(source_time
, "_SOURCE_REALTIME_TIMESTAMP=" USEC_FMT
, timeval_load(tv
));
850 iovec
[n
++] = IOVEC_MAKE_STRING(source_time
);
853 /* Note that strictly speaking storing the boot id here is
854 * redundant since the entry includes this in-line
855 * anyway. However, we need this indexed, too. */
856 if (!isempty(s
->boot_id_field
))
857 iovec
[n
++] = IOVEC_MAKE_STRING(s
->boot_id_field
);
859 if (!isempty(s
->machine_id_field
))
860 iovec
[n
++] = IOVEC_MAKE_STRING(s
->machine_id_field
);
862 if (!isempty(s
->hostname_field
))
863 iovec
[n
++] = IOVEC_MAKE_STRING(s
->hostname_field
);
867 if (s
->split_mode
== SPLIT_UID
&& c
&& uid_is_valid(c
->uid
))
868 /* Split up strictly by (non-root) UID */
869 journal_uid
= c
->uid
;
870 else if (s
->split_mode
== SPLIT_LOGIN
&& c
&& c
->uid
> 0 && uid_is_valid(c
->owner_uid
))
871 /* Split up by login UIDs. We do this only if the
872 * realuid is not root, in order not to accidentally
873 * leak privileged information to the user that is
874 * logged by a privileged process that is part of an
875 * unprivileged session. */
876 journal_uid
= c
->owner_uid
;
880 write_to_journal(s
, journal_uid
, iovec
, n
, priority
);
883 void server_driver_message(Server
*s
, pid_t object_pid
, const char *message_id
, const char *format
, ...) {
893 m
= N_IOVEC_META_FIELDS
+ 5 + N_IOVEC_PAYLOAD_FIELDS
+ client_context_extra_fields_n_iovec(s
->my_context
) + N_IOVEC_OBJECT_FIELDS
;
894 iovec
= newa(struct iovec
, m
);
896 assert_cc(3 == LOG_FAC(LOG_DAEMON
));
897 iovec
[n
++] = IOVEC_MAKE_STRING("SYSLOG_FACILITY=3");
898 iovec
[n
++] = IOVEC_MAKE_STRING("SYSLOG_IDENTIFIER=systemd-journald");
900 iovec
[n
++] = IOVEC_MAKE_STRING("_TRANSPORT=driver");
901 assert_cc(6 == LOG_INFO
);
902 iovec
[n
++] = IOVEC_MAKE_STRING("PRIORITY=6");
905 iovec
[n
++] = IOVEC_MAKE_STRING(message_id
);
908 va_start(ap
, format
);
909 r
= log_format_iovec(iovec
, m
, &n
, false, 0, format
, ap
);
910 /* Error handling below */
914 dispatch_message_real(s
, iovec
, n
, m
, s
->my_context
, NULL
, LOG_INFO
, object_pid
);
917 free(iovec
[k
++].iov_base
);
920 /* We failed to format the message. Emit a warning instead. */
923 xsprintf(buf
, "MESSAGE=Entry printing failed: %s", strerror(-r
));
926 iovec
[n
++] = IOVEC_MAKE_STRING("PRIORITY=4");
927 iovec
[n
++] = IOVEC_MAKE_STRING(buf
);
928 dispatch_message_real(s
, iovec
, n
, m
, s
->my_context
, NULL
, LOG_INFO
, object_pid
);
932 void server_dispatch_message(
934 struct iovec
*iovec
, size_t n
, size_t m
,
936 const struct timeval
*tv
,
940 uint64_t available
= 0;
944 assert(iovec
|| n
== 0);
949 if (LOG_PRI(priority
) > s
->max_level_store
)
952 /* Stop early in case the information will not be stored
954 if (s
->storage
== STORAGE_NONE
)
958 (void) determine_space(s
, &available
, NULL
);
960 rl
= journal_rate_limit_test(s
->rate_limit
, c
->unit
, priority
& LOG_PRIMASK
, available
);
964 /* Write a suppression message if we suppressed something */
966 server_driver_message(s
, c
->pid
,
967 "MESSAGE_ID=" SD_MESSAGE_JOURNAL_DROPPED_STR
,
968 LOG_MESSAGE("Suppressed %i messages from %s", rl
- 1, c
->unit
),
969 "N_DROPPED=%i", rl
- 1,
973 dispatch_message_real(s
, iovec
, n
, m
, c
, tv
, priority
, object_pid
);
976 int server_flush_to_var(Server
*s
, bool require_flag_file
) {
978 sd_journal
*j
= NULL
;
979 char ts
[FORMAT_TIMESPAN_MAX
];
986 if (!IN_SET(s
->storage
, STORAGE_AUTO
, STORAGE_PERSISTENT
))
989 if (!s
->runtime_journal
)
992 if (require_flag_file
&& !flushed_flag_is_set())
995 (void) system_journal_open(s
, true);
997 if (!s
->system_journal
)
1000 log_debug("Flushing to /var...");
1002 start
= now(CLOCK_MONOTONIC
);
1004 r
= sd_id128_get_machine(&machine
);
1008 r
= sd_journal_open(&j
, SD_JOURNAL_RUNTIME_ONLY
);
1010 return log_error_errno(r
, "Failed to read runtime journal: %m");
1012 sd_journal_set_data_threshold(j
, 0);
1014 SD_JOURNAL_FOREACH(j
) {
1018 f
= j
->current_file
;
1019 assert(f
&& f
->current_offset
> 0);
1023 r
= journal_file_move_to_object(f
, OBJECT_ENTRY
, f
->current_offset
, &o
);
1025 log_error_errno(r
, "Can't read entry: %m");
1029 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
1033 if (!shall_try_append_again(s
->system_journal
, r
)) {
1034 log_error_errno(r
, "Can't write entry: %m");
1039 server_vacuum(s
, false);
1041 if (!s
->system_journal
) {
1042 log_notice("Didn't flush runtime journal since rotation of system journal wasn't successful.");
1047 log_debug("Retrying write.");
1048 r
= journal_file_copy_entry(f
, s
->system_journal
, o
, f
->current_offset
, NULL
, NULL
, NULL
);
1050 log_error_errno(r
, "Can't write entry: %m");
1058 journal_file_post_change(s
->system_journal
);
1060 s
->runtime_journal
= journal_file_close(s
->runtime_journal
);
1063 (void) rm_rf("/run/log/journal", REMOVE_ROOT
);
1065 sd_journal_close(j
);
1067 server_driver_message(s
, 0, NULL
,
1068 LOG_MESSAGE("Time spent on flushing to /var is %s for %u entries.",
1069 format_timespan(ts
, sizeof(ts
), now(CLOCK_MONOTONIC
) - start
, 0),
1076 int server_process_datagram(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1077 Server
*s
= userdata
;
1078 struct ucred
*ucred
= NULL
;
1079 struct timeval
*tv
= NULL
;
1080 struct cmsghdr
*cmsg
;
1082 size_t label_len
= 0, m
;
1085 int *fds
= NULL
, v
= 0;
1089 struct cmsghdr cmsghdr
;
1091 /* We use NAME_MAX space for the SELinux label
1092 * here. The kernel currently enforces no
1093 * limit, but according to suggestions from
1094 * the SELinux people this will change and it
1095 * will probably be identical to NAME_MAX. For
1096 * now we use that, but this should be updated
1097 * one day when the final limit is known. */
1098 uint8_t buf
[CMSG_SPACE(sizeof(struct ucred
)) +
1099 CMSG_SPACE(sizeof(struct timeval
)) +
1100 CMSG_SPACE(sizeof(int)) + /* fd */
1101 CMSG_SPACE(NAME_MAX
)]; /* selinux label */
1104 union sockaddr_union sa
= {};
1106 struct msghdr msghdr
= {
1109 .msg_control
= &control
,
1110 .msg_controllen
= sizeof(control
),
1112 .msg_namelen
= sizeof(sa
),
1116 assert(fd
== s
->native_fd
|| fd
== s
->syslog_fd
|| fd
== s
->audit_fd
);
1118 if (revents
!= EPOLLIN
) {
1119 log_error("Got invalid event from epoll for datagram fd: %"PRIx32
, revents
);
1123 /* Try to get the right size, if we can. (Not all sockets support SIOCINQ, hence we just try, but don't rely on
1125 (void) ioctl(fd
, SIOCINQ
, &v
);
1127 /* Fix it up, if it is too small. We use the same fixed value as auditd here. Awful! */
1128 m
= PAGE_ALIGN(MAX3((size_t) v
+ 1,
1130 ALIGN(sizeof(struct nlmsghdr
)) + ALIGN((size_t) MAX_AUDIT_MESSAGE_LENGTH
)) + 1);
1132 if (!GREEDY_REALLOC(s
->buffer
, s
->buffer_size
, m
))
1135 iovec
.iov_base
= s
->buffer
;
1136 iovec
.iov_len
= s
->buffer_size
- 1; /* Leave room for trailing NUL we add later */
1138 n
= recvmsg(fd
, &msghdr
, MSG_DONTWAIT
|MSG_CMSG_CLOEXEC
);
1140 if (IN_SET(errno
, EINTR
, EAGAIN
))
1143 return log_error_errno(errno
, "recvmsg() failed: %m");
1146 CMSG_FOREACH(cmsg
, &msghdr
) {
1148 if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1149 cmsg
->cmsg_type
== SCM_CREDENTIALS
&&
1150 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct ucred
)))
1151 ucred
= (struct ucred
*) CMSG_DATA(cmsg
);
1152 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1153 cmsg
->cmsg_type
== SCM_SECURITY
) {
1154 label
= (char*) CMSG_DATA(cmsg
);
1155 label_len
= cmsg
->cmsg_len
- CMSG_LEN(0);
1156 } else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1157 cmsg
->cmsg_type
== SO_TIMESTAMP
&&
1158 cmsg
->cmsg_len
== CMSG_LEN(sizeof(struct timeval
)))
1159 tv
= (struct timeval
*) CMSG_DATA(cmsg
);
1160 else if (cmsg
->cmsg_level
== SOL_SOCKET
&&
1161 cmsg
->cmsg_type
== SCM_RIGHTS
) {
1162 fds
= (int*) CMSG_DATA(cmsg
);
1163 n_fds
= (cmsg
->cmsg_len
- CMSG_LEN(0)) / sizeof(int);
1167 /* And a trailing NUL, just in case */
1170 if (fd
== s
->syslog_fd
) {
1171 if (n
> 0 && n_fds
== 0)
1172 server_process_syslog_message(s
, strstrip(s
->buffer
), ucred
, tv
, label
, label_len
);
1174 log_warning("Got file descriptors via syslog socket. Ignoring.");
1176 } else if (fd
== s
->native_fd
) {
1177 if (n
> 0 && n_fds
== 0)
1178 server_process_native_message(s
, s
->buffer
, n
, ucred
, tv
, label
, label_len
);
1179 else if (n
== 0 && n_fds
== 1)
1180 server_process_native_file(s
, fds
[0], ucred
, tv
, label
, label_len
);
1182 log_warning("Got too many file descriptors via native socket. Ignoring.");
1185 assert(fd
== s
->audit_fd
);
1187 if (n
> 0 && n_fds
== 0)
1188 server_process_audit_message(s
, s
->buffer
, n
, ucred
, &sa
, msghdr
.msg_namelen
);
1190 log_warning("Got file descriptors via audit socket. Ignoring.");
1193 close_many(fds
, n_fds
);
1197 static int dispatch_sigusr1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1198 Server
*s
= userdata
;
1203 log_info("Received request to flush runtime journal from PID " PID_FMT
, si
->ssi_pid
);
1205 (void) server_flush_to_var(s
, false);
1207 server_vacuum(s
, false);
1209 r
= touch("/run/systemd/journal/flushed");
1211 log_warning_errno(r
, "Failed to touch /run/systemd/journal/flushed, ignoring: %m");
1213 server_space_usage_message(s
, NULL
);
/* Signal callback registered for SIGUSR2 by setup_signals(). userdata is the Server.
 * Logs the requesting PID, calls server_vacuum() with true, calls patch_min_use() on
 * the storage of each journal that is currently open and records a CLOCK_MONOTONIC
 * timestamp in /run/systemd/journal/rotated.
 * NOTE(review): the log text announces a rotation, but no rotation call is visible in
 * this listing; presumably the line was lost. The check guarding the warning and the
 * return statement are not visible either. */
1217 static int dispatch_sigusr2(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1218 Server
*s
= userdata
;
1223 log_info("Received request to rotate journal from PID " PID_FMT
, si
->ssi_pid
);
1225 server_vacuum(s
, true);
/* Only touch the storage records whose journal file is actually open. */
1227 if (s
->system_journal
)
1228 patch_min_use(&s
->system_storage
);
1229 if (s
->runtime_journal
)
1230 patch_min_use(&s
->runtime_storage
);
1232 /* Let clients know when the most recent rotation happened. */
1233 r
= write_timestamp_file_atomic("/run/systemd/journal/rotated", now(CLOCK_MONOTONIC
));
1235 log_warning_errno(r
, "Failed to write /run/systemd/journal/rotated, ignoring: %m");
/* Signal callback shared by SIGTERM and SIGINT (see setup_signals()). userdata is the
 * Server. Logs the received signal at LOG_INFO and asks the event loop to exit with
 * code 0 through sd_event_exit(). */
1240 static int dispatch_sigterm(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1241 Server
*s
= userdata
;
1245 log_received_signal(LOG_INFO
, si
);
1247 sd_event_exit(s
->event
, 0);
/* Signal callback registered for SIGRTMIN+1 by setup_signals(). userdata is the Server.
 * Logs the requesting PID at debug level and records a CLOCK_MONOTONIC timestamp in
 * /run/systemd/journal/synced.
 * NOTE(review): the sync call itself, the check guarding the warning and the return
 * statement are not visible in this listing. */
1251 static int dispatch_sigrtmin1(sd_event_source
*es
, const struct signalfd_siginfo
*si
, void *userdata
) {
1252 Server
*s
= userdata
;
1257 log_debug("Received request to sync from PID " PID_FMT
, si
->ssi_pid
);
1261 /* Let clients know when the most recent sync happened. */
1262 r
= write_timestamp_file_atomic("/run/systemd/journal/synced", now(CLOCK_MONOTONIC
));
1264 log_warning_errno(r
, "Failed to write /run/systemd/journal/synced, ignoring: %m");
/* Registers the signal event sources of the server on s->event.
 * SIGUSR1 -> dispatch_sigusr1, SIGUSR2 -> dispatch_sigusr2, SIGTERM and SIGINT ->
 * dispatch_sigterm (both at SD_EVENT_PRIORITY_NORMAL+20), SIGRTMIN+1 ->
 * dispatch_sigrtmin1 (at SD_EVENT_PRIORITY_NORMAL+15). Each source is stored in the
 * matching s->*_event_source field.
 * NOTE(review): the checks on r after each call and the final return are not visible
 * in this listing. */
1269 static int setup_signals(Server
*s
) {
/* The signal mask is installed first; assert_se() makes a failure here fatal. */
1274 assert_se(sigprocmask_many(SIG_SETMASK
, NULL
, SIGINT
, SIGTERM
, SIGUSR1
, SIGUSR2
, SIGRTMIN
+1, -1) >= 0);
1276 r
= sd_event_add_signal(s
->event
, &s
->sigusr1_event_source
, SIGUSR1
, dispatch_sigusr1
, s
);
1280 r
= sd_event_add_signal(s
->event
, &s
->sigusr2_event_source
, SIGUSR2
, dispatch_sigusr2
, s
);
1284 r
= sd_event_add_signal(s
->event
, &s
->sigterm_event_source
, SIGTERM
, dispatch_sigterm
, s
);
1288 /* Let's process SIGTERM late, so that we flush all queued
1289 * messages to disk before we exit */
1290 r
= sd_event_source_set_priority(s
->sigterm_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1294 /* When journald is invoked on the terminal (when debugging),
1295 * it's useful if C-c is handled equivalent to SIGTERM. */
1296 r
= sd_event_add_signal(s
->event
, &s
->sigint_event_source
, SIGINT
, dispatch_sigterm
, s
);
1300 r
= sd_event_source_set_priority(s
->sigint_event_source
, SD_EVENT_PRIORITY_NORMAL
+20);
1304 /* SIGRTMIN+1 causes an immediate sync. We process this very
1305 * late, so that everything else queued at this point is
1306 * really written to disk. Clients can watch
1307 * /run/systemd/journal/synced with inotify until its mtime
1308 * changes to see when a sync happened. */
1309 r
= sd_event_add_signal(s
->event
, &s
->sigrtmin1_event_source
, SIGRTMIN
+1, dispatch_sigrtmin1
, s
);
1313 r
= sd_event_source_set_priority(s
->sigrtmin1_event_source
, SD_EVENT_PRIORITY_NORMAL
+15);
/* Per-item callback handed to proc_cmdline_parse() by server_init().
 * Recognized keys and the Server field each one sets:
 *   systemd.journald.forward_to_{syslog,kmsg,console,wall} -> s->forward_to_*
 *     (parse_boolean(); a key given without a value counts as true)
 *   systemd.journald.max_level_{console,store,syslog,kmsg,wall} -> s->max_level_*
 *     (log_level_from_string(); proc_cmdline_value_missing() is consulted first)
 * Any other key starting with "systemd.journald" only produces a warning.
 * NOTE(review): the assignment of s (presumably from data), the checks on r that
 * presumably select between the warning and the assignment, and the return statements
 * are not visible in this listing. */
1320 static int parse_proc_cmdline_item(const char *key
, const char *value
, void *data
) {
1326 if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_syslog")) {
1328 r
= value
? parse_boolean(value
) : true;
1330 log_warning("Failed to parse forward to syslog switch \"%s\". Ignoring.", value
);
1332 s
->forward_to_syslog
= r
;
1334 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_kmsg")) {
1336 r
= value
? parse_boolean(value
) : true;
1338 log_warning("Failed to parse forward to kmsg switch \"%s\". Ignoring.", value
);
1340 s
->forward_to_kmsg
= r
;
1342 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_console")) {
1344 r
= value
? parse_boolean(value
) : true;
1346 log_warning("Failed to parse forward to console switch \"%s\". Ignoring.", value
);
1348 s
->forward_to_console
= r
;
1350 } else if (proc_cmdline_key_streq(key
, "systemd.journald.forward_to_wall")) {
1352 r
= value
? parse_boolean(value
) : true;
1354 log_warning("Failed to parse forward to wall switch \"%s\". Ignoring.", value
);
1356 s
->forward_to_wall
= r
;
1358 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_console")) {
1360 if (proc_cmdline_value_missing(key
, value
))
1363 r
= log_level_from_string(value
);
1365 log_warning("Failed to parse max level console value \"%s\". Ignoring.", value
);
1367 s
->max_level_console
= r
;
1369 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_store")) {
1371 if (proc_cmdline_value_missing(key
, value
))
1374 r
= log_level_from_string(value
);
1376 log_warning("Failed to parse max level store value \"%s\". Ignoring.", value
);
1378 s
->max_level_store
= r
;
1380 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_syslog")) {
1382 if (proc_cmdline_value_missing(key
, value
))
1385 r
= log_level_from_string(value
);
1387 log_warning("Failed to parse max level syslog value \"%s\". Ignoring.", value
);
1389 s
->max_level_syslog
= r
;
1391 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_kmsg")) {
1393 if (proc_cmdline_value_missing(key
, value
))
1396 r
= log_level_from_string(value
);
1398 log_warning("Failed to parse max level kmsg value \"%s\". Ignoring.", value
);
1400 s
->max_level_kmsg
= r
;
1402 } else if (proc_cmdline_key_streq(key
, "systemd.journald.max_level_wall")) {
1404 if (proc_cmdline_value_missing(key
, value
))
1407 r
= log_level_from_string(value
);
1409 log_warning("Failed to parse max level wall value \"%s\". Ignoring.", value
);
1411 s
->max_level_wall
= r
;
1413 } else if (startswith(key
, "systemd.journald"))
1414 log_warning("Unknown journald kernel command line option \"%s\". Ignoring.", key
);
1416 /* do not warn about state here, since probably systemd already did */
/* Loads the journald configuration into s: the main file PKGSYSCONFDIR/journald.conf
 * plus the drop-ins found under the systemd/journald.conf.d search paths. Settings are
 * resolved through journald_gperf_lookup and parse problems are reported
 * (CONFIG_PARSE_WARN). Returns whatever config_parse_many_nulstr() returns.
 * NOTE(review): at least one argument line of the call is not visible in this listing. */
1420 static int server_parse_config_file(Server
*s
) {
1423 return config_parse_many_nulstr(PKGSYSCONFDIR
"/journald.conf",
1424 CONF_PATHS_NULSTR("systemd/journald.conf.d"),
1426 config_item_perf_lookup
, journald_gperf_lookup
,
1427 CONFIG_PARSE_WARN
, s
);
/* Timer callback that server_schedule_sync() attaches to s->sync_event_source.
 * userdata is the Server.
 * NOTE(review): only the signature and the userdata cast are visible in this listing;
 * presumably the body performs the deferred sync - confirm against the full file. */
1430 static int server_dispatch_sync(sd_event_source
*es
, usec_t t
, void *userdata
) {
1431 Server
*s
= userdata
;
/* Arranges for the journal to be synced in response to a message of the given syslog
 * priority.
 *   - priority <= LOG_CRIT: handled as an immediate sync (see the comment in the branch).
 *   - a sync is already scheduled (s->sync_scheduled): nothing more is set up.
 *   - s->sync_interval_usec > 0: a one-shot CLOCK_MONOTONIC timer is armed for
 *     "now + sync_interval_usec", created on first use with server_dispatch_sync as
 *     callback and SD_EVENT_PRIORITY_IMPORTANT, re-timed otherwise.
 * Finally s->sync_scheduled is set to true.
 * NOTE(review): the bodies of the first two branches, the checks on r and the return
 * statements are not visible in this listing. */
1439 int server_schedule_sync(Server
*s
, int priority
) {
1444 if (priority
<= LOG_CRIT
) {
1445 /* Immediately sync to disk when this is of priority CRIT, ALERT, EMERG */
1450 if (s
->sync_scheduled
)
1453 if (s
->sync_interval_usec
> 0) {
/* Base the deadline on the timestamp of the event loop. */
1456 r
= sd_event_now(s
->event
, CLOCK_MONOTONIC
, &when
);
1460 when
+= s
->sync_interval_usec
;
1462 if (!s
->sync_event_source
) {
1463 r
= sd_event_add_time(
1465 &s
->sync_event_source
,
1468 server_dispatch_sync
, s
);
1472 r
= sd_event_source_set_priority(s
->sync_event_source
, SD_EVENT_PRIORITY_IMPORTANT
);
1474 r
= sd_event_source_set_time(s
->sync_event_source
, when
);
1478 r
= sd_event_source_set_enabled(s
->sync_event_source
, SD_EVENT_ONESHOT
);
1483 s
->sync_scheduled
= true;
/* I/O callback that server_open_hostname() registers on the hostname fd. userdata is
 * the Server. Refreshes the cached hostname through server_cache_hostname(). */
1489 static int dispatch_hostname_change(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1490 Server
*s
= userdata
;
1494 server_cache_hostname(s
);
/* Opens /proc/sys/kernel/hostname (read-only, close-on-exec, non-blocking) into
 * s->hostname_fd and registers it with the event loop so that
 * dispatch_hostname_change() runs on events; the resulting source gets priority
 * SD_EVENT_PRIORITY_IMPORTANT-10. An open failure is returned as an error via
 * log_error_errno(). For a registration failure both a "warn, close the fd and
 * ignore" path and a hard-error path are visible.
 * NOTE(review): the condition choosing between those two paths (the in-code comment
 * mentions kernels prior to 3.2) and the remaining checks on r are not visible in this
 * listing. */
1498 static int server_open_hostname(Server
*s
) {
1503 s
->hostname_fd
= open("/proc/sys/kernel/hostname",
1504 O_RDONLY
|O_CLOEXEC
|O_NONBLOCK
|O_NOCTTY
);
1505 if (s
->hostname_fd
< 0)
1506 return log_error_errno(errno
, "Failed to open /proc/sys/kernel/hostname: %m");
/* An event mask of 0 is passed here; NOTE(review): presumably only the events that
 * are always reported are of interest - confirm against the sd_event_add_io docs. */
1508 r
= sd_event_add_io(s
->event
, &s
->hostname_event_source
, s
->hostname_fd
, 0, dispatch_hostname_change
, s
);
1510 /* kernels prior to 3.2 don't support polling this file. Ignore
1513 log_warning_errno(r
, "Failed to register hostname fd in event loop, ignoring: %m");
1514 s
->hostname_fd
= safe_close(s
->hostname_fd
);
1518 return log_error_errno(r
, "Failed to register hostname fd in event loop: %m");
1521 r
= sd_event_source_set_priority(s
->hostname_event_source
, SD_EVENT_PRIORITY_IMPORTANT
-10);
1523 return log_error_errno(r
, "Failed to adjust priority of host name event source: %m");
/* I/O callback for the notification socket (registered for EPOLLOUT by
 * server_connect_notify()). userdata is the Server. Each invocation handles one
 * pending item, in this order of precedence:
 *   1. the initial ready message, while s->sent_notify_ready is false;
 *   2. the watchdog message, while s->send_watchdog is true;
 *   3. one queued stdout stream notification.
 * Messages are sent with MSG_DONTWAIT. If watchdog or stream work remains the source
 * stays enabled, otherwise it is switched to SD_EVENT_OFF.
 * NOTE(review): parts of both message literals, the checks on l around the errno
 * tests and the return statements are not visible in this listing. */
1528 static int dispatch_notify_event(sd_event_source
*es
, int fd
, uint32_t revents
, void *userdata
) {
1529 Server
*s
= userdata
;
1533 assert(s
->notify_event_source
== es
);
1534 assert(s
->notify_fd
== fd
);
1536 /* The $NOTIFY_SOCKET is writable again, now send exactly one
1537 * message on it. Either it's the watchdog event, the initial
1538 * READY=1 event or an stdout stream event. If there's nothing
1539 * to write anymore, turn our event source off. The next time
1540 * there's something to send it will be turned on again. */
1542 if (!s
->sent_notify_ready
) {
1543 static const char p
[] =
1545 "STATUS=Processing requests...";
1548 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
/* EAGAIN is singled out from other send errors; NOTE(review): presumably it just
 * means "try again on the next wakeup" - the branch body is not visible. */
1550 if (errno
== EAGAIN
)
1553 return log_error_errno(errno
, "Failed to send READY=1 notification message: %m");
1556 s
->sent_notify_ready
= true;
1557 log_debug("Sent READY=1 notification.");
1559 } else if (s
->send_watchdog
) {
1561 static const char p
[] =
1566 l
= send(s
->notify_fd
, p
, strlen(p
), MSG_DONTWAIT
);
1568 if (errno
== EAGAIN
)
1571 return log_error_errno(errno
, "Failed to send WATCHDOG=1 notification message: %m");
1574 s
->send_watchdog
= false;
1575 log_debug("Sent WATCHDOG=1 notification.");
1577 } else if (s
->stdout_streams_notify_queue
)
1578 /* Dispatch one stream notification event */
1579 stdout_stream_send_notify(s
->stdout_streams_notify_queue
);
1581 /* Leave us enabled if there's still more to do. */
1582 if (s
->send_watchdog
|| s
->stdout_streams_notify_queue
)
1585 /* There was nothing to do anymore, let's turn ourselves off. */
1586 r
= sd_event_source_set_enabled(es
, SD_EVENT_OFF
);
1588 return log_error_errno(r
, "Failed to turn off notify event source: %m");
/* Timer callback attached to s->watchdog_event_source by server_connect_notify().
 * userdata is the Server; usec is the time value passed in by the event loop.
 * Flags a watchdog message as pending (s->send_watchdog), switches the notify event
 * source on so that dispatch_notify_event() can send it, then re-arms this timer at
 * usec + watchdog_usec/2 and re-enables it.
 * NOTE(review): the checks on r and the final return are not visible in this listing.
 * Failing to enable the notify source is only warned about, while failures on the
 * watchdog timer itself are returned as errors. */
1593 static int dispatch_watchdog(sd_event_source
*es
, uint64_t usec
, void *userdata
) {
1594 Server
*s
= userdata
;
1599 s
->send_watchdog
= true;
1601 r
= sd_event_source_set_enabled(s
->notify_event_source
, SD_EVENT_ON
);
1603 log_warning_errno(r
, "Failed to turn on notify event source: %m");
1605 r
= sd_event_source_set_time(s
->watchdog_event_source
, usec
+ s
->watchdog_usec
/ 2);
1607 return log_error_errno(r
, "Failed to restart watchdog event source: %m");
1609 r
= sd_event_source_set_enabled(s
->watchdog_event_source
, SD_EVENT_ON
);
1611 return log_error_errno(r
, "Failed to enable watchdog event source: %m");
/* Connects the server to the socket named by $NOTIFY_SOCKET without ever blocking on
 * it (the long explanation inside the function gives the deadlock rationale).
 * Steps visible here:
 *   - read $NOTIFY_SOCKET; values not starting with '@' or '/', or with nothing after
 *     that first character, are reported as invalid; values longer than sun_path are
 *     reported as too long;
 *   - create a non-blocking, close-on-exec AF_UNIX datagram socket in s->notify_fd and
 *     try to enlarge its send buffer to NOTIFY_SNDBUF_SIZE (best effort);
 *   - copy the path into sun_path with strncpy(); a leading '@' is replaced by a NUL
 *     byte. NOTE(review): this looks like the abstract socket namespace convention,
 *     and strncpy() leaves sun_path unterminated when the path fills it exactly -
 *     confirm that SOCKADDR_UN_LEN() copes with that;
 *   - connect(), then watch the fd for EPOLLOUT with dispatch_notify_event();
 *   - if sd_watchdog_enabled() reports a watchdog, set s->send_watchdog and add a
 *     CLOCK_MONOTONIC timer (dispatch_watchdog) due at now + watchdog_usec/2 with an
 *     accuracy of watchdog_usec/4.
 * NOTE(review): the declarations, the handling of an unset variable, the return
 * values of the error branches and the checks on r are not visible in this listing. */
1616 static int server_connect_notify(Server
*s
) {
1617 union sockaddr_union sa
= {
1618 .un
.sun_family
= AF_UNIX
,
/* Must only be called once per Server: no socket and no event source may exist yet. */
1624 assert(s
->notify_fd
< 0);
1625 assert(!s
->notify_event_source
);
1628 So here's the problem: we'd like to send notification
1629 messages to PID 1, but we cannot do that via sd_notify(),
1630 since that's synchronous, and we might end up blocking on
1631 it. Specifically: given that PID 1 might block on
1632 dbus-daemon during IPC, and dbus-daemon is logging to us,
1633 and might hence block on us, we might end up in a deadlock
1634 if we block on sending PID 1 notification messages — by
1635 generating a full blocking circle. To avoid this, let's
1636 create a non-blocking socket, and connect it to the
1637 notification socket, and then wait for POLLOUT before we
1638 send anything. This should efficiently avoid any deadlocks,
1639 as we'll never block on PID 1, hence PID 1 can safely block
1640 on dbus-daemon which can safely block on us again.
1642 Don't think that this issue is real? It is, see:
1643 https://github.com/systemd/systemd/issues/1505
1646 e
= getenv("NOTIFY_SOCKET");
1650 if (!IN_SET(e
[0], '@', '/') || e
[1] == 0) {
1651 log_error("NOTIFY_SOCKET set to an invalid value: %s", e
);
1655 if (strlen(e
) > sizeof(sa
.un
.sun_path
)) {
1656 log_error("NOTIFY_SOCKET path too long: %s", e
);
1660 s
->notify_fd
= socket(AF_UNIX
, SOCK_DGRAM
|SOCK_CLOEXEC
|SOCK_NONBLOCK
, 0);
1661 if (s
->notify_fd
< 0)
1662 return log_error_errno(errno
, "Failed to create notify socket: %m");
1664 (void) fd_inc_sndbuf(s
->notify_fd
, NOTIFY_SNDBUF_SIZE
);
1666 strncpy(sa
.un
.sun_path
, e
, sizeof(sa
.un
.sun_path
));
1667 if (sa
.un
.sun_path
[0] == '@')
1668 sa
.un
.sun_path
[0] = 0;
1670 r
= connect(s
->notify_fd
, &sa
.sa
, SOCKADDR_UN_LEN(sa
.un
));
1672 return log_error_errno(errno
, "Failed to connect to notify socket: %m");
1674 r
= sd_event_add_io(s
->event
, &s
->notify_event_source
, s
->notify_fd
, EPOLLOUT
, dispatch_notify_event
, s
);
1676 return log_error_errno(r
, "Failed to watch notification socket: %m");
1678 if (sd_watchdog_enabled(false, &s
->watchdog_usec
) > 0) {
1679 s
->send_watchdog
= true;
1681 r
= sd_event_add_time(s
->event
, &s
->watchdog_event_source
, CLOCK_MONOTONIC
, now(CLOCK_MONOTONIC
) + s
->watchdog_usec
/2, s
->watchdog_usec
/4, dispatch_watchdog
, s
);
1683 return log_error_errno(r
, "Failed to add watchdog time event: %m");
1686 /* This should fire pretty soon, which we'll use to send the
/* Initializes a Server object and brings up all of its inputs.
 * Phases visible in this listing, in order:
 *   1. set every fd field to -1 and fill in defaults (compression on, kmsg reading on,
 *      sync interval, rate limit, forward to wall, max levels, line_max, metrics);
 *   2. apply the configuration files, then the kernel command line (a command line
 *      parse failure is only warned about);
 *   3. if exactly one of rate limit interval and burst is zero, zero both;
 *   4. create /run/systemd/journal, the user journal map, the mmap cache, the deferred
 *      closes set and the default event loop;
 *   5. classify the file descriptors received via sd_listen_fds() as native, stdout,
 *      syslog or audit sockets (more than one of a kind is reported as an error) and
 *      collect the rest in fds for server_restore_streams(); anything still left is
 *      reported as unknown and the set is freed;
 *   6. open the stdout, syslog and native sockets and /dev/kmsg, the audit socket only
 *      if one was passed in or no sockets were passed at all, then the kernel sequence
 *      number, the hostname watch and the signal handlers;
 *   7. create the udev context and the rate limiter, look up the cgroup root, cache
 *      hostname, boot ID and machine ID, and set up storage names and paths;
 *   8. connect the notification socket and acquire the default client context, both
 *      best effort, then return the result of system_journal_open(s, false).
 * NOTE(review): local declarations, most checks on r and on allocation results, the
 * bodies of the per-socket branches and the intermediate return statements are not
 * visible in this listing. The return value of server_parse_config_file() is not used
 * on the visible line. */
1692 int server_init(Server
*s
) {
1693 _cleanup_fdset_free_ FDSet
*fds
= NULL
;
1700 s
->syslog_fd
= s
->native_fd
= s
->stdout_fd
= s
->dev_kmsg_fd
= s
->audit_fd
= s
->hostname_fd
= s
->notify_fd
= -1;
1701 s
->compress
.enabled
= true;
1702 s
->compress
.threshold_bytes
= (uint64_t) -1;
1704 s
->read_kmsg
= true;
1706 s
->watchdog_usec
= USEC_INFINITY
;
1708 s
->sync_interval_usec
= DEFAULT_SYNC_INTERVAL_USEC
;
1709 s
->sync_scheduled
= false;
1711 s
->rate_limit_interval
= DEFAULT_RATE_LIMIT_INTERVAL
;
1712 s
->rate_limit_burst
= DEFAULT_RATE_LIMIT_BURST
;
1714 s
->forward_to_wall
= true;
1716 s
->max_file_usec
= DEFAULT_MAX_FILE_USEC
;
1718 s
->max_level_store
= LOG_DEBUG
;
1719 s
->max_level_syslog
= LOG_DEBUG
;
1720 s
->max_level_kmsg
= LOG_NOTICE
;
1721 s
->max_level_console
= LOG_INFO
;
1722 s
->max_level_wall
= LOG_EMERG
;
1724 s
->line_max
= DEFAULT_LINE_MAX
;
1726 journal_reset_metrics(&s
->system_storage
.metrics
);
1727 journal_reset_metrics(&s
->runtime_storage
.metrics
);
/* Configuration files first, kernel command line second, so the latter is applied
 * on top of the former. */
1729 server_parse_config_file(s
);
1731 r
= proc_cmdline_parse(parse_proc_cmdline_item
, s
, PROC_CMDLINE_STRIP_RD_PREFIX
);
1733 log_warning_errno(r
, "Failed to parse kernel command line, ignoring: %m");
/* True when exactly one of the two rate limit settings is zero. */
1735 if (!!s
->rate_limit_interval
^ !!s
->rate_limit_burst
) {
1736 log_debug("Setting both rate limit interval and burst from "USEC_FMT
",%u to 0,0",
1737 s
->rate_limit_interval
, s
->rate_limit_burst
);
1738 s
->rate_limit_interval
= s
->rate_limit_burst
= 0;
1741 (void) mkdir_p("/run/systemd/journal", 0755);
1743 s
->user_journals
= ordered_hashmap_new(NULL
);
1744 if (!s
->user_journals
)
1747 s
->mmap
= mmap_cache_new();
1751 s
->deferred_closes
= set_new(NULL
);
1752 if (!s
->deferred_closes
)
1755 r
= sd_event_default(&s
->event
);
1757 return log_error_errno(r
, "Failed to create event loop: %m");
1759 n
= sd_listen_fds(true);
1761 return log_error_errno(n
, "Failed to read listening file descriptors from environment: %m");
/* Sort the passed-in descriptors by socket type and bound path. */
1763 for (fd
= SD_LISTEN_FDS_START
; fd
< SD_LISTEN_FDS_START
+ n
; fd
++) {
1765 if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/socket", 0) > 0) {
1767 if (s
->native_fd
>= 0) {
1768 log_error("Too many native sockets passed.");
1774 } else if (sd_is_socket_unix(fd
, SOCK_STREAM
, 1, "/run/systemd/journal/stdout", 0) > 0) {
1776 if (s
->stdout_fd
>= 0) {
1777 log_error("Too many stdout sockets passed.");
1783 } else if (sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/dev/log", 0) > 0 ||
1784 sd_is_socket_unix(fd
, SOCK_DGRAM
, -1, "/run/systemd/journal/dev-log", 0) > 0) {
1786 if (s
->syslog_fd
>= 0) {
1787 log_error("Too many /dev/log sockets passed.");
1793 } else if (sd_is_socket(fd
, AF_NETLINK
, SOCK_RAW
, -1) > 0) {
1795 if (s
->audit_fd
>= 0) {
1796 log_error("Too many audit sockets passed.");
1810 r
= fdset_put(fds
, fd
);
1816 /* Try to restore streams, but don't bother if this fails */
1817 (void) server_restore_streams(s
, fds
);
1819 if (fdset_size(fds
) > 0) {
1820 log_warning("%u unknown file descriptors passed, closing.", fdset_size(fds
));
1821 fds
= fdset_free(fds
);
/* Remembered before the open calls below, which may fill in the fd fields. */
1824 no_sockets
= s
->native_fd
< 0 && s
->stdout_fd
< 0 && s
->syslog_fd
< 0 && s
->audit_fd
< 0;
1826 /* always open stdout, syslog, native, and kmsg sockets */
1828 /* systemd-journald.socket: /run/systemd/journal/stdout */
1829 r
= server_open_stdout_socket(s
);
1833 /* systemd-journald-dev-log.socket: /run/systemd/journal/dev-log */
1834 r
= server_open_syslog_socket(s
);
1838 /* systemd-journald.socket: /run/systemd/journal/socket */
1839 r
= server_open_native_socket(s
);
1844 r
= server_open_dev_kmsg(s
);
1848 /* Unless we got *some* sockets and not audit, open audit socket */
1849 if (s
->audit_fd
>= 0 || no_sockets
) {
1850 r
= server_open_audit(s
);
1855 r
= server_open_kernel_seqnum(s
);
1859 r
= server_open_hostname(s
);
1863 r
= setup_signals(s
);
1867 s
->udev
= udev_new();
1871 s
->rate_limit
= journal_rate_limit_new(s
->rate_limit_interval
, s
->rate_limit_burst
);
1875 r
= cg_get_root_path(&s
->cgroup_root
);
1879 server_cache_hostname(s
);
1880 server_cache_boot_id(s
);
1881 server_cache_machine_id(s
);
1883 s
->runtime_storage
.name
= "Runtime journal";
1884 s
->system_storage
.name
= "System journal";
1886 s
->runtime_storage
.path
= strjoin("/run/log/journal/", SERVER_MACHINE_ID(s
));
1887 s
->system_storage
.path
= strjoin("/var/log/journal/", SERVER_MACHINE_ID(s
));
1888 if (!s
->runtime_storage
.path
|| !s
->system_storage
.path
)
1891 (void) server_connect_notify(s
);
1893 (void) client_context_acquire_default(s
);
1895 return system_journal_open(s
, false);
/* Calls journal_file_maybe_append_tag() with the current CLOCK_REALTIME time on the
 * system journal (if open) and on every file in s->user_journals.
 * NOTE(review): the local declarations and any surrounding conditional compilation
 * are not visible in this listing. */
1898 void server_maybe_append_tags(Server
*s
) {
/* One timestamp is taken up front and shared by all files. */
1904 n
= now(CLOCK_REALTIME
);
1906 if (s
->system_journal
)
1907 journal_file_maybe_append_tag(s
->system_journal
, n
);
1909 ORDERED_HASHMAP_FOREACH(f
, s
->user_journals
, i
)
1910 journal_file_maybe_append_tag(f
, n
);
/* Releases everything owned by the Server, in this order: deferred journal closes,
 * stdout streams, client contexts, the system, runtime and user journals, all event
 * sources and the event loop, all file descriptors, the rate limiter, the kernel
 * sequence number mapping, owned strings, the mmap cache and the udev context.
 * NOTE(review): some lines of the original function are not visible in this listing,
 * so this list may be incomplete. */
1914 void server_done(Server
*s
) {
1917 set_free_with_destructor(s
->deferred_closes
, journal_file_close
);
/* The loop re-reads the list head each time; NOTE(review): presumably
 * stdout_stream_free() unlinks the stream it frees - confirm. */
1919 while (s
->stdout_streams
)
1920 stdout_stream_free(s
->stdout_streams
);
1922 client_context_flush_all(s
);
1924 if (s
->system_journal
)
1925 (void) journal_file_close(s
->system_journal
);
1927 if (s
->runtime_journal
)
1928 (void) journal_file_close(s
->runtime_journal
);
1930 ordered_hashmap_free_with_destructor(s
->user_journals
, journal_file_close
);
1932 sd_event_source_unref(s
->syslog_event_source
);
1933 sd_event_source_unref(s
->native_event_source
);
1934 sd_event_source_unref(s
->stdout_event_source
);
1935 sd_event_source_unref(s
->dev_kmsg_event_source
);
1936 sd_event_source_unref(s
->audit_event_source
);
1937 sd_event_source_unref(s
->sync_event_source
);
1938 sd_event_source_unref(s
->sigusr1_event_source
);
1939 sd_event_source_unref(s
->sigusr2_event_source
);
1940 sd_event_source_unref(s
->sigterm_event_source
);
1941 sd_event_source_unref(s
->sigint_event_source
);
1942 sd_event_source_unref(s
->sigrtmin1_event_source
);
1943 sd_event_source_unref(s
->hostname_event_source
);
1944 sd_event_source_unref(s
->notify_event_source
);
1945 sd_event_source_unref(s
->watchdog_event_source
);
1946 sd_event_unref(s
->event
);
1948 safe_close(s
->syslog_fd
);
1949 safe_close(s
->native_fd
);
1950 safe_close(s
->stdout_fd
);
1951 safe_close(s
->dev_kmsg_fd
);
1952 safe_close(s
->audit_fd
);
1953 safe_close(s
->hostname_fd
);
1954 safe_close(s
->notify_fd
);
1957 journal_rate_limit_free(s
->rate_limit
);
/* The mapping is released with a length of one uint64_t. */
1959 if (s
->kernel_seqnum
)
1960 munmap(s
->kernel_seqnum
, sizeof(uint64_t));
1964 free(s
->cgroup_root
);
1965 free(s
->hostname_field
);
1966 free(s
->runtime_storage
.path
);
1967 free(s
->system_storage
.path
);
1970 mmap_cache_unref(s
->mmap
);
1972 udev_unref(s
->udev
);
/* Names of the Storage enum values, indexed by enum value. The two macro invocations
 * after the table instantiate helpers from it: DEFINE_STRING_TABLE_LOOKUP for the
 * storage/Storage pair and DEFINE_CONFIG_PARSE_ENUM for config_parse_storage, whose
 * error text is "Failed to parse storage setting".
 * NOTE(review): the closing line of the initializer is not visible in this listing. */
1975 static const char* const storage_table
[_STORAGE_MAX
] = {
1976 [STORAGE_AUTO
] = "auto",
1977 [STORAGE_VOLATILE
] = "volatile",
1978 [STORAGE_PERSISTENT
] = "persistent",
1979 [STORAGE_NONE
] = "none"
1982 DEFINE_STRING_TABLE_LOOKUP(storage
, Storage
);
1983 DEFINE_CONFIG_PARSE_ENUM(config_parse_storage
, storage
, Storage
, "Failed to parse storage setting");
/* Names of the SplitMode enum values, indexed by enum value. The two macro invocations
 * after the table instantiate helpers from it: DEFINE_STRING_TABLE_LOOKUP for the
 * split_mode/SplitMode pair and DEFINE_CONFIG_PARSE_ENUM for config_parse_split_mode,
 * whose error text is "Failed to parse split mode setting".
 * NOTE(review): the closing line of the initializer is not visible in this listing. */
1985 static const char* const split_mode_table
[_SPLIT_MAX
] = {
1986 [SPLIT_LOGIN
] = "login",
1987 [SPLIT_UID
] = "uid",
1988 [SPLIT_NONE
] = "none",
1991 DEFINE_STRING_TABLE_LOOKUP(split_mode
, SplitMode
);
1992 DEFINE_CONFIG_PARSE_ENUM(config_parse_split_mode
, split_mode
, SplitMode
, "Failed to parse split mode setting");
/* Config parser callback for the LineMax= setting.
 *   - An empty rvalue resets the target (*sz) to DEFAULT_LINE_MAX.
 *   - Otherwise the value is parsed with parse_size() using base 1024; a parse failure
 *     is logged at LOG_ERR and, per the message, ignored.
 *   - Values that are too small are clamped to 79 and values above SSIZE_MAX-1 are
 *     clamped to SSIZE_MAX-1, each with a LOG_WARNING; the in-code comments give the
 *     reason for both limits.
 * NOTE(review): most of the parameter list, the local declarations, the lower-bound
 * condition, the assignments that store the result and the return statements are not
 * visible in this listing. */
1994 int config_parse_line_max(
1996 const char *filename
,
1998 const char *section
,
1999 unsigned section_line
,
2014 if (isempty(rvalue
))
2015 /* Empty assignment means default */
2016 *sz
= DEFAULT_LINE_MAX
;
2020 r
= parse_size(rvalue
, 1024, &v
);
2022 log_syntax(unit
, LOG_ERR
, filename
, line
, r
, "Failed to parse LineMax= value, ignoring: %s", rvalue
);
2027 /* Why specify 79 here as minimum line length? Simply, because the most common traditional
2028 * terminal size is 80ch, and it might make sense to break one character before the natural
2029 * line break would occur on that. */
2030 log_syntax(unit
, LOG_WARNING
, filename
, line
, 0, "LineMax= too small, clamping to 79: %s", rvalue
);
2032 } else if (v
> (uint64_t) (SSIZE_MAX
-1)) {
2033 /* So, why specify SSIZE_MAX-1 here? Because that's one below the largest size value read()
2034 * can return, and we need one extra byte for the trailing NUL byte. Of course IRL such large
2035 * memory allocations will fail anyway, hence this limit is mostly theoretical anyway, as we'll
2036 * fail much earlier anyway. */
2037 log_syntax(unit
, LOG_WARNING
, filename
, line
, 0, "LineMax= too large, clamping to %" PRIu64
": %s", (uint64_t) (SSIZE_MAX
-1), rvalue
);
2046 int config_parse_compress(const char* unit
,
2047 const char *filename
,
2049 const char *section
,
2050 unsigned section_line
,
2056 JournalCompressOptions
* compress
= data
;
2059 if (streq(rvalue
, "1")) {
2060 log_syntax(unit
, LOG_WARNING
, filename
, line
, 0,
2061 "Compress= ambiguously specified as 1, enabling compression with default threshold");
2062 compress
->enabled
= true;
2063 } else if (streq(rvalue
, "0")) {
2064 log_syntax(unit
, LOG_WARNING
, filename
, line
, 0,
2065 "Compress= ambiguously specified as 0, disabling compression");
2066 compress
->enabled
= false;
2067 } else if ((r
= parse_boolean(rvalue
)) >= 0)
2068 compress
->enabled
= r
;
2069 else if (parse_size(rvalue
, 1024, &compress
->threshold_bytes
) == 0)
2070 compress
->enabled
= true;
2071 else if (isempty(rvalue
)) {
2072 compress
->enabled
= true;
2073 compress
->threshold_bytes
= (uint64_t) -1;
2075 log_syntax(unit
, LOG_ERR
, filename
, line
, r
, "Failed to parse Compress= value, ignoring: %s", rvalue
);